From ddac40ff0bf8112d673b64f6963af7eb18ef4d79 Mon Sep 17 00:00:00 2001 From: Ryan Malloy Date: Sat, 2 May 2026 20:24:25 -0600 Subject: [PATCH] Phase 2 foundations: _types.py, converters.py, post-login protocol notes MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Decoded the post-login execution flow from docs/CAPTURES/02-select-1.socat.log: SQ_PREPARE format (validated against both observed PREPAREs): [short SQ_PREPARE=2] [short flags=0] [int sqlLen] ← SQL byte count, NOT including nul [bytes sql] [byte 0] ← nul terminator [short 0x0016] ← observed 22; cursor options? statement type? [short 0x0031] ← observed 49; identical across both PREPAREs [short SQ_EOT=12] SQ_TUPLE format (definitive): [short SQ_TUPLE=14] [int 0] ← flags / reserved [short payloadLen] [bytes payload] ← column values back-to-back, per type encoding SQ_DONE format (partial — see PROTOCOL_NOTES.md §6e for what's known) JDBC's full prepare/fetch/release sequence (PREPARE → DESCRIBE → ID(3 =cursor name) → ID(9=NFETCH) → TUPLE → DONE → ID(10=close) → ID(11=release)) documented in §6c. The action codes inside SQ_ID roughly map to other SQ_* tag values from IfxMessageTypes. For Python MVP we'll likely try SQ_COMMAND=1 (execute-immediate) first — it might let us skip the cursor lifecycle for parameterless queries. New modules: src/informix_db/_types.py — IfxType IntEnum ported from com.informix.lang.IfxTypes. All IDS internal type codes (CHAR=0, SMALLINT=1, INT=2, ..., BOOLEAN=45, BIGINT=52, BIGSERIAL=53, CLOB=101, BLOB=102) plus the high-bit flags (NOTNULLABLE=0x100 etc) and helpers base_type() / is_nullable() to strip and inspect the flag byte. src/informix_db/converters.py — wire-bytes → Python decoders for the Phase-2 MVP type set: SMALLINT, INT, BIGINT, SMFLOAT, FLOAT, CHAR, VARCHAR, NCHAR, NVCHAR, LVARCHAR, BOOL, DATE. Plus FIXED_WIDTHS table for the row decoder. ENCODERS dict declared empty (Phase 4 fills it in for parameter binding). 
DATE handling uses Informix epoch (1899-12-31, day 0); 4-byte BE int day count → datetime.date. Smoke-tested decoders all return correct Python values. Cursor / _resultset implementation NOT in this commit — they need deeper SQ_DESCRIBE byte-layout analysis and the SQ_ID sub-action vocabulary characterization. Both are bounded-but-substantial Phase 2 tasks deferred to a fresh session. 40 unit tests still passing, ruff clean. --- docs/PROTOCOL_NOTES.md | Bin 26898 -> 30293 bytes src/informix_db/_types.py | 86 +++++++++++++++++++++++ src/informix_db/converters.py | 128 ++++++++++++++++++++++++++++++++++ 3 files changed, 214 insertions(+) create mode 100644 src/informix_db/_types.py create mode 100644 src/informix_db/converters.py diff --git a/docs/PROTOCOL_NOTES.md b/docs/PROTOCOL_NOTES.md index 2e38bb3d45073830f5aa4d7e63f4287ff8029994..7b1e74c82467f565ae4b09a04d71ce7e149c457d 100644 GIT binary patch delta 3431 zcmaJ^%Wfmb6@`JsKo}cA5Mwm*!1j&kAf$$*NRFiFfrhN1=tO~DBXN?&Ku{OEi)2U5 zu69?oBrXDG6K9c~JIn0y0|BzhDqrA@1LOy0okj8oK2_Dt#{?5X0NLuUTXoMp=bqbN z{qvoF{rjE2-t12+cPtEirnpb-W_!Ed+^RR*)l#WkF4Hz^(AA5Zi_7lC;boTwA{jG9 zumAk}QmG@NL6D5GiVay1Nkwk6U6(xMo~pMRQg}=?LNRn3)Vmkt&cnDg=2EgDmvqmQ zTTY@cd2)wH6*c1Y%cJL=dJrj|L@cBbaYBwSaht}(BA@g#DHi-xB53!d+v!out?pA_ zs1eB#OE?~*z~_+)u$H7sQctA`-z#Ombyrtt=~JdQ^(TtU>>u=LZFWVkfA^>PxdtH{ zN^+=KdWD0L{31NzQFi#`@m`rW?Q*l=>cxq%97Ifsr1VOTM53tq&}?R({rK_eTaJoi zrkvXCee%*oiiAY00ujjr;D8#BfeNWf%nud=KFdCPvU2c$BW~{QFJQ{NL`Z2N&&eUP zzp%CJKOcW*ngePO8HA}Em0Z^?m3pX~0uxlif>7gO5|UJL+illW*4^B7tHk1%vt+(m zI01&sTT#M?Eb&9oh{OO4Jpsu^d_ubT(=fQ@SY-3xn5S)#AXCf+eGY6+e{}&0k_Wyd zJPz5!kV_Mg433iSc+zJo7&me~^o<^lx>udcufgr?<=W~e^Q;5NihCnLgG7ueFIFa2fAak@9e@Rz(3>FRM^DQ+^pcdIKBwQX zJJtYG8$IwvHjdAF-Jf?asp4!_vu{>EotBFu8@A*B)Y2V-3Q<-|jYflFp49Wyd@j8t zh{0y+L)W*2`{o%Q`8h`O^g8EPJ!fm{+8nSo`?4xP`rYYy&uJlJ9mEi<%e+x>q=l`l z0J+?wh>cOHkViu`s>cZ*1P^FB&{@6E87|Q-P>pJhj*qU4QV`O|GTn9-P^V;C)PB~! 
z-2C+IYZuKzuP-oj&}G1oSKeqNfhp*H zCI2Gjkq0m}O@Kd7^d;n?>(`;zUw%bbw72LuvtC`<*`W>ETI{xx^C=vvTDmeTJ9cFu zi=~ye>*}8VhRUkT+a4bg&;|g`GrPXRB%CbJ`RDyqlc+|#F>KJ<%`_N1c+I8H+FL{J z<{=%O;MwQhUgsD4WDp`)8>+L}+}*ZL@J3F(mlr49Swi?>*qDg>Dd|>WY78`J+ka!> z(fQfZI;wa=7h0!CPxU#p%-(w9R?}=+@j`*}Spb$Ep1!!5L)L29r)T9Z45vCb0E@1X zl!PKAY}$+l$48h7SWJmXha)s^paDAvX#w#m|;= ziDBSS|9<9lUS6IZo_4Di87=!e%n|)GWXYsPelRf9Q|O5d{yGGU*VnZnTujsm$iNr& z*4K?0=>0qAr>BQ!M^4K{D}CTzs(3vZWAq88AV!s9hVGub4zQINPe|Sds41k9(oD735&xLl#YAj07XEt^{9osr6*uZg=l?aW|mwy%xtvNYMeJ>bKhxgYHwNm=yBn@ zc65r38!T$mPD~rx=*(zW#EaP^6wJr?^kz6(9zT>&us*9>G(0QYx}9$FAjL3cF0(J6 z{t%U`9fkj&{nJ`=g|xnG8UkS?dJB zZX*7eyj1F0A z^-(=a$9)_Wwboe^gG4>NlfXPMyGPugjo!LQm{(S^zq{|PK0Ir-;I%*7-i5K-cLc@y zAwSR)&ZEW6j5N$hk$v;{gHJpm72Yxif@IpS=Px974}A**@rNtr`5QoMeygn~_qFAF S^QpuRC7LLI`-Ahxr{4nh3T-w3 delta 538 zcmZ8e!A=`76ckr~#S;i=6H<#HDpjByAXy12LeeH&T1CY1KDri;?fQ9H_LLi!-slI= zPpH(t=&@3NfHQ}FLcQCjQn`+1-prf%`LuEWW8>@PVF!LaJbYp7q;fXoNQxi{78VFM zLaR&!%wSQ$xPLTm9eh3NXm)C3Q3WK}oTO)Ie+u2r zS%bjtjLqkBW<5Adq!zk@QiUAwHh2N07tW!NMzuIW{(%F9=)FV};B?Rd>w>a|S{zmv z=GxfOG!96k0{l%pd2=~PC;hjl$7$~s z#8hhOw2~@7h9tde+-8h(4$|skY5w7MPpFGxm*y!E(Af>u(bz?^l^a~9rXqs0lkX0$^ZZW diff --git a/src/informix_db/_types.py b/src/informix_db/_types.py new file mode 100644 index 0000000..3e0d4c1 --- /dev/null +++ b/src/informix_db/_types.py @@ -0,0 +1,86 @@ +"""IDS internal type-code constants. + +Ported from ``com.informix.lang.IfxTypes`` (decompile in +``build/jdbc-src/com/informix/lang/IfxTypes.java``). The type codes +appear in column descriptors (in the ``SQ_DESCRIBE`` response) and in +parameter descriptors. The high bits encode flags (nullability, etc.); +strip them with ``base_type()`` to get the raw type code. 
+"""
+
+from __future__ import annotations
+
+from enum import IntEnum
+
+
+class IfxType(IntEnum):
+    """IDS internal type codes (16-bit)."""
+
+    CHAR = 0
+    SMALLINT = 1
+    INT = 2  # 32-bit INTEGER
+    FLOAT = 3  # 64-bit DOUBLE PRECISION
+    SMFLOAT = 4  # 32-bit REAL / SMALLFLOAT
+    DECIMAL = 5
+    SERIAL = 6
+    DATE = 7
+    MONEY = 8
+    NULL = 9
+    DATETIME = 10
+    BYTE = 11
+    TEXT = 12
+    VARCHAR = 13
+    INTERVAL = 14
+    NCHAR = 15
+    NVCHAR = 16
+    INT8 = 17  # legacy 64-bit "long int" — variable-length encoding
+    SERIAL8 = 18
+    SET = 19
+    MULTISET = 20
+    LIST = 21
+    ROW = 22
+    COLLECTION = 23
+    ROWREF = 24
+    UDTVAR = 40
+    UDTFIXED = 41
+    REFSER8 = 42
+    LVARCHAR = 43
+    SENDRECV = 44
+    BOOL = 45
+    IMPEXP = 46
+    IMPEXPBIN = 47
+    SQLUDRDEFAULT = 48
+    UNKNOWN = 49
+    BIGINT = 52  # modern 64-bit BIGINT (clean 8-byte BE encoding)
+    BIGSERIAL = 53
+    CLOB = 101
+    BLOB = 102
+
+
+# High-bit flags packed into the type-code short alongside the type.
+# Strip these with ``base_type()`` before comparing to ``IfxType`` values.
+IFX_BIT_NOTNULLABLE = 0x0100  # 256 — column is NOT NULL
+IFX_BIT_DISTINCT = 0x0800  # 2048 — distinct/named type
+IFX_BIT_NAMEDROW = 0x1000  # 4096 — named row type
+IFX_BIT_DBOOLEAN = 0x4000  # 16384 — distinct boolean
+IFX_BIT_COLLCLIENT = -0x8000  # high bit (Java's Short.MIN_VALUE)
+
+_TYPE_BITS_MASK = 0xFF  # low byte = type code (sufficient for builtin types ≤ 102)
+_FLAG_BITS_MASK = 0xFF00
+
+
+def base_type(type_code: int) -> int:
+    """Strip high-bit flags and return the raw IfxType value.
+
+    Type codes on the wire are 16-bit shorts where the low byte is the
+    type and the high byte holds flags (NOTNULLABLE, DISTINCT, etc.).
+    A flag-free CLOB (101) or BLOB (102) is returned as passed; both fit
+    in the low byte, so masking them would produce the same value.
+    """
+    if type_code in (IfxType.CLOB, IfxType.BLOB):  # bare CLOB/BLOB pass through
+        return type_code
+    return type_code & _TYPE_BITS_MASK
+
+
+def is_nullable(type_code: int) -> bool:
+    """``True`` if the column accepts NULL (the NOTNULLABLE bit is clear)."""
+    return not (type_code & IFX_BIT_NOTNULLABLE)
diff --git a/src/informix_db/converters.py b/src/informix_db/converters.py
new file mode 100644
index 0000000..2a9a867
--- /dev/null
+++ b/src/informix_db/converters.py
@@ -0,0 +1,128 @@
+"""Type codecs: IDS wire bytes ↔ Python values.
+
+Phase 2 implements decoders for the MVP type set (SMALLINT, INT, BIGINT,
+SMFLOAT, FLOAT, CHAR, VARCHAR, BOOL, DATE). Encoders are stubbed —
+they land in Phase 4 with parameter binding.
+
+Decoder dispatch: ``decode(type_code, raw_bytes) → python value`` looks
+up the codec in ``DECODERS`` keyed by ``IfxType`` (after stripping
+high-bit flags via ``_types.base_type``). NULL values are signaled by
+the row decoder, not by sentinel bytes here.
+
+For DATE we use the Informix epoch (1899-12-31). The raw bytes are a
+4-byte big-endian signed int representing day count.
+"""
+
+from __future__ import annotations
+
+import datetime
+import struct
+from collections.abc import Callable
+
+from ._types import IfxType, base_type
+
+# Informix DATE epoch — day 0 is December 31, 1899 (per Informix convention).
+_INFORMIX_DATE_EPOCH = datetime.date(1899, 12, 31)
+
+DecoderFn = Callable[[bytes], object]
+
+
+def _decode_smallint(raw: bytes) -> int:
+    return struct.unpack("!h", raw)[0]  # "!h": 2-byte big-endian signed
+
+
+def _decode_int(raw: bytes) -> int:
+    return struct.unpack("!i", raw)[0]  # "!i": 4-byte big-endian signed
+
+
+def _decode_bigint(raw: bytes) -> int:
+    return struct.unpack("!q", raw)[0]  # "!q": 8-byte big-endian signed
+
+
+def _decode_smfloat(raw: bytes) -> float:
+    return struct.unpack("!f", raw)[0]  # "!f": 4-byte big-endian float
+
+
+def _decode_float(raw: bytes) -> float:
+    return struct.unpack("!d", raw)[0]  # "!d": 8-byte big-endian double
+
+
+def _decode_char(raw: bytes) -> str:
+    """Strip trailing spaces/NULs (CHAR is space-padded to declared length)."""
+    return raw.rstrip(b" \x00").decode("iso-8859-1")
+
+
+def _decode_varchar(raw: bytes) -> str:
+    """VARCHAR — variable-length string, nul-terminated on the wire."""
+    return raw.rstrip(b"\x00").decode("iso-8859-1")
+
+
+def _decode_bool(raw: bytes) -> bool:
+    """BOOLEAN: first byte 't', 'T' or 0x01 → True; any other byte → False."""
+    if not raw:
+        raise ValueError("empty BOOL payload")
+    return raw[0] in (ord("t"), ord("T"), 1)
+
+
+def _decode_date(raw: bytes) -> datetime.date:
+    """4-byte big-endian signed int = day count from 1899-12-31."""
+    days = struct.unpack("!i", raw)[0]
+    return _INFORMIX_DATE_EPOCH + datetime.timedelta(days=days)
+
+
+# Wire byte length per Phase-2-MVP type. Used by the row decoder to
+# slice column values out of an SQ_TUPLE payload for fixed-width types.
+# Variable-width types (CHAR, VARCHAR, DECIMAL, etc.) are length-prefixed
+# on the wire and don't appear in this table.
+FIXED_WIDTHS: dict[int, int] = {
+    IfxType.SMALLINT: 2,
+    IfxType.INT: 4,
+    IfxType.SERIAL: 4,
+    IfxType.SMFLOAT: 4,
+    IfxType.FLOAT: 8,
+    IfxType.BIGINT: 8,
+    IfxType.BIGSERIAL: 8,
+    IfxType.DATE: 4,
+    IfxType.BOOL: 1,
+}
+
+
+# Phase 2 MVP decoders. Phase 6+ adds DATETIME, INTERVAL, DECIMAL,
+# MONEY, BYTE/TEXT, BLOB/CLOB, ROW, COLLECTION.
+DECODERS: dict[int, DecoderFn] = {
+    IfxType.SMALLINT: _decode_smallint,
+    IfxType.INT: _decode_int,
+    IfxType.SERIAL: _decode_int,
+    IfxType.BIGINT: _decode_bigint,
+    IfxType.BIGSERIAL: _decode_bigint,
+    IfxType.SMFLOAT: _decode_smfloat,
+    IfxType.FLOAT: _decode_float,
+    IfxType.CHAR: _decode_char,
+    IfxType.VARCHAR: _decode_varchar,
+    IfxType.NCHAR: _decode_char,
+    IfxType.NVCHAR: _decode_varchar,
+    IfxType.LVARCHAR: _decode_varchar,
+    IfxType.BOOL: _decode_bool,
+    IfxType.DATE: _decode_date,
+}
+
+
+def decode(type_code: int, raw: bytes) -> object:
+    """Decode ``raw`` bytes for the given IDS type code into a Python value.
+
+    High-bit flags (NOTNULLABLE etc.) are stripped with ``base_type`` before
+    the ``DECODERS`` lookup; raises ``NotImplementedError`` if no entry exists.
+    """
+    base = base_type(type_code)
+    decoder = DECODERS.get(base)
+    if decoder is None:
+        supported = ", ".join(IfxType(code).name for code in sorted(DECODERS))
+        raise NotImplementedError(
+            f"decoder for IDS type code {base} not yet implemented "
+            f"(Phase 2 MVP supports: {supported})"
+        )
+    return decoder(raw)
+
+
+# Encoders — stubbed for Phase 4 parameter binding.
+ENCODERS: dict[int, Callable[[object], bytes]] = {}