"""IDS internal type-code constants.

Ported from ``com.informix.lang.IfxTypes`` (decompile in
``build/jdbc-src/com/informix/lang/IfxTypes.java``). The type codes
appear in column descriptors (in the ``SQ_DESCRIBE`` response) and in
parameter descriptors. The high bits encode flags (nullability, etc.);
strip them with ``base_type()`` to get the raw type code.
"""

from enum import IntEnum, unique


@unique
class IfxType(IntEnum):
    """IDS internal type codes (16-bit).

    The values are hand-ported constants, so the class is decorated with
    ``@unique``: a duplicated value would otherwise silently turn the
    second name into an alias of the first instead of failing at import.
    """

    # Codes 0-24.
    CHAR = 0
    SMALLINT = 1
    INT = 2  # 32-bit INTEGER
    FLOAT = 3  # 64-bit DOUBLE PRECISION
    SMFLOAT = 4  # 32-bit REAL / SMALLFLOAT
    DECIMAL = 5
    SERIAL = 6
    DATE = 7
    MONEY = 8
    NULL = 9
    DATETIME = 10
    BYTE = 11
    TEXT = 12
    VARCHAR = 13
    INTERVAL = 14
    NCHAR = 15
    NVCHAR = 16
    INT8 = 17  # legacy 64-bit "long int" — variable-length encoding
    SERIAL8 = 18
    SET = 19
    MULTISET = 20
    LIST = 21
    ROW = 22
    COLLECTION = 23
    ROWREF = 24

    # Codes 40-49.
    UDTVAR = 40
    UDTFIXED = 41
    REFSER8 = 42
    LVARCHAR = 43
    SENDRECV = 44
    BOOL = 45
    IMPEXP = 46
    IMPEXPBIN = 47
    SQLUDRDEFAULT = 48
    UNKNOWN = 49

    # Codes 52-53.
    BIGINT = 52  # modern 64-bit BIGINT (clean 8-byte BE encoding)
    BIGSERIAL = 53

    # Codes 101-102 — the largest builtin codes; they still fit in one byte.
    CLOB = 101
    BLOB = 102


# High-bit flags packed into the type-code short alongside the type.
# Strip these with ``base_type()`` before comparing to ``IfxType`` values.
IFX_BIT_NOTNULLABLE = 0x0100  # 256 — column is NOT NULL
IFX_BIT_DISTINCT = 0x0800  # 2048 — distinct/named type
IFX_BIT_NAMEDROW = 0x1000  # 4096 — named row type
IFX_BIT_DBOOLEAN = 0x4000  # 16384 — distinct boolean
IFX_BIT_COLLCLIENT = -0x8000  # high bit (Java's Short.MIN_VALUE)

_TYPE_BITS_MASK = 0xFF  # low byte = type code (sufficient for builtin types ≤ 102)
_FLAG_BITS_MASK = 0xFF00


def base_type(type_code: int) -> int:
    """Strip high-bit flags and return the raw type code.

    Type codes on the wire are 16-bit shorts where the low byte is the
    type and the high byte holds flags (NOTNULLABLE, DISTINCT, etc.).
    Every builtin code fits in the low byte (the largest are CLOB=101 and
    BLOB=102), so a single mask is enough; no per-type special case is
    needed. Negative inputs (a signed short with the top bit set) are
    handled too, because Python's ``&`` works on two's-complement values.

    Args:
        type_code: Type code as read from a descriptor, flags included.

    Returns:
        The low byte of ``type_code`` (0-255).
    """
    return type_code & _TYPE_BITS_MASK


def is_nullable(type_code: int) -> bool:
    """Return ``True`` if the column accepts NULL (the NOTNULLABLE bit is clear)."""
    return not (type_code & IFX_BIT_NOTNULLABLE)


# ---------------------------------------------------------------------------
# Type codecs: IDS wire bytes ↔ Python values (src/informix_db/converters.py).
#
# Phase 2 implements decoders for the MVP type set (SMALLINT, INT, BIGINT,
# SMFLOAT, FLOAT, CHAR, VARCHAR, BOOL, DATE). Encoders are stubbed —
# they land in Phase 4 with parameter binding.
#
# Decoder dispatch: ``decode(type_code, raw_bytes) → python value`` looks
# up the codec in ``DECODERS`` keyed by ``IfxType`` (after stripping
# high-bit flags via ``_types.base_type``). NULL values are signaled by
# the row decoder, not by sentinel bytes here.
#
# For DATE we use the Informix epoch (1899-12-31). The raw bytes are a
# 4-byte big-endian signed int representing day count.
# ---------------------------------------------------------------------------

import datetime
import struct
from collections.abc import Callable

# Informix DATE epoch — day 0 is December 31, 1899 (per Informix convention).
_INFORMIX_DATE_EPOCH = datetime.date(1899, 12, 31)

DecoderFn = Callable[[bytes], object]


def _decode_smallint(raw: bytes) -> int:
    """Decode a 2-byte big-endian signed integer."""
    return struct.unpack("!h", raw)[0]


def _decode_int(raw: bytes) -> int:
    """Decode a 4-byte big-endian signed integer."""
    return struct.unpack("!i", raw)[0]


def _decode_bigint(raw: bytes) -> int:
    """Decode an 8-byte big-endian signed integer."""
    return struct.unpack("!q", raw)[0]


def _decode_smfloat(raw: bytes) -> float:
    """Decode a 4-byte big-endian IEEE-754 single-precision float."""
    return struct.unpack("!f", raw)[0]


def _decode_float(raw: bytes) -> float:
    """Decode an 8-byte big-endian IEEE-754 double-precision float."""
    return struct.unpack("!d", raw)[0]


def _decode_char(raw: bytes) -> str:
    """Decode CHAR: drop trailing padding, then decode as ISO-8859-1.

    CHAR is space-padded to its declared length; trailing NUL bytes are
    stripped as well as trailing spaces.
    """
    return raw.rstrip(b" \x00").decode("iso-8859-1")


def _decode_varchar(raw: bytes) -> str:
    """Decode VARCHAR — variable-length string, nul-terminated on the wire.

    Only trailing NUL bytes are removed; trailing spaces are preserved.
    """
    return raw.rstrip(b"\x00").decode("iso-8859-1")


def _decode_bool(raw: bytes) -> bool:
    """Decode a one-byte Informix BOOLEAN.

    ``'t'``/``'T'`` and the raw byte ``0x01`` are true; any other first
    byte (including ``'f'``/``'F'``) is false. Only the first byte is read.

    Raises:
        ValueError: If ``raw`` is empty.
    """
    if not raw:
        raise ValueError("empty BOOL payload")
    # Membership of an int in a bytes object tests for that byte value.
    return raw[0] in b"tT\x01"


def _decode_date(raw: bytes) -> datetime.date:
    """Decode DATE: 4-byte big-endian signed int = day count from 1899-12-31."""
    (days,) = struct.unpack("!i", raw)
    return _INFORMIX_DATE_EPOCH + datetime.timedelta(days=days)


# Wire byte length per Phase-2-MVP type. Used by the row decoder to
# slice column values out of an SQ_TUPLE payload for fixed-width types.
# Variable-width types (CHAR, VARCHAR, DECIMAL, etc.) are length-prefixed
# on the wire and don't appear in this table.
FIXED_WIDTHS: dict[int, int] = {
    IfxType.SMALLINT: 2,
    IfxType.INT: 4,
    IfxType.SERIAL: 4,
    IfxType.SMFLOAT: 4,
    IfxType.FLOAT: 8,
    IfxType.BIGINT: 8,
    IfxType.BIGSERIAL: 8,
    IfxType.DATE: 4,
    IfxType.BOOL: 1,
}


# Phase 2 MVP decoders. Phase 6+ adds DATETIME, INTERVAL, DECIMAL,
# MONEY, LVARCHAR, BYTE/TEXT, BLOB/CLOB, ROW, COLLECTION.
DECODERS: dict[int, DecoderFn] = {
    IfxType.SMALLINT: _decode_smallint,
    IfxType.INT: _decode_int,
    IfxType.SERIAL: _decode_int,
    IfxType.BIGINT: _decode_bigint,
    IfxType.BIGSERIAL: _decode_bigint,
    IfxType.SMFLOAT: _decode_smfloat,
    IfxType.FLOAT: _decode_float,
    IfxType.CHAR: _decode_char,
    IfxType.VARCHAR: _decode_varchar,
    IfxType.NCHAR: _decode_char,
    IfxType.NVCHAR: _decode_varchar,
    IfxType.LVARCHAR: _decode_varchar,
    IfxType.BOOL: _decode_bool,
    IfxType.DATE: _decode_date,
}


def decode(type_code: int, raw: bytes) -> object:
    """Decode ``raw`` bytes for the given IDS type code into a Python value.

    The high-bit flags (NOTNULLABLE etc.) are stripped with ``base_type``
    before the decoder is looked up in ``DECODERS``.

    Args:
        type_code: Type code from the column descriptor, flags included.
        raw: The column's wire bytes.

    Returns:
        Whatever the registered decoder produces for ``raw``.

    Raises:
        NotImplementedError: If no decoder is registered for the base
            type — Phase 6+ adds the rest.
    """
    base = base_type(type_code)
    decoder = DECODERS.get(base)
    if decoder is None:
        raise NotImplementedError(
            f"decoder for IDS type code {base} not yet implemented "
            "(Phase 2 MVP supports: SMALLINT, INT, BIGINT, REAL, FLOAT, "
            "CHAR, VARCHAR, BOOL, DATE)"
        )
    return decoder(raw)


# Encoders — stubbed for Phase 4 parameter binding.
ENCODERS: dict[int, Callable[[object], bytes]] = {}