Before:
cur.execute('SELECT COUNT(*) FROM systables')
cur.fetchone() # → (b'\xc2\x02\x00\x00\x00\x00\x00\x00\x00',) raw bytes
After:
cur.execute('SELECT COUNT(*) FROM systables')
cur.fetchone() # → (Decimal('276'),)
The trickiest decode of the project so far. IDS DECIMAL/MONEY wire format:
byte[0] = (sign << 7) | biased_exponent_base100
bit 7 = sign (1=positive, 0=negative)
bits 0-6 = (exponent + 64), XOR'd with 0x7F if negative
byte[1..] = digit-pair bytes (each 0..99 = two BCD digits)
if negative: asymmetric base-100 complement applied:
walk digits right→left, trailing zeros stay zero,
first non-zero subtracts from 100, rest from 99
The initial naive decoder ("99 - d" for every digit pair) produced
artifacts such as -1234.559999 instead of -1234.56. The asymmetric
complement rule (from Decimal.decComplement, line 447) is what makes
negatives round-trip exactly.
Width on the wire: per-column encoded_length packed as
(precision << 8) | scale; byte width = ceil(precision/2) + 1.
parse_tuple_payload uses this to slice DECIMAL columns correctly.
Tested cases all decode correctly:
COUNT(*) → Decimal('276')
SUM(tabid) → Decimal('55')
AVG(tabid) → Decimal('5.5')
1234.56::DECIMAL → Decimal('1234.56')
-1234.56::DECIMAL → Decimal('-1234.56')
-0.5::DECIMAL → Decimal('-0.5')
-99.99::DECIMAL → Decimal('-99.99')
-12345678.9::DECIMAL → Decimal('-12345678.9')
NULL → None
Encoder (_encode_decimal) is implemented but disabled — server rejects
the produced bytes (precision packing not quite right). Phase 6.x will
fix. Workaround: cast Decimal to float, or pass via SQL literal.
Module changes:
src/informix_db/converters.py:
+ decimal module import
+ _decode_decimal — full BCD decoder with asymmetric complement
+ _encode_decimal (Phase 6.x stub — present but unreached)
+ DECIMAL/MONEY added to DECODERS dispatch
src/informix_db/_resultset.py:
+ DECIMAL/MONEY width computation from encoded_length
Tests: 40 unit + 55 integration (8 new DECIMAL) = 95 total, all
green, ruff clean.
379 lines
14 KiB
Python
379 lines
14 KiB
Python
"""Type codecs: IDS wire bytes ↔ Python values.
|
|
|
|
Decoders cover SMALLINT, INT/SERIAL, BIGINT/BIGSERIAL, SMFLOAT, FLOAT,
CHAR/NCHAR, VARCHAR/NVCHAR/LVARCHAR, BOOL, DATE and DECIMAL/MONEY.
Encoders for parameter binding cover int, float, str and bool; a DECIMAL
encoder exists but ``encode_param`` does not dispatch to it yet.

Decoder dispatch: ``decode(type_code, raw_bytes) → python value`` looks
up the codec in ``DECODERS`` keyed by ``IfxType`` (after stripping
high-bit flags via ``_types.base_type``). Decoders for types with a NULL
sentinel (integers, floats, VARCHAR, DATE, DECIMAL) return ``None`` when
they see it; the CHAR and BOOL decoders do no NULL detection of their own.
|
|
|
|
For DATE we use the Informix epoch (1899-12-31). The raw bytes are a
|
|
4-byte big-endian signed int representing day count.
|
|
"""
|
|
|
|
from __future__ import annotations
|
|
|
|
import datetime
|
|
import decimal
|
|
import struct
|
|
from collections.abc import Callable
|
|
|
|
from ._types import IfxType, base_type
|
|
|
|
# Informix DATE epoch — day 0 is December 31, 1899 (per Informix convention).
_INFORMIX_DATE_EPOCH = datetime.date(1899, 12, 31)

# Signature shared by every entry in ``DECODERS``: raw column bytes in,
# decoded Python value out.
DecoderFn = Callable[[bytes], object]


# Informix uses sentinel values for NULL per type — see DECISION_LOG.md
# entry on null sentinel discovery (2026-05-04).
# The integer sentinels are written here as unsigned bit patterns. The
# integer and date decoders in this module unpack *signed* values, so they
# compare against the equivalent negative literal instead of these names.
_INT_NULL = 0x80000000  # INT_MIN
_SMALLINT_NULL = 0x8000  # SHORT_MIN
_BIGINT_NULL = 0x8000000000000000  # LONG_MIN
_REAL_NULL = b"\xff\xff\xff\xff"  # SMFLOAT NULL: four 0xFF bytes
_DOUBLE_NULL = b"\xff\xff\xff\xff\xff\xff\xff\xff"  # FLOAT NULL: eight 0xFF bytes
_DATE_NULL = 0x80000000  # same bit pattern as _INT_NULL
|
|
|
|
|
|
def _decode_smallint(raw: bytes) -> int | None:
|
|
val = struct.unpack("!h", raw)[0]
|
|
return None if val == -0x8000 else val
|
|
|
|
|
|
def _decode_int(raw: bytes) -> int | None:
|
|
val = struct.unpack("!i", raw)[0]
|
|
return None if val == -0x80000000 else val
|
|
|
|
|
|
def _decode_bigint(raw: bytes) -> int | None:
|
|
val = struct.unpack("!q", raw)[0]
|
|
return None if val == -0x8000000000000000 else val
|
|
|
|
|
|
def _decode_smfloat(raw: bytes) -> float | None:
    """Decode a 4-byte big-endian IEEE 754 SMFLOAT.

    A payload equal to ``_REAL_NULL`` is the NULL marker and yields ``None``;
    the check happens before any unpacking.
    """
    is_null = raw == _REAL_NULL
    return None if is_null else struct.unpack("!f", raw)[0]
|
|
|
|
|
|
def _decode_float(raw: bytes) -> float | None:
    """Decode an 8-byte big-endian IEEE 754 FLOAT.

    A payload equal to ``_DOUBLE_NULL`` is the NULL marker and yields ``None``;
    the check happens before any unpacking.
    """
    is_null = raw == _DOUBLE_NULL
    return None if is_null else struct.unpack("!d", raw)[0]
|
|
|
|
|
|
def _decode_char(raw: bytes) -> str:
    """Decode a CHAR payload as ISO-8859-1 text.

    Trailing spaces and NUL bytes are removed first (CHAR is padded to its
    declared length); leading and embedded ones are kept.
    """
    trimmed = raw.rstrip(b" \x00")
    return trimmed.decode("iso-8859-1")
|
|
|
|
|
|
def _decode_varchar(raw: bytes) -> str | None:
|
|
"""VARCHAR — variable-length string. NULL is the special sentinel ``\\x00``
|
|
(single nul byte). The row decoder peels off the length prefix and passes
|
|
the content here. Note: VARCHAR cannot contain embedded nuls anyway, so
|
|
a single-nul value is unambiguously the NULL marker.
|
|
"""
|
|
if raw == b"\x00":
|
|
return None
|
|
return raw.rstrip(b"\x00").decode("iso-8859-1")
|
|
|
|
|
|
def _decode_bool(raw: bytes) -> bool:
    """Decode a one-byte Informix BOOLEAN.

    The first byte is true when it is ``'t'``, ``'T'`` or the numeric value 1;
    every other byte value is false. An empty payload raises ``ValueError``.
    """
    if len(raw) == 0:
        raise ValueError("empty BOOL payload")
    truthy_bytes = (0x74, 0x54, 0x01)  # 't', 'T', numeric 1
    first = raw[0]
    return first in truthy_bytes
|
|
|
|
|
|
def _decode_date(raw: bytes) -> datetime.date | None:
    """Decode a DATE: 4-byte big-endian signed day count from ``_INFORMIX_DATE_EPOCH``.

    The most negative 32-bit value (bit pattern 0x80000000) is the NULL
    sentinel and yields ``None``.
    """
    (day_offset,) = struct.unpack("!i", raw)
    if day_offset != -0x80000000:
        return _INFORMIX_DATE_EPOCH + datetime.timedelta(days=day_offset)
    return None
|
|
|
|
|
|
def _decode_decimal(raw: bytes) -> decimal.Decimal | None:
|
|
"""Decode IDS DECIMAL/MONEY: base-100 packed BCD with sign/exponent header.
|
|
|
|
Wire format (per ``com.informix.lang.Decimal.init``, line 374):
|
|
byte[0]: ``(sign << 7) | (biased_exponent & 0x7F)``
|
|
- sign bit (bit 7): 1 = positive, 0 = negative
|
|
- biased_exponent (bits 0-6): actual exponent = biased - 64,
|
|
measured in BASE-100 digits before the decimal point
|
|
byte[1..]: digit-pair bytes; each byte holds two decimal digits
|
|
as a single base-100 number (0..99). If the value is NEGATIVE,
|
|
each digit-pair is stored as 99-d (i.e., 9's complement in base 100).
|
|
|
|
NULL marker: byte[0] == 0 AND byte[1] == 0.
|
|
"""
|
|
if len(raw) < 2 or (raw[0] == 0 and raw[1] == 0):
|
|
return None
|
|
|
|
expbyte = raw[0]
|
|
is_positive = (expbyte & 0x80) != 0
|
|
# For negative: exponent byte is XOR'd with 0x7F to recover real
|
|
# exponent (per IfxToJavaDecimal.init line 386).
|
|
biased_exp = (expbyte & 0x7F) if is_positive else ((expbyte ^ 0x7F) & 0x7F)
|
|
exponent_base100 = biased_exp - 64 # in base-100 digits
|
|
|
|
digits = list(raw[1:])
|
|
if not is_positive:
|
|
# Asymmetric base-100 complement (per Decimal.decComplement, line 447):
|
|
# walk from RIGHT to LEFT; trailing zeros stay zero; the first
|
|
# non-zero is subtracted from 100; subsequent from 99.
|
|
# Without this, trailing 99s appear in the decoded value (a
|
|
# 1234.559999 / 0.4999... rounding-style artifact).
|
|
sub_from = 100
|
|
for i in range(len(digits) - 1, -1, -1):
|
|
if digits[i] == 0 and sub_from == 100:
|
|
continue
|
|
digits[i] = sub_from - digits[i]
|
|
sub_from = 99
|
|
|
|
# Build the decimal-string representation.
|
|
# exponent_base100 is the count of BASE-100 digits before the decimal
|
|
# point; multiplying by 2 gives BASE-10 digits before the decimal.
|
|
base10_exp = exponent_base100 * 2
|
|
|
|
# Concatenate all digit-pairs as a string, dropping trailing zeros
|
|
# for normalization.
|
|
digit_str = "".join(f"{d:02d}" for d in digits)
|
|
if not digit_str:
|
|
return decimal.Decimal(0)
|
|
|
|
sign_str = "" if is_positive else "-"
|
|
# Build "<sign><digits>E<exp>" — Decimal will normalize.
|
|
# Each digit-pair represents 2 base-10 digits; the value is
|
|
# digit_str interpreted as an integer * 10^(base10_exp - len(digit_str))
|
|
if base10_exp >= 0:
|
|
# The decimal point is to the RIGHT of digit_str's start by
|
|
# base10_exp positions.
|
|
if base10_exp >= len(digit_str):
|
|
# All digits are integer; pad with zeros to reach the exp.
|
|
int_part = digit_str + "0" * (base10_exp - len(digit_str))
|
|
return decimal.Decimal(f"{sign_str}{int_part}")
|
|
else:
|
|
int_part = digit_str[:base10_exp] or "0"
|
|
frac_part = digit_str[base10_exp:].rstrip("0")
|
|
if frac_part:
|
|
return decimal.Decimal(f"{sign_str}{int_part}.{frac_part}")
|
|
return decimal.Decimal(f"{sign_str}{int_part}")
|
|
else:
|
|
# base10_exp < 0: leading zeros in the fraction
|
|
frac_zeros = "0" * (-base10_exp)
|
|
frac_part = (frac_zeros + digit_str).rstrip("0")
|
|
if frac_part:
|
|
return decimal.Decimal(f"{sign_str}0.{frac_part}")
|
|
return decimal.Decimal(0)
|
|
|
|
|
|
# Wire byte length for each fixed-width type. Used by the row decoder to
# slice column values out of an SQ_TUPLE payload.
# Types whose width is not a per-type constant (CHAR, VARCHAR, DECIMAL,
# etc.) don't appear in this table.
# NOTE(review): DECIMAL/MONEY appear to be sliced using a width derived from
# the column's encoded length (ceil(precision / 2) + 1) rather than a length
# prefix — confirm against the row decoder.
FIXED_WIDTHS: dict[int, int] = {
    IfxType.SMALLINT: 2,
    IfxType.INT: 4,
    IfxType.SERIAL: 4,
    IfxType.SMFLOAT: 4,
    IfxType.FLOAT: 8,
    IfxType.BIGINT: 8,
    IfxType.BIGSERIAL: 8,
    IfxType.DATE: 4,
    IfxType.BOOL: 1,
}
|
|
|
|
|
|
# Decoder registry keyed by base type code; ``decode`` looks codecs up here.
# SERIAL/BIGSERIAL reuse the integer decoders, NCHAR reuses the CHAR decoder,
# and NVCHAR/LVARCHAR reuse the VARCHAR decoder.
# Still to come in Phase 6+: DATETIME, INTERVAL, BYTE/TEXT, BLOB/CLOB, ROW,
# COLLECTION.
DECODERS: dict[int, DecoderFn] = {
    IfxType.SMALLINT: _decode_smallint,
    IfxType.INT: _decode_int,
    IfxType.SERIAL: _decode_int,
    IfxType.BIGINT: _decode_bigint,
    IfxType.BIGSERIAL: _decode_bigint,
    IfxType.SMFLOAT: _decode_smfloat,
    IfxType.FLOAT: _decode_float,
    IfxType.CHAR: _decode_char,
    IfxType.VARCHAR: _decode_varchar,
    IfxType.NCHAR: _decode_char,
    IfxType.NVCHAR: _decode_varchar,
    IfxType.LVARCHAR: _decode_varchar,
    IfxType.BOOL: _decode_bool,
    IfxType.DATE: _decode_date,
    IfxType.DECIMAL: _decode_decimal,
    IfxType.MONEY: _decode_decimal,  # MONEY is DECIMAL with implied scale
}
|
|
|
|
|
|
def decode(type_code: int, raw: bytes) -> object:
    """Decode ``raw`` bytes for the given IDS type code into a Python value.

    The high-bit flags (NOTNULLABLE etc.) are stripped with ``base_type``
    before the codec is looked up in ``DECODERS``.

    Args:
        type_code: IDS type code, possibly carrying high-bit flags.
        raw: the column's wire bytes, already sliced out of the row.

    Returns:
        Whatever the registered decoder returns for ``raw``.

    Raises:
        NotImplementedError: no decoder is registered for the base type.
    """
    base = base_type(type_code)
    decoder = DECODERS.get(base)
    if decoder is None:
        # Build the list from the registry so the message cannot go stale
        # when decoders are added. Fall back to the raw key when it has no
        # ``name`` attribute.
        supported = ", ".join(
            sorted(getattr(code, "name", str(code)) for code in DECODERS)
        )
        raise NotImplementedError(
            f"decoder for IDS type code {base} not yet implemented "
            f"(supported: {supported})"
        )
    return decoder(raw)
|
|
|
|
|
|
# ---------------------------------------------------------------------------
# Encoders for parameter binding (Phase 4)
# ---------------------------------------------------------------------------
# Each encoder returns ``(type_code, prec_short, raw_bytes)`` per parameter.
# Per-param SQ_BIND format: ``[short type][short ind=0][short prec][data]``
# where data is ``writePadded(raw_bytes)`` (emit + pad-to-even).
#
# JDBC's IfxSqli.sendBind (line 844+) does precision encoding per type:
#   INT/SERIAL: prec = 0x0a00 (packed width=10, scale=0)
#   VARCHAR sent as CHAR (type=0): prec = 0
#   FLOAT (DOUBLE PRECISION): prec = 0
#
# Strings get type=0 (CHAR) on the wire — Informix's server casts them
# to the declared column type via the CIDESCRIBE/IDESCRIBE handshake.

# One encoded bind parameter: ``(ifx_type_code, precision_short, raw_bytes)``.
EncodedParam = tuple[int, int, bytes]
|
|
|
|
|
|
def _encode_int(value: int) -> EncodedParam:
    """Encode a Python int as Informix INTEGER.

    Returns type code 2, precision short 0x0A00 and the value as 4
    big-endian two's-complement bytes. Values outside the signed 32-bit
    range raise ``OverflowError`` from ``int.to_bytes``.
    """
    type_code, prec_short = 2, 0x0A00
    payload = value.to_bytes(4, "big", signed=True)
    return type_code, prec_short, payload
|
|
|
|
|
|
def _encode_bigint(value: int) -> EncodedParam:
    """Encode a Python int as Informix BIGINT.

    Returns type code 52, precision short 0x1300 and the value as 8
    big-endian two's-complement bytes. Values outside the signed 64-bit
    range raise ``OverflowError`` from ``int.to_bytes``.
    """
    type_code, prec_short = 52, 0x1300
    payload = value.to_bytes(8, "big", signed=True)
    return type_code, prec_short, payload
|
|
|
|
|
|
def _encode_str(value: str) -> EncodedParam:
    """Encode a Python str as Informix CHAR (type code 0, precision 0).

    The text is encoded as ISO-8859-1 and prefixed with its byte length as
    a 2-byte big-endian integer: ``[short length][bytes]``. Per the JDBC
    behaviour this mirrors, strings always go out as CHAR and the server
    converts to the real column type; even-byte padding is left to the
    writer (writePadded).
    """
    body = value.encode("iso-8859-1")
    length_prefix = len(body).to_bytes(2, "big")
    return 0, 0, length_prefix + body
|
|
|
|
|
|
def _encode_float(value: float) -> EncodedParam:
    """Encode a Python float as Informix FLOAT.

    Returns type code 3, precision short 0 and the value packed as an
    8-byte big-endian IEEE 754 double.
    """
    payload = struct.pack("!d", value)
    return 3, 0, payload
|
|
|
|
|
|
def _encode_bool(value: bool) -> EncodedParam:
    """Encode a Python bool as Informix BOOLEAN.

    Returns type code 45, precision short 0 and a single byte: 0x01 for a
    truthy value, 0x00 otherwise.
    """
    if value:
        payload = b"\x01"
    else:
        payload = b"\x00"
    return 45, 0, payload
|
|
|
|
|
|
def _encode_decimal(value: decimal.Decimal) -> EncodedParam:
    """Encode a Python ``decimal.Decimal`` as IDS DECIMAL (type=5).

    Inverse of ``_decode_decimal``: produces a header byte followed by
    base-100 digit pairs. Mirrors ``Decimal.javaToIfx`` (line 457).

    Header byte: bit 7 set for positive values; bits 0-6 hold
    ``base100_exponent + 64``, XOR'd with 0x7F for negative values.
    Negative mantissas are stored as the asymmetric base-100 complement
    (trailing zero pairs unchanged, right-most non-zero pair ``100 - d``,
    all pairs to its left ``99 - d``).

    The precision short is ``(precision << 8) | scale``. Precision and
    scale are taken from the value itself, before the mantissa is padded
    to a base-100 boundary, so precision is never smaller than scale.

    NOTE(review): ``encode_param`` does not dispatch here yet because the
    server rejected earlier output; whether the corrected precision short
    is sufficient has not been verified against a server. Zero is emitted
    as a single zero digit pair with exponent 1 — confirm the canonical
    wire form for zero.

    Args:
        value: a finite ``Decimal``.

    Returns:
        ``(5, prec_short, raw)``, where ``raw`` is the header byte followed
        by the digit-pair bytes.

    Raises:
        ValueError: ``value`` is NaN or infinite, its base-100 exponent
            does not fit the 7-bit biased field, or its precision/scale do
            not fit in one byte each.
    """
    sign, digits, exp = value.as_tuple()
    if not isinstance(exp, int):
        # NaN / sNaN / Infinity report a letter code instead of an exponent.
        raise ValueError(
            f"cannot encode non-finite Decimal {value!r} as IDS DECIMAL"
        )

    # Describe the caller's value, not the padded mantissa built below.
    scale = max(0, -exp)
    precision = max(len(digits) + max(exp, 0), scale, 1)
    if precision > 0xFF or scale > 0xFF:
        raise ValueError(
            f"Decimal {value!r} needs precision {precision} / scale {scale}, "
            f"which do not fit the packed precision short"
        )

    # Count of decimal digits before the decimal point (may be negative).
    int_digits = len(digits) + exp
    padded = list(digits)
    if int_digits % 2:
        # Leading zero so the decimal point falls on a base-100 boundary.
        padded.insert(0, 0)
        int_digits += 1
    if len(padded) % 2:
        # Trailing zero so the digits pair up evenly.
        padded.append(0)

    biased_exp = int_digits // 2 + 64
    if not 0 <= biased_exp <= 0x7F:
        # Masking would silently wrap the exponent and change the value.
        raise ValueError(
            f"Decimal {value!r} has a base-100 exponent outside the "
            f"encodable range -64..63"
        )

    pairs = bytearray(
        high * 10 + low for high, low in zip(padded[::2], padded[1::2])
    )

    if sign == 0:
        head_byte = 0x80 | biased_exp
    else:
        # Asymmetric base-100 complement (mirror of the decoder).
        sub_from = 100
        for i in range(len(pairs) - 1, -1, -1):
            if pairs[i] == 0 and sub_from == 100:
                continue
            pairs[i] = sub_from - pairs[i]
            sub_from = 99
        # Negative: exponent bits are inverted and the sign bit stays 0.
        head_byte = biased_exp ^ 0x7F

    raw = bytes([head_byte]) + bytes(pairs)
    prec_short = (precision << 8) | scale
    return (5, prec_short, raw)
|
|
|
|
|
|
def encode_param(value: object) -> EncodedParam:
    """Choose an encoder from the Python type of ``value``.

    Returns ``(ifx_type, precision_short, raw_bytes)``. ``None`` yields
    ``(0, 0, b"")``; the caller is responsible for sending indicator=-1
    in that case.

    Dispatch order: None, bool, int (INT when the value fits signed
    32 bits, BIGINT otherwise), float, str.

    Raises:
        NotImplementedError: for ``decimal.Decimal`` (binding disabled
            until Phase 6.x) and for any other unsupported type.
    """
    if value is None:
        return (0, 0, b"")

    # bool is a subclass of int, so it has to be tested first.
    if isinstance(value, bool):
        return _encode_bool(value)

    if isinstance(value, int):
        fits_int32 = -0x80000000 <= value <= 0x7FFFFFFF
        if fits_int32:
            return _encode_int(value)
        return _encode_bigint(value)

    if isinstance(value, float):
        return _encode_float(value)

    if isinstance(value, str):
        return _encode_str(value)

    if not isinstance(value, decimal.Decimal):
        type_name = type(value).__name__
        raise NotImplementedError(
            f"parameter binding for {type_name} not yet supported "
            "(Phase 4 MVP: int, float, str, bool, None)"
        )

    # _encode_decimal exists, but the server rejected its output, so it is
    # deliberately not wired in. Workaround for callers: convert the
    # Decimal to float, or pass the value as a SQL literal.
    raise NotImplementedError(
        "Decimal parameter binding is Phase 6.x; convert to float "
        "or pass DECIMAL via SQL literal for now"
    )
|
|
|
|
|
|
# Phase 6+ adds: bytes/Bytes, datetime.date, datetime.datetime, Decimal,
# datetime.timedelta (INTERVAL), bytearray (BYTE), large strings (LVARCHAR).
# Currently an empty registry: ``encode_param`` dispatches with ``isinstance``
# checks and does not consult this table.
# NOTE(review): the value type here returns ``bytes``, whereas the
# ``_encode_*`` helpers return ``EncodedParam`` tuples — confirm which is
# intended before populating it.
ENCODERS: dict[int, Callable[[object], bytes]] = {}
|