From 7f729b3a380cc2b7a3c36366f2f1a5f8b84e789a Mon Sep 17 00:00:00 2001 From: Ryan Malloy Date: Tue, 5 May 2026 13:50:40 -0600 Subject: [PATCH] Phase 37: Pre-baked per-column reader strategy (2026.05.05.10) Closes some of the C-vs-Python codec gap on bulk fetch by moving per-column dispatch decisions from row time to parse_describe time. Same approach psycopg3 uses in its pure-Python mode (loader cache per column). What changed: _resultset.py: * New compile_column_readers(columns) builds a per-column dispatch tuple at parse_describe time. Each tuple is (kind, *args) where kind is a small int (FIXED/BYTE_PREFIX/CHAR/LVARCHAR/DECIMAL/ DATETIME/INTERVAL/LEGACY). * parse_tuple_payload accepts optional readers= parameter. Fast path uses int comparison + tuple unpack instead of the legacy frozenset/dict-lookup chain. * _legacy_dispatch_one_column factored out to handle rare types (UDT/composite/UDTVAR) that fall through. cursors.py: * Cursor caches self._column_readers after parse_describe, computed once via compile_column_readers. Reset on new execute. * Fetch loop passes readers=self._column_readers. Performance (median of 10+ rounds): select_scaling[1000]: 2.7 ms -> 2.51 ms (-7%) select_scaling[10000]: 25.8 ms -> 25.0 ms (-3%) select_scaling[100000]: 271 ms -> 246 ms (-9%) wide_row_select[5]: 2.4 ms -> 2.16 ms (-10%) wide_row_select[20]: 5.1 ms -> 4.14 ms (-19%) wide_row_select[50]: 10.1 ms -> 8.21 ms (-19%) wide_row_select[100]: 19.4 ms -> 14.6 ms (-25%) Wide-row workloads benefit most - per-column dispatch savings accumulate linearly with column count. At 100 cols, 25% speedup. IfxPy gap shrinks from ~2.4x to ~2.2x on bulk fetch. Real progress but not closing-the-gap. Next lever is exec()-based codegen (per-result-set decoder function) - possible Phase 38. 221 integration tests still pass. Benchmark suite acts as regression test. 
Architectural note: chose tuple dispatch (r[0] int compare) over object-method dispatch (loader.load(data)) for ~20-30 ns/col speed advantage in the inner loop. Slightly less extensible than psycopg3's class-based loaders but materially faster in pure Python. --- CHANGELOG.md | 45 +++++++ pyproject.toml | 2 +- src/informix_db/_resultset.py | 235 ++++++++++++++++++++++++++++++++++ src/informix_db/cursors.py | 22 +++- 4 files changed, 301 insertions(+), 3 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 46d73a9..2792c3f 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -2,6 +2,51 @@ All notable changes to `informix-db`. Versioning is [CalVer](https://calver.org/) — `YYYY.MM.DD` for date-based releases, `YYYY.MM.DD.N` for same-day post-releases per PEP 440. +## 2026.05.05.10 — Phase 37: Pre-baked per-column reader strategy + +Closes some of the C-vs-Python codec gap on bulk fetch by moving per-column dispatch decisions from row time to `parse_describe` time. Same idea as psycopg3's pure-Python loader-cache pattern. + +### What changed + +`src/informix_db/_resultset.py`: +- New `compile_column_readers(columns)` returns a list of pre-computed dispatch tuples — one per column. Each tuple is `(kind, *args)` where `kind` is a small int identifying the reader strategy. +- `parse_tuple_payload` accepts an optional `readers=` parameter. When provided, the hot loop dispatches on the integer kind (one int comparison per column) instead of running the legacy frozenset/dict-lookup chain. +- Common types (`FIXED`, `BYTE_PREFIX`, `CHAR`, `LVARCHAR`, `DECIMAL`, `DATETIME`, `INTERVAL`) get pre-compiled fast paths. Rare types (UDT/composite) tagged `_RK_LEGACY` and fall through to a `_legacy_dispatch_one_column` helper. + +`src/informix_db/cursors.py`: +- `Cursor` now stores `self._column_readers` after `parse_describe`, computed once via `compile_column_readers`. Reset on each new `execute`. +- The fetch loop passes `readers=self._column_readers` to `parse_tuple_payload`. 
+ +### Performance + +Real numbers from the integration container, median of 10+ rounds: + +| Benchmark | Before | After | Δ | +|---|---:|---:|---:| +| `select_scaling[1000]` | 2.7 ms | 2.51 ms | -7% | +| `select_scaling[10000]` | 25.8 ms | 25.0 ms | -3% | +| `select_scaling[100000]` | 271 ms | 246 ms | **-9%** | +| `wide_row_select[5]` | 2.4 ms | 2.16 ms | -10% | +| `wide_row_select[20]` | 5.1 ms | 4.14 ms | -19% | +| `wide_row_select[50]` | 10.1 ms | 8.21 ms | -19% | +| `wide_row_select[100]` | 19.4 ms | 14.6 ms | **-25%** | + +**Wide-row workloads benefit most** — per-column dispatch savings accumulate linearly with column count. At 100 columns the speedup is 25%; at 5 columns it's 10%. + +### Honest assessment + +Less than the ~30% I projected. The actual per-row cost is dominated by decoder bodies and slice operations more than I estimated; pre-baking the dispatch only saved ~50-100 ns/col instead of the 150-200 ns I'd hoped for. + +The IfxPy gap shrinks from ~2.4× to ~2.2× on bulk fetch. Real progress, but not closing-the-gap territory. **The next lever for materially closing the gap is `exec()`-based codegen** (build a row-decoder function per result-set shape; eliminates per-column iteration overhead entirely). Possible Phase 38. + +### Architectural note + +This is the same pattern psycopg3 uses in its pure-Python mode: cache loaders per column at execute time, dispatch via lookup in the hot loop. We pick tuple-dispatch over object-method dispatch (`r[0]` int compare vs. `loader.load(data)`) for raw speed in the inner loop — slightly less extensible but ~20-30 ns faster per column. + +### Tests + +All 221 integration tests still pass. No new test code; the benchmark suite acts as the regression test (parse_tuple_5cols / select_scaling / wide_row_select). 
+ ## 2026.05.05.9 — IfxPy scaling comparison + honest comparison numbers (Phase 36) Adds the IfxPy side of Phase 34's scaling benchmarks (1k / 10k / 100k rows for both `executemany` and `SELECT`) and updates the README's comparison table with the **actually-correct numbers**. diff --git a/pyproject.toml b/pyproject.toml index 9b227df..e6d7fb4 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -1,6 +1,6 @@ [project] name = "informix-db" -version = "2026.05.05.9" +version = "2026.05.05.10" description = "Pure-Python driver for IBM Informix IDS — speaks the SQLI wire protocol over raw sockets. No CSDK, no JVM, no native libraries." readme = "README.md" license = { text = "MIT" } diff --git a/src/informix_db/_resultset.py b/src/informix_db/_resultset.py index c4cf511..0e217e6 100644 --- a/src/informix_db/_resultset.py +++ b/src/informix_db/_resultset.py @@ -26,6 +26,7 @@ from types import MappingProxyType from ._protocol import IfxStreamReader from ._types import IfxType, base_type, is_nullable from .converters import ( + DECODERS, FIXED_WIDTHS, BlobLocator, ClobLocator, @@ -234,10 +235,158 @@ _NUMERIC_TYPES = frozenset({_TC_DECIMAL, _TC_MONEY}) _FIXED_WIDTH_TYPES = frozenset(FIXED_WIDTHS.keys()) +# Phase 37 — per-column reader strategy. +# +# parse_tuple_payload's hot loop used to evaluate the same dispatch +# decisions per column per row: "is this a fixed-width type? a +# length-prefixed string? what's the decoder?" Those decisions only +# depend on column metadata, not row data — so we make them ONCE at +# parse_describe time and emit a per-column tuple the hot loop can +# dispatch on with a single integer comparison. +# +# Reader-strategy kinds (the first element of each compiled tuple). +# Tuple shapes are documented at each kind's compile branch in +# ``compile_column_readers`` below. Common types (covering >95% of +# real-world workloads) get pre-compiled; rare types fall through +# to the legacy dispatch in parse_tuple_payload. 
+_RK_FIXED = 0 # (kind, width, decoder) — INT/FLOAT/DATE/etc. +_RK_BYTE_PREFIX = 1 # (kind, decoder) — VARCHAR/NCHAR/NVCHAR +_RK_CHAR = 2 # (kind, width, decoder) — fixed-width CHAR +_RK_LVARCHAR = 3 # (kind, decoder) — LVARCHAR (4-byte prefix) +_RK_DECIMAL = 4 # (kind, width, decoder) — DECIMAL/MONEY +_RK_DATETIME = 5 # (kind, width, encoded_length) — DATETIME (uses _decode_datetime) +_RK_INTERVAL = 6 # (kind, width, encoded_length) — INTERVAL (uses _decode_interval) +_RK_LEGACY = 7 # (kind, type_code) — fall through to original dispatch + + +def compile_column_readers(columns: list[ColumnInfo]) -> list[tuple]: + """Compile a per-column reader strategy. + + Phase 37: replaces the per-row branch-dispatch in + ``parse_tuple_payload`` with a one-shot compilation pass at + ``parse_describe`` time. Each column gets a tuple the hot loop + dispatches on by comparing its integer kind. + + Common types (~95% of real workloads) get pre-compiled fast + paths. Rare types (UDT/composite/unknown type codes) + are tagged ``_RK_LEGACY`` and fall through to the legacy + dispatch — preserves correctness on every shape we've seen + while accelerating the hot path. + """ + readers: list[tuple] = [] + for col in columns: + tc = col.type_code + + if tc in _FIXED_WIDTH_TYPES: + readers.append((_RK_FIXED, FIXED_WIDTHS[tc], DECODERS[tc])) + continue + + if tc == _TC_CHAR: + readers.append((_RK_CHAR, col.encoded_length, DECODERS[tc])) + continue + + if tc in _LENGTH_PREFIXED_SHORT_TYPES: + # VARCHAR / NCHAR / NVCHAR — CHAR was already excluded above.
+ readers.append((_RK_BYTE_PREFIX, DECODERS[tc])) + continue + + if tc == _TC_LVARCHAR: + readers.append((_RK_LVARCHAR, DECODERS[tc])) + continue + + if tc in _NUMERIC_TYPES: + precision = (col.encoded_length >> 8) & 0xFF + width = (precision + 1) // 2 + 1 + readers.append((_RK_DECIMAL, width, DECODERS[tc])) + continue + + if tc == _TC_DATETIME: + digit_count = (col.encoded_length >> 8) & 0xFF + width = (digit_count + 1) // 2 + 1 + readers.append((_RK_DATETIME, width, col.encoded_length)) + continue + + if tc == _TC_INTERVAL: + digit_count = (col.encoded_length >> 8) & 0xFF + width = (digit_count + 1) // 2 + 1 + readers.append((_RK_INTERVAL, width, col.encoded_length)) + continue + + # UDT / composite / unknown — let the legacy dispatch handle it. + readers.append((_RK_LEGACY, tc)) + + return readers + + +def _legacy_dispatch_one_column( + payload: bytes, + offset: int, + tc: int, + col: ColumnInfo, + encoding: str, +) -> tuple[int, object]: + """Phase 37 fallback for rare types not covered by the pre-compiled + reader strategies (UDTFIXED, COMPOSITE UDT, UDTVAR-lvarchar, unknown). + + Mirrors the corresponding branches of the legacy ``parse_tuple_payload`` + dispatch chain but for one column at a time. Returns ``(new_offset, + decoded_value)``. 
+ """ + # BLOB / CLOB locator (UDTFIXED + extended_id 10/11) + if tc == _TC_UDTFIXED and col.extended_id in (10, 11): + width = col.encoded_length + raw = payload[offset:offset + width] + offset += width + cls = BlobLocator if col.extended_id == 10 else ClobLocator + return offset, cls(raw=bytes(raw)) + + # ROW / COLLECTION composite UDT + if tc in _COMPOSITE_UDT_TYPES: + indicator = payload[offset] + offset += 1 + if indicator == 1: + return offset, None + length = int.from_bytes(payload[offset:offset + 4], "big", signed=True) + offset += 4 + raw = bytes(payload[offset:offset + length]) + offset += length + if tc == _TC_ROW: + return offset, RowValue(raw=raw, schema=col.extended_name) + return offset, CollectionValue( + raw=raw, + kind=_COLLECTION_KIND_MAP[tc], + element_schema=col.extended_name, + ) + + # UDTVAR with extended_name=lvarchar (e.g., result of lotofile()) + if tc == _TC_UDTVAR and col.extended_name == "lvarchar": + indicator = payload[offset] + offset += 1 + if indicator == 1: + return offset, None + length = int.from_bytes(payload[offset:offset + 4], "big", signed=True) + offset += 4 + raw = payload[offset:offset + length] + offset += length + if length & 1: + offset += 1 + return offset, raw.decode(encoding) + + # Unknown — surface ``encoded_length`` bytes raw. + width = col.encoded_length + raw = payload[offset:offset + width] + offset += width + try: + return offset, _decode_base(tc, raw, encoding) + except NotImplementedError: + return offset, raw + + def parse_tuple_payload( reader: IfxStreamReader, columns: list[ColumnInfo], encoding: str = "iso-8859-1", + readers: list[tuple] | None = None, ) -> tuple: """Parse a SQ_TUPLE payload (the SQ_TUPLE tag is already consumed). @@ -272,6 +421,92 @@ def parse_tuple_payload( values: list[object] = [] offset = 0 + + # Phase 37 fast path: if the caller pre-compiled a reader-strategy + # list, dispatch on the integer kind for each column. 
The compile + # step (``compile_column_readers``) made the per-column decisions + # ONCE; this loop just executes them. Common types (FIXED, BYTE_PREFIX, + # CHAR, LVARCHAR, DECIMAL, DATETIME, INTERVAL) get pre-baked tuples; + # rare types fall through to the legacy branch chain via _RK_LEGACY. + if readers is not None: + for r in readers: + kind = r[0] + + if kind == _RK_FIXED: + _, width, decoder = r + raw = payload[offset:offset + width] + offset += width + values.append(decoder(raw)) + continue + + if kind == _RK_BYTE_PREFIX: + _, decoder = r + length = payload[offset] + offset += 1 + raw = payload[offset:offset + length] + offset += length + values.append(decoder(raw, encoding)) + continue + + if kind == _RK_CHAR: + _, width, decoder = r + raw = payload[offset:offset + width] + offset += width + values.append(decoder(raw, encoding)) + continue + + if kind == _RK_LVARCHAR: + _, decoder = r + length = int.from_bytes( + payload[offset:offset + 4], "big", signed=True + ) + offset += 4 + raw = payload[offset:offset + length] + offset += length + if length & 1: + offset += 1 + values.append(decoder(raw, encoding)) + continue + + if kind == _RK_DECIMAL: + _, width, decoder = r + raw = payload[offset:offset + width] + offset += width + try: + values.append(decoder(raw)) + except NotImplementedError: + values.append(raw) + continue + + if kind == _RK_DATETIME: + _, width, enc_len = r + raw = payload[offset:offset + width] + offset += width + values.append(_decode_datetime(raw, enc_len)) + continue + + if kind == _RK_INTERVAL: + _, width, enc_len = r + raw = payload[offset:offset + width] + offset += width + values.append(_decode_interval(raw, enc_len)) + continue + + # _RK_LEGACY — rare type, fall back to the original dispatch. + # Find the matching ColumnInfo (parallel index) and run the + # legacy branch chain by recursing into the slow path. 
Rather + # than literally re-entering the slow path mid-row, the + # legacy branches are run for this one column via + # ``_legacy_dispatch_one_column``. + tc = r[1] + col = columns[len(values)] # parallel index — values has one entry per processed col + offset, value = _legacy_dispatch_one_column( + payload, offset, tc, col, encoding + ) + values.append(value) + return tuple(values) + + # Legacy slow path (no pre-compiled readers). # Note: ``col.type_code`` is *already* base-typed by ``parse_describe`` # (see INVARIANT comment there), so we don't re-strip high-bit flags # here. The original code called ``base_type(col.type_code)`` per diff --git a/src/informix_db/cursors.py b/src/informix_db/cursors.py index de216f1..33f185c 100644 --- a/src/informix_db/cursors.py +++ b/src/informix_db/cursors.py @@ -30,7 +30,12 @@ from typing import TYPE_CHECKING, Any from . import _errcodes from ._messages import MessageType from ._protocol import IfxStreamReader, make_pdu_writer -from ._resultset import ColumnInfo, parse_describe, parse_tuple_payload +from ._resultset import ( + ColumnInfo, + compile_column_readers, + parse_describe, + parse_tuple_payload, +) from .converters import encode_param from .exceptions import ( DatabaseError, @@ -186,6 +191,7 @@ class Cursor: self._scrollable = scrollable self._description: list[tuple] | None = None self._columns: list[ColumnInfo] = [] + self._column_readers: list[tuple] | None = None # Phase 37 self._rowcount: int = -1 self._rows: list[tuple] = [] # Phase 17: index-based row access enables scroll cursors. The @@ -306,6 +312,7 @@ class Cursor: # Reset previous-execute state. self._description = None self._columns = [] + self._column_readers = None # Phase 37 self._rowcount = -1 self._rows = [] self._row_index = -1 # before-first-row @@ -900,6 +907,7 @@ class Cursor: # Reset per-execute state.
self._description = None self._columns = [] + self._column_readers = None # Phase 37 self._rowcount = -1 self._rows = [] self._row_index = -1 @@ -1538,6 +1546,13 @@ class Cursor: self._description = ( [c.to_description_tuple() for c in self._columns] if self._columns else None ) + # Phase 37: pre-compile per-column reader strategy. The hot + # row-decode loop in parse_tuple_payload uses this to avoid + # re-running per-row dispatch decisions that depend only + # on column metadata. + self._column_readers = ( + compile_column_readers(self._columns) if self._columns else None + ) elif tag == 94: # SQ_INSERTDONE — Informix optimization: literal # INSERT executed during PREPARE. Payload is: # readLongInt (10 bytes) — serial8 inserted @@ -1567,7 +1582,10 @@ class Cursor: return elif tag == MessageType.SQ_TUPLE: row = parse_tuple_payload( - reader, self._columns, encoding=self._conn.encoding + reader, + self._columns, + encoding=self._conn.encoding, + readers=self._column_readers, ) self._rows.append(row) elif tag == MessageType.SQ_DONE: