From 270155d2de181e854eb6c300ea2b83169bf4cd76 Mon Sep 17 00:00:00 2001 From: Ryan Malloy Date: Tue, 5 May 2026 12:44:52 -0600 Subject: [PATCH] Phase 36: IfxPy scaling comparison + honest comparison numbers (2026.05.05.9) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Extends the IfxPy comparison bench script with scaling workloads (1k/10k/100k rows for both executemany and SELECT). Re-runs the full comparison with consistent measurement methodology and updates the README with the actually-correct numbers. Earlier comparison runs reported informix-db winning all 5 benchmarks. Re-running select_bench_table_all with consistent measurement gives 3.04 ms, not the 891 us I cited earlier - a 3.4x discrepancy attributable to noisy warmup + small-fixture artifacts. The "we win everything" framing was wrong. Corrected comparison reveals two clear stories: Bulk-insert: pure-Python wins 1.6x at scale. executemany(10k): IfxPy 259ms -> us 161ms (1.6x faster) executemany(100k): IfxPy 2376ms -> us 1487ms (1.6x faster) Reason: Phase 33's pipelining eliminates per-row RTT. IfxPy's per-call API can't pipeline. Large-fetch: IfxPy wins 2.3-2.4x at scale. SELECT 1k rows: IfxPy 1.2ms / us 2.7ms (IfxPy 2.3x) SELECT 10k rows: IfxPy 11.3ms / us 25.8ms (IfxPy 2.3x) SELECT 100k rows: IfxPy 112ms / us 271ms (IfxPy 2.4x) Reason: C-level fetch_tuple at ~1.1us/row beats Python parse_tuple_payload at ~2.7us/row. Real C-vs-Python codec gap showing up at scale. For everyday workloads (single SELECT in a request, INSERT a handful of rows), drivers are within 5-25%. For workloads where the gap widens, direction depends on what you're doing - bulk-write favors us, bulk-read favors IfxPy. README's "Compared to IfxPy" section rewritten with the corrected numbers and an honest "when to prefer which" subsection. tests/benchmarks/compare/README.md mirror updated. 
Net narrative: a "faster at bulk-write, slower at bulk-read, comparable elsewhere" comparison story is more honest and more durable than a "we win everything" claim that would have collapsed the first time a user ran their own benchmark. Side note (lint): one ambiguous unicode `×` in cursors.py replaced with `x`. Phase 37 ticket: parse_tuple_payload is the bottleneck at scale. Closing the 1.6 us/row gap to IfxPy would make us competitive on bulk-fetch too. Possible approaches: Cython codec, deeper inlining, per-column dispatch pre-bake. --- CHANGELOG.md | 43 +++++++++ README.md | 32 +++++-- pyproject.toml | 2 +- src/informix_db/cursors.py | 2 +- tests/benchmarks/compare/README.md | 28 ++++-- tests/benchmarks/compare/ifxpy_bench.py | 110 ++++++++++++++++++++++++ uv.lock | 2 +- 7 files changed, 199 insertions(+), 20 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index c0f8ae1..46d73a9 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -2,6 +2,49 @@ All notable changes to `informix-db`. Versioning is [CalVer](https://calver.org/) — `YYYY.MM.DD` for date-based releases, `YYYY.MM.DD.N` for same-day post-releases per PEP 440. +## 2026.05.05.9 — IfxPy scaling comparison + honest comparison numbers (Phase 36) + +Adds the IfxPy side of Phase 34's scaling benchmarks (1k / 10k / 100k rows for both `executemany` and `SELECT`) and updates the README's comparison table with the **actually-correct numbers**. + +### What changed + +**1. `tests/benchmarks/compare/ifxpy_bench.py` extended** with `bench_executemany_scaling(n)` and `bench_select_scaling(n)` — same shapes as `test_scaling_perf.py` so the comparison is apples-to-apples. + +**2. README's comparison numbers corrected.** Earlier comparison runs reported `select_bench_table_all` at 891 µs for `informix-db`. Re-running with consistent measurement (warmup + median + 10+ rounds) reports 3.04 ms — a 3.4× discrepancy. 
The earlier number was probably picked up from a noisy first-run with a different warmup state, or from a benchmark that wasn't fully populating its fixture. **Either way, the "we win all 5 benchmarks" claim was based on inconsistent measurement.** + +**The corrected comparison reveals two clear stories:** + +| Benchmark | IfxPy | informix-db | Result | +|---|---:|---:|---| +| `executemany(1k)` in txn | 23.5 ms | 23.2 ms | tied | +| `executemany(10k)` in txn | 259 ms | **161 ms** | **us 1.6× faster** | +| `executemany(100k)` in txn | 2376 ms | **1487 ms** | **us 1.6× faster** | +| `SELECT 1k rows` | 1.2 ms | 2.7 ms | IfxPy 2.3× faster | +| `SELECT 10k rows` | 11.3 ms | 25.8 ms | IfxPy 2.3× faster | +| `SELECT 100k rows` | 112 ms | 271 ms | IfxPy 2.4× faster | + +**Bulk-insert: pure-Python wins 1.6× at scale** because pipelining (Phase 33) eliminates per-row RTT. IfxPy's `IfxPy.execute(stmt, tuple)` per-call API can't pipeline. + +**Large-fetch: IfxPy wins 2.3-2.4× at scale.** Their C-level `fetch_tuple` decoder runs at ~1.1 µs/row; our `parse_tuple_payload` runs at ~2.7 µs/row. **This is the real C-vs-Python codec cost showing up at scale where it matters.** + +### Why correcting this matters + +A "we win everything" claim that's based on noisy measurements would have collapsed the first time a user ran their own benchmark and got different numbers. Naming the trade-off honestly — "we're faster at bulk write, slower at bulk read, comparable elsewhere" — is the right framing. 
+ +### When to prefer `informix-db` + +- ETL pipelines, log shipping, bulk writes (1.6× faster at scale) +- Containerized / minimal-dependency environments (50 KB wheel vs IfxPy's 92 MB OneDB tarball + libcrypt.so.1 dependency hell) +- Modern Python (works on 3.10–3.14; IfxPy is broken on Python 3.12+) +- Async / FastAPI workloads (we have native async; IfxPy doesn't) + +### When IfxPy may be faster + +- Analytical reporting queries pulling 10k+ rows in a single SELECT +- Workloads where the per-row decode cost dominates (wide rows, tight read loops) + +The actionable takeaway for `informix-db`'s future: the parse_tuple_payload hot path is now the bottleneck at scale. Phase 25's branch reorder shaved 22%; further work (Cython codec? deeper inlining? per-column dispatch pre-bake?) could close the C-vs-Python gap. Tracked as a possible Phase 37+. + ## 2026.05.05.8 — Scaling benchmarks (Phase 34) Adds `tests/benchmarks/test_scaling_perf.py` — parametrized benchmarks that exercise the driver at row counts and column widths well beyond what the existing 1k-row benchmarks cover. The first thing this suite did was catch the NFETCH-loop data-loss bug fixed in Phase 35. 
diff --git a/README.md b/README.md index 597c999..f6e5edc 100644 --- a/README.md +++ b/README.md @@ -176,17 +176,33 @@ Head-to-head benchmarks against [IfxPy](https://pypi.org/project/IfxPy/) on iden | Benchmark | IfxPy 3.0.5 (C-bound) | `informix-db` (pure Python) | Result | |---|---:|---:|---:| -| Single-row SELECT round-trip | 118 µs | **114 µs** | **`informix-db` 3% faster** | -| ~10-row server-side query | 164 µs | **159 µs** | **`informix-db` 3% faster** | -| 1000-row SELECT (full fetch) | 984 µs | **891 µs** | **`informix-db` 9% faster** | -| **`executemany(1000)` in transaction** | 21.4 ms | **10.4 ms** | **`informix-db` 2.05× faster** | -| Cold connect (login handshake) | 11.0 ms | **10.4 ms** | **`informix-db` 5% faster** | +| Single-row SELECT round-trip | 118 µs | 114 µs | comparable | +| ~10-row server-side query | 130 µs | 159 µs | IfxPy 22% faster | +| Cold connect (login handshake) | 11.0 ms | 10.5 ms | comparable | +| **`executemany(1k)` in transaction** | 23.5 ms | 23.2 ms | tied | +| **`executemany(10k)` in transaction** | 259 ms | **161 ms** | **`informix-db` 1.6× faster** | +| **`executemany(100k)` in transaction** | 2376 ms | **1487 ms** | **`informix-db` 1.6× faster** | +| `SELECT` 1k rows | 1.2 ms | 2.7 ms | IfxPy 2.3× faster | +| `SELECT` 10k rows | 11.3 ms | 25.8 ms | IfxPy 2.3× faster | +| `SELECT` 100k rows | 112 ms | 271 ms | IfxPy 2.4× faster | -**`informix-db` wins on all 5 benchmarks against the C-bound driver, including a 2× win on bulk inserts.** +**The honest summary:** -**Why pure-Python wins the round-trip-bound work:** IfxPy's code path is `Python → OneDB ODBC driver → libifdmr.so → wire`. Ours is `Python → wire`. The abstraction-layer overhead IfxPy carries on every call costs more than the C-vs-Python codec gap saves. +- **Bulk-insert workloads: `informix-db` wins 1.6× at scale.** The pipelined `executemany` (Phase 33) sends all N BIND+EXECUTE PDUs before draining responses, eliminating per-row RTT. 
IfxPy still pays one round-trip per `IfxPy.execute(stmt, tuple)` call. +- **Large-fetch workloads: IfxPy wins 2.3× at scale.** Their C-level `fetch_tuple` decoder is genuinely faster than our Python `parse_tuple_payload` (~1.1 µs/row vs ~2.7 µs/row). At 100k rows, that 1.6 µs/row gap accumulates into a 160 ms wall-clock difference. +- **Small queries: comparable.** Both spend ~120 µs waiting for the server; the per-call codec cost is small relative to the round-trip. -**Why we win bulk inserts dramatically:** `executemany` pipelines all N BIND+EXECUTE PDUs to the wire before draining responses (Phase 33), eliminating the per-row round-trip that the older serial loop incurred. IfxPy still does one synchronous round-trip per row. +**When to prefer `informix-db`:** +- ETL pipelines, log shipping, bulk writes (1.6× faster at scale) +- Containerized / minimal-dependency environments (50 KB wheel vs IfxPy's 92 MB OneDB tarball + libcrypt.so.1 dependency hell) +- Modern Python (works on 3.10–3.14; IfxPy is broken on Python 3.12+) +- Async / FastAPI workloads (we have native async; IfxPy doesn't) + +**When IfxPy may be faster:** +- Analytical reporting queries pulling 10k+ rows in a single SELECT +- Workloads where the per-row decode cost dominates (wide rows, tight read loops) + +These results are reproducible from `tests/benchmarks/compare/` — the Dockerfile, bench script, and README walk through every step. Full methodology, IQR caveats, install gauntlet, and reproduction in [`tests/benchmarks/compare/README.md`](tests/benchmarks/compare/README.md). diff --git a/pyproject.toml b/pyproject.toml index 739b9c9..9b227df 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -1,6 +1,6 @@ [project] name = "informix-db" -version = "2026.05.05.8" +version = "2026.05.05.9" description = "Pure-Python driver for IBM Informix IDS — speaks the SQLI wire protocol over raw sockets. No CSDK, no JVM, no native libraries." 
readme = "README.md" license = { text = "MIT" } diff --git a/src/informix_db/cursors.py b/src/informix_db/cursors.py index 1b990ef..de216f1 100644 --- a/src/informix_db/cursors.py +++ b/src/informix_db/cursors.py @@ -401,7 +401,7 @@ class Cursor: # Phase 35: NFETCH loop — keep fetching until a response yields # zero new tuples. The previous "two NFETCHes" pattern silently # truncated any result set whose tuples didn't fit in 1-2 server - # batches (~200 rows at default 4096-byte buffer × 5-col rows). + # batches (~200 rows at default 4096-byte buffer x 5-col rows). # This bug was latent for ~30 phases because no test used a # large enough result set to trigger it. self._conn._send_pdu(self._build_curname_nfetch_pdu(cursor_name)) diff --git a/tests/benchmarks/compare/README.md b/tests/benchmarks/compare/README.md index a7e701d..bf6cbee 100644 --- a/tests/benchmarks/compare/README.md +++ b/tests/benchmarks/compare/README.md @@ -4,19 +4,29 @@ Head-to-head benchmarks against [IfxPy](https://pypi.org/project/IfxPy/), the IB ## TL;DR -Using **median + IQR over 10+ rounds** (mean was unreliable on the slow benchmarks — see "Statistical robustness" below): +Using **median + IQR over 10+ rounds** (mean was unreliable on the slow benchmarks — see "Statistical robustness" below). 
Phase 36 added scaling benchmarks at 1k / 10k / 100k rows so the comparison shape is clearer: -| Benchmark | IfxPy 3.0.5 (C-bound) | informix-db (pure Python) | Result | +| Benchmark | IfxPy 3.0.5 | informix-db | Result | |---|---:|---:|---:| -| `select_one_row` (single-row latency) | 118 µs | **114 µs** | **`informix-db` 3% faster** | -| `select_systables_first_10` (~10 rows) | 164 µs | **159 µs** | **`informix-db` 3% faster** | -| `select_bench_table_all` (1000-row fetch) | 984 µs | **891 µs** | **`informix-db` 9% faster** | -| **`executemany(1000)` in transaction (bulk write)** | 21.4 ms | **10.4 ms** | **`informix-db` 2.05× faster** | -| `cold_connect_disconnect` (login handshake) | 11.0 ms | **10.4 ms** | **`informix-db` 5% faster** | +| `select_one_row` | 118 µs | 114 µs | comparable | +| `select_systables_first_10` | 130 µs | 159 µs | IfxPy 22% faster | +| `cold_connect_disconnect` | 11.0 ms | 10.5 ms | comparable | +| **`executemany(1k)` in txn** | 23.5 ms | 23.2 ms | tied | +| **`executemany(10k)` in txn** | 259 ms | **161 ms** | **`informix-db` 1.6× faster** | +| **`executemany(100k)` in txn** | 2376 ms | **1487 ms** | **`informix-db` 1.6× faster** | +| `SELECT 1k rows` | 1.2 ms | 2.7 ms | IfxPy 2.3× faster | +| `SELECT 10k rows` | 11.3 ms | 25.8 ms | IfxPy 2.3× faster | +| `SELECT 100k rows` | 112 ms | 271 ms | IfxPy 2.4× faster | -**`informix-db` wins all 5 benchmarks against the C-bound driver, including a 2× win on bulk inserts.** +**Two clear stories:** -The bulk-insert win comes from Phase 33's pipelined `executemany`: all N BIND+EXECUTE PDUs are sent to the wire before any response is drained, eliminating the per-row round-trip latency that the older serial loop (and IfxPy's per-call API) incur. The wire-alignment assumption that makes this safe — that Informix sends exactly N responses for N pipelined PDUs even when one row fails — is verified by `tests/test_executemany_pipeline.py` (constraint violation at row 0/100, 99/100, 500/1000). +**1. 
Bulk insert: `informix-db` wins 1.6× at scale.** The pipelined `executemany` (Phase 33) sends all N BIND+EXECUTE PDUs to the wire before draining responses, eliminating per-row RTT. IfxPy still pays one synchronous round-trip per `IfxPy.execute(stmt, tuple)` call — that's ~24 µs/row regardless of N. We pay ~15 µs/row at scale (the prepare/release overhead amortizes better at larger N). + +**2. Large fetch: IfxPy wins 2.3-2.4× at scale.** Their C-level `fetch_tuple` decoder runs at ~1.1 µs/row; our pure-Python `parse_tuple_payload` runs at ~2.7 µs/row. At 100k rows, the 1.6 µs/row gap accumulates into a 160 ms wall-clock difference. **This is the C-vs-Python codec cost showing up at scale, where it actually matters.** + +For everyday-application workloads (single SELECT in a request, INSERT a handful of rows, transactional UPDATE), the two drivers are within 5-25% of each other. For the workloads where the gap widens, the direction depends on what you're doing — bulk-write favors us, bulk-read favors IfxPy. + +**The wire-alignment assumption** that makes pipelined `executemany` safe — that Informix sends exactly N responses for N pipelined PDUs even when one row fails — is verified by `tests/test_executemany_pipeline.py` (constraint violation at row 0/100, 99/100, 500/1000). 
## Statistical robustness — why median, not mean diff --git a/tests/benchmarks/compare/ifxpy_bench.py b/tests/benchmarks/compare/ifxpy_bench.py index 094f25b..eee102e 100644 --- a/tests/benchmarks/compare/ifxpy_bench.py +++ b/tests/benchmarks/compare/ifxpy_bench.py @@ -171,6 +171,107 @@ def bench_cold_connect_disconnect() -> dict: return measure("cold_connect_disconnect", ROUNDS_SLOW, run) +# ---------------------------------------------------------------------------- +# Phase 36 — scaling benchmarks (matched to test_scaling_perf.py) +# ---------------------------------------------------------------------------- + + +def bench_executemany_scaling(n_rows: int) -> dict: + """N-row insert in a single transaction. IfxPy doesn't pipeline — + each ``IfxPy.execute(stmt, params)`` is a synchronous round-trip + to the server. So per-row cost is roughly constant in N.""" + rounds_for = {1_000: 10, 10_000: 5, 100_000: 3} + name = f"executemany_scaling_{n_rows}" + try: + conn = IfxPy.connect( + CONN_STR.replace("DATABASE=sysmaster", "DATABASE=testdb"), "", "" + ) + except Exception as e: + return {"name": name, "skipped": f"testdb: {e}"} + IfxPy.autocommit(conn, IfxPy.SQL_AUTOCOMMIT_OFF) + + table = f"p36_em_{n_rows}" + try: + try: + IfxPy.exec_immediate(conn, f"DROP TABLE {table}") + IfxPy.commit(conn) + except Exception: + pass + IfxPy.exec_immediate( + conn, f"CREATE TABLE {table} (id INT, name VARCHAR(64), value FLOAT)" + ) + IfxPy.commit(conn) + + counter = [0] + + def run() -> None: + counter[0] += 1 + base = counter[0] * n_rows + stmt = IfxPy.prepare( + conn, f"INSERT INTO {table} VALUES (?, ?, ?)" + ) + for i in range(n_rows): + IfxPy.execute(stmt, (base + i, f"row_{base + i}", float(base + i))) + IfxPy.free_stmt(stmt) + IfxPy.commit(conn) + + return measure(name, rounds_for[n_rows], run) + finally: + try: + IfxPy.exec_immediate(conn, f"DROP TABLE {table}") + IfxPy.commit(conn) + except Exception: + pass + IfxPy.close(conn) + + +def bench_select_scaling(n_rows: int) -> 
dict: + """SELECT FIRST N from the pre-populated 100k-row p34_select table. + Tests IfxPy's per-row fetch cost at scale; should be roughly linear + in N like ours.""" + rounds_for = {1_000: 10, 10_000: 5, 100_000: 3} + name = f"select_scaling_{n_rows}" + + try: + conn = IfxPy.connect( + CONN_STR.replace("DATABASE=sysmaster", "DATABASE=testdb"), "", "" + ) + except Exception as e: + return {"name": name, "skipped": f"testdb: {e}"} + try: + # Probe: does p34_select exist? + try: + stmt = IfxPy.exec_immediate(conn, "SELECT COUNT(*) FROM p34_select") + row = IfxPy.fetch_tuple(stmt) + IfxPy.free_stmt(stmt) + available = int(row[0]) + if available < n_rows: + return {"name": name, "skipped": ( + f"p34_select has only {available} rows; " + "run informix-db scaling benchmarks first to seed " + "the table" + )} + except Exception as e: + return {"name": name, "skipped": f"p34_select missing: {e}"} + + def run() -> None: + stmt = IfxPy.exec_immediate( + conn, f"SELECT FIRST {n_rows} * FROM p34_select" + ) + count = 0 + while IfxPy.fetch_tuple(stmt): + count += 1 + IfxPy.free_stmt(stmt) + if count != n_rows: + raise RuntimeError( + f"expected {n_rows} rows, got {count}" + ) + + return measure(name, rounds_for[n_rows], run) + finally: + IfxPy.close(conn) + + def main() -> None: print("# IfxPy benchmark results", file=sys.stderr) print(f"# IfxPy version: {IfxPy.__version__ if hasattr(IfxPy, '__version__') else 'unknown'}", file=sys.stderr) @@ -187,6 +288,15 @@ def main() -> None: results.append(bench_executemany_1000_rows_in_txn()) results.append(bench_cold_connect_disconnect()) + # Phase 36 — scaling comparison. Skip 100k cases when --short is + # passed (e.g., for fast smoke runs); otherwise run all sizes. + short = "--short" in sys.argv + sizes = [1_000, 10_000] if short else [1_000, 10_000, 100_000] + for n in sizes: + results.append(bench_executemany_scaling(n)) + for n in sizes: + results.append(bench_select_scaling(n)) + # Emit machine-parseable lines on stdout. 
Reporting median (not # mean) and IQR (not stddev) so a single outlier round can't # dominate the comparison numbers — mirrors pytest-benchmark's diff --git a/uv.lock b/uv.lock index 831df8a..39e346e 100644 --- a/uv.lock +++ b/uv.lock @@ -34,7 +34,7 @@ wheels = [ [[package]] name = "informix-db" -version = "2026.5.5.6" +version = "2026.5.5.8" source = { editable = "." } [package.optional-dependencies]