Ryan Malloy 270155d2de Phase 36: IfxPy scaling comparison + honest comparison numbers (2026.05.05.9)
Extends the IfxPy comparison bench script with scaling workloads
(1k/10k/100k rows for both executemany and SELECT). Re-runs the
full comparison with consistent measurement methodology and updates
the README with the actually-correct numbers.

Earlier comparison runs reported informix-db winning all 5
benchmarks. Re-running select_bench_table_all with consistent
measurement gives 3.04 ms, not the 891 us I cited earlier - a
3.4x discrepancy attributable to noisy warmup + small-fixture
artifacts. The "we win everything" framing was wrong.

Corrected comparison reveals two clear stories:

Bulk-insert: pure-Python wins 1.6x at scale.
  executemany(10k):  IfxPy 259ms  -> us 161ms (1.6x faster)
  executemany(100k): IfxPy 2376ms -> us 1487ms (1.6x faster)
Reason: Phase 33's pipelining eliminates per-row RTT. IfxPy's
per-call API can't pipeline.

Large-fetch: IfxPy wins 2.3-2.4x at scale.
  SELECT 1k rows:   IfxPy 1.2ms  / us 2.7ms (IfxPy 2.3x)
  SELECT 10k rows:  IfxPy 11.3ms / us 25.8ms (IfxPy 2.3x)
  SELECT 100k rows: IfxPy 112ms  / us 271ms (IfxPy 2.4x)
Reason: C-level fetch_tuple at ~1.1us/row beats Python
parse_tuple_payload at ~2.7us/row. Real C-vs-Python codec gap
showing up at scale.

For everyday workloads (single SELECT in a request, INSERT a
handful of rows), drivers are within 5-25%. For workloads where
the gap widens, direction depends on what you're doing - bulk-
write favors us, bulk-read favors IfxPy.

README's "Compared to IfxPy" section rewritten with the corrected
numbers and an honest "when to prefer which" subsection.
tests/benchmarks/compare/README.md mirror updated.

Net narrative: a "faster at bulk-write, slower at bulk-read,
comparable elsewhere" comparison story is more honest and more
durable than a "we win everything" claim that would have collapsed
the first time a user ran their own benchmark.

Side note (lint): one ambiguous unicode `×` in cursors.py replaced
with `x`.

Phase 37 ticket: parse_tuple_payload is the bottleneck at scale.
Closing the 1.6 us/row gap to IfxPy would make us competitive on
bulk-fetch too. Possible approaches: Cython codec, deeper inlining,
per-column dispatch pre-bake.
2026-05-05 12:44:52 -06:00

318 lines
10 KiB
Python

"""IfxPy comparison benchmark.
Runs the same workloads as ``tests/benchmarks/test_*_perf.py`` against
the same dev-container Informix instance, but using IfxPy (the C-bound
PyPI driver) instead of ``informix-db``. Numbers go straight to stdout;
the host parses them and produces a side-by-side table.
Workloads:
* ``select_one_row`` — single-row SELECT round-trip latency
* ``select_systables_first_10`` — small server-side query
* ``select_bench_table_all`` — 1k-row sustained fetch
* ``executemany_1000_rows_in_txn`` — bulk INSERT throughput
* ``cold_connect_disconnect`` — login handshake cost
Each workload runs N times; we report mean and stddev.
"""
from __future__ import annotations
import statistics
import sys
import time
from collections.abc import Callable
import IfxPy
# Connect string — mirrors the conftest.py defaults the host uses.
# Targets the dev-container Informix instance on localhost:9088;
# sysmaster is the always-present system database (testdb is swapped
# in by the write benchmarks via str.replace).
CONN_STR = (
    "SERVER=informix;"
    "DATABASE=sysmaster;"
    "HOST=127.0.0.1;"
    "SERVICE=9088;"
    "UID=informix;"
    "PWD=in4mix;"
    "PROTOCOL=onsoctcp"
)
# Round counts per workload speed class; all even, all small enough to
# keep a full run quick.
ROUNDS_FAST = 100  # for sub-millisecond ops
ROUNDS_MED = 20  # for 1-100ms ops
ROUNDS_SLOW = 10  # for >1s ops; bumped from 3 in Tier 1 — the smaller
# sample produced unreliable means (cold-connect's stddev was 4.98 ms
# across 3 rounds; with 10 rounds the median is stable run-to-run).
def measure(name: str, rounds: int, body: Callable[[], None]) -> dict:
    """Run ``body`` ``rounds`` times; return median + IQR in seconds.

    Median is more robust than mean against single-round outliers (GC
    pauses, server scheduler hiccups). IQR (interquartile range) is
    a noise estimator that also resists outliers — much better than
    stddev when one bad round can dominate.

    Fix: ``timings[len(timings) // 2]`` picks the *upper* of the two
    middle samples when ``rounds`` is even (and every ROUNDS_* constant
    is even), biasing the reported median high. ``statistics.median``
    averages the two middles, giving the true median.

    :param name: workload label echoed back in the result dict.
    :param rounds: number of timed invocations of ``body``.
    :param body: zero-arg callable containing the timed work.
    :returns: dict with name/rounds/median_s/iqr_s/min_s/max_s/mean_s/stddev_s.
    """
    timings: list[float] = []
    for _ in range(rounds):
        t0 = time.perf_counter()
        body()
        t1 = time.perf_counter()
        timings.append(t1 - t0)
    timings.sort()
    # Index-based quartiles are kept as-is: they are only a noise
    # estimate, and matching the original keeps IQR comparable across
    # runs. Only the median needed the even-count correction.
    q1 = timings[len(timings) // 4]
    q3 = timings[(3 * len(timings)) // 4]
    return {
        "name": name,
        "rounds": rounds,
        "median_s": statistics.median(timings),
        "iqr_s": q3 - q1,
        "min_s": timings[0],
        "max_s": timings[-1],
        "mean_s": statistics.mean(timings),  # kept for cross-checking
        "stddev_s": statistics.stdev(timings) if len(timings) > 1 else 0.0,
    }
def bench_select_one_row(conn) -> dict:
    """Single-row SELECT round-trip latency on an already-open connection."""
    sql = "SELECT 1 FROM systables WHERE tabid = 1"

    def one_round() -> None:
        # exec + one fetch + free: a complete client/server round-trip
        handle = IfxPy.exec_immediate(conn, sql)
        IfxPy.fetch_tuple(handle)
        IfxPy.free_stmt(handle)

    return measure("select_one_row", ROUNDS_FAST, one_round)
def bench_select_systables_first_10(conn) -> dict:
    """Small server-side query: FIRST 10 rows from systables, fully drained."""
    sql = "SELECT FIRST 10 tabname, owner, tabid, ncols FROM systables"

    def one_round() -> None:
        handle = IfxPy.exec_immediate(conn, sql)
        # Drain the cursor completely so fetch cost is part of the timing.
        while IfxPy.fetch_tuple(handle):
            pass
        IfxPy.free_stmt(handle)

    return measure("select_systables_first_10", ROUNDS_FAST, one_round)
def bench_select_bench_table_all(conn) -> dict:
    """Sustained 1k-row fetch; requires the host-created p21_bench table."""
    label = "select_bench_table_all"
    # Probe first so a missing/empty fixture yields a SKIP, not a crash.
    try:
        handle = IfxPy.exec_immediate(conn, "SELECT COUNT(*) FROM p21_bench")
        probe = IfxPy.fetch_tuple(handle)
        IfxPy.free_stmt(handle)
        if not probe or probe[0] == 0:
            return {"name": label, "skipped": "p21_bench empty"}
    except Exception as e:
        return {"name": label, "skipped": f"p21_bench missing: {e}"}

    def one_round() -> None:
        handle = IfxPy.exec_immediate(conn, "SELECT * FROM p21_bench")
        while IfxPy.fetch_tuple(handle):
            pass
        IfxPy.free_stmt(handle)

    return measure(label, ROUNDS_MED, one_round)
def bench_executemany_1000_rows_in_txn() -> dict:
    """Open a connection on testdb, autocommit OFF, executemany 1000.

    Mirrors the host-side bulk-insert benchmark: 1000 single-row INSERTs
    through one prepared statement, committed as a single transaction.
    NOTE(review): IfxPy has no executemany; each ``IfxPy.execute`` call
    is a synchronous round-trip, so per-row RTT is part of what's timed.
    """
    try:
        conn = IfxPy.connect(
            CONN_STR.replace("DATABASE=sysmaster", "DATABASE=testdb"), "", ""
        )
    except Exception as e:
        # testdb may not exist on a bare server — report a skip, not a crash.
        return {"name": "executemany_1000_rows_in_txn", "skipped": f"testdb: {e}"}
    IfxPy.autocommit(conn, IfxPy.SQL_AUTOCOMMIT_OFF)
    table = "p21_ifxpy_bench"
    try:
        # Best-effort drop of a leftover scratch table from a prior run.
        try:
            IfxPy.exec_immediate(conn, f"DROP TABLE {table}")
            IfxPy.commit(conn)
        except Exception:
            pass
        IfxPy.exec_immediate(
            conn, f"CREATE TABLE {table} (id INT, name VARCHAR(64), value FLOAT)"
        )
        IfxPy.commit(conn)
        # Mutable cell so the closure can advance the id range each round,
        # keeping every round's 1000 ids disjoint.
        counter = [0]
        def run() -> None:
            counter[0] += 1
            base = counter[0] * 1000
            stmt = IfxPy.prepare(
                conn, f"INSERT INTO {table} VALUES (?, ?, ?)"
            )
            for i in range(1000):
                IfxPy.execute(stmt, (base + i, f"row_{base + i}", float(base + i)))
            IfxPy.free_stmt(stmt)
            # The commit is deliberately inside the timed body.
            IfxPy.commit(conn)
        result = measure("executemany_1000_rows_in_txn", ROUNDS_SLOW, run)
        return result
    finally:
        # Always drop the scratch table and close, even if measure raised.
        try:
            IfxPy.exec_immediate(conn, f"DROP TABLE {table}")
            IfxPy.commit(conn)
        except Exception:
            pass
        IfxPy.close(conn)
def bench_cold_connect_disconnect() -> dict:
    """Login handshake cost: open and immediately close a fresh connection."""

    def one_round() -> None:
        handle = IfxPy.connect(CONN_STR, "", "")
        IfxPy.close(handle)

    return measure("cold_connect_disconnect", ROUNDS_SLOW, one_round)
# ----------------------------------------------------------------------------
# Phase 36 — scaling benchmarks (matched to test_scaling_perf.py)
# ----------------------------------------------------------------------------
def bench_executemany_scaling(n_rows: int) -> dict:
    """N-row insert in a single transaction.

    IfxPy doesn't pipeline — each ``IfxPy.execute(stmt, params)`` is a
    synchronous round-trip to the server. So per-row cost is roughly
    constant in N.

    :param n_rows: rows inserted per round (1k/10k/100k canonically,
        but any positive size works now).
    """
    # Fewer rounds for bigger workloads; sizes outside the canonical
    # set fall back to the slow-op round count. Fix: resolving this up
    # front avoids the old late KeyError that fired only *after* the
    # scratch table had been created.
    rounds = {1_000: 10, 10_000: 5, 100_000: 3}.get(n_rows, ROUNDS_SLOW)
    name = f"executemany_scaling_{n_rows}"
    try:
        conn = IfxPy.connect(
            CONN_STR.replace("DATABASE=sysmaster", "DATABASE=testdb"), "", ""
        )
    except Exception as e:
        return {"name": name, "skipped": f"testdb: {e}"}
    IfxPy.autocommit(conn, IfxPy.SQL_AUTOCOMMIT_OFF)
    table = f"p36_em_{n_rows}"
    try:
        # Best-effort drop of a leftover scratch table from a prior run.
        try:
            IfxPy.exec_immediate(conn, f"DROP TABLE {table}")
            IfxPy.commit(conn)
        except Exception:
            pass
        IfxPy.exec_immediate(
            conn, f"CREATE TABLE {table} (id INT, name VARCHAR(64), value FLOAT)"
        )
        IfxPy.commit(conn)
        # Mutable cell: each round writes a disjoint id range.
        counter = [0]

        def run() -> None:
            counter[0] += 1
            base = counter[0] * n_rows
            stmt = IfxPy.prepare(
                conn, f"INSERT INTO {table} VALUES (?, ?, ?)"
            )
            for i in range(n_rows):
                IfxPy.execute(stmt, (base + i, f"row_{base + i}", float(base + i)))
            IfxPy.free_stmt(stmt)
            # Commit is part of the timed work, matching the host bench.
            IfxPy.commit(conn)

        return measure(name, rounds, run)
    finally:
        # Always clean up the scratch table and close the connection.
        try:
            IfxPy.exec_immediate(conn, f"DROP TABLE {table}")
            IfxPy.commit(conn)
        except Exception:
            pass
        IfxPy.close(conn)
def bench_select_scaling(n_rows: int) -> dict:
    """SELECT FIRST N from the pre-populated 100k-row p34_select table.

    Tests IfxPy's per-row fetch cost at scale; should be roughly linear
    in N like ours.

    :param n_rows: rows fetched per round (1k/10k/100k canonically, but
        any size up to the table's row count works now).
    """
    # Fewer rounds for bigger workloads; sizes outside the canonical set
    # fall back to the slow-op round count. Fix: the old dict indexing
    # raised KeyError for them, and only after connect + probe.
    rounds = {1_000: 10, 10_000: 5, 100_000: 3}.get(n_rows, ROUNDS_SLOW)
    name = f"select_scaling_{n_rows}"
    try:
        conn = IfxPy.connect(
            CONN_STR.replace("DATABASE=sysmaster", "DATABASE=testdb"), "", ""
        )
    except Exception as e:
        return {"name": name, "skipped": f"testdb: {e}"}
    try:
        # Probe: does p34_select exist and hold enough rows?
        try:
            stmt = IfxPy.exec_immediate(conn, "SELECT COUNT(*) FROM p34_select")
            row = IfxPy.fetch_tuple(stmt)
            IfxPy.free_stmt(stmt)
            available = int(row[0])
            if available < n_rows:
                return {"name": name, "skipped": (
                    f"p34_select has only {available} rows; "
                    "run informix-db scaling benchmarks first to seed "
                    "the table"
                )}
        except Exception as e:
            return {"name": name, "skipped": f"p34_select missing: {e}"}

        def run() -> None:
            stmt = IfxPy.exec_immediate(
                conn, f"SELECT FIRST {n_rows} * FROM p34_select"
            )
            count = 0
            while IfxPy.fetch_tuple(stmt):
                count += 1
            IfxPy.free_stmt(stmt)
            # Guard: silent truncation would otherwise flatter the numbers.
            if count != n_rows:
                raise RuntimeError(
                    f"expected {n_rows} rows, got {count}"
                )

        return measure(name, rounds, run)
    finally:
        IfxPy.close(conn)
def main() -> None:
    """Run every workload; emit machine-parseable lines on stdout.

    Human-oriented chatter goes to stderr so the host-side parser only
    ever sees SKIP/RESULT lines on stdout.
    """
    print("# IfxPy benchmark results", file=sys.stderr)
    # getattr-with-default is the idiomatic hasattr-then-read; output
    # is identical to the old conditional expression.
    version = getattr(IfxPy, "__version__", "unknown")
    print(f"# IfxPy version: {version}", file=sys.stderr)
    # Persistent connection for the read-mostly benchmarks
    conn = IfxPy.connect(CONN_STR, "", "")
    results = []
    results.append(bench_select_one_row(conn))
    results.append(bench_select_systables_first_10(conn))
    results.append(bench_select_bench_table_all(conn))
    IfxPy.close(conn)
    results.append(bench_executemany_1000_rows_in_txn())
    results.append(bench_cold_connect_disconnect())
    # Phase 36 — scaling comparison. Skip 100k cases when --short is
    # passed (e.g., for fast smoke runs); otherwise run all sizes.
    # The two loops are deliberately separate so all insert results are
    # grouped before all select results in the output.
    short = "--short" in sys.argv
    sizes = [1_000, 10_000] if short else [1_000, 10_000, 100_000]
    for n in sizes:
        results.append(bench_executemany_scaling(n))
    for n in sizes:
        results.append(bench_select_scaling(n))
    # Emit machine-parseable lines on stdout. Reporting median (not
    # mean) and IQR (not stddev) so a single outlier round can't
    # dominate the comparison numbers — mirrors pytest-benchmark's
    # ``--benchmark-columns=median,iqr`` reporting on the host side.
    for r in results:
        if r.get("skipped"):
            print(f"SKIP {r['name']}: {r['skipped']}")
        else:
            print(
                f"RESULT {r['name']} median={r['median_s']:.6f}s "
                f"iqr={r['iqr_s']:.6f}s min={r['min_s']:.6f}s "
                f"max={r['max_s']:.6f}s mean={r['mean_s']:.6f}s "
                f"stddev={r['stddev_s']:.6f}s rounds={r['rounds']}"
            )


if __name__ == "__main__":
    main()