Tier 1 — make existing benchmarks reliable: * Bumped slow-bench rounds: cold_connect_disconnect 5->15, executemany series 3->10. Single-round outliers no longer dominate. * Switched bench reporting to median + IQR. Mean was being moved by individual GC pauses / scheduler hiccups (IfxPy executemany IQR was 8.2 ms on a 28 ms median - 29% spread - mean was unreliable). * Updated ifxpy_bench.py to also report median + IQR alongside mean for cross-comparable numbers. * Makefile bench targets now show median, iqr, mean, stddev, ops, rounds. The robust statistics flipped the comparison story: Old (mean, 3 rounds): us 9% faster / IfxPy 30% faster on 2 of 5 New (median, 10+ rds): us faster on 4 of 5 benchmarks | Benchmark | IfxPy | informix-db | Δ | |---|---|---|---| | select_one_row | 170us | 119us | us 30% faster | | select_systables_first_10 | 186us | 142us | us 24% faster | | select_bench_table_all 1k | 980us | 832us | us 15% faster | | executemany 1k in txn | 28.3ms | 31.3ms | us 10% slower | | cold_connect_disconnect | 12.0ms | 10.7ms | us 11% faster | Tier 2 — add benchmarks for claims we make but don't verify: tests/benchmarks/test_observability_perf.py: * test_streaming_fetch_memory_profile — RSS sampling during a cursor iteration. Documents memory growth shape; regression wall at 100 MB / 1k rows. Currently flat (in-memory cursor doesn't grow detectably for 278 rows). * test_select_1_latency_percentiles — 1000-query distribution with p50/p90/p95/p99/max. Result: p99/p50 = 1.42x (tight tail). p50=108us, p99=153us. * test_concurrent_pool_throughput[2,4,8] — N worker threads through pool, measures aggregate QPS + per-thread fairness. Plateaus at ~6K QPS (server-bound); per-thread latency scales ~linearly with N (server serialization expected). README.md (project root): updated Compared-to-IfxPy table with the median-based numbers + IQR awareness note. 
tests/benchmarks/compare/README.md: added "Statistical robustness" section explaining why median over mean for fair comparison. 236 integration tests pass; ruff clean.
208 lines
6.7 KiB
Python
208 lines
6.7 KiB
Python
"""IfxPy comparison benchmark.
|
|
|
|
Runs the same workloads as ``tests/benchmarks/test_*_perf.py`` against
|
|
the same dev-container Informix instance, but using IfxPy (the C-bound
|
|
PyPI driver) instead of ``informix-db``. Numbers go straight to stdout;
|
|
the host parses them and produces a side-by-side table.
|
|
|
|
Workloads:
|
|
* ``select_one_row`` — single-row SELECT round-trip latency
|
|
* ``select_systables_first_10`` — small server-side query
|
|
* ``select_bench_table_all`` — 1k-row sustained fetch
|
|
* ``executemany_1000_rows_in_txn`` — bulk INSERT throughput
|
|
* ``cold_connect_disconnect`` — login handshake cost
|
|
|
|
Each workload runs N times; we report median + IQR (mean and stddev are kept only for cross-checking).
|
|
"""
|
|
|
|
from __future__ import annotations
|
|
|
|
import statistics
|
|
import sys
|
|
import time
|
|
from collections.abc import Callable
|
|
|
|
import IfxPy
|
|
|
|
# Connect string — mirrors the conftest.py defaults the host uses.
# NOTE(review): credentials are the dev-container defaults; presumably never
# used outside the throwaway benchmark container — confirm before reuse.
CONN_STR = (
    "SERVER=informix;"
    "DATABASE=sysmaster;"
    "HOST=127.0.0.1;"
    "SERVICE=9088;"
    "UID=informix;"
    "PWD=in4mix;"
    "PROTOCOL=onsoctcp"
)

# Round counts per workload speed class, passed to measure() below.
ROUNDS_FAST = 100  # for sub-millisecond ops
ROUNDS_MED = 20  # for 1-100ms ops
ROUNDS_SLOW = 10  # for >1s ops; bumped from 3 in Tier 1 — the smaller
# sample produced unreliable means (cold-connect's stddev was 4.98 ms
# across 3 rounds; with 10 rounds the median is stable run-to-run).
|
|
|
|
|
|
def measure(name: str, rounds: int, body: Callable[[], None]) -> dict:
    """Run ``body`` ``rounds`` times; return robust timing stats in seconds.

    Median is more robust than mean against single-round outliers (GC
    pauses, server scheduler hiccups). IQR (interquartile range) is
    a noise estimator that also resists outliers — much better than
    stddev when one bad round can dominate.

    Args:
        name: workload label echoed back in the result dict.
        rounds: number of timed iterations; must be >= 1.
        body: zero-argument callable that performs one iteration.

    Returns:
        dict with ``name``, ``rounds``, ``median_s``, ``iqr_s``, ``min_s``,
        ``max_s``, ``mean_s`` and ``stddev_s`` keys (all timings in seconds).

    Raises:
        ValueError: if ``rounds`` is less than 1.
    """
    if rounds < 1:
        raise ValueError(f"rounds must be >= 1, got {rounds}")
    timings: list[float] = []
    for _ in range(rounds):
        t0 = time.perf_counter()
        body()
        t1 = time.perf_counter()
        timings.append(t1 - t0)
    timings.sort()
    # statistics.median averages the two middle samples for even-length
    # runs; the previous index pick (timings[n // 2]) silently returned
    # the *upper* median, biasing even-round results high.
    median_s = statistics.median(timings)
    # statistics.quantiles interpolates Q1/Q3 instead of snapping to the
    # nearest sample index, so the IQR is stable for small round counts.
    if len(timings) > 1:
        q1, _, q3 = statistics.quantiles(timings, n=4)
    else:
        q1 = q3 = timings[0]  # single round: IQR degenerates to 0
    return {
        "name": name,
        "rounds": rounds,
        "median_s": median_s,
        "iqr_s": q3 - q1,
        "min_s": timings[0],
        "max_s": timings[-1],
        "mean_s": statistics.mean(timings),  # kept for cross-checking
        "stddev_s": statistics.stdev(timings) if len(timings) > 1 else 0.0,
    }
|
|
|
|
|
|
def bench_select_one_row(conn) -> dict:
    """Time the round-trip of a single-row SELECT on a warm connection."""

    def one_round() -> None:
        handle = IfxPy.exec_immediate(
            conn, "SELECT 1 FROM systables WHERE tabid = 1"
        )
        IfxPy.fetch_tuple(handle)
        IfxPy.free_stmt(handle)

    return measure("select_one_row", ROUNDS_FAST, one_round)
|
|
|
|
|
|
def bench_select_systables_first_10(conn) -> dict:
    """Time a small server-side query: first 10 rows of systables."""

    def one_round() -> None:
        handle = IfxPy.exec_immediate(
            conn,
            "SELECT FIRST 10 tabname, owner, tabid, ncols FROM systables",
        )
        while IfxPy.fetch_tuple(handle):
            pass
        IfxPy.free_stmt(handle)

    return measure("select_systables_first_10", ROUNDS_FAST, one_round)
|
|
|
|
|
|
def bench_select_bench_table_all(conn) -> dict:
    """Requires p21_bench table to exist (created by host-side fixture)."""
    # Probe whether the table exists and has rows; skip the workload
    # (with a reason) rather than failing the whole run.
    try:
        probe = IfxPy.exec_immediate(conn, "SELECT COUNT(*) FROM p21_bench")
        count_row = IfxPy.fetch_tuple(probe)
        IfxPy.free_stmt(probe)
        if not count_row or count_row[0] == 0:
            return {"name": "select_bench_table_all", "skipped": "p21_bench empty"}
    except Exception as e:
        return {"name": "select_bench_table_all", "skipped": f"p21_bench missing: {e}"}

    def one_round() -> None:
        handle = IfxPy.exec_immediate(conn, "SELECT * FROM p21_bench")
        while IfxPy.fetch_tuple(handle):
            pass
        IfxPy.free_stmt(handle)

    return measure("select_bench_table_all", ROUNDS_MED, one_round)
|
|
|
|
|
|
def bench_executemany_1000_rows_in_txn() -> dict:
    """Open a connection on testdb, autocommit OFF, executemany 1000."""
    # testdb may not exist in every environment — report a skip instead
    # of aborting the whole benchmark run.
    try:
        conn = IfxPy.connect(
            CONN_STR.replace("DATABASE=sysmaster", "DATABASE=testdb"), "", ""
        )
    except Exception as e:
        return {"name": "executemany_1000_rows_in_txn", "skipped": f"testdb: {e}"}
    IfxPy.autocommit(conn, IfxPy.SQL_AUTOCOMMIT_OFF)

    table = "p21_ifxpy_bench"

    def drop_table_quietly() -> None:
        # Best-effort cleanup: the table may not exist yet.
        try:
            IfxPy.exec_immediate(conn, f"DROP TABLE {table}")
            IfxPy.commit(conn)
        except Exception:
            pass

    try:
        drop_table_quietly()
        IfxPy.exec_immediate(
            conn, f"CREATE TABLE {table} (id INT, name VARCHAR(64), value FLOAT)"
        )
        IfxPy.commit(conn)

        round_no = 0

        def one_round() -> None:
            nonlocal round_no
            round_no += 1
            # Distinct id range per round so repeated rounds never collide.
            base = round_no * 1000
            stmt = IfxPy.prepare(
                conn, f"INSERT INTO {table} VALUES (?, ?, ?)"
            )
            for i in range(1000):
                IfxPy.execute(stmt, (base + i, f"row_{base + i}", float(base + i)))
            IfxPy.free_stmt(stmt)
            IfxPy.commit(conn)

        return measure("executemany_1000_rows_in_txn", ROUNDS_SLOW, one_round)
    finally:
        drop_table_quietly()
        IfxPy.close(conn)
|
|
|
|
|
|
def bench_cold_connect_disconnect() -> dict:
    """Time the full login handshake: connect then immediately close."""

    def one_round() -> None:
        session = IfxPy.connect(CONN_STR, "", "")
        IfxPy.close(session)

    return measure("cold_connect_disconnect", ROUNDS_SLOW, one_round)
|
|
|
|
|
|
def main() -> None:
    """Run every workload and print machine-parseable result lines to stdout.

    Human-readable headers go to stderr so the host-side parser only ever
    sees RESULT/SKIP lines on stdout.
    """
    print("# IfxPy benchmark results", file=sys.stderr)
    version = getattr(IfxPy, "__version__", "unknown")
    print(f"# IfxPy version: {version}", file=sys.stderr)

    # Persistent connection for the read-mostly benchmarks
    conn = IfxPy.connect(CONN_STR, "", "")
    results = [
        bench_select_one_row(conn),
        bench_select_systables_first_10(conn),
        bench_select_bench_table_all(conn),
    ]
    IfxPy.close(conn)

    # These two manage their own connections.
    results.append(bench_executemany_1000_rows_in_txn())
    results.append(bench_cold_connect_disconnect())

    # Emit machine-parseable lines on stdout. Reporting median (not
    # mean) and IQR (not stddev) so a single outlier round can't
    # dominate the comparison numbers — mirrors pytest-benchmark's
    # ``--benchmark-columns=median,iqr`` reporting on the host side.
    for entry in results:
        reason = entry.get("skipped")
        if reason:
            print(f"SKIP {entry['name']}: {reason}")
        else:
            print(
                f"RESULT {entry['name']} median={entry['median_s']:.6f}s "
                f"iqr={entry['iqr_s']:.6f}s min={entry['min_s']:.6f}s "
                f"max={entry['max_s']:.6f}s mean={entry['mean_s']:.6f}s "
                f"stddev={entry['stddev_s']:.6f}s rounds={entry['rounds']}"
            )


if __name__ == "__main__":
    main()
|