informix-db/tests/benchmarks/test_insert_perf.py
Ryan Malloy 495128c679 Phase 21.1: executemany perf - it was the autocommit cliff (2026.05.04.6)
Investigation of the Phase 21 baseline finding that executemany(N) cost
scaled linearly per-row (1.74 ms x N) regardless of batch size.

Root cause: every autocommit=True INSERT forces a server-side
transaction-log flush. Not a wire-protocol bug.

Numbers:
* executemany(1000) autocommit=True: 1.72 s (1.72 ms/row)
* executemany(1000) in single txn:    32 ms (32 us/row)

53x speedup from changing the transaction boundary, not the driver.
Pure protocol overhead is ~32 us/row -> ~31K rows/sec sustained
throughput on a single connection. Comparable to pg8000.

Added test_executemany_1000_rows_in_txn benchmark to make this
visible. Updated README headline numbers and added a "Performance
gotchas" section explaining when autocommit=False matters.

Decision: don't pipeline. The remaining 32 us is already excellent;
the autocommit gotcha is the real user-facing footgun. Docs > code.
If someone reports needing >31K rows/sec single-connection, that
becomes Phase 22.
2026-05-04 17:26:16 -06:00

171 lines
5.0 KiB
Python

"""End-to-end INSERT benchmarks — single-row, executemany, and the gap.
The single-row vs. executemany delta is the ``executemany`` win — we
PREPARE+RELEASE once and BIND+EXECUTE per row, vs PREPARE+RELEASE per
row. On any decent network this is 10-50x.
The autocommit-True vs. autocommit-False delta is the **transaction-flush
cost** — every autocommit INSERT forces the server to flush its
transaction log per row, drowning out everything else. The benchmark
splits these so we can see protocol overhead independently.
"""
from __future__ import annotations
import contextlib
from collections.abc import Iterator
import pytest
import informix_db
from tests.conftest import ConnParams
pytestmark = [pytest.mark.benchmark, pytest.mark.integration]
@pytest.fixture(scope="module")
def txn_conn(conn_params: ConnParams) -> Iterator[informix_db.Connection]:
    """Module-scoped connection opened with autocommit=False.

    Lets a benchmark wrap an entire ``executemany`` call in one explicit
    transaction. Connects to ``testdb`` (the logged user DB) because
    autocommit-off has no effect on unlogged databases.
    """
    connect_kwargs = {
        "host": conn_params.host,
        "port": conn_params.port,
        "user": conn_params.user,
        "password": conn_params.password,
        "database": "testdb",
        "server": conn_params.server,
        "autocommit": False,
    }
    connection = informix_db.connect(**connect_kwargs)
    try:
        yield connection
    finally:
        # Teardown must run even if the using test (or pytest itself) bails.
        connection.close()
def _setup_temp_table(conn: informix_db.Connection, name: str) -> None:
    """(Re)create benchmark table *name* with a fixed 3-column schema.

    Any leftover table from a previous run is dropped first; the DROP's
    "table does not exist" error is deliberately suppressed.
    """
    cur = conn.cursor()
    try:
        with contextlib.suppress(informix_db.Error):
            cur.execute(f"DROP TABLE {name}")
        cur.execute(
            f"CREATE TABLE {name} (id INT, name VARCHAR(64), value FLOAT)"
        )
    finally:
        # Previously leaked: each benchmark round-trip opened a cursor that
        # was never released. Close it even if CREATE raises.
        cur.close()
def _drop_temp_table(conn: informix_db.Connection, name: str) -> None:
    """Best-effort DROP of benchmark table *name*; missing-table errors ignored."""
    cur = conn.cursor()
    try:
        with contextlib.suppress(informix_db.Error):
            cur.execute(f"DROP TABLE {name}")
    finally:
        # Close the cursor regardless of DROP outcome (was leaked before).
        cur.close()
def test_insert_single_row(benchmark, bench_conn: informix_db.Connection) -> None:
    """One INSERT per benchmark call — full PREPARE+BIND+EXECUTE+RELEASE cycle."""
    table = "p21_ins_single"
    _setup_temp_table(bench_conn, table)
    row_id = 0

    def run() -> None:
        # Unique values per call so every round inserts a distinct row.
        nonlocal row_id
        row_id += 1
        cur = bench_conn.cursor()
        cur.execute(
            f"INSERT INTO {table} VALUES (?, ?, ?)",
            (row_id, f"name_{row_id}", float(row_id)),
        )
        cur.close()

    try:
        benchmark(run)
    finally:
        _drop_temp_table(bench_conn, table)
def test_executemany_100_rows(
    benchmark, bench_conn: informix_db.Connection
) -> None:
    """100 INSERTs via executemany — one PREPARE, 100 BIND+EXECUTEs, one RELEASE."""
    table = "p21_ins_emany_100"
    _setup_temp_table(bench_conn, table)
    batch_no = 0

    def run() -> None:
        # Each round gets its own disjoint id range of 100 rows.
        nonlocal batch_no
        batch_no += 1
        start = batch_no * 100
        rows = [
            (rid, f"row_{rid}", float(rid))
            for rid in range(start, start + 100)
        ]
        cur = bench_conn.cursor()
        cur.executemany(
            f"INSERT INTO {table} VALUES (?, ?, ?)",
            rows,
        )
        cur.close()

    try:
        benchmark(run)
    finally:
        _drop_temp_table(bench_conn, table)
def test_executemany_1000_rows(
    benchmark, bench_conn: informix_db.Connection
) -> None:
    """1000 INSERTs via executemany under autocommit=True — every row
    forces a transaction-log flush. Worst-case protocol *plus* server
    storage cost."""
    table = "p21_ins_emany_1000"
    _setup_temp_table(bench_conn, table)
    batch_no = 0

    def run() -> None:
        # Each round gets its own disjoint id range of 1000 rows.
        nonlocal batch_no
        batch_no += 1
        start = batch_no * 1000
        rows = [
            (rid, f"row_{rid}", float(rid))
            for rid in range(start, start + 1000)
        ]
        cur = bench_conn.cursor()
        cur.executemany(
            f"INSERT INTO {table} VALUES (?, ?, ?)",
            rows,
        )
        cur.close()

    try:
        # pedantic: 3 timed rounds only — at ~1.7 s/round this is already slow.
        benchmark.pedantic(run, rounds=3, iterations=1)
    finally:
        _drop_temp_table(bench_conn, table)
def test_executemany_1000_rows_in_txn(
    benchmark, txn_conn: informix_db.Connection
) -> None:
    """1000 INSERTs via executemany inside ONE transaction — single
    log flush at COMMIT time. Isolates the protocol cost from the
    autocommit-flush cost. The delta vs the autocommit variant is the
    server-side log-flush penalty (un-fixable from the client side)."""
    table = "p21_ins_emany_txn"
    _setup_temp_table(txn_conn, table)
    txn_conn.commit()  # Land the CREATE TABLE before timing
    counter = [0]

    def run() -> None:
        counter[0] += 1
        base = counter[0] * 1000
        rows = [
            (base + i, f"row_{base + i}", float(base + i)) for i in range(1000)
        ]
        cur = txn_conn.cursor()
        cur.executemany(
            f"INSERT INTO {table} VALUES (?, ?, ?)",
            rows,
        )
        cur.close()
        txn_conn.commit()

    try:
        benchmark.pedantic(run, rounds=3, iterations=1)
    finally:
        # A failed round can leave an open, half-applied transaction on this
        # autocommit=False connection; roll it back first so the cleanup DROP
        # isn't rejected (and then silently swallowed), leaking the table
        # into the next run.
        with contextlib.suppress(informix_db.Error):
            txn_conn.rollback()
        with contextlib.suppress(informix_db.Error):
            _drop_temp_table(txn_conn, table)
            txn_conn.commit()  # the DROP itself needs a commit on this conn