Investigation of the Phase 21 baseline finding that executemany(N) cost scaled linearly per-row (1.74 ms x N) regardless of batch size.

Root cause: every autocommit=True INSERT forces a server-side transaction-log flush. Not a wire-protocol bug.

Numbers:
* executemany(1000) autocommit=True: 1.72 s (1.72 ms/row)
* executemany(1000) in single txn: 32 ms (32 us/row)

53x speedup from changing the transaction boundary, not the driver. Pure protocol overhead is ~32 us/row -> ~31K rows/sec sustained throughput on a single connection. Comparable to pg8000.

Added test_executemany_1000_rows_in_txn benchmark to make this visible. Updated README headline numbers and added a "Performance gotchas" section explaining when autocommit=False matters.

Decision: don't pipeline. The remaining 32 us is already excellent; the autocommit gotcha is the real user-facing footgun. Docs > code. If someone reports needing >31K rows/sec single-connection, that becomes Phase 22.
171 lines
5.0 KiB
Python
171 lines
5.0 KiB
Python
"""End-to-end INSERT benchmarks — single-row, executemany, and the gap.

The single-row vs. executemany delta is the ``executemany`` win — we
PREPARE+RELEASE once and BIND+EXECUTE per row, vs PREPARE+RELEASE per
row. On any decent network this is 10-50x.

The autocommit-True vs. autocommit-False delta is the **transaction-flush
cost** — every autocommit INSERT forces the server to flush its
transaction log per row, drowning out everything else. The benchmark
splits these so we can see protocol overhead independently.
"""
|
|
|
|
from __future__ import annotations
|
|
|
|
import contextlib
|
|
from collections.abc import Iterator
|
|
|
|
import pytest
|
|
|
|
import informix_db
|
|
from tests.conftest import ConnParams
|
|
|
|
pytestmark = [pytest.mark.benchmark, pytest.mark.integration]
|
|
|
|
|
|
@pytest.fixture(scope="module")
def txn_conn(conn_params: ConnParams) -> Iterator[informix_db.Connection]:
    """Yield a module-scoped connection opened with ``autocommit=False``.

    Benchmarks use it to wrap an ``executemany`` call in a single explicit
    transaction. It always connects to ``testdb`` (the logged user DB),
    because switching autocommit off is meaningless on unlogged DBs. All
    other connection settings come from ``conn_params``.

    The connection is closed when the fixture is torn down.
    """
    connect_kwargs = {
        "host": conn_params.host,
        "port": conn_params.port,
        "user": conn_params.user,
        "password": conn_params.password,
        "database": "testdb",
        "server": conn_params.server,
        "autocommit": False,
    }
    # closing() calls connection.close() at teardown, including when an
    # exception is thrown into the generator at the yield point.
    with contextlib.closing(informix_db.connect(**connect_kwargs)) as connection:
        yield connection
|
|
|
|
|
|
def _setup_temp_table(conn: informix_db.Connection, name: str) -> None:
    """(Re)create the three-column benchmark table ``name`` on ``conn``.

    A table of the same name is dropped first; an ``informix_db.Error``
    raised by that DROP (e.g. the table does not exist yet) is ignored.
    Errors from CREATE TABLE propagate to the caller.

    The cursor used for the DDL is closed before returning, on success and
    on failure, matching how the benchmark bodies in this module close
    their cursors.

    Args:
        conn: Open connection on which to run the DDL.
        name: Table name. It is interpolated directly into the SQL text,
            so it must be a trusted identifier.
    """
    with contextlib.closing(conn.cursor()) as cur:
        # Best-effort cleanup of a table left behind by an earlier run.
        with contextlib.suppress(informix_db.Error):
            cur.execute(f"DROP TABLE {name}")
        cur.execute(
            f"CREATE TABLE {name} (id INT, name VARCHAR(64), value FLOAT)"
        )
|
|
|
|
|
|
def _drop_temp_table(conn: informix_db.Connection, name: str) -> None:
    """Drop benchmark table ``name`` on ``conn``, ignoring driver errors.

    This is best-effort cleanup: any ``informix_db.Error`` raised while
    dropping the table or closing the cursor is suppressed, so calling it
    from a ``finally`` block does not mask the original failure with a
    driver error.

    Args:
        conn: Open connection on which to run the DROP.
        name: Table name. It is interpolated directly into the SQL text,
            so it must be a trusted identifier.
    """
    cur = conn.cursor()
    # suppress() is the outer manager so that an error raised by
    # cur.close() is swallowed as well; closing() releases the cursor
    # whether or not the DROP succeeds.
    with contextlib.suppress(informix_db.Error), contextlib.closing(cur):
        cur.execute(f"DROP TABLE {name}")
|
|
|
|
|
|
def test_insert_single_row(benchmark, bench_conn: informix_db.Connection) -> None:
    """Benchmark one INSERT per call.

    Each timed call opens a cursor, executes a single parameterized INSERT
    (the full PREPARE+BIND+EXECUTE+RELEASE cycle) and closes the cursor.
    The table is created before timing and dropped afterwards, even when
    the benchmark fails.
    """
    table = "p21_ins_single"
    insert_sql = f"INSERT INTO {table} VALUES (?, ?, ?)"
    _setup_temp_table(bench_conn, table)
    seq = 0  # Incremented per call so every inserted row is distinct.

    def run() -> None:
        nonlocal seq
        seq += 1
        cursor = bench_conn.cursor()
        cursor.execute(insert_sql, (seq, f"name_{seq}", float(seq)))
        cursor.close()

    try:
        benchmark(run)
    finally:
        _drop_temp_table(bench_conn, table)
|
|
|
|
|
|
def test_executemany_100_rows(
    benchmark, bench_conn: informix_db.Connection
) -> None:
    """Benchmark 100 INSERTs sent through one ``executemany`` call.

    One PREPARE, 100 BIND+EXECUTEs, one RELEASE. Every timed call builds a
    fresh batch of 100 rows (ids continue from the previous batch) and
    inserts them on a new cursor. The table is dropped afterwards, even
    when the benchmark fails.
    """
    table = "p21_ins_emany_100"
    batch_size = 100
    insert_sql = f"INSERT INTO {table} VALUES (?, ?, ?)"
    _setup_temp_table(bench_conn, table)
    batch_no = 0

    def run() -> None:
        nonlocal batch_no
        batch_no += 1
        first_id = batch_no * batch_size
        # Row construction stays inside the timed call, as it always has.
        batch = [
            (row_id, f"row_{row_id}", float(row_id))
            for row_id in range(first_id, first_id + batch_size)
        ]
        cursor = bench_conn.cursor()
        cursor.executemany(insert_sql, batch)
        cursor.close()

    try:
        benchmark(run)
    finally:
        _drop_temp_table(bench_conn, table)
|
|
|
|
|
|
def test_executemany_1000_rows(
    benchmark, bench_conn: informix_db.Connection
) -> None:
    """Benchmark 1000 INSERTs via ``executemany`` under autocommit=True.

    Every row forces a transaction-log flush, so this measures worst-case
    protocol *plus* server storage cost. Runs in pedantic mode with three
    rounds of one iteration each. The table is dropped afterwards, even
    when the benchmark fails.
    """
    table = "p21_ins_emany_1000"
    batch_size = 1000
    insert_sql = f"INSERT INTO {table} VALUES (?, ?, ?)"
    _setup_temp_table(bench_conn, table)
    batch_no = 0

    def run() -> None:
        nonlocal batch_no
        batch_no += 1
        first_id = batch_no * batch_size
        # Row construction stays inside the timed call, as it always has.
        batch = [
            (row_id, f"row_{row_id}", float(row_id))
            for row_id in range(first_id, first_id + batch_size)
        ]
        cursor = bench_conn.cursor()
        cursor.executemany(insert_sql, batch)
        cursor.close()

    try:
        benchmark.pedantic(run, rounds=3, iterations=1)
    finally:
        _drop_temp_table(bench_conn, table)
|
|
|
|
|
|
def test_executemany_1000_rows_in_txn(
    benchmark, txn_conn: informix_db.Connection
) -> None:
    """Benchmark 1000 INSERTs via ``executemany`` inside ONE transaction.

    With a single log flush at COMMIT time, this isolates the protocol
    cost from the autocommit-flush cost. The delta vs. the autocommit
    variant is the server-side log-flush penalty (un-fixable from the
    client side). Each timed call inserts the batch and then commits.
    Runs in pedantic mode with three rounds of one iteration each.
    """
    table = "p21_ins_emany_txn"
    batch_size = 1000
    insert_sql = f"INSERT INTO {table} VALUES (?, ?, ?)"
    _setup_temp_table(txn_conn, table)
    txn_conn.commit()  # Land the CREATE TABLE before timing
    batch_no = 0

    def run() -> None:
        nonlocal batch_no
        batch_no += 1
        first_id = batch_no * batch_size
        # Row construction stays inside the timed call, as it always has.
        batch = [
            (row_id, f"row_{row_id}", float(row_id))
            for row_id in range(first_id, first_id + batch_size)
        ]
        cursor = txn_conn.cursor()
        cursor.executemany(insert_sql, batch)
        cursor.close()
        txn_conn.commit()

    try:
        benchmark.pedantic(run, rounds=3, iterations=1)
    finally:
        # Best-effort teardown: drop the table and commit the DROP; a
        # driver error here must not mask a benchmark failure.
        with contextlib.suppress(informix_db.Error):
            _drop_temp_table(txn_conn, table)
            txn_conn.commit()
|