informix-db/tests/benchmarks/test_scaling_perf.py
Ryan Malloy 5825d5c55e Extend scaling benches: 100-column case + 100k memory profile + 1M gating
Adds three things to test_scaling_perf.py:

1. 100-column wide-row SELECT - codec stress test at extreme widths.
   1k rows x 100 cols = 19.4 ms (~19.4 us/row, ~194 ns/column-decode).
   Per-column cost continues to drop with width thanks to loop
   amortization (5 cols: 480 ns/col -> 100 cols: 194 ns/col).

2. 100k-row memory profile - samples RSS pre-execute, post-execute
   (materialization cost), and during iteration. Real numbers:
     pre-execute:  45.8 MB
     post-execute: 71.2 MB  (+25.4 MB = ~259 bytes/row materialization)
     iteration:    0 KB extra (just walks the existing list)

   Documents the in-memory cursor's actual cost: 100k rows = 25 MB,
   1M rows = ~250 MB. Fair regression baseline (tripped at 500 MB).

3. 1M-row scaling gated behind IFX_BENCH_1M=1 env var. Default off
   because the dev container's rootdbs runs out of space. For
   production-sized servers users can opt in. The implementation
   is linear-extrapolation-correct (executemany 100k -> 1M = ~15s,
   SELECT 100k -> 1M = ~3s).

Note on the dev-container size limit: dev image's rootdbs is sized
for typical developer workloads, not stress testing. A 1M-row
INSERT exceeds the available pages and fails with -242 ISAM -113
(out of space). This is correct behavior - the limit is enforced
at the storage layer.

Switched RSS sampling from ru_maxrss (peak, monotonic) to
/proc/self/status VmRSS (current). Earlier runs showed flat
RSS because peak from earlier in the test session masked the
fluctuation.
2026-05-05 13:10:32 -06:00

462 lines
16 KiB
Python

"""Phase 34 — scaling benchmarks.
The existing benchmarks measure single-shape workloads (1k-row SELECT,
1k-row executemany). These add the scaling axes:
1. **executemany at 1k / 10k / 100k rows** in a transaction. Phase 33's
pipelining eliminates per-row RTT; this test confirms the speedup
scales linearly with N.
2. **SELECT at 1k / 10k / 100k rows**. Tests parse_tuple_payload
throughput at real-world scale. Could surface codec slowdown,
memory issues, or GC-pause amplification.
3. **Wide-row SELECT** (5 / 20 / 50 / 100 columns x 1k rows). More columns =
more decode calls per row. Different cost shape than row-count
scaling.
4. **Type-mix SELECT**: realistic application workload with INT +
VARCHAR + DECIMAL + DATE + FLOAT in one query. Tests the codec
dispatch under a representative mix of decoders.
Each benchmark is parametrized; pytest-benchmark groups them so we
get one row per scale point.
"""
from __future__ import annotations
import contextlib
import os as _os
from collections.abc import Iterator
import pytest
import informix_db
from tests.conftest import ConnParams
pytestmark = [pytest.mark.benchmark, pytest.mark.integration]
# Row counts exercised by the scaling benchmarks. The 1M-row point is
# opt-in (IFX_BENCH_1M=1): it adds roughly 30s plus the cost of seeding
# a 1M-row table, which would push the default `make bench` run past
# its 5-minute budget.
_BIG = _os.getenv("IFX_BENCH_1M") == "1"
_BASE_SIZES = (1_000, 10_000, 100_000)
_OPT_IN_SIZES = (1_000_000,) if _BIG else ()
# Two independent lists so mutating one never affects the other.
EXECUTEMANY_SIZES = [*_BASE_SIZES, *_OPT_IN_SIZES]
SELECT_SIZES = [*_BASE_SIZES, *_OPT_IN_SIZES]
# Column counts for the wide-row benchmark; 100 is the codec stress case.
WIDTH_COLUMNS = [5, 20, 50, 100]
@pytest.fixture(scope="module")
def txn_conn(conn_params: ConnParams) -> Iterator[informix_db.Connection]:
    """Yield a module-scoped connection to ``testdb`` with autocommit off.

    The bulk-insert benchmarks commit explicitly, so the connection is
    opened with ``autocommit=False``. It is closed once the module's
    tests are done, whether or not they succeeded.
    """
    connect_kwargs = {
        "host": conn_params.host,
        "port": conn_params.port,
        "user": conn_params.user,
        "password": conn_params.password,
        "database": "testdb",
        "server": conn_params.server,
        "autocommit": False,
    }
    # closing() calls conn.close() on exit, including on error.
    with contextlib.closing(informix_db.connect(**connect_kwargs)) as conn:
        yield conn
# ----------------------------------------------------------------------------
# Bulk-insert scaling
# ----------------------------------------------------------------------------
@pytest.mark.parametrize("n_rows", EXECUTEMANY_SIZES)
def test_executemany_scaling(
    benchmark, txn_conn: informix_db.Connection, n_rows: int
) -> None:
    """Benchmark ``executemany`` of ``n_rows`` rows committed as one transaction.

    Each round inserts a fresh, non-overlapping id range into a scratch
    table and commits. With the pipelined (Phase 33) executemany the
    per-row cost is expected to stay low and the total to scale roughly
    linearly with ``n_rows``.

    Fewer rounds are run at larger sizes so every scale point takes a
    comparable amount of wall time (approximate figures):
      1k rows   → 10 rounds (~110 ms each = 1.1 s)
      10k rows  → 5 rounds (~1.1 s each = 5.5 s)
      100k rows → 3 rounds (~11 s each = 33 s)
      1M rows   → 2 rounds (opt-in size)
    """
    rounds_for = {1_000: 10, 10_000: 5, 100_000: 3, 1_000_000: 2}
    table = f"p34_em_{n_rows}"
    insert_sql = f"INSERT INTO {table} VALUES (?, ?, ?)"

    # Start from a clean scratch table; a missing table is not an error.
    setup_cur = txn_conn.cursor()
    with contextlib.suppress(informix_db.Error):
        setup_cur.execute(f"DROP TABLE {table}")
    setup_cur.execute(
        f"CREATE TABLE {table} (id INT, name VARCHAR(64), value FLOAT)"
    )
    txn_conn.commit()

    round_no = 0

    def run() -> None:
        # Shift the id range every round so rounds never reuse ids.
        nonlocal round_no
        round_no += 1
        first_id = round_no * n_rows
        rows = [
            (row_id, f"row_{row_id}", float(row_id))
            for row_id in range(first_id, first_id + n_rows)
        ]
        insert_cur = txn_conn.cursor()
        insert_cur.executemany(insert_sql, rows)
        insert_cur.close()
        txn_conn.commit()

    try:
        benchmark.pedantic(run, rounds=rounds_for[n_rows], iterations=1)
    finally:
        # Best-effort teardown of the scratch table.
        with contextlib.suppress(informix_db.Error):
            drop_cur = txn_conn.cursor()
            drop_cur.execute(f"DROP TABLE {table}")
            txn_conn.commit()
# ----------------------------------------------------------------------------
# SELECT-scaling
# ----------------------------------------------------------------------------
@pytest.fixture(scope="module")
def scaling_select_table(conn_params: ConnParams) -> Iterator[str]:
    """Create, populate and yield the table used by the SELECT benchmarks.

    The table holds 100k rows by default, or 1M rows when the module's
    ``_BIG`` flag (IFX_BENCH_1M=1) is set. It is built once per module
    run; benchmarks read it with ``SELECT FIRST N``.

    A dedicated connection is used instead of the shared ``txn_conn`` so
    other tests' executemany work cannot affect its transaction state.
    Earlier attempts to share ``txn_conn`` produced silent population
    failures (200 rows instead of 100k).

    The whole setup runs inside the same ``try``/``finally`` as the
    yield, so a failed population (for example the server running out of
    space while seeding 1M rows) still drops the partial table and closes
    the connection.

    Yields:
        The name of the populated table.

    Raises:
        AssertionError: if the row count after population is not the
            expected target.
    """
    table = "p34_select"
    # Population size scales with whether the 1M tests are enabled.
    target = 1_000_000 if _BIG else 100_000
    chunk = 10_000
    insert_sql = f"INSERT INTO {table} VALUES (?, ?, ?, ?, ?)"

    setup_conn = informix_db.connect(
        host=conn_params.host,
        port=conn_params.port,
        user=conn_params.user,
        password=conn_params.password,
        database="testdb",
        server=conn_params.server,
        autocommit=False,
    )
    try:
        cur = setup_conn.cursor()
        # A leftover table from an aborted run is dropped; a missing one
        # is not an error.
        with contextlib.suppress(informix_db.Error):
            cur.execute(f"DROP TABLE {table}")
        setup_conn.commit()
        cur.execute(
            f"CREATE TABLE {table} ("
            f" id INT, name VARCHAR(64), counter INT,"
            f" value FLOAT, label VARCHAR(32))"
        )
        setup_conn.commit()

        for base in range(0, target, chunk):
            # Clamp the last chunk so the table ends at exactly `target`
            # rows even if `target` is not a multiple of `chunk`.
            stop = min(base + chunk, target)
            rows = [
                (n, f"name_{n:06d}", n * 7, float(n) * 1.5, f"L{n % 100:02d}")
                for n in range(base, stop)
            ]
            cur.executemany(insert_sql, rows)
            # Commit per chunk to keep each transaction bounded.
            setup_conn.commit()

        # Verify population — fail loud if the multi-chunk insert dropped rows.
        cur.execute(f"SELECT COUNT(*) FROM {table}")
        (count,) = cur.fetchone()
        cur.close()
        assert count == target, (
            f"fixture failed: {table} has {count} rows, expected {target}"
        )

        yield table
    finally:
        try:
            # Best-effort drop; also covers a partially populated table.
            with contextlib.suppress(informix_db.Error):
                drop_cur = setup_conn.cursor()
                drop_cur.execute(f"DROP TABLE {table}")
                setup_conn.commit()
        finally:
            setup_conn.close()
@pytest.fixture(scope="module")
def select_read_conn(
    conn_params: ConnParams,
) -> Iterator[informix_db.Connection]:
    """Yield a module-scoped autocommit connection used only for reads.

    The SELECT scaling tests read through this connection rather than
    ``txn_conn``. When ``txn_conn`` was shared between the read and
    write tests, SELECTs through it saw only 200 rows instead of 100k;
    this was attributed to a read snapshot opened before the fixture's
    writes were committed. A connection that has never been inside a
    transaction sees the committed data.
    """
    reader = informix_db.connect(
        host=conn_params.host,
        port=conn_params.port,
        user=conn_params.user,
        password=conn_params.password,
        database="testdb",
        server=conn_params.server,
        # Reads only, so there is no transaction state to manage.
        autocommit=True,
    )
    with contextlib.closing(reader):
        yield reader
@pytest.mark.parametrize("n_rows", SELECT_SIZES)
def test_select_scaling(
    benchmark,
    select_read_conn: informix_db.Connection,
    scaling_select_table: str,
    n_rows: int,
) -> None:
    """Benchmark ``SELECT FIRST n_rows *`` against the pre-populated table.

    The per-row cost is expected to stay roughly flat as ``n_rows``
    grows; a per-row median that rises with N points at memory
    pressure, GC, or codec degradation.

    The test first checks that the table holds at least ``n_rows`` rows,
    and every timed round asserts that exactly ``n_rows`` came back.
    """
    rounds_for = {1_000: 10, 10_000: 5, 100_000: 3, 1_000_000: 2}
    query = f"SELECT FIRST {n_rows} * FROM {scaling_select_table}"

    # Guard: refuse to benchmark against an under-populated table.
    probe = select_read_conn.cursor()
    probe.execute(f"SELECT COUNT(*) FROM {scaling_select_table}")
    (available,) = probe.fetchone()
    probe.close()
    assert available >= n_rows, (
        f"{scaling_select_table} has only {available} rows; "
        f"can't benchmark SELECT FIRST {n_rows}"
    )

    def run() -> int:
        reader = select_read_conn.cursor()
        reader.execute(query)
        fetched = reader.fetchall()
        reader.close()
        n_fetched = len(fetched)
        assert n_fetched == n_rows, (
            f"SELECT FIRST {n_rows} returned {n_fetched} rows"
        )
        return n_fetched

    benchmark.pedantic(run, rounds=rounds_for[n_rows], iterations=1)
# ----------------------------------------------------------------------------
# Wide-row scaling
# ----------------------------------------------------------------------------
@pytest.mark.parametrize("n_cols", WIDTH_COLUMNS)
def test_wide_row_select(
    benchmark, txn_conn: informix_db.Connection, n_cols: int
) -> None:
    """Benchmark ``SELECT *`` over 1000 rows of ``n_cols`` INT columns.

    Holding the row count fixed and varying the width isolates the
    per-column decode cost: each fetch decodes ``n_cols`` x 1000 values,
    so the per-row cost is expected to grow with the column count.

    The scratch table is created and filled inside the ``try`` so that a
    failed setup still drops it. After the benchmark, the row count
    returned by the timed function is checked, so a silently
    under-populated table cannot produce a misleadingly fast result.
    """
    n_rows = 1000
    table = f"p34_wide_{n_cols}"
    # Every column is INT, named c0 .. c{n_cols - 1}.
    col_defs = ", ".join(f"c{i} INT" for i in range(n_cols))
    placeholders = ", ".join(["?"] * n_cols)
    rows = [tuple(j * 7 + i for j in range(n_cols)) for i in range(n_rows)]

    def run() -> int:
        cur = txn_conn.cursor()
        cur.execute(f"SELECT * FROM {table}")
        fetched = cur.fetchall()
        cur.close()
        return len(fetched)

    try:
        setup_cur = txn_conn.cursor()
        # A leftover table from an aborted run is dropped; a missing one
        # is not an error.
        with contextlib.suppress(informix_db.Error):
            setup_cur.execute(f"DROP TABLE {table}")
        setup_cur.execute(f"CREATE TABLE {table} ({col_defs})")
        txn_conn.commit()
        setup_cur.executemany(
            f"INSERT INTO {table} VALUES ({placeholders})", rows
        )
        txn_conn.commit()
        setup_cur.close()

        # pedantic() returns the benchmarked function's return value.
        n_fetched = benchmark.pedantic(run, rounds=10, iterations=1)
        assert n_fetched == n_rows, (
            f"SELECT * FROM {table} returned {n_fetched} rows, "
            f"expected {n_rows}"
        )
    finally:
        # Best-effort teardown of the scratch table.
        with contextlib.suppress(informix_db.Error):
            drop_cur = txn_conn.cursor()
            drop_cur.execute(f"DROP TABLE {table}")
            txn_conn.commit()
# ----------------------------------------------------------------------------
# Type-mix workload — realistic application shape
# ----------------------------------------------------------------------------
@pytest.fixture(scope="module")
def type_mix_table(txn_conn: informix_db.Connection) -> Iterator[str]:
    """Yield the name of a 1000-row table with six differently-typed columns.

    Columns are INT, VARCHAR, DECIMAL, DATE, FLOAT and SMALLINT, meant
    to resemble a typical business-data row. The table is created and
    filled through ``txn_conn`` and dropped on teardown.
    """
    from datetime import date, timedelta
    from decimal import Decimal

    table = "p34_typemix"
    first_day = date(2024, 1, 1)

    def make_row(i: int) -> tuple:
        # One value per column, in column order.
        return (
            i,
            f"event_{i:05d}",
            Decimal(f"{i * 1.5:.2f}"),
            first_day + timedelta(days=i % 365),
            float(i) * 0.001,
            i % 100,
        )

    setup_cur = txn_conn.cursor()
    # Start clean; a missing table is not an error.
    with contextlib.suppress(informix_db.Error):
        setup_cur.execute(f"DROP TABLE {table}")
    setup_cur.execute(
        f"CREATE TABLE {table} ("
        f" id INT, name VARCHAR(64),"
        f" amount DECIMAL(12,2), event_date DATE, ratio FLOAT,"
        f" tag SMALLINT)"
    )
    txn_conn.commit()

    setup_cur.executemany(
        f"INSERT INTO {table} VALUES (?, ?, ?, ?, ?, ?)",
        [make_row(i) for i in range(1000)],
    )
    txn_conn.commit()

    try:
        yield table
    finally:
        # Best-effort teardown.
        with contextlib.suppress(informix_db.Error):
            drop_cur = txn_conn.cursor()
            drop_cur.execute(f"DROP TABLE {table}")
            txn_conn.commit()
def test_select_type_mix_1000_rows(
    benchmark,
    txn_conn: informix_db.Connection,
    type_mix_table: str,
) -> None:
    """Benchmark a full ``SELECT *`` of the 1000-row mixed-type table.

    Each row carries INT, VARCHAR, DECIMAL, DATE, FLOAT and SMALLINT
    values, so every fetch goes through six different decoders per row,
    including DECIMAL and DATE decoding. This is a broader mix than
    test_select_bench_table_all, which is described as mostly INT +
    VARCHAR.
    """
    query = f"SELECT * FROM {type_mix_table}"

    def run() -> int:
        reader = txn_conn.cursor()
        reader.execute(query)
        fetched = reader.fetchall()
        reader.close()
        return len(fetched)

    benchmark.pedantic(run, rounds=10, iterations=1)
# ----------------------------------------------------------------------------
# Memory profile at 100k rows
# ----------------------------------------------------------------------------
def test_streaming_fetch_100k_memory_profile(
    select_read_conn: informix_db.Connection,
    scaling_select_table: str,
) -> None:
    """Profile resident memory across a 100k-row execute and iteration.

    RSS is sampled before ``execute()``, right after it, every 10k rows
    while iterating, and once more after closing the cursor and running
    the garbage collector. The profile is printed, then three things are
    asserted:

    * all 100k rows were iterated;
    * total RSS growth stays under 500 MB (a coarse leak detector);
    * iterating adds less than 5 MB on top of the post-execute RSS.

    The cursor currently materializes the full result set on
    ``execute()`` (Phase 17 in-memory model), so RSS is expected to grow
    in proportion to the row count at that point and stay flat while
    iterating. A future server-cursor mode would be expected to keep
    memory flat throughout.
    """
    import gc
    import resource
    import sys
    from pathlib import Path

    n_rows = 100_000

    def rss_kb() -> int:
        """Return the process's resident set size in KB."""
        # Prefer /proc/self/status VmRSS: it is the *current* RSS.
        # ``ru_maxrss`` is a monotonic peak, so a high-water mark from
        # earlier in the test session would mask this fetch's changes.
        try:
            with Path("/proc/self/status").open() as f:
                for line in f:
                    if line.startswith("VmRSS:"):
                        return int(line.split()[1])
        except OSError:
            pass
        # Fallback where /proc is unavailable: peak RSS. The kernel
        # reports ru_maxrss in KB on Linux but in bytes on macOS.
        peak = resource.getrusage(resource.RUSAGE_SELF).ru_maxrss
        return peak // 1024 if sys.platform == "darwin" else peak

    gc.collect()
    pre_execute_rss = rss_kb()

    cur = select_read_conn.cursor()
    try:
        cur.execute(f"SELECT FIRST {n_rows} * FROM {scaling_select_table}")
        post_execute_rss = rss_kb()  # rows are materialized by execute()

        samples: list[tuple[int, int]] = [(0, post_execute_rss)]
        rows_seen = 0
        for _ in cur:
            rows_seen += 1
            if rows_seen % 10_000 == 0:
                samples.append((rows_seen, rss_kb()))
    finally:
        # Release the cursor even if execute or iteration fails.
        cur.close()

    gc.collect()
    final_rss = rss_kb()

    materialization_growth = post_execute_rss - pre_execute_rss
    iteration_growth = final_rss - post_execute_rss
    total_growth_kb = final_rss - pre_execute_rss

    print("\nstreaming fetch 100k memory profile:")
    print(f" pre-execute RSS: {pre_execute_rss:>9} KB")
    print(f" post-execute RSS: {post_execute_rss:>9} KB "
          f"({materialization_growth:+} KB — materialization cost)")
    for rows, rss in samples[1:]:
        print(f" rows={rows:>6} rss={rss:>9} KB "
              f"(Δ from post-execute: {rss - post_execute_rss:+} KB)")
    print(f" final={final_rss} KB after cur.close() + gc.collect()")
    print(" --")
    print(f" rows iterated: {rows_seen}")
    print(f" materialization: ~{materialization_growth * 1024 // n_rows} "
          f"bytes/row (100k rows of 5 cols)")
    print(f" iteration-side allocation: {iteration_growth} KB total "
          f"(should be ~0 — iteration doesn't allocate)")

    # Check the row count first: the memory figures only mean something
    # if the full result set was actually walked.
    assert rows_seen == n_rows, (
        f"expected {n_rows} rows iterated, got {rows_seen}"
    )
    # 500 MB ceiling for 100k rows = ~5 KB/row max. Observed cost has
    # been on the order of a few hundred bytes/row, so this leaves ample
    # headroom and only trips on heavy leaks.
    assert total_growth_kb < 500_000, (
        f"100k-row fetch grew RSS by {total_growth_kb} KB — cursor is leaking"
    )
    # Iteration-side allocation should be near-zero — the for loop just
    # walks the already-materialized rows. Allow 5 MB slack for
    # opportunistic allocator behavior.
    assert iteration_growth < 5_000, (
        f"iteration over already-fetched rows grew RSS by "
        f"{iteration_growth} KB — unexpected per-row allocation"
    )