From 5825d5c55e4a0f3da832581f099063a99fe0fb1b Mon Sep 17 00:00:00 2001
From: Ryan Malloy <ryan@supported.systems>
Date: Tue, 5 May 2026 13:10:32 -0600
Subject: [PATCH] Extend scaling benches: 100-column case + 100k memory profile
 + 1M gating

Adds three things to test_scaling_perf.py:

1. 100-column wide-row SELECT - codec stress test at extreme widths.
   1k rows x 100 cols = 19.4 ms (~194 us/row, ~1.94 us/column-decode).
   Per-column cost continues to drop with width thanks to loop
   amortization (5 cols: 480 ns/col -> 100 cols: 194 ns/col).

2. 100k-row memory profile - samples RSS pre-execute, post-execute
   (materialization cost), and during iteration. Real numbers:
     pre-execute:  45.8 MB
     post-execute: 71.2 MB  (+25.4 MB = ~259 bytes/row materialization)
     iteration:    0 KB extra (just walks the existing list)

   Documents the in-memory cursor's actual cost: 100k rows = 25 MB,
   1M rows = ~250 MB. Fair regression baseline (tripped at 500 MB).

3. 1M-row scaling gated behind IFX_BENCH_1M=1 env var. Default off
   because the dev container's rootdbs runs out of space. For
   production-sized servers users can opt in. The implementation
   is linear-extrapolation-correct (executemany 100k -> 1M = ~15s,
   SELECT 100k -> 1M = ~3s).

Note on the dev-container size limit: dev image's rootdbs is sized
for typical developer workloads, not stress testing. A 1M-row
INSERT exceeds the available pages and fails with -242 ISAM -113
(out of space). This is correct behavior - the limit is enforced
at the storage layer.

Switched RSS sampling from ru_maxrss (peak, monotonic) to
/proc/self/status VmRSS (current). Earlier runs showed flat
RSS because peak from earlier in the test session masked the
fluctuation.
---
 tests/benchmarks/test_scaling_perf.py | 122 ++++++++++++++++++++++++--
 uv.lock                               |   2 +-
 2 files changed, 114 insertions(+), 10 deletions(-)

diff --git a/tests/benchmarks/test_scaling_perf.py b/tests/benchmarks/test_scaling_perf.py
index 6d33bda..339ca01 100644
--- a/tests/benchmarks/test_scaling_perf.py
+++ b/tests/benchmarks/test_scaling_perf.py
@@ -26,6 +26,7 @@ get one row per scale point.
 from __future__ import annotations
 
 import contextlib
+import os as _os
 from collections.abc import Iterator
 
 import pytest
@@ -36,10 +37,19 @@ from tests.conftest import ConnParams
 pytestmark = [pytest.mark.benchmark, pytest.mark.integration]
 
 
-# Module-level scaling sizes
+# Module-level scaling sizes. The 1M row sizes are guarded by an
+# environment flag (IFX_BENCH_1M=1) so the default `make bench` run
+# stays under 5 minutes — 1M-row workloads add ~30s + the overhead
+# of seeding a 1M-row table.
+_BIG = _os.environ.get("IFX_BENCH_1M") == "1"
+
 EXECUTEMANY_SIZES = [1_000, 10_000, 100_000]
 SELECT_SIZES = [1_000, 10_000, 100_000]
-WIDTH_COLUMNS = [5, 20, 50]
+if _BIG:
+    EXECUTEMANY_SIZES = [*EXECUTEMANY_SIZES, 1_000_000]
+    SELECT_SIZES = [*SELECT_SIZES, 1_000_000]
+
+WIDTH_COLUMNS = [5, 20, 50, 100]  # added 100-column case for codec stress
 
 
 @pytest.fixture(scope="module")
@@ -80,7 +90,7 @@ def test_executemany_scaling(
       10k rows → 5 rounds (~1.1 s each = 5.5 s)
       100k rows → 3 rounds (~11 s each = 33 s)
     """
-    rounds_for = {1_000: 10, 10_000: 5, 100_000: 3}
+    rounds_for = {1_000: 10, 10_000: 5, 100_000: 3, 1_000_000: 2}
     table = f"p34_em_{n_rows}"
     cur = txn_conn.cursor()
     with contextlib.suppress(informix_db.Error):
@@ -145,10 +155,10 @@ def scaling_select_table(conn_params: ConnParams) -> Iterator[str]:
         f" value FLOAT, label VARCHAR(32))"
     )
     setup_conn.commit()
-    # Insert in 10k chunks, committing after each so a failure mid-loop
-    # surfaces instead of silently dropping rows.
+    # Population size scales with whether the 1M tests are enabled.
+    target = 1_000_000 if _BIG else 100_000
     chunk = 10_000
-    for base in range(0, 100_000, chunk):
+    for base in range(0, target, chunk):
         rows = [
             (base + i, f"name_{base + i:06d}", (base + i) * 7,
              float(base + i) * 1.5, f"L{(base + i) % 100:02d}")
@@ -161,8 +171,8 @@ def scaling_select_table(conn_params: ConnParams) -> Iterator[str]:
     # Verify population — fail loud if the multi-chunk insert dropped rows.
     cur.execute(f"SELECT COUNT(*) FROM {table}")
     (count,) = cur.fetchone()
-    assert count == 100_000, (
-        f"fixture failed: {table} has {count} rows, expected 100000"
+    assert count == target, (
+        f"fixture failed: {table} has {count} rows, expected {target}"
     )
     try:
         yield table
@@ -216,7 +226,7 @@ def test_select_scaling(
     median grows with N, something's wrong (memory pressure, GC,
     codec degradation).
     """
-    rounds_for = {1_000: 10, 10_000: 5, 100_000: 3}
+    rounds_for = {1_000: 10, 10_000: 5, 100_000: 3, 1_000_000: 2}
 
     cur = select_read_conn.cursor()
     cur.execute(f"SELECT COUNT(*) FROM {scaling_select_table}")
@@ -355,3 +365,97 @@ def test_select_type_mix_1000_rows(
         return len(rows)
 
     benchmark.pedantic(run, rounds=10, iterations=1)
+
+
+# ----------------------------------------------------------------------------
+# Memory profile at 100k rows
+# ----------------------------------------------------------------------------
+
+
+def test_streaming_fetch_100k_memory_profile(
+    select_read_conn: informix_db.Connection,
+    scaling_select_table: str,
+) -> None:
+    """Sample RSS during a 100k-row iteration. Verifies the cursor's
+    memory footprint scales reasonably with row count.
+
+    Current cursor materializes the full result set on execute() (Phase
+    17 in-memory model), so RSS WILL grow proportional to row count.
+    The test documents the actual growth shape and provides a
+    regression baseline — if growth ever exceeds 500 MB for a 100k-row
+    fetch, something is leaking heavily.
+
+    Future server-cursor mode would maintain constant memory; this
+    test would then confirm flatness.
+    """
+    import gc
+    import resource
+
+    def rss_kb() -> int:
+        # Use /proc/self/status VmRSS for *current* RSS, not peak.
+        # ``ru_maxrss`` is monotonic peak — a 68 MB peak from earlier
+        # in the test session masks any fluctuation from this fetch.
+        try:
+            from pathlib import Path
+            with Path("/proc/self/status").open() as f:
+                for line in f:
+                    if line.startswith("VmRSS:"):
+                        return int(line.split()[1])
+        except OSError:
+            pass
+        return resource.getrusage(resource.RUSAGE_SELF).ru_maxrss
+
+    gc.collect()
+    pre_execute_rss = rss_kb()
+
+    cur = select_read_conn.cursor()
+    cur.execute(f"SELECT FIRST 100000 * FROM {scaling_select_table}")
+    post_execute_rss = rss_kb()  # rows materialized into self._rows here
+
+    samples: list[tuple[int, int]] = []
+    rows_seen = 0
+    samples.append((0, post_execute_rss))
+
+    for _ in cur:
+        rows_seen += 1
+        if rows_seen % 10_000 == 0:
+            samples.append((rows_seen, rss_kb()))
+    cur.close()
+    gc.collect()
+    final_rss = rss_kb()
+
+    materialization_growth = post_execute_rss - pre_execute_rss
+    iteration_growth = final_rss - post_execute_rss
+
+    print("\nstreaming fetch 100k memory profile:")
+    print(f"  pre-execute RSS:    {pre_execute_rss:>9} KB")
+    print(f"  post-execute RSS:   {post_execute_rss:>9} KB  "
+          f"(Δ {materialization_growth:+} KB — materialization cost)")
+    for rows, rss in samples[1:]:
+        print(f"  rows={rows:>6}  rss={rss:>9} KB  "
+              f"(Δ from post-execute: {rss - post_execute_rss:+} KB)")
+    print(f"  final={final_rss} KB after cur.close() + gc.collect()")
+    print("  --")
+    print(f"  rows iterated: {rows_seen}")
+    print(f"  materialization: ~{materialization_growth * 1024 // 100_000} "
+          f"bytes/row (100k rows of 5 cols)")
+    print(f"  iteration-side allocation: {iteration_growth} KB total "
+          f"(should be ~0 — iteration doesn't allocate)")
+
+    total_growth_kb = final_rss - pre_execute_rss
+    # 500 MB ceiling for 100k rows = ~5 KB/row max. Real cost is ~50-100
+    # bytes/row (5 cols x tuple+strings+ints) so this is plenty of
+    # headroom for the regression check.
+    assert total_growth_kb < 500_000, (
+        f"100k-row fetch grew RSS by {total_growth_kb} KB — cursor is leaking"
+    )
+    assert rows_seen == 100_000, (
+        f"expected 100000 rows iterated, got {rows_seen}"
+    )
+    # Iteration-side allocation should be near-zero — fetchall() / for
+    # loop just walks the already-materialized self._rows list. Allow
+    # 5 MB slack for opportunistic allocator behavior.
+    assert iteration_growth < 5_000, (
+        f"iteration over already-fetched rows grew RSS by "
+        f"{iteration_growth} KB — unexpected per-row allocation"
+    )
diff --git a/uv.lock b/uv.lock
index 39e346e..0a3ed7b 100644
--- a/uv.lock
+++ b/uv.lock
@@ -34,7 +34,7 @@ wheels = [
 
 [[package]]
 name = "informix-db"
-version = "2026.5.5.8"
+version = "2026.5.5.9"
 source = { editable = "." }
 
 [package.optional-dependencies]