informix-db/tests/test_pdu_match.py
Ryan Malloy ea00990774 Phase 1 polish: PDU match test catches a real capability-int bug
Polish item #1: byte-for-byte regression test that asserts our
generated login PDU is structurally identical to JDBC's reference
captured in docs/CAPTURES/01-connect-only.socat.log.

The test (tests/test_pdu_match.py) immediately caught a real bug:
the capability section was misread during Phase 0 byte-decoding.
Earlier text claimed Cap_1=1, Cap_2=0x3c000000, Cap_3=0 — actually:

  Cap_1 = 0x0000013c   (= (capability_class << 8) | protocol_version
                          where protocol_version = 0x3c = PF_PROT_SQLI_0600)
  Cap_2 = 0
  Cap_3 = 0

The misalignment was: the 0x3c byte I attributed to Cap_2's high
byte was actually Cap_1's low byte. The dev-image server is
permissive enough to accept arbitrary capability values, so the
connection succeeded even with the wrong bytes — but the PDU wasn't
structurally identical to JDBC's reference. SERVER-ACCEPTS ≠
STRUCTURALLY-CORRECT. This is exactly why the byte-for-byte diff
was the right polish item; "it connects" was a false ceiling.

After fix:
- 6 PDU-match tests assert byte-for-byte equality at offsets 2..280
  (the structural prefix: SLheader sans length, all login markers,
  capability ints, username, password, protocol IDs, env vars).
- Bytes 280+ legitimately differ per process (PID, TID, hostname,
  cwd, AppName) — those are NOT asserted.
- Length field (offsets 0..1) also legitimately differs because our
  PDU has a shorter env list and a shorter AppName.
- Test uses monkey-patched IfxSocket so no network is needed.

Polish item #2: Makefile per global CLAUDE.md convention. Targets:
install, lint, format, test, test-integration, test-all, test-pdu,
ifx-up/down/logs/shell/status, capture (re-run JDBC scenarios under
socat), clean. `make` (no target) prints help.

Doc updates:
- PROTOCOL_NOTES.md §12: corrected capability section with the
  actual values and an explanation of the methodology lesson
- DECISION_LOG.md: new entry recording the correction with a
  pointer to the regression test and the takeaway

Side artifacts:
- docs/CAPTURES/03-py-connect-only.socat.log
- docs/CAPTURES/04-py-no-database.socat.log
- docs/CAPTURES/05-py-fixed-caps.socat.log

Test counts: 40 unit + 6 integration = 46 total, all green, ruff clean.
2026-05-02 20:18:03 -06:00

158 lines
5.8 KiB
Python

"""Regression test: our generated login PDU is byte-identical to JDBC's.
Phase 1 polish artifact. We monkeypatch ``IfxSocket`` with a fake that
captures the bytes we send, then compare those bytes to the captured
JDBC reference PDU in ``docs/CAPTURES/01-connect-only.socat.log``.
Bytes 2..279 of the PDU are the *structural* prefix — SLheader (sans
length field), all login markers, the three capability ints, username,
password, protocol identifiers, and environment variables. These MUST
be byte-identical to JDBC's PDU; any divergence is a real bug (we
caught one this way already — the misaligned capability ints).
Bytes 280+ contain process-specific fields (PID, thread ID, hostname,
cwd, AppName) that legitimately differ per Python process. The test
asserts only the structural prefix.
"""
from __future__ import annotations
import re
from pathlib import Path
import pytest
import informix_db
from informix_db import connections
def _extract_first_client_pdu(log_path: Path) -> bytes:
    """Return the first client→server ('>') message of a socat -x log as bytes.

    The log is searched for the first line starting with ``"> "`` that carries
    a ``length=<n>`` field; the single space-indented line that follows it is
    taken as the hex dump and decoded.

    Args:
        log_path: Path to the socat capture log.

    Returns:
        The decoded bytes of that hex-dump line.

    Raises:
        AssertionError: If the log contains no matching client→server message.
    """
    log_text = log_path.read_text()
    client_message = re.compile(
        r"^> .*?length=\d+.*?\n (.*?)\n", re.MULTILINE | re.DOTALL
    )
    found = client_message.search(log_text)
    assert found, f"no client→server message found in {log_path}"
    # The dump separates bytes with spaces; drop them before decoding.
    hex_digits = found.group(1).strip().replace(" ", "")
    return bytes.fromhex(hex_digits)
@pytest.fixture
def jdbc_reference_pdu() -> bytes:
    """Provide the IBM JDBC reference login PDU captured under socat in Phase 0.

    The capture log is located relative to this test file (two directory
    levels up, then ``docs/CAPTURES``) and its first client→server message
    is returned as raw bytes.
    """
    project_dir = Path(__file__).parent.parent
    capture_log = project_dir / "docs/CAPTURES/01-connect-only.socat.log"
    return _extract_first_client_pdu(capture_log)
@pytest.fixture
def python_login_pdu(monkeypatch: pytest.MonkeyPatch) -> bytes:
    """Return the bytes our pure-Python client writes when connecting.

    ``connections.IfxSocket`` is replaced with a fake that records whatever is
    passed to ``write_all`` and then raises, so the connect flow stops before
    it tries to read a server response and no network access is needed.
    """
    sent = bytearray()

    class _CapturingSocket:
        """Socket stand-in: records writes, then aborts the connect flow."""

        def __init__(self, *_args: object, **_kwargs: object) -> None:
            # Constructor arguments are accepted and ignored.
            self._closed = False

        @property
        def closed(self) -> bool:
            return self._closed

        def close(self) -> None:
            self._closed = True

        def read_exact(self, _n: int) -> bytes:
            raise informix_db.OperationalError("stub: never reached")

        def write_all(self, data: bytes) -> None:
            sent.extend(data)
            # Abort here so the client never waits on a server reply.
            raise informix_db.OperationalError("stub: stop after login PDU")

    monkeypatch.setattr(connections, "IfxSocket", _CapturingSocket)

    login_args = {
        "host": "dont.care",
        "port": 9088,
        "user": "informix",
        "password": "in4mix",
        "database": None,
        "server": "informix",
    }
    # The stub's OperationalError is the expected way out of connect().
    with pytest.raises(informix_db.OperationalError, match="stub"):
        informix_db.connect(**login_args)

    return bytes(sent)
# ---------------------------------------------------------------------------
# Structural-prefix tests
# ---------------------------------------------------------------------------
# Offset where process-specific fields begin (PID/TID/hostname/cwd/AppName).
# It is used as an *exclusive* slice bound (``pdu[2:STRUCTURAL_PREFIX_END]``),
# so offsets 2..279 are compared and offset 280 itself is not.
# Empirically determined by running the diff after fixing the caps ints
# (see DECISION_LOG.md). Anything before this — apart from the 2-byte length
# field at offsets 0..1 — MUST match byte-for-byte.
STRUCTURAL_PREFIX_END = 280
def test_slheader_protocol_version_matches(
python_login_pdu: bytes, jdbc_reference_pdu: bytes
) -> None:
"""The SLheader's protocol-version byte (offset 3) must be 60 (PF_PROT_SQLI_0600)."""
assert python_login_pdu[3] == jdbc_reference_pdu[3] == 0x3C
def test_slheader_type_byte_matches(
python_login_pdu: bytes, jdbc_reference_pdu: bytes
) -> None:
"""The SLheader's slType byte (offset 2) must be 1 (SLTYPE_CONREQ)."""
assert python_login_pdu[2] == jdbc_reference_pdu[2] == 0x01
def test_capability_ints_match_reference(
python_login_pdu: bytes, jdbc_reference_pdu: bytes
) -> None:
"""Cap_1 / Cap_2 / Cap_3 (offsets 65..76) must be byte-identical to JDBC.
This is the test that would have caught the original capability-int bug
(where we sent caps_1=1, caps_2=0x3c000000 instead of caps_1=0x13c, caps_2=0).
"""
assert python_login_pdu[65:77] == jdbc_reference_pdu[65:77]
def test_structural_prefix_matches(
    python_login_pdu: bytes, jdbc_reference_pdu: bytes
) -> None:
    """Everything from byte 2 to ``STRUCTURAL_PREFIX_END`` must match exactly.

    Skips:
      * Bytes 0..1 (SLheader length): differs because Python sends fewer
        env vars / shorter AppName, so total length differs.
      * Bytes ``STRUCTURAL_PREFIX_END``..end: process-specific fields
        (PID, TID, hostname, cwd, AppName).

    On a mismatch the test fails at the first differing byte and prints up to
    four bytes of context on either side from both PDUs.
    """
    py_prefix = python_login_pdu[2:STRUCTURAL_PREFIX_END]
    ja_prefix = jdbc_reference_pdu[2:STRUCTURAL_PREFIX_END]
    if py_prefix != ja_prefix:
        # Find first divergence and report it with context.
        for i, (a, b) in enumerate(zip(py_prefix, ja_prefix, strict=False)):
            if a != b:
                off = i + 2
                # Clamp the window start: for a mismatch at offset 2 or 3,
                # ``off - 4`` is negative and a negative slice start would
                # wrap around to the end of the PDU, yielding an empty or
                # misleading context dump.
                lo = max(off - 4, 0)
                hi = off + 4
                pytest.fail(
                    f"structural-prefix mismatch at offset {off}: "
                    f"Python={a:#04x} JDBC={b:#04x}\n"
                    f" Python[{lo}..{hi}]: "
                    f"{python_login_pdu[lo:hi + 1].hex(' ')}\n"
                    f" JDBC [{lo}..{hi}]: "
                    f"{jdbc_reference_pdu[lo:hi + 1].hex(' ')}"
                )
    # Also reached when the prefixes differ only in length (no differing byte
    # within the overlapping part); the plain comparison then fails the test.
    assert py_prefix == ja_prefix
def test_pdu_is_correctly_length_prefixed(python_login_pdu: bytes) -> None:
"""The SLheader's first 2 bytes must equal the total PDU length."""
declared_length = int.from_bytes(python_login_pdu[0:2], "big", signed=False)
assert declared_length == len(python_login_pdu)
def test_pdu_ends_with_sq_asceot(python_login_pdu: bytes) -> None:
"""Every login PDU must end with [short SQ_ASCEOT=127] (= 0x00 0x7f)."""
assert python_login_pdu[-2:] == b"\x00\x7f"