diff --git a/docs/DECISION_LOG.md b/docs/DECISION_LOG.md index 17f885a..469c2c5 100644 --- a/docs/DECISION_LOG.md +++ b/docs/DECISION_LOG.md @@ -659,6 +659,66 @@ This is the third instance of "compare against JDBC at the byte level" diagnosti --- +## 2026-05-04 — Phase 9: smart-LOB BLOB/CLOB locator decoding (Phase 10 deferred for full fetch) + +**Status**: active +**Decision**: Smart-LOB columns are decoded into typed `informix_db.BlobLocator` / `informix_db.ClobLocator` objects that wrap the 72-byte server-side reference. Full data retrieval (fetching the actual bytes) is deferred to **Phase 10** because it requires implementing two new wire-protocol families (`SQ_FPROUTINE` and `SQ_LODATA`, described below). + +### How smart-LOBs surface in the wire protocol + +Surprise discovery: **BLOB and CLOB columns do not appear with their nominal type codes (102 / 101) in the SQ_DESCRIBE response.** Instead, the server presents them as `UDTFIXED` (type 41) with: +- `extended_id = 10` for BLOB, `11` for CLOB +- `extended_owner = "informix"`, `extended_name = "blob"` / `"clob"` +- `encoded_length = 72` (locator size) + +The 72 bytes that arrive in the SQ_TUPLE are the locator — an opaque server-side pointer into the smart-LOB sbspace. They contain enough information for the server to find the actual data (sbspace ID, blob ID, etc.) but they are NOT the data. + +### What it takes to retrieve the actual bytes (Phase 10 work) + +Captured JDBC wire flow shows that retrieving a BLOB requires: + +1. **`SQ_FPROUTINE` (tag 103)** — fast-path RPC to invoke `ifx_lo_open(locator, mode=4)` (LO_RDONLY). This is a *separate* execution path from PREPARE/EXECUTE/FETCH. It includes its own parameter-marshaling format with UDT support (the locator goes in as an `IfxUDT` with `extended_type_name="blob"` and the 72 bytes). The response carries back a small int — the file descriptor (`loFd`). + +2. **`SQ_LODATA` (tag 97)** — bulk byte transfer. 
Body: `[short subCom][short loFd][int length][short bufSize=32000]` (plus `[int8 offset][short whence]` for LO_READWITHSEEK) with sub-commands 0=LO_READ, 1=LO_READWITHSEEK, 2=LO_WRITE. Response is `[short SQ_LODATA][short opType][int totalSize][short chunk_size][bytes data]...`. + +3. **Another `SQ_FPROUTINE`** to invoke `ifx_lo_close(loFd)`. + +Writing a smart-LOB is even more involved: `ifx_lo_create(spec, mode, blob)` returns a fresh locator AND a file descriptor, then `SQ_LODATA(LO_WRITE, ...)` streams the bytes, then `ifx_lo_close`. The locator is then passed as an INSERT parameter (also via UDT marshaling). + +### Server-side prerequisites + +Building on Phase 7/8 setup, smart-LOBs additionally need: +1. **An sbspace** (Phase 6.f setup): `onspaces -c -S sbspace1 -p /path -o 0 -s 50000 -Df "AVG_LO_SIZE=100"` +2. **`SBSPACENAME` config**: `onmode -wm SBSPACENAME=sbspace1` — sets the default sbspace name. Without this, `ifx_lo_create` fails with `-Invalid default sbspace name (sbspace).` (the default is the literal string `"sbspace"`, which names an sbspace that doesn't exist). + +### What ships in Phase 9 + +- `informix_db.BlobLocator(raw: bytes)` — 72-byte frozen dataclass, validates length on construction, has a safe `__repr__` that doesn't leak the locator bytes (they're internal/opaque to the client). +- `informix_db.ClobLocator(raw: bytes)` — same shape, distinct type. Same-bytes locators of different families compare *unequal* by design. +- Row decoder branch in `_resultset.parse_tuple_payload` that detects `UDTFIXED` + extended_id 10/11 and wraps the bytes appropriately. +- Wire constants `SQ_LODATA = 97`, `SQ_FPROUTINE = 103`, `SQ_FPARAM = 104` added to `_messages.py` for Phase 10 use. + +### Test coverage + +- 11 unit tests (`tests/test_blob_locator_unit.py`) exercising construction, immutability, equality, hash, repr safety, and size validation. No Informix needed. 
+- 4 integration tests (`tests/test_smart_lob.py`) verifying that SELECT on a BLOB column returns a `BlobLocator`, the description metadata is correct, the result is immutable, and the repr doesn't leak the locator bytes. The fixture seeds test data via the JDBC reference client (since smart-LOB writes also need the deferred protocols). + +Total project tests: **64 unit + 111 integration = 175 tests**. + +### Why "research-first, implementation-after" is becoming the default for big-protocol phases + +Phases 6.f, 8, and 9 all followed the same arc: spend the first half of the phase on "what does the wire actually look like?" research (capturing JDBC traces, reading decompiled source, configuring the server until JDBC works). Then either ship implementation in the same phase (Phase 8) or split into a separate later phase (6.f → 8, 9 → 10). The split is appropriate when the protocol surface is materially larger than what we can validate in one focused session. + +For Phase 9, the deferred work is genuinely substantial: +- SQ_FPROUTINE alone is a new RPC framework with its own request/response format +- It needs UDT parameter marshaling (`extended_owner` + `extended_name` + raw bytes) +- SQ_LODATA needs read+write paths with chunk streaming +- The cursor needs new state-machine awareness (open the LOB, fetch, close — all between cursor open and CLOSE) + +We estimate Phase 10 at roughly 2x the protocol surface of Phase 8. 
+ +--- + ## (template — copy below this line for new entries) ``` diff --git a/src/informix_db/__init__.py b/src/informix_db/__init__.py index 25ce41e..6d8e91f 100644 --- a/src/informix_db/__init__.py +++ b/src/informix_db/__init__.py @@ -23,7 +23,7 @@ from __future__ import annotations from importlib.metadata import PackageNotFoundError, version from .connections import Connection -from .converters import IntervalYM +from .converters import BlobLocator, ClobLocator, IntervalYM from .exceptions import ( DatabaseError, DataError, @@ -49,6 +49,8 @@ except PackageNotFoundError: __version__ = "0.0.0+local" __all__ = [ + "BlobLocator", + "ClobLocator", "Connection", "DataError", "DatabaseError", diff --git a/src/informix_db/_messages.py b/src/informix_db/_messages.py index 623d1c0..6cbd4de 100644 --- a/src/informix_db/_messages.py +++ b/src/informix_db/_messages.py @@ -87,13 +87,26 @@ class MessageType(IntEnum): SQ_XASTATE = 74 SQ_XOPEN = 82 - # --- BLOB / LOB — Phase 6+ --- + # --- BLOB / LOB --- + # Phase 8 (BYTE/TEXT in-row blobs) SQ_FETCHBLOB = 38 SQ_BLOB = 39 SQ_BBIND = 41 SQ_SBBIND = 52 SQ_FILE_READ = 106 SQ_FILE_WRITE = 107 + # Phase 9+ (smart-LOB BLOB/CLOB) + SQ_LODATA = 97 # smart-LOB data transfer with sub-commands: + # 0=LO_READ, 1=LO_READWITHSEEK, 2=LO_WRITE. + # Body: [short subCom][short loFd][int length] + # [short bufSize=32000] (+ [int8 offset][short whence] + # for LO_READWITHSEEK). See IfxSqli.sendLoData line 4864. + SQ_FPROUTINE = 103 # fast-path RPC to invoke server-side stored + # functions like ifx_lo_open / ifx_lo_close / + # ifx_lo_create. Used to obtain a file descriptor + # for an open smart-LOB locator. Implements its own + # parameter-marshaling format with UDT support. 
+ SQ_FPARAM = 104 # parameter metadata for SQ_FPROUTINE # --- RPC sub-protocol (range 200-205) — Phase 6+ --- SQ_INVOKE = 200 diff --git a/src/informix_db/_resultset.py b/src/informix_db/_resultset.py index a3c56af..4b7fe59 100644 --- a/src/informix_db/_resultset.py +++ b/src/informix_db/_resultset.py @@ -285,6 +285,21 @@ def parse_tuple_payload( values.append(_decode_interval(raw, col.encoded_length)) continue + # BLOB / CLOB (smart-LOBs): the SQ_DESCRIBE response presents + # these as UDTFIXED (type 41) with extended_id 10 (BLOB) or 11 + # (CLOB) and encoded_length = 72 (locator size). The 72 bytes + # we read here are an opaque server-side reference, NOT the + # actual data. To fetch bytes, the client must call ``ifx_lo_open`` + # via SQ_FPROUTINE then SQ_LODATA(LO_READ) — deferred to Phase 10. + if base == int(IfxType.UDTFIXED) and col.extended_id in (10, 11): + from .converters import BlobLocator, ClobLocator + width = col.encoded_length + raw = payload[offset:offset + width] + offset += width + cls = BlobLocator if col.extended_id == 10 else ClobLocator + values.append(cls(raw=bytes(raw))) + continue + # Fixed-width types width = FIXED_WIDTHS.get(base) if width is None: diff --git a/src/informix_db/converters.py b/src/informix_db/converters.py index 20e2dc3..e7c8498 100644 --- a/src/informix_db/converters.py +++ b/src/informix_db/converters.py @@ -24,6 +24,59 @@ from collections.abc import Callable from ._types import IfxType, base_type +@dataclasses.dataclass(frozen=True, slots=True) +class BlobLocator: + """Reference to a smart-LOB BLOB stored in an sbspace. + + A BLOB column doesn't return the actual bytes in the SQL row — it + returns a 72-byte server-side locator (a pointer into the smart-large- + object space). To retrieve the bytes, the client must invoke the + fast-path RPC ``ifx_lo_open(locator, LO_RDONLY)`` followed by an + ``SQ_LODATA`` data-transfer round-trip. 
+ + Phase 9 surfaces these locators as typed objects so users can recognize + BLOB columns and not mistake the raw 72 bytes for actual data. + Implementing ``.read()`` requires the SQ_FPROUTINE + SQ_LODATA wire + protocol, deferred to Phase 10. + + The ``raw`` field is the on-the-wire bytes — opaque to clients, + meaningful only when handed back to the server via ifx_lo_open. + """ + + raw: bytes + + def __post_init__(self) -> None: + if len(self.raw) != 72: + raise ValueError( + f"BlobLocator expects 72 bytes, got {len(self.raw)}" + ) + + def __repr__(self) -> str: + return "BlobLocator(<72-byte server reference>)" + + +@dataclasses.dataclass(frozen=True, slots=True) +class ClobLocator: + """Reference to a smart-LOB CLOB stored in an sbspace. + + Same shape as :class:`BlobLocator` but for character data. Differs + only in extended-id (11 instead of 10) on the wire and in how the + bytes should be decoded if/when retrieved (per the connection's + DB_LOCALE). + """ + + raw: bytes + + def __post_init__(self) -> None: + if len(self.raw) != 72: + raise ValueError( + f"ClobLocator expects 72 bytes, got {len(self.raw)}" + ) + + def __repr__(self) -> str: + return "ClobLocator(<72-byte server reference>)" + + @dataclasses.dataclass(frozen=True, slots=True) class IntervalYM: """Year-month interval — Informix's only temporal duration that can't diff --git a/tests/reference/RefBlobTest.java b/tests/reference/RefBlobTest.java new file mode 100644 index 0000000..99c7f41 --- /dev/null +++ b/tests/reference/RefBlobTest.java @@ -0,0 +1,22 @@ +package tests.reference; +import java.sql.*; +public class RefBlobTest { + public static void main(String[] args) throws Exception { + String table = args[0], payload = args[1]; + Class.forName("com.informix.jdbc.IfxDriver"); + try (Connection c = DriverManager.getConnection( + "jdbc:informix-sqli://127.0.0.1:9088/testdb:INFORMIXSERVER=informix", + "informix", "in4mix")) { + c.setAutoCommit(true); + try (Statement s = c.createStatement()) { 
+ s.execute("CREATE TABLE " + table + " (id INT, data BLOB)"); + } + try (PreparedStatement ps = c.prepareStatement( + "INSERT INTO " + table + " VALUES (?, ?)")) { + ps.setInt(1, 1); + ps.setBytes(2, payload.getBytes()); + ps.executeUpdate(); + } + } + } +} diff --git a/tests/reference/RefClient.java b/tests/reference/RefClient.java index 925b70c..1e1b436 100644 --- a/tests/reference/RefClient.java +++ b/tests/reference/RefClient.java @@ -44,6 +44,7 @@ public class RefClient { case "select-1": runSelect1(); break; case "dml-cycle": runDmlCycle(); break; case "byte-cycle": runByteCycle(); break; + case "blob-cycle": runBlobCycle(); break; case "all": runConnectOnly(); runSelect1(); @@ -193,4 +194,45 @@ public class RefClient { } } } + + // ------------------------------------------------------------------- + // Scenario E: BLOB (smart-LOB) write+read cycle. Requires: + // - logged DB (env IFX_DATABASE=testdb) + // - sbspace1 (smart-large-object space) already created + // Set IFX_DATABASE=testdb before running. 
+ // ------------------------------------------------------------------- + static void runBlobCycle() throws SQLException { + log("=== blob-cycle ==="); + String table = "blob_" + Long.toHexString(System.nanoTime()); + try (Connection c = DriverManager.getConnection(url(), USER, PASSWORD)) { + c.setAutoCommit(true); + + try (Statement s = c.createStatement()) { + log("CREATE TABLE %s (id INT, data BLOB)", table); + s.execute("CREATE TABLE " + table + " (id INT, data BLOB)"); + } + + byte[] payload = "hello smart-LOB blob from JDBC".getBytes(); + try (PreparedStatement ps = c.prepareStatement( + "INSERT INTO " + table + " VALUES (?, ?)")) { + ps.setInt(1, 1); + ps.setBytes(2, payload); + int n = ps.executeUpdate(); + log("INSERT rowcount=%d (sent %d bytes)", n, payload.length); + } + + try (Statement s = c.createStatement(); + ResultSet rs = s.executeQuery("SELECT id, data FROM " + table)) { + while (rs.next()) { + byte[] got = rs.getBytes(2); + log(" row: id=%d data.len=%d data=%s", rs.getInt(1), + got.length, new String(got)); + } + } + + try (Statement s = c.createStatement()) { + s.execute("DROP TABLE " + table); + } + } + } } diff --git a/tests/test_blob_locator_unit.py b/tests/test_blob_locator_unit.py new file mode 100644 index 0000000..25e4532 --- /dev/null +++ b/tests/test_blob_locator_unit.py @@ -0,0 +1,71 @@ +"""Phase 9 unit tests — BlobLocator/ClobLocator value semantics. + +These don't require Informix; they exercise the typed locator wrappers +directly to verify shape, immutability, equality, and repr safety. 
+""" + +from __future__ import annotations + +import dataclasses + +import pytest + +from informix_db import BlobLocator, ClobLocator + + +def test_blob_locator_holds_72_bytes() -> None: + raw = bytes(range(72)) + loc = BlobLocator(raw=raw) + assert loc.raw == raw + assert len(loc.raw) == 72 + + +def test_clob_locator_holds_72_bytes() -> None: + raw = bytes(reversed(range(72))) + loc = ClobLocator(raw=raw) + assert loc.raw == raw + + +@pytest.mark.parametrize("size", [0, 1, 71, 73, 144]) +def test_locator_rejects_wrong_size(size: int) -> None: + """The constructor enforces exactly 72 bytes.""" + with pytest.raises(ValueError, match="72 bytes"): + BlobLocator(raw=bytes(size)) + with pytest.raises(ValueError, match="72 bytes"): + ClobLocator(raw=bytes(size)) + + +def test_locator_is_frozen() -> None: + """Instances are immutable per ``frozen=True`` dataclass decorator.""" + loc = BlobLocator(raw=bytes(72)) + with pytest.raises(dataclasses.FrozenInstanceError): + loc.raw = b"x" * 72 # type: ignore[misc] + + +def test_blob_and_clob_locator_are_distinct_types() -> None: + """Same-bytes locators of different families compare unequal.""" + raw = bytes(72) + blob = BlobLocator(raw=raw) + clob = ClobLocator(raw=raw) + assert blob != clob + assert not isinstance(blob, ClobLocator) + assert not isinstance(clob, BlobLocator) + + +def test_locator_equality() -> None: + """Same bytes + same family → equal.""" + raw = b"\x01\x02\x03" + bytes(69) + a = BlobLocator(raw=raw) + b = BlobLocator(raw=raw) + assert a == b + assert hash(a) == hash(b) + + +def test_locator_repr_omits_raw_bytes() -> None: + """``repr`` doesn't leak the opaque locator bytes (no use to user).""" + raw = b"\xde\xad\xbe\xef" + bytes(68) + loc = BlobLocator(raw=raw) + r = repr(loc) + assert "BlobLocator" in r + assert "deadbeef" not in r.lower() + assert raw.hex() not in r diff --git a/tests/test_smart_lob.py b/tests/test_smart_lob.py new file mode 100644 index 0000000..6c03d85 --- /dev/null +++ 
b/tests/test_smart_lob.py @@ -0,0 +1,186 @@ +"""Phase 9 integration tests — smart-LOB BLOB/CLOB locator decoding. + +Smart-LOB columns (BLOB type 102, CLOB type 101) are presented in the +SQ_DESCRIBE response as ``UDTFIXED`` (type 41) with extended_id 10 (BLOB) +or 11 (CLOB) and ``encoded_length=72`` (the locator size). The 72 bytes +in the SQ_TUPLE are an opaque server-side reference, NOT the actual data. + +This phase surfaces the locator as a typed :class:`informix_db.BlobLocator` +or :class:`informix_db.ClobLocator` so users can recognize the column +type and not mistake the raw 72 bytes for actual content. Retrieving +the actual bytes requires the ``SQ_FPROUTINE`` + ``SQ_LODATA`` wire +protocols, deferred to Phase 10. + +Test data is populated via the JDBC reference client (a Java helper) +since our driver doesn't yet support smart-LOB writes either. +""" + +from __future__ import annotations + +import contextlib +import dataclasses +import os +import shutil +import subprocess +from collections.abc import Iterator +from pathlib import Path + +import pytest + +import informix_db +from tests.conftest import ConnParams + +pytestmark = pytest.mark.integration + + +def _connect(params: ConnParams) -> informix_db.Connection: + return informix_db.connect( + host=params.host, + port=params.port, + user=params.user, + password=params.password, + database=params.database, + server=params.server, + connect_timeout=10.0, + read_timeout=10.0, + autocommit=True, + ) + + +def _java_available() -> bool: + """JDBC reference client requires java + the IfxJdbc jar.""" + if not shutil.which("java"): + return False + return Path("build/ifxjdbc.jar").exists() and Path("build/tests").exists() + + +@pytest.fixture +def blob_table_with_data( + logged_db_params: ConnParams, +) -> Iterator[str]: + """Create a BLOB table and seed it via the JDBC reference client. + + Smart-LOB writes require the SQ_FPROUTINE + SQ_LODATA protocols + that our driver doesn't implement yet (Phase 10). 
We use the + JDBC reference client (``RefBlob``) to seed test data. + """ + if not _java_available(): + pytest.skip( + "JDBC reference client unavailable (need java + build/ifxjdbc.jar)" + ) + + table = "t_blob_test" + # Drop if exists + with _connect(logged_db_params) as conn: + cur = conn.cursor() + with contextlib.suppress(Exception): + cur.execute(f"DROP TABLE {table}") + + # Use Java helper to populate (compile RefBlob inline if needed) + helper_dir = Path("build/tests/reference") + helper_dir.mkdir(parents=True, exist_ok=True) + helper_src = Path("tests/reference/RefBlobTest.java") + if not helper_src.exists(): + helper_src.write_text( + 'package tests.reference;\n' + 'import java.sql.*;\n' + 'public class RefBlobTest {\n' + ' public static void main(String[] args) throws Exception {\n' + ' String table = args[0], payload = args[1];\n' + ' Class.forName("com.informix.jdbc.IfxDriver");\n' + ' try (Connection c = DriverManager.getConnection(\n' + ' "jdbc:informix-sqli://127.0.0.1:9088/testdb:INFORMIXSERVER=informix",\n' + ' "informix", "in4mix")) {\n' + ' c.setAutoCommit(true);\n' + ' try (Statement s = c.createStatement()) {\n' + ' s.execute("CREATE TABLE " + table + " (id INT, data BLOB)");\n' + ' }\n' + ' try (PreparedStatement ps = c.prepareStatement(\n' + ' "INSERT INTO " + table + " VALUES (?, ?)")) {\n' + ' ps.setInt(1, 1);\n' + ' ps.setBytes(2, payload.getBytes());\n' + ' ps.executeUpdate();\n' + ' }\n' + ' }\n' + ' }\n' + '}\n' + ) + subprocess.run( + [ + "javac", "-cp", "build/ifxjdbc.jar", + "-d", "build/", str(helper_src), + ], + check=True, capture_output=True, + ) + subprocess.run( + [ + "java", "-cp", "build/ifxjdbc.jar:build/", + "tests.reference.RefBlobTest", table, "hello smart-lob bytes", + ], + check=True, capture_output=True, + env={**os.environ, "IFX_DATABASE": "testdb"}, + ) + + try: + yield table + finally: + with _connect(logged_db_params) as conn: + cur = conn.cursor() + with contextlib.suppress(Exception): + cur.execute(f"DROP TABLE 
{table}") + + +def test_blob_column_returns_blob_locator( + logged_db_params: ConnParams, blob_table_with_data: str +) -> None: + """SELECTing a BLOB column returns a :class:`BlobLocator`.""" + with _connect(logged_db_params) as conn: + cur = conn.cursor() + cur.execute(f"SELECT id, data FROM {blob_table_with_data}") + rows = cur.fetchall() + assert len(rows) == 1 + assert rows[0][0] == 1 + assert isinstance(rows[0][1], informix_db.BlobLocator) + assert len(rows[0][1].raw) == 72 + + +def test_blob_column_description_metadata( + logged_db_params: ConnParams, blob_table_with_data: str +) -> None: + """``cursor.description`` for BLOB column reports type=41 (UDTFIXED) size=72.""" + with _connect(logged_db_params) as conn: + cur = conn.cursor() + cur.execute(f"SELECT id, data FROM {blob_table_with_data} WHERE 1=0") + # description is (name, type_code, display_size, internal_size, + # precision, scale, null_ok) + assert cur.description is not None + data_col = cur.description[1] + assert data_col[0] == "data" + assert data_col[1] == 41 # UDTFIXED + assert data_col[2] == 72 # display_size = locator size + + +def test_blob_locator_is_immutable( + logged_db_params: ConnParams, blob_table_with_data: str +) -> None: + """BlobLocator is frozen: the 72-byte ref can't be reassigned in place.""" + with _connect(logged_db_params) as conn: + cur = conn.cursor() + cur.execute(f"SELECT data FROM {blob_table_with_data}") + (locator,) = cur.fetchone() + with pytest.raises(dataclasses.FrozenInstanceError): + locator.raw = b"x" * 72 # type: ignore[misc] + + +def test_blob_locator_repr_is_safe( + logged_db_params: ConnParams, blob_table_with_data: str +) -> None: + """``repr(locator)`` doesn't leak the raw bytes (which are opaque/internal).""" + with _connect(logged_db_params) as conn: + cur = conn.cursor() + cur.execute(f"SELECT data FROM {blob_table_with_data}") + (locator,) = cur.fetchone() + r = repr(locator) + assert "BlobLocator" in r + # raw bytes must NOT leak into repr + assert 
locator.raw.hex() not in r