diff --git a/docs/DECISION_LOG.md b/docs/DECISION_LOG.md index 71c88bf..02b1e7a 100644 --- a/docs/DECISION_LOG.md +++ b/docs/DECISION_LOG.md @@ -219,6 +219,49 @@ For now we read-and-discard. Phase 5+ will surface these as ``Cursor.lastrowid`` --- +## 2026-05-04 — Parameter binding: SQ_BIND chained with SQ_EXECUTE in one PDU + +**Status**: active +**Decision**: ``Cursor.execute(sql, params)`` for DML sends one PDU containing SQ_BIND with all parameter values, immediately followed by SQ_EXECUTE. No separate CIDESCRIBE round trip — the server infers parameter types from the type tags we send in SQ_BIND. +**Why this matters**: skipping the CIDESCRIBE/IDESCRIBE handshake (which JDBC does for type-discovery) saves one round trip per execute. The server accepts our SQ_BIND directly because we provide explicit type codes for each parameter. + +PDU structure (verified against ``docs/CAPTURES/02-dml-cycle.socat.log`` msg[29]): +``` +[short SQ_ID=4][int SQ_BIND=5][short numparams] +for each param: + [short type][short indicator=0 or -1][short prec_or_encLen] + writePadded(rawbytes) # data + 0x00 pad if odd-length +[short SQ_EXECUTE=7] +[short SQ_EOT] +``` + +Per-type encoding (Phase 4 MVP): + +| Python type | IDS type code | Precision short | Data | +|-------------|---------------|-----------------|------| +| ``int`` (32-bit) | 2 (INT) | ``0x0a00`` (=2560 packed display-width=10/scale=0) | 4 bytes BE | +| ``int`` (64-bit) | 52 (BIGINT) | ``0x1300`` (=4864 packed width=19/scale=0) | 8 bytes BE | +| ``str`` | 0 (CHAR — server casts) | 0 | ``[short len][bytes]`` (writePadded adds even pad) | +| ``float`` | 3 (FLOAT/DOUBLE) | 0 | 8 bytes IEEE 754 | +| ``bool`` | 45 (BOOL) | 0 | 1 byte (0x01 or 0x00) | +| ``None`` | 0 | indicator=-1 | (no data) | + +**Surprise**: JDBC sends Python-string equivalents as **CHAR (type=0)**, not VARCHAR (type=13). The server handles conversion to the actual column type via internal CIDESCRIBE/IDESCRIBE inference. We do the same — string parameters always go out as CHAR. + +**Surprise**: integer precision is **packed** as ``(display_width << 8) | scale``. For INTEGER, that's ``(10 << 8) | 0 = 0x0a00 = 2560``. Initially looked like a bug (why would precision be 2560?) until I realized it's a packed field. Captured in cursor's ``_build_bind_execute_pdu`` and converters' ``_encode_int``. + +**Paramstyle**: we declare ``paramstyle = "numeric"`` (PEP 249), supporting ``:1``, ``:2`` placeholders. Internally we rewrite to ``?`` (Informix's native style) before sending PREPARE. Trivial regex; doesn't escape strings/comments — Phase 5 can add a proper SQL tokenizer for that edge case. + +--- + +## 2026-05-04 — SELECT vs DML branching: keyword-based, not nfields-based + +**Status**: active +**Decision**: ``Cursor.execute`` branches on the first word of the SQL (``SELECT`` → cursor-fetch path; everything else → execute-and-release path). Don't use ``nfields > 0`` from the DESCRIBE response. +**Why**: a parameterized INSERT (``INSERT INTO t VALUES (?, ?, ?)``) returns a DESCRIBE response with ``nfields > 0`` because the server describes the row that WILL be inserted. The ``nfields == 0`` heuristic that worked for non-parameterized DML breaks here. JDBC does the same via its ``IfxStatement`` / ``IfxPreparedStatement`` subclassing. + +--- + ## (template — copy below this line for new entries) ``` diff --git a/src/informix_db/converters.py b/src/informix_db/converters.py index 2a9a867..62cf45e 100644 --- a/src/informix_db/converters.py +++ b/src/informix_db/converters.py @@ -124,5 +124,81 @@ def decode(type_code: int, raw: bytes) -> object: return decoder(raw) -# Encoders — stubbed for Phase 4 parameter binding. +# --------------------------------------------------------------------------- +# Encoders for parameter binding (Phase 4) +# --------------------------------------------------------------------------- +# Returns ``(type_code, prec_short, raw_bytes)`` per parameter. +# Per-param SQ_BIND format: ``[short type][short ind=0][short prec][data]`` +# where data is ``writePadded(raw_bytes)`` (emit + pad-to-even). +# +# JDBC's IfxSqli.sendBind (line 844+) does precision encoding per type: +# INT/SERIAL: prec = 0x0a00 (packed width=10, scale=0) +# VARCHAR sent as CHAR (type=0): prec = 0 +# FLOAT (DOUBLE PRECISION): prec = 0 +# +# Strings get type=0 (CHAR) on the wire — Informix's server casts them +# to the declared column type via the CIDESCRIBE/IDESCRIBE handshake. + +EncodedParam = tuple[int, int, bytes] + + +def _encode_int(value: int) -> EncodedParam: + """Encode a Python int as Informix INTEGER (type=2, 4 bytes BE).""" + return (2, 0x0A00, value.to_bytes(4, "big", signed=True)) + + +def _encode_bigint(value: int) -> EncodedParam: + """Encode a Python int as Informix BIGINT (type=52, 8 bytes BE).""" + return (52, 0x1300, value.to_bytes(8, "big", signed=True)) + + +def _encode_str(value: str) -> EncodedParam: + """Encode a Python str as Informix CHAR (type=0, length-prefixed). + + JDBC sends Java strings as CHAR (type=0) on the wire — the server + handles conversion to the actual column type (CHAR/VARCHAR/NVARCHAR). + Format: ``[short length][bytes]`` (writePadded adds even-byte pad). + """ + encoded = value.encode("iso-8859-1") + raw = len(encoded).to_bytes(2, "big") + encoded + return (0, 0, raw) + + +def _encode_float(value: float) -> EncodedParam: + """Encode a Python float as Informix FLOAT (type=3, 8-byte IEEE 754).""" + return (3, 0, struct.pack("!d", value)) + + +def _encode_bool(value: bool) -> EncodedParam: + """Encode a Python bool as Informix BOOLEAN (type=45, 1 byte).""" + return (45, 0, b"\x01" if value else b"\x00") + + +def encode_param(value: object) -> EncodedParam: + """Pick an encoder based on the Python value's type. + + Returns ``(ifx_type, precision_short, raw_bytes)`` for the parameter. + Returns ``(0, 0, b"")`` and the caller must use indicator=-1 for None. + """ + if value is None: + return (0, 0, b"") + if isinstance(value, bool): # NB: must come before int (bool is int subclass) + return _encode_bool(value) + if isinstance(value, int): + # Pick INT vs BIGINT based on range. + if -0x80000000 <= value <= 0x7FFFFFFF: + return _encode_int(value) + return _encode_bigint(value) + if isinstance(value, float): + return _encode_float(value) + if isinstance(value, str): + return _encode_str(value) + raise NotImplementedError( + f"parameter binding for {type(value).__name__} not yet supported " + f"(Phase 4 MVP: int, float, str, bool, None)" + ) + + +# Phase 6+ adds: bytes/Bytes, datetime.date, datetime.datetime, Decimal, +# datetime.timedelta (INTERVAL), bytearray (BYTE), large strings (LVARCHAR). ENCODERS: dict[int, Callable[[object], bytes]] = {} diff --git a/src/informix_db/cursors.py b/src/informix_db/cursors.py index 0982ddc..f49f62c 100644 --- a/src/informix_db/cursors.py +++ b/src/informix_db/cursors.py @@ -27,6 +27,7 @@ from typing import TYPE_CHECKING, Any from ._messages import MessageType from ._protocol import IfxStreamReader, make_pdu_writer from ._resultset import ColumnInfo, parse_describe, parse_tuple_payload +from .converters import encode_param from .exceptions import ( DatabaseError, InterfaceError, @@ -43,6 +44,22 @@ if TYPE_CHECKING: _cursor_counter = itertools.count(1) +_NUMERIC_PLACEHOLDER_RE = __import__("re").compile(r":(\d+)") + + +def _rewrite_numeric_to_qmark(sql: str) -> str: + """Convert ``:1`` / ``:2`` placeholders (paramstyle="numeric") to ``?``. + + Informix's wire protocol uses ``?`` natively. Since we expose + ``paramstyle="numeric"`` in the public API (matches Informix + ESQL/C convention), we rewrite before sending. Trivial cases only + — strings and comments are NOT escaped, so SQL containing literal + ``:1`` inside string literals will be wrongly substituted. Phase 5 + can add a proper SQL tokenizer. + """ + return _NUMERIC_PLACEHOLDER_RE.sub("?", sql) + + def _generate_cursor_name() -> str: """Generate a unique cursor name per the JDBC convention. @@ -88,16 +105,23 @@ class Cursor: # -- PEP 249 methods --------------------------------------------------- def execute(self, operation: str, parameters: Any = None) -> None: - """Execute a single SQL statement. + """Execute a single SQL statement, optionally with bound parameters. - Phase 2 supports parameterless SQL. ``parameters`` is reserved - for Phase 4 (SQ_BIND parameter binding). + ``parameters`` is a sequence (tuple/list) matching the ``?`` or + ``:N`` placeholders in ``operation``. Phase 4 supports int, float, + str, bool, None. """ self._check_open() + + # Normalize parameters to a tuple for indexing. + params: tuple = () if parameters is not None: - raise NotSupportedError( - "parameter binding lands in Phase 4; pass SQL with literals for now" - ) + if isinstance(parameters, dict): + raise NotSupportedError("named parameters not yet supported (use positional)") + params = tuple(parameters) + + # If using paramstyle="numeric", rewrite :1 / :2 → ? + sql = _rewrite_numeric_to_qmark(operation) if params else operation # Reset previous-execute state. self._description = None @@ -107,15 +131,27 @@ class Cursor: self._row_iter = None self._statement_already_done = False - # Step 1: PREPARE — send SQL, receive column descriptors. - self._conn._send_pdu(self._build_prepare_pdu(operation)) + # Step 1: PREPARE — send SQL with numQmarks = len(params). + self._conn._send_pdu(self._build_prepare_pdu(sql, num_qmarks=len(params))) self._read_describe_response() - if self._columns: - # SELECT path: open a cursor and fetch all rows. + # Branch on the SQL keyword. We can't use ``self._columns`` / + # ``nfields`` here because a parameterized INSERT also returns + # a non-empty DESCRIBE (server describes the would-be inserted + # row's columns). The SQL-keyword heuristic is what JDBC effectively + # does too via its IfxStatement / IfxPreparedStatement subclassing. + first_word = sql.lstrip().split(None, 1)[0].upper() if sql.strip() else "" + is_select = first_word == "SELECT" + + if is_select: + if params: + raise NotSupportedError( + "parameterized SELECT not yet implemented (Phase 4.x)" + ) self._execute_select() + elif params: + self._execute_dml_with_params(params) else: - # DDL/DML path: just SQ_EXECUTE + SQ_RELEASE. self._execute_dml() if self._description is not None: @@ -137,6 +173,18 @@ class Cursor: self._conn._send_pdu(self._build_release_pdu()) self._drain_to_eot() + def _execute_dml_with_params(self, params: tuple) -> None: + """DML with bound parameters: SQ_BIND + SQ_EXECUTE → SQ_RELEASE. + + Per JDBC's sendExecute path for prepared statements (line 1108 + of IfxSqli): build a single PDU containing SQ_BIND with all + parameter values followed by SQ_EXECUTE. + """ + self._conn._send_pdu(self._build_bind_execute_pdu(params)) + self._drain_to_eot() + self._conn._send_pdu(self._build_release_pdu()) + self._drain_to_eot() + def _execute_dml(self) -> None: """Run the DDL/DML path: SQ_EXECUTE → SQ_RELEASE. @@ -214,15 +262,16 @@ class Cursor: # -- PDU builders ----------------------------------------------------- - def _build_prepare_pdu(self, sql: str) -> bytes: + def _build_prepare_pdu(self, sql: str, num_qmarks: int = 0) -> bytes: """SQ_PREPARE + SQ_NDESCRIBE + SQ_WANTDONE + SQ_EOT. Per IfxSqli.sendPrepare. SQL uses 4-byte length prefix on modern servers (isRemove64KLimitSupported), with even-byte alignment pad. + ``num_qmarks`` is the count of ``?`` placeholders in the SQL. """ writer, buf = make_pdu_writer() writer.write_short(MessageType.SQ_PREPARE) - writer.write_short(0) # numQmarks — Phase 4 uses real values + writer.write_short(num_qmarks) sql_bytes = sql.encode("iso-8859-1") writer.write_int(len(sql_bytes)) writer.write_bytes(sql_bytes) @@ -233,6 +282,37 @@ class Cursor: writer.write_short(MessageType.SQ_EOT) return buf.getvalue() + def _build_bind_execute_pdu(self, params: tuple) -> bytes: + """SQ_BIND with parameter values + SQ_EXECUTE + SQ_EOT. + + From the JDBC capture (msg[29] in 02-dml-cycle.socat.log): + [short SQ_ID=4][int 5=SQ_BIND][short numparams] + for each param: + [short type][short indicator][short prec] + writePadded(data) # data + 0-pad if odd-len + [short SQ_EXECUTE=7] + [short SQ_EOT] + """ + writer, buf = make_pdu_writer() + writer.write_short(MessageType.SQ_ID) + writer.write_int(MessageType.SQ_BIND) # action = 5 + writer.write_short(len(params)) + for value in params: + if value is None: + # NULL: type=0, indicator=-1, prec=0, no data + writer.write_short(0) + writer.write_short(-1) + writer.write_short(0) + else: + ifx_type, prec, raw = encode_param(value) + writer.write_short(ifx_type) + writer.write_short(0) # indicator = 0 (normal) + writer.write_short(prec) + writer.write_padded(raw) + writer.write_short(MessageType.SQ_EXECUTE) # 7 + writer.write_short(MessageType.SQ_EOT) + return buf.getvalue() + def _build_curname_nfetch_pdu(self, cursor_name: str) -> bytes: """SQ_ID(CURNAME) + SQ_ID(NFETCH 4096) chained. diff --git a/tests/test_dml.py b/tests/test_dml.py index e5bbc61..b7c734d 100644 --- a/tests/test_dml.py +++ b/tests/test_dml.py @@ -112,6 +112,8 @@ def test_commit_rollback_in_unlogged_db_raises(conn_params: ConnParams) -> None: PDU and parses the SQ_ERR response. To actually test transactions, point integration at a logged database (e.g. ``stores_demo``). """ - with _connect(conn_params) as conn: - with pytest.raises(informix_db.OperationalError, match="-255"): - conn.commit() + with ( + _connect(conn_params) as conn, + pytest.raises(informix_db.OperationalError, match="-255"), + ): + conn.commit() diff --git a/tests/test_params.py b/tests/test_params.py new file mode 100644 index 0000000..4e43144 --- /dev/null +++ b/tests/test_params.py @@ -0,0 +1,118 @@ +"""Phase 4 integration tests — parameter binding (SQ_BIND). + +Tests cover ``?`` and ``:N`` placeholder styles, the supported Python +type set (int, float, str, bool, None), and round-tripping through INSERT ++ SELECT to verify both encode AND decode paths. +""" + +from __future__ import annotations + +import pytest + +import informix_db +from tests.conftest import ConnParams + +pytestmark = pytest.mark.integration + + +def _connect(conn_params: ConnParams) -> informix_db.Connection: + return informix_db.connect( + host=conn_params.host, + port=conn_params.port, + user=conn_params.user, + password=conn_params.password, + database=conn_params.database, + server=conn_params.server, + connect_timeout=10.0, + read_timeout=10.0, + ) + + +def test_insert_with_qmark_params(conn_params: ConnParams) -> None: + """``?`` placeholder style.""" + with _connect(conn_params) as conn: + cur = conn.cursor() + cur.execute("CREATE TEMP TABLE t_p_a (id INTEGER, name VARCHAR(50))") + cur.execute("INSERT INTO t_p_a VALUES (?, ?)", (42, "hello")) + assert cur.rowcount == 1 + + cur.execute("SELECT id, name FROM t_p_a") + assert cur.fetchall() == [(42, "hello")] + + +def test_insert_with_numeric_params(conn_params: ConnParams) -> None: + """``:1`` placeholder style (paramstyle="numeric").""" + with _connect(conn_params) as conn: + cur = conn.cursor() + cur.execute("CREATE TEMP TABLE t_p_b (id INTEGER, name VARCHAR(50))") + cur.execute("INSERT INTO t_p_b VALUES (:1, :2)", (99, "world")) + assert cur.rowcount == 1 + + cur.execute("SELECT id, name FROM t_p_b") + assert cur.fetchall() == [(99, "world")] + + +def test_int_float_str_round_trip(conn_params: ConnParams) -> None: + """All three core types in one INSERT, verified via SELECT.""" + with _connect(conn_params) as conn: + cur = conn.cursor() + cur.execute("CREATE TEMP TABLE t_p_c (i INTEGER, f FLOAT, s VARCHAR(20))") + cur.execute("INSERT INTO t_p_c VALUES (?, ?, ?)", (123, 4.5, "alpha")) + cur.execute("INSERT INTO t_p_c VALUES (?, ?, ?)", (-7, -1.25, "beta")) + + cur.execute("SELECT i, f, s FROM t_p_c ORDER BY i") + rows = cur.fetchall() + assert rows == [(-7, -1.25, "beta"), (123, 4.5, "alpha")] + + +def test_update_with_params(conn_params: ConnParams) -> None: + """UPDATE with parameter values in both SET and WHERE clauses.""" + with _connect(conn_params) as conn: + cur = conn.cursor() + cur.execute("CREATE TEMP TABLE t_p_d (id INTEGER, name VARCHAR(50))") + cur.execute("INSERT INTO t_p_d VALUES (?, ?)", (1, "old")) + cur.execute("INSERT INTO t_p_d VALUES (?, ?)", (2, "old")) + + cur.execute("UPDATE t_p_d SET name = ? WHERE id = ?", ("new", 2)) + + cur.execute("SELECT id, name FROM t_p_d ORDER BY id") + assert cur.fetchall() == [(1, "old"), (2, "new")] + + +def test_delete_with_param(conn_params: ConnParams) -> None: + """DELETE with a parameter in the WHERE clause.""" + with _connect(conn_params) as conn: + cur = conn.cursor() + cur.execute("CREATE TEMP TABLE t_p_e (id INTEGER, name VARCHAR(50))") + for i in range(1, 6): + cur.execute("INSERT INTO t_p_e VALUES (?, ?)", (i, f"row{i}")) + + cur.execute("DELETE FROM t_p_e WHERE id = ?", (3,)) + cur.execute("SELECT id FROM t_p_e ORDER BY id") + assert cur.fetchall() == [(1,), (2,), (4,), (5,)] + + +def test_unsupported_param_type_raises(conn_params: ConnParams) -> None: + """Phase 4 supports int/float/str/bool/None; other types raise.""" + with _connect(conn_params) as conn: + cur = conn.cursor() + cur.execute("CREATE TEMP TABLE t_p_f (id INTEGER)") + with pytest.raises(NotImplementedError, match="bytes"): + cur.execute("INSERT INTO t_p_f VALUES (?)", (b"raw bytes",)) + + +def test_parameterized_select_not_yet_supported(conn_params: ConnParams) -> None: + """Parameterized SELECT lands in Phase 4.x — currently raises.""" + with _connect(conn_params) as conn: + cur = conn.cursor() + with pytest.raises(informix_db.NotSupportedError, match=r"Phase 4\.x"): + cur.execute("SELECT 1 FROM systables WHERE tabid = ?", (1,)) + + +def test_dict_params_unsupported(conn_params: ConnParams) -> None: + """Named parameters aren't supported — paramstyle is ``numeric``.""" + with _connect(conn_params) as conn: + cur = conn.cursor() + cur.execute("CREATE TEMP TABLE t_p_g (id INTEGER)") + with pytest.raises(informix_db.NotSupportedError, match="positional"): + cur.execute("INSERT INTO t_p_g VALUES (:id)", {"id": 1}) diff --git a/tests/test_smoke.py b/tests/test_smoke.py index abe2deb..4362002 100644 --- a/tests/test_smoke.py +++ b/tests/test_smoke.py @@ -113,8 +113,11 @@ def test_cursor_returns_cursor_object(conn_params: ConnParams) -> None: assert cur.closed is True -def test_cursor_with_parameters_raises(conn_params: ConnParams) -> None: - """Parameter binding lands in Phase 4; passing parameters must raise NotSupportedError.""" +def test_cursor_with_parameters_for_dml_works(conn_params: ConnParams) -> None: + """Phase 4: parameter binding works for DML. + + Parameterized SELECT lands in Phase 4.x — see ``tests/test_params.py``. + """ with informix_db.connect( host=conn_params.host, port=conn_params.port, @@ -125,6 +128,8 @@ def test_cursor_with_parameters_raises(conn_params: ConnParams) -> None: connect_timeout=10.0, ) as conn: cur = conn.cursor() - with pytest.raises(informix_db.NotSupportedError, match="Phase 4"): - cur.execute("SELECT ?", (1,)) - cur.close() + cur.execute("CREATE TEMP TABLE t_smoke_p (id INTEGER)") + cur.execute("INSERT INTO t_smoke_p VALUES (?)", (42,)) + assert cur.rowcount == 1 + cur.execute("SELECT id FROM t_smoke_p") + assert cur.fetchall() == [(42,)]