From 34ad04a87282d7faca56b960007167936c6eaeb7 Mon Sep 17 00:00:00 2001 From: Ryan Malloy Date: Mon, 4 May 2026 07:55:13 -0600 Subject: [PATCH] =?UTF-8?q?Phase=202.x:=20VARCHAR=20row=20decoding=20works?= =?UTF-8?q?=20=E2=80=94=20three=20byte-level=20fixes?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Three findings, each caught by a different debugging technique, documented in DECISION_LOG.md: 1. CURNAME+NFETCH PDU: trailing reserved field is SHORT not INT. Caught by byte-diffing our 44-byte PDU against JDBC's 42-byte reference under socat. The server tolerated the longer version for INT-only SELECTs (silently consuming extra zeros) but rejected it for VARCHAR queries. Lesson: server tolerance varies by query type — always match JDBC byte-for-byte. 2. SQ_TUPLE payload pads to even byte alignment. An 11-byte "syscolumns" VARCHAR payload had a trailing 0x00 between it and the next SQ_TUPLE tag. JDBC's IfxRowColumn.readTuple consumes this pad silently; we weren't, so any odd-length variable-width row desynced the parser. 3. VARCHAR/NCHAR/NVCHAR in tuple data use a SINGLE-byte length prefix (max 255 chars — IDS VARCHAR's hard limit). NOT a 2-byte short as I'd initially assumed. CHAR is fixed-width per encoded_length. LVARCHAR uses a 4-byte int prefix for >255 byte values. Module changes: src/informix_db/_resultset.py — _LENGTH_PREFIXED_SHORT_TYPES set, branched VARCHAR/NCHAR/NVCHAR (1-byte prefix) vs CHAR (fixed) vs LVARCHAR (4-byte prefix); even-byte alignment pad consumed after each SQ_TUPLE payload. src/informix_db/cursors.py — CURNAME+NFETCH and standalone NFETCH PDUs now write_short(0) for the reserved trailing field. Tests: 40 unit + 18 integration (3 new VARCHAR tests) = 58 total, all green, ruff clean. 
New tests cover: - VARCHAR single-column SELECT - Odd-length VARCHAR row (regression for the pad-byte bug) - Mixed INT + VARCHAR + FLOAT three-column SELECT Sample output: SELECT FIRST 5 tabname FROM systables → ('systables',), ('syscolumns',), ('sysindices',), ('systabauth',), ('syscolauth',) SELECT FIRST 3 tabname, tabid, nrows → ('systables', 1, 276.0), ... VARCHAR was the last known gap from the Phase 2 commit. Phase 2 now reads INT, BIGINT, REAL, FLOAT, CHAR, VARCHAR end-to-end. Phase 6+ types (DATETIME, INTERVAL, DECIMAL, BLOBs) remain. --- docs/CAPTURES/13-py-varchar.socat.log | 36 ++++++++++ docs/CAPTURES/14-py-varchar-fail.socat.log | 36 ++++++++++ docs/CAPTURES/15-py-varchar-fixed.socat.log | 36 ++++++++++ docs/DECISION_LOG.md | 16 +++++ src/informix_db/_resultset.py | 75 +++++++++++++++++++-- src/informix_db/cursors.py | 8 ++- tests/test_select.py | 47 ++++++++++++- 7 files changed, 245 insertions(+), 9 deletions(-) create mode 100644 docs/CAPTURES/13-py-varchar.socat.log create mode 100644 docs/CAPTURES/14-py-varchar-fail.socat.log create mode 100644 docs/CAPTURES/15-py-varchar-fixed.socat.log diff --git a/docs/CAPTURES/13-py-varchar.socat.log b/docs/CAPTURES/13-py-varchar.socat.log new file mode 100644 index 0000000..b6aa621 --- /dev/null +++ b/docs/CAPTURES/13-py-varchar.socat.log @@ -0,0 +1,36 @@ +2026/05/04 07:40:07 socat[340723] N listening on AF=2 0.0.0.0:9090 +2026/05/04 07:40:07 socat[340723] N accepting connection from AF=2 127.0.0.1:46824 on AF=2 127.0.0.1:9090 +2026/05/04 07:40:07 socat[340723] N opening connection to 127.0.0.1:9088 +2026/05/04 07:40:07 socat[340723] N opening connection to AF=2 127.0.0.1:9088 +2026/05/04 07:40:07 socat[340723] N successfully connected from local address AF=2 127.0.0.1:49596 +2026/05/04 07:40:07 socat[340723] N successfully connected to 127.0.0.1:9088 +2026/05/04 07:40:07 socat[340723] N starting data transfer loop with FDs [6,6] and [5,5] +> 2026/05/04 07:40:07.516183 length=384 from=0 to=383 + 01 80 01 3c 
00 00 00 64 00 65 00 00 00 3d 00 06 49 45 45 45 4d 00 00 6c 73 71 6c 65 78 65 63 00 00 00 00 00 00 06 39 2e 32 38 30 00 00 0c 52 44 53 23 52 30 30 30 30 30 30 00 00 05 73 71 6c 69 00 00 00 01 3c 00 00 00 00 00 00 00 00 00 01 00 09 69 6e 66 6f 72 6d 69 78 00 00 07 69 6e 34 6d 69 78 00 6f 6c 00 00 00 00 00 00 00 00 00 3d 74 6c 69 74 63 70 00 00 00 00 00 01 00 68 00 0b 00 00 00 03 00 09 69 6e 66 6f 72 6d 69 78 00 00 00 00 00 00 00 00 00 00 00 00 6a 00 06 00 07 44 42 50 41 54 48 00 00 02 2e 00 00 0e 43 4c 49 45 4e 54 5f 4c 4f 43 41 4c 45 00 00 0d 65 6e 5f 55 53 2e 38 38 35 39 2d 31 00 00 11 43 4c 4e 54 5f 50 41 4d 5f 43 41 50 41 42 4c 45 00 00 02 31 00 00 07 44 42 44 41 54 45 00 00 06 59 34 4d 44 2d 00 00 0c 49 46 58 5f 55 50 44 44 45 53 43 00 00 02 31 00 00 09 4e 4f 44 45 46 44 41 43 00 00 03 6e 6f 00 00 6b 00 00 00 00 00 05 33 01 00 00 00 00 00 0b 72 70 6d 2d 62 75 6c 6c 65 74 00 00 00 00 29 2f 68 6f 6d 65 2f 72 70 6d 2f 63 6c 61 75 64 65 2f 69 6e 66 6f 72 6d 69 78 2f 70 79 74 68 6f 6e 2d 6c 69 62 72 61 72 79 00 00 74 00 20 00 00 00 00 00 00 00 00 00 16 69 6e 66 6f 72 6d 69 78 2d 64 62 40 70 69 64 33 34 30 37 33 37 00 00 7f +< 2026/05/04 07:40:07.528122 length=276 from=0 to=275 + 01 14 02 3c 10 00 00 64 00 65 00 00 00 3d 00 06 49 45 45 45 49 00 00 6c 73 72 76 69 6e 66 78 00 00 00 00 00 00 2f 49 42 4d 20 49 6e 66 6f 72 6d 69 78 20 44 79 6e 61 6d 69 63 20 53 65 72 76 65 72 20 56 65 72 73 69 6f 6e 20 31 35 2e 30 2e 31 2e 30 2e 33 00 00 07 73 65 72 69 61 6c 00 00 09 69 6e 66 6f 72 6d 69 78 00 00 00 01 3c 00 00 00 00 00 00 00 00 00 00 00 00 00 00 6f 6e 00 00 00 00 00 00 00 00 00 3d 73 6f 63 74 63 70 00 00 00 00 00 00 00 66 00 00 00 00 00 00 00 00 00 00 00 14 00 00 00 6b 00 00 00 00 00 00 03 1a 00 00 00 00 00 0d 32 33 32 37 63 34 33 35 34 65 61 38 00 00 00 00 0f 2f 68 6f 6d 65 2f 69 6e 66 6f 72 6d 69 78 00 00 6e 00 04 00 00 00 00 00 74 00 33 00 00 00 c8 00 00 00 c8 00 29 2f 6f 70 74 2f 69 62 6d 2f 69 6e 66 6f 72 6d 69 78 2f 76 31 35 2e 30 2e 31 2e 30 2e 33 2f 62 69 6e 2f 
6f 6e 69 6e 69 74 00 00 7f +> 2026/05/04 07:40:07.528386 length=14 from=384 to=397 + 00 7e 00 08 ff fc 7f fc 3c 8c aa 97 00 0c +< 2026/05/04 07:40:07.528522 length=16 from=276 to=291 + 00 7e 00 09 bd be 9f fe 7f b7 ff ef ff 00 00 0c +> 2026/05/04 07:40:07.528584 length=48 from=398 to=445 + 00 51 00 06 00 26 00 0c 00 04 00 06 44 42 54 45 4d 50 00 04 2f 74 6d 70 00 0b 53 55 42 51 43 41 43 48 45 53 5a 00 00 02 31 30 00 00 00 00 00 0c +< 2026/05/04 07:40:07.528662 length=2 from=292 to=293 + 00 0c +> 2026/05/04 07:40:07.528681 length=18 from=446 to=463 + 00 24 00 09 73 79 73 6d 61 73 74 65 72 00 00 00 00 0c +< 2026/05/04 07:40:07.528847 length=28 from=294 to=321 + 00 0f 00 15 00 00 00 00 00 00 00 00 00 00 00 00 00 37 00 00 00 01 00 00 00 01 00 0c +> 2026/05/04 07:40:07.528893 length=66 from=464 to=529 + 00 02 00 00 00 00 00 34 53 45 4c 45 43 54 20 46 49 52 53 54 20 32 20 74 61 62 6e 61 6d 65 20 46 52 4f 4d 20 73 79 73 74 61 62 6c 65 73 20 4f 52 44 45 52 20 42 59 20 74 61 62 69 64 00 16 00 31 00 0c +< 2026/05/04 07:40:07.529094 length=84 from=322 to=405 + 00 08 00 02 00 00 00 00 00 00 00 81 00 01 00 00 00 08 00 00 00 00 00 00 00 00 00 0d 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 80 74 61 62 6e 61 6d 65 00 00 0f 00 10 00 00 00 00 00 00 00 00 00 00 00 00 00 37 00 00 01 14 00 00 00 25 00 0c +> 2026/05/04 07:40:07.529204 length=44 from=530 to=573 + 00 04 00 00 00 03 00 12 5f 69 66 78 63 30 30 30 30 30 30 30 30 30 30 30 30 31 00 06 00 04 00 00 00 09 00 00 10 00 00 00 00 00 00 0c +< 2026/05/04 07:40:07.529311 length=66 from=406 to=471 + 00 0e 00 00 00 00 00 0a 09 73 79 73 74 61 62 6c 65 73 00 0e 00 00 00 00 00 0b 0a 73 79 73 63 6f 6c 75 6d 6e 73 00 00 0f 00 10 00 00 00 02 00 00 03 02 00 00 00 00 00 37 00 00 01 14 00 00 00 25 00 0c +> 2026/05/04 07:40:07.529416 length=2 from=574 to=575 + 00 38 +2026/05/04 07:40:07 socat[340723] N socket 1 (fd 6) is at EOF +2026/05/04 07:40:07 socat[340723] N socket 2 (fd 5) is at EOF +2026/05/04 07:40:07 socat[340723] N exiting 
with status 0 diff --git a/docs/CAPTURES/14-py-varchar-fail.socat.log b/docs/CAPTURES/14-py-varchar-fail.socat.log new file mode 100644 index 0000000..b6ad334 --- /dev/null +++ b/docs/CAPTURES/14-py-varchar-fail.socat.log @@ -0,0 +1,36 @@ +2026/05/04 07:52:19 socat[368990] N listening on AF=2 0.0.0.0:9090 +2026/05/04 07:52:20 socat[368990] N accepting connection from AF=2 127.0.0.1:54142 on AF=2 127.0.0.1:9090 +2026/05/04 07:52:20 socat[368990] N opening connection to 127.0.0.1:9088 +2026/05/04 07:52:20 socat[368990] N opening connection to AF=2 127.0.0.1:9088 +2026/05/04 07:52:20 socat[368990] N successfully connected from local address AF=2 127.0.0.1:53650 +2026/05/04 07:52:20 socat[368990] N successfully connected to 127.0.0.1:9088 +2026/05/04 07:52:20 socat[368990] N starting data transfer loop with FDs [6,6] and [5,5] +> 2026/05/04 07:52:20.401381 length=384 from=0 to=383 + 01 80 01 3c 00 00 00 64 00 65 00 00 00 3d 00 06 49 45 45 45 4d 00 00 6c 73 71 6c 65 78 65 63 00 00 00 00 00 00 06 39 2e 32 38 30 00 00 0c 52 44 53 23 52 30 30 30 30 30 30 00 00 05 73 71 6c 69 00 00 00 01 3c 00 00 00 00 00 00 00 00 00 01 00 09 69 6e 66 6f 72 6d 69 78 00 00 07 69 6e 34 6d 69 78 00 6f 6c 00 00 00 00 00 00 00 00 00 3d 74 6c 69 74 63 70 00 00 00 00 00 01 00 68 00 0b 00 00 00 03 00 09 69 6e 66 6f 72 6d 69 78 00 00 00 00 00 00 00 00 00 00 00 00 6a 00 06 00 07 44 42 50 41 54 48 00 00 02 2e 00 00 0e 43 4c 49 45 4e 54 5f 4c 4f 43 41 4c 45 00 00 0d 65 6e 5f 55 53 2e 38 38 35 39 2d 31 00 00 11 43 4c 4e 54 5f 50 41 4d 5f 43 41 50 41 42 4c 45 00 00 02 31 00 00 07 44 42 44 41 54 45 00 00 06 59 34 4d 44 2d 00 00 0c 49 46 58 5f 55 50 44 44 45 53 43 00 00 02 31 00 00 09 4e 4f 44 45 46 44 41 43 00 00 03 6e 6f 00 00 6b 00 00 00 00 00 05 a1 6a 00 00 00 00 00 0b 72 70 6d 2d 62 75 6c 6c 65 74 00 00 00 00 29 2f 68 6f 6d 65 2f 72 70 6d 2f 63 6c 61 75 64 65 2f 69 6e 66 6f 72 6d 69 78 2f 70 79 74 68 6f 6e 2d 6c 69 62 72 61 72 79 00 00 74 00 20 00 00 00 00 00 00 00 00 00 16 69 6e 66 6f 72 6d 69 78 2d 
64 62 40 70 69 64 33 36 39 30 30 32 00 00 7f +< 2026/05/04 07:52:20.413315 length=276 from=0 to=275 + 01 14 02 3c 10 00 00 64 00 65 00 00 00 3d 00 06 49 45 45 45 49 00 00 6c 73 72 76 69 6e 66 78 00 00 00 00 00 00 2f 49 42 4d 20 49 6e 66 6f 72 6d 69 78 20 44 79 6e 61 6d 69 63 20 53 65 72 76 65 72 20 56 65 72 73 69 6f 6e 20 31 35 2e 30 2e 31 2e 30 2e 33 00 00 07 73 65 72 69 61 6c 00 00 09 69 6e 66 6f 72 6d 69 78 00 00 00 01 3c 00 00 00 00 00 00 00 00 00 00 00 00 00 00 6f 6e 00 00 00 00 00 00 00 00 00 3d 73 6f 63 74 63 70 00 00 00 00 00 00 00 66 00 00 00 00 00 00 00 00 00 00 00 14 00 00 00 6b 00 00 00 00 00 00 03 1a 00 00 00 00 00 0d 32 33 32 37 63 34 33 35 34 65 61 38 00 00 00 00 0f 2f 68 6f 6d 65 2f 69 6e 66 6f 72 6d 69 78 00 00 6e 00 04 00 00 00 00 00 74 00 33 00 00 00 c8 00 00 00 c8 00 29 2f 6f 70 74 2f 69 62 6d 2f 69 6e 66 6f 72 6d 69 78 2f 76 31 35 2e 30 2e 31 2e 30 2e 33 2f 62 69 6e 2f 6f 6e 69 6e 69 74 00 00 7f +> 2026/05/04 07:52:20.413609 length=14 from=384 to=397 + 00 7e 00 08 ff fc 7f fc 3c 8c aa 97 00 0c +< 2026/05/04 07:52:20.413685 length=16 from=276 to=291 + 00 7e 00 09 bd be 9f fe 7f b7 ff ef ff 00 00 0c +> 2026/05/04 07:52:20.413733 length=48 from=398 to=445 + 00 51 00 06 00 26 00 0c 00 04 00 06 44 42 54 45 4d 50 00 04 2f 74 6d 70 00 0b 53 55 42 51 43 41 43 48 45 53 5a 00 00 02 31 30 00 00 00 00 00 0c +< 2026/05/04 07:52:20.413829 length=2 from=292 to=293 + 00 0c +> 2026/05/04 07:52:20.413857 length=18 from=446 to=463 + 00 24 00 09 73 79 73 6d 61 73 74 65 72 00 00 00 00 0c +< 2026/05/04 07:52:20.414134 length=28 from=294 to=321 + 00 0f 00 15 00 00 00 00 00 00 00 00 00 00 00 00 00 37 00 00 00 01 00 00 00 01 00 0c +> 2026/05/04 07:52:20.414192 length=66 from=464 to=529 + 00 02 00 00 00 00 00 34 53 45 4c 45 43 54 20 46 49 52 53 54 20 32 20 74 61 62 6e 61 6d 65 20 46 52 4f 4d 20 73 79 73 74 61 62 6c 65 73 20 4f 52 44 45 52 20 42 59 20 74 61 62 69 64 00 16 00 31 00 0c +< 2026/05/04 07:52:20.414481 length=84 from=322 to=405 + 00 08 00 02 00 00 00 00 00 00 
00 81 00 01 00 00 00 08 00 00 00 00 00 00 00 00 00 0d 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 80 74 61 62 6e 61 6d 65 00 00 0f 00 10 00 00 00 00 00 00 00 00 00 00 00 00 00 37 00 00 01 14 00 00 00 25 00 0c +> 2026/05/04 07:52:20.414674 length=44 from=530 to=573 + 00 04 00 00 00 03 00 12 5f 69 66 78 63 30 30 30 30 30 30 30 30 30 30 30 30 31 00 06 00 04 00 00 00 09 00 00 10 00 00 00 00 00 00 0c +< 2026/05/04 07:52:20.414850 length=66 from=406 to=471 + 00 0e 00 00 00 00 00 0a 09 73 79 73 74 61 62 6c 65 73 00 0e 00 00 00 00 00 0b 0a 73 79 73 63 6f 6c 75 6d 6e 73 00 00 0f 00 10 00 00 00 02 00 00 03 02 00 00 00 00 00 37 00 00 01 14 00 00 00 25 00 0c +> 2026/05/04 07:52:20.415061 length=2 from=574 to=575 + 00 38 +2026/05/04 07:52:20 socat[368990] N socket 1 (fd 6) is at EOF +2026/05/04 07:52:20 socat[368990] N socket 2 (fd 5) is at EOF +2026/05/04 07:52:20 socat[368990] N exiting with status 0 diff --git a/docs/CAPTURES/15-py-varchar-fixed.socat.log b/docs/CAPTURES/15-py-varchar-fixed.socat.log new file mode 100644 index 0000000..400c54d --- /dev/null +++ b/docs/CAPTURES/15-py-varchar-fixed.socat.log @@ -0,0 +1,36 @@ +2026/05/04 07:53:11 socat[370904] N listening on AF=2 0.0.0.0:9090 +2026/05/04 07:53:11 socat[370904] N accepting connection from AF=2 127.0.0.1:39366 on AF=2 127.0.0.1:9090 +2026/05/04 07:53:11 socat[370904] N opening connection to 127.0.0.1:9088 +2026/05/04 07:53:11 socat[370904] N opening connection to AF=2 127.0.0.1:9088 +2026/05/04 07:53:11 socat[370904] N successfully connected from local address AF=2 127.0.0.1:34888 +2026/05/04 07:53:11 socat[370904] N successfully connected to 127.0.0.1:9088 +2026/05/04 07:53:11 socat[370904] N starting data transfer loop with FDs [6,6] and [5,5] +> 2026/05/04 07:53:11.713724 length=384 from=0 to=383 + 01 80 01 3c 00 00 00 64 00 65 00 00 00 3d 00 06 49 45 45 45 4d 00 00 6c 73 71 6c 65 78 65 63 00 00 00 00 00 00 06 39 2e 32 38 30 00 00 0c 52 44 53 23 52 30 30 30 30 30 30 00 00 05 73 71 6c 69 00 00 00 
01 3c 00 00 00 00 00 00 00 00 00 01 00 09 69 6e 66 6f 72 6d 69 78 00 00 07 69 6e 34 6d 69 78 00 6f 6c 00 00 00 00 00 00 00 00 00 3d 74 6c 69 74 63 70 00 00 00 00 00 01 00 68 00 0b 00 00 00 03 00 09 69 6e 66 6f 72 6d 69 78 00 00 00 00 00 00 00 00 00 00 00 00 6a 00 06 00 07 44 42 50 41 54 48 00 00 02 2e 00 00 0e 43 4c 49 45 4e 54 5f 4c 4f 43 41 4c 45 00 00 0d 65 6e 5f 55 53 2e 38 38 35 39 2d 31 00 00 11 43 4c 4e 54 5f 50 41 4d 5f 43 41 50 41 42 4c 45 00 00 02 31 00 00 07 44 42 44 41 54 45 00 00 06 59 34 4d 44 2d 00 00 0c 49 46 58 5f 55 50 44 44 45 53 43 00 00 02 31 00 00 09 4e 4f 44 45 46 44 41 43 00 00 03 6e 6f 00 00 6b 00 00 00 00 00 05 a9 22 00 00 00 00 00 0b 72 70 6d 2d 62 75 6c 6c 65 74 00 00 00 00 29 2f 68 6f 6d 65 2f 72 70 6d 2f 63 6c 61 75 64 65 2f 69 6e 66 6f 72 6d 69 78 2f 70 79 74 68 6f 6e 2d 6c 69 62 72 61 72 79 00 00 74 00 20 00 00 00 00 00 00 00 00 00 16 69 6e 66 6f 72 6d 69 78 2d 64 62 40 70 69 64 33 37 30 39 37 38 00 00 7f +< 2026/05/04 07:53:11.725381 length=276 from=0 to=275 + 01 14 02 3c 10 00 00 64 00 65 00 00 00 3d 00 06 49 45 45 45 49 00 00 6c 73 72 76 69 6e 66 78 00 00 00 00 00 00 2f 49 42 4d 20 49 6e 66 6f 72 6d 69 78 20 44 79 6e 61 6d 69 63 20 53 65 72 76 65 72 20 56 65 72 73 69 6f 6e 20 31 35 2e 30 2e 31 2e 30 2e 33 00 00 07 73 65 72 69 61 6c 00 00 09 69 6e 66 6f 72 6d 69 78 00 00 00 01 3c 00 00 00 00 00 00 00 00 00 00 00 00 00 00 6f 6e 00 00 00 00 00 00 00 00 00 3d 73 6f 63 74 63 70 00 00 00 00 00 00 00 66 00 00 00 00 00 00 00 00 00 00 00 14 00 00 00 6b 00 00 00 00 00 00 03 1a 00 00 00 00 00 0d 32 33 32 37 63 34 33 35 34 65 61 38 00 00 00 00 0f 2f 68 6f 6d 65 2f 69 6e 66 6f 72 6d 69 78 00 00 6e 00 04 00 00 00 00 00 74 00 33 00 00 00 c8 00 00 00 c8 00 29 2f 6f 70 74 2f 69 62 6d 2f 69 6e 66 6f 72 6d 69 78 2f 76 31 35 2e 30 2e 31 2e 30 2e 33 2f 62 69 6e 2f 6f 6e 69 6e 69 74 00 00 7f +> 2026/05/04 07:53:11.725655 length=14 from=384 to=397 + 00 7e 00 08 ff fc 7f fc 3c 8c aa 97 00 0c +< 2026/05/04 07:53:11.725715 length=16 from=276 to=291 + 00 7e 
00 09 bd be 9f fe 7f b7 ff ef ff 00 00 0c +> 2026/05/04 07:53:11.725742 length=48 from=398 to=445 + 00 51 00 06 00 26 00 0c 00 04 00 06 44 42 54 45 4d 50 00 04 2f 74 6d 70 00 0b 53 55 42 51 43 41 43 48 45 53 5a 00 00 02 31 30 00 00 00 00 00 0c +< 2026/05/04 07:53:11.725816 length=2 from=292 to=293 + 00 0c +> 2026/05/04 07:53:11.725832 length=18 from=446 to=463 + 00 24 00 09 73 79 73 6d 61 73 74 65 72 00 00 00 00 0c +< 2026/05/04 07:53:11.726016 length=28 from=294 to=321 + 00 0f 00 15 00 00 00 00 00 00 00 00 00 00 00 00 00 37 00 00 00 01 00 00 00 01 00 0c +> 2026/05/04 07:53:11.726083 length=66 from=464 to=529 + 00 02 00 00 00 00 00 34 53 45 4c 45 43 54 20 46 49 52 53 54 20 32 20 74 61 62 6e 61 6d 65 20 46 52 4f 4d 20 73 79 73 74 61 62 6c 65 73 20 4f 52 44 45 52 20 42 59 20 74 61 62 69 64 00 16 00 31 00 0c +< 2026/05/04 07:53:11.726262 length=84 from=322 to=405 + 00 08 00 02 00 00 00 00 00 00 00 81 00 01 00 00 00 08 00 00 00 00 00 00 00 00 00 0d 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 80 74 61 62 6e 61 6d 65 00 00 0f 00 10 00 00 00 00 00 00 00 00 00 00 00 00 00 37 00 00 01 14 00 00 00 25 00 0c +> 2026/05/04 07:53:11.726360 length=42 from=530 to=571 + 00 04 00 00 00 03 00 12 5f 69 66 78 63 30 30 30 30 30 30 30 30 30 30 30 30 31 00 06 00 04 00 00 00 09 00 00 10 00 00 00 00 0c +< 2026/05/04 07:53:11.726453 length=66 from=406 to=471 + 00 0e 00 00 00 00 00 0a 09 73 79 73 74 61 62 6c 65 73 00 0e 00 00 00 00 00 0b 0a 73 79 73 63 6f 6c 75 6d 6e 73 00 00 0f 00 10 00 00 00 02 00 00 03 02 00 00 00 00 00 37 00 00 01 14 00 00 00 25 00 0c +> 2026/05/04 07:53:11.726530 length=2 from=572 to=573 + 00 38 +2026/05/04 07:53:11 socat[370904] N socket 1 (fd 6) is at EOF +2026/05/04 07:53:11 socat[370904] N socket 2 (fd 5) is at EOF +2026/05/04 07:53:11 socat[370904] N exiting with status 0 diff --git a/docs/DECISION_LOG.md b/docs/DECISION_LOG.md index 76c012b..b7fa69d 100644 --- a/docs/DECISION_LOG.md +++ b/docs/DECISION_LOG.md @@ -168,6 +168,22 @@ DATETIME / INTERVAL / 
DECIMAL / NUMERIC / MONEY remain in Phase 6+ — their enc --- +## 2026-05-04 — VARCHAR row decoding: three byte-level discoveries + +**Status**: active +**Decision**: ``parse_tuple_payload`` now handles VARCHAR/NCHAR/NVCHAR with a single-byte length prefix; SQ_TUPLE payloads are padded to even byte alignment; the trailing reserved field in CURNAME+NFETCH is a SHORT not an INT. +**Why this is three findings**: each one was caught by a different debugging technique: + +1. **CURNAME+NFETCH PDU off by 2 bytes**: my reserved trailing field was `write_int(0)` (4 bytes); JDBC's reference is `write_short(0)` (2 bytes). Caught by capturing both PDUs under socat and byte-diffing — our 44-byte PDU vs JDBC's 42-byte one. The server happened to accept the longer version for INT-only SELECTs (silently treating the extra zeros as padding) but rejected it for VARCHAR queries. Lesson: **server tolerance varies by query type — always match JDBC byte-for-byte**. + +2. **SQ_TUPLE payload pads to even alignment**: when `size` is odd, an extra 0x00 byte follows the payload before the next tag. Found in `docs/CAPTURES/15-py-varchar-fixed.socat.log` — an 11-byte "syscolumns" VARCHAR payload had a trailing `0x00` that JDBC's `IfxRowColumn.readTuple` consumes silently. We weren't doing this, so the parser desynced for any odd-length variable-width row. **Even-byte alignment appears to be a protocol-wide convention for framed payloads — note that the pad follows the SQ_TUPLE payload as a whole; VARCHAR values inside the payload are not individually padded (see finding 3).** + +3. **VARCHAR in tuple uses a 1-byte length prefix, NOT a 2-byte one**: per the on-wire encoding (verified empirically in capture 15), VARCHAR values in row data are `[byte length][bytes]` — single-byte prefix, max 255 chars. NCHAR and NVCHAR follow the same pattern. (CHAR is fixed-width per encoded_length, no length prefix at all.) LVARCHAR, which can hold values longer than 255 bytes, uses a 4-byte int prefix instead. 
+ +**How to apply**: when adding new variable-width type decoders, capture a tuple under socat first to see the exact framing — don't infer from the column descriptor's `encoded_length`, which is the MAX storage, not the wire format. The bytes actually on the wire can be far fewer than `encoded_length` suggests (e.g. a 1-byte prefix plus the value itself vs encoded_length=128 for VARCHAR). + +--- + ## (template — copy below this line for new entries) ``` diff --git a/src/informix_db/_resultset.py b/src/informix_db/_resultset.py index c813cd1..210bc38 100644 --- a/src/informix_db/_resultset.py +++ b/src/informix_db/_resultset.py @@ -160,6 +160,20 @@ def parse_describe(reader: IfxStreamReader) -> tuple[list[ColumnInfo], dict]: return columns, metadata +# IDS string type codes decoded together in the tuple payload (JDBC +# reference: ``IfxSqli.receiveFastPath`` case 13/15/16). On the wire, +# VARCHAR/NCHAR/NVCHAR use ``[byte length][bytes]``; CHAR is fixed-width +# per ``encoded_length``. LVARCHAR uses a 4-byte length prefix instead. +from ._types import IfxType # noqa: E402 + +_LENGTH_PREFIXED_SHORT_TYPES = frozenset({ + int(IfxType.CHAR), + int(IfxType.VARCHAR), + int(IfxType.NCHAR), + int(IfxType.NVCHAR), +}) + + def parse_tuple_payload( reader: IfxStreamReader, columns: list[ColumnInfo], @@ -168,26 +182,77 @@ def parse_tuple_payload( Per ``IfxSqli.receiveTuple``: ``[short warn][int size][bytes payload]`` + + The payload contains column values back-to-back. For each column, the + on-wire encoding depends on the type: + + * Fixed-width types (INT, FLOAT, DATE, BIGINT, etc.): exact byte count + from ``FIXED_WIDTHS``. + * CHAR: fixed-width, ``encoded_length`` bytes. VARCHAR, NCHAR, NVCHAR: + ``[byte len][bytes]`` (single-byte length prefix, no pad). + * LVARCHAR: ``[int len][bytes][pad if odd]`` (4-byte length prefix). + * Other variable-width types (DECIMAL, DATETIME, INTERVAL, BLOBs): + Phase 6+ — currently surfaces raw bytes from ``encoded_length``. 
""" reader.read_short() # warn (Phase 5 surfaces) size = reader.read_int() payload = reader.read_exact(size) + # SQ_TUPLE payload is padded to even-byte alignment on the wire. + # Discovered empirically: a 11-byte "syscolumns" VARCHAR payload had + # a trailing 0x00 between it and the next SQ_TUPLE tag. Consuming + # this pad keeps the next read aligned. + # (See docs/CAPTURES/15-py-varchar-fixed.socat.log analysis.) + if size & 1: + reader.read_exact(1) values: list[object] = [] offset = 0 for col in columns: base = base_type(col.type_code) + + if base in _LENGTH_PREFIXED_SHORT_TYPES: + # In tuple data, VARCHAR/NCHAR/NVCHAR use a SINGLE-BYTE + # length prefix (max 255 — IDS VARCHAR's hard limit), not + # a short. Empirically verified against the SQ_TUPLE bytes + # for ``SELECT tabname FROM systables`` in + # docs/CAPTURES/13-py-varchar.socat.log: + # payload = 09 73 79 73 74 61 62 6c 65 73 + # = [byte 9]["systables"] + # CHAR is fixed-width per encoded_length — handled below. + if base == int(IfxType.CHAR): + # CHAR(N) is fixed-width; uses encoded_length straight + width = col.encoded_length + raw = payload[offset:offset + width] + offset += width + else: + length = payload[offset] + offset += 1 + raw = payload[offset:offset + length] + offset += length + values.append(decode(col.type_code, raw)) + continue + + if base == int(IfxType.LVARCHAR): + # [int length][bytes][pad if odd] + length = int.from_bytes(payload[offset:offset + 4], "big", signed=True) + offset += 4 + raw = payload[offset:offset + length] + offset += length + if length & 1: + offset += 1 + values.append(decode(col.type_code, raw)) + continue + + # Fixed-width types width = FIXED_WIDTHS.get(base) if width is None: - # Variable-width: use encoded_length from the descriptor for now. - # Phase 2.x adds per-type variable-width parsing (e.g. CHAR uses - # encoded_length, VARCHAR has a length prefix in the payload). 
+ # Phase 6+ types (DECIMAL, DATETIME, INTERVAL, BLOBs) — fall back + # to encoded_length and surface raw bytes. width = col.encoded_length - raw = payload[offset : offset + width] + raw = payload[offset:offset + width] offset += width try: values.append(decode(col.type_code, raw)) except NotImplementedError: - # Best-effort: surface the raw bytes for unsupported types values.append(raw) return tuple(values) diff --git a/src/informix_db/cursors.py b/src/informix_db/cursors.py index 3dbfc93..a445e43 100644 --- a/src/informix_db/cursors.py +++ b/src/informix_db/cursors.py @@ -224,11 +224,13 @@ class Cursor: writer.write_byte(0) writer.write_short(6) # cursor-type flag from JDBC - # NFETCH + # NFETCH (note: trailing field is a SHORT, not an int — + # caught by byte-diff against JDBC's 42-byte reference PDU, + # see docs/CAPTURES/14-py-varchar-fail.socat.log analysis) writer.write_short(MessageType.SQ_ID) writer.write_int(MessageType.SQ_NFETCH) # action = 9 writer.write_int(4096) # max bytes per fetch - writer.write_int(0) # reserved + writer.write_short(0) # reserved short (NOT int) writer.write_short(MessageType.SQ_EOT) return buf.getvalue() @@ -239,7 +241,7 @@ class Cursor: writer.write_short(MessageType.SQ_ID) writer.write_int(MessageType.SQ_NFETCH) writer.write_int(4096) - writer.write_int(0) + writer.write_short(0) # reserved short (matches JDBC, not int) writer.write_short(MessageType.SQ_EOT) return buf.getvalue() diff --git a/tests/test_select.py b/tests/test_select.py index c068ced..88bbe22 100644 --- a/tests/test_select.py +++ b/tests/test_select.py @@ -46,7 +46,7 @@ def test_select_1_description_shape(conn_params: ConnParams) -> None: col = cur.description[0] assert len(col) == 7 # (name, type_code, display_size, internal_size, precision, scale, null_ok) - name, type_code, display_size, internal_size, precision, scale, null_ok = col + name, type_code, display_size, internal_size, _precision, _scale, _null_ok = col assert name == "(constant)" assert type_code 
== 2 # IfxType.INT assert display_size == internal_size == 4 @@ -108,6 +108,51 @@ def test_two_executes_on_same_cursor(conn_params: ConnParams) -> None: assert cur.fetchone() == (2,) +def test_varchar_single_column(conn_params: ConnParams) -> None: + """VARCHAR column decoding — single-byte length prefix in tuple payload.""" + with _connect(conn_params) as conn: + cur = conn.cursor() + cur.execute("SELECT FIRST 5 tabname FROM systables ORDER BY tabid") + rows = cur.fetchall() + assert len(rows) == 5 + # All rows should have a single VARCHAR string element + for (name,) in rows: + assert isinstance(name, str) + assert len(name) > 0 + # Specifically the system-catalog tables we expect + names = [r[0] for r in rows] + assert names[0] == "systables" + assert "syscolumns" in names + + +def test_varchar_with_odd_length_padding(conn_params: ConnParams) -> None: + """Odd-length VARCHAR row — payload padding to even alignment must be consumed. + + 'syscolumns' is 10 chars but the payload is 11 bytes (1-byte length + 10 bytes). + 11 is odd, so the SQ_TUPLE format inserts a pad byte before the next message. + Regression for the bug where parse_tuple_payload didn't consume that pad. 
+ """ + with _connect(conn_params) as conn: + cur = conn.cursor() + # systables row 2 is "syscolumns" (10 chars → 11-byte payload → odd → pad) + cur.execute("SELECT FIRST 2 tabname FROM systables ORDER BY tabid") + rows = cur.fetchall() + assert rows == [("systables",), ("syscolumns",)] + + +def test_mixed_types_int_varchar_float(conn_params: ConnParams) -> None: + """Three-column SELECT mixing INT + VARCHAR + FLOAT.""" + with _connect(conn_params) as conn: + cur = conn.cursor() + cur.execute("SELECT FIRST 3 tabname, tabid, nrows FROM systables ORDER BY tabid") + rows = cur.fetchall() + assert len(rows) == 3 + for name, tabid, nrows in rows: + assert isinstance(name, str) + assert isinstance(tabid, int) + assert isinstance(nrows, float) + + def test_two_cursors_on_same_connection(conn_params: ConnParams) -> None: """Two cursors on one connection — used sequentially (Phase 4 may parallel-ize).""" with _connect(conn_params) as conn: