"""SQ_DESCRIBE column descriptor parser and SQ_TUPLE row decoder. Per IfxSqli.receiveDescribe (line 2175+) for ``isUSVER`` modern servers. The per-field block layout is: fieldIndex (int 4) columnStartPos (int 4 — USVER) columnType (short 2 — base IDS type code with high-bit flags) columnExtendedId (int 4 — USVER, for UDT/extended types) ownerName (readChar = [short len][bytes][pad if odd]) extendedName (readChar) reference (short 2) alignment (short 2) sourceType (int 4) encodedLength (int 4) After all fields: the string table (a length-prefixed block of nul-separated column names), read via readPadded. """ from __future__ import annotations from dataclasses import dataclass from types import MappingProxyType from ._protocol import IfxStreamReader from ._types import IfxType, base_type, is_nullable from .converters import ( FIXED_WIDTHS, BlobLocator, ClobLocator, CollectionValue, RowValue, _decode_datetime, _decode_interval, decode, ) # Module-level type-code constants — lifted out of the hot loop in # parse_tuple_payload so we don't pay the IntFlag→int conversion per # column per row. 
_TC_CHAR = int(IfxType.CHAR)
_TC_VARCHAR = int(IfxType.VARCHAR)
_TC_NCHAR = int(IfxType.NCHAR)
_TC_NVCHAR = int(IfxType.NVCHAR)
_TC_LVARCHAR = int(IfxType.LVARCHAR)
_TC_DECIMAL = int(IfxType.DECIMAL)
_TC_MONEY = int(IfxType.MONEY)
_TC_DATETIME = int(IfxType.DATETIME)
_TC_INTERVAL = int(IfxType.INTERVAL)
_TC_UDTFIXED = int(IfxType.UDTFIXED)
_TC_UDTVAR = int(IfxType.UDTVAR)
_TC_ROW = int(IfxType.ROW)
_TC_COLLECTION = int(IfxType.COLLECTION)
_TC_SET = int(IfxType.SET)
_TC_MULTISET = int(IfxType.MULTISET)
_TC_LIST = int(IfxType.LIST)

# Maps a collection-family type code to the user-facing ``kind`` string
# carried on CollectionValue.  MappingProxyType keeps it read-only.
_COLLECTION_KIND_MAP = MappingProxyType({
    _TC_SET: "set",
    _TC_MULTISET: "multiset",
    _TC_LIST: "list",
    _TC_COLLECTION: "collection",
})


@dataclass
class ColumnInfo:
    """One column in a SQ_DESCRIBE response."""

    name: str
    type_code: int  # base IDS type code (high-bit flags stripped)
    raw_type_code: int  # raw type-code short with flags intact
    encoded_length: int
    column_start_pos: int = 0  # USVER byte offset of the column in the tuple
    extended_id: int = 0  # USVER extended-type id (e.g. 10=BLOB, 11=CLOB)
    owner_name: str = ""
    extended_name: str = ""

    @property
    def null_ok(self) -> bool:
        """Whether the column is nullable, per the raw type-code flag bits."""
        return is_nullable(self.raw_type_code)

    def to_description_tuple(self) -> tuple:
        """The PEP 249 cursor.description 7-tuple."""
        return (
            self.name,
            self.type_code,
            self.encoded_length,  # display_size
            self.encoded_length,  # internal_size
            0,  # precision (Phase 6+ derives from type)
            0,  # scale
            self.null_ok,
        )


def _read_char(reader: IfxStreamReader, encoding: str = "iso-8859-1") -> str:
    """Read JDBC's ``readChar`` format: [short len][bytes][pad if odd-len].

    A zero or negative length both mean "no string"; neither consumes any
    further bytes from the stream.
    """
    length = reader.read_short()
    if length <= 0:
        return ""
    data = reader.read_exact(length)
    if length & 1:
        reader.read_exact(1)  # pad byte keeps the stream even-aligned
    return data.decode(encoding)


def parse_describe(reader: IfxStreamReader) -> tuple[list[ColumnInfo], dict]:
    """Parse a SQ_DESCRIBE response (the SQ_DESCRIBE tag is already consumed).

    Returns ``(columns, metadata)`` where ``columns`` is a list of
    :class:`ColumnInfo` and ``metadata`` is the raw statement header fields.
    """
    statement_type = reader.read_short()
    statement_id = reader.read_short()
    estimated_cost = reader.read_int()
    tuple_size = reader.read_short()
    nfields = reader.read_short()
    string_table_size = reader.read_int()  # 4-byte on modern servers
    metadata = {
        "statement_type": statement_type,
        "statement_id": statement_id,
        "estimated_cost": estimated_cost,
        "tuple_size": tuple_size,
        "nfields": nfields,
        "string_table_size": string_table_size,
    }
    if nfields <= 0:
        return [], metadata

    # Pass 1: per-field descriptor block (no name yet — names come from
    # the string table).
    raw_fields: list[dict] = []
    for _ in range(nfields):
        field_index = reader.read_int()
        column_start_pos = reader.read_int()
        column_type = reader.read_short()
        column_extended_id = reader.read_int()
        owner_name = _read_char(reader)
        extended_name = _read_char(reader)
        reference = reader.read_short()  # noqa: F841 (Phase 6+)
        alignment = reader.read_short()  # noqa: F841
        source_type = reader.read_int()  # noqa: F841
        encoded_length = reader.read_int()
        raw_fields.append(
            {
                "field_index": field_index,
                "column_start_pos": column_start_pos,
                "type_code": column_type,
                "extended_id": column_extended_id,
                "owner_name": owner_name,
                "extended_name": extended_name,
                "encoded_length": encoded_length,
            }
        )

    # Pass 2: string table — nul-separated column names.  readPadded.
    string_table = b""
    if string_table_size > 0:
        string_table = reader.read_exact(string_table_size)
        if string_table_size & 1:
            reader.read_exact(1)  # pad

    # Split string table on nul to get the column-name list.  The fieldIndex
    # values point into this table for each column's name.
    raw_names = string_table.split(b"\x00")
    name_lookup = {0: ""}
    cursor = 0
    for piece in raw_names:
        if piece:
            name_lookup[cursor] = piece.decode("iso-8859-1")
        cursor += len(piece) + 1  # +1 for the nul we split on

    columns: list[ColumnInfo] = []
    for fd in raw_fields:
        # fieldIndex is the byte offset where the column's name starts.
        name = name_lookup.get(fd["field_index"])
        if name is None:
            # Offset didn't land on a piece boundary — walk the string
            # table to find the name starting at this exact offset.
            tail = string_table[fd["field_index"] :].split(b"\x00", 1)[0]
            name = tail.decode("iso-8859-1") if tail else f"col{len(columns)}"
        # INVARIANT: ColumnInfo.type_code is always base-typed (high-bit
        # flags stripped).  This is the single producer site — every reader
        # (parse_tuple_payload, cursor._dereference_blob_columns, etc.)
        # depends on this and skips redundant base_type() calls.  If you
        # ever construct ColumnInfo elsewhere, base_type() the input.
        columns.append(
            ColumnInfo(
                name=name or f"col{len(columns)}",
                type_code=base_type(fd["type_code"]),
                raw_type_code=fd["type_code"],
                encoded_length=fd["encoded_length"],
                column_start_pos=fd["column_start_pos"],
                extended_id=fd["extended_id"],
                owner_name=fd["owner_name"],
                extended_name=fd["extended_name"],
            )
        )
    return columns, metadata


# The VARCHAR family as it appears in SQ_TUPLE payloads.  NOTE: despite
# the SQ_DESCRIBE-side [short len] convention, inside tuple data VARCHAR /
# NCHAR / NVCHAR carry a SINGLE-BYTE length prefix (max 255 — IDS VARCHAR's
# hard limit), and CHAR is fixed-width at encoded_length with no prefix at
# all — see the per-type handling in parse_tuple_payload.  LVARCHAR uses a
# 4-byte length prefix and is handled separately.
_LENGTH_PREFIXED_SHORT_TYPES = frozenset({
    _TC_CHAR,
    _TC_VARCHAR,
    _TC_NCHAR,
    _TC_NVCHAR,
})

_COMPOSITE_UDT_TYPES = frozenset({
    _TC_ROW,
    _TC_COLLECTION,
    _TC_SET,
    _TC_MULTISET,
    _TC_LIST,
})

_NUMERIC_TYPES = frozenset({_TC_DECIMAL, _TC_MONEY})


def parse_tuple_payload(
    reader: IfxStreamReader,
    columns: list[ColumnInfo],
    encoding: str = "iso-8859-1",
) -> tuple:
    """Parse a SQ_TUPLE payload (the SQ_TUPLE tag is already consumed).

    Per ``IfxSqli.receiveTuple``: ``[short warn][int size][bytes payload]``

    The payload contains column values back-to-back.  For each column, the
    on-wire encoding depends on the type:

    * Fixed-width types (INT, FLOAT, DATE, BIGINT, etc.): exact byte count
      from ``FIXED_WIDTHS``.
    * CHAR: fixed-width at ``encoded_length`` — no length prefix.
    * VARCHAR / NCHAR / NVCHAR: ``[byte len][bytes]`` — a single-byte
      length prefix, no pad.
    * LVARCHAR: 4-byte length prefix, padded to even length.
    * Other variable-width types (DECIMAL, DATETIME, INTERVAL, BLOBs):
      Phase 6+ — currently surfaces raw bytes from ``encoded_length``.

    ``encoding`` is forwarded to ``decode()`` for string columns.  Caller
    (typically the cursor) should pass the connection's ``encoding`` so
    user-data text honors CLIENT_LOCALE.
    """
    reader.read_short()  # warn (Phase 5 surfaces)
    size = reader.read_int()
    payload = reader.read_exact(size)
    # SQ_TUPLE payload is padded to even-byte alignment on the wire.
    # Discovered empirically: a 11-byte "syscolumns" VARCHAR payload had
    # a trailing 0x00 between it and the next SQ_TUPLE tag.  Consuming
    # this pad keeps the next read aligned.
    # (See docs/CAPTURES/15-py-varchar-fixed.socat.log analysis.)
    if size & 1:
        reader.read_exact(1)

    values: list[object] = []
    offset = 0
    # Note: ``col.type_code`` is *already* base-typed by ``parse_describe``
    # (see INVARIANT comment there), so we don't re-strip high-bit flags
    # here.  The original code called ``base_type(col.type_code)`` per
    # column per row — pure waste.  Skipping it is the single largest
    # savings in this loop.
    for col in columns:
        tc = col.type_code

        if tc in _LENGTH_PREFIXED_SHORT_TYPES:
            # In tuple data, VARCHAR/NCHAR/NVCHAR use a SINGLE-BYTE
            # length prefix (max 255 — IDS VARCHAR's hard limit).
            # Empirically verified against the SQ_TUPLE bytes for
            # ``SELECT tabname FROM systables`` in
            # docs/CAPTURES/13-py-varchar.socat.log:
            #   payload = 09 73 79 73 74 61 62 6c 65 73
            #           = [byte 9]["systables"]
            # CHAR is fixed-width per encoded_length — no prefix.
            if tc == _TC_CHAR:
                width = col.encoded_length
                raw = payload[offset:offset + width]
                offset += width
            else:
                length = payload[offset]
                offset += 1
                raw = payload[offset:offset + length]
                offset += length
            values.append(decode(tc, raw, encoding))
            continue

        if tc == _TC_LVARCHAR:
            # [int length][bytes][pad if odd]
            length = int.from_bytes(payload[offset:offset + 4], "big",
                                    signed=True)
            offset += 4
            raw = payload[offset:offset + length]
            offset += length
            if length & 1:
                offset += 1
            values.append(decode(tc, raw, encoding))
            continue

        # DECIMAL/MONEY: width = ceil(precision/2) + 1, where precision is
        # the high byte of encoded_length (packed as (precision << 8) | scale).
        # Per IfxRowColumn.loadColumnData and IfxToJavaDecimal byte sizing.
        if tc in _NUMERIC_TYPES:
            precision = (col.encoded_length >> 8) & 0xFF
            width = (precision + 1) // 2 + 1
            raw = payload[offset:offset + width]
            offset += width
            try:
                values.append(decode(tc, raw))
            except NotImplementedError:
                values.append(raw)
            continue

        # DATETIME: width = ceil(digit_count/2) + 1, where digit_count is the
        # high byte of encoded_length (packed as (digit_count << 8) |
        # (start_TU << 4) | end_TU).  The decoder needs the qualifier too,
        # so we call it directly here rather than via the dispatch.
        if tc == _TC_DATETIME:
            digit_count = (col.encoded_length >> 8) & 0xFF
            width = (digit_count + 1) // 2 + 1
            raw = payload[offset:offset + width]
            offset += width
            values.append(_decode_datetime(raw, col.encoded_length))
            continue

        # INTERVAL: same width formula as DATETIME — high byte of
        # encoded_length holds the total digit count across all fields,
        # and the wire bytes are ``[head][digit pairs]`` (one head byte
        # plus ceil(digit_count/2) digit pairs).  Like DATETIME, the
        # qualifier is needed at decode time, so we bypass the generic
        # dispatch.
        if tc == _TC_INTERVAL:
            digit_count = (col.encoded_length >> 8) & 0xFF
            width = (digit_count + 1) // 2 + 1
            raw = payload[offset:offset + width]
            offset += width
            values.append(_decode_interval(raw, col.encoded_length))
            continue

        # BLOB / CLOB (smart-LOBs): the SQ_DESCRIBE response presents
        # these as UDTFIXED (type 41) with extended_id 10 (BLOB) or 11
        # (CLOB) and encoded_length = 72 (locator size).  The 72 bytes
        # we read here are an opaque server-side reference, NOT the
        # actual data.  Phase 10 lets users fetch via lotofile + SQ_FILE.
        if tc == _TC_UDTFIXED and col.extended_id in (10, 11):
            width = col.encoded_length
            raw = payload[offset:offset + width]
            offset += width
            cls = BlobLocator if col.extended_id == 10 else ClobLocator
            values.append(cls(raw=bytes(raw)))
            continue

        # ROW / COLLECTION (Phase 12): composite UDTs.  Wire format is
        # ``[byte ind][int length][bytes]`` — same shape as
        # UDTVAR(lvarchar) below, but the payload semantics are a
        # textual representation of the composite (e.g.,
        # ``ROW('Alice',30 )`` or ``LIST{10,20,30}``) when
        # selected with default options.  JDBC requests a richer
        # binary-with-schema format that's ~30x larger; we don't.
        #
        # We surface the bytes wrapped in a typed object and let the
        # user parse the textual form themselves.  Type codes:
        # ROW=22, COLLECTION=23, SET=19, MULTISET=20, LIST=21.
        if tc in _COMPOSITE_UDT_TYPES:
            indicator = payload[offset]
            offset += 1
            if indicator == 1:  # null
                values.append(None)
                continue
            length = int.from_bytes(
                payload[offset:offset + 4], "big", signed=True
            )
            offset += 4
            raw = bytes(payload[offset:offset + length])
            offset += length
            if tc == _TC_ROW:
                values.append(RowValue(raw=raw, schema=col.extended_name))
            else:
                values.append(
                    CollectionValue(
                        raw=raw,
                        kind=_COLLECTION_KIND_MAP[tc],
                        element_schema=col.extended_name,
                    )
                )
            continue

        # UDTVAR (type 40) with extended_name="lvarchar": this is what
        # functions like ``lotofile`` return — a length-prefixed string
        # wrapped as a UDT.  The wire format adds a 1-byte indicator
        # prefix BEFORE the LVARCHAR ``[int len][bytes]``.  Empirically
        # verified against ``SELECT lotofile(...)`` row data — the
        # leading ``00`` is null indicator (0=not null, 1=null per UDT
        # convention).
        if tc == _TC_UDTVAR and col.extended_name == "lvarchar":
            indicator = payload[offset]
            offset += 1
            if indicator == 1:
                values.append(None)
                continue
            length = int.from_bytes(
                payload[offset:offset + 4], "big", signed=True
            )
            offset += 4
            raw = payload[offset:offset + length]
            offset += length
            if length & 1:
                offset += 1
            values.append(raw.decode(encoding))
            continue

        # Fixed-width types
        width = FIXED_WIDTHS.get(tc)
        if width is None:
            # Phase 6+ types (DATETIME, INTERVAL, BLOBs) — fall back
            # to encoded_length and surface raw bytes.
            width = col.encoded_length
        raw = payload[offset:offset + width]
        offset += width
        try:
            values.append(decode(tc, raw, encoding))
        except NotImplementedError:
            values.append(raw)

    return tuple(values)