From d33cd7c809b5ce5fbf48be4dbf96c71f87c035a7 Mon Sep 17 00:00:00 2001 From: Ryan Malloy Date: Sat, 9 May 2026 03:28:49 -0600 Subject: [PATCH] route_plan: add cti_failsafe_reachability tool MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Closes the bug class cucx-docs flagged at Bingham — a CTI Route Point's CFNA destination points at a number that is structurally unreachable from the configured CFNA-CSS, so the failsafe forward fires but finds no matching pattern and the call dies. Invisible from any single-record inspection (CTI RP record looks fine, destination pattern exists in some partition, CSS is fine — defect lives in the relationship between CFNA-CSS and destination's partition). The motivating Bingham finding (life-safety severity): 912-CTI-RP (Secondary CER) CFNA + CFUR → "10911" via 911CER-CSS Pattern "10.911" exists in CER911-PT 911CER-CSS does NOT contain CER911-PT → failsafe is structurally broken; both CER servers down would produce fast-busy on 911 calls instead of routing through ELIN-10 to the PSAP Implementation per axl/agent-threads/cti-audit-prompts/002: - Tool, not prompt — output is structured + deterministic; same shape as route_patterns_targeting (Q1 confirmed as proposed) - Three-tier severity: HIGH for life-safety descriptions, MEDIUM for non-life-safety, no LOW (Q2 refined from cucx-docs's binary proposal — every broken forward is a real bug, just not all are 911) - Scope: CFNA + CFUR only for v1; CFB excluded by design (Q3 confirmed — CTI RPs rarely go busy) - Lives in route_plan.py alongside route_patterns_targeting + device_grep + translation_chain (Q5 — defer cti.py namespace until adjacent prompts land) - Named cti_failsafe_reachability not _audit (Q4 — drops the _audit suffix per the established tool-vs-prompt naming split; tools use direct-action names, prompts use _audit) Life-safety token list (case-insensitive substring match against name AND description): ("emergency", "911", "cer", 
"psap", "panic", "alert") Suggested-fix message names the partition where the destination's pattern lives and proposes either "add partition X to CSS Y" or "change CSS to a CSS containing partition X." Falls back to a generic "manual investigation needed" message when the destination matches no exact-literal pattern in any partition (often means a wildcard pattern is the actual target). Tests: 26 in TestLifeSafetyDetection + TestCtiFailsafeReachability: - 16 token-matching cases (10 positive, 4 negative, 2 sentinel) - 10 tool-level cases including the canonical Bingham bug reproduced verbatim (assertion compares the entire finding dict to the expected output from cucx-docs's 001 message) Full mcaxl suite: 238 → 264 passing (+26 from this work). Adjacent prompts cucx-docs flagged as lower-priority follow-ups (cti_route_point_audit, cti_port_pool_audit, cti_application_user_audit) deferred but tracked. --- src/mcaxl/route_plan.py | 190 ++++++++++++++ src/mcaxl/server.py | 23 ++ tests/test_cti_failsafe_reachability.py | 315 ++++++++++++++++++++++++ 3 files changed, 528 insertions(+) create mode 100644 tests/test_cti_failsafe_reachability.py diff --git a/src/mcaxl/route_plan.py b/src/mcaxl/route_plan.py index 26ddab8..94077d5 100644 --- a/src/mcaxl/route_plan.py +++ b/src/mcaxl/route_plan.py @@ -587,6 +587,196 @@ def _expand_charclass(spec: str) -> list[str]: return sorted(chars) +# Description / name substrings that signal a CTI Route Point is on a +# life-safety code path. Match is case-insensitive substring against +# `description` AND `name` — covers naming conventions like `911-CTI-RP` +# and descriptions like "CER Primary Failover." Audit teams can extend +# this list site-locally if their deployment uses other vocabulary +# (e.g., "BLUE-ALERT", "CODE-GRAY") for the same role. +_LIFE_SAFETY_TOKENS: tuple[str, ...] 
= ( + "emergency", "911", "cer", "psap", "panic", "alert", +) + + +def _is_life_safety_cti(name: str | None, description: str | None) -> bool: + haystack = " ".join([(name or ""), (description or "")]).lower() + return any(tok in haystack for tok in _LIFE_SAFETY_TOKENS) + + +def cti_failsafe_reachability(client: "AxlClient") -> dict: + """Find CTI Route Points whose CFNA or CFUR forward destination is + unreachable from the configured forward CSS — a defect class + invisible from any single-record inspection. + + The bug shape: a CTI RP has a CFNA destination string that LOOKS + valid (and IS, in some other partition), and a CFNA-CSS that LOOKS + valid, but the CSS doesn't reach the partition where the destination's + matching pattern lives. The forward fires, finds nothing, and the + call dies with fast-busy or unreachable-destination tone. + + Catching this requires cross-referencing CFNA-destination + + CFNA-CSS + reachable-partitions + matching-pattern. This tool + mechanizes that cross-reference for every CTI RP in the cluster. + + Scope (v1): CFNA + CFUR forwards only. CFB (Call Forward Busy) is + excluded by design — CTI RPs rarely go busy in the operator sense, + so the failsafe-relevant forwards are CFNA + CFUR. If CFB findings + matter on a specific deployment, the join shape is identical and + extending is mechanical; for now, scope discipline. + + Returns: + ``{total_cti_route_points, checked, broken_cfna, broken_cfur, + findings: [{device, description, forward_kind, destination, css, + match_count: 0, severity, reachable_partitions_in_css, + suggested_fix}, ...]}`` + + One ``findings`` entry per broken forward (not per device) — flatter + output is easier to sort + filter for operator tooling. A device + with both CFNA and CFUR broken produces two entries. 
+ + Severity classification: + - ``HIGH`` — description or name matches a life-safety token + (see ``_LIFE_SAFETY_TOKENS``) AND the forward is broken + - ``MEDIUM`` — non-life-safety CTI RP with broken forward; still + a real bug, just not 911 + + Working CFNAs/CFURs are not reported. Output focuses on broken + forwards only. + + Source observation: cucx-docs found a HIGH-severity case at Bingham + where ``912-CTI-RP`` (Secondary CER) had CFNA + CFUR pointed at + ``10911`` with CFNA-CSS = ``911CER-CSS``. The pattern ``10.911`` + exists in ``CER911-PT``, but ``911CER-CSS`` doesn't contain that + partition — so the failsafe was structurally broken. See + ``axl/agent-threads/cti-audit-prompts/001`` for the full setup. + """ + sql = """ + SELECT + d.name, + d.description, + n.cfnadestination, + n.cfurdestination, + css1.name AS cfna_css_name, + css2.name AS cfur_css_name + FROM device d + JOIN typeclass tc ON d.tkclass = tc.enum + LEFT OUTER JOIN devicenumplanmap m ON m.fkdevice = d.pkid + LEFT OUTER JOIN numplan n ON m.fknumplan = n.pkid + LEFT OUTER JOIN callingsearchspace css1 + ON n.fkcallingsearchspace_cfna = css1.pkid + LEFT OUTER JOIN callingsearchspace css2 + ON n.fkcallingsearchspace_cfur = css2.pkid + WHERE tc.name = 'CTI Route Point' + AND (n.cfnadestination IS NOT NULL OR n.cfurdestination IS NOT NULL) + ORDER BY d.name + """ + result = client.execute_sql_query(sql) + rows = result["rows"] + + total_cti_rps = len(rows) + findings: list[dict] = [] + broken_cfna = 0 + broken_cfur = 0 + + for row in rows: + name = row.get("name") + description = row.get("description") + is_life_safety = _is_life_safety_cti(name, description) + + for forward_kind in ("cfna", "cfur"): + dest = row.get(f"{forward_kind}destination") + css = row.get(f"{forward_kind}_css_name") + if not dest or not css: + # No forward configured for this kind; not a defect + continue + + chain = translation_chain(client, number=dest, css_name=css) + if chain["match_count"] > 0: + continue # working 
forward; no finding + + if forward_kind == "cfna": + broken_cfna += 1 + else: + broken_cfur += 1 + + findings.append({ + "device": name, + "description": description, + "forward_kind": forward_kind, + "destination": dest, + "css": css, + "match_count": 0, + "severity": "HIGH" if is_life_safety else "MEDIUM", + "suggested_fix": _suggest_failsafe_fix(client, dest, css), + }) + + return { + "total_cti_route_points": total_cti_rps, + "checked": total_cti_rps, + "broken_cfna": broken_cfna, + "broken_cfur": broken_cfur, + "findings": findings, + "_note": ( + "Scope: CFNA + CFUR only. CFB (busy-forward) excluded by " + "design — CTI RPs rarely go busy. Severity HIGH when name " + "or description contains any life-safety token " + f"({', '.join(_LIFE_SAFETY_TOKENS)})." + ), + } + + +def _suggest_failsafe_fix(client: "AxlClient", dest: str, broken_css: str) -> str: + """Produce a fixed-template fix suggestion for a broken CFNA/CFUR. + + Looks up which partition(s) hold a matching pattern for ``dest``, + then suggests either adding that partition to the broken CSS or + switching the CSS to one that includes it. + + Falls back to a generic message if the destination matches no + pattern in any partition (rarer; usually means the destination is + a literal extension that was deleted). + """ + safe_dest = _esc(dest) + sql = f""" + SELECT DISTINCT rp.name AS partition + FROM numplan np + LEFT OUTER JOIN routepartition rp ON np.fkroutepartition = rp.pkid + WHERE np.dnorpattern = '{safe_dest}' + AND rp.name IS NOT NULL + """ + try: + result = client.execute_sql_query(sql) + except Exception: + return ( + f"Destination {dest!r} is unreachable from CSS {broken_css!r}. " + "Manual investigation needed to identify the correct partition." + ) + + partitions = [r["partition"] for r in result["rows"] if r.get("partition")] + + if not partitions: + return ( + f"Destination {dest!r} matches no exact-literal pattern in any " + f"partition. 
Either the destination string is wrong or it " + f"matches a wildcard pattern (use route_translation_chain to " + f"investigate further)." + ) + + if len(partitions) == 1: + part = partitions[0] + return ( + f"Pattern {dest!r} lives in partition {part!r}. Either add " + f"{part!r} to CSS {broken_css!r}, OR change the forward CSS " + f"to a CSS that already contains {part!r}." + ) + + return ( + f"Pattern {dest!r} exists in multiple partitions ({', '.join(partitions)}). " + f"Identify the intended target partition, then either add it to " + f"CSS {broken_css!r} or change the forward CSS accordingly." + ) + + def list_route_lists_and_groups(client: "AxlClient", name: str | None = None) -> dict: """Route lists with their ordered route groups and member gateways/trunks. diff --git a/src/mcaxl/server.py b/src/mcaxl/server.py index a08b37c..d7a9831 100644 --- a/src/mcaxl/server.py +++ b/src/mcaxl/server.py @@ -239,6 +239,29 @@ def route_inspect_pattern(pattern: str, partition: str | None = None) -> dict: return route_plan.inspect_pattern(_client(), pattern, partition) +@mcp.tool +def cti_failsafe_reachability() -> dict: + """Find CTI Route Points whose CFNA or CFUR forward destination is + unreachable from the configured forward CSS — a defect class + invisible from any single-record inspection. + + The bug shape: a CTI RP has a CFNA destination string that LOOKS + valid (and IS, in some other partition), and a CFNA-CSS that LOOKS + valid, but the CSS doesn't reach the partition where the + destination's matching pattern lives. The forward fires, finds + nothing, and the call dies with fast-busy. + + Severity is HIGH for CTI RPs whose name or description contains a + life-safety token (911, emergency, CER, PSAP, panic, alert); + MEDIUM otherwise. Working forwards are not reported. + + Scope (v1): CFNA + CFUR only. CFB excluded by design — CTI RPs + rarely go busy. See `axl/agent-threads/cti-audit-prompts/` for the + motivating bug observation and architectural decisions. 
"""Tests for cti_failsafe_reachability — find broken CFNA/CFUR forwards.

Source: cucx-docs handoff at
``axl/agent-threads/cti-audit-prompts/001-cucx-cfna-reachability-audit.md``
documenting a real life-safety bug at Bingham (912-CTI-RP CFNA →
'10911' under 911CER-CSS, where '10.911' lives in CER911-PT which
911CER-CSS doesn't reach).

The tool composes three SQL queries per broken forward:
  1. Top-level forwards SQL (fetch CTI RPs with CFNA/CFUR set)
  2. translation_chain's SQL (per-forward reachability check)
  3. _suggest_failsafe_fix's partition-lookup SQL (one per finding)

The FakeAxlClient dispatches by query content rather than sequence
because the order of (2) and (3) interleaves across multiple findings.
"""

import pytest

from mcaxl.route_plan import (
    _LIFE_SAFETY_TOKENS,
    _is_life_safety_cti,
    cti_failsafe_reachability,
)


class FakeAxlClient:
    """Dispatching fake — returns canned responses keyed on SQL content.

    Constructor takes:
      - cti_rp_rows: rows for the top-level "find CTI RPs with forwards" query
      - reachable_destinations: set of (destination, css) pairs that have a
        matching pattern (the fake returns a pattern row for these)
      - destination_partitions: dict {destination: [partition_name, ...]}
        used by the _suggest_failsafe_fix's partition-lookup query

    Attributes:
      - queries: every SQL string received, in call order
      - unhandled_queries: SQL strings that matched no dispatch branch, so
        a test can assert on them when it wants unexpected queries to be
        visible rather than silently answered with an empty result
    """

    def __init__(
        self,
        cti_rp_rows: list[dict],
        reachable_destinations: set[tuple[str, str]] | None = None,
        destination_partitions: dict[str, list[str]] | None = None,
    ):
        self._cti_rows = cti_rp_rows
        self._reachable = reachable_destinations or set()
        self._dest_partitions = destination_partitions or {}
        self.queries: list[str] = []
        self.unhandled_queries: list[str] = []

    def execute_sql_query(self, sql: str) -> dict:
        self.queries.append(sql)

        # Dispatch 1: top-level "find CTI RPs with CFNA/CFUR" query
        if "tc.name = 'CTI Route Point'" in sql and "cfnadestination" in sql:
            return {"row_count": len(self._cti_rows), "rows": self._cti_rows}

        # Dispatch 2: translation_chain's reachability check
        # Recognizable by `tkpatternusage IN (3, 5, 7)` from route_plan.py
        if "tkpatternusage IN (3, 5, 7)" in sql:
            # Only the CSS is recoverable from the SQL text, so return one
            # candidate pattern row for EVERY reachable destination under
            # that CSS and let translation_chain's own matcher decide
            # whether the dialed number matches. Sorted so the result does
            # not depend on set iteration order.
            rows = [
                {
                    "pattern": dest,
                    "pattern_type": "Translation",
                    "partition_name": "Reachable-PT",
                    "calling_party_xform_mask": None,
                    "called_party_xform_mask": None,
                    "prefix_digits_out": None,
                    "digit_discard_instructions": None,
                    "route_filter": None,
                    "description": "fake-reachable",
                }
                for dest, css in sorted(self._reachable)
                if f"name = '{css}'" in sql
            ]
            return {"row_count": len(rows), "rows": rows}

        # Dispatch 3: _suggest_failsafe_fix's partition-lookup query
        if "rp.name IS NOT NULL" in sql and "np.dnorpattern" in sql:
            # Extract the dnorpattern literal from the SQL
            for dest, parts in self._dest_partitions.items():
                if f"np.dnorpattern = '{dest}'" in sql:
                    rows = [{"partition": p} for p in parts]
                    return {"row_count": len(rows), "rows": rows}
            return {"row_count": 0, "rows": []}

        # Anything else — answer empty, but remember the query so an
        # unexpected query path is observable from the test.
        self.unhandled_queries.append(sql)
        return {"row_count": 0, "rows": []}


def _cti_row(name, description, cfna=None, cfur=None, cfna_css=None, cfur_css=None):
    """Build one row shaped like the tool's top-level CTI RP query result."""
    return {
        "name": name,
        "description": description,
        "cfnadestination": cfna,
        "cfurdestination": cfur,
        "cfna_css_name": cfna_css,
        "cfur_css_name": cfur_css,
    }


# ─── Life-safety token detection (helper in isolation) ────────────────

class TestLifeSafetyDetection:
    @pytest.mark.parametrize("description", [
        "Primary CER Server",
        "911 CTI Route Point",
        "Emergency CER",
        "PSAP gateway",
        "PANIC button receiver",
        "Code BLUE Alert",
    ])
    def test_life_safety_tokens_match(self, description):
        assert _is_life_safety_cti("some-name", description) is True

    @pytest.mark.parametrize("name", [
        "911-CTI-RP",
        "EMERGENCY-RP",
        "CER-Primary",
        "psap-gateway",
    ])
    def test_token_matched_in_name_field(self, name):
        # Tokens match against name OR description — some clusters tag
        # the role in the name field rather than the description
        assert _is_life_safety_cti(name, "Generic CTI Route Point") is True

    @pytest.mark.parametrize("description", [
        "Patient Intake CTI Route Point",
        "Voicemail Pilot",
        "Receptionist Hunt Pilot",
        "Generic application route point",
    ])
    def test_non_life_safety_descriptions(self, description):
        assert _is_life_safety_cti("regular-rp", description) is False

    def test_null_name_and_description_does_not_match(self):
        assert _is_life_safety_cti(None, None) is False
        assert _is_life_safety_cti("", "") is False

    def test_advertised_token_list_is_what_we_implement(self):
        # If the token list grows or shrinks, the docstring + agent-thread
        # reply must be updated alongside. Catches accidental drift.
        assert _LIFE_SAFETY_TOKENS == (
            "emergency", "911", "cer", "psap", "panic", "alert",
        )


# ─── Tool-level integration ──────────────────────────────────────────

class TestCtiFailsafeReachability:

    def test_no_cti_route_points_returns_empty_findings(self):
        client = FakeAxlClient(cti_rp_rows=[])
        result = cti_failsafe_reachability(client)
        assert result["total_cti_route_points"] == 0
        assert result["broken_cfna"] == 0
        assert result["broken_cfur"] == 0
        assert result["findings"] == []

    def test_working_cfna_produces_no_finding(self):
        client = FakeAxlClient(
            cti_rp_rows=[
                _cti_row("Working-RP", "Patient intake", cfna="5550100", cfna_css="Internal-CSS"),
            ],
            reachable_destinations={("5550100", "Internal-CSS")},
        )
        result = cti_failsafe_reachability(client)
        assert result["broken_cfna"] == 0
        assert result["findings"] == []

    def test_broken_cfna_non_life_safety_is_medium(self):
        client = FakeAxlClient(
            cti_rp_rows=[
                _cti_row("Generic-RP", "Patient intake", cfna="5550100", cfna_css="BadCSS"),
            ],
            reachable_destinations=set(),  # nothing reachable
            destination_partitions={"5550100": ["Internal-PT"]},
        )
        result = cti_failsafe_reachability(client)
        assert result["broken_cfna"] == 1
        assert len(result["findings"]) == 1
        finding = result["findings"][0]
        assert finding["device"] == "Generic-RP"
        assert finding["forward_kind"] == "cfna"
        assert finding["destination"] == "5550100"
        assert finding["css"] == "BadCSS"
        assert finding["match_count"] == 0
        assert finding["severity"] == "MEDIUM"
        assert "Internal-PT" in finding["suggested_fix"]
        assert "BadCSS" in finding["suggested_fix"]

    def test_broken_cfna_life_safety_is_high(self):
        client = FakeAxlClient(
            cti_rp_rows=[
                _cti_row("911-CTI-RP", "Emergency dispatch", cfna="10911", cfna_css="911CER-CSS"),
            ],
            destination_partitions={"10911": ["CER911-PT"]},
        )
        result = cti_failsafe_reachability(client)
        assert result["findings"][0]["severity"] == "HIGH"

    def test_broken_cfna_and_cfur_produce_two_findings(self):
        # Same device with both forwards broken — should produce TWO entries
        # (per-forward, not per-device, per the design decision)
        client = FakeAxlClient(
            cti_rp_rows=[
                _cti_row(
                    "912-CTI-RP", "CTI RP for Secondary CER Server",
                    cfna="10911", cfna_css="911CER-CSS",
                    cfur="10911", cfur_css="911CER-CSS",
                ),
            ],
            destination_partitions={"10911": ["CER911-PT"]},
        )
        result = cti_failsafe_reachability(client)
        assert result["broken_cfna"] == 1
        assert result["broken_cfur"] == 1
        assert len(result["findings"]) == 2
        kinds = {f["forward_kind"] for f in result["findings"]}
        assert kinds == {"cfna", "cfur"}
        # Both should be HIGH (description contains "CER")
        assert all(f["severity"] == "HIGH" for f in result["findings"])

    def test_only_cfna_set_does_not_check_cfur(self):
        # CFUR null → don't check it (not a finding)
        client = FakeAxlClient(
            cti_rp_rows=[
                _cti_row("Half-RP", "Generic", cfna="9999", cfna_css="BadCSS"),
            ],
            destination_partitions={"9999": ["Some-PT"]},
        )
        result = cti_failsafe_reachability(client)
        assert result["broken_cfna"] == 1
        assert result["broken_cfur"] == 0

    def test_canonical_bingham_bug_reproduced(self):
        """The canary scenario from cucx-docs's 001 — verifies the tool
        produces exactly the expected output for the motivating bug."""
        client = FakeAxlClient(
            cti_rp_rows=[
                _cti_row(
                    "912-CTI-RP", "CTI RP for Secondary CER Server",
                    cfna="10911", cfna_css="911CER-CSS",
                    cfur="10911", cfur_css="911CER-CSS",
                ),
            ],
            destination_partitions={"10911": ["CER911-PT"]},
        )
        result = cti_failsafe_reachability(client)

        cfna_finding = next(f for f in result["findings"] if f["forward_kind"] == "cfna")
        assert cfna_finding == {
            "device": "912-CTI-RP",
            "description": "CTI RP for Secondary CER Server",
            "forward_kind": "cfna",
            "destination": "10911",
            "css": "911CER-CSS",
            "match_count": 0,
            "severity": "HIGH",  # description contains "CER"
            "suggested_fix": (
                "Pattern '10911' lives in partition 'CER911-PT'. "
                "Either add 'CER911-PT' to CSS '911CER-CSS', "
                "OR change the forward CSS to a CSS that already "
                "contains 'CER911-PT'."
            ),
        }

    def test_suggested_fix_when_no_partition_holds_destination(self):
        # Edge case: destination doesn't match any literal pattern
        # (might match a wildcard, but not an exact-literal). Suggest_fix
        # falls back to a generic message.
        client = FakeAxlClient(
            cti_rp_rows=[
                _cti_row("Wild-RP", "Generic", cfna="orphan-dest", cfna_css="BadCSS"),
            ],
            destination_partitions={},  # no partition holds 'orphan-dest'
        )
        result = cti_failsafe_reachability(client)
        fix = result["findings"][0]["suggested_fix"]
        assert "matches no exact-literal pattern" in fix
        assert "wildcard" in fix.lower()

    def test_suggested_fix_when_destination_in_multiple_partitions(self):
        # Edge case: destination matches in multiple partitions; the
        # fix message lists them and asks the operator to pick.
        client = FakeAxlClient(
            cti_rp_rows=[
                _cti_row("Multi-RP", "Generic", cfna="5555", cfna_css="BadCSS"),
            ],
            destination_partitions={"5555": ["Site-A-PT", "Site-B-PT"]},
        )
        result = cti_failsafe_reachability(client)
        fix = result["findings"][0]["suggested_fix"]
        assert "multiple partitions" in fix
        assert "Site-A-PT" in fix
        assert "Site-B-PT" in fix

    def test_response_includes_scope_note(self):
        client = FakeAxlClient(cti_rp_rows=[])
        result = cti_failsafe_reachability(client)
        assert "_note" in result
        # Scope discipline visible at the call site — CFB exclusion is
        # documented, and the life-safety token list is named.
        assert "CFB" in result["_note"]
        assert "emergency" in result["_note"]