Two ideas borrowed from cisco-cucm-mcp (calltelemetry/cisco-cucm-mcp,
MIT licensed): real-time device registration via RisPort70, and
exponential-backoff retry on transient HTTP 5xx errors. Both are
purpose-built for the audit use case rather than general-purpose
ports — RisPort tools exist to inform audit findings, not as a
standalone "look at my devices" interface.
Rate limit / 503 backoff (~30 lines + 3 tests):
AxlClient now mounts an HTTPAdapter with a urllib3 Retry policy
(3 retries, exponential backoff, status_forcelist=[502,503,504]).
Configurable via AXL_RATE_LIMIT_RETRIES (default 3, 0 disables).
Surfaces in connection_status() so operators can see the policy.
Closes a real reliability gap: CUCM SOAP rate-limits under load
during change windows or with multiple concurrent admins; pre-fix
any 503 was a hard failure.
RisPort70 (new src/risport.py + 2 tools + prompt update):
Hand-coded SOAP client for /realtimeservice2/services/RISService70
(avoids dragging in another zeep instance for one operation).
Reuses AXL_URL/USER/PASS env vars — RisPort lives on the same host.
New tools:
device_registration_status(device_class, status, name_filter, page_size)
device_registration_summary() — cluster-wide breakdown by class
Live-cluster verification (cucm-pub.binghammemorial.org):
Phone: 803 registered=679 unregistered=123 rejected=1
Gateway: 85 registered=41 rejected=44 ← real audit finding
SIPTrunk: 22 registered=18 unregistered=4
HuntList: 28 registered=28
H323/CTI: 0 (cluster doesn't use these)
Discovered while live-verifying: CUCM 15 wraps the RisPort response
in an extra <SelectCmDeviceResult> element inside <selectCmDeviceReturn>.
Older CUCM versions exposed the fields directly. The parser accepts
either shape, and tests cover both (test_legacy_response_shape_still_parses
asserts that the older shape still works).
phone_inventory_report prompt updated:
New Step 3 — "Cross-reference with real-time registration" — recommends
device_registration_summary() + device_registration_status(status="UnRegistered")
to surface configured-but-never-registered phones (strongest orphan signal),
PartiallyRegistered phones (firewall/cert/version mismatch indicator),
and registration-state vs config-state mismatches.
Tooling delta worth noting:
AXL device count: 1,377 phones
RisPort device count: 803 phones
Delta (574) is likely templates, hidden phones, or stale config —
itself an audit finding that the new tool will surface
to anyone running phone_inventory_report.
README updated:
- Added health(), device_registration_status, device_registration_summary
- Added "Scope and complement" section recommending @calltelemetry/cisco-cucm-mcp
alongside for operational debugging (logs, perfmon, packet capture,
service control). The two servers answer different questions; the LLM
with both can compose audit findings with operational state.
- Listed all 10 prompts (was 4 outdated entries).
Tests: 134 → 155 (+21).
377 lines
14 KiB
Python
377 lines
14 KiB
Python
"""AXL SOAP client wrapper.
|
|
|
|
Lazy connection — instantiated on first tool call, not at server boot.
|
|
This means the FastMCP server registers tools and prompts immediately,
|
|
even if the cluster is unreachable, and the user gets a clear error
|
|
only when they actually invoke a tool that needs CUCM.
|
|
"""
|
|
|
|
from __future__ import annotations
|
|
|
|
import os
|
|
import sys
|
|
import urllib3
|
|
from pathlib import Path
|
|
from typing import Any
|
|
|
|
from requests import Session
|
|
from requests.auth import HTTPBasicAuth
|
|
from zeep import Client, Settings
|
|
from zeep.cache import SqliteCache
|
|
from zeep.transports import Transport
|
|
|
|
from .cache import AxlCache
|
|
from .sql_validator import validate_select
|
|
from .wsdl_loader import resolve_wsdl_path
|
|
|
|
|
|
class _ConfigError(RuntimeError):
    """Configuration problem that will not fix itself — pinned, never retried.

    Internal marker type. It separates failures such as a missing env var
    or a bad WSDL path (the operator has to change something first) from
    operational failures such as network blips or session timeouts (where
    simply trying again is the right response).
    """
|
|
|
|
|
|
class AxlClient:
|
|
"""Lazy-loaded zeep client for CUCM AXL.
|
|
|
|
Hamilton review MAJOR #5: distinguishes configuration errors (pinned —
|
|
they don't get better on retry) from operational errors (transient —
|
|
next call should attempt fresh). Pre-fix, ANY first-time failure
|
|
pinned the client forever and required a server restart.
|
|
"""
|
|
|
|
def __init__(self, response_cache: AxlCache):
|
|
self._client: Client | None = None
|
|
self._service: Any = None
|
|
self._response_cache = response_cache
|
|
self._config_error: str | None = None # permanent, pinned
|
|
self._last_error: str | None = None # last seen, may be transient
|
|
self._connected_at: float | None = None # monotonic time of last success
|
|
self._retry_config: dict | None = None # populated when session is built
|
|
|
|
def connection_status(self) -> dict:
|
|
"""Diagnostic snapshot — what's the state of the connection?
|
|
|
|
Useful for the `health` MCP tool and for operators trying to
|
|
figure out why a tool call failed. Reports whether we're
|
|
currently connected, when we last successfully connected, the
|
|
last error (config or operational), and the rate-limit retry
|
|
policy in effect.
|
|
"""
|
|
return {
|
|
"connected": self._service is not None,
|
|
"connected_at_monotonic": self._connected_at,
|
|
"config_error": self._config_error, # permanent until restart
|
|
"last_error": self._last_error,
|
|
"retry_config": self._retry_config,
|
|
}
|
|
|
|
def _ensure_connected(self) -> None:
|
|
if self._service is not None:
|
|
return
|
|
# Configuration errors are permanent — don't waste time retrying.
|
|
if self._config_error is not None:
|
|
raise _ConfigError(self._config_error)
|
|
|
|
# Read env vars FIRST. Missing env is a config error (pinned).
|
|
try:
|
|
url = os.environ["AXL_URL"]
|
|
user = os.environ["AXL_USER"]
|
|
password = os.environ["AXL_PASS"]
|
|
except KeyError as e:
|
|
self._config_error = (
|
|
f"Missing required env var {e.args[0]}. "
|
|
f"Set AXL_URL, AXL_USER, AXL_PASS in .env or the environment."
|
|
)
|
|
self._last_error = self._config_error
|
|
raise _ConfigError(self._config_error) from None
|
|
|
|
# CUCM's AXL endpoint 302-redirects /axl to /axl/. The redirect
|
|
# converts POST to GET (standard HTTP/1.1 behavior for 302), which
|
|
# makes the SOAP request silently fail with an HTML status page.
|
|
# Normalize the trailing slash so users don't need to remember.
|
|
if not url.rstrip().endswith("/"):
|
|
url = url.rstrip() + "/"
|
|
|
|
verify_tls = os.environ.get("AXL_VERIFY_TLS", "false").lower() in ("1", "true", "yes")
|
|
if not verify_tls:
|
|
urllib3.disable_warnings(urllib3.exceptions.InsecureRequestWarning)
|
|
|
|
wsdl_path = resolve_wsdl_path()
|
|
|
|
session = Session()
|
|
session.verify = verify_tls
|
|
session.auth = HTTPBasicAuth(user, password)
|
|
|
|
# Rate-limit / transient-error retry. CUCM's SOAP layer returns 503
|
|
# under load (multiple admins running AXL queries during a change
|
|
# window, etc). 502/504 occur when the publisher is restarting or
|
|
# a load balancer is between us and CUCM. Pre-fix, any of these
|
|
# was a hard failure to the caller; now they're retried with
|
|
# exponential backoff.
|
|
from requests.adapters import HTTPAdapter
|
|
from urllib3.util.retry import Retry
|
|
max_retries = int(os.environ.get("AXL_RATE_LIMIT_RETRIES", "3"))
|
|
if max_retries > 0:
|
|
retry = Retry(
|
|
total=max_retries,
|
|
backoff_factor=1.0, # 1s, 2s, 4s between retries
|
|
status_forcelist=(502, 503, 504),
|
|
allowed_methods=frozenset(["POST", "GET"]),
|
|
raise_on_status=False, # let zeep see the final response
|
|
respect_retry_after_header=True,
|
|
)
|
|
adapter = HTTPAdapter(max_retries=retry)
|
|
session.mount("https://", adapter)
|
|
session.mount("http://", adapter)
|
|
self._retry_config = {
|
|
"max_retries": max_retries,
|
|
"backoff_factor": 1.0,
|
|
"status_forcelist": [502, 503, 504],
|
|
}
|
|
|
|
# zeep's own WSDL cache (separate from our response cache) keeps
|
|
# repeat startups fast — it parses the WSDL once and reuses
|
|
from platformdirs import user_cache_dir
|
|
zeep_cache_path = Path(user_cache_dir("mcp-cucm-axl")) / "zeep_wsdl.db"
|
|
zeep_cache_path.parent.mkdir(parents=True, exist_ok=True)
|
|
|
|
transport = Transport(
|
|
session=session,
|
|
cache=SqliteCache(path=str(zeep_cache_path), timeout=86400),
|
|
timeout=30,
|
|
)
|
|
|
|
try:
|
|
self._client = Client(
|
|
wsdl=str(wsdl_path),
|
|
settings=Settings(strict=False, xml_huge_tree=True),
|
|
transport=transport,
|
|
)
|
|
# AXL endpoint is the AXL_URL itself; override the WSDL's default
|
|
# service location which usually points at a placeholder host.
|
|
self._service = self._client.create_service(
|
|
"{http://www.cisco.com/AXLAPIService/}AXLAPIBinding",
|
|
url,
|
|
)
|
|
import time as _time
|
|
self._connected_at = _time.monotonic()
|
|
self._last_error = None # operational state is now clean
|
|
print(
|
|
f"[mcp-cucm-axl] connected to {url} (TLS verify={verify_tls})",
|
|
file=sys.stderr,
|
|
flush=True,
|
|
)
|
|
except Exception as e:
|
|
# Operational error (network, TLS, WSDL fetch failure). Don't
|
|
# pin — the next call should be allowed to retry. Just record
|
|
# the last error for diagnostics.
|
|
self._last_error = f"AXL connection failed: {e}"
|
|
print(
|
|
f"[mcp-cucm-axl] {self._last_error} (operational, will retry on next call)",
|
|
file=sys.stderr,
|
|
flush=True,
|
|
)
|
|
raise RuntimeError(self._last_error) from e
|
|
|
|
# ---- read-only operations ----
|
|
|
|
def get_ccm_version(self) -> dict:
|
|
cached = self._response_cache.get("getCCMVersion", {})
|
|
if cached is not None:
|
|
return cached
|
|
self._ensure_connected()
|
|
resp = self._service.getCCMVersion()
|
|
# zeep CompoundValue → dict; the actual payload is under "return"
|
|
full = _zeep_to_dict(resp)
|
|
result = full.get("return", full) if isinstance(full, dict) else full
|
|
self._response_cache.set("getCCMVersion", {}, result, ttl=3600)
|
|
return result
|
|
|
|
def execute_sql_query(self, query: str) -> dict:
|
|
cleaned = validate_select(query)
|
|
cached = self._response_cache.get("executeSQLQuery", {"sql": cleaned})
|
|
if cached is not None:
|
|
return {**cached, "_cache": "hit"}
|
|
self._ensure_connected()
|
|
resp = self._service.executeSQLQuery(sql=cleaned)
|
|
rows = _parse_sql_rows(resp)
|
|
result = {"row_count": len(rows), "rows": rows, "query": cleaned}
|
|
self._response_cache.set("executeSQLQuery", {"sql": cleaned}, result)
|
|
return {**result, "_cache": "miss"}
|
|
|
|
def list_informix_tables(self, pattern: str | None = None) -> dict:
|
|
# systables is the Informix system catalog. tabid > 99 filters out
|
|
# internal/system tables and leaves CUCM's data dictionary tables.
|
|
if pattern:
|
|
safe_pattern = pattern.replace("'", "''")
|
|
sql = (
|
|
"SELECT tabname FROM systables "
|
|
f"WHERE tabid > 99 AND tabname LIKE '{safe_pattern}' "
|
|
"ORDER BY tabname"
|
|
)
|
|
else:
|
|
sql = "SELECT tabname FROM systables WHERE tabid > 99 ORDER BY tabname"
|
|
result = self.execute_sql_query(sql)
|
|
names = [row.get("tabname") for row in result.get("rows", []) if row.get("tabname")]
|
|
return {"table_count": len(names), "tables": names, "pattern": pattern}
|
|
|
|
def describe_informix_table(self, table_name: str) -> dict:
|
|
# Join syscolumns to systables to get column metadata for one table.
|
|
# coltype encoding: low byte = type code, high bit = NOT NULL flag.
|
|
safe = table_name.replace("'", "''")
|
|
sql = (
|
|
"SELECT c.colname, c.coltype, c.collength "
|
|
"FROM syscolumns c, systables t "
|
|
f"WHERE t.tabname = '{safe}' AND c.tabid = t.tabid "
|
|
"ORDER BY c.colno"
|
|
)
|
|
result = self.execute_sql_query(sql)
|
|
columns = []
|
|
for row in result.get("rows", []):
|
|
coltype_raw = int(row.get("coltype", 0))
|
|
type_code = coltype_raw & 0xFF
|
|
not_null = bool(coltype_raw & 0x100)
|
|
columns.append({
|
|
"name": row.get("colname"),
|
|
"informix_type_code": type_code,
|
|
"type": _INFORMIX_TYPE_NAMES.get(type_code, f"type_{type_code}"),
|
|
"length": int(row.get("collength", 0)),
|
|
"not_null": not_null,
|
|
})
|
|
if not columns:
|
|
return {"table": table_name, "error": "Table not found or has no columns."}
|
|
return {"table": table_name, "column_count": len(columns), "columns": columns}
|
|
|
|
|
|
# Informix type codes — partial list, enough for CUCM's data dictionary.
# Keys are the low byte of syscolumns.coltype (callers mask off the 0x100
# NOT NULL flag before looking a code up).
# Code 40 is LVARCHAR; code 41 covers BLOB, BOOLEAN and CLOB, which cannot
# be told apart from coltype alone.
# Full list: https://www.ibm.com/docs/en/informix-servers/14.10?topic=tables-syscolumns
_INFORMIX_TYPE_NAMES = {
    0: "CHAR",
    1: "SMALLINT",
    2: "INTEGER",
    3: "FLOAT",
    4: "SMALLFLOAT",
    5: "DECIMAL",
    6: "SERIAL",
    7: "DATE",
    8: "MONEY",
    9: "NULL",
    10: "DATETIME",
    11: "BYTE",
    12: "TEXT",
    13: "VARCHAR",
    14: "INTERVAL",
    15: "NCHAR",
    16: "NVARCHAR",
    17: "INT8",
    18: "SERIAL8",
    19: "SET",
    20: "MULTISET",
    21: "LIST",
    22: "ROW",
    23: "COLLECTION",
    40: "LVARCHAR",
    41: "BLOB/BOOLEAN/CLOB",
    43: "LVARCHAR",  # client-side only
    45: "BOOLEAN",
    52: "BIGINT",
    53: "BIGSERIAL",
}
|
|
|
|
|
|
def _zeep_to_dict(obj: Any) -> Any:
    """Turn a zeep response tree into plain dicts and lists.

    Objects exposing ``__values__`` (zeep CompoundValue) become dicts, and
    dicts and lists are rebuilt with their contents converted the same way.
    ``None`` and every other value are returned untouched.
    """
    if obj is None:
        return None

    # CompoundValue-style objects are checked first, ahead of the builtins.
    if hasattr(obj, "__values__"):
        converted = {}
        for key, value in obj.__values__.items():
            converted[key] = _zeep_to_dict(value)
        return converted

    if isinstance(obj, dict):
        return {key: _zeep_to_dict(value) for key, value in obj.items()}

    if isinstance(obj, list):
        return [_zeep_to_dict(element) for element in obj]

    return obj
|
|
|
|
|
|
def _parse_sql_rows(resp: Any) -> list[dict]:
|
|
"""Pull the row list out of an executeSQLQuery response.
|
|
|
|
AXL's executeSQLQuery returns rows as raw lxml elements wrapped in
|
|
`<return><row><colname>val</colname>...</row></return>`. Zeep doesn't
|
|
schema-bind these because the columns vary per query — they come
|
|
through as a list of `lxml.etree._Element` row objects with column
|
|
children.
|
|
|
|
When the query matches zero rows, the response is `<return/>` (empty),
|
|
which arrives as a CompoundValue with .return = None. In that case we
|
|
must return [] — NOT fall back to parsing the response envelope itself,
|
|
which would yield a phantom row of `{"return": None, "sequence": None}`.
|
|
"""
|
|
if resp is None:
|
|
return []
|
|
|
|
# Find the row container at .return / ["return"] / __values__["return"]
|
|
container = None
|
|
for accessor in (
|
|
lambda: getattr(resp, "return", None) if hasattr(resp, "return") else None,
|
|
lambda: resp.__values__.get("return") if hasattr(resp, "__values__") else None,
|
|
lambda: resp.get("return") if isinstance(resp, dict) else None,
|
|
):
|
|
try:
|
|
v = accessor()
|
|
except Exception:
|
|
v = None
|
|
if v is not None:
|
|
container = v
|
|
break
|
|
|
|
# No `return` member, or it's None → zero rows. Critical: do NOT fall
|
|
# back to parsing `resp` itself, which would produce a phantom row.
|
|
if container is None:
|
|
return []
|
|
|
|
# If the container is itself the rows list, use it; else look for .row
|
|
if isinstance(container, list):
|
|
row_iter = container
|
|
elif hasattr(container, "row"):
|
|
row_iter = container.row or []
|
|
elif isinstance(container, dict) and "row" in container:
|
|
row_iter = container["row"] or []
|
|
else:
|
|
# Container present but no obvious row collection — try iterating it
|
|
row_iter = list(container) if hasattr(container, "__iter__") else [container]
|
|
|
|
if not isinstance(row_iter, list):
|
|
row_iter = [row_iter]
|
|
|
|
out = []
|
|
for r in row_iter:
|
|
# AXL's executeSQLQuery wraps each row as a list of lxml column
|
|
# elements: [<Element colname1>, <Element colname2>, ...].
|
|
if isinstance(r, list):
|
|
out.append({
|
|
child.tag: child.text
|
|
for child in r
|
|
if hasattr(child, "tag")
|
|
})
|
|
continue
|
|
# Single lxml element with children (some response shapes)
|
|
if hasattr(r, "tag") and not isinstance(r, str):
|
|
try:
|
|
out.append({child.tag: child.text for child in r})
|
|
continue
|
|
except TypeError:
|
|
pass
|
|
if hasattr(r, "__values__"):
|
|
out.append({k: _stringify(v) for k, v in r.__values__.items()})
|
|
elif isinstance(r, dict):
|
|
out.append({k: _stringify(v) for k, v in r.items()})
|
|
else:
|
|
out.append({"value": str(r)})
|
|
return out
|
|
|
|
|
|
def _stringify(v: Any) -> Any:
    """Pass ``None`` and str/int/float/bool through; ``str()`` anything else."""
    is_plain = v is None or isinstance(v, (bool, int, float, str))
    return v if is_plain else str(v)
|