Two ideas borrowed from cisco-cucm-mcp (calltelemetry/cisco-cucm-mcp,
MIT licensed): real-time device registration via RisPort70, and
exponential-backoff retry on transient HTTP 5xx errors. Both are
purpose-built for the audit use case rather than general-purpose
ports — RisPort tools exist to inform audit findings, not as a
standalone "look at my devices" interface.
Rate limit / 503 backoff (~30 lines + 3 tests):
AxlClient now mounts an HTTPAdapter with a urllib3 Retry policy
(3 retries, exponential backoff, status_forcelist=[502,503,504]).
Configurable via AXL_RATE_LIMIT_RETRIES (default 3, 0 disables).
Surfaces in connection_status() so operators can see the policy.
Closes a real reliability gap: CUCM SOAP rate-limits under load
during change windows or with multiple concurrent admins; pre-fix
any 503 was a hard failure.
RisPort70 (new src/risport.py + 2 tools + prompt update):
Hand-coded SOAP client for /realtimeservice2/services/RISService70
(avoids dragging in another zeep instance for one operation).
Reuses AXL_URL/USER/PASS env vars — RisPort lives on the same host.
New tools:
device_registration_status(device_class, status, name_filter, page_size)
device_registration_summary() — cluster-wide breakdown by class
Live-cluster verification (cucm-pub.binghammemorial.org):
Phone: 803 registered=679 unregistered=123 rejected=1
Gateway: 85 registered=41 rejected=44 ← real audit finding
SIPTrunk: 22 registered=18 unregistered=4
HuntList: 28 registered=28
H323/CTI: 0 (cluster doesn't use these)
Discovered while live-verifying: CUCM 15 wraps the RisPort response
in an extra <SelectCmDeviceResult> element inside <selectCmDeviceReturn>.
Older CUCM versions exposed the fields directly. The parser accepts
either shape, and tests cover both (test_legacy_response_shape_still_parses
asserts that the older shape still works).
phone_inventory_report prompt updated:
New Step 3 — "Cross-reference with real-time registration" — recommends
device_registration_summary() + device_registration_status(status="UnRegistered")
to surface configured-but-never-registered phones (strongest orphan signal),
PartiallyRegistered phones (firewall/cert/version mismatch indicator),
and registration-state vs config-state mismatches.
Tooling delta worth noting:
AXL device count: 1,377 phones
RisPort device count: 803 phones
Delta (574) is likely templates, hidden phones, or stale config —
itself an audit finding that the new tool will surface
to anyone running phone_inventory_report.
README updated:
- Added health(), device_registration_status, device_registration_summary
- Added "Scope and complement" section recommending @calltelemetry/cisco-cucm-mcp
alongside for operational debugging (logs, perfmon, packet capture,
service control). The two servers answer different questions; the LLM
with both can compose audit findings with operational state.
- Listed all 10 prompts (was 4 outdated entries).
Tests: 134 → 155 (+21).
156 lines
6.2 KiB
Python
156 lines
6.2 KiB
Python
"""Hamilton review MAJOR #5: connection recovery and config-vs-operational errors.
|
|
|
|
Pre-fix: any connection failure set `_connection_error` and pinned it forever.
A transient network blip required restarting the MCP server. Fix: distinguish
*configuration* errors (missing env, bad WSDL) which are pinned, from
*operational* errors (network, TLS, session timeout) which can be retried
on the next call.
|
|
"""
|
|
|
|
from pathlib import Path
|
|
|
|
import pytest
|
|
|
|
from mcp_cucm_axl.cache import AxlCache
|
|
from mcp_cucm_axl.client import AxlClient
|
|
|
|
|
|
@pytest.fixture
def cache(tmp_path: Path) -> AxlCache:
    """Provide an AxlCache backed by a SQLite file in pytest's per-test tmp dir."""
    db_file = tmp_path / "test.sqlite"
    return AxlCache(db_file, default_ttl=60, cluster_id="test")
|
|
|
|
|
|
def test_config_error_is_pinned(cache: AxlCache, monkeypatch):
    """A missing AXL_URL is a configuration error and stays raised.

    Retrying cannot repair a missing environment variable, so every
    connection attempt must fail with the same AXL_URL-mentioning message.
    """
    for var_name in ("AXL_URL", "AXL_USER", "AXL_PASS"):
        monkeypatch.delenv(var_name, raising=False)
    axl = AxlClient(cache)

    # First attempt surfaces the config error; the second proves it is pinned.
    for _attempt in range(2):
        with pytest.raises(RuntimeError, match="AXL_URL"):
            axl._ensure_connected()
|
|
|
|
|
|
def test_operational_error_is_not_pinned(cache: AxlCache, monkeypatch):
    """A transient operational error must not pin the client forever.

    The zeep ``Client`` constructor is stubbed to raise, simulating a
    network blip / TLS handshake failure. The test checks that:

    * the failure surfaces as ``RuntimeError`` carrying the original message,
    * ``_config_error`` (the permanent pin) stays ``None``,
    * ``_last_error`` records the failure for diagnostics, and
    * a second ``_ensure_connected()`` call really re-attempts construction
      instead of short-circuiting on the stored error.
    """
    monkeypatch.setenv("AXL_URL", "https://test.invalid:8443/axl")
    monkeypatch.setenv("AXL_USER", "test")
    monkeypatch.setenv("AXL_PASS", "test")
    monkeypatch.setenv("AXL_VERIFY_TLS", "false")

    # Force the zeep Client constructor inside _ensure_connected to raise.
    # This simulates "WSDL fetch failed", "TLS handshake error", etc. —
    # transient operational failures.
    from mcp_cucm_axl import client as client_mod

    # One entry per constructor invocation, so retries can be observed.
    attempts = []

    def boom(*args, **kwargs):
        attempts.append(1)
        raise ConnectionError("simulated transient network failure")

    monkeypatch.setattr(client_mod, "Client", boom)

    client = AxlClient(cache)
    with pytest.raises(RuntimeError, match="simulated transient"):
        client._ensure_connected()

    # Hamilton review MAJOR #5: operational errors must NOT set _config_error.
    # _config_error is the permanent pin; only set on missing env vars / config
    # mistakes. A failed network connection is operational and the next call
    # must be allowed to retry.
    assert client._config_error is None, (
        "operational errors must not set _config_error (the pin); "
        "only configuration errors (missing env vars, bad WSDL) should pin"
    )
    # _last_error is set for diagnostics, but it does not block retries.
    assert client._last_error is not None, (
        "_last_error should record the operational failure for diagnostics"
    )
    assert "simulated transient" in client._last_error

    # Exercise the retry itself: the second call must reach the constructor
    # again rather than replaying a cached failure.
    attempts_after_first_call = len(attempts)
    with pytest.raises(RuntimeError, match="simulated transient"):
        client._ensure_connected()
    assert len(attempts) > attempts_after_first_call, (
        "second _ensure_connected() call did not re-attempt the connection"
    )
    assert client._config_error is None
|
|
|
|
|
|
def test_health_diagnostic_includes_connection_state(cache: AxlCache):
    """connection_status() exposes connection state for operators.

    The returned mapping must carry ``connected`` and ``last_error`` so the
    state is visible without reading sys.stderr.
    """
    status = AxlClient(cache).connection_status()

    assert "connected" in status
    # No connection attempt has been made on this fresh client.
    assert status["connected"] is False
    assert "last_error" in status
|
|
|
|
|
|
# ---- Rate limit / 503 retry --------------------------------------------------
# Inspired by cisco-cucm-mcp's exponential-backoff approach. CUCM's SOAP
# layer returns 503 under load (concurrent AXL admins, change window). Without
# retries, we'd fail loudly; with them, transient rate limiting becomes
# invisible to the caller.
|
|
|
|
def test_retry_config_default_three_retries(cache: AxlCache, monkeypatch):
    """By default, the session is configured for 3 retries with backoff.

    The zeep ``Client`` constructor is stubbed to raise so that only the
    session/retry setup inside ``_ensure_connected`` is exercised; the
    resulting policy is then read back through ``connection_status()``.
    """
    monkeypatch.setenv("AXL_URL", "https://example.invalid:8443/axl")
    monkeypatch.setenv("AXL_USER", "test")
    monkeypatch.setenv("AXL_PASS", "test")
    monkeypatch.setenv("AXL_VERIFY_TLS", "false")
    # The default is under test: an AXL_RATE_LIMIT_RETRIES value inherited
    # from the developer's shell or CI would silently override it.
    monkeypatch.delenv("AXL_RATE_LIMIT_RETRIES", raising=False)

    # Stub Client construction so we exercise only the session/retry setup
    from mcp_cucm_axl import client as client_mod

    def stub_client(*args, **kwargs):
        # Raise to short-circuit before service creation
        raise ConnectionError("stub: don't actually connect")

    monkeypatch.setattr(client_mod, "Client", stub_client)

    client = AxlClient(cache)
    with pytest.raises(RuntimeError):
        client._ensure_connected()

    info = client.connection_status()
    assert info["retry_config"] is not None
    assert info["retry_config"]["max_retries"] == 3
    for status_code in (503, 502, 504):
        assert status_code in info["retry_config"]["status_forcelist"]
|
|
|
|
|
|
def test_retry_config_overridable_via_env(cache: AxlCache, monkeypatch):
    """AXL_RATE_LIMIT_RETRIES lets operators tune the retry count.

    With the variable set to "7", connection_status() must report
    ``max_retries == 7``.
    """
    environment = {
        "AXL_URL": "https://example.invalid:8443/axl",
        "AXL_USER": "test",
        "AXL_PASS": "test",
        "AXL_RATE_LIMIT_RETRIES": "7",
    }
    for var_name, var_value in environment.items():
        monkeypatch.setenv(var_name, var_value)

    from mcp_cucm_axl import client as client_mod

    def refuse_to_connect(*a, **kw):
        # Abort Client construction so no real connection is attempted.
        raise ConnectionError("stub")

    monkeypatch.setattr(client_mod, "Client", refuse_to_connect)

    axl = AxlClient(cache)
    with pytest.raises(RuntimeError):
        axl._ensure_connected()

    retry_cfg = axl.connection_status()["retry_config"]
    assert retry_cfg["max_retries"] == 7
|
|
|
|
|
|
def test_retry_config_zero_disables(cache: AxlCache, monkeypatch):
    """AXL_RATE_LIMIT_RETRIES=0 turns retries off.

    Intended for test environments or operators who want raw failures; the
    reported retry_config must show ``max_retries == 0``.
    """
    environment = {
        "AXL_URL": "https://example.invalid:8443/axl",
        "AXL_USER": "test",
        "AXL_PASS": "test",
        "AXL_RATE_LIMIT_RETRIES": "0",
    }
    for var_name, var_value in environment.items():
        monkeypatch.setenv(var_name, var_value)

    from mcp_cucm_axl import client as client_mod

    def refuse_to_connect(*a, **kw):
        # Abort Client construction so no real connection is attempted.
        raise ConnectionError("stub")

    monkeypatch.setattr(client_mod, "Client", refuse_to_connect)

    axl = AxlClient(cache)
    with pytest.raises(RuntimeError):
        axl._ensure_connected()

    retry_cfg = axl.connection_status()["retry_config"]
    assert retry_cfg["max_retries"] == 0
|