"""Hamilton review MAJOR #5: connection recovery and config-vs-operational errors.

Pre-fix: any connection failure set `_connection_error` and pinned it forever.
A transient network blip required restarting the MCP server.

Fix: distinguish *configuration* errors (missing env, bad WSDL) which are
pinned, from *operational* errors (network, TLS, session timeout) which can
be retried on the next call.
"""

from pathlib import Path

import pytest

from mcaxl.cache import AxlCache
from mcaxl.client import AxlClient


@pytest.fixture
def cache(tmp_path: Path) -> AxlCache:
    """Provide an ``AxlCache`` stored in a SQLite file under ``tmp_path``."""
    database_file = tmp_path / "test.sqlite"
    return AxlCache(database_file, default_ttl=60, cluster_id="test")


def test_config_error_is_pinned(cache: AxlCache, monkeypatch):
    """A missing AXL_URL is a configuration error and stays pinned.

    Retrying cannot repair configuration, so each call is expected to raise
    the same clear message naming the missing setting.
    """
    for env_name in ("AXL_URL", "AXL_USER", "AXL_PASS"):
        monkeypatch.delenv(env_name, raising=False)

    axl = AxlClient(cache)

    # The first attempt surfaces the config error; the second attempt shows
    # that the very same error is still reported (pinned).
    for _attempt in range(2):
        with pytest.raises(RuntimeError, match="AXL_URL"):
            axl._ensure_connected()


def test_operational_error_is_not_pinned(cache: AxlCache, monkeypatch):
    """A transient operational failure must not pin the client forever.

    The zeep ``Client`` constructor used by ``_ensure_connected`` is replaced
    with a callable that raises, standing in for "WSDL fetch failed",
    "TLS handshake error" and similar transient conditions. The failure has
    to be recorded for diagnostics without touching the permanent pin.
    """
    from mcaxl import client as client_mod

    environment = {
        "AXL_URL": "https://test.invalid:8443/axl",
        "AXL_USER": "test",
        "AXL_PASS": "test",
        "AXL_VERIFY_TLS": "false",
    }
    for env_name, env_value in environment.items():
        monkeypatch.setenv(env_name, env_value)

    def fail_to_construct(*args, **kwargs):
        raise ConnectionError("simulated transient network failure")

    monkeypatch.setattr(client_mod, "Client", fail_to_construct)

    axl = AxlClient(cache)
    with pytest.raises(RuntimeError, match="simulated transient"):
        axl._ensure_connected()

    # Hamilton review MAJOR #5: `_config_error` is the permanent pin and is
    # reserved for missing env vars / config mistakes. A failed network
    # connection is operational, so the next call must be free to retry.
    assert axl._config_error is None, (
        "operational errors must not set _config_error (the pin); "
        "only configuration errors (missing env vars, bad WSDL) should pin"
    )
    # `_last_error` exists purely for diagnostics; it does not block retries.
    assert axl._last_error is not None, (
        "_last_error should record the operational failure for diagnostics"
    )
    assert "simulated transient" in axl._last_error


def test_health_diagnostic_includes_connection_state(cache: AxlCache):
    """``connection_status()`` reports connection state for operators.

    An operator should be able to see what is going on without reading
    sys.stderr, so the status mapping must carry both the connected flag and
    the last recorded error.
    """
    status = AxlClient(cache).connection_status()

    assert "connected" in status
    assert status["connected"] is False  # no connection attempt made yet
    assert "last_error" in status


# ---- Rate limit / 503 retry --------------------------------------------------
# Inspired by cisco-cucm-mcp's exponential-backoff approach. CUCM's SOAP
# layer returns 503 under load (concurrent AXL admins, change window). Without
# retries, we'd fail loudly; with them, transient rate limiting becomes
# invisible to the caller.
def _raise_stub_connection_error(*args, **kwargs):
    """Stand-in for the zeep ``Client`` constructor that always fails.

    Raising here short-circuits ``_ensure_connected`` before any service is
    created, so the tests exercise only the session/retry setup and never
    touch the network.
    """
    raise ConnectionError("stub: don't actually connect")


def _status_after_stubbed_connect(cache: AxlCache, monkeypatch, retries=None):
    """Run one failed connection attempt and return ``connection_status()``.

    Args:
        cache: Cache instance handed to ``AxlClient``.
        monkeypatch: pytest's ``monkeypatch`` fixture; every change made here
            is undone automatically at test teardown.
        retries: Value for ``AXL_RATE_LIMIT_RETRIES`` as a string, or ``None``
            to make sure the variable is absent so the default applies.

    Returns:
        The mapping returned by ``AxlClient.connection_status()`` after the
        stubbed ``Client`` constructor has made ``_ensure_connected`` fail.
    """
    from mcaxl import client as client_mod

    monkeypatch.setenv("AXL_URL", "https://example.invalid:8443/axl")
    monkeypatch.setenv("AXL_USER", "test")
    monkeypatch.setenv("AXL_PASS", "test")
    monkeypatch.setenv("AXL_VERIFY_TLS", "false")

    # Keep the retry setting hermetic: an AXL_RATE_LIMIT_RETRIES value leaking
    # in from the developer's shell or CI would otherwise override the default
    # and make the "default" test fail for reasons unrelated to the code.
    if retries is None:
        monkeypatch.delenv("AXL_RATE_LIMIT_RETRIES", raising=False)
    else:
        monkeypatch.setenv("AXL_RATE_LIMIT_RETRIES", retries)

    monkeypatch.setattr(client_mod, "Client", _raise_stub_connection_error)

    client = AxlClient(cache)
    with pytest.raises(RuntimeError):
        client._ensure_connected()
    return client.connection_status()


def test_retry_config_default_three_retries(cache: AxlCache, monkeypatch):
    """By default, the session is configured for 3 retries with backoff."""
    retry_config = _status_after_stubbed_connect(cache, monkeypatch)["retry_config"]

    assert retry_config is not None
    assert retry_config["max_retries"] == 3
    # Gateway-style failures (502/503/504) must all be retried.
    for status_code in (503, 502, 504):
        assert status_code in retry_config["status_forcelist"]


def test_retry_config_overridable_via_env(cache: AxlCache, monkeypatch):
    """Operators can tune the retry count via AXL_RATE_LIMIT_RETRIES."""
    status = _status_after_stubbed_connect(cache, monkeypatch, retries="7")

    assert status["retry_config"]["max_retries"] == 7


def test_retry_config_zero_disables(cache: AxlCache, monkeypatch):
    """AXL_RATE_LIMIT_RETRIES=0 disables the retry adapter entirely.

    Useful for test environments or when an operator wants raw failures.
    """
    status = _status_after_stubbed_connect(cache, monkeypatch, retries="0")

    assert status["retry_config"]["max_retries"] == 0