From 20059b9476c13ee3a950cb035d1d0fc7b5404595 Mon Sep 17 00:00:00 2001 From: Ryan Malloy Date: Sat, 21 Feb 2026 21:08:16 -0700 Subject: [PATCH] Harden error handling and input validation - Station ID validation (7-digit regex) prevents path traversal - Stale cache fallback: serve expired data if refresh fails - Resilient startup: server starts even if NOAA is down - Contextual HTTP error messages with recovery hints - Hours range validation (1-720) - find_nearest limit/distance validation - search() and find_nearest() now async with cache TTL checks - Better error type info in conditions snapshot partial failures --- src/noaa_tides/client.py | 79 +++++++++++++++++++++++++----- src/noaa_tides/resources.py | 2 +- src/noaa_tides/server.py | 14 +++++- src/noaa_tides/tools/conditions.py | 3 +- src/noaa_tides/tools/stations.py | 8 ++- 5 files changed, 89 insertions(+), 17 deletions(-) diff --git a/src/noaa_tides/client.py b/src/noaa_tides/client.py index 1030174..2ddaec7 100644 --- a/src/noaa_tides/client.py +++ b/src/noaa_tides/client.py @@ -1,6 +1,8 @@ """Async NOAA CO-OPS API client with station caching and proximity search.""" import math +import re +import sys import time import httpx @@ -11,6 +13,18 @@ DATA_URL = "https://api.tidesandcurrents.noaa.gov/api/prod/datagetter" META_URL = "https://api.tidesandcurrents.noaa.gov/mdapi/prod/webapi" CACHE_TTL = 86400 # 24 hours +MAX_RANGE_HOURS = 720 # NOAA API cap ~30 days + +_STATION_ID_RE = re.compile(r"^\d{7}$") + + +def _validate_station_id(station_id: str) -> str: + """NOAA station IDs are 7-digit numbers (e.g. '8454000').""" + if not _STATION_ID_RE.match(station_id): + raise ValueError( + f"Invalid station ID '{station_id}': expected a 7-digit number (e.g. '8454000')" + ) + return station_id def haversine(lat1: float, lon1: float, lat2: float, lon2: float) -> float: @@ -54,17 +68,39 @@ class NOAAClient: async def get_stations(self) -> list[Station]: if time.monotonic() - self._cache_time > CACHE_TTL: - await self._refresh_stations() + try: + await self._refresh_stations() + except Exception: + # Serve stale data rather than failing the request. + # If cache was never populated, re-raise. + if not self._stations: + raise + print( + "Warning: station cache refresh failed, serving stale data", + file=sys.stderr, + ) return list(self._stations) # -- Metadata API -- async def get_station_metadata(self, station_id: str) -> dict: - resp = await self._http.get( - f"{META_URL}/stations/{station_id}.json", - params={"expand": "details,sensors,datums,products,disclaimers"}, - ) - resp.raise_for_status() + _validate_station_id(station_id) + try: + resp = await self._http.get( + f"{META_URL}/stations/{station_id}.json", + params={"expand": "details,sensors,datums,products,disclaimers"}, + ) + resp.raise_for_status() + except httpx.HTTPStatusError as exc: + if exc.response.status_code == 404: + raise ValueError( + f"Station '{station_id}' not found. " + "Verify the ID using search_stations." + ) from exc + raise RuntimeError( + f"NOAA metadata API error ({exc.response.status_code}). " + "The service may be temporarily unavailable." + ) from exc data = resp.json() # The metadata API wraps the station in a "stations" list stations = data.get("stations", []) @@ -91,6 +127,11 @@ class NOAAClient: Date format: yyyyMMdd or yyyyMMdd HH:mm If no date range or hours specified, defaults to last 24 hours. """ + _validate_station_id(station_id) + + if hours and (hours < 0 or hours > MAX_RANGE_HOURS): + raise ValueError(f"hours must be between 1 and {MAX_RANGE_HOURS}, got {hours}") + params: dict[str, str] = { "station": station_id, "product": product, @@ -113,8 +154,19 @@ class NOAAClient: if not begin_date and not end_date and not hours: params["range"] = "24" - resp = await self._http.get(DATA_URL, params=params) - resp.raise_for_status() + try: + resp = await self._http.get(DATA_URL, params=params) + resp.raise_for_status() + except httpx.HTTPStatusError as exc: + if exc.response.status_code == 404: + raise ValueError( + f"No data for station '{station_id}' product '{product}'. " + "Use get_station_info to check available products." + ) from exc + raise RuntimeError( + f"NOAA data API error ({exc.response.status_code}). " + "The service may be temporarily unavailable." + ) from exc result = resp.json() if "error" in result: @@ -124,13 +176,15 @@ class NOAAClient: # -- In-memory search -- - def search( + async def search( self, query: str = "", state: str = "", is_tidal: bool | None = None, ) -> list[Station]: - matches = self._stations + """Filter cached stations. Triggers cache refresh if TTL expired.""" + stations = await self.get_stations() + matches = stations if query: q = query.lower() matches = [s for s in matches if q in s.name.lower() or q in s.id] @@ -141,7 +195,7 @@ class NOAAClient: matches = [s for s in matches if s.tidal == is_tidal] return matches - def find_nearest( + async def find_nearest( self, lat: float, lon: float, @@ -149,8 +203,9 @@ class NOAAClient: max_distance: float = 100, ) -> list[tuple[Station, float]]: """Return stations within max_distance nautical miles, sorted by proximity.""" + stations = await self.get_stations() results: list[tuple[Station, float]] = [] - for station in self._stations: + for station in stations: dist = haversine(lat, lon, station.lat, station.lng) if dist <= max_distance: results.append((station, dist)) diff --git a/src/noaa_tides/resources.py b/src/noaa_tides/resources.py index 2922fe0..cdeaa4c 100644 --- a/src/noaa_tides/resources.py +++ b/src/noaa_tides/resources.py @@ -34,7 +34,7 @@ def register(mcp: FastMCP) -> None: if not target: return json.dumps({"error": f"Station {station_id} not found"}) - nearby = noaa.find_nearest(target.lat, target.lng, limit=10, max_distance=50) + nearby = await noaa.find_nearest(target.lat, target.lng, limit=10, max_distance=50) # Exclude the station itself nearby = [(s, d) for s, d in nearby if s.id != station_id] return json.dumps( diff --git a/src/noaa_tides/server.py b/src/noaa_tides/server.py index e8c43e2..b7683c2 100644 --- a/src/noaa_tides/server.py +++ b/src/noaa_tides/server.py @@ -14,7 +14,19 @@ from noaa_tides.tools import conditions, meteorological, stations, tides async def lifespan(server: FastMCP): """Manage the NOAAClient lifecycle — create, pre-warm station cache, close.""" client = NOAAClient() - await client.initialize() + try: + await client.initialize() + except Exception as exc: + # Start with empty cache — will populate on first station request. + # HTTP client still needs to exist for data fetches. + import httpx + + client._http = httpx.AsyncClient(timeout=30) + print( + f"Warning: station cache pre-warm failed ({exc}). " + "Will retry on first request.", + file=sys.stderr, + ) try: yield {"noaa_client": client} finally: diff --git a/src/noaa_tides/tools/conditions.py b/src/noaa_tides/tools/conditions.py index 1b46b9d..cef12a4 100644 --- a/src/noaa_tides/tools/conditions.py +++ b/src/noaa_tides/tools/conditions.py @@ -52,7 +52,8 @@ def register(mcp: FastMCP) -> None: data = await noaa.get_data(station_id, **params) return name, data except Exception as exc: - return name, str(exc) + msg = str(exc) or type(exc).__name__ + return name, f"{type(exc).__name__}: {msg}" results = await asyncio.gather( *[fetch(name, params) for name, params in requests.items()] diff --git a/src/noaa_tides/tools/stations.py b/src/noaa_tides/tools/stations.py index 5ce2411..57f220c 100644 --- a/src/noaa_tides/tools/stations.py +++ b/src/noaa_tides/tools/stations.py @@ -23,7 +23,7 @@ def register(mcp: FastMCP) -> None: Returns up to 50 matching stations with id, name, state, coordinates, and tidal status. """ noaa: NOAAClient = ctx.lifespan_context["noaa_client"] - results = noaa.search(query=query, state=state, is_tidal=is_tidal) + results = await noaa.search(query=query, state=state, is_tidal=is_tidal) return [s.model_dump() for s in results[:50]] @mcp.tool(tags={"discovery"}) @@ -43,7 +43,11 @@ def register(mcp: FastMCP) -> None: for stations near Narragansett Bay. """ noaa: NOAAClient = ctx.lifespan_context["noaa_client"] - results = noaa.find_nearest( + if limit < 1: + raise ValueError("limit must be at least 1") + if max_distance_nm <= 0: + raise ValueError("max_distance_nm must be positive") + results = await noaa.find_nearest( latitude, longitude, limit=limit, max_distance=max_distance_nm ) return [