Harden error handling and input validation

- Station ID validation (7-digit regex) prevents path traversal
- Stale cache fallback: serve expired data if refresh fails
- Resilient startup: server starts even if NOAA is down
- Contextual HTTP error messages with recovery hints
- Hours range validation (1-720)
- find_nearest limit/distance validation
- search() and find_nearest() now async with cache TTL checks
- Better error type info in conditions snapshot partial failures
This commit is contained in:
Ryan Malloy 2026-02-21 21:08:16 -07:00
parent 6c244b3a63
commit 20059b9476
5 changed files with 89 additions and 17 deletions

View File

@ -1,6 +1,8 @@
"""Async NOAA CO-OPS API client with station caching and proximity search.""" """Async NOAA CO-OPS API client with station caching and proximity search."""
import math import math
import re
import sys
import time import time
import httpx import httpx
@ -11,6 +13,18 @@ DATA_URL = "https://api.tidesandcurrents.noaa.gov/api/prod/datagetter"
META_URL = "https://api.tidesandcurrents.noaa.gov/mdapi/prod/webapi" META_URL = "https://api.tidesandcurrents.noaa.gov/mdapi/prod/webapi"
CACHE_TTL = 86400 # 24 hours CACHE_TTL = 86400 # 24 hours
MAX_RANGE_HOURS = 720 # NOAA API cap ~30 days
_STATION_ID_RE = re.compile(r"^\d{7}$")
def _validate_station_id(station_id: str) -> str:
"""NOAA station IDs are 7-digit numbers (e.g. '8454000')."""
if not _STATION_ID_RE.match(station_id):
raise ValueError(
f"Invalid station ID '{station_id}': expected a 7-digit number (e.g. '8454000')"
)
return station_id
def haversine(lat1: float, lon1: float, lat2: float, lon2: float) -> float: def haversine(lat1: float, lon1: float, lat2: float, lon2: float) -> float:
@ -54,17 +68,39 @@ class NOAAClient:
async def get_stations(self) -> list[Station]: async def get_stations(self) -> list[Station]:
if time.monotonic() - self._cache_time > CACHE_TTL: if time.monotonic() - self._cache_time > CACHE_TTL:
await self._refresh_stations() try:
await self._refresh_stations()
except Exception:
# Serve stale data rather than failing the request.
# If cache was never populated, re-raise.
if not self._stations:
raise
print(
"Warning: station cache refresh failed, serving stale data",
file=sys.stderr,
)
return list(self._stations) return list(self._stations)
# -- Metadata API -- # -- Metadata API --
async def get_station_metadata(self, station_id: str) -> dict: async def get_station_metadata(self, station_id: str) -> dict:
resp = await self._http.get( _validate_station_id(station_id)
f"{META_URL}/stations/{station_id}.json", try:
params={"expand": "details,sensors,datums,products,disclaimers"}, resp = await self._http.get(
) f"{META_URL}/stations/{station_id}.json",
resp.raise_for_status() params={"expand": "details,sensors,datums,products,disclaimers"},
)
resp.raise_for_status()
except httpx.HTTPStatusError as exc:
if exc.response.status_code == 404:
raise ValueError(
f"Station '{station_id}' not found. "
"Verify the ID using search_stations."
) from exc
raise RuntimeError(
f"NOAA metadata API error ({exc.response.status_code}). "
"The service may be temporarily unavailable."
) from exc
data = resp.json() data = resp.json()
# The metadata API wraps the station in a "stations" list # The metadata API wraps the station in a "stations" list
stations = data.get("stations", []) stations = data.get("stations", [])
@ -91,6 +127,11 @@ class NOAAClient:
Date format: yyyyMMdd or yyyyMMdd HH:mm Date format: yyyyMMdd or yyyyMMdd HH:mm
If no date range or hours specified, defaults to last 24 hours. If no date range or hours specified, defaults to last 24 hours.
""" """
_validate_station_id(station_id)
if hours and (hours < 0 or hours > MAX_RANGE_HOURS):
raise ValueError(f"hours must be between 1 and {MAX_RANGE_HOURS}, got {hours}")
params: dict[str, str] = { params: dict[str, str] = {
"station": station_id, "station": station_id,
"product": product, "product": product,
@ -113,8 +154,19 @@ class NOAAClient:
if not begin_date and not end_date and not hours: if not begin_date and not end_date and not hours:
params["range"] = "24" params["range"] = "24"
resp = await self._http.get(DATA_URL, params=params) try:
resp.raise_for_status() resp = await self._http.get(DATA_URL, params=params)
resp.raise_for_status()
except httpx.HTTPStatusError as exc:
if exc.response.status_code == 404:
raise ValueError(
f"No data for station '{station_id}' product '{product}'. "
"Use get_station_info to check available products."
) from exc
raise RuntimeError(
f"NOAA data API error ({exc.response.status_code}). "
"The service may be temporarily unavailable."
) from exc
result = resp.json() result = resp.json()
if "error" in result: if "error" in result:
@ -124,13 +176,15 @@ class NOAAClient:
# -- In-memory search -- # -- In-memory search --
def search( async def search(
self, self,
query: str = "", query: str = "",
state: str = "", state: str = "",
is_tidal: bool | None = None, is_tidal: bool | None = None,
) -> list[Station]: ) -> list[Station]:
matches = self._stations """Filter cached stations. Triggers cache refresh if TTL expired."""
stations = await self.get_stations()
matches = stations
if query: if query:
q = query.lower() q = query.lower()
matches = [s for s in matches if q in s.name.lower() or q in s.id] matches = [s for s in matches if q in s.name.lower() or q in s.id]
@ -141,7 +195,7 @@ class NOAAClient:
matches = [s for s in matches if s.tidal == is_tidal] matches = [s for s in matches if s.tidal == is_tidal]
return matches return matches
def find_nearest( async def find_nearest(
self, self,
lat: float, lat: float,
lon: float, lon: float,
@ -149,8 +203,9 @@ class NOAAClient:
max_distance: float = 100, max_distance: float = 100,
) -> list[tuple[Station, float]]: ) -> list[tuple[Station, float]]:
"""Return stations within max_distance nautical miles, sorted by proximity.""" """Return stations within max_distance nautical miles, sorted by proximity."""
stations = await self.get_stations()
results: list[tuple[Station, float]] = [] results: list[tuple[Station, float]] = []
for station in self._stations: for station in stations:
dist = haversine(lat, lon, station.lat, station.lng) dist = haversine(lat, lon, station.lat, station.lng)
if dist <= max_distance: if dist <= max_distance:
results.append((station, dist)) results.append((station, dist))

View File

@ -34,7 +34,7 @@ def register(mcp: FastMCP) -> None:
if not target: if not target:
return json.dumps({"error": f"Station {station_id} not found"}) return json.dumps({"error": f"Station {station_id} not found"})
nearby = noaa.find_nearest(target.lat, target.lng, limit=10, max_distance=50) nearby = await noaa.find_nearest(target.lat, target.lng, limit=10, max_distance=50)
# Exclude the station itself # Exclude the station itself
nearby = [(s, d) for s, d in nearby if s.id != station_id] nearby = [(s, d) for s, d in nearby if s.id != station_id]
return json.dumps( return json.dumps(

View File

@ -14,7 +14,19 @@ from noaa_tides.tools import conditions, meteorological, stations, tides
async def lifespan(server: FastMCP): async def lifespan(server: FastMCP):
"""Manage the NOAAClient lifecycle — create, pre-warm station cache, close.""" """Manage the NOAAClient lifecycle — create, pre-warm station cache, close."""
client = NOAAClient() client = NOAAClient()
await client.initialize() try:
await client.initialize()
except Exception as exc:
# Start with empty cache — will populate on first station request.
# HTTP client still needs to exist for data fetches.
import httpx
client._http = httpx.AsyncClient(timeout=30)
print(
f"Warning: station cache pre-warm failed ({exc}). "
"Will retry on first request.",
file=sys.stderr,
)
try: try:
yield {"noaa_client": client} yield {"noaa_client": client}
finally: finally:

View File

@ -52,7 +52,8 @@ def register(mcp: FastMCP) -> None:
data = await noaa.get_data(station_id, **params) data = await noaa.get_data(station_id, **params)
return name, data return name, data
except Exception as exc: except Exception as exc:
return name, str(exc) msg = str(exc) or type(exc).__name__
return name, f"{type(exc).__name__}: {msg}"
results = await asyncio.gather( results = await asyncio.gather(
*[fetch(name, params) for name, params in requests.items()] *[fetch(name, params) for name, params in requests.items()]

View File

@ -23,7 +23,7 @@ def register(mcp: FastMCP) -> None:
Returns up to 50 matching stations with id, name, state, coordinates, and tidal status. Returns up to 50 matching stations with id, name, state, coordinates, and tidal status.
""" """
noaa: NOAAClient = ctx.lifespan_context["noaa_client"] noaa: NOAAClient = ctx.lifespan_context["noaa_client"]
results = noaa.search(query=query, state=state, is_tidal=is_tidal) results = await noaa.search(query=query, state=state, is_tidal=is_tidal)
return [s.model_dump() for s in results[:50]] return [s.model_dump() for s in results[:50]]
@mcp.tool(tags={"discovery"}) @mcp.tool(tags={"discovery"})
@ -43,7 +43,11 @@ def register(mcp: FastMCP) -> None:
for stations near Narragansett Bay. for stations near Narragansett Bay.
""" """
noaa: NOAAClient = ctx.lifespan_context["noaa_client"] noaa: NOAAClient = ctx.lifespan_context["noaa_client"]
results = noaa.find_nearest( if limit < 1:
raise ValueError("limit must be at least 1")
if max_distance_nm <= 0:
raise ValueError("max_distance_nm must be positive")
results = await noaa.find_nearest(
latitude, longitude, limit=limit, max_distance=max_distance_nm latitude, longitude, limit=limit, max_distance=max_distance_nm
) )
return [ return [