Harden error handling and input validation

- Station ID validation (7-digit regex) prevents path traversal
- Stale cache fallback: serve expired data if refresh fails
- Resilient startup: server starts even if NOAA is down
- Contextual HTTP error messages with recovery hints
- Hours range validation (1-720)
- find_nearest limit/distance validation
- search() and find_nearest() now async with cache TTL checks
- Better error type info in conditions snapshot partial failures
This commit is contained in:
Ryan Malloy 2026-02-21 21:08:16 -07:00
parent 6c244b3a63
commit 20059b9476
5 changed files with 89 additions and 17 deletions

View File

@ -1,6 +1,8 @@
"""Async NOAA CO-OPS API client with station caching and proximity search."""
import math
import re
import sys
import time
import httpx
@ -11,6 +13,18 @@ DATA_URL = "https://api.tidesandcurrents.noaa.gov/api/prod/datagetter"
META_URL = "https://api.tidesandcurrents.noaa.gov/mdapi/prod/webapi"
CACHE_TTL = 86400 # 24 hours
MAX_RANGE_HOURS = 720 # NOAA API cap ~30 days
_STATION_ID_RE = re.compile(r"^\d{7}$")
def _validate_station_id(station_id: str) -> str:
"""NOAA station IDs are 7-digit numbers (e.g. '8454000')."""
if not _STATION_ID_RE.match(station_id):
raise ValueError(
f"Invalid station ID '{station_id}': expected a 7-digit number (e.g. '8454000')"
)
return station_id
def haversine(lat1: float, lon1: float, lat2: float, lon2: float) -> float:
@ -54,17 +68,39 @@ class NOAAClient:
async def get_stations(self) -> list[Station]:
if time.monotonic() - self._cache_time > CACHE_TTL:
try:
await self._refresh_stations()
except Exception:
# Serve stale data rather than failing the request.
# If cache was never populated, re-raise.
if not self._stations:
raise
print(
"Warning: station cache refresh failed, serving stale data",
file=sys.stderr,
)
return list(self._stations)
# -- Metadata API --
async def get_station_metadata(self, station_id: str) -> dict:
_validate_station_id(station_id)
try:
resp = await self._http.get(
f"{META_URL}/stations/{station_id}.json",
params={"expand": "details,sensors,datums,products,disclaimers"},
)
resp.raise_for_status()
except httpx.HTTPStatusError as exc:
if exc.response.status_code == 404:
raise ValueError(
f"Station '{station_id}' not found. "
"Verify the ID using search_stations."
) from exc
raise RuntimeError(
f"NOAA metadata API error ({exc.response.status_code}). "
"The service may be temporarily unavailable."
) from exc
data = resp.json()
# The metadata API wraps the station in a "stations" list
stations = data.get("stations", [])
@ -91,6 +127,11 @@ class NOAAClient:
Date format: yyyyMMdd or yyyyMMdd HH:mm
If no date range or hours specified, defaults to last 24 hours.
"""
_validate_station_id(station_id)
if hours and (hours < 0 or hours > MAX_RANGE_HOURS):
raise ValueError(f"hours must be between 1 and {MAX_RANGE_HOURS}, got {hours}")
params: dict[str, str] = {
"station": station_id,
"product": product,
@ -113,8 +154,19 @@ class NOAAClient:
if not begin_date and not end_date and not hours:
params["range"] = "24"
try:
resp = await self._http.get(DATA_URL, params=params)
resp.raise_for_status()
except httpx.HTTPStatusError as exc:
if exc.response.status_code == 404:
raise ValueError(
f"No data for station '{station_id}' product '{product}'. "
"Use get_station_info to check available products."
) from exc
raise RuntimeError(
f"NOAA data API error ({exc.response.status_code}). "
"The service may be temporarily unavailable."
) from exc
result = resp.json()
if "error" in result:
@ -124,13 +176,15 @@ class NOAAClient:
# -- In-memory search --
def search(
async def search(
self,
query: str = "",
state: str = "",
is_tidal: bool | None = None,
) -> list[Station]:
matches = self._stations
"""Filter cached stations. Triggers cache refresh if TTL expired."""
stations = await self.get_stations()
matches = stations
if query:
q = query.lower()
matches = [s for s in matches if q in s.name.lower() or q in s.id]
@ -141,7 +195,7 @@ class NOAAClient:
matches = [s for s in matches if s.tidal == is_tidal]
return matches
def find_nearest(
async def find_nearest(
self,
lat: float,
lon: float,
@ -149,8 +203,9 @@ class NOAAClient:
max_distance: float = 100,
) -> list[tuple[Station, float]]:
"""Return stations within max_distance nautical miles, sorted by proximity."""
stations = await self.get_stations()
results: list[tuple[Station, float]] = []
for station in self._stations:
for station in stations:
dist = haversine(lat, lon, station.lat, station.lng)
if dist <= max_distance:
results.append((station, dist))

View File

@ -34,7 +34,7 @@ def register(mcp: FastMCP) -> None:
if not target:
return json.dumps({"error": f"Station {station_id} not found"})
nearby = noaa.find_nearest(target.lat, target.lng, limit=10, max_distance=50)
nearby = await noaa.find_nearest(target.lat, target.lng, limit=10, max_distance=50)
# Exclude the station itself
nearby = [(s, d) for s, d in nearby if s.id != station_id]
return json.dumps(

View File

@ -14,7 +14,19 @@ from noaa_tides.tools import conditions, meteorological, stations, tides
async def lifespan(server: FastMCP):
"""Manage the NOAAClient lifecycle — create, pre-warm station cache, close."""
client = NOAAClient()
try:
await client.initialize()
except Exception as exc:
# Start with empty cache — will populate on first station request.
# HTTP client still needs to exist for data fetches.
import httpx
client._http = httpx.AsyncClient(timeout=30)
print(
f"Warning: station cache pre-warm failed ({exc}). "
"Will retry on first request.",
file=sys.stderr,
)
try:
yield {"noaa_client": client}
finally:

View File

@ -52,7 +52,8 @@ def register(mcp: FastMCP) -> None:
data = await noaa.get_data(station_id, **params)
return name, data
except Exception as exc:
return name, str(exc)
msg = str(exc) or type(exc).__name__
return name, f"{type(exc).__name__}: {msg}"
results = await asyncio.gather(
*[fetch(name, params) for name, params in requests.items()]

View File

@ -23,7 +23,7 @@ def register(mcp: FastMCP) -> None:
Returns up to 50 matching stations with id, name, state, coordinates, and tidal status.
"""
noaa: NOAAClient = ctx.lifespan_context["noaa_client"]
results = noaa.search(query=query, state=state, is_tidal=is_tidal)
results = await noaa.search(query=query, state=state, is_tidal=is_tidal)
return [s.model_dump() for s in results[:50]]
@mcp.tool(tags={"discovery"})
@ -43,7 +43,11 @@ def register(mcp: FastMCP) -> None:
for stations near Narragansett Bay.
"""
noaa: NOAAClient = ctx.lifespan_context["noaa_client"]
results = noaa.find_nearest(
if limit < 1:
raise ValueError("limit must be at least 1")
if max_distance_nm <= 0:
raise ValueError("max_distance_nm must be positive")
results = await noaa.find_nearest(
latitude, longitude, limit=limit, max_distance=max_distance_nm
)
return [