Harden error handling and input validation
- Station ID validation (7-digit regex) prevents path traversal - Stale cache fallback: serve expired data if refresh fails - Resilient startup: server starts even if NOAA is down - Contextual HTTP error messages with recovery hints - Hours range validation (1-720) - find_nearest limit/distance validation - search() and find_nearest() now async with cache TTL checks - Better error type info in conditions snapshot partial failures
This commit is contained in:
parent
6c244b3a63
commit
20059b9476
@ -1,6 +1,8 @@
|
||||
"""Async NOAA CO-OPS API client with station caching and proximity search."""
|
||||
|
||||
import math
|
||||
import re
|
||||
import sys
|
||||
import time
|
||||
|
||||
import httpx
|
||||
@ -11,6 +13,18 @@ DATA_URL = "https://api.tidesandcurrents.noaa.gov/api/prod/datagetter"
|
||||
META_URL = "https://api.tidesandcurrents.noaa.gov/mdapi/prod/webapi"
|
||||
|
||||
CACHE_TTL = 86400 # 24 hours
|
||||
MAX_RANGE_HOURS = 720 # NOAA API cap ~30 days
|
||||
|
||||
_STATION_ID_RE = re.compile(r"^\d{7}$")
|
||||
|
||||
|
||||
def _validate_station_id(station_id: str) -> str:
|
||||
"""NOAA station IDs are 7-digit numbers (e.g. '8454000')."""
|
||||
if not _STATION_ID_RE.match(station_id):
|
||||
raise ValueError(
|
||||
f"Invalid station ID '{station_id}': expected a 7-digit number (e.g. '8454000')"
|
||||
)
|
||||
return station_id
|
||||
|
||||
|
||||
def haversine(lat1: float, lon1: float, lat2: float, lon2: float) -> float:
|
||||
@ -54,17 +68,39 @@ class NOAAClient:
|
||||
|
||||
async def get_stations(self) -> list[Station]:
|
||||
if time.monotonic() - self._cache_time > CACHE_TTL:
|
||||
await self._refresh_stations()
|
||||
try:
|
||||
await self._refresh_stations()
|
||||
except Exception:
|
||||
# Serve stale data rather than failing the request.
|
||||
# If cache was never populated, re-raise.
|
||||
if not self._stations:
|
||||
raise
|
||||
print(
|
||||
"Warning: station cache refresh failed, serving stale data",
|
||||
file=sys.stderr,
|
||||
)
|
||||
return list(self._stations)
|
||||
|
||||
# -- Metadata API --
|
||||
|
||||
async def get_station_metadata(self, station_id: str) -> dict:
|
||||
resp = await self._http.get(
|
||||
f"{META_URL}/stations/{station_id}.json",
|
||||
params={"expand": "details,sensors,datums,products,disclaimers"},
|
||||
)
|
||||
resp.raise_for_status()
|
||||
_validate_station_id(station_id)
|
||||
try:
|
||||
resp = await self._http.get(
|
||||
f"{META_URL}/stations/{station_id}.json",
|
||||
params={"expand": "details,sensors,datums,products,disclaimers"},
|
||||
)
|
||||
resp.raise_for_status()
|
||||
except httpx.HTTPStatusError as exc:
|
||||
if exc.response.status_code == 404:
|
||||
raise ValueError(
|
||||
f"Station '{station_id}' not found. "
|
||||
"Verify the ID using search_stations."
|
||||
) from exc
|
||||
raise RuntimeError(
|
||||
f"NOAA metadata API error ({exc.response.status_code}). "
|
||||
"The service may be temporarily unavailable."
|
||||
) from exc
|
||||
data = resp.json()
|
||||
# The metadata API wraps the station in a "stations" list
|
||||
stations = data.get("stations", [])
|
||||
@ -91,6 +127,11 @@ class NOAAClient:
|
||||
Date format: yyyyMMdd or yyyyMMdd HH:mm
|
||||
If no date range or hours specified, defaults to last 24 hours.
|
||||
"""
|
||||
_validate_station_id(station_id)
|
||||
|
||||
if hours and (hours < 0 or hours > MAX_RANGE_HOURS):
|
||||
raise ValueError(f"hours must be between 1 and {MAX_RANGE_HOURS}, got {hours}")
|
||||
|
||||
params: dict[str, str] = {
|
||||
"station": station_id,
|
||||
"product": product,
|
||||
@ -113,8 +154,19 @@ class NOAAClient:
|
||||
if not begin_date and not end_date and not hours:
|
||||
params["range"] = "24"
|
||||
|
||||
resp = await self._http.get(DATA_URL, params=params)
|
||||
resp.raise_for_status()
|
||||
try:
|
||||
resp = await self._http.get(DATA_URL, params=params)
|
||||
resp.raise_for_status()
|
||||
except httpx.HTTPStatusError as exc:
|
||||
if exc.response.status_code == 404:
|
||||
raise ValueError(
|
||||
f"No data for station '{station_id}' product '{product}'. "
|
||||
"Use get_station_info to check available products."
|
||||
) from exc
|
||||
raise RuntimeError(
|
||||
f"NOAA data API error ({exc.response.status_code}). "
|
||||
"The service may be temporarily unavailable."
|
||||
) from exc
|
||||
result = resp.json()
|
||||
|
||||
if "error" in result:
|
||||
@ -124,13 +176,15 @@ class NOAAClient:
|
||||
|
||||
# -- In-memory search --
|
||||
|
||||
def search(
|
||||
async def search(
|
||||
self,
|
||||
query: str = "",
|
||||
state: str = "",
|
||||
is_tidal: bool | None = None,
|
||||
) -> list[Station]:
|
||||
matches = self._stations
|
||||
"""Filter cached stations. Triggers cache refresh if TTL expired."""
|
||||
stations = await self.get_stations()
|
||||
matches = stations
|
||||
if query:
|
||||
q = query.lower()
|
||||
matches = [s for s in matches if q in s.name.lower() or q in s.id]
|
||||
@ -141,7 +195,7 @@ class NOAAClient:
|
||||
matches = [s for s in matches if s.tidal == is_tidal]
|
||||
return matches
|
||||
|
||||
def find_nearest(
|
||||
async def find_nearest(
|
||||
self,
|
||||
lat: float,
|
||||
lon: float,
|
||||
@ -149,8 +203,9 @@ class NOAAClient:
|
||||
max_distance: float = 100,
|
||||
) -> list[tuple[Station, float]]:
|
||||
"""Return stations within max_distance nautical miles, sorted by proximity."""
|
||||
stations = await self.get_stations()
|
||||
results: list[tuple[Station, float]] = []
|
||||
for station in self._stations:
|
||||
for station in stations:
|
||||
dist = haversine(lat, lon, station.lat, station.lng)
|
||||
if dist <= max_distance:
|
||||
results.append((station, dist))
|
||||
|
||||
@ -34,7 +34,7 @@ def register(mcp: FastMCP) -> None:
|
||||
if not target:
|
||||
return json.dumps({"error": f"Station {station_id} not found"})
|
||||
|
||||
nearby = noaa.find_nearest(target.lat, target.lng, limit=10, max_distance=50)
|
||||
nearby = await noaa.find_nearest(target.lat, target.lng, limit=10, max_distance=50)
|
||||
# Exclude the station itself
|
||||
nearby = [(s, d) for s, d in nearby if s.id != station_id]
|
||||
return json.dumps(
|
||||
|
||||
@ -14,7 +14,19 @@ from noaa_tides.tools import conditions, meteorological, stations, tides
|
||||
async def lifespan(server: FastMCP):
|
||||
"""Manage the NOAAClient lifecycle — create, pre-warm station cache, close."""
|
||||
client = NOAAClient()
|
||||
await client.initialize()
|
||||
try:
|
||||
await client.initialize()
|
||||
except Exception as exc:
|
||||
# Start with empty cache — will populate on first station request.
|
||||
# HTTP client still needs to exist for data fetches.
|
||||
import httpx
|
||||
|
||||
client._http = httpx.AsyncClient(timeout=30)
|
||||
print(
|
||||
f"Warning: station cache pre-warm failed ({exc}). "
|
||||
"Will retry on first request.",
|
||||
file=sys.stderr,
|
||||
)
|
||||
try:
|
||||
yield {"noaa_client": client}
|
||||
finally:
|
||||
|
||||
@ -52,7 +52,8 @@ def register(mcp: FastMCP) -> None:
|
||||
data = await noaa.get_data(station_id, **params)
|
||||
return name, data
|
||||
except Exception as exc:
|
||||
return name, str(exc)
|
||||
msg = str(exc) or type(exc).__name__
|
||||
return name, f"{type(exc).__name__}: {msg}"
|
||||
|
||||
results = await asyncio.gather(
|
||||
*[fetch(name, params) for name, params in requests.items()]
|
||||
|
||||
@ -23,7 +23,7 @@ def register(mcp: FastMCP) -> None:
|
||||
Returns up to 50 matching stations with id, name, state, coordinates, and tidal status.
|
||||
"""
|
||||
noaa: NOAAClient = ctx.lifespan_context["noaa_client"]
|
||||
results = noaa.search(query=query, state=state, is_tidal=is_tidal)
|
||||
results = await noaa.search(query=query, state=state, is_tidal=is_tidal)
|
||||
return [s.model_dump() for s in results[:50]]
|
||||
|
||||
@mcp.tool(tags={"discovery"})
|
||||
@ -43,7 +43,11 @@ def register(mcp: FastMCP) -> None:
|
||||
for stations near Narragansett Bay.
|
||||
"""
|
||||
noaa: NOAAClient = ctx.lifespan_context["noaa_client"]
|
||||
results = noaa.find_nearest(
|
||||
if limit < 1:
|
||||
raise ValueError("limit must be at least 1")
|
||||
if max_distance_nm <= 0:
|
||||
raise ValueError("max_distance_nm must be positive")
|
||||
results = await noaa.find_nearest(
|
||||
latitude, longitude, limit=limit, max_distance=max_distance_nm
|
||||
)
|
||||
return [
|
||||
|
||||
Loading…
x
Reference in New Issue
Block a user