Harden error handling and input validation
- Station ID validation (7-digit regex) prevents path traversal - Stale cache fallback: serve expired data if refresh fails - Resilient startup: server starts even if NOAA is down - Contextual HTTP error messages with recovery hints - Hours range validation (1-720) - find_nearest limit/distance validation - search() and find_nearest() now async with cache TTL checks - Better error type info in conditions snapshot partial failures
This commit is contained in:
parent
6c244b3a63
commit
20059b9476
@ -1,6 +1,8 @@
|
|||||||
"""Async NOAA CO-OPS API client with station caching and proximity search."""
|
"""Async NOAA CO-OPS API client with station caching and proximity search."""
|
||||||
|
|
||||||
import math
|
import math
|
||||||
|
import re
|
||||||
|
import sys
|
||||||
import time
|
import time
|
||||||
|
|
||||||
import httpx
|
import httpx
|
||||||
@ -11,6 +13,18 @@ DATA_URL = "https://api.tidesandcurrents.noaa.gov/api/prod/datagetter"
|
|||||||
META_URL = "https://api.tidesandcurrents.noaa.gov/mdapi/prod/webapi"
|
META_URL = "https://api.tidesandcurrents.noaa.gov/mdapi/prod/webapi"
|
||||||
|
|
||||||
CACHE_TTL = 86400 # 24 hours
|
CACHE_TTL = 86400 # 24 hours
|
||||||
|
MAX_RANGE_HOURS = 720 # NOAA API cap ~30 days
|
||||||
|
|
||||||
|
_STATION_ID_RE = re.compile(r"^\d{7}$")
|
||||||
|
|
||||||
|
|
||||||
|
def _validate_station_id(station_id: str) -> str:
|
||||||
|
"""NOAA station IDs are 7-digit numbers (e.g. '8454000')."""
|
||||||
|
if not _STATION_ID_RE.match(station_id):
|
||||||
|
raise ValueError(
|
||||||
|
f"Invalid station ID '{station_id}': expected a 7-digit number (e.g. '8454000')"
|
||||||
|
)
|
||||||
|
return station_id
|
||||||
|
|
||||||
|
|
||||||
def haversine(lat1: float, lon1: float, lat2: float, lon2: float) -> float:
|
def haversine(lat1: float, lon1: float, lat2: float, lon2: float) -> float:
|
||||||
@ -54,17 +68,39 @@ class NOAAClient:
|
|||||||
|
|
||||||
async def get_stations(self) -> list[Station]:
|
async def get_stations(self) -> list[Station]:
|
||||||
if time.monotonic() - self._cache_time > CACHE_TTL:
|
if time.monotonic() - self._cache_time > CACHE_TTL:
|
||||||
await self._refresh_stations()
|
try:
|
||||||
|
await self._refresh_stations()
|
||||||
|
except Exception:
|
||||||
|
# Serve stale data rather than failing the request.
|
||||||
|
# If cache was never populated, re-raise.
|
||||||
|
if not self._stations:
|
||||||
|
raise
|
||||||
|
print(
|
||||||
|
"Warning: station cache refresh failed, serving stale data",
|
||||||
|
file=sys.stderr,
|
||||||
|
)
|
||||||
return list(self._stations)
|
return list(self._stations)
|
||||||
|
|
||||||
# -- Metadata API --
|
# -- Metadata API --
|
||||||
|
|
||||||
async def get_station_metadata(self, station_id: str) -> dict:
|
async def get_station_metadata(self, station_id: str) -> dict:
|
||||||
resp = await self._http.get(
|
_validate_station_id(station_id)
|
||||||
f"{META_URL}/stations/{station_id}.json",
|
try:
|
||||||
params={"expand": "details,sensors,datums,products,disclaimers"},
|
resp = await self._http.get(
|
||||||
)
|
f"{META_URL}/stations/{station_id}.json",
|
||||||
resp.raise_for_status()
|
params={"expand": "details,sensors,datums,products,disclaimers"},
|
||||||
|
)
|
||||||
|
resp.raise_for_status()
|
||||||
|
except httpx.HTTPStatusError as exc:
|
||||||
|
if exc.response.status_code == 404:
|
||||||
|
raise ValueError(
|
||||||
|
f"Station '{station_id}' not found. "
|
||||||
|
"Verify the ID using search_stations."
|
||||||
|
) from exc
|
||||||
|
raise RuntimeError(
|
||||||
|
f"NOAA metadata API error ({exc.response.status_code}). "
|
||||||
|
"The service may be temporarily unavailable."
|
||||||
|
) from exc
|
||||||
data = resp.json()
|
data = resp.json()
|
||||||
# The metadata API wraps the station in a "stations" list
|
# The metadata API wraps the station in a "stations" list
|
||||||
stations = data.get("stations", [])
|
stations = data.get("stations", [])
|
||||||
@ -91,6 +127,11 @@ class NOAAClient:
|
|||||||
Date format: yyyyMMdd or yyyyMMdd HH:mm
|
Date format: yyyyMMdd or yyyyMMdd HH:mm
|
||||||
If no date range or hours specified, defaults to last 24 hours.
|
If no date range or hours specified, defaults to last 24 hours.
|
||||||
"""
|
"""
|
||||||
|
_validate_station_id(station_id)
|
||||||
|
|
||||||
|
if hours and (hours < 0 or hours > MAX_RANGE_HOURS):
|
||||||
|
raise ValueError(f"hours must be between 1 and {MAX_RANGE_HOURS}, got {hours}")
|
||||||
|
|
||||||
params: dict[str, str] = {
|
params: dict[str, str] = {
|
||||||
"station": station_id,
|
"station": station_id,
|
||||||
"product": product,
|
"product": product,
|
||||||
@ -113,8 +154,19 @@ class NOAAClient:
|
|||||||
if not begin_date and not end_date and not hours:
|
if not begin_date and not end_date and not hours:
|
||||||
params["range"] = "24"
|
params["range"] = "24"
|
||||||
|
|
||||||
resp = await self._http.get(DATA_URL, params=params)
|
try:
|
||||||
resp.raise_for_status()
|
resp = await self._http.get(DATA_URL, params=params)
|
||||||
|
resp.raise_for_status()
|
||||||
|
except httpx.HTTPStatusError as exc:
|
||||||
|
if exc.response.status_code == 404:
|
||||||
|
raise ValueError(
|
||||||
|
f"No data for station '{station_id}' product '{product}'. "
|
||||||
|
"Use get_station_info to check available products."
|
||||||
|
) from exc
|
||||||
|
raise RuntimeError(
|
||||||
|
f"NOAA data API error ({exc.response.status_code}). "
|
||||||
|
"The service may be temporarily unavailable."
|
||||||
|
) from exc
|
||||||
result = resp.json()
|
result = resp.json()
|
||||||
|
|
||||||
if "error" in result:
|
if "error" in result:
|
||||||
@ -124,13 +176,15 @@ class NOAAClient:
|
|||||||
|
|
||||||
# -- In-memory search --
|
# -- In-memory search --
|
||||||
|
|
||||||
def search(
|
async def search(
|
||||||
self,
|
self,
|
||||||
query: str = "",
|
query: str = "",
|
||||||
state: str = "",
|
state: str = "",
|
||||||
is_tidal: bool | None = None,
|
is_tidal: bool | None = None,
|
||||||
) -> list[Station]:
|
) -> list[Station]:
|
||||||
matches = self._stations
|
"""Filter cached stations. Triggers cache refresh if TTL expired."""
|
||||||
|
stations = await self.get_stations()
|
||||||
|
matches = stations
|
||||||
if query:
|
if query:
|
||||||
q = query.lower()
|
q = query.lower()
|
||||||
matches = [s for s in matches if q in s.name.lower() or q in s.id]
|
matches = [s for s in matches if q in s.name.lower() or q in s.id]
|
||||||
@ -141,7 +195,7 @@ class NOAAClient:
|
|||||||
matches = [s for s in matches if s.tidal == is_tidal]
|
matches = [s for s in matches if s.tidal == is_tidal]
|
||||||
return matches
|
return matches
|
||||||
|
|
||||||
def find_nearest(
|
async def find_nearest(
|
||||||
self,
|
self,
|
||||||
lat: float,
|
lat: float,
|
||||||
lon: float,
|
lon: float,
|
||||||
@ -149,8 +203,9 @@ class NOAAClient:
|
|||||||
max_distance: float = 100,
|
max_distance: float = 100,
|
||||||
) -> list[tuple[Station, float]]:
|
) -> list[tuple[Station, float]]:
|
||||||
"""Return stations within max_distance nautical miles, sorted by proximity."""
|
"""Return stations within max_distance nautical miles, sorted by proximity."""
|
||||||
|
stations = await self.get_stations()
|
||||||
results: list[tuple[Station, float]] = []
|
results: list[tuple[Station, float]] = []
|
||||||
for station in self._stations:
|
for station in stations:
|
||||||
dist = haversine(lat, lon, station.lat, station.lng)
|
dist = haversine(lat, lon, station.lat, station.lng)
|
||||||
if dist <= max_distance:
|
if dist <= max_distance:
|
||||||
results.append((station, dist))
|
results.append((station, dist))
|
||||||
|
|||||||
@ -34,7 +34,7 @@ def register(mcp: FastMCP) -> None:
|
|||||||
if not target:
|
if not target:
|
||||||
return json.dumps({"error": f"Station {station_id} not found"})
|
return json.dumps({"error": f"Station {station_id} not found"})
|
||||||
|
|
||||||
nearby = noaa.find_nearest(target.lat, target.lng, limit=10, max_distance=50)
|
nearby = await noaa.find_nearest(target.lat, target.lng, limit=10, max_distance=50)
|
||||||
# Exclude the station itself
|
# Exclude the station itself
|
||||||
nearby = [(s, d) for s, d in nearby if s.id != station_id]
|
nearby = [(s, d) for s, d in nearby if s.id != station_id]
|
||||||
return json.dumps(
|
return json.dumps(
|
||||||
|
|||||||
@ -14,7 +14,19 @@ from noaa_tides.tools import conditions, meteorological, stations, tides
|
|||||||
async def lifespan(server: FastMCP):
|
async def lifespan(server: FastMCP):
|
||||||
"""Manage the NOAAClient lifecycle — create, pre-warm station cache, close."""
|
"""Manage the NOAAClient lifecycle — create, pre-warm station cache, close."""
|
||||||
client = NOAAClient()
|
client = NOAAClient()
|
||||||
await client.initialize()
|
try:
|
||||||
|
await client.initialize()
|
||||||
|
except Exception as exc:
|
||||||
|
# Start with empty cache — will populate on first station request.
|
||||||
|
# HTTP client still needs to exist for data fetches.
|
||||||
|
import httpx
|
||||||
|
|
||||||
|
client._http = httpx.AsyncClient(timeout=30)
|
||||||
|
print(
|
||||||
|
f"Warning: station cache pre-warm failed ({exc}). "
|
||||||
|
"Will retry on first request.",
|
||||||
|
file=sys.stderr,
|
||||||
|
)
|
||||||
try:
|
try:
|
||||||
yield {"noaa_client": client}
|
yield {"noaa_client": client}
|
||||||
finally:
|
finally:
|
||||||
|
|||||||
@ -52,7 +52,8 @@ def register(mcp: FastMCP) -> None:
|
|||||||
data = await noaa.get_data(station_id, **params)
|
data = await noaa.get_data(station_id, **params)
|
||||||
return name, data
|
return name, data
|
||||||
except Exception as exc:
|
except Exception as exc:
|
||||||
return name, str(exc)
|
msg = str(exc) or type(exc).__name__
|
||||||
|
return name, f"{type(exc).__name__}: {msg}"
|
||||||
|
|
||||||
results = await asyncio.gather(
|
results = await asyncio.gather(
|
||||||
*[fetch(name, params) for name, params in requests.items()]
|
*[fetch(name, params) for name, params in requests.items()]
|
||||||
|
|||||||
@ -23,7 +23,7 @@ def register(mcp: FastMCP) -> None:
|
|||||||
Returns up to 50 matching stations with id, name, state, coordinates, and tidal status.
|
Returns up to 50 matching stations with id, name, state, coordinates, and tidal status.
|
||||||
"""
|
"""
|
||||||
noaa: NOAAClient = ctx.lifespan_context["noaa_client"]
|
noaa: NOAAClient = ctx.lifespan_context["noaa_client"]
|
||||||
results = noaa.search(query=query, state=state, is_tidal=is_tidal)
|
results = await noaa.search(query=query, state=state, is_tidal=is_tidal)
|
||||||
return [s.model_dump() for s in results[:50]]
|
return [s.model_dump() for s in results[:50]]
|
||||||
|
|
||||||
@mcp.tool(tags={"discovery"})
|
@mcp.tool(tags={"discovery"})
|
||||||
@ -43,7 +43,11 @@ def register(mcp: FastMCP) -> None:
|
|||||||
for stations near Narragansett Bay.
|
for stations near Narragansett Bay.
|
||||||
"""
|
"""
|
||||||
noaa: NOAAClient = ctx.lifespan_context["noaa_client"]
|
noaa: NOAAClient = ctx.lifespan_context["noaa_client"]
|
||||||
results = noaa.find_nearest(
|
if limit < 1:
|
||||||
|
raise ValueError("limit must be at least 1")
|
||||||
|
if max_distance_nm <= 0:
|
||||||
|
raise ValueError("max_distance_nm must be positive")
|
||||||
|
results = await noaa.find_nearest(
|
||||||
latitude, longitude, limit=limit, max_distance=max_distance_nm
|
latitude, longitude, limit=limit, max_distance=max_distance_nm
|
||||||
)
|
)
|
||||||
return [
|
return [
|
||||||
|
|||||||
Loading…
x
Reference in New Issue
Block a user