FastMCP server wrapping archive.org's public read APIs:

- search_items / scrape_items: advanced search + bulk cursor pagination
- get_item_metadata / list_files: progressive disclosure with filtering
- get_file_url / download_file: canonical URLs and streaming downloads with HTTP Range resume + optional MD5 verification

Smoke-tested end-to-end via `claude -p` headless MCP and pytest against live archive.org endpoints.
53 lines
1.5 KiB
Python
53 lines
1.5 KiB
Python
"""End-to-end smoke tests against live archive.org (network required).
|
|
|
|
Run with: uv run pytest -v
|
|
Skip with: uv run pytest -v -m 'not network'
|
|
"""
|
|
|
|
from __future__ import annotations
|
|
|
|
from pathlib import Path
|
|
|
|
import pytest
|
|
|
|
from mcarchive_org.client import ArchiveClient
|
|
|
|
# Module-wide markers: every test here carries the asyncio marker and the
# "network" marker, so the whole module can be deselected with
# -m 'not network' when no connection is available.
pytestmark = [pytest.mark.asyncio, pytest.mark.network]
|
|
|
|
|
|
async def test_search_nasa_item():
    """Search for ``identifier:nasa`` and check the ``nasa`` item is among the hits."""
    async with ArchiveClient() as client:
        response = await client.search(query="identifier:nasa", rows=5)
        # The response must report at least one match overall ...
        assert response["num_found"] >= 1
        # ... and the returned docs must include the item itself.
        matches = (doc["identifier"] == "nasa" for doc in response["docs"])
        assert any(matches)
|
|
|
|
|
|
async def test_metadata_nasa():
    """Fetch metadata for ``nasa`` and check its identifier and file listing."""
    async with ArchiveClient() as client:
        record = await client.metadata("nasa")
        assert record["metadata"]["identifier"] == "nasa"
        listing = record["files"]
        # The file listing has to be a list, and a non-empty one.
        assert isinstance(listing, list)
        assert listing
|
|
|
|
|
|
async def test_download_small_file(tmp_path: Path):
    """Download the smallest non-empty file of the ``nasa`` item and check it.

    The test asserts that some bytes were written and, when the file listing
    carries an MD5 for the chosen file, that the client reports it as matching.

    Args:
        tmp_path: pytest-provided temporary directory that receives the download.
    """
    async with ArchiveClient() as c:
        files = await c.files("nasa")
        # Only entries with a purely numeric, non-zero size are candidates. A
        # size of "0" is a truthy string that passes isdigit(), so without the
        # explicit > 0 check an empty file would be the minimum and then trip
        # the ``bytes > 0`` assertion below for reasons unrelated to downloading.
        candidates = [
            f
            for f in files
            if f.get("size") and str(f["size"]).isdigit() and int(f["size"]) > 0
        ]
        # Fail with a readable message instead of min()'s bare ValueError.
        assert candidates, "no file with a usable size in the 'nasa' item listing"
        # pick the smallest file to keep the test fast
        small = min(candidates, key=lambda f: int(f["size"]))
        dest = tmp_path / small["name"]
        result = await c.download_to_file(
            "nasa", small["name"], dest, verify_md5=small.get("md5")
        )
        assert result["bytes"] > 0
        # The MD5 outcome is only checked when the listing supplied a checksum.
        if small.get("md5"):
            assert result["md5_ok"] is True
|
|
|
|
|
|
async def test_scrape_requires_min_count():
    """Expect ``scrape`` to raise ``ValueError`` when called with ``count=10``."""
    async with ArchiveClient() as client:
        # Judging by the test name, 10 is presumably below the client's
        # minimum count -- confirm against the client implementation.
        expect_rejection = pytest.raises(ValueError)
        with expect_rejection:
            await client.scrape(query="identifier:nasa", count=10)
|