Critical fixes:
- Validate identifier (^[A-Za-z0-9._-]+$) and filename (no '..', absolute
paths, NUL bytes, drive letters) at the client boundary
- Confine download destinations under MCARCHIVE_DOWNLOAD_ROOT via
Path.resolve() + is_relative_to() check; reject symlinked dirs
- Use O_NOFOLLOW on the destination open() to refuse symlink substitution
- Detect Range-ignored responses: if resume requested but server returns 200
(or 206 with wrong Content-Range start), raise ArchiveError BEFORE writing
any bytes — closes the silent file-corruption hole
Usability:
- Wrap raise_for_status everywhere with ArchiveError that includes the
response body preview — 4xx Solr errors now tell you what's wrong
- URL-encode filenames in download URLs (handles spaces and special chars)
- Map archive.org's {"error": ...} payloads on /metadata/{id}/files to
ArchiveError with the server's message
- Lazy-resolve download root so env-var changes after import are honored
- Refactor item_resource to a shared async helper (drops .fn type-ignore)
- Rename result key 'bytes' -> 'bytes_written' (avoids shadowing builtin)
Tests:
- New tests/test_client_mocked.py: 29 regression tests using
httpx.MockTransport covering every Hamilton finding above (path traversal,
symlink refusal, Range-ignored, Content-Range mismatch, error body
surfacing, malformed JSON, dark items, etc.)
- Set asyncio_mode = "auto" in pyproject for cleaner test markers
33/33 tests pass (4 live + 29 mocked), ruff clean.
58 lines
1.4 KiB
TOML
58 lines
1.4 KiB
TOML
# Core distribution metadata for the mcarchive-org package.
[project]
name = "mcarchive-org"
# NOTE(review): PEP 440 normalization drops leading zeros in release segments,
# so built artifacts are expected to report this as 2026.4.21 — confirm.
version = "2026.04.21"
description = "MCP server for searching and downloading files from the Internet Archive (archive.org)"
readme = "README.md"
requires-python = ">=3.10"
license = { text = "MIT" }
authors = [
    { name = "Ryan Malloy", email = "ryan@supported.systems" },
]
keywords = ["mcp", "archive.org", "internet-archive", "fastmcp", "llm"]
# The per-version classifiers below mirror the requires-python floor (3.10).
classifiers = [
    "Development Status :: 4 - Beta",
    "Intended Audience :: Developers",
    "License :: OSI Approved :: MIT License",
    "Programming Language :: Python :: 3",
    "Programming Language :: Python :: 3.10",
    "Programming Language :: Python :: 3.11",
    "Programming Language :: Python :: 3.12",
    "Programming Language :: Python :: 3.13",
    "Topic :: Internet :: WWW/HTTP",
]
# Runtime requirements (PEP 508 strings), one per line.
dependencies = [
    "fastmcp>=3.2.4",
    "httpx>=0.28.1",
]

# Console entry point: the `mcarchive-org` command maps to `main` in
# the `mcarchive_org.server` module.
[project.scripts]
mcarchive-org = "mcarchive_org.server:main"

# Links published alongside the package metadata.
# NOTE(review): Homepage points at archive.org's developer docs rather than a
# project repository — confirm this is intentional.
[project.urls]
Homepage = "https://archive.org/developers/"

# PEP 517 build backend: the package is built with hatchling (no version pin).
[build-system]
requires = ["hatchling"]
build-backend = "hatchling.build"

# The wheel target is pointed at the package directory under src/.
[tool.hatch.build.targets.wheel]
packages = ["src/mcarchive_org"]

# asyncio_mode is a pytest-asyncio setting; "auto" lets async tests run
# without an explicit asyncio marker on each test.
[tool.pytest.ini_options]
asyncio_mode = "auto"

# Ruff base settings; target-version matches the requires-python floor (3.10).
[tool.ruff]
line-length = 100
target-version = "py310"

# Enabled rule families: pycodestyle (E, W), Pyflakes (F), isort (I),
# pyupgrade (UP), flake8-bugbear (B), flake8-simplify (SIM), Ruff-specific (RUF).
[tool.ruff.lint]
select = ["E", "F", "W", "I", "UP", "B", "SIM", "RUF"]
# E501 is the line-too-long rule; it is switched off here.
ignore = ["E501"]

# Development-only dependency group (PEP 735); these are not part of the
# runtime `dependencies` of the package.
[dependency-groups]
dev = [
    "pytest>=8.0",
    "pytest-asyncio>=0.23",
    "ruff>=0.5",
]