mcp-pdf-tools/test_url_support.py
Ryan Malloy 58d43851b9 Add HTTPS URL support and fix MCP parameter validation
Features:
- HTTPS URL support: Process PDFs directly from URLs with intelligent caching
- Smart caching: 1-hour cache to avoid repeated downloads
- Content validation: Verify downloads are actually PDF files
- Security: Proper User-Agent headers, HTTPS preferred over HTTP
- MCP parameter fixes: Handle pages parameter as string "[2,3]" format
- Backward compatibility: Still supports local file paths and list parameters

Technical changes:
- Added download_pdf_from_url() with caching and validation
- Updated validate_pdf_path() to handle URLs and local paths
- Added parse_pages_parameter() for flexible parameter parsing
- Updated all 8 tools to accept string pages parameters
- Enhanced error handling for network and validation issues

All tools now support:
- Local paths: "/path/to/file.pdf"
- HTTPS URLs: "https://example.com/document.pdf"
- Flexible pages: "[2,3]", "1,2,3", or [1,2,3]

🤖 Generated with [Claude Code](https://claude.ai/code)

Co-Authored-By: Claude <noreply@anthropic.com>
2025-08-11 02:25:53 -06:00

71 lines
2.0 KiB
Python

#!/usr/bin/env python3
"""
Test URL support for MCP PDF Tools
"""
import asyncio
import sys
import os
# Add src to path
sys.path.insert(0, 'src')
from mcp_pdf_tools.server import validate_pdf_path, download_pdf_from_url
async def test_url_validation():
    """Check that a remote PDF URL passes ``validate_pdf_path``.

    The sample URL is handed to ``validate_pdf_path``; the object it returns
    is printed and then stat'ed to report its size.

    Returns:
        True if no exception was raised, False otherwise (the error is
        printed rather than propagated).
    """
    print("Testing URL validation and download...")

    # Publicly hosted sample PDF used as the target for this check.
    sample_url = "https://www.w3.org/WAI/ER/tests/xhtml/testfiles/resources/pdf/dummy.pdf"

    try:
        print(f"Testing URL: {sample_url}")
        pdf_path = await validate_pdf_path(sample_url)
        print(f"✅ Successfully downloaded and validated PDF: {pdf_path}")
        # stat() runs after the success line, so a stat failure still
        # leaves that line printed before the failure message.
        size_in_bytes = pdf_path.stat().st_size
        print(f" File size: {size_in_bytes} bytes")
    except Exception as exc:
        print(f"❌ URL test failed: {exc}")
        passed = False
    else:
        passed = True
    return passed
async def test_local_path():
    """Check that ``validate_pdf_path`` still accepts a local file path.

    The fixture is the hard-coded file ``/tmp/test_text.pdf``. When that file
    does not exist the check is skipped and reported as a success.

    Returns:
        True if the fixture is missing or validation raised no exception,
        False if validation raised (the error is printed, not propagated).
    """
    print("\nTesting local path validation...")

    # Pre-existing sample PDF expected on the machine running the script.
    fixture_pdf = "/tmp/test_text.pdf"

    if os.path.exists(fixture_pdf):
        try:
            validated = await validate_pdf_path(fixture_pdf)
            print(f"✅ Local path validation works: {validated}")
            return True
        except Exception as exc:
            print(f"❌ Local path test failed: {exc}")
            return False

    # No fixture available: skip rather than fail.
    print(f"⚠️ Test file {fixture_pdf} not found, skipping local test")
    return True
async def main():
    """Run the URL check, then the local-path check, and print a summary.

    Returns:
        0 when both checks report success, 1 otherwise; the caller uses
        this value as the process exit status.
    """
    print("🧪 Testing MCP PDF Tools URL Support\n")

    # The checks run one after the other, URL first.
    url_ok = await test_url_validation()
    local_ok = await test_local_path()

    def verdict(ok):
        """Map a check outcome to its summary label."""
        if ok:
            return '✅ PASS'
        return '❌ FAIL'

    print("\n📊 Test Results:")
    print(f" URL support: {verdict(url_ok)}")
    print(f" Local paths: {verdict(local_ok)}")

    if not (url_ok and local_ok):
        print("\n🚨 Some tests failed.")
        return 1

    print("\n🎉 All tests passed! URL support is working.")
    return 0
if __name__ == "__main__":
    # Run the async entry point and use its return value as the exit status.
    exit_status = asyncio.run(main())
    sys.exit(exit_status)