diff --git a/LOCAL_DEVELOPMENT.md b/LOCAL_DEVELOPMENT.md new file mode 100644 index 0000000..ca1640f --- /dev/null +++ b/LOCAL_DEVELOPMENT.md @@ -0,0 +1,186 @@ +# 🔧 Local Development Guide for MCP PDF + +This guide shows how to test MCP PDF locally during development before publishing to PyPI. + +## 📋 Prerequisites + +- Python 3.10+ +- uv package manager +- Claude Desktop app +- Git repository cloned locally + +## 🚀 Quick Start for Local Testing + +### 1. Clone and Setup + +```bash +# Clone the repository +git clone https://github.com/rsp2k/mcp-pdf.git +cd mcp-pdf + +# Install dependencies +uv sync --dev + +# Verify installation +uv run python -c "from mcp_pdf.server import create_server; print('✅ MCP PDF loads successfully')" +``` + +### 2. Test with Claude Code (Local Development) + +Use the `-t local` flag to point Claude Code to your local development copy: + +```bash +# Start Claude Code with local MCP PDF server +claude-code -t local /path/to/mcp-pdf +``` + +Or if you're already in the mcp-pdf directory: + +```bash +claude-code -t local . +``` + +### 3. Alternative: Manual Server Testing + +You can also run the server manually for debugging: + +```bash +# Run the MCP server directly +uv run mcp-pdf + +# Or run with specific FastMCP options +uv run python -m mcp_pdf.server +``` + +### 4. 
Test Core Functionality + +Once connected to Claude Code, test these key features: + +#### Basic PDF Processing +``` +"Extract text from this PDF file: /path/to/test.pdf" +"Get metadata from this PDF: /path/to/document.pdf" +"Check if this PDF is scanned: /path/to/scan.pdf" +``` + +#### Security Features +``` +"Try to extract text from a very large PDF" +"Process a PDF with 2000 pages" (should be limited to 1000) +``` + +#### Advanced Features +``` +"Extract tables from this PDF: /path/to/tables.pdf" +"Convert this PDF to markdown: /path/to/document.pdf" +"Add annotations to this PDF: /path/to/target.pdf" +``` + +## 🔒 Security Testing + +Verify the security hardening works: + +### File Size Limits +- Try processing a PDF larger than 100MB +- Should see: "PDF file too large: X bytes > 104857600" + +### Page Count Limits +- Try processing a PDF with >1000 pages +- Should see: "PDF too large for processing: X pages > 1000" + +### Path Traversal Protection +- Test with malicious paths like `../../../etc/passwd` +- Should be blocked with security error + +### JSON Input Validation +- Large JSON inputs (>10KB) should be rejected +- Malformed JSON should return clean error messages + +## 🐛 Debugging + +### Enable Debug Logging +```bash +export DEBUG=true +uv run mcp-pdf +``` + +### Check Security Functions +```bash +# Test security validation functions +uv run python test_security_features.py + +# Run integration tests +uv run python test_integration.py +``` + +### Verify Package Structure +```bash +# Check package builds correctly +uv build + +# Verify package metadata +uv run twine check dist/* +``` + +## 📊 Testing Checklist + +Before publishing, verify: + +- [ ] All 23 PDF tools work correctly +- [ ] Security limits are enforced (file size, page count) +- [ ] Error messages are clean and helpful +- [ ] No sensitive information leaked in errors +- [ ] Path traversal protection works +- [ ] JSON input validation works +- [ ] Memory limits prevent crashes +- [ ] CLI 
command `mcp-pdf` works +- [ ] Package imports correctly: `from mcp_pdf.server import create_server` + +## 🚀 Publishing Pipeline + +Once local testing passes: + +1. **Version Bump**: Update version in `pyproject.toml` +2. **Build**: `uv build` +3. **Test Upload**: `uv run twine upload --repository testpypi dist/*` +4. **Test Install**: `pip install --index-url https://test.pypi.org/simple/ --extra-index-url https://pypi.org/simple/ mcp-pdf` (the extra index lets dependencies that are not on TestPyPI resolve from PyPI) +5. **Production Upload**: `uv run twine upload dist/*` + +## 🔧 Development Commands + +```bash +# Format code +uv run black src/ tests/ + +# Lint code +uv run ruff check src/ tests/ + +# Run tests +uv run pytest + +# Security scan +uv run pip-audit + +# Build package +uv build + +# Install editable for development +pip install -e . # (in a venv) +``` + +## 🆘 Troubleshooting + +### "Module not found" errors +- Ensure you're in the right directory +- Run `uv sync` to install dependencies +- Check Python path with `uv run python -c "import sys; print(sys.path)"` + +### MCP server won't start +- Check that all system dependencies are installed (tesseract, java, ghostscript) +- Verify with: `uv run python examples/verify_installation.py` + +### Security tests fail +- Run `uv run python test_security_features.py -v` for detailed output +- Check that security constants are properly set + +This setup allows for rapid development and testing without polluting your system Python or needing to publish to PyPI for every change. \ No newline at end of file diff --git a/claude-mcp-manager b/claude-mcp-manager new file mode 100644 index 0000000..81d157f --- /dev/null +++ b/claude-mcp-manager @@ -0,0 +1,239 @@ +#!/usr/bin/env python3 +""" +Claude MCP Manager - Easy management of MCP servers in Claude Desktop +Usage: claude mcp add <name> <command> [args...] 
import json
import sys
import os
from pathlib import Path
import shutil
import subprocess
import time
from typing import Dict, List, Any, Optional


class ClaudeMCPManager:
    """Manage the ``mcpServers`` section of a Claude Desktop config file.

    Every mutating operation re-reads the JSON config from disk, changes it
    in memory, copies the previous file into a ``backups`` directory next to
    it, and then writes the new content.
    """

    def __init__(self, config_path: Optional[Path] = None):
        """Set up config/backup locations and create the backup directory.

        Args:
            config_path: Location of ``claude_desktop_config.json``. When
                omitted, ``~/.config/Claude/claude_desktop_config.json`` is
                used.
        """
        if config_path is None:
            config_path = Path.home() / ".config" / "Claude" / "claude_desktop_config.json"
        self.config_path = Path(config_path)
        self.config_backup_dir = self.config_path.parent / "backups"
        # parents=True: on a fresh machine the Claude config directory may
        # not exist yet, and a plain mkdir() would raise FileNotFoundError.
        self.config_backup_dir.mkdir(parents=True, exist_ok=True)

    def load_config(self) -> Dict[str, Any]:
        """Load the Claude Desktop configuration.

        Returns:
            The parsed config. The ``"mcpServers"`` key is always present and
            maps to a dict, even if the file on disk omits it. When the file
            does not exist, a default empty config is returned.

        Exits the process with status 1 (after printing an error) when the
        file is not valid JSON or does not have the expected shape.
        """
        if not self.config_path.exists():
            return {"mcpServers": {}, "globalShortcut": ""}

        try:
            with open(self.config_path, encoding="utf-8") as f:
                config = json.load(f)
        except json.JSONDecodeError as e:
            print(f"❌ Error parsing config: {e}")
            sys.exit(1)

        if not isinstance(config, dict):
            print(f"❌ Error parsing config: expected a JSON object, got {type(config).__name__}")
            sys.exit(1)

        servers = config.get("mcpServers")
        if servers is None:
            # A valid config may simply have no servers section yet; callers
            # index config["mcpServers"] directly, so normalise it here.
            config["mcpServers"] = {}
        elif not isinstance(servers, dict):
            print(f"❌ Error parsing config: 'mcpServers' must be an object, got {type(servers).__name__}")
            sys.exit(1)

        return config

    def save_config(self, config: Dict[str, Any]) -> None:
        """Write *config* to disk, backing up the previous file first.

        The backup file name embeds the current Unix time in whole seconds.
        """
        if self.config_path.exists():
            backup_name = f"claude_desktop_config_backup_{int(time.time())}.json"
            backup_path = self.config_backup_dir / backup_name
            shutil.copy2(self.config_path, backup_path)
            print(f"📁 Config backed up to: {backup_path}")

        with open(self.config_path, "w", encoding="utf-8") as f:
            json.dump(config, f, indent=2)
        print(f"✅ Configuration saved to: {self.config_path}")

    def add_server(
        self,
        name: str,
        command: str,
        args: List[str],
        env: Optional[Dict[str, str]] = None,
        directory: Optional[str] = None,
    ) -> bool:
        """Add a new MCP server entry.

        Args:
            name: Key of the entry under ``mcpServers``.
            command: Executable stored as the entry's ``command``.
            args: Arguments stored as the entry's ``args``.
            env: Optional environment variables stored as ``env``.
            directory: Optional working directory stored as ``cwd``.

        Returns:
            True when the entry was added and saved; False when an entry
            with the same name already exists (nothing is written).
        """
        config = self.load_config()

        if name in config["mcpServers"]:
            # There is no "update" sub-command, so point at what does exist.
            print(f"⚠️ Server '{name}' already exists. Use 'claude mcp remove {name}' first to replace it.")
            return False

        server_config: Dict[str, Any] = {"command": command, "args": args}
        if env:
            server_config["env"] = env
        if directory:
            server_config["cwd"] = directory

        config["mcpServers"][name] = server_config
        self.save_config(config)
        print(f"🚀 Added MCP server: {name}")
        return True

    def remove_server(self, name: str) -> bool:
        """Remove an MCP server entry.

        Returns:
            True when the entry existed and was removed; False otherwise.
        """
        config = self.load_config()

        if name not in config["mcpServers"]:
            print(f"❌ Server '{name}' not found")
            return False

        del config["mcpServers"][name]
        self.save_config(config)
        print(f"🗑️ Removed MCP server: {name}")
        return True

    def list_servers(self) -> None:
        """Print every configured MCP server with its command, args, env and cwd."""
        servers = self.load_config().get("mcpServers", {})

        if not servers:
            print("📭 No MCP servers configured")
            return

        print("📋 Configured MCP servers:")
        print("=" * 50)

        for name, server_config in servers.items():
            command = server_config.get("command", "")
            args = server_config.get("args", [])
            env = server_config.get("env", {})
            cwd = server_config.get("cwd", "")

            print(f"🔧 {name}")
            print(f"   Command: {command}")
            if args:
                # str() guards against non-string args in a hand-edited config.
                print(f"   Args: {' '.join(map(str, args))}")
            if env:
                # Only the first three variables are shown to keep output short.
                print(f"   Environment: {dict(list(env.items())[:3])}{'...' if len(env) > 3 else ''}")
            if cwd:
                print(f"   Directory: {cwd}")
            print()

    def add_mcp_pdf_local(self, directory: str) -> bool:
        """Register MCP PDF from a local checkout as ``mcp-pdf-local``.

        The directory must contain ``pyproject.toml`` and
        ``src/mcp_pdf/server.py``; otherwise nothing is written.

        Returns:
            True when the server entry was added, False otherwise.
        """
        abs_dir = os.path.abspath(directory)

        if not os.path.exists(abs_dir):
            print(f"❌ Directory not found: {abs_dir}")
            return False

        # Check if it's a valid MCP PDF directory
        required_files = ["pyproject.toml", "src/mcp_pdf/server.py"]
        for required in required_files:
            if not os.path.exists(os.path.join(abs_dir, required)):
                print(f"❌ Not a valid MCP PDF directory (missing: {required})")
                return False

        return self.add_server(
            name="mcp-pdf-local",
            command="uv",
            args=["--directory", abs_dir, "run", "mcp-pdf"],
            env={"PDF_TEMP_DIR": "/tmp/mcp-pdf-processing"},
            directory=abs_dir,
        )

    def add_mcp_pdf_pip(self) -> bool:
        """Register the pip-installed ``mcp-pdf`` executable as ``mcp-pdf``."""
        return self.add_server(
            name="mcp-pdf",
            command="mcp-pdf",
            args=[],
            env={"PDF_TEMP_DIR": "/tmp/mcp-pdf-processing"},
        )


def print_usage() -> None:
    """Print usage information"""
    print("""
🔧 Claude MCP Manager - Easy MCP server management

USAGE:
    claude mcp add <name> <command> [args...]   # Add generic MCP server
    claude mcp add-local <directory>            # Add MCP PDF from local dev
    claude mcp add-pip                          # Add MCP PDF from pip
    claude mcp remove <name>                    # Remove MCP server
    claude mcp list                             # List all servers
    claude mcp help                             # Show this help

EXAMPLES:
    # Add MCP PDF from local development
    claude mcp add-local /home/user/mcp-pdf

    # Add MCP PDF from pip (after pip install mcp-pdf)
    claude mcp add-pip

    # Add generic MCP server
    claude mcp add memory npx -y @modelcontextprotocol/server-memory

    # Add server with environment variables
    claude mcp add github docker run -i --rm -e GITHUB_TOKEN ghcr.io/github/github-mcp-server

    # Remove a server
    claude mcp remove mcp-pdf-local

    # List all configured servers
    claude mcp list

NOTES:
    • Configuration saved to: ~/.config/Claude/claude_desktop_config.json
    • Automatic backups created before changes
    • Restart Claude Desktop after adding/removing servers
    """)


def main(argv: Optional[List[str]] = None) -> None:
    """Command-line entry point.

    Args:
        argv: Full argument vector including the program name; defaults to
            ``sys.argv``.

    Exits with status 1 on usage errors, unknown commands, or when the
    requested operation reports failure.
    """
    argv = sys.argv if argv is None else list(argv)

    if len(argv) < 2:
        print_usage()
        sys.exit(1)

    command = argv[1].lower()

    # Help and usage errors are handled before a manager is constructed so
    # they never touch the filesystem.
    if command in ("help", "--help", "-h"):
        print_usage()
        return

    succeeded: Optional[bool] = True

    if command == "add":
        if len(argv) < 4:
            print("❌ Usage: claude mcp add <name> <command> [args...]")
            sys.exit(1)
        name, server_command = argv[2], argv[3]
        succeeded = ClaudeMCPManager().add_server(name, server_command, argv[4:])

    elif command == "add-local":
        if len(argv) != 3:
            print("❌ Usage: claude mcp add-local <directory>")
            sys.exit(1)
        succeeded = ClaudeMCPManager().add_mcp_pdf_local(argv[2])

    elif command == "add-pip":
        succeeded = ClaudeMCPManager().add_mcp_pdf_pip()

    elif command == "remove":
        if len(argv) != 3:
            print("❌ Usage: claude mcp remove <name>")
            sys.exit(1)
        succeeded = ClaudeMCPManager().remove_server(argv[2])

    elif command == "list":
        ClaudeMCPManager().list_servers()

    else:
        print(f"❌ Unknown command: {command}")
        print_usage()
        sys.exit(1)

    # Surface operation failures (duplicate name, missing server, invalid
    # directory) to the calling shell instead of always exiting 0.
    if succeeded is False:
        sys.exit(1)


if __name__ == "__main__":
    main()
a/pyproject.toml b/pyproject.toml index 2908688..0adaeb6 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -1,8 +1,8 @@ [project] name = "mcp-pdf" -version = "1.0.0" +version = "1.0.1" description = "Secure FastMCP server for comprehensive PDF processing - text extraction, OCR, table extraction, forms, annotations, and more" -authors = [{name = "MCP Team", email = "team@fastmcp.org"}] +authors = [{name = "Ryan Malloy", email = "ryan@malloys.us"}] readme = "README.md" license = {text = "MIT"} requires-python = ">=3.10" @@ -98,4 +98,5 @@ dev = [ "pytest-cov>=6.2.1", "reportlab>=4.4.3", "safety>=3.2.11", + "twine>=6.1.0", ] diff --git a/src/mcp_pdf/server.py b/src/mcp_pdf/server.py index bc8fa21..2eb3496 100644 --- a/src/mcp_pdf/server.py +++ b/src/mcp_pdf/server.py @@ -547,6 +547,9 @@ async def extract_text( } doc.close() + # Enforce MCP hard limit regardless of user max_tokens setting + effective_max_tokens = min(max_tokens, 24000) # Stay safely under MCP's 25000 limit + # Early chunking decision based on size analysis should_chunk_early = ( total_pages > 50 or # Large page count @@ -592,9 +595,6 @@ async def extract_text( # Estimate token count (rough approximation: 1 token ≈ 4 characters) estimated_tokens = len(text) // 4 - # Enforce MCP hard limit regardless of user max_tokens setting - effective_max_tokens = min(max_tokens, 24000) # Stay safely under MCP's 25000 limit - # Handle large responses with intelligent chunking if estimated_tokens > effective_max_tokens: # Calculate chunk size based on effective token limit diff --git a/uv.lock b/uv.lock index 64a1416..43023fe 100644 --- a/uv.lock +++ b/uv.lock @@ -1032,7 +1032,7 @@ wheels = [ [[package]] name = "mcp-pdf" -version = "1.0.0" +version = "1.0.1" source = { editable = "." 
} dependencies = [ { name = "camelot-py", extra = ["cv"] }, @@ -1073,6 +1073,7 @@ dev = [ { name = "pytest-cov" }, { name = "reportlab" }, { name = "safety" }, + { name = "twine" }, ] [package.metadata] @@ -1112,6 +1113,7 @@ dev = [ { name = "pytest-cov", specifier = ">=6.2.1" }, { name = "reportlab", specifier = ">=4.4.3" }, { name = "safety", specifier = ">=3.2.11" }, + { name = "twine", specifier = ">=6.1.0" }, ] [[package]]