""" Example usage of MCP PDF Tools server This script demonstrates how to test the PDF tools locally. """ import asyncio import sys import json from pathlib import Path # Add the src directory to the path sys.path.insert(0, str(Path(__file__).parent.parent / "src")) from mcp_pdf_tools.server import create_server async def call_tool(mcp, tool_name: str, **kwargs): """Call a tool through the MCP server""" tools = await mcp.get_tools() if tool_name not in tools: raise ValueError(f"Tool '{tool_name}' not found") tool = tools[tool_name] # Call the tool's function directly using the fn attribute result = await tool.fn(**kwargs) return result async def test_pdf_tools(pdf_path: str): """Test various PDF tools on a given PDF file""" # Create the MCP server mcp = create_server() print(f"\n{'='*60}") print(f"Testing PDF Tools on: {pdf_path}") print(f"{'='*60}\n") # 1. Check if PDF is scanned print("1. Checking if PDF is scanned...") scan_result = await call_tool(mcp, "is_scanned_pdf", pdf_path=pdf_path) print(f" Is scanned: {scan_result.get('is_scanned', 'Unknown')}") print(f" Recommendation: {scan_result.get('recommendation', 'N/A')}") # 2. Extract metadata print("\n2. Extracting metadata...") metadata_result = await call_tool(mcp, "extract_metadata", pdf_path=pdf_path) if "error" not in metadata_result: print(f" Title: {metadata_result['metadata'].get('title', 'N/A')}") print(f" Author: {metadata_result['metadata'].get('author', 'N/A')}") print(f" Pages: {metadata_result['statistics'].get('page_count', 'N/A')}") print(f" File size: {metadata_result['file_info'].get('size_mb', 'N/A')} MB") else: print(f" Error: {metadata_result['error']}") # 3. Get document structure print("\n3. Getting document structure...") structure_result = await call_tool(mcp, "get_document_structure", pdf_path=pdf_path) if "error" not in structure_result: print(f" Outline items: {len(structure_result.get('outline', []))}") fonts = structure_result.get('fonts', []) if fonts: print(f" Fonts used: {', '.join(fonts[:3])}...") else: print(f" Error: {structure_result['error']}") # 4. Extract text (if not scanned) if not scan_result.get('is_scanned', True): print("\n4. Extracting text...") text_result = await call_tool(mcp, "extract_text", pdf_path=pdf_path, pages=[0]) # First page only if "error" not in text_result: text_preview = text_result['text'][:200].replace('\n', ' ') print(f" Method used: {text_result['method_used']}") print(f" Text preview: {text_preview}...") else: print(f" Error: {text_result['error']}") else: print("\n4. Skipping text extraction (PDF is scanned)") # 5. Extract tables print("\n5. Extracting tables...") table_result = await call_tool(mcp, "extract_tables", pdf_path=pdf_path, pages=[0]) # First page only if "error" not in table_result: print(f" Tables found: {table_result['total_tables']}") print(f" Method used: {table_result['method_used']}") if table_result['total_tables'] > 0: first_table = table_result['tables'][0] print(f" First table shape: {first_table['shape']['rows']}x{first_table['shape']['columns']}") else: print(f" Error: {table_result['error']}") # 6. Convert to Markdown (first page) print("\n6. Converting to Markdown...") markdown_result = await call_tool(mcp, "pdf_to_markdown", pdf_path=pdf_path, pages=[0], include_images=False) if "error" not in markdown_result: md_preview = markdown_result['markdown'][:200].replace('\n', ' ') print(f" Markdown preview: {md_preview}...") else: print(f" Error: {markdown_result['error']}") # 7. Extract images print("\n7. Extracting images...") images_result = await call_tool(mcp, "extract_images", pdf_path=pdf_path, pages=[0]) if "error" not in images_result: print(f" Images found: {images_result['total_images']}") if images_result['total_images'] > 0: first_image = images_result['images'][0] print(f" First image size: {first_image['width']}x{first_image['height']}") else: print(f" Error: {images_result['error']}") print(f"\n{'='*60}") print("Testing complete!") print(f"{'='*60}\n") async def main(): """Main function to run the tests""" if len(sys.argv) < 2: print("Usage: python test_pdf_tools.py ") print("\nExample:") print(" python test_pdf_tools.py /path/to/document.pdf") sys.exit(1) pdf_path = sys.argv[1] # Check if file exists if not Path(pdf_path).exists(): print(f"Error: File not found: {pdf_path}") sys.exit(1) # Check if it's a PDF if not pdf_path.lower().endswith('.pdf'): print(f"Error: File must be a PDF: {pdf_path}") sys.exit(1) try: await test_pdf_tools(pdf_path) except Exception as e: print(f"\nError during testing: {e}") import traceback traceback.print_exc() sys.exit(1) if __name__ == "__main__": asyncio.run(main())