mcp-pdf-tools/examples/create_test_pdf.py
Ryan Malloy c902e81e4d Initial commit: Complete MCP PDF Tools server implementation
Features:
- 8 comprehensive PDF processing tools with intelligent fallbacks
- Text extraction (PyMuPDF, pdfplumber, pypdf with auto-selection)
- Table extraction (Camelot → pdfplumber → Tabula fallback chain)
- OCR processing with Tesseract and preprocessing options
- Document analysis (structure, metadata, scanned detection)
- Image extraction with filtering capabilities
- PDF to markdown conversion with metadata
- Built on FastMCP framework with full MCP protocol support
- Comprehensive error handling and user-friendly messages
- Docker support and cross-platform compatibility
- Complete test suite and examples

🤖 Generated with [Claude Code](https://claude.ai/code)

Co-Authored-By: Claude <noreply@anthropic.com>
2025-08-10 16:36:21 -06:00

102 lines
3.5 KiB
Python

#!/usr/bin/env python3
"""Create a test PDF for testing the MCP PDF Tools"""
from reportlab.lib.pagesizes import letter
from reportlab.pdfgen import canvas
from reportlab.lib.units import inch
from reportlab.platypus import SimpleDocTemplate, Table, TableStyle, Paragraph, Spacer
from reportlab.lib.styles import getSampleStyleSheet
from reportlab.lib import colors
def create_test_pdf(filename="test_document.pdf"):
    """Write a sample PDF to ``filename`` for exercising the MCP PDF Tools.

    The document holds a title, an introduction, three numbered sections
    (plain text, a styled product table, and a description of the document
    structure) plus title/author/subject/keywords metadata. A confirmation
    line is printed once the file has been built; nothing is returned.
    """
    doc = SimpleDocTemplate(filename, pagesize=letter)
    sheet = getSampleStyleSheet()

    def gap(inches):
        # Vertical whitespace between flowables, given in inches.
        return Spacer(1, inches*inch)

    def heading(text):
        # Numbered section heading.
        return Paragraph(text, sheet['Heading2'])

    def body(text):
        # Regular paragraph text.
        return Paragraph(text, sheet['Normal'])

    def product_table():
        # Small priced-items grid: header row, three products, totals row.
        rows = [
            ['Product', 'Price', 'Quantity', 'Total'],
            ['Widget A', '$10.00', '5', '$50.00'],
            ['Widget B', '$15.00', '3', '$45.00'],
            ['Widget C', '$20.00', '2', '$40.00'],
            ['Total', '', '', '$135.00']
        ]
        # Cell ranges as (start, stop) pairs of (column, row) coordinates.
        header_row = ((0, 0), (-1, 0))
        data_rows = ((0, 1), (-1, -1))
        whole_table = ((0, 0), (-1, -1))

        grid = Table(rows)
        grid.setStyle(TableStyle([
            ('BACKGROUND', *header_row, colors.grey),
            ('TEXTCOLOR', *header_row, colors.whitesmoke),
            ('ALIGN', *whole_table, 'CENTER'),
            ('FONTNAME', *header_row, 'Helvetica-Bold'),
            ('FONTSIZE', *header_row, 14),
            ('BOTTOMPADDING', *header_row, 12),
            ('BACKGROUND', *data_rows, colors.beige),
            ('GRID', *whole_table, 1, colors.black),
        ]))
        return grid

    # Flowables in reading order: title, introduction, then sections 1-3.
    flowables = [
        Paragraph("MCP PDF Tools Test Document", sheet['Title']),
        gap(0.3),
        body(
            "This is a test document created to demonstrate the capabilities of the MCP PDF Tools server. "
            "It contains various elements including text, tables, and metadata to test different extraction features."
        ),
        gap(0.2),

        heading("1. Text Extraction Test"),
        gap(0.1),
        body(
            "This section contains regular paragraph text that should be easily extractable using any of the "
            "text extraction methods (PyMuPDF, pdfplumber, or pypdf). The text includes various formatting "
            "and should maintain its structure when extracted with layout preservation enabled."
        ),
        gap(0.2),

        heading("2. Table Extraction Test"),
        gap(0.1),
        product_table(),
        gap(0.2),

        heading("3. Document Structure Test"),
        gap(0.1),
        body(
            "This document has a clear structure with numbered sections and headings. "
            "The document structure extraction should identify these sections and create "
            "an outline or table of contents."
        ),
        gap(0.2),
    ]

    # Document-info fields are set as attributes on the template ahead of build().
    metadata = (
        ("title", "MCP PDF Tools Test Document"),
        ("author", "MCP PDF Tools Tester"),
        ("subject", "Testing PDF Processing"),
        ("keywords", ["test", "pdf", "mcp", "extraction"]),
    )
    for field, value in metadata:
        setattr(doc, field, value)

    doc.build(flowables)
    print(f"✅ Created test PDF: {filename}")
# Script entry point: when run directly (not imported), generate the sample
# PDF using the function's default output filename.
if __name__ == "__main__":
    create_test_pdf()