Features:
- 8 comprehensive PDF processing tools with intelligent fallbacks
- Text extraction (PyMuPDF, pdfplumber, pypdf with auto-selection)
- Table extraction (Camelot → pdfplumber → Tabula fallback chain)
- OCR processing with Tesseract and preprocessing options
- Document analysis (structure, metadata, scanned detection)
- Image extraction with filtering capabilities
- PDF to markdown conversion with metadata
- Built on FastMCP framework with full MCP protocol support
- Comprehensive error handling and user-friendly messages
- Docker support and cross-platform compatibility
- Complete test suite and examples

🤖 Generated with [Claude Code](https://claude.ai/code)

Co-Authored-By: Claude <noreply@anthropic.com>
40 lines
1.2 KiB
Plaintext
40 lines
1.2 KiB
Plaintext
# MCP PDF Tools Configuration
|
|
|
|
# Tesseract OCR configuration
|
|
# Path to Tesseract data directory (for language files)
|
|
# Ubuntu/Debian: /usr/share/tesseract-ocr/5/tessdata (Tesseract 4 packages use /usr/share/tesseract-ocr/4.00/tessdata)
|
|
# macOS (Homebrew): /usr/local/share/tessdata (Intel) or /opt/homebrew/share/tessdata (Apple Silicon)
|
|
# Windows: C:\Program Files\Tesseract-OCR\tessdata
|
|
TESSDATA_PREFIX=/usr/share/tesseract-ocr/5/tessdata
|
|
|
|
# Temporary directory for PDF processing
|
|
# Used for intermediate files during conversion
|
|
PDF_TEMP_DIR=/tmp/pdf_processing
|
|
|
|
# Poppler utilities path (for pdf2image)
|
|
# Only needed if poppler-utils is not in PATH
|
|
# Ubuntu/Debian: Usually in PATH
|
|
# macOS (Homebrew): /usr/local/bin (Intel) or /opt/homebrew/bin (Apple Silicon)
|
|
# Windows: C:\Program Files\poppler-0.68.0\bin (adjust to the installed Poppler version)
|
|
# POPPLER_PATH=/usr/local/bin
|
|
|
|
# Java home for Tabula (table extraction)
|
|
# Only needed if Java is not in PATH; set to the JDK/JRE installation root, not its bin directory
|
|
# JAVA_HOME=/usr/lib/jvm/java-11-openjdk
|
|
|
|
# Debug mode
|
|
# Set to true for verbose logging
|
|
DEBUG=false
|
|
|
|
# Maximum file size in MB
|
|
# PDFs larger than this will be rejected
|
|
MAX_PDF_SIZE_MB=100
|
|
|
|
# Default DPI for PDF to image conversion
|
|
# Higher values = better quality but slower processing
|
|
DEFAULT_DPI=300
|
|
|
|
# Default OCR languages (comma-separated)
|
|
# Common codes: eng (English), fra (French), deu (German), spa (Spanish)
|
|
DEFAULT_OCR_LANGUAGES=eng
|