From ae80388ec426b70c5141458ce6482589b68002c8 Mon Sep 17 00:00:00 2001 From: Ryan Malloy Date: Wed, 20 Aug 2025 13:50:09 -0600 Subject: [PATCH] =?UTF-8?q?=F0=9F=8E=AF=20Add=20custom=20output=20paths=20?= =?UTF-8?q?and=20clean=20summary=20for=20image=20extraction?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Enhance extract_images with user-specified output directories and concise summary responses to improve user control and reduce context window clutter. Key Features: • Custom Output Directory: Users can specify where images are saved • Clean Summary Output: Concise extraction results instead of verbose metadata • Automatic Directory Creation: Creates output directories as needed • File-Level Details: Individual file info with human-readable sizes • Extraction Summary: Quick overview with total size and file count New Parameters: + output_directory: Optional custom path for saving extracted images + Defaults to cache directory if not specified + Creates directories automatically with proper permissions Response Format: - Removed: Verbose image metadata arrays that fill context windows + Added: Clean summary with extraction statistics + Added: File list with essential details (filename, path, size, dimensions) + Added: Human-readable extraction summary Benefits: ✅ User control over image file locations ✅ Reduced context window pollution ✅ Essential information without verbosity ✅ Better integration with user workflows ✅ Maintains MCP resource compatibility for cached images Example Response: { "success": true, "images_extracted": 3, "total_size": "2.4 MB", "output_directory": "/path/to/custom/dir", "files": [{"filename": "page_1_image_0.png", "path": "/path/...", "size": "800 KB", "dimensions": "1920x1080"}] } 🤖 Generated with [Claude Code](https://claude.ai/code) Co-Authored-By: Claude --- CLAUDE.md | 12 ++++---- src/mcp_pdf_tools/server.py | 59 +++++++++++++++++++------------------ 2 files changed, 37 insertions(+), 34 deletions(-) diff --git a/CLAUDE.md b/CLAUDE.md index c92a2f4..4dab628 100644 --- a/CLAUDE.md +++ b/CLAUDE.md @@ -80,16 +80,16 @@ uv publish 3. **OCR Processing**: `ocr_pdf` - Tesseract with preprocessing options 4. **Document Analysis**: `is_scanned_pdf`, `get_document_structure`, `extract_metadata` 5. **Format Conversion**: `pdf_to_markdown` - Clean markdown with MCP resource URIs for images -6. **Image Processing**: `extract_images` - Extract images with MCP resource URIs for direct client access +6. **Image Processing**: `extract_images` - Extract images with custom output paths and clean summary output ### MCP Client-Friendly Design **Optimized for MCP Context Management:** -- **Image Processing**: `extract_images` and `pdf_to_markdown` return MCP resource URIs for direct image access -- **Resource URIs**: Images accessible via `pdf-image://{image_id}` protocol for seamless client integration -- **Prevents Context Overflow**: Avoids verbose base64 output that fills client message windows -- **File-Based Storage**: Images saved to cache with metadata including file paths and human-readable sizes -- **Direct Access**: MCP clients can fetch images directly using resource URIs +- **Custom Output Paths**: `extract_images` allows users to specify where images are saved +- **Clean Summary Output**: Returns concise extraction summary instead of verbose image metadata +- **Resource URIs**: `pdf_to_markdown` uses `pdf-image://{image_id}` protocol for seamless client integration +- **Prevents Context Overflow**: Avoids verbose output that fills client message windows +- **User Control**: Flexible output directory support with automatic directory creation ### Intelligent Fallbacks diff --git a/src/mcp_pdf_tools/server.py b/src/mcp_pdf_tools/server.py index 5cf9a52..4a18bbe 100644 --- a/src/mcp_pdf_tools/server.py +++ b/src/mcp_pdf_tools/server.py @@ -792,16 +792,17 @@ async def pdf_to_markdown( return {"error": f"Conversion failed: {str(e)}"} # Image extraction -@mcp.tool(name="extract_images", description="Extract images from PDF with MCP resource URIs for direct access") +@mcp.tool(name="extract_images", description="Extract images from PDF with custom output path and clean summary") async def extract_images( pdf_path: str, pages: Optional[str] = None, # Accept as string for MCP compatibility min_width: int = 100, min_height: int = 100, - output_format: str = "png" + output_format: str = "png", + output_directory: Optional[str] = None # Custom output directory ) -> Dict[str, Any]: """ - Extract images from PDF with MCP resource access + Extract images from PDF with custom output directory and summary results Args: pdf_path: Path to PDF file or HTTPS URL @@ -809,16 +810,25 @@ async def extract_images( min_width: Minimum image width to extract min_height: Minimum image height to extract output_format: Output format (png, jpeg) + output_directory: Custom directory to save images (defaults to cache directory) Returns: - Dictionary containing image metadata and MCP resource URIs for direct access + Summary of extraction results with file locations (no verbose metadata) """ try: path = await validate_pdf_path(pdf_path) parsed_pages = parse_pages_parameter(pages) doc = fitz.open(str(path)) - images = [] + # Determine output directory + if output_directory: + output_dir = Path(output_directory) + output_dir.mkdir(parents=True, exist_ok=True) + else: + output_dir = CACHE_DIR + + extracted_files = [] + total_size = 0 page_range = parsed_pages if parsed_pages else range(len(doc)) for page_num in page_range: @@ -835,43 +845,36 @@ async def extract_images( if output_format == "jpeg" and pix.alpha: pix = fitz.Pixmap(fitz.csRGB, pix) - # Save image to file instead of embedding base64 data + # Save image to specified directory img_filename = f"page_{page_num + 1}_image_{img_index}.{output_format}" - img_path = CACHE_DIR / img_filename + img_path = output_dir / img_filename pix.save(str(img_path)) # Calculate file size file_size = img_path.stat().st_size + total_size += file_size - # Create resource URI (filename without extension) - image_id = img_filename.rsplit('.', 1)[0] # Remove extension - resource_uri = f"pdf-image://{image_id}" - - images.append({ - "page": page_num + 1, - "index": img_index, - "file_path": str(img_path), + # Add to extracted files list (summary format) + extracted_files.append({ "filename": img_filename, - "resource_uri": resource_uri, - "width": pix.width, - "height": pix.height, - "format": output_format, - "size_bytes": file_size, - "size_human": format_file_size(file_size) + "path": str(img_path), + "size": format_file_size(file_size), + "dimensions": f"{pix.width}x{pix.height}" }) pix = None doc.close() + # Return clean summary instead of verbose image metadata return { - "images": images, - "total_images": len(images), - "pages_searched": len(page_range), - "filters": { - "min_width": min_width, - "min_height": min_height - } + "success": True, + "images_extracted": len(extracted_files), + "total_size": format_file_size(total_size), + "output_directory": str(output_dir), + "pages_processed": len(page_range), + "files": extracted_files, + "extraction_summary": f"Extracted {len(extracted_files)} images ({format_file_size(total_size)}) to {output_dir}" } except Exception as e: