🎯 Add custom output paths and clean summary for image extraction
Enhance extract_images with user-specified output directories and concise summary responses to improve user control and reduce context window clutter.

Key Features:
• Custom Output Directory: Users can specify where images are saved
• Clean Summary Output: Concise extraction results instead of verbose metadata
• Automatic Directory Creation: Creates output directories as needed
• File-Level Details: Individual file info with human-readable sizes
• Extraction Summary: Quick overview with total size and file count

New Parameters:
+ output_directory: Optional custom path for saving extracted images
+ Defaults to cache directory if not specified
+ Creates directories automatically with proper permissions

Response Format:
- Removed: Verbose image metadata arrays that fill context windows
+ Added: Clean summary with extraction statistics
+ Added: File list with essential details (filename, path, size, dimensions)
+ Added: Human-readable extraction summary

Benefits:
✅ User control over image file locations
✅ Reduced context window pollution
✅ Essential information without verbosity
✅ Better integration with user workflows
✅ Maintains MCP resource compatibility for cached images

Example Response:
{ "success": true, "images_extracted": 3, "total_size": "2.4 MB", "output_directory": "/path/to/custom/dir", "files": [{"filename": "page_1_image_0.png", "path": "/path/...", "size": "800 KB", "dimensions": "1920x1080"}] }

🤖 Generated with [Claude Code](https://claude.ai/code)

Co-Authored-By: Claude <noreply@anthropic.com>
This commit is contained in:
parent
e087a3b7a0
commit
ae80388ec4
12
CLAUDE.md
12
CLAUDE.md
@ -80,16 +80,16 @@ uv publish
|
||||
3. **OCR Processing**: `ocr_pdf` - Tesseract with preprocessing options
|
||||
4. **Document Analysis**: `is_scanned_pdf`, `get_document_structure`, `extract_metadata`
|
||||
5. **Format Conversion**: `pdf_to_markdown` - Clean markdown with MCP resource URIs for images
|
||||
6. **Image Processing**: `extract_images` - Extract images with MCP resource URIs for direct client access
|
||||
6. **Image Processing**: `extract_images` - Extract images with custom output paths and clean summary output
|
||||
|
||||
### MCP Client-Friendly Design
|
||||
|
||||
**Optimized for MCP Context Management:**
|
||||
- **Image Processing**: `extract_images` and `pdf_to_markdown` return MCP resource URIs for direct image access
|
||||
- **Resource URIs**: Images accessible via `pdf-image://{image_id}` protocol for seamless client integration
|
||||
- **Prevents Context Overflow**: Avoids verbose base64 output that fills client message windows
|
||||
- **File-Based Storage**: Images saved to cache with metadata including file paths and human-readable sizes
|
||||
- **Direct Access**: MCP clients can fetch images directly using resource URIs
|
||||
- **Custom Output Paths**: `extract_images` allows users to specify where images are saved
|
||||
- **Clean Summary Output**: Returns concise extraction summary instead of verbose image metadata
|
||||
- **Resource URIs**: `pdf_to_markdown` uses `pdf-image://{image_id}` protocol for seamless client integration
|
||||
- **Prevents Context Overflow**: Avoids verbose output that fills client message windows
|
||||
- **User Control**: Flexible output directory support with automatic directory creation
|
||||
|
||||
### Intelligent Fallbacks
|
||||
|
||||
|
@ -792,16 +792,17 @@ async def pdf_to_markdown(
|
||||
return {"error": f"Conversion failed: {str(e)}"}
|
||||
|
||||
# Image extraction
|
||||
@mcp.tool(name="extract_images", description="Extract images from PDF with MCP resource URIs for direct access")
|
||||
@mcp.tool(name="extract_images", description="Extract images from PDF with custom output path and clean summary")
|
||||
async def extract_images(
|
||||
pdf_path: str,
|
||||
pages: Optional[str] = None, # Accept as string for MCP compatibility
|
||||
min_width: int = 100,
|
||||
min_height: int = 100,
|
||||
output_format: str = "png"
|
||||
output_format: str = "png",
|
||||
output_directory: Optional[str] = None # Custom output directory
|
||||
) -> Dict[str, Any]:
|
||||
"""
|
||||
Extract images from PDF with MCP resource access
|
||||
Extract images from PDF with custom output directory and summary results
|
||||
|
||||
Args:
|
||||
pdf_path: Path to PDF file or HTTPS URL
|
||||
@ -809,16 +810,25 @@ async def extract_images(
|
||||
min_width: Minimum image width to extract
|
||||
min_height: Minimum image height to extract
|
||||
output_format: Output format (png, jpeg)
|
||||
output_directory: Custom directory to save images (defaults to cache directory)
|
||||
|
||||
Returns:
|
||||
Dictionary containing image metadata and MCP resource URIs for direct access
|
||||
Summary of extraction results with file locations (no verbose metadata)
|
||||
"""
|
||||
try:
|
||||
path = await validate_pdf_path(pdf_path)
|
||||
parsed_pages = parse_pages_parameter(pages)
|
||||
doc = fitz.open(str(path))
|
||||
|
||||
images = []
|
||||
# Determine output directory
|
||||
if output_directory:
|
||||
output_dir = Path(output_directory)
|
||||
output_dir.mkdir(parents=True, exist_ok=True)
|
||||
else:
|
||||
output_dir = CACHE_DIR
|
||||
|
||||
extracted_files = []
|
||||
total_size = 0
|
||||
page_range = parsed_pages if parsed_pages else range(len(doc))
|
||||
|
||||
for page_num in page_range:
|
||||
@ -835,43 +845,36 @@ async def extract_images(
|
||||
if output_format == "jpeg" and pix.alpha:
|
||||
pix = fitz.Pixmap(fitz.csRGB, pix)
|
||||
|
||||
# Save image to file instead of embedding base64 data
|
||||
# Save image to specified directory
|
||||
img_filename = f"page_{page_num + 1}_image_{img_index}.{output_format}"
|
||||
img_path = CACHE_DIR / img_filename
|
||||
img_path = output_dir / img_filename
|
||||
pix.save(str(img_path))
|
||||
|
||||
# Calculate file size
|
||||
file_size = img_path.stat().st_size
|
||||
total_size += file_size
|
||||
|
||||
# Create resource URI (filename without extension)
|
||||
image_id = img_filename.rsplit('.', 1)[0] # Remove extension
|
||||
resource_uri = f"pdf-image://{image_id}"
|
||||
|
||||
images.append({
|
||||
"page": page_num + 1,
|
||||
"index": img_index,
|
||||
"file_path": str(img_path),
|
||||
# Add to extracted files list (summary format)
|
||||
extracted_files.append({
|
||||
"filename": img_filename,
|
||||
"resource_uri": resource_uri,
|
||||
"width": pix.width,
|
||||
"height": pix.height,
|
||||
"format": output_format,
|
||||
"size_bytes": file_size,
|
||||
"size_human": format_file_size(file_size)
|
||||
"path": str(img_path),
|
||||
"size": format_file_size(file_size),
|
||||
"dimensions": f"{pix.width}x{pix.height}"
|
||||
})
|
||||
|
||||
pix = None
|
||||
|
||||
doc.close()
|
||||
|
||||
# Return clean summary instead of verbose image metadata
|
||||
return {
|
||||
"images": images,
|
||||
"total_images": len(images),
|
||||
"pages_searched": len(page_range),
|
||||
"filters": {
|
||||
"min_width": min_width,
|
||||
"min_height": min_height
|
||||
}
|
||||
"success": True,
|
||||
"images_extracted": len(extracted_files),
|
||||
"total_size": format_file_size(total_size),
|
||||
"output_directory": str(output_dir),
|
||||
"pages_processed": len(page_range),
|
||||
"files": extracted_files,
|
||||
"extraction_summary": f"Extracted {len(extracted_files)} images ({format_file_size(total_size)}) to {output_dir}"
|
||||
}
|
||||
|
||||
except Exception as e:
|
||||
|
Loading…
x
Reference in New Issue
Block a user