- Comprehensive Microsoft Office document processing server
- Support for Word (.docx, .doc), Excel (.xlsx, .xls), PowerPoint (.pptx, .ppt), CSV
- 6 universal tools: extract_text, extract_images, extract_metadata, detect_office_format, analyze_document_health, get_supported_formats
- Multi-library fallback system for robust processing
- URL support with intelligent caching
- Legacy Office format support (97-2003)
- FastMCP integration with async architecture
- Production ready with comprehensive documentation
🤖 Generated with Claude Code (claude.ai/code)
Co-Authored-By: Claude <noreply@anthropic.com>
238 lines
8.5 KiB
Python
238 lines
8.5 KiB
Python
#!/usr/bin/env python3
|
|
"""Example script to test MCP Office Tools functionality."""
|
|
|
|
import asyncio
|
|
import sys
|
|
import tempfile
|
|
import os
|
|
from pathlib import Path
|
|
|
|
# Add the package to Python path for local testing
|
|
sys.path.insert(0, str(Path(__file__).parent.parent / "src"))
|
|
|
|
from mcp_office_tools.server import (
|
|
extract_text,
|
|
extract_images,
|
|
extract_metadata,
|
|
detect_office_format,
|
|
analyze_document_health,
|
|
get_supported_formats
|
|
)
|
|
|
|
|
|
def create_sample_csv():
    """Write a small sample CSV to a temporary file and return its path.

    The file is created with ``delete=False`` so it still exists after this
    function returns; the caller is responsible for removing it.

    Returns:
        str: Filesystem path of the newly written ``.csv`` file.
    """
    rows = (
        "Name,Age,Department,Salary",
        "John Smith,30,Engineering,75000",
        "Jane Doe,25,Marketing,65000",
        "Bob Johnson,35,Sales,70000",
        "Alice Brown,28,Engineering,80000",
        "Charlie Wilson,32,HR,60000",
    )

    # The context manager closes the handle even if the write raises, and the
    # explicit encoding keeps the bytes on disk independent of the platform
    # default locale.
    with tempfile.NamedTemporaryFile(
        suffix='.csv', delete=False, mode='w', encoding='utf-8'
    ) as temp_file:
        # No trailing newline after the last record.
        temp_file.write("\n".join(rows))

    return temp_file.name
|
|
|
|
|
|
async def test_supported_formats():
    """Query the supported-format listing and print a short summary.

    Returns:
        bool: True when the listing was fetched and printed, False when any
        exception was raised along the way (the error is printed).
    """
    print("🔍 Testing supported formats...")

    try:
        formats = await get_supported_formats()

        print(f"✅ Total supported formats: {formats['total_formats']}")

        by_category = formats['categories']
        print(f"📝 Word formats: {', '.join(by_category['word'])}")
        print(f"📊 Excel formats: {', '.join(by_category['excel'])}")
        print(f"🎯 PowerPoint formats: {', '.join(by_category['powerpoint'])}")
    except Exception as exc:
        print(f"❌ Error testing supported formats: {exc}")
        return False
    else:
        return True
|
|
|
|
|
|
async def test_csv_processing():
    """Run format detection, text, metadata and health checks on a sample CSV.

    A temporary CSV is created via ``create_sample_csv`` and removed again in
    the ``finally`` clause, whatever the outcome.

    Returns:
        bool: True when the CSV is reported as supported and every step
        completes; False when it is unsupported or any step raises (the
        error and traceback are printed).
    """
    print("\n📊 Testing CSV processing...")

    csv_file = create_sample_csv()

    try:
        # Step 1: format detection
        print("🔍 Detecting CSV format...")
        detection = await detect_office_format(csv_file)

        if not detection["supported"]:
            print("❌ CSV format not supported")
            return False

        print("✅ CSV format detected and supported")

        # Step 2: text extraction
        print("📄 Extracting text from CSV...")
        extraction = await extract_text(csv_file, preserve_formatting=True)

        print("✅ Text extracted successfully")
        print(f"📊 Character count: {extraction['character_count']}")
        print(f"📊 Word count: {extraction['word_count']}")
        print(f"🔧 Method used: {extraction['method_used']}")
        print(f"⏱️ Extraction time: {extraction['extraction_time']}s")

        # Print at most the first 200 characters of the extracted text
        full_text = extraction['text']
        if len(full_text) > 200:
            preview = full_text[:200] + "..."
        else:
            preview = full_text
        print(f"📝 Text sample:\n{preview}")

        # Step 3: metadata extraction
        print("\n🏷️ Extracting metadata...")
        metadata = await extract_metadata(csv_file)

        print("✅ Metadata extracted")
        print(f"📁 File size: {metadata['file_metadata']['file_size']} bytes")
        print(f"📅 Format: {metadata['format_info']['format_name']}")

        # Step 4: health analysis
        print("\n🩺 Analyzing document health...")
        health = await analyze_document_health(csv_file)

        print("✅ Health analysis complete")
        print(f"💚 Overall health: {health['overall_health']}")
        print(f"📊 Health score: {health['health_score']}/10")

        if health['recommendations']:
            print("📋 Recommendations:")
            for tip in health['recommendations']:
                print(f" • {tip}")

        return True

    except Exception as exc:
        print(f"❌ Error processing CSV: {exc}")
        import traceback
        traceback.print_exc()
        return False

    finally:
        # Best-effort removal of the temporary file
        try:
            os.unlink(csv_file)
        except OSError:
            pass
|
|
|
|
|
|
async def test_file_with_path(file_path):
    """Exercise detection, text, image and health tools on a given file.

    Image extraction is attempted only for the 'word', 'excel' and
    'powerpoint' categories; a failure there is printed as a warning and
    does not fail the overall test.

    Args:
        file_path: Path of the document to process.

    Returns:
        bool: True when the format is reported as supported and the text
        and health steps complete; False when the path does not exist, the
        format is unsupported, or an exception is raised (the error and
        traceback are printed).
    """
    print(f"\n📁 Testing file: {file_path}")

    if not os.path.exists(file_path):
        print(f"❌ File not found: {file_path}")
        return False

    try:
        # Step 1: format detection
        print("🔍 Detecting file format...")
        detection = await detect_office_format(file_path)

        print(f"📋 Format: {detection['format_detection']['format_name']}")
        print(f"📂 Category: {detection['format_detection']['category']}")
        print(f"✅ Supported: {detection['supported']}")

        if not detection["supported"]:
            print("❌ File format not supported by MCP Office Tools")
            return False

        # Step 2: text extraction
        print("📄 Extracting text...")
        extraction = await extract_text(file_path, include_metadata=True)

        print("✅ Text extracted successfully")
        print(f"📊 Character count: {extraction['character_count']}")
        print(f"📊 Word count: {extraction['word_count']}")
        print(f"🔧 Method used: {extraction['method_used']}")
        print(f"⏱️ Extraction time: {extraction['extraction_time']}s")

        # Print at most the first 300 characters of the extracted text
        full_text = extraction['text']
        if len(full_text) > 300:
            preview = full_text[:300] + "..."
        else:
            preview = full_text
        print(f"📝 Text sample:\n{preview}")

        # Step 3: image extraction, only for the Office document categories
        if detection['format_detection']['category'] in ('word', 'excel', 'powerpoint'):
            print("\n🖼️ Extracting images...")
            try:
                images = await extract_images(file_path, min_width=50, min_height=50)
                print("✅ Image extraction complete")
                print(f"🖼️ Images found: {images['image_count']}")

                if images['images']:
                    print("📋 Image details:")
                    # List only the first three images
                    for idx, img in enumerate(images['images'][:3], start=1):
                        print(f" {idx}. {img['filename']} ({img['width']}x{img['height']})")

            except Exception as img_exc:
                print(f"⚠️ Image extraction failed: {img_exc}")

        # Step 4: health analysis
        print("\n🩺 Analyzing document health...")
        health = await analyze_document_health(file_path)

        print("✅ Health analysis complete")
        print(f"💚 Overall health: {health['overall_health']}")
        print(f"📊 Health score: {health['health_score']}/10")

        if health['recommendations']:
            print("📋 Recommendations:")
            for tip in health['recommendations']:
                print(f" • {tip}")

        return True

    except Exception as exc:
        print(f"❌ Error processing file: {exc}")
        import traceback
        traceback.print_exc()
        return False
|
|
|
|
|
|
async def main():
    """Run the test coroutines in sequence and print a pass/fail summary.

    The supported-formats and CSV tests always run. A third test runs on
    the path given as the first command-line argument, if any; otherwise a
    usage hint is printed.

    Returns:
        int: 0 when every executed test passed, 1 otherwise.
    """
    divider = "=" * 50

    print("🚀 MCP Office Tools - Testing Suite")
    print(divider)

    # One entry per executed test, in execution order
    outcomes = []

    # Supported formats
    outcomes.append(await test_supported_formats())

    # CSV processing
    outcomes.append(await test_csv_processing())

    # Optional user-supplied file
    cli_args = sys.argv[1:]
    if cli_args:
        outcomes.append(await test_file_with_path(cli_args[0]))
    else:
        print("\n💡 Usage: python test_office_tools.py [path_to_office_file]")
        print(" Example: python test_office_tools.py document.docx")
        print(" Example: python test_office_tools.py spreadsheet.xlsx")

    # Summary
    passed = sum(1 for ok in outcomes if ok)
    total = len(outcomes)

    print("\n" + divider)
    print(f"📊 Test Results: {passed}/{total} tests passed")

    if passed != total:
        print("⚠️ Some tests failed. Check the output above for details.")
        return 1

    print("🎉 All tests passed! MCP Office Tools is working correctly.")
    return 0
|
|
|
|
|
|
if __name__ == "__main__":
|
|
exit_code = asyncio.run(main()) |