mcp-office-tools/examples/test_office_tools.py
Ryan Malloy b681cb030b Initial commit: MCP Office Tools v0.1.0
- Comprehensive Microsoft Office document processing server
- Support for Word (.docx, .doc), Excel (.xlsx, .xls), PowerPoint (.pptx, .ppt), CSV
- 6 universal tools: extract_text, extract_images, extract_metadata, detect_office_format, analyze_document_health, get_supported_formats
- Multi-library fallback system for robust processing
- URL support with intelligent caching
- Legacy Office format support (97-2003)
- FastMCP integration with async architecture
- Production ready with comprehensive documentation

🤖 Generated with Claude Code (claude.ai/code)

Co-Authored-By: Claude <noreply@anthropic.com>
2025-08-18 01:01:48 -06:00

238 lines
8.5 KiB
Python

#!/usr/bin/env python3
"""Example script to test MCP Office Tools functionality."""
import asyncio
import sys
import tempfile
import os
from pathlib import Path
# Add the package to Python path for local testing
sys.path.insert(0, str(Path(__file__).parent.parent / "src"))
from mcp_office_tools.server import (
extract_text,
extract_images,
extract_metadata,
detect_office_format,
analyze_document_health,
get_supported_formats
)
def create_sample_csv():
    """Write a small employee table to a temporary ``.csv`` file.

    The file is created with ``delete=False`` so it survives after this
    function returns; the caller is responsible for removing it.

    Returns:
        str: Filesystem path of the newly written CSV file.
    """
    sample_rows = """Name,Age,Department,Salary
John Smith,30,Engineering,75000
Jane Doe,25,Marketing,65000
Bob Johnson,35,Sales,70000
Alice Brown,28,Engineering,80000
Charlie Wilson,32,HR,60000"""
    # The context manager closes the handle even if the write raises, and the
    # explicit encoding keeps the bytes independent of the platform locale.
    with tempfile.NamedTemporaryFile(
        suffix='.csv', delete=False, mode='w', encoding='utf-8'
    ) as temp_file:
        temp_file.write(sample_rows)
    return temp_file.name
async def test_supported_formats():
    """Fetch the supported-format listing and print a per-category summary.

    Returns:
        bool: True when the listing was fetched and printed; False when any
        exception was raised along the way (the error message is printed).
    """
    print("🔍 Testing supported formats...")
    # (line prefix, key under result['categories']) in printing order.
    category_lines = (
        ("📝 Word formats: ", 'word'),
        ("📊 Excel formats: ", 'excel'),
        ("🎯 PowerPoint formats: ", 'powerpoint'),
    )
    try:
        formats = await get_supported_formats()
        print(f"✅ Total supported formats: {formats['total_formats']}")
        for prefix, key in category_lines:
            print(prefix + ', '.join(formats['categories'][key]))
    except Exception as exc:
        print(f"❌ Error testing supported formats: {exc}")
        return False
    return True
async def test_csv_processing():
    """Exercise the tools end to end against a generated sample CSV file.

    Runs format detection, text extraction, metadata extraction and health
    analysis in that order, printing the results of each step. The sample
    file is removed afterwards regardless of the outcome.

    Returns:
        bool: True when every step completed; False when the format is
        reported as unsupported or any step raised (the traceback is printed).
    """
    print("\n📊 Testing CSV processing...")
    sample_path = create_sample_csv()
    try:
        # Format detection gates every later step.
        print("🔍 Detecting CSV format...")
        detection = await detect_office_format(sample_path)
        if not detection["supported"]:
            print("❌ CSV format not supported")
            return False
        print("✅ CSV format detected and supported")

        # Text extraction and its statistics.
        print("📄 Extracting text from CSV...")
        extracted = await extract_text(sample_path, preserve_formatting=True)
        print("✅ Text extracted successfully")
        print(f"📊 Character count: {extracted['character_count']}")
        print(f"📊 Word count: {extracted['word_count']}")
        print(f"🔧 Method used: {extracted['method_used']}")
        print(f"⏱️ Extraction time: {extracted['extraction_time']}s")

        # Only the first 200 characters are shown; longer text gets an ellipsis.
        body = extracted['text']
        if len(body) > 200:
            preview = body[:200] + "..."
        else:
            preview = body
        print(f"📝 Text sample:\n{preview}")

        # Metadata extraction.
        print("\n🏷️ Extracting metadata...")
        metadata = await extract_metadata(sample_path)
        print("✅ Metadata extracted")
        print(f"📁 File size: {metadata['file_metadata']['file_size']} bytes")
        print(f"📅 Format: {metadata['format_info']['format_name']}")

        # Health analysis.
        print("\n🩺 Analyzing document health...")
        health = await analyze_document_health(sample_path)
        print("✅ Health analysis complete")
        print(f"💚 Overall health: {health['overall_health']}")
        print(f"📊 Health score: {health['health_score']}/10")
        advice = health['recommendations']
        if advice:
            print("📋 Recommendations:")
            for item in advice:
                print(f"{item}")
        return True
    except Exception as exc:
        print(f"❌ Error processing CSV: {exc}")
        import traceback
        traceback.print_exc()
        return False
    finally:
        # Best-effort cleanup: a failure to delete the sample file is ignored.
        try:
            os.unlink(sample_path)
        except OSError:
            pass
async def test_file_with_path(file_path):
    """Run the tools against a user-supplied file and print the results.

    Steps, in order: format detection, text extraction, image extraction
    (only for the word/excel/powerpoint categories) and health analysis.
    A failure during image extraction is reported as a warning and does not
    fail the overall run.

    Args:
        file_path: Path of the file to process.

    Returns:
        bool: True when processing completed; False when the path does not
        exist, the format is reported as unsupported, or a step raised (the
        traceback is printed).
    """
    print(f"\n📁 Testing file: {file_path}")
    if not os.path.exists(file_path):
        print(f"❌ File not found: {file_path}")
        return False

    try:
        # Format detection.
        print("🔍 Detecting file format...")
        detection = await detect_office_format(file_path)
        print(f"📋 Format: {detection['format_detection']['format_name']}")
        print(f"📂 Category: {detection['format_detection']['category']}")
        print(f"✅ Supported: {detection['supported']}")
        if not detection["supported"]:
            print("❌ File format not supported by MCP Office Tools")
            return False

        # Text extraction and its statistics.
        print("📄 Extracting text...")
        extracted = await extract_text(file_path, include_metadata=True)
        print("✅ Text extracted successfully")
        print(f"📊 Character count: {extracted['character_count']}")
        print(f"📊 Word count: {extracted['word_count']}")
        print(f"🔧 Method used: {extracted['method_used']}")
        print(f"⏱️ Extraction time: {extracted['extraction_time']}s")

        # Only the first 300 characters are shown; longer text gets an ellipsis.
        body = extracted['text']
        if len(body) > 300:
            preview = body[:300] + "..."
        else:
            preview = body
        print(f"📝 Text sample:\n{preview}")

        # Image extraction is attempted only for these three categories.
        image_categories = ('word', 'excel', 'powerpoint')
        if detection['format_detection']['category'] in image_categories:
            print("\n🖼️ Extracting images...")
            try:
                images = await extract_images(file_path, min_width=50, min_height=50)
                print("✅ Image extraction complete")
                print(f"🖼️ Images found: {images['image_count']}")
                if images['images']:
                    print("📋 Image details:")
                    # List at most the first three images.
                    for position, img in enumerate(images['images'][:3], start=1):
                        print(f" {position}. {img['filename']} ({img['width']}x{img['height']})")
            except Exception as img_error:
                print(f"⚠️ Image extraction failed: {img_error}")

        # Health analysis.
        print("\n🩺 Analyzing document health...")
        health = await analyze_document_health(file_path)
        print("✅ Health analysis complete")
        print(f"💚 Overall health: {health['overall_health']}")
        print(f"📊 Health score: {health['health_score']}/10")
        advice = health['recommendations']
        if advice:
            print("📋 Recommendations:")
            for item in advice:
                print(f"{item}")
        return True
    except Exception as exc:
        print(f"❌ Error processing file: {exc}")
        import traceback
        traceback.print_exc()
        return False
async def main():
    """Run every check in sequence and print a pass/fail summary.

    The supported-formats and CSV checks always run. If a command-line
    argument is present it is treated as a file path and checked as well;
    otherwise a usage hint is printed.

    Returns:
        int: 0 when every executed check passed, 1 otherwise.
    """
    print("🚀 MCP Office Tools - Testing Suite")
    print("=" * 50)

    # One boolean per executed check, in execution order.
    outcomes = []
    outcomes.append(bool(await test_supported_formats()))
    outcomes.append(bool(await test_csv_processing()))

    cli_args = sys.argv[1:]
    if cli_args:
        outcomes.append(bool(await test_file_with_path(cli_args[0])))
    else:
        print("\n💡 Usage: python test_office_tools.py [path_to_office_file]")
        print(" Example: python test_office_tools.py document.docx")
        print(" Example: python test_office_tools.py spreadsheet.xlsx")

    passed = sum(outcomes)
    executed = len(outcomes)
    print("\n" + "=" * 50)
    print(f"📊 Test Results: {passed}/{executed} tests passed")
    if passed != executed:
        print("⚠️ Some tests failed. Check the output above for details.")
        return 1
    print("🎉 All tests passed! MCP Office Tools is working correctly.")
    return 0
if __name__ == "__main__":
exit_code = asyncio.run(main())