mcp-office-tools/examples/test_office_tools.py
Ryan Malloy 31948d6ffc
Some checks are pending
Test Dashboard / test-and-dashboard (push) Waiting to run
Rename package to mcwaddams
Named for Milton Waddams, who was relocated to the basement with
boxes of legacy documents. He handles the .doc and .xls files from
1997 that nobody else wants to touch.

- Rename package from mcp-office-tools to mcwaddams
- Update author to Ryan Malloy
- Update all imports and references
- Add Office Space themed README narrative
- All 53 tests passing
2026-01-11 11:35:35 -07:00

238 lines
8.5 KiB
Python

#!/usr/bin/env python3
"""Example script to test MCP Office Tools functionality."""
import asyncio
import sys
import tempfile
import os
from pathlib import Path
# Add the package to Python path for local testing
sys.path.insert(0, str(Path(__file__).parent.parent / "src"))
from mcwaddams.server import (
extract_text,
extract_images,
extract_metadata,
detect_office_format,
analyze_document_health,
get_supported_formats
)
def create_sample_csv():
    """Create a small sample CSV file on disk for testing.

    The file is created with ``delete=False`` so it outlives this function;
    the caller is responsible for removing it once it is no longer needed.

    Returns:
        str: Path of the newly created temporary ``.csv`` file.
    """
    sample_rows = """Name,Age,Department,Salary
John Smith,30,Engineering,75000
Jane Doe,25,Marketing,65000
Bob Johnson,35,Sales,70000
Alice Brown,28,Engineering,80000
Charlie Wilson,32,HR,60000"""
    # The context manager guarantees the handle is closed even if the write
    # fails, and the explicit encoding keeps the output locale-independent.
    with tempfile.NamedTemporaryFile(
        suffix='.csv', delete=False, mode='w', encoding='utf-8'
    ) as temp_file:
        temp_file.write(sample_rows)
    return temp_file.name
async def test_supported_formats():
    """Fetch the supported-format listing and print a short summary.

    Returns:
        bool: True when the listing was fetched and printed, False when any
        exception was raised along the way (the error is printed).
    """
    print("🔍 Testing supported formats...")
    try:
        formats = await get_supported_formats()
        print(f"✅ Total supported formats: {formats['total_formats']}")
        by_category = formats['categories']
        print(f"📝 Word formats: {', '.join(by_category['word'])}")
        print(f"📊 Excel formats: {', '.join(by_category['excel'])}")
        print(f"🎯 PowerPoint formats: {', '.join(by_category['powerpoint'])}")
    except Exception as exc:
        print(f"❌ Error testing supported formats: {exc}")
        return False
    else:
        return True
async def test_csv_processing():
    """Run detection, text, metadata and health checks on a sample CSV.

    A temporary CSV is created with create_sample_csv() and removed again
    before returning, whatever the outcome.

    Returns:
        bool: True if the format is reported as supported and every step
        completes; False if it is unsupported or any exception is raised
        (the error and its traceback are printed).
    """
    print("\n📊 Testing CSV processing...")
    sample_path = create_sample_csv()
    try:
        # Step 1: format detection
        print("🔍 Detecting CSV format...")
        detection = await detect_office_format(sample_path)
        if not detection["supported"]:
            print("❌ CSV format not supported")
            return False
        print("✅ CSV format detected and supported")

        # Step 2: text extraction
        print("📄 Extracting text from CSV...")
        extracted = await extract_text(sample_path, preserve_formatting=True)
        print("✅ Text extracted successfully")
        print(f"📊 Character count: {extracted['character_count']}")
        print(f"📊 Word count: {extracted['word_count']}")
        print(f"🔧 Method used: {extracted['method_used']}")
        print(f"⏱️ Extraction time: {extracted['extraction_time']}s")

        # Only the first 200 characters are shown; longer text gets an ellipsis
        full_text = extracted['text']
        if len(full_text) > 200:
            preview = full_text[:200] + "..."
        else:
            preview = full_text
        print(f"📝 Text sample:\n{preview}")

        # Step 3: metadata extraction
        print("\n🏷️ Extracting metadata...")
        metadata = await extract_metadata(sample_path)
        print("✅ Metadata extracted")
        print(f"📁 File size: {metadata['file_metadata']['file_size']} bytes")
        print(f"📅 Format: {metadata['format_info']['format_name']}")

        # Step 4: health analysis
        print("\n🩺 Analyzing document health...")
        health = await analyze_document_health(sample_path)
        print("✅ Health analysis complete")
        print(f"💚 Overall health: {health['overall_health']}")
        print(f"📊 Health score: {health['health_score']}/10")
        advice = health['recommendations']
        if advice:
            print("📋 Recommendations:")
            for rec in advice:
                print(f"{rec}")
        return True
    except Exception as exc:
        print(f"❌ Error processing CSV: {exc}")
        import traceback
        traceback.print_exc()
        return False
    finally:
        # Best-effort removal of the temporary file
        try:
            os.unlink(sample_path)
        except OSError:
            pass
async def test_file_with_path(file_path):
    """Run detection, text, image and health checks on a user-supplied file.

    Args:
        file_path: Path of the file to process.

    Returns:
        bool: True if the file exists, its format is reported as supported
        and the text and health steps complete; False if the path does not
        exist, the format is unsupported, or an exception is raised. A
        failure during image extraction is only printed as a warning and
        does not change the result.
    """
    print(f"\n📁 Testing file: {file_path}")
    if not os.path.exists(file_path):
        print(f"❌ File not found: {file_path}")
        return False

    try:
        # Step 1: format detection
        print("🔍 Detecting file format...")
        detection = await detect_office_format(file_path)
        format_info = detection['format_detection']
        print(f"📋 Format: {format_info['format_name']}")
        print(f"📂 Category: {format_info['category']}")
        print(f"✅ Supported: {detection['supported']}")
        if not detection["supported"]:
            print("❌ File format not supported by MCP Office Tools")
            return False

        # Step 2: text extraction
        print("📄 Extracting text...")
        extracted = await extract_text(file_path, include_metadata=True)
        print("✅ Text extracted successfully")
        print(f"📊 Character count: {extracted['character_count']}")
        print(f"📊 Word count: {extracted['word_count']}")
        print(f"🔧 Method used: {extracted['method_used']}")
        print(f"⏱️ Extraction time: {extracted['extraction_time']}s")

        # Only the first 300 characters are shown; longer text gets an ellipsis
        full_text = extracted['text']
        if len(full_text) > 300:
            preview = full_text[:300] + "..."
        else:
            preview = full_text
        print(f"📝 Text sample:\n{preview}")

        # Step 3: image extraction, attempted only for the office categories
        if format_info['category'] in ('word', 'excel', 'powerpoint'):
            print("\n🖼️ Extracting images...")
            try:
                extracted_images = await extract_images(file_path, min_width=50, min_height=50)
                print("✅ Image extraction complete")
                print(f"🖼️ Images found: {extracted_images['image_count']}")
                if extracted_images['images']:
                    print("📋 Image details:")
                    # List at most the first three images
                    for pos, img in enumerate(extracted_images['images'][:3], start=1):
                        print(f" {pos}. {img['filename']} ({img['width']}x{img['height']})")
            except Exception as img_exc:
                print(f"⚠️ Image extraction failed: {img_exc}")

        # Step 4: health analysis
        print("\n🩺 Analyzing document health...")
        health = await analyze_document_health(file_path)
        print("✅ Health analysis complete")
        print(f"💚 Overall health: {health['overall_health']}")
        print(f"📊 Health score: {health['health_score']}/10")
        advice = health['recommendations']
        if advice:
            print("📋 Recommendations:")
            for rec in advice:
                print(f"{rec}")
        return True
    except Exception as exc:
        print(f"❌ Error processing file: {exc}")
        import traceback
        traceback.print_exc()
        return False
async def main():
    """Run the example checks in order and print a pass/fail summary.

    The supported-formats and CSV checks always run. If a command-line
    argument is present it is treated as a file path and checked as well;
    otherwise a usage hint is printed.

    Returns:
        int: 0 when every executed check passed, 1 otherwise.
    """
    print("🚀 MCP Office Tools - Testing Suite")
    print("=" * 50)

    # One boolean per executed check, in execution order
    outcomes = []
    outcomes.append(bool(await test_supported_formats()))
    outcomes.append(bool(await test_csv_processing()))

    # Optional third check on a user-supplied file
    cli_args = sys.argv[1:]
    if cli_args:
        outcomes.append(bool(await test_file_with_path(cli_args[0])))
    else:
        print("\n💡 Usage: python test_office_tools.py [path_to_office_file]")
        print(" Example: python test_office_tools.py document.docx")
        print(" Example: python test_office_tools.py spreadsheet.xlsx")

    # Summary
    passed = sum(outcomes)
    executed = len(outcomes)
    print("\n" + "=" * 50)
    print(f"📊 Test Results: {passed}/{executed} tests passed")
    if passed != executed:
        print("⚠️ Some tests failed. Check the output above for details.")
        return 1
    print("🎉 All tests passed! MCP Office Tools is working correctly.")
    return 0
if __name__ == "__main__":
    # Script entry point: run the async main() to completion. main() returns
    # 0 when all checks passed and 1 otherwise.
    # NOTE(review): exit_code is not consumed within the lines visible here;
    # confirm it is handed to sys.exit() so the shell sees the result.
    exit_code = asyncio.run(main())