Some checks are pending
Test Dashboard / test-and-dashboard (push) Waiting to run
Named for Milton Waddams, who was relocated to the basement with boxes of legacy documents. He handles the .doc and .xls files from 1997 that nobody else wants to touch. - Rename package from mcp-office-tools to mcwaddams - Update author to Ryan Malloy - Update all imports and references - Add Office Space themed README narrative - All 53 tests passing
238 lines
8.5 KiB
Python
238 lines
8.5 KiB
Python
#!/usr/bin/env python3
|
|
"""Example script to test MCP Office Tools functionality."""
|
|
|
|
import asyncio
|
|
import sys
|
|
import tempfile
|
|
import os
|
|
from pathlib import Path
|
|
|
|
# Add the package to Python path for local testing
|
|
sys.path.insert(0, str(Path(__file__).parent.parent / "src"))
|
|
|
|
from mcwaddams.server import (
|
|
extract_text,
|
|
extract_images,
|
|
extract_metadata,
|
|
detect_office_format,
|
|
analyze_document_health,
|
|
get_supported_formats
|
|
)
|
|
|
|
|
|
def create_sample_csv():
    """Create a small sample CSV file on disk for the tests to process.

    The file is created with ``delete=False`` so it stays on disk after the
    handle is closed; the caller is responsible for removing it.

    Returns:
        str: Path of the newly written temporary ``.csv`` file.
    """
    rows = (
        "Name,Age,Department,Salary",
        "John Smith,30,Engineering,75000",
        "Jane Doe,25,Marketing,65000",
        "Bob Johnson,35,Sales,70000",
        "Alice Brown,28,Engineering,80000",
        "Charlie Wilson,32,HR,60000",
    )

    # The context manager closes the handle even if the write fails, so a
    # failed write cannot leave an open file descriptor behind.
    with tempfile.NamedTemporaryFile(
        suffix='.csv', delete=False, mode='w', encoding='utf-8'
    ) as temp_file:
        # Rows are newline-separated with no trailing newline after the last.
        temp_file.write("\n".join(rows))

    return temp_file.name
|
|
|
|
|
|
async def test_supported_formats():
    """Query the supported-format listing and print a summary of it.

    Returns:
        bool: True when the lookup and the report both succeed, False when
        any exception is raised (the error is printed, not propagated).
    """
    print("🔍 Testing supported formats...")

    try:
        formats = await get_supported_formats()

        print(f"✅ Total supported formats: {formats['total_formats']}")

        # Looked up only after the total has been printed, so a missing key
        # surfaces at the same point in the output as before.
        by_category = formats['categories']
        print(f"📝 Word formats: {', '.join(by_category['word'])}")
        print(f"📊 Excel formats: {', '.join(by_category['excel'])}")
        print(f"🎯 PowerPoint formats: {', '.join(by_category['powerpoint'])}")
    except Exception as exc:
        print(f"❌ Error testing supported formats: {exc}")
        return False
    else:
        return True
|
|
|
|
|
|
async def test_csv_processing():
    """Run the CSV workflow end to end against a generated sample file.

    Detects the format, then extracts text and metadata and runs the health
    analysis, printing a short report after each step. The temporary sample
    file is removed before returning, whatever the outcome.

    Returns:
        bool: True if every step completed, False if the format is reported
        as unsupported or any step raised an exception.
    """
    print("\n📊 Testing CSV processing...")

    sample_path = create_sample_csv()

    try:
        # Step 1: format detection
        print("🔍 Detecting CSV format...")
        detection = await detect_office_format(sample_path)

        # Bail out early when the format is rejected.
        if not detection["supported"]:
            print("❌ CSV format not supported")
            return False

        print("✅ CSV format detected and supported")

        # Step 2: text extraction
        print("📄 Extracting text from CSV...")
        extracted = await extract_text(sample_path, preserve_formatting=True)

        print("✅ Text extracted successfully")
        print(f"📊 Character count: {extracted['character_count']}")
        print(f"📊 Word count: {extracted['word_count']}")
        print(f"🔧 Method used: {extracted['method_used']}")
        print(f"⏱️ Extraction time: {extracted['extraction_time']}s")

        # Preview at most the first 200 characters of the extracted text.
        full_text = extracted['text']
        if len(full_text) > 200:
            preview = full_text[:200] + "..."
        else:
            preview = full_text
        print(f"📝 Text sample:\n{preview}")

        # Step 3: metadata extraction
        print("\n🏷️ Extracting metadata...")
        metadata = await extract_metadata(sample_path)

        print("✅ Metadata extracted")
        print(f"📁 File size: {metadata['file_metadata']['file_size']} bytes")
        print(f"📅 Format: {metadata['format_info']['format_name']}")

        # Step 4: health analysis
        print("\n🩺 Analyzing document health...")
        health = await analyze_document_health(sample_path)

        print("✅ Health analysis complete")
        print(f"💚 Overall health: {health['overall_health']}")
        print(f"📊 Health score: {health['health_score']}/10")

        if health['recommendations']:
            print("📋 Recommendations:")
            for recommendation in health['recommendations']:
                print(f" • {recommendation}")

        return True

    except Exception as exc:
        print(f"❌ Error processing CSV: {exc}")
        import traceback
        traceback.print_exc()
        return False

    finally:
        # Best-effort removal of the temporary sample file.
        try:
            os.unlink(sample_path)
        except OSError:
            pass
|
|
|
|
|
|
async def test_file_with_path(file_path):
|
|
"""Test processing a specific file."""
|
|
print(f"\n📁 Testing file: {file_path}")
|
|
|
|
if not os.path.exists(file_path):
|
|
print(f"❌ File not found: {file_path}")
|
|
return False
|
|
|
|
try:
|
|
# Test format detection
|
|
print("🔍 Detecting file format...")
|
|
format_result = await detect_office_format(file_path)
|
|
|
|
print(f"📋 Format: {format_result['format_detection']['format_name']}")
|
|
print(f"📂 Category: {format_result['format_detection']['category']}")
|
|
print(f"✅ Supported: {format_result['supported']}")
|
|
|
|
if format_result["supported"]:
|
|
# Test text extraction
|
|
print("📄 Extracting text...")
|
|
text_result = await extract_text(file_path, include_metadata=True)
|
|
|
|
print(f"✅ Text extracted successfully")
|
|
print(f"📊 Character count: {text_result['character_count']}")
|
|
print(f"📊 Word count: {text_result['word_count']}")
|
|
print(f"🔧 Method used: {text_result['method_used']}")
|
|
print(f"⏱️ Extraction time: {text_result['extraction_time']}s")
|
|
|
|
# Show sample of extracted text
|
|
text_sample = text_result['text'][:300] + "..." if len(text_result['text']) > 300 else text_result['text']
|
|
print(f"📝 Text sample:\n{text_sample}")
|
|
|
|
# Test image extraction for supported formats
|
|
if format_result['format_detection']['category'] in ['word', 'excel', 'powerpoint']:
|
|
print("\n🖼️ Extracting images...")
|
|
try:
|
|
image_result = await extract_images(file_path, min_width=50, min_height=50)
|
|
print(f"✅ Image extraction complete")
|
|
print(f"🖼️ Images found: {image_result['image_count']}")
|
|
|
|
if image_result['images']:
|
|
print("📋 Image details:")
|
|
for i, img in enumerate(image_result['images'][:3]): # Show first 3
|
|
print(f" {i+1}. {img['filename']} ({img['width']}x{img['height']})")
|
|
|
|
except Exception as e:
|
|
print(f"⚠️ Image extraction failed: {e}")
|
|
|
|
# Test health analysis
|
|
print("\n🩺 Analyzing document health...")
|
|
health_result = await analyze_document_health(file_path)
|
|
|
|
print(f"✅ Health analysis complete")
|
|
print(f"💚 Overall health: {health_result['overall_health']}")
|
|
print(f"📊 Health score: {health_result['health_score']}/10")
|
|
|
|
if health_result['recommendations']:
|
|
print("📋 Recommendations:")
|
|
for rec in health_result['recommendations']:
|
|
print(f" • {rec}")
|
|
|
|
return True
|
|
else:
|
|
print("❌ File format not supported by MCP Office Tools")
|
|
return False
|
|
|
|
except Exception as e:
|
|
print(f"❌ Error processing file: {e}")
|
|
import traceback
|
|
traceback.print_exc()
|
|
return False
|
|
|
|
|
|
async def main():
    """Run every check in sequence and print a pass/fail summary.

    The two built-in checks always run. A file path given as the first
    command-line argument adds one more check; without it, usage help is
    printed instead.

    Returns:
        int: 0 when all executed tests passed, 1 otherwise.
    """
    print("🚀 MCP Office Tools - Testing Suite")
    print("=" * 50)

    # The built-in checks are awaited one after another, in this order.
    outcomes = [
        await test_supported_formats(),
        await test_csv_processing(),
    ]

    # Optional check against a file named on the command line.
    if len(sys.argv) > 1:
        outcomes.append(await test_file_with_path(sys.argv[1]))
    else:
        print("\n💡 Usage: python test_office_tools.py [path_to_office_file]")
        print(" Example: python test_office_tools.py document.docx")
        print(" Example: python test_office_tools.py spreadsheet.xlsx")

    passed = sum(1 for outcome in outcomes if outcome)
    executed = len(outcomes)

    # Summary
    print("\n" + "=" * 50)
    print(f"📊 Test Results: {passed}/{executed} tests passed")

    if passed != executed:
        print("⚠️ Some tests failed. Check the output above for details.")
        return 1

    print("🎉 All tests passed! MCP Office Tools is working correctly.")
    return 0
|
|
|
|
|
|
if __name__ == "__main__":
|
|
exit_code = asyncio.run(main()) |