#!/usr/bin/env python3
"""Example script to test MCP Office Tools functionality.

Runs a small smoke-test suite against the async tool functions exported by
``mcp_office_tools.server``: it lists the supported formats, processes a
generated CSV file, and optionally processes a file given on the command line.

Usage:
    python test_office_tools.py [path_to_office_file]

The process exit status is 0 when every executed test passed and 1 otherwise.
"""

import asyncio
import os
import sys
import tempfile
import traceback
from pathlib import Path

# Add the package to Python path for local testing
sys.path.insert(0, str(Path(__file__).parent.parent / "src"))

from mcp_office_tools.server import (
    extract_text,
    extract_images,
    extract_metadata,
    detect_office_format,
    analyze_document_health,
    get_supported_formats,
)

# Rows of the throw-away CSV fixture; joined with "\n" and written without a
# trailing newline.
_SAMPLE_CSV_ROWS = (
    "Name,Age,Department,Salary",
    "John Smith,30,Engineering,75000",
    "Jane Doe,25,Marketing,65000",
    "Bob Johnson,35,Sales,70000",
    "Alice Brown,28,Engineering,80000",
    "Charlie Wilson,32,HR,60000",
)

# Categories for which image extraction is attempted.
_IMAGE_CATEGORIES = ("word", "excel", "powerpoint")


def create_sample_csv() -> str:
    """Create a sample CSV file for testing.

    Returns:
        The path of a newly created temporary ``.csv`` file. The file is
        created with ``delete=False``, so the caller is responsible for
        removing it.
    """
    # The context manager guarantees the handle is closed even if the write
    # fails; delete=False keeps the file on disk after closing.
    with tempfile.NamedTemporaryFile(
        suffix=".csv", delete=False, mode="w", encoding="utf-8"
    ) as temp_file:
        temp_file.write("\n".join(_SAMPLE_CSV_ROWS))
        return temp_file.name


def _preview(text: str, limit: int) -> str:
    """Return *text* cut to *limit* characters, with "..." appended if cut."""
    if len(text) > limit:
        return text[:limit] + "..."
    return text


def _print_text_summary(text_result: dict, sample_limit: int) -> None:
    """Print the statistics and a text preview from an ``extract_text`` result."""
    print("āœ… Text extracted successfully")
    print(f"šŸ“Š Character count: {text_result['character_count']}")
    print(f"šŸ“Š Word count: {text_result['word_count']}")
    print(f"šŸ”§ Method used: {text_result['method_used']}")
    print(f"ā±ļø Extraction time: {text_result['extraction_time']}s")

    # Show sample of extracted text
    text_sample = _preview(text_result['text'], sample_limit)
    print(f"šŸ“ Text sample:\n{text_sample}")


async def _run_health_analysis(file_path: str) -> None:
    """Run ``analyze_document_health`` on *file_path* and print its report."""
    print("\n🩺 Analyzing document health...")
    health_result = await analyze_document_health(file_path)
    print("āœ… Health analysis complete")
    print(f"šŸ’š Overall health: {health_result['overall_health']}")
    print(f"šŸ“Š Health score: {health_result['health_score']}/10")

    if health_result['recommendations']:
        print("šŸ“‹ Recommendations:")
        for rec in health_result['recommendations']:
            print(f" • {rec}")


async def _run_image_extraction(file_path: str) -> None:
    """Extract images from *file_path* and print a short listing.

    A failure here is reported as a warning and does not propagate, so it
    never fails the enclosing test.
    """
    print("\nšŸ–¼ļø Extracting images...")
    try:
        image_result = await extract_images(file_path, min_width=50, min_height=50)
        print("āœ… Image extraction complete")
        print(f"šŸ–¼ļø Images found: {image_result['image_count']}")

        if image_result['images']:
            print("šŸ“‹ Image details:")
            for i, img in enumerate(image_result['images'][:3]):  # Show first 3
                print(f" {i+1}. {img['filename']} ({img['width']}x{img['height']})")
    except Exception as e:
        print(f"āš ļø Image extraction failed: {e}")


async def test_supported_formats() -> bool:
    """Test getting supported formats.

    Returns:
        True if ``get_supported_formats`` succeeded and its result could be
        printed, False if any exception was raised.
    """
    print("šŸ” Testing supported formats...")

    try:
        result = await get_supported_formats()
        print(f"āœ… Total supported formats: {result['total_formats']}")
        print(f"šŸ“ Word formats: {', '.join(result['categories']['word'])}")
        print(f"šŸ“Š Excel formats: {', '.join(result['categories']['excel'])}")
        print(f"šŸŽÆ PowerPoint formats: {', '.join(result['categories']['powerpoint'])}")
        return True
    except Exception as e:
        print(f"āŒ Error testing supported formats: {e}")
        return False


async def test_csv_processing() -> bool:
    """Test CSV file processing.

    Creates a temporary CSV file, runs format detection, text extraction,
    metadata extraction and health analysis on it, then removes the file.

    Returns:
        True if the CSV format is reported as supported and every step
        completed, False if it is unsupported or any exception was raised.
    """
    print("\nšŸ“Š Testing CSV processing...")

    csv_file = create_sample_csv()

    try:
        # Test format detection
        print("šŸ” Detecting CSV format...")
        format_result = await detect_office_format(csv_file)

        if not format_result["supported"]:
            print("āŒ CSV format not supported")
            return False

        print("āœ… CSV format detected and supported")

        # Test text extraction
        print("šŸ“„ Extracting text from CSV...")
        text_result = await extract_text(csv_file, preserve_formatting=True)
        _print_text_summary(text_result, sample_limit=200)

        # Test metadata extraction
        print("\nšŸ·ļø Extracting metadata...")
        metadata_result = await extract_metadata(csv_file)
        print("āœ… Metadata extracted")
        print(f"šŸ“ File size: {metadata_result['file_metadata']['file_size']} bytes")
        print(f"šŸ“… Format: {metadata_result['format_info']['format_name']}")

        # Test health analysis
        await _run_health_analysis(csv_file)

        return True

    except Exception as e:
        print(f"āŒ Error processing CSV: {e}")
        traceback.print_exc()
        return False

    finally:
        # Clean up. Removal is best-effort: a failure to delete the temporary
        # file must not change the test outcome.
        try:
            os.unlink(csv_file)
        except OSError:
            pass


async def test_file_with_path(file_path: str) -> bool:
    """Test processing a specific file.

    Args:
        file_path: Path of the document to process.

    Returns:
        True if the file exists, its format is reported as supported and the
        text extraction and health analysis completed; False otherwise. An
        image-extraction failure is only reported as a warning.
    """
    print(f"\nšŸ“ Testing file: {file_path}")

    if not os.path.exists(file_path):
        print(f"āŒ File not found: {file_path}")
        return False

    try:
        # Test format detection
        print("šŸ” Detecting file format...")
        format_result = await detect_office_format(file_path)
        detection = format_result['format_detection']

        print(f"šŸ“‹ Format: {detection['format_name']}")
        print(f"šŸ“‚ Category: {detection['category']}")
        print(f"āœ… Supported: {format_result['supported']}")

        if not format_result["supported"]:
            print("āŒ File format not supported by MCP Office Tools")
            return False

        # Test text extraction
        print("šŸ“„ Extracting text...")
        text_result = await extract_text(file_path, include_metadata=True)
        _print_text_summary(text_result, sample_limit=300)

        # Test image extraction for supported formats
        if detection['category'] in _IMAGE_CATEGORIES:
            await _run_image_extraction(file_path)

        # Test health analysis
        await _run_health_analysis(file_path)

        return True

    except Exception as e:
        print(f"āŒ Error processing file: {e}")
        traceback.print_exc()
        return False


async def main() -> int:
    """Main test function.

    Runs the supported-formats test and the CSV test, plus the file test when
    a path is given as the first command-line argument, then prints a summary.

    Returns:
        0 if every executed test passed, 1 otherwise.
    """
    print("šŸš€ MCP Office Tools - Testing Suite")
    print("=" * 50)

    # Each entry is the outcome of one executed test, in execution order.
    outcomes = [
        await test_supported_formats(),
        await test_csv_processing(),
    ]

    # Test specific file if provided
    if len(sys.argv) > 1:
        outcomes.append(await test_file_with_path(sys.argv[1]))
    else:
        print("\nšŸ’” Usage: python test_office_tools.py [path_to_office_file]")
        print(" Example: python test_office_tools.py document.docx")
        print(" Example: python test_office_tools.py spreadsheet.xlsx")

    success_count = sum(1 for passed in outcomes if passed)
    total_tests = len(outcomes)

    # Summary
    print("\n" + "=" * 50)
    print(f"šŸ“Š Test Results: {success_count}/{total_tests} tests passed")

    if success_count == total_tests:
        print("šŸŽ‰ All tests passed! MCP Office Tools is working correctly.")
        return 0

    print("āš ļø Some tests failed. Check the output above for details.")
    return 1


if __name__ == "__main__":
    # Propagate the result so shells and CI see a non-zero status on failure.
    exit_code = asyncio.run(main())
    sys.exit(exit_code)