# mcp-office-tools/examples/test_office_tools.py

#!/usr/bin/env python3
"""Example script to test MCP Office Tools functionality."""

import asyncio
import sys
import tempfile
import os
from pathlib import Path

# Add the package to Python path for local testing
sys.path.insert(0, str(Path(__file__).parent.parent / "src"))

from mcwaddams.server import (
    extract_text,
    extract_images,
    extract_metadata,
    detect_office_format,
    analyze_document_health,
    get_supported_formats
)


def create_sample_csv():
    """Create a sample CSV file for testing.

    Writes a header row plus five employee records to a new temporary
    file with a ``.csv`` suffix.

    Returns:
        The path of the created file as a string. The file is created with
        ``delete=False``, so the caller is responsible for removing it.
    """
    sample_rows = (
        "Name,Age,Department,Salary",
        "John Smith,30,Engineering,75000",
        "Jane Doe,25,Marketing,65000",
        "Bob Johnson,35,Sales,70000",
        "Alice Brown,28,Engineering,80000",
        "Charlie Wilson,32,HR,60000",
    )
    # The context manager guarantees the handle is closed even if the write
    # fails; the explicit encoding keeps the output independent of the locale.
    with tempfile.NamedTemporaryFile(
        suffix='.csv', delete=False, mode='w', encoding='utf-8'
    ) as temp_file:
        # No trailing newline after the last record, matching the fixture
        # content this script has always produced.
        temp_file.write("\n".join(sample_rows))
    return temp_file.name


async def test_supported_formats():
    """Query the supported-format listing and print a short summary.

    Prints the total format count followed by the Word, Excel and
    PowerPoint format lists taken from the result of
    ``get_supported_formats()``.

    Returns:
        True if the listing was fetched and printed, False if any
        exception was raised along the way (the error is printed).
    """
    print("🔍 Testing supported formats...")

    # (label prefix, key under result['categories']) in display order.
    category_labels = (
        ("📝 Word", "word"),
        ("📊 Excel", "excel"),
        ("🎯 PowerPoint", "powerpoint"),
    )

    passed = False
    try:
        formats = await get_supported_formats()

        print(f"✅ Total supported formats: {formats['total_formats']}")
        for label, key in category_labels:
            extensions = formats['categories'][key]
            print(f"{label} formats: {', '.join(extensions)}")

        passed = True

    except Exception as err:
        print(f"❌ Error testing supported formats: {err}")

    return passed


async def test_csv_processing():
    """Run the CSV round-trip: detect, extract text, metadata, health.

    A sample CSV is generated with ``create_sample_csv()`` and is always
    removed again before returning.

    Returns:
        True when the format is reported as supported and every step
        completes; False when the format is unsupported or any step raises
        (the error and its traceback are printed).
    """
    print("\n📊 Testing CSV processing...")

    sample_path = create_sample_csv()

    try:
        # Step 1: format detection
        print("🔍 Detecting CSV format...")
        detection = await detect_office_format(sample_path)

        # Bail out early so the happy path below stays flat.
        if not detection["supported"]:
            print("❌ CSV format not supported")
            return False

        print("✅ CSV format detected and supported")

        # Step 2: text extraction
        print("📄 Extracting text from CSV...")
        extraction = await extract_text(sample_path, preserve_formatting=True)

        print("✅ Text extracted successfully")
        print(f"📊 Character count: {extraction['character_count']}")
        print(f"📊 Word count: {extraction['word_count']}")
        print(f"🔧 Method used: {extraction['method_used']}")
        print(f"⏱️ Extraction time: {extraction['extraction_time']}s")

        # Preview at most the first 200 characters, marking truncation.
        full_text = extraction['text']
        if len(full_text) > 200:
            preview = full_text[:200] + "..."
        else:
            preview = full_text
        print(f"📝 Text sample:\n{preview}")

        # Step 3: metadata extraction
        print("\n🏷️ Extracting metadata...")
        metadata = await extract_metadata(sample_path)

        print("✅ Metadata extracted")
        print(f"📁 File size: {metadata['file_metadata']['file_size']} bytes")
        print(f"📅 Format: {metadata['format_info']['format_name']}")

        # Step 4: health analysis
        print("\n🩺 Analyzing document health...")
        health = await analyze_document_health(sample_path)

        print("✅ Health analysis complete")
        print(f"💚 Overall health: {health['overall_health']}")
        print(f"📊 Health score: {health['health_score']}/10")

        advice = health['recommendations']
        if advice:
            print("📋 Recommendations:")
            for item in advice:
                print(f"   • {item}")

        return True

    except Exception as err:
        import traceback

        print(f"❌ Error processing CSV: {err}")
        traceback.print_exc()
        return False

    finally:
        # Best-effort removal of the temporary sample file.
        try:
            os.unlink(sample_path)
        except OSError:
            pass


async def test_file_with_path(file_path):
    """Run detection, text, image and health checks against one file.

    Args:
        file_path: Path of the document to process.

    Returns:
        True when the file exists, its format is reported as supported and
        the text and health steps complete. False when the path does not
        exist, the format is unsupported, or a step raises (the error and
        its traceback are printed). A failure during image extraction is
        only reported as a warning and does not fail the run.
    """
    print(f"\n📁 Testing file: {file_path}")

    if not os.path.exists(file_path):
        print(f"❌ File not found: {file_path}")
        return False

    try:
        # Step 1: format detection
        print("🔍 Detecting file format...")
        detection = await detect_office_format(file_path)

        print(f"📋 Format: {detection['format_detection']['format_name']}")
        print(f"📂 Category: {detection['format_detection']['category']}")
        print(f"✅ Supported: {detection['supported']}")

        # Nothing further can be exercised on an unsupported format.
        if not detection["supported"]:
            print("❌ File format not supported by MCP Office Tools")
            return False

        # Step 2: text extraction
        print("📄 Extracting text...")
        extraction = await extract_text(file_path, include_metadata=True)

        print("✅ Text extracted successfully")
        print(f"📊 Character count: {extraction['character_count']}")
        print(f"📊 Word count: {extraction['word_count']}")
        print(f"🔧 Method used: {extraction['method_used']}")
        print(f"⏱️ Extraction time: {extraction['extraction_time']}s")

        # Preview at most the first 300 characters, marking truncation.
        full_text = extraction['text']
        if len(full_text) > 300:
            preview = full_text[:300] + "..."
        else:
            preview = full_text
        print(f"📝 Text sample:\n{preview}")

        # Step 3: image extraction, only for these document categories
        image_categories = ('word', 'excel', 'powerpoint')
        if detection['format_detection']['category'] in image_categories:
            print("\n🖼️ Extracting images...")
            try:
                extracted = await extract_images(file_path, min_width=50, min_height=50)
                print("✅ Image extraction complete")
                print(f"🖼️ Images found: {extracted['image_count']}")

                if extracted['images']:
                    print("📋 Image details:")
                    # Only the first three images are listed.
                    for position, image in enumerate(extracted['images'][:3], start=1):
                        print(f"   {position}. {image['filename']} ({image['width']}x{image['height']})")

            except Exception as image_err:
                # Image problems are reported but do not fail the whole run.
                print(f"⚠️ Image extraction failed: {image_err}")

        # Step 4: health analysis
        print("\n🩺 Analyzing document health...")
        health = await analyze_document_health(file_path)

        print("✅ Health analysis complete")
        print(f"💚 Overall health: {health['overall_health']}")
        print(f"📊 Health score: {health['health_score']}/10")

        advice = health['recommendations']
        if advice:
            print("📋 Recommendations:")
            for item in advice:
                print(f"   • {item}")

        return True

    except Exception as err:
        import traceback

        print(f"❌ Error processing file: {err}")
        traceback.print_exc()
        return False


async def main():
    """Run every check in sequence and print a pass/fail summary.

    Always runs the supported-formats and CSV checks. When a path is given
    as the first command-line argument, that file is checked as well;
    otherwise a usage hint is printed.

    Returns:
        0 if every executed check passed, 1 otherwise.
    """
    print("🚀 MCP Office Tools - Testing Suite")
    print("=" * 50)

    # One entry per executed check, in execution order.
    outcomes = [await test_supported_formats()]
    outcomes.append(await test_csv_processing())

    cli_args = sys.argv[1:]
    if cli_args:
        # Optional third check: a user-supplied document.
        outcomes.append(await test_file_with_path(cli_args[0]))
    else:
        print("\n💡 Usage: python test_office_tools.py [path_to_office_file]")
        print("   Example: python test_office_tools.py document.docx")
        print("   Example: python test_office_tools.py spreadsheet.xlsx")

    executed = len(outcomes)
    passed = sum(1 for outcome in outcomes if outcome)

    # Summary
    print("\n" + "=" * 50)
    print(f"📊 Test Results: {passed}/{executed} tests passed")

    if passed != executed:
        print("⚠️ Some tests failed. Check the output above for details.")
        return 1

    print("🎉 All tests passed! MCP Office Tools is working correctly.")
    return 0


if __name__ == "__main__":
    # main() returns 0 on success and 1 on failure; hand that status to the
    # interpreter so shells and CI see a non-zero exit when a check fails.
    exit_code = asyncio.run(main())
    sys.exit(exit_code)