# mcp-office-tools/torture_test.py

#!/usr/bin/env python
"""
Torture test for MCP Office Tools - Tests advanced tools with real files.
This tests robustness of the MCP server against various document formats.
"""

import asyncio
import os
import sys
import warnings
import tempfile

# Suppress pandas datetime warnings for cleaner output
warnings.filterwarnings("ignore", message=".*datetime64.*")
# NOTE: this filter has no message/module restriction, so it silences every
# FutureWarning raised in the process, not only the pandas ones.
warnings.filterwarnings("ignore", category=FutureWarning)

# Add src to path
# Inserted at index 0 so the "src" directory next to this script is searched
# first; this has to run before the mcp_office_tools imports that follow.
sys.path.insert(0, os.path.join(os.path.dirname(__file__), "src"))

from mcp_office_tools.mixins.excel import ExcelMixin
from mcp_office_tools.mixins.word import WordMixin


# Test files - real files from the user's system.
# These are absolute, machine-specific paths. run_torture_tests() only reads
# EXCEL_TEST_FILES[0] and reports SKIP when that file does not exist.
EXCEL_TEST_FILES = [
    "/home/rpm/FORScan Lite spreadsheets v1.1/FORScan Lite spreadsheet - PIDs.xlsx",
    "/home/rpm/FORScan Lite spreadsheets v1.1/FORScan Lite spreadsheet - CAN messages.xlsx",
]

# NOTE(review): WORD_TEST_FILES is not referenced anywhere in the visible
# source - confirm whether it is still needed.
WORD_TEST_FILES = [
    "/home/rpm/MeshCentral-master/docs/docs/meshcentral/debugging.md",  # Markdown as text test
]

# We'll also create synthetic test files
def create_test_xlsx(path: str) -> str:
    """Create a small Excel workbook with literal data and formulas.

    The workbook has one sheet named "Test Data" laid out as:

    * row 1: the headers "Category", "Value", "Formula" (A1:C1);
    * rows 2-6: five category/value pairs, with column C holding the
      formula ``=B{row}*1.1``;
    * rows 8-10: "Total", "Average" and "Max" labels in column A with
      SUM/AVERAGE/MAX formulas over B2:B6 in column B.

    Args:
        path: Destination file path handed to ``Workbook.save``.

    Returns:
        The same ``path`` that was passed in.
    """
    # Function-scope import: openpyxl is loaded only when this helper runs.
    import openpyxl

    wb = openpyxl.Workbook()
    ws = wb.active
    ws.title = "Test Data"

    # Header row
    for cell_ref, text in (("A1", "Category"), ("B1", "Value"), ("C1", "Formula")):
        ws[cell_ref] = text

    # Data rows start at row 2, directly under the headers
    data_rows = [
        ("Alpha", 100),
        ("Beta", 250),
        ("Gamma", 175),
        ("Delta", 320),
        ("Epsilon", 95),
    ]
    for row, (category, value) in enumerate(data_rows, start=2):
        ws[f"A{row}"] = category
        ws[f"B{row}"] = value
        ws[f"C{row}"] = f"=B{row}*1.1"  # Formula

    # Summary formulas; row 7 is left empty as a visual separator
    summary_rows = (
        (8, "Total", "=SUM(B2:B6)"),
        (9, "Average", "=AVERAGE(B2:B6)"),
        (10, "Max", "=MAX(B2:B6)"),
    )
    for row, label, formula in summary_rows:
        ws[f"A{row}"] = label
        ws[f"B{row}"] = formula

    wb.save(path)
    return path


def create_test_docx(path: str) -> str:
    """Create a Word document with headings, paragraphs, and one table.

    The document contains a level-0 title, three level-1 headings
    ("Introduction", "Analysis Results", "Conclusion"), two level-2
    headings ("Data Overview", "Summary"), several paragraphs, and a
    4x3 "Table Grid" table (one header row plus three data rows).

    Args:
        path: Destination file path handed to ``Document.save``.

    Returns:
        The same ``path`` that was passed in.
    """
    # Function-scope import: python-docx is loaded only when this helper runs.
    from docx import Document

    doc = Document()

    # Add title
    doc.add_heading("Test Document for Torture Testing", 0)

    # Add section with paragraphs
    doc.add_heading("Introduction", level=1)
    doc.add_paragraph("This is a test document created for torture testing the MCP Office Tools.")
    doc.add_paragraph("It contains multiple elements to test extraction capabilities.")

    # Add subheadings
    doc.add_heading("Data Overview", level=2)
    doc.add_paragraph("Below is a table of test data.")

    # Add a table: the first grid row is the header, the rest is data
    grid = [
        ("Name", "Value", "Status"),
        ("Item A", "100", "Active"),
        ("Item B", "200", "Pending"),
        ("Item C", "300", "Complete"),
    ]
    table = doc.add_table(rows=len(grid), cols=len(grid[0]))
    table.style = 'Table Grid'
    for row_idx, row_values in enumerate(grid):
        cells = table.rows[row_idx].cells
        for col_idx, text in enumerate(row_values):
            cells[col_idx].text = text

    # Add another section
    doc.add_heading("Analysis Results", level=1)
    doc.add_heading("Summary", level=2)
    doc.add_paragraph("The analysis shows positive results across all metrics.")

    doc.add_heading("Conclusion", level=1)
    doc.add_paragraph("This concludes the test document.")

    doc.save(path)
    return path


async def _run_case(results, name, call, required, missing, describe):
    """Run one tool call, print PASS/FAIL, and record the outcome.

    Args:
        results: Dict updated in place; ``results[name]`` becomes True/False.
        name: Key under which the outcome is recorded.
        call: Zero-argument callable returning the awaitable to run.
        required: Keys of which at least one must be present in the result.
        missing: Message for the AssertionError raised when none is present.
        describe: Callable mapping the result to the text shown after "PASS - ".
    """
    try:
        result = await call()
        # Explicit raise instead of `assert`: assert statements are stripped
        # under `python -O`, which would make every check pass vacuously.
        if not any(key in result for key in required):
            raise AssertionError(missing)
        detail = describe(result)
    except Exception as exc:
        print(f"   ❌ FAIL - {type(exc).__name__}: {exc}")
        results[name] = False
    else:
        print(f"   ✅ PASS - {detail}")
        results[name] = True


def _print_summary(results) -> bool:
    """Print per-test outcomes and totals; return True when nothing failed.

    Values in ``results`` are True (passed), False (failed) or None (skipped).
    Skipped tests are reported but do not count towards the pass ratio.
    """
    print("\n" + "=" * 70)
    print("📊 TORTURE TEST SUMMARY")
    print("=" * 70)

    passed = sum(1 for outcome in results.values() if outcome is True)
    failed = sum(1 for outcome in results.values() if outcome is False)
    skipped = sum(1 for outcome in results.values() if outcome is None)

    for test_name, outcome in results.items():
        if outcome is True:
            print(f"   ✅ PASS: {test_name}")
        elif outcome is False:
            print(f"   ❌ FAIL: {test_name}")
        else:
            print(f"   ⏭️  SKIP: {test_name}")

    skipped_note = f" ({skipped} skipped)" if skipped > 0 else ""
    print(f"\n   Total: {passed}/{passed + failed} tests passed{skipped_note}")

    return failed == 0


async def run_torture_tests() -> bool:
    """Run the torture tests against synthetic and (optionally) real files.

    Synthetic .xlsx/.docx fixtures are generated in a temporary directory and
    fed to the Excel and Word mixins. A sixth test analyzes the first entry
    of EXCEL_TEST_FILES and is skipped when that file does not exist.

    Returns:
        True when no executed test failed (skipped tests are ignored),
        False otherwise.
    """
    # Opening banner; the "SUMMARY" heading is reserved for the final report.
    print("=" * 70)
    print("🧪 MCP OFFICE TOOLS - TORTURE TEST")
    print("=" * 70)

    excel_mixin = ExcelMixin()
    word_mixin = WordMixin()

    results = {}

    # Create temp directory for synthetic test files
    with tempfile.TemporaryDirectory() as tmpdir:
        test_xlsx = create_test_xlsx(os.path.join(tmpdir, "test_data.xlsx"))
        test_docx = create_test_docx(os.path.join(tmpdir, "test_document.docx"))

        # Test 1: Excel Data Analysis
        print("\n🔬 Test 1: Excel Data Analysis")
        await _run_case(
            results,
            "Excel Data Analysis",
            lambda: excel_mixin.analyze_excel_data(test_xlsx),
            required=("analysis", "summary"),
            missing="Missing analysis/summary key",
            describe=lambda r: (
                f"Analyzed {r.get('summary', {}).get('sheets_analyzed', 1)} sheet(s)"
            ),
        )

        # Test 2: Excel Formula Extraction
        print("\n🔬 Test 2: Excel Formula Extraction")
        await _run_case(
            results,
            "Excel Formula Extraction",
            lambda: excel_mixin.extract_excel_formulas(test_xlsx),
            required=("formulas", "summary"),
            missing="Missing formulas/summary key",
            describe=lambda r: (
                f"Extracted {r.get('summary', {}).get('total_formulas', 0)} formula(s)"
            ),
        )

        # Test 3: Excel Chart Generation
        print("\n🔬 Test 3: Excel Chart Data Generation")
        await _run_case(
            results,
            "Excel Chart Generation",
            # Use actual column names from the test data (headers in row 1)
            lambda: excel_mixin.create_excel_chart_data(
                test_xlsx,
                x_column="Category",
                y_columns=["Value"],
                chart_type="bar"
            ),
            required=("chart_configuration",),
            missing="Missing chart_configuration key",
            describe=lambda r: (
                f"Generated chart config with {len(r['chart_configuration'])} libraries"
            ),
        )

        # Test 4: Word Structure Analysis
        print("\n🔬 Test 4: Word Structure Analysis")
        await _run_case(
            results,
            "Word Structure Analysis",
            lambda: word_mixin.analyze_word_structure(test_docx),
            required=("structure",),
            missing="Missing structure key",
            describe=lambda r: (
                f"Found {r['structure'].get('total_headings', 0)} heading(s)"
            ),
        )

        # Test 5: Word Table Extraction
        print("\n🔬 Test 5: Word Table Extraction")
        await _run_case(
            results,
            "Word Table Extraction",
            lambda: word_mixin.extract_word_tables(test_docx),
            required=("tables",),
            missing="Missing tables key",
            describe=lambda r: f"Extracted {r.get('total_tables', 0)} table(s)",
        )

        # Test 6: Real Excel file (if available)
        print("\n🔬 Test 6: Real Excel File (FORScan spreadsheet)")
        real_excel = EXCEL_TEST_FILES[0]
        if os.path.exists(real_excel):
            await _run_case(
                results,
                "Real Excel Analysis",
                lambda: excel_mixin.analyze_excel_data(real_excel),
                # Same call as Test 1, so apply the same validation and read
                # the same summary counter; fall back to the "sheets" list
                # this test originally counted.
                required=("analysis", "summary"),
                missing="Missing analysis/summary key",
                describe=lambda r: (
                    "Analyzed real file with "
                    f"{r.get('summary', {}).get('sheets_analyzed', len(r.get('sheets', [])))}"
                    " sheet(s)"
                ),
            )
        else:
            print(f"   ⏭️  SKIP - File not found: {real_excel}")
            results["Real Excel Analysis"] = None

    return _print_summary(results)


if __name__ == "__main__":
    # Exit status mirrors the overall outcome: 0 when run_torture_tests()
    # returns a truthy value, 1 otherwise.
    all_passed = asyncio.run(run_torture_tests())
    exit_code = 0 if all_passed else 1
    sys.exit(exit_code)