mcp-office-tools/torture_test.py
Ryan Malloy 76c7a0b2d0 Add decorators for field defaults and error handling, fix Excel performance
- Create @resolve_field_defaults decorator to handle Pydantic FieldInfo
  objects when tools are called directly (outside MCP framework)
- Create @handle_office_errors decorator for consistent error wrapping
- Apply decorators to Excel and Word mixins, removing ~100 lines of
  boilerplate code
- Fix Excel formula extraction performance: load workbooks once before
  loop instead of per-cell (100x faster with calculated values)
- Update test suite to use correct mock patch paths (patch where names
  are looked up, not where defined)
- Add torture_test.py for real document validation
2026-01-10 23:51:30 -07:00

245 lines
8.6 KiB
Python

#!/usr/bin/env python
"""
Torture test for MCP Office Tools - Tests advanced tools with real files.
This tests robustness of the MCP server against various document formats.
"""
import asyncio
import os
import sys
import warnings
import tempfile
# Keep the test report readable: these filters are installed before the
# project imports further down, so they are already active at import time.
# Suppress pandas datetime warnings for cleaner output
# (drops any warning whose message mentions "datetime64").
warnings.filterwarnings("ignore", message=".*datetime64.*")
# Hide every FutureWarning as well, whichever module raises it.
warnings.filterwarnings("ignore", category=FutureWarning)
# Add src to path: the "src" directory next to this script goes to the front
# of sys.path so the mcp_office_tools imports that follow are looked up there
# first.
sys.path.insert(0, os.path.join(os.path.dirname(__file__), "src"))
from mcp_office_tools.mixins.excel import ExcelMixin
from mcp_office_tools.mixins.word import WordMixin
# Real-world sample documents from the user's system. The paths are absolute
# and machine-specific, so they will not exist on most other hosts.
_FORSCAN_DIR = "/home/rpm/FORScan Lite spreadsheets v1.1"

EXCEL_TEST_FILES = [
    f"{_FORSCAN_DIR}/FORScan Lite spreadsheet - PIDs.xlsx",
    f"{_FORSCAN_DIR}/FORScan Lite spreadsheet - CAN messages.xlsx",
]

WORD_TEST_FILES = [
    # Markdown as text test
    "/home/rpm/MeshCentral-master/docs/docs/meshcentral/debugging.md",
]
# We'll also create synthetic test files
def create_test_xlsx(path: str) -> str:
    """Create a synthetic Excel workbook with data and formulas at *path*.

    The workbook has a single sheet named "Test Data" laid out as:

    * row 1: the headers "Category", "Value", "Formula";
    * rows 2-6: five category/value pairs, with column C holding the
      formula ``=B<row>*1.1``;
    * rows 8-10: "Total", "Average" and "Max" labels in column A with the
      matching ``SUM`` / ``AVERAGE`` / ``MAX`` formulas over ``B2:B6`` in
      column B.

    Args:
        path: Destination file path for the ``.xlsx`` file.

    Returns:
        The same *path*, for convenient chaining by the caller.
    """
    # Imported lazily so that merely importing this module does not require
    # openpyxl. (The chart classes previously imported here were never used.)
    import openpyxl

    wb = openpyxl.Workbook()
    ws = wb.active
    ws.title = "Test Data"

    # Header row.
    ws["A1"] = "Category"
    ws["B1"] = "Value"
    ws["C1"] = "Formula"

    # Data rows start directly under the header (row 2).
    categories = ["Alpha", "Beta", "Gamma", "Delta", "Epsilon"]
    values = [100, 250, 175, 320, 95]
    for row, (category, value) in enumerate(zip(categories, values), start=2):
        ws[f"A{row}"] = category
        ws[f"B{row}"] = value
        ws[f"C{row}"] = f"=B{row}*1.1"  # per-row formula

    # Summary formulas, separated from the data by one blank row.
    summary_rows = [
        (8, "Total", "=SUM(B2:B6)"),
        (9, "Average", "=AVERAGE(B2:B6)"),
        (10, "Max", "=MAX(B2:B6)"),
    ]
    for row, label, formula in summary_rows:
        ws[f"A{row}"] = label
        ws[f"B{row}"] = formula

    wb.save(path)
    return path
def create_test_docx(path: str) -> str:
    """Create a synthetic Word document with headings and a table at *path*.

    The document contains, in order: a level-0 title; an "Introduction"
    section (level 1) with two paragraphs; a "Data Overview" subsection
    (level 2) with one paragraph and a 4x3 "Table Grid" table (header row
    plus three data rows); an "Analysis Results" section with a "Summary"
    subsection; and a closing "Conclusion" section.

    Args:
        path: Destination file path for the ``.docx`` file.

    Returns:
        The same *path*, for convenient chaining by the caller.
    """
    # Imported lazily so that merely importing this module does not require
    # python-docx. (The Inches/Pt helpers previously imported here were
    # never used.)
    from docx import Document

    doc = Document()

    # Title.
    doc.add_heading("Test Document for Torture Testing", 0)

    # Section with paragraphs.
    doc.add_heading("Introduction", level=1)
    doc.add_paragraph("This is a test document created for torture testing the MCP Office Tools.")
    doc.add_paragraph("It contains multiple elements to test extraction capabilities.")

    # Subsection that introduces the table.
    doc.add_heading("Data Overview", level=2)
    doc.add_paragraph("Below is a table of test data.")

    # Header row followed by the data rows; the table is sized from this grid
    # so the dimensions cannot drift out of sync with the content.
    grid = [
        ("Name", "Value", "Status"),
        ("Item A", "100", "Active"),
        ("Item B", "200", "Pending"),
        ("Item C", "300", "Complete"),
    ]
    table = doc.add_table(rows=len(grid), cols=len(grid[0]))
    table.style = 'Table Grid'
    for row_idx, row_values in enumerate(grid):
        # Fetch the row's cells once instead of once per column.
        cells = table.rows[row_idx].cells
        for col_idx, text in enumerate(row_values):
            cells[col_idx].text = text

    # Remaining sections.
    doc.add_heading("Analysis Results", level=1)
    doc.add_heading("Summary", level=2)
    doc.add_paragraph("The analysis shows positive results across all metrics.")
    doc.add_heading("Conclusion", level=1)
    doc.add_paragraph("This concludes the test document.")

    doc.save(path)
    return path
def _require(condition, message):
    """Raise ``AssertionError(message)`` unless *condition* is truthy.

    Used instead of the ``assert`` statement so the checks still run when the
    script is executed with ``python -O``, which strips assert statements and
    would otherwise make every test pass vacuously.
    """
    if not condition:
        raise AssertionError(message)


async def _run_case(results: dict, name: str, title: str, case) -> None:
    """Run one test case, print its outcome, and record it in *results*.

    Args:
        results: Mapping of test name to outcome; updated in place with
            ``True`` (pass) or ``False`` (fail).
        name: Key under which the outcome is stored and later summarised.
        title: Heading printed before the case runs.
        case: Zero-argument coroutine function that performs the test and
            returns the text to show after "PASS - ". Any exception it
            raises marks the case as failed.
    """
    print(f"\n🔬 {title}")
    try:
        detail = await case()
    except Exception as e:  # script boundary: any error counts as a failure
        print(f" ❌ FAIL - {type(e).__name__}: {e}")
        results[name] = False
    else:
        print(f" ✅ PASS - {detail}")
        results[name] = True


def _print_summary(results: dict) -> bool:
    """Print the per-test summary and return True when no test failed.

    Outcomes are ``True`` (pass), ``False`` (fail) or ``None`` (skipped).
    Skipped tests are reported but do not count against the result.
    """
    print("\n" + "=" * 70)
    print("📊 TORTURE TEST SUMMARY")
    print("=" * 70)

    passed = sum(1 for outcome in results.values() if outcome is True)
    failed = sum(1 for outcome in results.values() if outcome is False)
    skipped = sum(1 for outcome in results.values() if outcome is None)

    for test_name, outcome in results.items():
        if outcome is True:
            print(f" ✅ PASS: {test_name}")
        elif outcome is False:
            print(f" ❌ FAIL: {test_name}")
        else:
            print(f" ⏭️ SKIP: {test_name}")

    print(f"\n Total: {passed}/{passed + failed} tests passed", end="")
    if skipped > 0:
        print(f" ({skipped} skipped)")
    else:
        print()

    return failed == 0


async def run_torture_tests():
    """Run comprehensive torture tests on all advanced tools.

    Builds synthetic ``.xlsx`` and ``.docx`` fixtures in a temporary
    directory, exercises the Excel and Word mixin tools against them, and
    additionally analyses the first entry of ``EXCEL_TEST_FILES`` when that
    file exists on disk (otherwise the test is reported as skipped).

    Returns:
        bool: True when no test failed (skipped tests are ignored),
        False otherwise.
    """
    # Opening banner. It previously reused the "SUMMARY" heading, which was
    # misleading because no test had run yet.
    print("=" * 70)
    print("🧪 MCP OFFICE TOOLS TORTURE TEST")
    print("=" * 70)

    excel_mixin = ExcelMixin()
    word_mixin = WordMixin()
    results = {}

    # Synthetic fixtures live only for the duration of the run.
    with tempfile.TemporaryDirectory() as tmpdir:
        test_xlsx = create_test_xlsx(os.path.join(tmpdir, "test_data.xlsx"))
        test_docx = create_test_docx(os.path.join(tmpdir, "test_document.docx"))

        async def excel_data_analysis():
            result = await excel_mixin.analyze_excel_data(test_xlsx)
            _require(
                "analysis" in result or "summary" in result,
                "Missing analysis/summary key",
            )
            sheets_count = result.get("summary", {}).get("sheets_analyzed", 1)
            return f"Analyzed {sheets_count} sheet(s)"

        async def excel_formula_extraction():
            result = await excel_mixin.extract_excel_formulas(test_xlsx)
            _require(
                "formulas" in result or "summary" in result,
                "Missing formulas/summary key",
            )
            formula_count = result.get("summary", {}).get("total_formulas", 0)
            return f"Extracted {formula_count} formula(s)"

        async def excel_chart_generation():
            # Use actual column names from the test data (headers in row 1).
            result = await excel_mixin.create_excel_chart_data(
                test_xlsx,
                x_column="Category",
                y_columns=["Value"],
                chart_type="bar",
            )
            _require(
                "chart_configuration" in result,
                "Missing chart_configuration key",
            )
            return (
                "Generated chart config with "
                f"{len(result['chart_configuration'])} libraries"
            )

        async def word_structure_analysis():
            result = await word_mixin.analyze_word_structure(test_docx)
            _require("structure" in result, "Missing structure key")
            heading_count = result["structure"].get("total_headings", 0)
            return f"Found {heading_count} heading(s)"

        async def word_table_extraction():
            result = await word_mixin.extract_word_tables(test_docx)
            _require("tables" in result, "Missing tables key")
            table_count = result.get("total_tables", 0)
            return f"Extracted {table_count} table(s)"

        synthetic_cases = [
            ("Excel Data Analysis", "Test 1: Excel Data Analysis",
             excel_data_analysis),
            ("Excel Formula Extraction", "Test 2: Excel Formula Extraction",
             excel_formula_extraction),
            ("Excel Chart Generation", "Test 3: Excel Chart Data Generation",
             excel_chart_generation),
            ("Word Structure Analysis", "Test 4: Word Structure Analysis",
             word_structure_analysis),
            ("Word Table Extraction", "Test 5: Word Table Extraction",
             word_table_extraction),
        ]
        for name, title, case in synthetic_cases:
            await _run_case(results, name, title, case)

        # Test 6: real Excel file, run only if it is available on this machine.
        real_excel = EXCEL_TEST_FILES[0]
        real_title = "Test 6: Real Excel File (FORScan spreadsheet)"

        async def real_excel_analysis():
            result = await excel_mixin.analyze_excel_data(real_excel)
            sheets = len(result.get("sheets", []))
            return f"Analyzed real file with {sheets} sheet(s)"

        if os.path.exists(real_excel):
            await _run_case(
                results, "Real Excel Analysis", real_title, real_excel_analysis
            )
        else:
            print(f"\n🔬 {real_title}")
            print(f" ⏭️ SKIP - File not found: {real_excel}")
            results["Real Excel Analysis"] = None

    return _print_summary(results)
if __name__ == "__main__":
    # Script entry point: drive the async suite to completion and mirror its
    # outcome in the process exit status (0 = success, 1 = failure).
    all_passed = asyncio.run(run_torture_tests())
    exit_code = 0 if all_passed else 1
    sys.exit(exit_code)