mcp-office-tools/tests/test_universal_mixin.py
Ryan Malloy 76c7a0b2d0 Add decorators for field defaults and error handling, fix Excel performance
- Create @resolve_field_defaults decorator to handle Pydantic FieldInfo
  objects when tools are called directly (outside MCP framework)
- Create @handle_office_errors decorator for consistent error wrapping
- Apply decorators to Excel and Word mixins, removing ~100 lines of
  boilerplate code
- Fix Excel formula extraction performance: load workbooks once before
  loop instead of per-cell (100x faster with calculated values)
- Update test suite to use correct mock patch paths (patch where names
  are looked up, not where defined)
- Add torture_test.py for real document validation
2026-01-10 23:51:30 -07:00

418 lines
18 KiB
Python

"""Focused tests for UniversalMixin functionality.
This module tests the UniversalMixin in isolation, focusing on:
- Tool registration and functionality
- Error handling patterns
- Mocking strategies for file operations
- Async behavior validation
"""
import pytest
import tempfile
import os
from unittest.mock import AsyncMock, MagicMock, patch, mock_open
from pathlib import Path
from fastmcp import FastMCP
# FastMCP testing - using direct tool access
from mcp_office_tools.mixins.universal import UniversalMixin
from mcp_office_tools.utils import OfficeFileError
class TestUniversalMixinRegistration:
    """Check that UniversalMixin registers its tools on a FastMCP app."""

    def test_mixin_initialization(self):
        """Registering the mixin leaves exactly six tools on the app."""
        server = FastMCP("Test Universal")
        universal = UniversalMixin()
        universal.register_all(server)

        assert universal is not None
        tool_table = server._tool_manager._tools
        assert len(tool_table) == 6  # 6 universal tools

    def test_tool_names_registered(self):
        """Every expected universal tool name is present after registration."""
        server = FastMCP("Test Universal")
        UniversalMixin().register_all(server)

        expected_tools = {
            "extract_text",
            "extract_images",
            "extract_metadata",
            "detect_office_format",
            "analyze_document_health",
            "get_supported_formats",
        }
        registered_tools = set(server._tool_manager._tools.keys())

        # An empty difference means every expected name was registered.
        missing_tools = expected_tools - registered_tools
        assert not missing_tools
class TestExtractText:
    """Tests for the ``extract_text`` tool of UniversalMixin.

    Path resolution, validation and format detection are patched on the
    ``mcp_office_tools.mixins.universal`` module so that most tests run
    without touching real documents.
    """

    @pytest.fixture
    def mixin(self):
        """Create a UniversalMixin registered on a throwaway FastMCP app."""
        app = FastMCP("Test")
        mixin = UniversalMixin()
        mixin.register_all(app)
        return mixin

    @pytest.mark.asyncio
    async def test_extract_text_nonexistent_file(self, mixin):
        """extract_text on a nonexistent path raises OfficeFileError."""
        with pytest.raises(OfficeFileError):
            await mixin.extract_text("/nonexistent/file.docx")

    @pytest.mark.asyncio
    @patch('mcp_office_tools.mixins.universal.resolve_office_file_path')
    @patch('mcp_office_tools.mixins.universal.validate_office_file')
    @patch('mcp_office_tools.mixins.universal.detect_format')
    async def test_extract_text_validation_failure(self, mock_detect, mock_validate, mock_resolve, mixin):
        """A failed validation surfaces as OfficeFileError carrying the error text."""
        mock_resolve.return_value = "/test.docx"
        mock_validate.return_value = {
            "is_valid": False,
            "errors": ["File is corrupted"],
        }

        with pytest.raises(OfficeFileError, match="Invalid file: File is corrupted"):
            await mixin.extract_text("/test.docx")

    @pytest.mark.asyncio
    @patch('mcp_office_tools.mixins.universal.resolve_office_file_path')
    @patch('mcp_office_tools.mixins.universal.validate_office_file')
    @patch('mcp_office_tools.mixins.universal.detect_format')
    async def test_extract_text_csv_success(self, mock_detect, mock_validate, mock_resolve, mixin):
        """A valid CSV yields text, extraction metadata and document metadata."""
        # Module-level helpers are replaced by the @patch decorators above.
        mock_resolve.return_value = "/test.csv"
        mock_validate.return_value = {"is_valid": True, "errors": []}
        mock_detect.return_value = {
            "category": "data",
            "extension": ".csv",
            "format_name": "CSV",
        }

        # The mixin's own extraction helpers are patched on the instance.
        with patch.object(mixin, '_extract_text_by_category') as mock_extract, \
                patch.object(mixin, '_extract_basic_metadata') as mock_metadata:
            mock_extract.return_value = {
                "text": "Name,Age\nJohn,30\nJane,25",
                "method_used": "pandas",
                "methods_tried": ["pandas"],
            }
            mock_metadata.return_value = {"file_size": 1024, "rows": 3}

            result = await mixin.extract_text("/test.csv")

        # Verify structure
        assert "text" in result
        assert "metadata" in result
        assert "document_metadata" in result

        # Verify content
        assert "John" in result["text"]
        assert result["metadata"]["extraction_method"] == "pandas"
        assert result["metadata"]["format"] == "CSV"
        assert result["document_metadata"]["file_size"] == 1024

    @pytest.mark.asyncio
    async def test_extract_text_parameter_handling(self, mixin):
        """Caller-supplied options are forwarded to ``_extract_text_by_category``."""
        # One flat ``with`` replaces five nested ones; the patches are still
        # entered in the same order.
        with patch('mcp_office_tools.mixins.universal.resolve_office_file_path') as mock_resolve, \
                patch('mcp_office_tools.mixins.universal.validate_office_file') as mock_validate, \
                patch('mcp_office_tools.mixins.universal.detect_format') as mock_detect, \
                patch.object(mixin, '_extract_text_by_category') as mock_extract, \
                patch.object(mixin, '_extract_basic_metadata') as mock_metadata:
            mock_resolve.return_value = "/test.docx"
            mock_validate.return_value = {"is_valid": True, "errors": []}
            mock_detect.return_value = {"category": "word", "extension": ".docx", "format_name": "Word"}
            mock_extract.return_value = {"text": "test", "method_used": "docx"}
            mock_metadata.return_value = {}

            # The return value is irrelevant here; only the forwarded
            # arguments are inspected below.
            await mixin.extract_text(
                file_path="/test.docx",
                preserve_formatting=True,
                include_metadata=False,
                method="primary",
            )

        # Positional layout of the helper call:
        # _extract_text_by_category(local_path, extension, category, preserve_formatting, method)
        mock_extract.assert_called_once()
        args = mock_extract.call_args[0]
        assert args[2] == "word"  # category (index 2)
        # Identity check: the flag must arrive as the bool True, not merely
        # something that compares equal to it (such as 1).
        assert args[3] is True  # preserve_formatting (index 3)
        assert args[4] == "primary"  # method (index 4)
class TestExtractImages:
    """Exercise the ``extract_images`` tool of UniversalMixin."""

    @pytest.fixture
    def mixin(self):
        """Provide a UniversalMixin registered on a scratch FastMCP app."""
        server = FastMCP("Test")
        universal = UniversalMixin()
        universal.register_all(server)
        return universal

    @pytest.mark.asyncio
    async def test_extract_images_nonexistent_file(self, mixin):
        """A path that does not exist makes extract_images raise OfficeFileError."""
        with pytest.raises(OfficeFileError):
            await mixin.extract_images("/nonexistent/file.docx")

    @pytest.mark.asyncio
    @patch('mcp_office_tools.mixins.universal.resolve_office_file_path')
    @patch('mcp_office_tools.mixins.universal.validate_office_file')
    @patch('mcp_office_tools.mixins.universal.detect_format')
    async def test_extract_images_unsupported_format(self, mock_detect, mock_validate, mock_resolve, mixin):
        """With the image helper returning nothing, the result is an empty image list."""
        mock_detect.return_value = {"category": "data", "extension": ".csv", "format_name": "CSV"}
        mock_validate.return_value = {"is_valid": True, "errors": []}
        mock_resolve.return_value = "/test.csv"

        # Stand in for the per-category helper: here it yields no images
        # rather than raising.
        image_helper = patch.object(mixin, '_extract_images_by_category')
        with image_helper as mock_extract:
            mock_extract.return_value = []
            result = await mixin.extract_images("/test.csv")

        # Shape of the response
        for key in ("images", "metadata"):
            assert key in result

        # Content of the response
        assert result["images"] == []
        assert result["metadata"]["image_count"] == 0
class TestGetSupportedFormats:
    """Tests for the ``get_supported_formats`` tool of UniversalMixin."""

    @pytest.fixture
    def mixin(self):
        """Create a UniversalMixin registered on a throwaway FastMCP app."""
        app = FastMCP("Test")
        mixin = UniversalMixin()
        mixin.register_all(app)
        return mixin

    @pytest.mark.asyncio
    async def test_get_supported_formats_structure(self, mixin):
        """The result carries the expected top-level keys, extensions and categories."""
        result = await mixin.get_supported_formats()

        # Verify top-level structure
        assert isinstance(result, dict)
        required_keys = {"supported_extensions", "format_details", "categories", "total_formats"}
        assert required_keys.issubset(result.keys())

        # Verify supported extensions include common formats
        extensions = result["supported_extensions"]
        assert isinstance(extensions, list)
        expected_extensions = {".docx", ".xlsx", ".pptx", ".doc", ".xls", ".ppt", ".csv"}
        assert expected_extensions.issubset(set(extensions))

        # Verify categories
        categories = result["categories"]
        assert isinstance(categories, dict)
        expected_categories = {"word", "excel", "powerpoint"}
        assert expected_categories.issubset(categories.keys())

        # total_formats must agree with the length of the extension list
        assert result["total_formats"] == len(extensions)

    @pytest.mark.asyncio
    async def test_get_supported_formats_details(self, mixin):
        """The ``.docx`` entry of ``format_details`` is complete and Word-specific.

        If ``format_details`` has no ``.docx`` entry the test is reported as
        skipped instead of passing silently with zero detail assertions.
        """
        result = await mixin.get_supported_formats()
        format_details = result["format_details"]
        assert isinstance(format_details, dict)

        # Make the "nothing to check" case visible in the test report; the
        # previous ``if`` guard let the test pass without verifying anything.
        if ".docx" not in format_details:
            pytest.skip("format_details has no '.docx' entry to verify")

        docx_details = format_details[".docx"]
        expected_docx_keys = {
            "category",
            "legacy_format",
            "text_extraction",
            "image_extraction",
            "metadata_extraction",
            "markdown_conversion",
        }
        assert expected_docx_keys.issubset(docx_details.keys())

        # Verify Word document specifics
        assert docx_details["category"] == "word"
        assert docx_details["legacy_format"] is False
        assert docx_details["markdown_conversion"] is True
class TestDocumentHealth:
    """Exercise the ``analyze_document_health`` tool of UniversalMixin."""

    @pytest.fixture
    def mixin(self):
        """Provide a UniversalMixin registered on a scratch FastMCP app."""
        server = FastMCP("Test")
        universal = UniversalMixin()
        universal.register_all(server)
        return universal

    @pytest.mark.asyncio
    @patch('mcp_office_tools.mixins.universal.resolve_office_file_path')
    @patch('mcp_office_tools.mixins.universal.validate_office_file')
    @patch('mcp_office_tools.mixins.universal.detect_format')
    async def test_analyze_document_health_success(self, mock_detect, mock_validate, mock_resolve, mixin):
        """A valid, unprotected Word file is reported as healthy."""
        validation_report = {
            "is_valid": True,
            "errors": [],
            "warnings": [],
            "password_protected": False,
        }
        format_report = {
            "category": "word",
            "extension": ".docx",
            "format_name": "Word Document",
            "is_legacy": False,
            "structure": {"estimated_complexity": "simple"},
        }
        mock_resolve.return_value = "/test.docx"
        mock_validate.return_value = validation_report
        mock_detect.return_value = format_report

        result = await mixin.analyze_document_health("/test.docx")

        # Keys the health report is expected to expose
        report_keys = (
            "overall_health",
            "validation",
            "format_info",
            "analysis_time",
            "recommendations",
        )
        for key in report_keys:
            assert key in result

        # Values derived from the mocked validation and format data
        assert result["overall_health"] == "healthy"
        assert result["validation"]["is_valid"] is True
        assert result["format_info"]["category"] == "word"
        assert len(result["recommendations"]) > 0
class TestDirectToolAccess:
    """Call registered tools through the FastMCP tool table instead of the mixin."""

    @pytest.mark.asyncio
    async def test_tool_execution_direct(self):
        """The registered get_supported_formats tool can be awaited via ``.fn``."""
        server = FastMCP("Test App")
        UniversalMixin().register_all(server)

        # Look the tool up by name and invoke its underlying callable.
        tool_table = server._tool_manager._tools
        formats_tool = tool_table["get_supported_formats"]
        result = await formats_tool.fn()

        for key in ("supported_extensions", "format_details"):
            assert key in result
        assert isinstance(result["supported_extensions"], list)

    @pytest.mark.asyncio
    async def test_tool_error_direct(self):
        """Errors raised by a tool propagate when it is called via ``.fn``."""
        server = FastMCP("Test App")
        UniversalMixin().register_all(server)

        # Same lookup path, this time with a file that cannot exist.
        text_tool = server._tool_manager._tools["extract_text"]
        with pytest.raises(OfficeFileError):
            await text_tool.fn(file_path="/nonexistent/file.docx")
class TestMockingPatterns:
    """Worked examples of mocking strategies for file-based tools."""

    @pytest.fixture
    def mixin(self):
        """Provide a UniversalMixin registered on a scratch FastMCP app."""
        server = FastMCP("Test")
        universal = UniversalMixin()
        universal.register_all(server)
        return universal

    @pytest.mark.asyncio
    async def test_comprehensive_mocking_pattern(self, mixin):
        """Patch every collaborator of extract_text and check the assembled result."""
        # Canned responses, declared up front so the patching below stays short.
        expected_text = "This is comprehensive test content with multiple paragraphs.\n\nIncluding headers and formatting."
        validation_report = {
            "is_valid": True,
            "errors": [],
            "warnings": ["File is large"],
            "password_protected": False,
            "file_size": 1048576,  # 1MB
        }
        format_report = {
            "category": "word",
            "extension": ".docx",
            "format_name": "Microsoft Word Document",
            "mime_type": "application/vnd.openxmlformats-officedocument.wordprocessingml.document",
            "is_legacy": False,
            "structure": {
                "estimated_complexity": "moderate",
                "has_images": True,
                "has_tables": True,
            },
        }
        extraction_payload = {
            "text": expected_text,
            "method_used": "python-docx",
            "methods_tried": ["python-docx"],
            "formatted_sections": [
                {"type": "heading", "text": "Document Title", "level": 1},
                {"type": "paragraph", "text": "This is comprehensive test content..."},
            ],
        }
        document_properties = {
            "title": "Test Document",
            "author": "Test Author",
            "created": "2024-01-01T10:00:00Z",
            "modified": "2024-01-15T14:30:00Z",
            "word_count": 1247,
            "page_count": 3,
        }

        # Module-level helpers first, then the mixin's own internal methods.
        resolve_patch = patch('mcp_office_tools.mixins.universal.resolve_office_file_path')
        validate_patch = patch('mcp_office_tools.mixins.universal.validate_office_file')
        detect_patch = patch('mcp_office_tools.mixins.universal.detect_format')
        extract_patch = patch.object(mixin, '_extract_text_by_category')
        metadata_patch = patch.object(mixin, '_extract_basic_metadata')

        with resolve_patch as mock_resolve, validate_patch as mock_validate, \
                detect_patch as mock_detect, extract_patch as mock_extract, \
                metadata_patch as mock_metadata:
            mock_resolve.return_value = "/realistic/path/document.docx"
            mock_validate.return_value = validation_report
            mock_detect.return_value = format_report
            mock_extract.return_value = extraction_payload
            mock_metadata.return_value = document_properties

            result = await mixin.extract_text(
                file_path="/test/document.docx",
                preserve_formatting=True,
                include_metadata=True,
                method="auto",
            )

        # The response combines the mocked pieces.
        extraction_info = result["metadata"]
        assert result["text"] == expected_text
        assert extraction_info["extraction_method"] == "python-docx"
        assert extraction_info["format"] == "Microsoft Word Document"
        assert "extraction_time" in extraction_info
        assert result["document_metadata"]["author"] == "Test Author"
        assert "structure" in result  # Because preserve_formatting=True

        # The caller's path goes to the resolver; the resolved path goes on
        # to validation and format detection.
        mock_resolve.assert_called_once_with("/test/document.docx")
        mock_validate.assert_called_once_with("/realistic/path/document.docx")
        mock_detect.assert_called_once_with("/realistic/path/document.docx")
        mock_extract.assert_called_once()
        mock_metadata.assert_called_once()
if __name__ == "__main__":
    # Allow running this file directly as a script. pytest.main() returns the
    # run's exit code; raising SystemExit with it makes the process exit
    # status reflect test failures instead of always being 0.
    raise SystemExit(pytest.main([__file__, "-v"]))