mcp-office-tools/tests/test_universal_mixin.py
Ryan Malloy 31948d6ffc
Some checks are pending
Test Dashboard / test-and-dashboard (push) Waiting to run
Rename package to mcwaddams
Named for Milton Waddams, who was relocated to the basement with
boxes of legacy documents. He handles the .doc and .xls files from
1997 that nobody else wants to touch.

- Rename package from mcp-office-tools to mcwaddams
- Update author to Ryan Malloy
- Update all imports and references
- Add Office Space themed README narrative
- All 53 tests passing
2026-01-11 11:35:35 -07:00

419 lines
18 KiB
Python

"""Focused tests for UniversalMixin functionality.
This module tests the UniversalMixin in isolation, focusing on:
- Tool registration and functionality
- Error handling patterns
- Mocking strategies for file operations
- Async behavior validation
"""
import pytest
import tempfile
import os
from unittest.mock import AsyncMock, MagicMock, patch, mock_open
from pathlib import Path
from fastmcp import FastMCP
# FastMCP testing - using direct tool access
from mcwaddams.mixins.universal import UniversalMixin
from mcwaddams.utils import OfficeFileError
class TestUniversalMixinRegistration:
    """Registration checks: the mixin must expose its tools on a FastMCP app."""

    def test_mixin_initialization(self):
        """A freshly registered mixin leaves exactly seven tools on the app."""
        server = FastMCP("Test Universal")
        universal = UniversalMixin()
        universal.register_all(server)

        assert universal is not None
        # 7 universal tools (includes index_document)
        tool_count = len(server._tool_manager._tools)
        assert tool_count == 7

    def test_tool_names_registered(self):
        """Every expected tool name is present in the app's tool registry."""
        server = FastMCP("Test Universal")
        UniversalMixin().register_all(server)

        wanted = {
            "extract_text",
            "extract_images",
            "extract_metadata",
            "detect_office_format",
            "analyze_document_health",
            "get_supported_formats",
            "index_document",
        }
        available = set(server._tool_manager._tools.keys())

        # Subset (not equality): extra registered tools are tolerated here.
        assert wanted <= available
class TestExtractText:
    """Tests for the extract_text tool: error paths, result shape, argument forwarding."""

    @pytest.fixture
    def mixin(self):
        """Return a UniversalMixin whose tools are registered on a throwaway app."""
        app = FastMCP("Test")
        mixin = UniversalMixin()
        mixin.register_all(app)
        return mixin

    @pytest.mark.asyncio
    async def test_extract_text_nonexistent_file(self, mixin):
        """extract_text is expected to raise OfficeFileError for a missing file."""
        with pytest.raises(OfficeFileError):
            await mixin.extract_text("/nonexistent/file.docx")

    @pytest.mark.asyncio
    @patch('mcwaddams.mixins.universal.resolve_office_file_path')
    @patch('mcwaddams.mixins.universal.validate_office_file')
    @patch('mcwaddams.mixins.universal.detect_format')
    async def test_extract_text_validation_failure(self, mock_detect, mock_validate, mock_resolve, mixin):
        """A failed validation must surface as OfficeFileError carrying the error text."""
        mock_resolve.return_value = "/test.docx"
        mock_validate.return_value = {
            "is_valid": False,
            "errors": ["File is corrupted"],
        }

        with pytest.raises(OfficeFileError, match="Invalid file: File is corrupted"):
            await mixin.extract_text("/test.docx")

    @pytest.mark.asyncio
    @patch('mcwaddams.mixins.universal.resolve_office_file_path')
    @patch('mcwaddams.mixins.universal.validate_office_file')
    @patch('mcwaddams.mixins.universal.detect_format')
    async def test_extract_text_csv_success(self, mock_detect, mock_validate, mock_resolve, mixin):
        """A successful CSV extraction returns text, metadata and document_metadata."""
        # Module-level helpers are replaced so no real file is touched.
        mock_resolve.return_value = "/test.csv"
        mock_validate.return_value = {"is_valid": True, "errors": []}
        mock_detect.return_value = {
            "category": "data",
            "extension": ".csv",
            "format_name": "CSV",
        }

        # Internal extraction/metadata methods are stubbed on the instance.
        with patch.object(mixin, '_extract_text_by_category') as mock_extract, \
                patch.object(mixin, '_extract_basic_metadata') as mock_metadata:
            mock_extract.return_value = {
                "text": "Name,Age\nJohn,30\nJane,25",
                "method_used": "pandas",
                "methods_tried": ["pandas"],
            }
            mock_metadata.return_value = {"file_size": 1024, "rows": 3}

            result = await mixin.extract_text("/test.csv")

        # Verify structure
        assert "text" in result
        assert "metadata" in result
        assert "document_metadata" in result

        # Verify content
        assert "John" in result["text"]
        assert result["metadata"]["extraction_method"] == "pandas"
        assert result["metadata"]["format"] == "CSV"
        assert result["document_metadata"]["file_size"] == 1024

    @pytest.mark.asyncio
    @patch('mcwaddams.mixins.universal.resolve_office_file_path')
    @patch('mcwaddams.mixins.universal.validate_office_file')
    @patch('mcwaddams.mixins.universal.detect_format')
    async def test_extract_text_parameter_handling(self, mock_detect, mock_validate, mock_resolve, mixin):
        """extract_text forwards category, preserve_formatting and method positionally."""
        # Mock all dependencies for parameter testing
        mock_resolve.return_value = "/test.docx"
        mock_validate.return_value = {"is_valid": True, "errors": []}
        mock_detect.return_value = {"category": "word", "extension": ".docx", "format_name": "Word"}

        with patch.object(mixin, '_extract_text_by_category') as mock_extract, \
                patch.object(mixin, '_extract_basic_metadata') as mock_metadata:
            mock_extract.return_value = {"text": "test", "method_used": "docx"}
            mock_metadata.return_value = {}

            # Only the forwarded arguments matter here; the return value is
            # intentionally discarded.
            await mixin.extract_text(
                file_path="/test.docx",
                preserve_formatting=True,
                include_metadata=False,
                method="primary",
            )

        # Expected positional layout of the internal call:
        # _extract_text_by_category(local_path, extension, category, preserve_formatting, method)
        mock_extract.assert_called_once()
        args = mock_extract.call_args[0]
        assert args[2] == "word"  # category (index 2)
        assert args[3] is True  # preserve_formatting (index 3)
        assert args[4] == "primary"  # method (index 4)
class TestExtractImages:
    """Tests covering the extract_images tool."""

    @pytest.fixture
    def mixin(self):
        """Build a UniversalMixin registered against a disposable FastMCP app."""
        server = FastMCP("Test")
        universal = UniversalMixin()
        universal.register_all(server)
        return universal

    @pytest.mark.asyncio
    async def test_extract_images_nonexistent_file(self, mixin):
        """A missing input file is expected to raise OfficeFileError."""
        missing_path = "/nonexistent/file.docx"
        with pytest.raises(OfficeFileError):
            await mixin.extract_images(missing_path)

    @pytest.mark.asyncio
    @patch('mcwaddams.mixins.universal.resolve_office_file_path')
    @patch('mcwaddams.mixins.universal.validate_office_file')
    @patch('mcwaddams.mixins.universal.detect_format')
    async def test_extract_images_unsupported_format(self, mock_detect, mock_validate, mock_resolve, mixin):
        """For a CSV input the result carries an empty image list and a zero count."""
        csv_path = "/test.csv"
        mock_resolve.return_value = csv_path
        mock_validate.return_value = {"is_valid": True, "errors": []}
        mock_detect.return_value = {"category": "data", "extension": ".csv", "format_name": "CSV"}

        # The internal extractor is stubbed to hand back "no images" rather
        # than raising: CSV returns empty list, not an error.
        with patch.object(mixin, '_extract_images_by_category') as stub_extract:
            stub_extract.return_value = []
            outcome = await mixin.extract_images(csv_path)

        # Verify structure
        for key in ("images", "metadata"):
            assert key in outcome
        assert outcome["images"] == []
        assert outcome["metadata"]["image_count"] == 0
class TestGetSupportedFormats:
    """Tests for the get_supported_formats tool."""

    @pytest.fixture
    def mixin(self):
        """Return a UniversalMixin whose tools are registered on a throwaway app."""
        app = FastMCP("Test")
        mixin = UniversalMixin()
        mixin.register_all(app)
        return mixin

    @pytest.mark.asyncio
    async def test_get_supported_formats_structure(self, mixin):
        """The result is a dict with the expected keys, extensions and categories."""
        result = await mixin.get_supported_formats()

        # Verify top-level structure
        assert isinstance(result, dict)
        required_keys = {"supported_extensions", "format_details", "categories", "total_formats"}
        assert required_keys.issubset(result.keys())

        # Verify supported extensions include common formats
        extensions = result["supported_extensions"]
        assert isinstance(extensions, list)
        expected_extensions = {".docx", ".xlsx", ".pptx", ".doc", ".xls", ".ppt", ".csv"}
        assert expected_extensions.issubset(set(extensions))

        # Verify categories
        categories = result["categories"]
        assert isinstance(categories, dict)
        expected_categories = {"word", "excel", "powerpoint"}
        assert expected_categories.issubset(categories.keys())

        # Verify total_formats is correct
        assert result["total_formats"] == len(extensions)

    @pytest.mark.asyncio
    async def test_get_supported_formats_details(self, mixin):
        """format_details contains a complete, correct entry for .docx."""
        result = await mixin.get_supported_formats()
        format_details = result["format_details"]
        assert isinstance(format_details, dict)

        # Assert presence explicitly: guarding the checks below with an `if`
        # would let this test pass without verifying anything when the .docx
        # entry is missing.
        assert ".docx" in format_details
        docx_details = format_details[".docx"]
        expected_docx_keys = {
            "category",
            "legacy_format",
            "text_extraction",
            "image_extraction",
            "metadata_extraction",
            "markdown_conversion",
        }
        assert expected_docx_keys.issubset(docx_details.keys())

        # Verify Word document specifics
        assert docx_details["category"] == "word"
        assert docx_details["legacy_format"] is False
        assert docx_details["markdown_conversion"] is True
class TestDocumentHealth:
    """Tests covering the analyze_document_health tool."""

    @pytest.fixture
    def mixin(self):
        """Build a UniversalMixin registered against a disposable FastMCP app."""
        server = FastMCP("Test")
        universal = UniversalMixin()
        universal.register_all(server)
        return universal

    @pytest.mark.asyncio
    @patch('mcwaddams.mixins.universal.resolve_office_file_path')
    @patch('mcwaddams.mixins.universal.validate_office_file')
    @patch('mcwaddams.mixins.universal.detect_format')
    async def test_analyze_document_health_success(self, mock_detect, mock_validate, mock_resolve, mixin):
        """A valid, simple Word document is expected to be reported as healthy."""
        docx_path = "/test.docx"
        clean_validation = {
            "is_valid": True,
            "errors": [],
            "warnings": [],
            "password_protected": False,
        }
        word_format = {
            "category": "word",
            "extension": ".docx",
            "format_name": "Word Document",
            "is_legacy": False,
            "structure": {"estimated_complexity": "simple"},
        }
        mock_resolve.return_value = docx_path
        mock_validate.return_value = clean_validation
        mock_detect.return_value = word_format

        report = await mixin.analyze_document_health(docx_path)

        # Verify structure matches actual implementation
        expected_sections = (
            "overall_health",
            "validation",
            "format_info",
            "analysis_time",
            "recommendations",
        )
        for section in expected_sections:
            assert section in report

        # Verify content
        assert report["overall_health"] == "healthy"
        assert report["validation"]["is_valid"] is True
        assert report["format_info"]["category"] == "word"
        assert len(report["recommendations"]) > 0
class TestDirectToolAccess:
    """Call registered tools through the app's tool registry instead of the mixin."""

    @pytest.mark.asyncio
    async def test_tool_execution_direct(self):
        """The registered get_supported_formats tool can be awaited via its `fn`."""
        server = FastMCP("Test App")
        UniversalMixin().register_all(server)

        # Look the tool up by name in the registry and invoke its callable.
        registry = server._tool_manager._tools
        formats_tool = registry["get_supported_formats"]
        payload = await formats_tool.fn()

        for key in ("supported_extensions", "format_details"):
            assert key in payload
        assert isinstance(payload["supported_extensions"], list)

    @pytest.mark.asyncio
    async def test_tool_error_direct(self):
        """Invoking extract_text via the registry on a missing file raises OfficeFileError."""
        server = FastMCP("Test App")
        UniversalMixin().register_all(server)

        registry = server._tool_manager._tools
        text_tool = registry["extract_text"]

        with pytest.raises(OfficeFileError):
            await text_tool.fn(file_path="/nonexistent/file.docx")
class TestMockingPatterns:
    """Worked examples of mocking strategies for file-based tools."""

    @pytest.fixture
    def mixin(self):
        """Build a UniversalMixin registered against a disposable FastMCP app."""
        server = FastMCP("Test")
        universal = UniversalMixin()
        universal.register_all(server)
        return universal

    @pytest.mark.asyncio
    async def test_comprehensive_mocking_pattern(self, mixin):
        """Stub every collaborator of extract_text and check the assembled result."""
        # Realistic canned responses, prepared up front so the patching
        # section below stays short.
        resolved_path = "/realistic/path/document.docx"
        body_text = "This is comprehensive test content with multiple paragraphs.\n\nIncluding headers and formatting."
        validation_response = {
            "is_valid": True,
            "errors": [],
            "warnings": ["File is large"],
            "password_protected": False,
            "file_size": 1048576,  # 1MB
        }
        format_response = {
            "category": "word",
            "extension": ".docx",
            "format_name": "Microsoft Word Document",
            "mime_type": "application/vnd.openxmlformats-officedocument.wordprocessingml.document",
            "is_legacy": False,
            "structure": {
                "estimated_complexity": "moderate",
                "has_images": True,
                "has_tables": True,
            },
        }
        extraction_response = {
            "text": body_text,
            "method_used": "python-docx",
            "methods_tried": ["python-docx"],
            "formatted_sections": [
                {"type": "heading", "text": "Document Title", "level": 1},
                {"type": "paragraph", "text": "This is comprehensive test content..."},
            ],
        }
        metadata_response = {
            "title": "Test Document",
            "author": "Test Author",
            "created": "2024-01-01T10:00:00Z",
            "modified": "2024-01-15T14:30:00Z",
            "word_count": 1247,
            "page_count": 3,
        }

        # Mock all external dependencies (module-level helpers) ...
        with patch('mcwaddams.mixins.universal.resolve_office_file_path') as mock_resolve, \
                patch('mcwaddams.mixins.universal.validate_office_file') as mock_validate, \
                patch('mcwaddams.mixins.universal.detect_format') as mock_detect:
            mock_resolve.return_value = resolved_path
            mock_validate.return_value = validation_response
            mock_detect.return_value = format_response

            # ... and the internal processing methods on the instance.
            with patch.object(mixin, '_extract_text_by_category') as mock_extract, \
                    patch.object(mixin, '_extract_basic_metadata') as mock_metadata:
                mock_extract.return_value = extraction_response
                mock_metadata.return_value = metadata_response

                # Execute with realistic parameters
                outcome = await mixin.extract_text(
                    file_path="/test/document.docx",
                    preserve_formatting=True,
                    include_metadata=True,
                    method="auto",
                )

        # Comprehensive assertions
        assert outcome["text"] == body_text
        extraction_info = outcome["metadata"]
        assert extraction_info["extraction_method"] == "python-docx"
        assert extraction_info["format"] == "Microsoft Word Document"
        assert "extraction_time" in extraction_info
        assert outcome["document_metadata"]["author"] == "Test Author"
        assert "structure" in outcome  # Because preserve_formatting=True

        # Verify all mocks were called appropriately: the caller's path goes to
        # the resolver, and the resolved path goes to validation and detection.
        mock_resolve.assert_called_once_with("/test/document.docx")
        mock_validate.assert_called_once_with(resolved_path)
        mock_detect.assert_called_once_with(resolved_path)
        mock_extract.assert_called_once()
        mock_metadata.assert_called_once()
if __name__ == "__main__":
    # Propagate pytest's exit code so that running this file directly
    # (e.g. from a script or CI step) fails when any test fails.
    raise SystemExit(pytest.main([__file__, "-v"]))