Some checks are pending
Test Dashboard / test-and-dashboard (push) Waiting to run
Named for Milton Waddams, who was relocated to the basement with boxes of legacy documents. He handles the .doc and .xls files from 1997 that nobody else wants to touch. - Rename package from mcp-office-tools to mcwaddams - Update author to Ryan Malloy - Update all imports and references - Add Office Space themed README narrative - All 53 tests passing
419 lines
18 KiB
Python
419 lines
18 KiB
Python
"""Focused tests for UniversalMixin functionality.

This module tests the UniversalMixin in isolation, focusing on:
- Tool registration and functionality
- Error handling patterns
- Mocking strategies for file operations
- Async behavior validation
"""
|
|
|
|
import pytest
|
|
import tempfile
|
|
import os
|
|
from unittest.mock import AsyncMock, MagicMock, patch, mock_open
|
|
from pathlib import Path
|
|
|
|
from fastmcp import FastMCP
|
|
# FastMCP testing - using direct tool access
|
|
|
|
from mcwaddams.mixins.universal import UniversalMixin
|
|
from mcwaddams.utils import OfficeFileError
|
|
|
|
|
|
class TestUniversalMixinRegistration:
    """Verify that UniversalMixin registers its tools on a FastMCP app."""

    def test_mixin_initialization(self):
        """Registering a new mixin leaves exactly seven tools on the app."""
        server = FastMCP("Test Universal")
        universal = UniversalMixin()
        universal.register_all(server)

        assert universal is not None
        # Seven universal tools are expected, index_document among them.
        tool_registry = server._tool_manager._tools
        assert len(tool_registry) == 7

    def test_tool_names_registered(self):
        """Every expected universal tool name appears in the registry."""
        server = FastMCP("Test Universal")
        UniversalMixin().register_all(server)

        registered = set(server._tool_manager._tools.keys())
        # Any expected name that is not registered ends up in ``missing``.
        missing = {
            "extract_text",
            "extract_images",
            "extract_metadata",
            "detect_office_format",
            "analyze_document_health",
            "get_supported_formats",
            "index_document",
        } - registered
        assert not missing
|
|
|
|
|
|
class TestExtractText:
    """Exercise the extract_text tool of UniversalMixin.

    Apart from the nonexistent-file case, the tests patch the path
    resolution, validation and format detection helpers looked up in
    ``mcwaddams.mixins.universal`` and stub the mixin's private extraction
    methods, so that only extract_text's own orchestration is under test.
    """

    @pytest.fixture
    def mixin(self):
        """Return a UniversalMixin whose tools are registered on a test app."""
        app = FastMCP("Test")
        mixin = UniversalMixin()
        mixin.register_all(app)
        return mixin

    @pytest.mark.asyncio
    async def test_extract_text_nonexistent_file(self, mixin):
        """A path that does not exist raises OfficeFileError."""
        with pytest.raises(OfficeFileError):
            await mixin.extract_text("/nonexistent/file.docx")

    # Stacked @patch decorators inject their mocks bottom-up, hence the
    # (mock_detect, mock_validate, mock_resolve) parameter order below.
    @pytest.mark.asyncio
    @patch("mcwaddams.mixins.universal.resolve_office_file_path")
    @patch("mcwaddams.mixins.universal.validate_office_file")
    @patch("mcwaddams.mixins.universal.detect_format")
    async def test_extract_text_validation_failure(self, mock_detect, mock_validate, mock_resolve, mixin):
        """A failed validation surfaces as OfficeFileError with the reason."""
        mock_resolve.return_value = "/test.docx"
        mock_validate.return_value = {
            "is_valid": False,
            "errors": ["File is corrupted"],
        }

        with pytest.raises(OfficeFileError, match="Invalid file: File is corrupted"):
            await mixin.extract_text("/test.docx")

    @pytest.mark.asyncio
    @patch("mcwaddams.mixins.universal.resolve_office_file_path")
    @patch("mcwaddams.mixins.universal.validate_office_file")
    @patch("mcwaddams.mixins.universal.detect_format")
    async def test_extract_text_csv_success(self, mock_detect, mock_validate, mock_resolve, mixin):
        """A valid CSV yields text, extraction metadata and document metadata."""
        # Set up the module-level collaborators.
        mock_resolve.return_value = "/test.csv"
        mock_validate.return_value = {"is_valid": True, "errors": []}
        mock_detect.return_value = {
            "category": "data",
            "extension": ".csv",
            "format_name": "CSV",
        }

        # Stub the mixin's internal extraction steps.
        with patch.object(mixin, "_extract_text_by_category") as mock_extract, \
                patch.object(mixin, "_extract_basic_metadata") as mock_metadata:
            mock_extract.return_value = {
                "text": "Name,Age\nJohn,30\nJane,25",
                "method_used": "pandas",
                "methods_tried": ["pandas"],
            }
            mock_metadata.return_value = {"file_size": 1024, "rows": 3}

            result = await mixin.extract_text("/test.csv")

        # Verify structure
        assert "text" in result
        assert "metadata" in result
        assert "document_metadata" in result

        # Verify content
        assert "John" in result["text"]
        assert result["metadata"]["extraction_method"] == "pandas"
        assert result["metadata"]["format"] == "CSV"
        assert result["document_metadata"]["file_size"] == 1024

    @pytest.mark.asyncio
    @patch("mcwaddams.mixins.universal.resolve_office_file_path")
    @patch("mcwaddams.mixins.universal.validate_office_file")
    @patch("mcwaddams.mixins.universal.detect_format")
    async def test_extract_text_parameter_handling(self, mock_detect, mock_validate, mock_resolve, mixin):
        """extract_text forwards category, formatting flag and method."""
        mock_resolve.return_value = "/test.docx"
        mock_validate.return_value = {"is_valid": True, "errors": []}
        mock_detect.return_value = {"category": "word", "extension": ".docx", "format_name": "Word"}

        with patch.object(mixin, "_extract_text_by_category") as mock_extract, \
                patch.object(mixin, "_extract_basic_metadata") as mock_metadata:
            mock_extract.return_value = {"text": "test", "method_used": "docx"}
            mock_metadata.return_value = {}

            # Only the forwarded arguments matter here, so the return value
            # of extract_text is deliberately not captured.
            await mixin.extract_text(
                file_path="/test.docx",
                preserve_formatting=True,
                include_metadata=False,
                method="primary",
            )

        # Verify the call was made with correct parameters
        # _extract_text_by_category(local_path, extension, category, preserve_formatting, method)
        mock_extract.assert_called_once()
        args = mock_extract.call_args[0]
        assert args[2] == "word"  # category (index 2)
        assert args[3] is True  # preserve_formatting (index 3)
        assert args[4] == "primary"  # method (index 4)
|
|
|
|
|
|
class TestExtractImages:
    """Cover the extract_images tool of UniversalMixin."""

    @pytest.fixture
    def mixin(self):
        """Build a UniversalMixin registered on a scratch FastMCP app."""
        universal = UniversalMixin()
        universal.register_all(FastMCP("Test"))
        return universal

    @pytest.mark.asyncio
    async def test_extract_images_nonexistent_file(self, mixin):
        """A missing input file is reported as OfficeFileError."""
        missing_path = "/nonexistent/file.docx"
        with pytest.raises(OfficeFileError):
            await mixin.extract_images(missing_path)

    @pytest.mark.asyncio
    @patch('mcwaddams.mixins.universal.resolve_office_file_path')
    @patch('mcwaddams.mixins.universal.validate_office_file')
    @patch('mcwaddams.mixins.universal.detect_format')
    async def test_extract_images_unsupported_format(self, mock_detect, mock_validate, mock_resolve, mixin):
        """CSV input produces an empty image list instead of an error."""
        csv_path = "/test.csv"
        mock_resolve.return_value = csv_path
        mock_validate.return_value = dict(is_valid=True, errors=[])
        mock_detect.return_value = dict(category="data", extension=".csv", format_name="CSV")

        # The stubbed internal extractor returns an empty list for CSV,
        # so no error is raised on this path.
        with patch.object(mixin, '_extract_images_by_category', return_value=[]):
            outcome = await mixin.extract_images(csv_path)

        # Shape of the response
        for key in ("images", "metadata"):
            assert key in outcome
        assert outcome["images"] == []
        assert outcome["metadata"]["image_count"] == 0
|
|
|
|
|
|
class TestGetSupportedFormats:
    """Exercise the get_supported_formats tool of UniversalMixin."""

    @pytest.fixture
    def mixin(self):
        """Return a UniversalMixin whose tools are registered on a test app."""
        app = FastMCP("Test")
        mixin = UniversalMixin()
        mixin.register_all(app)
        return mixin

    @pytest.mark.asyncio
    async def test_get_supported_formats_structure(self, mixin):
        """The result carries the expected keys, extensions and categories."""
        result = await mixin.get_supported_formats()

        # Verify top-level structure
        assert isinstance(result, dict)
        required_keys = {"supported_extensions", "format_details", "categories", "total_formats"}
        assert required_keys.issubset(result.keys())

        # Verify supported extensions include common formats
        extensions = result["supported_extensions"]
        assert isinstance(extensions, list)
        expected_extensions = {".docx", ".xlsx", ".pptx", ".doc", ".xls", ".ppt", ".csv"}
        assert expected_extensions.issubset(set(extensions))

        # Verify categories
        categories = result["categories"]
        assert isinstance(categories, dict)
        expected_categories = {"word", "excel", "powerpoint"}
        assert expected_categories.issubset(categories.keys())

        # Verify total_formats is correct
        assert result["total_formats"] == len(extensions)

    @pytest.mark.asyncio
    async def test_get_supported_formats_details(self, mixin):
        """The per-format details for .docx are present and complete."""
        result = await mixin.get_supported_formats()

        format_details = result["format_details"]
        assert isinstance(format_details, dict)

        # Assert presence outright: guarding the checks below with an ``if``
        # would let this test pass vacuously if the .docx entry went missing.
        assert ".docx" in format_details
        docx_details = format_details[".docx"]
        expected_docx_keys = {
            "category",
            "legacy_format",
            "text_extraction",
            "image_extraction",
            "metadata_extraction",
            "markdown_conversion",
        }
        assert expected_docx_keys.issubset(docx_details.keys())

        # Verify Word document specifics
        assert docx_details["category"] == "word"
        assert docx_details["legacy_format"] is False
        assert docx_details["markdown_conversion"] is True
|
|
|
|
|
|
class TestDocumentHealth:
    """Cover the analyze_document_health tool of UniversalMixin."""

    @pytest.fixture
    def mixin(self):
        """Build a UniversalMixin registered on a scratch FastMCP app."""
        universal = UniversalMixin()
        universal.register_all(FastMCP("Test"))
        return universal

    @pytest.mark.asyncio
    @patch('mcwaddams.mixins.universal.resolve_office_file_path')
    @patch('mcwaddams.mixins.universal.validate_office_file')
    @patch('mcwaddams.mixins.universal.detect_format')
    async def test_analyze_document_health_success(self, mock_detect, mock_validate, mock_resolve, mixin):
        """A valid, modern Word file is reported as healthy."""
        docx_path = "/test.docx"
        mock_resolve.return_value = docx_path
        mock_validate.return_value = dict(
            is_valid=True,
            errors=[],
            warnings=[],
            password_protected=False,
        )
        mock_detect.return_value = dict(
            category="word",
            extension=".docx",
            format_name="Word Document",
            is_legacy=False,
            structure={"estimated_complexity": "simple"},
        )

        report = await mixin.analyze_document_health(docx_path)

        # Every section of the health report must be present.
        for section in (
            "overall_health",
            "validation",
            "format_info",
            "analysis_time",
            "recommendations",
        ):
            assert section in report

        # Spot-check the contents.
        assert report["overall_health"] == "healthy"
        assert report["validation"]["is_valid"] is True
        assert report["format_info"]["category"] == "word"
        assert len(report["recommendations"]) > 0
|
|
|
|
|
|
class TestDirectToolAccess:
    """Invoke registered tools straight from the FastMCP tool registry."""

    @pytest.mark.asyncio
    async def test_tool_execution_direct(self):
        """get_supported_formats can be awaited through its registry entry."""
        server = FastMCP("Test App")
        UniversalMixin().register_all(server)

        # Look the tool up by name and call its underlying function.
        formats_tool = server._tool_manager._tools["get_supported_formats"]
        formats = await formats_tool.fn()

        for key in ("supported_extensions", "format_details"):
            assert key in formats
        assert isinstance(formats["supported_extensions"], list)

    @pytest.mark.asyncio
    async def test_tool_error_direct(self):
        """Errors raised by a tool propagate when it is called directly."""
        server = FastMCP("Test App")
        UniversalMixin().register_all(server)

        text_tool = server._tool_manager._tools["extract_text"]
        with pytest.raises(OfficeFileError):
            await text_tool.fn(file_path="/nonexistent/file.docx")
|
|
|
|
|
|
class TestMockingPatterns:
    """Reference examples of mocking file operations around UniversalMixin."""

    @pytest.fixture
    def mixin(self):
        """Build a UniversalMixin registered on a scratch FastMCP app."""
        universal = UniversalMixin()
        universal.register_all(FastMCP("Test"))
        return universal

    @pytest.mark.asyncio
    async def test_comprehensive_mocking_pattern(self, mixin):
        """Run extract_text with every external and internal step mocked."""
        # Realistic canned responses, prepared up front.
        validation_report = {
            "is_valid": True,
            "errors": [],
            "warnings": ["File is large"],
            "password_protected": False,
            "file_size": 1048576,  # 1MB
        }
        format_report = {
            "category": "word",
            "extension": ".docx",
            "format_name": "Microsoft Word Document",
            "mime_type": "application/vnd.openxmlformats-officedocument.wordprocessingml.document",
            "is_legacy": False,
            "structure": {
                "estimated_complexity": "moderate",
                "has_images": True,
                "has_tables": True,
            },
        }
        extraction_payload = {
            "text": "This is comprehensive test content with multiple paragraphs.\n\nIncluding headers and formatting.",
            "method_used": "python-docx",
            "methods_tried": ["python-docx"],
            "formatted_sections": [
                {"type": "heading", "text": "Document Title", "level": 1},
                {"type": "paragraph", "text": "This is comprehensive test content..."},
            ],
        }
        document_properties = {
            "title": "Test Document",
            "author": "Test Author",
            "created": "2024-01-01T10:00:00Z",
            "modified": "2024-01-15T14:30:00Z",
            "word_count": 1247,
            "page_count": 3,
        }

        # Patch the module-level helpers first, then the mixin's own
        # internal processing methods.
        with patch("mcwaddams.mixins.universal.resolve_office_file_path") as resolve_mock, \
                patch("mcwaddams.mixins.universal.validate_office_file") as validate_mock, \
                patch("mcwaddams.mixins.universal.detect_format") as detect_mock, \
                patch.object(mixin, "_extract_text_by_category") as extract_mock, \
                patch.object(mixin, "_extract_basic_metadata") as metadata_mock:
            resolve_mock.return_value = "/realistic/path/document.docx"
            validate_mock.return_value = validation_report
            detect_mock.return_value = format_report
            extract_mock.return_value = extraction_payload
            metadata_mock.return_value = document_properties

            # Execute with realistic parameters.
            outcome = await mixin.extract_text(
                file_path="/test/document.docx",
                preserve_formatting=True,
                include_metadata=True,
                method="auto",
            )

        # Content and metadata of the response.
        assert outcome["text"] == "This is comprehensive test content with multiple paragraphs.\n\nIncluding headers and formatting."
        extraction_info = outcome["metadata"]
        assert extraction_info["extraction_method"] == "python-docx"
        assert extraction_info["format"] == "Microsoft Word Document"
        assert "extraction_time" in extraction_info
        assert outcome["document_metadata"]["author"] == "Test Author"
        assert "structure" in outcome  # Because preserve_formatting=True

        # Each collaborator must have been used exactly once; the validation
        # and detection steps receive the resolved path, not the input path.
        resolve_mock.assert_called_once_with("/test/document.docx")
        validate_mock.assert_called_once_with("/realistic/path/document.docx")
        detect_mock.assert_called_once_with("/realistic/path/document.docx")
        extract_mock.assert_called_once()
        metadata_mock.assert_called_once()
|
|
|
|
|
|
if __name__ == "__main__":
    # Running this module as a script hands it to pytest in verbose mode.
    pytest.main(args=[__file__, "-v"])