"""Focused tests for UniversalMixin functionality. This module tests the UniversalMixin in isolation, focusing on: - Tool registration and functionality - Error handling patterns - Mocking strategies for file operations - Async behavior validation """ import pytest import tempfile import os from unittest.mock import AsyncMock, MagicMock, patch, mock_open from pathlib import Path from fastmcp import FastMCP # FastMCP testing - using direct tool access from mcp_office_tools.mixins.universal import UniversalMixin from mcp_office_tools.utils import OfficeFileError class TestUniversalMixinRegistration: """Test tool registration and basic setup.""" def test_mixin_initialization(self): """Test UniversalMixin initializes correctly.""" app = FastMCP("Test Universal") mixin = UniversalMixin(app) assert mixin.app == app assert len(app._tools) == 6 # 6 universal tools def test_tool_names_registered(self): """Test that all expected tool names are registered.""" app = FastMCP("Test Universal") UniversalMixin(app) expected_tools = { "extract_text", "extract_images", "extract_metadata", "detect_office_format", "analyze_document_health", "get_supported_formats" } registered_tools = set(app._tools.keys()) assert expected_tools.issubset(registered_tools) class TestExtractText: """Test extract_text tool functionality.""" @pytest.fixture def mixin(self): """Create UniversalMixin for testing.""" app = FastMCP("Test") return UniversalMixin(app) @pytest.mark.asyncio async def test_extract_text_nonexistent_file(self, mixin): """Test extract_text with nonexistent file raises OfficeFileError.""" with pytest.raises(OfficeFileError): await mixin.extract_text("/nonexistent/file.docx") @pytest.mark.asyncio @patch('mcp_office_tools.utils.validation.resolve_office_file_path') @patch('mcp_office_tools.utils.validation.validate_office_file') @patch('mcp_office_tools.utils.file_detection.detect_format') async def test_extract_text_validation_failure(self, mock_detect, mock_validate, mock_resolve, mixin): """Test extract_text with validation failure.""" mock_resolve.return_value = "/test.docx" mock_validate.return_value = { "is_valid": False, "errors": ["File is corrupted"] } with pytest.raises(OfficeFileError, match="Invalid file: File is corrupted"): await mixin.extract_text("/test.docx") @pytest.mark.asyncio @patch('mcp_office_tools.utils.validation.resolve_office_file_path') @patch('mcp_office_tools.utils.validation.validate_office_file') @patch('mcp_office_tools.utils.file_detection.detect_format') async def test_extract_text_csv_success(self, mock_detect, mock_validate, mock_resolve, mixin): """Test successful CSV text extraction.""" # Setup mocks mock_resolve.return_value = "/test.csv" mock_validate.return_value = {"is_valid": True, "errors": []} mock_detect.return_value = { "category": "data", "extension": ".csv", "format_name": "CSV" } # Mock internal methods with patch.object(mixin, '_extract_text_by_category') as mock_extract: mock_extract.return_value = { "text": "Name,Age\nJohn,30\nJane,25", "method_used": "pandas", "methods_tried": ["pandas"] } with patch.object(mixin, '_extract_basic_metadata') as mock_metadata: mock_metadata.return_value = {"file_size": 1024, "rows": 3} result = await mixin.extract_text("/test.csv") # Verify structure assert "text" in result assert "metadata" in result assert "document_metadata" in result # Verify content assert "John" in result["text"] assert result["metadata"]["extraction_method"] == "pandas" assert result["metadata"]["format"] == "CSV" assert result["document_metadata"]["file_size"] == 1024 @pytest.mark.asyncio async def test_extract_text_parameter_handling(self, mixin): """Test extract_text parameter validation and handling.""" # Mock all dependencies for parameter testing with patch('mcp_office_tools.utils.validation.resolve_office_file_path') as mock_resolve: with patch('mcp_office_tools.utils.validation.validate_office_file') as mock_validate: with patch('mcp_office_tools.utils.file_detection.detect_format') as mock_detect: mock_resolve.return_value = "/test.docx" mock_validate.return_value = {"is_valid": True, "errors": []} mock_detect.return_value = {"category": "word", "extension": ".docx", "format_name": "Word"} with patch.object(mixin, '_extract_text_by_category') as mock_extract: mock_extract.return_value = {"text": "test", "method_used": "docx"} with patch.object(mixin, '_extract_basic_metadata') as mock_metadata: mock_metadata.return_value = {} # Test with different parameters result = await mixin.extract_text( file_path="/test.docx", preserve_formatting=True, include_metadata=False, method="primary" ) # Verify the call was made with correct parameters mock_extract.assert_called_once() args = mock_extract.call_args[0] assert args[2] == "word" # category assert args[4] == True # preserve_formatting assert args[5] == "primary" # method class TestExtractImages: """Test extract_images tool functionality.""" @pytest.fixture def mixin(self): """Create UniversalMixin for testing.""" app = FastMCP("Test") return UniversalMixin(app) @pytest.mark.asyncio async def test_extract_images_nonexistent_file(self, mixin): """Test extract_images with nonexistent file.""" with pytest.raises(OfficeFileError): await mixin.extract_images("/nonexistent/file.docx") @pytest.mark.asyncio @patch('mcp_office_tools.utils.validation.resolve_office_file_path') @patch('mcp_office_tools.utils.validation.validate_office_file') @patch('mcp_office_tools.utils.file_detection.detect_format') async def test_extract_images_unsupported_format(self, mock_detect, mock_validate, mock_resolve, mixin): """Test extract_images with unsupported format (CSV).""" mock_resolve.return_value = "/test.csv" mock_validate.return_value = {"is_valid": True, "errors": []} mock_detect.return_value = {"category": "data", "extension": ".csv", "format_name": "CSV"} with pytest.raises(OfficeFileError, match="Image extraction not supported for data files"): await mixin.extract_images("/test.csv") class TestGetSupportedFormats: """Test get_supported_formats tool functionality.""" @pytest.fixture def mixin(self): """Create UniversalMixin for testing.""" app = FastMCP("Test") return UniversalMixin(app) @pytest.mark.asyncio async def test_get_supported_formats_structure(self, mixin): """Test get_supported_formats returns correct structure.""" result = await mixin.get_supported_formats() # Verify top-level structure assert isinstance(result, dict) required_keys = {"supported_extensions", "format_details", "categories", "total_formats"} assert required_keys.issubset(result.keys()) # Verify supported extensions include common formats extensions = result["supported_extensions"] assert isinstance(extensions, list) expected_extensions = {".docx", ".xlsx", ".pptx", ".doc", ".xls", ".ppt", ".csv"} assert expected_extensions.issubset(set(extensions)) # Verify categories categories = result["categories"] assert isinstance(categories, dict) expected_categories = {"word", "excel", "powerpoint", "data"} assert expected_categories.issubset(categories.keys()) # Verify total_formats is correct assert result["total_formats"] == len(extensions) @pytest.mark.asyncio async def test_get_supported_formats_details(self, mixin): """Test get_supported_formats includes detailed format information.""" result = await mixin.get_supported_formats() format_details = result["format_details"] assert isinstance(format_details, dict) # Check that .docx details are present and complete if ".docx" in format_details: docx_details = format_details[".docx"] expected_docx_keys = {"name", "category", "description", "features_supported"} assert expected_docx_keys.issubset(docx_details.keys()) class TestDocumentHealth: """Test analyze_document_health tool functionality.""" @pytest.fixture def mixin(self): """Create UniversalMixin for testing.""" app = FastMCP("Test") return UniversalMixin(app) @pytest.mark.asyncio @patch('mcp_office_tools.utils.validation.resolve_office_file_path') @patch('mcp_office_tools.utils.validation.validate_office_file') @patch('mcp_office_tools.utils.file_detection.detect_format') async def test_analyze_document_health_success(self, mock_detect, mock_validate, mock_resolve, mixin): """Test successful document health analysis.""" mock_resolve.return_value = "/test.docx" mock_validate.return_value = { "is_valid": True, "errors": [], "warnings": [], "password_protected": False } mock_detect.return_value = { "category": "word", "extension": ".docx", "format_name": "Word Document", "is_legacy": False, "structure": {"estimated_complexity": "simple"} } with patch.object(mixin, '_calculate_health_score') as mock_score: with patch.object(mixin, '_get_health_recommendations') as mock_recommendations: mock_score.return_value = 9 mock_recommendations.return_value = ["Document appears healthy"] result = await mixin.analyze_document_health("/test.docx") # Verify structure assert "health_score" in result assert "analysis" in result assert "recommendations" in result assert "format_info" in result # Verify content assert result["health_score"] == 9 assert len(result["recommendations"]) > 0 class TestDirectToolAccess: """Test mixin integration with direct tool access.""" @pytest.mark.asyncio async def test_tool_execution_direct(self): """Test tool execution through direct tool access.""" app = FastMCP("Test App") UniversalMixin(app) # Test get_supported_formats via direct access get_supported_formats_tool = app._tools["get_supported_formats"] result = await get_supported_formats_tool() assert "supported_extensions" in result assert "format_details" in result assert isinstance(result["supported_extensions"], list) @pytest.mark.asyncio async def test_tool_error_direct(self): """Test tool error handling via direct access.""" app = FastMCP("Test App") UniversalMixin(app) # Test error handling via direct access extract_text_tool = app._tools["extract_text"] with pytest.raises(OfficeFileError): await extract_text_tool(file_path="/nonexistent/file.docx") class TestMockingPatterns: """Demonstrate various mocking patterns for file operations.""" @pytest.fixture def mixin(self): """Create UniversalMixin for testing.""" app = FastMCP("Test") return UniversalMixin(app) @pytest.mark.asyncio async def test_comprehensive_mocking_pattern(self, mixin): """Demonstrate comprehensive mocking for complex tool testing.""" # Mock all external dependencies with patch('mcp_office_tools.utils.validation.resolve_office_file_path') as mock_resolve: with patch('mcp_office_tools.utils.validation.validate_office_file') as mock_validate: with patch('mcp_office_tools.utils.file_detection.detect_format') as mock_detect: # Setup realistic mock responses mock_resolve.return_value = "/realistic/path/document.docx" mock_validate.return_value = { "is_valid": True, "errors": [], "warnings": ["File is large"], "password_protected": False, "file_size": 1048576 # 1MB } mock_detect.return_value = { "category": "word", "extension": ".docx", "format_name": "Microsoft Word Document", "mime_type": "application/vnd.openxmlformats-officedocument.wordprocessingml.document", "is_legacy": False, "structure": { "estimated_complexity": "moderate", "has_images": True, "has_tables": True } } # Mock internal processing methods with patch.object(mixin, '_extract_text_by_category') as mock_extract: mock_extract.return_value = { "text": "This is comprehensive test content with multiple paragraphs.\n\nIncluding headers and formatting.", "method_used": "python-docx", "methods_tried": ["python-docx"], "formatted_sections": [ {"type": "heading", "text": "Document Title", "level": 1}, {"type": "paragraph", "text": "This is comprehensive test content..."} ] } with patch.object(mixin, '_extract_basic_metadata') as mock_metadata: mock_metadata.return_value = { "title": "Test Document", "author": "Test Author", "created": "2024-01-01T10:00:00Z", "modified": "2024-01-15T14:30:00Z", "word_count": 1247, "page_count": 3 } # Execute with realistic parameters result = await mixin.extract_text( file_path="/test/document.docx", preserve_formatting=True, include_metadata=True, method="auto" ) # Comprehensive assertions assert result["text"] == "This is comprehensive test content with multiple paragraphs.\n\nIncluding headers and formatting." assert result["metadata"]["extraction_method"] == "python-docx" assert result["metadata"]["format"] == "Microsoft Word Document" assert "extraction_time" in result["metadata"] assert result["document_metadata"]["author"] == "Test Author" assert "structure" in result # Because preserve_formatting=True # Verify all mocks were called appropriately mock_resolve.assert_called_once_with("/test/document.docx") mock_validate.assert_called_once_with("/realistic/path/document.docx") mock_detect.assert_called_once_with("/realistic/path/document.docx") mock_extract.assert_called_once() mock_metadata.assert_called_once() if __name__ == "__main__": pytest.main([__file__, "-v"])