"""Focused tests for WordMixin functionality.

This module tests the WordMixin in isolation, focusing on:
- Word-specific tool functionality
- Markdown conversion capabilities
- Chapter and bookmark extraction
- Parameter validation for Word-specific features
"""

import pytest
from unittest.mock import AsyncMock, MagicMock, patch
from pathlib import Path

from fastmcp import FastMCP
# FastMCP testing - using direct tool access

from mcp_office_tools.mixins.word import WordMixin
from mcp_office_tools.utils import OfficeFileError

# Dotted paths of the helpers that the conversion tests replace with mocks.
# Kept in one place so every test patches exactly the same targets.
_RESOLVE_PATH = 'mcp_office_tools.utils.validation.resolve_office_file_path'
_VALIDATE_FILE = 'mcp_office_tools.utils.validation.validate_office_file'
_DETECT_FORMAT = 'mcp_office_tools.utils.file_detection.detect_format'


class TestWordMixinRegistration:
    """Test WordMixin tool registration and setup."""

    def test_mixin_initialization(self):
        """Test WordMixin initializes correctly and registers one tool."""
        app = FastMCP("Test Word")
        mixin = WordMixin(app)

        assert mixin.app == app
        assert len(app._tools) == 1  # 1 word tool

    def test_tool_names_registered(self):
        """Test that Word-specific tools are registered on the app."""
        app = FastMCP("Test Word")
        WordMixin(app)

        expected_tools = {"convert_to_markdown"}
        registered_tools = set(app._tools.keys())
        assert expected_tools.issubset(registered_tools)


class TestConvertToMarkdown:
    """Test convert_to_markdown tool functionality."""

    @pytest.fixture
    def mixin(self):
        """Create WordMixin for testing."""
        app = FastMCP("Test")
        return WordMixin(app)

    @pytest.mark.asyncio
    async def test_convert_to_markdown_nonexistent_file(self, mixin):
        """Test convert_to_markdown with nonexistent file."""
        with pytest.raises(OfficeFileError):
            await mixin.convert_to_markdown("/nonexistent/file.docx")

    @pytest.mark.asyncio
    @patch(_RESOLVE_PATH)
    @patch(_VALIDATE_FILE)
    @patch(_DETECT_FORMAT)
    async def test_convert_to_markdown_validation_failure(
        self, mock_detect, mock_validate, mock_resolve, mixin
    ):
        """Test convert_to_markdown with validation failure.

        Mock arguments arrive bottom-up relative to the ``@patch`` stack.
        """
        mock_resolve.return_value = "/test.docx"
        mock_validate.return_value = {
            "is_valid": False,
            "errors": ["File is password protected"]
        }

        with pytest.raises(OfficeFileError, match="Invalid file: File is password protected"):
            await mixin.convert_to_markdown("/test.docx")

    @pytest.mark.asyncio
    @patch(_RESOLVE_PATH)
    @patch(_VALIDATE_FILE)
    @patch(_DETECT_FORMAT)
    async def test_convert_to_markdown_non_word_document(
        self, mock_detect, mock_validate, mock_resolve, mixin
    ):
        """Test that non-Word documents are rejected."""
        mock_resolve.return_value = "/test.xlsx"
        mock_validate.return_value = {"is_valid": True, "errors": []}
        mock_detect.return_value = {
            "category": "excel",
            "extension": ".xlsx",
            "format_name": "Excel"
        }

        with pytest.raises(
            OfficeFileError,
            match="Markdown conversion currently only supports Word documents"
        ):
            await mixin.convert_to_markdown("/test.xlsx")

    @pytest.mark.asyncio
    @patch(_RESOLVE_PATH)
    @patch(_VALIDATE_FILE)
    @patch(_DETECT_FORMAT)
    async def test_convert_to_markdown_docx_success(
        self, mock_detect, mock_validate, mock_resolve, mixin
    ):
        """Test successful DOCX to markdown conversion."""
        # Setup mocks
        mock_resolve.return_value = "/test.docx"
        mock_validate.return_value = {"is_valid": True, "errors": []}
        mock_detect.return_value = {
            "category": "word",
            "extension": ".docx",
            "format_name": "Word Document"
        }

        # Mock internal methods
        with patch.object(mixin, '_analyze_document_size') as mock_analyze, \
                patch.object(mixin, '_get_processing_recommendation') as mock_recommendation, \
                patch.object(mixin, '_convert_docx_to_markdown') as mock_convert:
            mock_analyze.return_value = {
                "estimated_pages": 5,
                "estimated_size": "medium",
                "has_images": True,
                "has_complex_formatting": False
            }
            mock_recommendation.return_value = {
                "recommendation": "proceed",
                "message": "Document size is manageable for full conversion"
            }
            mock_convert.return_value = {
                "markdown": "# Test Document\n\nThis is test content.",
                "images": [],
                "metadata": {"conversion_method": "python-docx"},
                "processing_notes": []
            }

            result = await mixin.convert_to_markdown("/test.docx")

            # Verify structure
            assert "markdown" in result
            assert "metadata" in result
            assert "processing_info" in result

            # Verify content
            assert "# Test Document" in result["markdown"]
            assert result["metadata"]["format"] == "Word Document"
            assert "conversion_time" in result["metadata"]

    @pytest.mark.asyncio
    async def test_convert_to_markdown_parameter_handling(self, mixin):
        """Test convert_to_markdown parameter validation and handling.

        Calls the tool with every optional parameter set and checks that the
        DOCX conversion helper is invoked exactly once.
        """
        # Mock all dependencies for parameter testing
        with patch(_RESOLVE_PATH) as mock_resolve, \
                patch(_VALIDATE_FILE) as mock_validate, \
                patch(_DETECT_FORMAT) as mock_detect, \
                patch.object(mixin, '_analyze_document_size') as mock_analyze, \
                patch.object(mixin, '_get_processing_recommendation') as mock_recommendation, \
                patch.object(mixin, '_parse_page_range') as mock_parse_range, \
                patch.object(mixin, '_convert_docx_to_markdown') as mock_convert:
            mock_resolve.return_value = "/test.docx"
            mock_validate.return_value = {"is_valid": True, "errors": []}
            mock_detect.return_value = {
                "category": "word",
                "extension": ".docx",
                "format_name": "Word"
            }
            mock_analyze.return_value = {"estimated_pages": 10}
            mock_recommendation.return_value = {"recommendation": "proceed"}
            mock_parse_range.return_value = [1, 2, 3, 4, 5]
            mock_convert.return_value = {
                "markdown": "# Test",
                "images": [],
                "metadata": {},
                "processing_notes": []
            }

            # Test with specific parameters
            await mixin.convert_to_markdown(
                file_path="/test.docx",
                include_images=False,
                image_mode="files",
                max_image_size=512000,
                preserve_structure=False,
                page_range="1-5",
                bookmark_name="Chapter1",
                chapter_name="Introduction",
                summary_only=False,
                output_dir="/output"
            )

            # Verify conversion was called exactly once
            mock_convert.assert_called_once()
            # Note: Since bookmark_name is provided, page_numbers should be None
            # (bookmark takes precedence over page_range). The forwarded
            # arguments are not inspected here; see the bookmark priority test.

    @pytest.mark.asyncio
    async def test_convert_to_markdown_bookmark_priority(self, mixin):
        """Test that bookmark extraction takes priority over page ranges."""
        with patch(_RESOLVE_PATH) as mock_resolve, \
                patch(_VALIDATE_FILE) as mock_validate, \
                patch(_DETECT_FORMAT) as mock_detect, \
                patch.object(mixin, '_analyze_document_size'), \
                patch.object(mixin, '_get_processing_recommendation'), \
                patch.object(mixin, '_parse_page_range') as mock_parse_range, \
                patch.object(mixin, '_convert_docx_to_markdown') as mock_convert:
            mock_resolve.return_value = "/test.docx"
            mock_validate.return_value = {"is_valid": True, "errors": []}
            mock_detect.return_value = {
                "category": "word",
                "extension": ".docx",
                "format_name": "Word"
            }
            mock_convert.return_value = {
                "markdown": "# Chapter Content",
                "images": [],
                "metadata": {},
                "processing_notes": []
            }

            # Call with both page_range and bookmark_name
            await mixin.convert_to_markdown(
                "/test.docx",
                page_range="1-10",
                bookmark_name="Chapter1"
            )

            # Verify that page range parsing was NOT called
            # (because bookmark takes priority)
            mock_parse_range.assert_not_called()

    @pytest.mark.asyncio
    async def test_convert_to_markdown_summary_mode(self, mixin):
        """Test summary_only mode functionality."""
        with patch(_RESOLVE_PATH) as mock_resolve, \
                patch(_VALIDATE_FILE) as mock_validate, \
                patch(_DETECT_FORMAT) as mock_detect, \
                patch.object(mixin, '_analyze_document_size') as mock_analyze, \
                patch.object(mixin, '_get_processing_recommendation') as mock_recommendation:
            mock_resolve.return_value = "/test.docx"
            mock_validate.return_value = {"is_valid": True, "errors": []}
            mock_detect.return_value = {
                "category": "word",
                "extension": ".docx",
                "format_name": "Word"
            }
            mock_analyze.return_value = {
                "estimated_pages": 25,
                "estimated_size": "large",
                "has_images": True
            }
            mock_recommendation.return_value = {
                "recommendation": "summary_recommended",
                "message": "Large document - summary mode recommended"
            }

            result = await mixin.convert_to_markdown(
                "/test.docx",
                summary_only=True
            )

            # Verify that summary information is returned
            assert "metadata" in result
            assert "processing_info" in result
            # In summary mode, conversion should not happen
            # (intent only; this test does not assert it).


class TestWordSpecificHelpers:
    """Test Word-specific helper methods."""

    @pytest.fixture
    def mixin(self):
        """Create WordMixin for testing."""
        app = FastMCP("Test")
        return WordMixin(app)

    def test_parse_page_range_single_page(self, mixin):
        """Test parsing single page range."""
        result = mixin._parse_page_range("5")
        assert result == [5]

    def test_parse_page_range_range(self, mixin):
        """Test parsing page ranges."""
        result = mixin._parse_page_range("1-5")
        assert result == [1, 2, 3, 4, 5]

    def test_parse_page_range_complex(self, mixin):
        """Test parsing complex page ranges."""
        result = mixin._parse_page_range("1,3,5-7,10")
        expected = [1, 3, 5, 6, 7, 10]
        assert result == expected

    def test_parse_page_range_invalid(self, mixin):
        """Test parsing invalid page ranges."""
        with pytest.raises(OfficeFileError):
            mixin._parse_page_range("invalid")

        with pytest.raises(OfficeFileError):
            mixin._parse_page_range("10-5")  # End before start

    def test_get_processing_recommendation(self, mixin):
        """Test processing recommendation logic."""
        # Small document - proceed normally
        doc_analysis = {"estimated_pages": 3, "estimated_size": "small"}
        result = mixin._get_processing_recommendation(doc_analysis, "", False)
        assert result["recommendation"] == "proceed"

        # Large document without page range - suggest summary
        doc_analysis = {"estimated_pages": 25, "estimated_size": "large"}
        result = mixin._get_processing_recommendation(doc_analysis, "", False)
        assert result["recommendation"] == "summary_recommended"

        # Large document with page range - proceed
        doc_analysis = {"estimated_pages": 25, "estimated_size": "large"}
        result = mixin._get_processing_recommendation(doc_analysis, "1-5", False)
        assert result["recommendation"] == "proceed"

        # Summary mode requested - proceed with summary
        doc_analysis = {"estimated_pages": 25, "estimated_size": "large"}
        result = mixin._get_processing_recommendation(doc_analysis, "", True)
        assert result["recommendation"] == "proceed"


class TestDirectToolAccess:
    """Test WordMixin integration with direct tool access."""

    @pytest.mark.asyncio
    async def test_tool_execution_direct(self):
        """Test Word tool execution through direct tool access."""
        app = FastMCP("Test App")
        WordMixin(app)

        # Test error handling via direct access (nonexistent file)
        convert_to_markdown_tool = app._tools["convert_to_markdown"]
        with pytest.raises(OfficeFileError):
            await convert_to_markdown_tool(file_path="/nonexistent/file.docx")

    @pytest.mark.asyncio
    async def test_tool_parameter_validation_direct(self):
        """Test parameter validation through direct access."""
        app = FastMCP("Test App")
        WordMixin(app)

        # Test with various parameter combinations - wrong file type should be caught
        convert_to_markdown_tool = app._tools["convert_to_markdown"]

        # This should trigger the format validation and raise OfficeFileError
        with pytest.raises(OfficeFileError):
            await convert_to_markdown_tool(
                file_path="/test.xlsx",  # Wrong file type
                include_images=True,
                image_mode="base64",
                preserve_structure=True
            )


class TestLegacyWordSupport:
    """Test support for legacy Word documents (.doc)."""

    @pytest.fixture
    def mixin(self):
        """Create WordMixin for testing."""
        app = FastMCP("Test")
        return WordMixin(app)

    @pytest.mark.asyncio
    @patch(_RESOLVE_PATH)
    @patch(_VALIDATE_FILE)
    @patch(_DETECT_FORMAT)
    async def test_convert_legacy_doc_to_markdown(
        self, mock_detect, mock_validate, mock_resolve, mixin
    ):
        """Test conversion of legacy .doc files."""
        mock_resolve.return_value = "/test.doc"
        mock_validate.return_value = {"is_valid": True, "errors": []}
        mock_detect.return_value = {
            "category": "word",
            "extension": ".doc",
            "format_name": "Word Document (Legacy)"
        }

        # Mock internal methods for legacy support
        with patch.object(mixin, '_analyze_document_size') as mock_analyze, \
                patch.object(mixin, '_get_processing_recommendation') as mock_recommendation, \
                patch.object(mixin, '_convert_doc_to_markdown') as mock_convert:
            mock_analyze.return_value = {"estimated_pages": 3}
            mock_recommendation.return_value = {"recommendation": "proceed"}
            mock_convert.return_value = {
                "markdown": "# Legacy Document\n\nContent from .doc file",
                "images": [],
                "metadata": {"conversion_method": "legacy-parser"},
                "processing_notes": ["Converted from legacy format"]
            }

            result = await mixin.convert_to_markdown("/test.doc")

            # Verify legacy conversion worked
            assert "# Legacy Document" in result["markdown"]
            assert "legacy-parser" in str(result["metadata"])
            assert len(result["processing_info"]["processing_notes"]) > 0


if __name__ == "__main__":
    pytest.main([__file__, "-v"])