# mcp-office-tools/tests/test_universal_mixin.py

"""Focused tests for UniversalMixin functionality.

This module tests the UniversalMixin in isolation, focusing on:
- Tool registration and functionality
- Error handling patterns
- Mocking strategies for file operations
- Async behavior validation
"""

import pytest
import tempfile
import os
from unittest.mock import AsyncMock, MagicMock, patch, mock_open
from pathlib import Path

from fastmcp import FastMCP
# FastMCP testing - using direct tool access

from mcwaddams.mixins.universal import UniversalMixin
from mcwaddams.utils import OfficeFileError


class TestUniversalMixinRegistration:
    """Verify that UniversalMixin attaches its tools to a FastMCP app."""

    def test_mixin_initialization(self):
        """Registering the mixin yields an instance and exactly seven tools."""
        server = FastMCP("Test Universal")
        universal = UniversalMixin()
        universal.register_all(server)

        assert universal is not None
        # 7 universal tools (includes index_document)
        tool_count = len(server._tool_manager._tools)
        assert tool_count == 7

    def test_tool_names_registered(self):
        """Every expected tool name is present among the registered tools."""
        server = FastMCP("Test Universal")
        UniversalMixin().register_all(server)

        wanted = {
            "extract_text",
            "extract_images",
            "extract_metadata",
            "detect_office_format",
            "analyze_document_health",
            "get_supported_formats",
            "index_document",
        }
        registered = set(server._tool_manager._tools.keys())

        # An empty difference means every wanted name was registered;
        # extra registered tools are tolerated.
        missing = wanted - registered
        assert not missing


class TestExtractText:
    """Test extract_text tool functionality.

    File-level helpers in ``mcwaddams.mixins.universal`` are patched where a
    test needs to get past path resolution, validation and format detection
    without touching a real file.
    """

    @pytest.fixture
    def mixin(self):
        """Create a UniversalMixin whose tools are registered on a FastMCP app."""
        app = FastMCP("Test")
        mixin = UniversalMixin()
        mixin.register_all(app)
        return mixin

    @pytest.mark.asyncio
    async def test_extract_text_nonexistent_file(self, mixin):
        """Test extract_text with nonexistent file raises OfficeFileError."""
        with pytest.raises(OfficeFileError):
            await mixin.extract_text("/nonexistent/file.docx")

    @pytest.mark.asyncio
    @patch('mcwaddams.mixins.universal.resolve_office_file_path')
    @patch('mcwaddams.mixins.universal.validate_office_file')
    @patch('mcwaddams.mixins.universal.detect_format')
    async def test_extract_text_validation_failure(self, mock_detect, mock_validate, mock_resolve, mixin):
        """Test extract_text with validation failure."""
        mock_resolve.return_value = "/test.docx"
        mock_validate.return_value = {
            "is_valid": False,
            "errors": ["File is corrupted"]
        }

        # ``match`` is applied as a regular-expression search on the message.
        with pytest.raises(OfficeFileError, match="Invalid file: File is corrupted"):
            await mixin.extract_text("/test.docx")

    @pytest.mark.asyncio
    @patch('mcwaddams.mixins.universal.resolve_office_file_path')
    @patch('mcwaddams.mixins.universal.validate_office_file')
    @patch('mcwaddams.mixins.universal.detect_format')
    async def test_extract_text_csv_success(self, mock_detect, mock_validate, mock_resolve, mixin):
        """Test successful CSV text extraction."""
        # Setup mocks
        mock_resolve.return_value = "/test.csv"
        mock_validate.return_value = {"is_valid": True, "errors": []}
        mock_detect.return_value = {
            "category": "data",
            "extension": ".csv",
            "format_name": "CSV"
        }

        # Mock internal methods; both patches live for the duration of the call.
        with patch.object(mixin, '_extract_text_by_category') as mock_extract, \
                patch.object(mixin, '_extract_basic_metadata') as mock_metadata:
            mock_extract.return_value = {
                "text": "Name,Age\nJohn,30\nJane,25",
                "method_used": "pandas",
                "methods_tried": ["pandas"]
            }
            mock_metadata.return_value = {"file_size": 1024, "rows": 3}

            result = await mixin.extract_text("/test.csv")

        # Verify structure
        assert "text" in result
        assert "metadata" in result
        assert "document_metadata" in result

        # Verify content
        assert "John" in result["text"]
        assert result["metadata"]["extraction_method"] == "pandas"
        assert result["metadata"]["format"] == "CSV"
        assert result["document_metadata"]["file_size"] == 1024

    @pytest.mark.asyncio
    @patch('mcwaddams.mixins.universal.resolve_office_file_path')
    @patch('mcwaddams.mixins.universal.validate_office_file')
    @patch('mcwaddams.mixins.universal.detect_format')
    async def test_extract_text_parameter_handling(self, mock_detect, mock_validate, mock_resolve, mixin):
        """Test that extract_text forwards its options to the extraction helper.

        Every collaborator is mocked, so only the positional arguments that
        reach ``_extract_text_by_category`` are checked.
        """
        mock_resolve.return_value = "/test.docx"
        mock_validate.return_value = {"is_valid": True, "errors": []}
        mock_detect.return_value = {"category": "word", "extension": ".docx", "format_name": "Word"}

        with patch.object(mixin, '_extract_text_by_category') as mock_extract, \
                patch.object(mixin, '_extract_basic_metadata') as mock_metadata:
            mock_extract.return_value = {"text": "test", "method_used": "docx"}
            mock_metadata.return_value = {}

            # The return value is irrelevant here; only the forwarded call matters.
            await mixin.extract_text(
                file_path="/test.docx",
                preserve_formatting=True,
                include_metadata=False,
                method="primary"
            )

        # Verify the call was made with correct parameters
        # _extract_text_by_category(local_path, extension, category, preserve_formatting, method)
        mock_extract.assert_called_once()
        args = mock_extract.call_args[0]
        assert args[2] == "word"     # category (index 2)
        # Identity check: ``== True`` would also accept truthy non-bools such as 1.
        assert args[3] is True       # preserve_formatting (index 3)
        assert args[4] == "primary"  # method (index 4)


class TestExtractImages:
    """Exercise the extract_images tool."""

    @pytest.fixture
    def mixin(self):
        """Provide a UniversalMixin that has been registered on a fresh FastMCP app."""
        server = FastMCP("Test")
        universal = UniversalMixin()
        universal.register_all(server)
        return universal

    @pytest.mark.asyncio
    async def test_extract_images_nonexistent_file(self, mixin):
        """A path that does not exist makes extract_images raise OfficeFileError."""
        missing_path = "/nonexistent/file.docx"
        with pytest.raises(OfficeFileError):
            await mixin.extract_images(missing_path)

    @pytest.mark.asyncio
    @patch('mcwaddams.mixins.universal.resolve_office_file_path')
    @patch('mcwaddams.mixins.universal.validate_office_file')
    @patch('mcwaddams.mixins.universal.detect_format')
    async def test_extract_images_unsupported_format(self, detect_stub, validate_stub, resolve_stub, mixin):
        """With the image helper stubbed to return nothing, a CSV yields an empty result."""
        csv_path = "/test.csv"
        resolve_stub.return_value = csv_path
        validate_stub.return_value = {"is_valid": True, "errors": []}
        detect_stub.return_value = {
            "category": "data",
            "extension": ".csv",
            "format_name": "CSV",
        }

        # Stand in for the per-category helper: an empty list, not an error.
        with patch.object(mixin, '_extract_images_by_category') as images_stub:
            images_stub.return_value = []
            outcome = await mixin.extract_images(csv_path)

        # Shape of the response
        assert "images" in outcome
        assert "metadata" in outcome

        # Nothing was extracted, and the count agrees
        assert outcome["images"] == []
        assert outcome["metadata"]["image_count"] == 0


class TestGetSupportedFormats:
    """Test get_supported_formats tool functionality."""

    @pytest.fixture
    def mixin(self):
        """Create a UniversalMixin whose tools are registered on a FastMCP app."""
        app = FastMCP("Test")
        mixin = UniversalMixin()
        mixin.register_all(app)
        return mixin

    @pytest.mark.asyncio
    async def test_get_supported_formats_structure(self, mixin):
        """Test get_supported_formats returns correct structure."""
        result = await mixin.get_supported_formats()

        # Verify top-level structure
        assert isinstance(result, dict)
        required_keys = {"supported_extensions", "format_details", "categories", "total_formats"}
        assert required_keys.issubset(result.keys())

        # Verify supported extensions include common formats
        extensions = result["supported_extensions"]
        assert isinstance(extensions, list)
        expected_extensions = {".docx", ".xlsx", ".pptx", ".doc", ".xls", ".ppt", ".csv"}
        assert expected_extensions.issubset(set(extensions))

        # Verify categories
        categories = result["categories"]
        assert isinstance(categories, dict)
        expected_categories = {"word", "excel", "powerpoint"}
        assert expected_categories.issubset(categories.keys())

        # Verify total_formats is correct
        assert result["total_formats"] == len(extensions)

    @pytest.mark.asyncio
    async def test_get_supported_formats_details(self, mixin):
        """Test get_supported_formats includes detailed format information."""
        result = await mixin.get_supported_formats()

        format_details = result["format_details"]
        assert isinstance(format_details, dict)

        # Check that .docx details are present and complete.  Presence is
        # asserted rather than guarded with ``if``: a conditional check would
        # let this test pass without verifying anything when the entry is gone.
        assert ".docx" in format_details, "format_details has no '.docx' entry"
        docx_details = format_details[".docx"]
        expected_docx_keys = {
            "category",
            "legacy_format",
            "text_extraction",
            "image_extraction",
            "metadata_extraction",
            "markdown_conversion",
        }
        assert expected_docx_keys.issubset(docx_details.keys())

        # Verify Word document specifics
        assert docx_details["category"] == "word"
        assert docx_details["legacy_format"] is False
        assert docx_details["markdown_conversion"] is True


class TestDocumentHealth:
    """Exercise the analyze_document_health tool."""

    @pytest.fixture
    def mixin(self):
        """Provide a UniversalMixin that has been registered on a fresh FastMCP app."""
        server = FastMCP("Test")
        universal = UniversalMixin()
        universal.register_all(server)
        return universal

    @pytest.mark.asyncio
    @patch('mcwaddams.mixins.universal.resolve_office_file_path')
    @patch('mcwaddams.mixins.universal.validate_office_file')
    @patch('mcwaddams.mixins.universal.detect_format')
    async def test_analyze_document_health_success(self, detect_stub, validate_stub, resolve_stub, mixin):
        """A valid, unprotected Word file is reported as healthy, with recommendations."""
        docx_path = "/test.docx"
        validation_report = {
            "is_valid": True,
            "errors": [],
            "warnings": [],
            "password_protected": False,
        }
        format_report = {
            "category": "word",
            "extension": ".docx",
            "format_name": "Word Document",
            "is_legacy": False,
            "structure": {"estimated_complexity": "simple"},
        }

        resolve_stub.return_value = docx_path
        validate_stub.return_value = validation_report
        detect_stub.return_value = format_report

        report = await mixin.analyze_document_health(docx_path)

        # Every top-level section of the health report must be present.
        expected_sections = (
            "overall_health",
            "validation",
            "format_info",
            "analysis_time",
            "recommendations",
        )
        for section in expected_sections:
            assert section in report

        # Content reflects the mocked validation and format data.
        assert report["overall_health"] == "healthy"
        assert report["validation"]["is_valid"] is True
        assert report["format_info"]["category"] == "word"
        assert len(report["recommendations"]) > 0


class TestDirectToolAccess:
    """Call registered tools straight from the app's tool manager."""

    @pytest.mark.asyncio
    async def test_tool_execution_direct(self):
        """get_supported_formats can be awaited through its registered tool entry."""
        server = FastMCP("Test App")
        UniversalMixin().register_all(server)

        # Look the tool up by name and invoke the wrapped callable directly.
        registry = server._tool_manager._tools
        formats = await registry["get_supported_formats"].fn()

        for key in ("supported_extensions", "format_details"):
            assert key in formats
        assert isinstance(formats["supported_extensions"], list)

    @pytest.mark.asyncio
    async def test_tool_error_direct(self):
        """OfficeFileError raised by extract_text surfaces through the tool entry."""
        server = FastMCP("Test App")
        UniversalMixin().register_all(server)

        extract_text_fn = server._tool_manager._tools["extract_text"].fn
        with pytest.raises(OfficeFileError):
            await extract_text_fn(file_path="/nonexistent/file.docx")


class TestMockingPatterns:
    """Showcase mocking patterns for tools that perform file operations."""

    @pytest.fixture
    def mixin(self):
        """Provide a UniversalMixin that has been registered on a fresh FastMCP app."""
        server = FastMCP("Test")
        universal = UniversalMixin()
        universal.register_all(server)
        return universal

    @pytest.mark.asyncio
    async def test_comprehensive_mocking_pattern(self, mixin):
        """Run extract_text with every collaborator replaced by a realistic mock."""
        resolved_path = "/realistic/path/document.docx"
        body_text = "This is comprehensive test content with multiple paragraphs.\n\nIncluding headers and formatting."

        # Canned responses for the module-level helpers.
        validation_report = {
            "is_valid": True,
            "errors": [],
            "warnings": ["File is large"],
            "password_protected": False,
            "file_size": 1048576,  # 1MB
        }
        format_report = {
            "category": "word",
            "extension": ".docx",
            "format_name": "Microsoft Word Document",
            "mime_type": "application/vnd.openxmlformats-officedocument.wordprocessingml.document",
            "is_legacy": False,
            "structure": {
                "estimated_complexity": "moderate",
                "has_images": True,
                "has_tables": True,
            },
        }

        # Canned responses for the mixin's internal processing methods.
        extraction_payload = {
            "text": body_text,
            "method_used": "python-docx",
            "methods_tried": ["python-docx"],
            "formatted_sections": [
                {"type": "heading", "text": "Document Title", "level": 1},
                {"type": "paragraph", "text": "This is comprehensive test content..."},
            ],
        }
        document_properties = {
            "title": "Test Document",
            "author": "Test Author",
            "created": "2024-01-01T10:00:00Z",
            "modified": "2024-01-15T14:30:00Z",
            "word_count": 1247,
            "page_count": 3,
        }

        # A single ``with`` enters the patches left to right and unwinds them
        # in reverse, exactly like the equivalent nested statements.
        with patch('mcwaddams.mixins.universal.resolve_office_file_path') as resolve_stub, \
                patch('mcwaddams.mixins.universal.validate_office_file') as validate_stub, \
                patch('mcwaddams.mixins.universal.detect_format') as detect_stub, \
                patch.object(mixin, '_extract_text_by_category') as extract_stub, \
                patch.object(mixin, '_extract_basic_metadata') as metadata_stub:
            resolve_stub.return_value = resolved_path
            validate_stub.return_value = validation_report
            detect_stub.return_value = format_report
            extract_stub.return_value = extraction_payload
            metadata_stub.return_value = document_properties

            # Execute with realistic parameters
            outcome = await mixin.extract_text(
                file_path="/test/document.docx",
                preserve_formatting=True,
                include_metadata=True,
                method="auto",
            )

            # Text and extraction metadata come from the mocked helpers.
            assert outcome["text"] == body_text
            extraction_info = outcome["metadata"]
            assert extraction_info["extraction_method"] == "python-docx"
            assert extraction_info["format"] == "Microsoft Word Document"
            assert "extraction_time" in extraction_info
            assert outcome["document_metadata"]["author"] == "Test Author"
            assert "structure" in outcome  # Because preserve_formatting=True

            # The caller's path goes to the resolver; the resolved path goes
            # to validation and format detection.
            resolve_stub.assert_called_once_with("/test/document.docx")
            validate_stub.assert_called_once_with(resolved_path)
            detect_stub.assert_called_once_with(resolved_path)
            extract_stub.assert_called_once()
            metadata_stub.assert_called_once()


if __name__ == "__main__":
    # pytest.main() returns the exit status instead of exiting the process.
    # Passing it to SystemExit makes a direct run of this file report test
    # failures through its exit code rather than always exiting with 0.
    raise SystemExit(pytest.main([__file__, "-v"]))