# mcp-office-tools/tests/test_server.py

"""Test suite for MCP Office Tools server."""

import pytest
import tempfile
import os
from pathlib import Path
from unittest.mock import patch, MagicMock

from mcp_office_tools.server import app
from mcp_office_tools.utils import OfficeFileError


class TestServerInitialization:
    """Smoke tests: the server module imports and exposes its app and tools."""

    def test_app_creation(self):
        """The module-level ``app`` exists and has a ``tool`` attribute."""
        assert app is not None
        assert hasattr(app, 'tool')

    def test_tools_registered(self):
        """Each main tool can be imported from the server module and is callable."""
        # Import-level check only: it confirms the names exist and are
        # callable, not that they behave correctly.
        from mcp_office_tools.server import (
            analyze_document_health,
            detect_office_format,
            extract_images,
            extract_metadata,
            extract_text,
            get_supported_formats,
        )

        expected_tools = (
            extract_text,
            extract_images,
            extract_metadata,
            detect_office_format,
            analyze_document_health,
            get_supported_formats,
        )
        for tool in expected_tools:
            assert callable(tool)


class TestGetSupportedFormats:
    """Checks on the structure and contents of the supported-formats listing."""

    @pytest.mark.asyncio
    async def test_get_supported_formats(self):
        """The listing is a dict with the expected keys, extensions and categories."""
        from mcp_office_tools.server import get_supported_formats

        listing = await get_supported_formats()

        assert isinstance(listing, dict)

        # Top-level keys the result must carry.
        for required_key in (
            "supported_extensions",
            "format_details",
            "categories",
            "total_formats",
        ):
            assert required_key in listing

        # Common modern and legacy extensions, plus CSV.
        known_extensions = listing["supported_extensions"]
        for extension in (".docx", ".xlsx", ".pptx", ".doc", ".xls", ".ppt", ".csv"):
            assert extension in known_extensions

        # The three document families.
        known_categories = listing["categories"]
        for family in ("word", "excel", "powerpoint"):
            assert family in known_categories


class TestTextExtraction:
    """Test text extraction functionality."""

    def create_mock_docx(self):
        """Create a minimal DOCX-like ZIP archive on disk and return its path.

        The archive contains only ``word/document.xml`` and
        ``docProps/core.xml``. The file is created with ``delete=False``, so
        the caller is responsible for removing it.
        """
        import zipfile

        # Reserve a unique path and close the handle right away; the archive
        # is written by reopening the path by name, so keeping the original
        # handle open would only leak a file descriptor.
        with tempfile.NamedTemporaryFile(suffix='.docx', delete=False) as temp_file:
            docx_path = temp_file.name

        # Create a minimal ZIP structure that looks like a DOCX
        with zipfile.ZipFile(docx_path, 'w') as zf:
            zf.writestr('word/document.xml', '<?xml version="1.0"?><document><body><p><t>Test content</t></p></body></document>')
            zf.writestr('docProps/core.xml', '<?xml version="1.0"?><coreProperties></coreProperties>')
        return docx_path

    def create_mock_csv(self):
        """Create a small three-column CSV file on disk and return its path.

        The file is created with ``delete=False``, so the caller is
        responsible for removing it.
        """
        # The context manager closes (and flushes) the handle even if the
        # write raises.
        with tempfile.NamedTemporaryFile(suffix='.csv', delete=False, mode='w') as temp_file:
            temp_file.write("Name,Age,City\nJohn,30,New York\nJane,25,Boston\n")
        return temp_file.name

    @pytest.mark.asyncio
    async def test_extract_text_nonexistent_file(self):
        """Text extraction raises ``OfficeFileError`` for a nonexistent path."""
        from mcp_office_tools.server import extract_text

        with pytest.raises(OfficeFileError):
            await extract_text("/nonexistent/file.docx")

    @pytest.mark.asyncio
    async def test_extract_text_unsupported_format(self):
        """Text extraction raises ``OfficeFileError`` for an unsupported extension."""
        from mcp_office_tools.server import extract_text

        # Create an empty temporary file with an unsupported extension
        with tempfile.NamedTemporaryFile(suffix='.unsupported', delete=False) as temp_file:
            unsupported_path = temp_file.name

        try:
            with pytest.raises(OfficeFileError):
                await extract_text(unsupported_path)
        finally:
            os.unlink(unsupported_path)

    @pytest.mark.asyncio
    @patch('mcp_office_tools.utils.validation.magic.from_file')
    async def test_extract_text_csv_success(self, mock_magic):
        """Text extraction from a CSV file returns the expected result fields."""
        from mcp_office_tools.server import extract_text

        # Mock magic to return CSV MIME type
        mock_magic.return_value = 'text/csv'

        csv_file = self.create_mock_csv()

        try:
            result = await extract_text(csv_file)

            assert isinstance(result, dict)
            assert "text" in result
            assert "method_used" in result
            assert "character_count" in result
            assert "word_count" in result
            assert "extraction_time" in result
            assert "format_info" in result

            # Check that CSV content is extracted
            assert "John" in result["text"]
            assert "Name" in result["text"]
            assert result["method_used"] == "pandas"

        finally:
            # Remove the temp file whether or not the assertions passed.
            os.unlink(csv_file)


class TestImageExtraction:
    """Error-path tests for the image extraction tool."""

    @pytest.mark.asyncio
    async def test_extract_images_nonexistent_file(self):
        """A nonexistent path makes ``extract_images`` raise ``OfficeFileError``."""
        from mcp_office_tools.server import extract_images

        missing_path = "/nonexistent/file.docx"
        with pytest.raises(OfficeFileError):
            await extract_images(missing_path)

    @pytest.mark.asyncio
    async def test_extract_images_csv_unsupported(self):
        """A CSV input makes ``extract_images`` raise ``OfficeFileError``."""
        from mcp_office_tools.server import extract_images

        # Write a tiny CSV to a temp file that outlives the handle.
        with tempfile.NamedTemporaryFile(suffix='.csv', delete=False, mode='w') as handle:
            handle.write("Name,Age\nJohn,30\n")
        csv_path = handle.name

        try:
            with pytest.raises(OfficeFileError):
                await extract_images(csv_path)
        finally:
            # Clean up regardless of the test outcome.
            os.unlink(csv_path)


class TestMetadataExtraction:
    """Error-path tests for the metadata extraction tool."""

    @pytest.mark.asyncio
    async def test_extract_metadata_nonexistent_file(self):
        """A nonexistent path makes ``extract_metadata`` raise ``OfficeFileError``."""
        from mcp_office_tools.server import extract_metadata

        missing_path = "/nonexistent/file.docx"
        with pytest.raises(OfficeFileError):
            await extract_metadata(missing_path)


class TestFormatDetection:
    """Error-path tests for the format detection tool."""

    @pytest.mark.asyncio
    async def test_detect_office_format_nonexistent_file(self):
        """A nonexistent path makes ``detect_office_format`` raise ``OfficeFileError``."""
        from mcp_office_tools.server import detect_office_format

        missing_path = "/nonexistent/file.docx"
        with pytest.raises(OfficeFileError):
            await detect_office_format(missing_path)


class TestDocumentHealth:
    """Error-path tests for the document health analysis tool."""

    @pytest.mark.asyncio
    async def test_analyze_document_health_nonexistent_file(self):
        """A nonexistent path makes ``analyze_document_health`` raise ``OfficeFileError``."""
        from mcp_office_tools.server import analyze_document_health

        missing_path = "/nonexistent/file.docx"
        with pytest.raises(OfficeFileError):
            await analyze_document_health(missing_path)


class TestUtilityFunctions:
    """Tests for the private health-scoring helpers of the server module."""

    def test_calculate_health_score(self):
        """A valid, non-legacy, simple document is given the score 10."""
        from mcp_office_tools.server import _calculate_health_score

        # Hand-built inputs describing a document with no problems.
        clean_validation = {
            "is_valid": True, "errors": [], "warnings": [],
            "password_protected": False,
        }
        modern_format = {
            "is_legacy": False,
            "structure": {"estimated_complexity": "simple"},
        }

        health = _calculate_health_score(clean_validation, modern_format)

        assert isinstance(health, int)
        assert health >= 1 and health <= 10
        # Nothing is wrong with the inputs, so the score is expected to be 10.
        assert health == 10

    def test_get_health_recommendations(self):
        """A problem-free document gets a non-empty list led by a 'healthy' note."""
        from mcp_office_tools.server import _get_health_recommendations

        # Hand-built inputs describing a document with no problems.
        clean_validation = {"errors": [], "password_protected": False}
        modern_format = {
            "is_legacy": False,
            "structure": {"estimated_complexity": "simple"},
        }

        advice = _get_health_recommendations(clean_validation, modern_format)

        assert isinstance(advice, list)
        assert len(advice) > 0
        first_item = advice[0]
        assert "Document appears healthy" in first_item


if __name__ == "__main__":
    # pytest.main() returns the exit code instead of exiting the process;
    # propagate it so running this file directly reports test failures to
    # the calling shell rather than always exiting with status 0.
    raise SystemExit(pytest.main([__file__]))