- Comprehensive Microsoft Office document processing server
- Support for Word (.docx, .doc), Excel (.xlsx, .xls), PowerPoint (.pptx, .ppt), CSV
- 6 universal tools: extract_text, extract_images, extract_metadata, detect_office_format, analyze_document_health, get_supported_formats
- Multi-library fallback system for robust processing
- URL support with intelligent caching
- Legacy Office format support (97-2003)
- FastMCP integration with async architecture
- Production ready with comprehensive documentation
🤖 Generated with Claude Code (claude.ai/code)
Co-Authored-By: Claude <noreply@anthropic.com>
257 lines
8.8 KiB
Python
257 lines
8.8 KiB
Python
"""Test suite for MCP Office Tools server."""
|
|
|
|
import pytest
|
|
import tempfile
|
|
import os
|
|
from pathlib import Path
|
|
from unittest.mock import patch, MagicMock
|
|
|
|
from mcp_office_tools.server import app
|
|
from mcp_office_tools.utils import OfficeFileError
|
|
|
|
|
|
class TestServerInitialization:
    """Smoke tests: the server module imports and exposes its tool functions."""

    def test_app_creation(self):
        """The imported ``app`` object exists and has a ``tool`` attribute."""
        assert app is not None
        assert hasattr(app, 'tool')

    def test_tools_registered(self):
        """Every main tool name is importable from the server module and callable."""
        # A failed import here already signals that the server module did not
        # load or that a tool name is missing.
        from mcp_office_tools.server import (
            extract_text,
            extract_images,
            extract_metadata,
            detect_office_format,
            analyze_document_health,
            get_supported_formats,
        )

        registered_tools = (
            extract_text,
            extract_images,
            extract_metadata,
            detect_office_format,
            analyze_document_health,
            get_supported_formats,
        )
        for tool in registered_tools:
            assert callable(tool)
|
|
|
|
|
|
class TestGetSupportedFormats:
    """Checks on the dictionary returned by ``get_supported_formats``."""

    @pytest.mark.asyncio
    async def test_get_supported_formats(self):
        """The result carries the expected keys, file extensions and categories."""
        from mcp_office_tools.server import get_supported_formats

        formats = await get_supported_formats()
        assert isinstance(formats, dict)

        # Top-level keys the result must expose.
        for required_key in (
            "supported_extensions",
            "format_details",
            "categories",
            "total_formats",
        ):
            assert required_key in formats

        # Common extensions that must be listed as supported.
        listed_extensions = formats["supported_extensions"]
        for extension in (".docx", ".xlsx", ".pptx", ".doc", ".xls", ".ppt", ".csv"):
            assert extension in listed_extensions

        # Category names that must be present.
        listed_categories = formats["categories"]
        for category in ("word", "excel", "powerpoint"):
            assert category in listed_categories
|
|
|
|
|
|
class TestTextExtraction:
    """Tests for ``extract_text``, plus helpers that build throwaway input files."""

    def create_mock_docx(self):
        """Create a minimal DOCX-like ZIP archive on disk and return its path.

        The archive contains only ``word/document.xml`` and
        ``docProps/core.xml``. The file is created with ``delete=False``, so
        the caller is responsible for removing it.
        """
        import zipfile

        # Close the temporary handle before reopening the path by name:
        # leaving it open leaks a file descriptor and, on Windows, prevents
        # the file from being opened a second time.
        with tempfile.NamedTemporaryFile(suffix='.docx', delete=False) as temp_file:
            docx_path = temp_file.name

        # Create a minimal ZIP structure that looks like a DOCX
        with zipfile.ZipFile(docx_path, 'w') as zf:
            zf.writestr('word/document.xml', '<?xml version="1.0"?><document><body><p><t>Test content</t></p></body></document>')
            zf.writestr('docProps/core.xml', '<?xml version="1.0"?><coreProperties></coreProperties>')
        return docx_path

    def create_mock_csv(self):
        """Write a three-line CSV file to disk and return its path.

        The file is created with ``delete=False``, so the caller is
        responsible for removing it.
        """
        # The context manager guarantees the handle is closed (and the data
        # flushed) even if the write fails.
        with tempfile.NamedTemporaryFile(suffix='.csv', delete=False, mode='w') as temp_file:
            temp_file.write("Name,Age,City\nJohn,30,New York\nJane,25,Boston\n")
        return temp_file.name

    @pytest.mark.asyncio
    async def test_extract_text_nonexistent_file(self):
        """``extract_text`` raises ``OfficeFileError`` for a path that does not exist."""
        from mcp_office_tools.server import extract_text

        with pytest.raises(OfficeFileError):
            await extract_text("/nonexistent/file.docx")

    @pytest.mark.asyncio
    async def test_extract_text_unsupported_format(self):
        """``extract_text`` raises ``OfficeFileError`` for an unsupported extension."""
        from mcp_office_tools.server import extract_text

        # Create an empty temporary file with an unsupported extension.
        with tempfile.NamedTemporaryFile(suffix='.unsupported', delete=False) as temp_file:
            unsupported_path = temp_file.name

        try:
            with pytest.raises(OfficeFileError):
                await extract_text(unsupported_path)
        finally:
            os.unlink(unsupported_path)

    @pytest.mark.asyncio
    @patch('mcp_office_tools.utils.validation.magic.from_file')
    async def test_extract_text_csv_success(self, mock_magic):
        """``extract_text`` on a CSV file returns the content and result fields."""
        from mcp_office_tools.server import extract_text

        # Mock magic to return CSV MIME type
        mock_magic.return_value = 'text/csv'

        csv_file = self.create_mock_csv()

        try:
            result = await extract_text(csv_file)

            assert isinstance(result, dict)
            assert "text" in result
            assert "method_used" in result
            assert "character_count" in result
            assert "word_count" in result
            assert "extraction_time" in result
            assert "format_info" in result

            # Check that CSV content is extracted
            assert "John" in result["text"]
            assert "Name" in result["text"]
            assert result["method_used"] == "pandas"
        finally:
            os.unlink(csv_file)
|
|
|
|
|
|
class TestImageExtraction:
    """Error-path tests for ``extract_images``."""

    @pytest.mark.asyncio
    async def test_extract_images_nonexistent_file(self):
        """``extract_images`` raises ``OfficeFileError`` for a missing file."""
        from mcp_office_tools.server import extract_images

        missing_path = "/nonexistent/file.docx"
        with pytest.raises(OfficeFileError):
            await extract_images(missing_path)

    @pytest.mark.asyncio
    async def test_extract_images_csv_unsupported(self):
        """``extract_images`` raises ``OfficeFileError`` when given a CSV file."""
        from mcp_office_tools.server import extract_images

        # Write a small CSV to disk; delete=False keeps it after the handle closes.
        with tempfile.NamedTemporaryFile(suffix='.csv', delete=False, mode='w') as csv_handle:
            csv_handle.write("Name,Age\nJohn,30\n")
        csv_path = csv_handle.name

        try:
            with pytest.raises(OfficeFileError):
                await extract_images(csv_path)
        finally:
            # Always remove the temporary file, whether or not the check passed.
            os.unlink(csv_path)
|
|
|
|
|
|
class TestMetadataExtraction:
    """Error-path tests for ``extract_metadata``."""

    @pytest.mark.asyncio
    async def test_extract_metadata_nonexistent_file(self):
        """``extract_metadata`` raises ``OfficeFileError`` for a missing file."""
        from mcp_office_tools.server import extract_metadata

        missing_path = "/nonexistent/file.docx"
        with pytest.raises(OfficeFileError):
            await extract_metadata(missing_path)
|
|
|
|
|
|
class TestFormatDetection:
    """Error-path tests for ``detect_office_format``."""

    @pytest.mark.asyncio
    async def test_detect_office_format_nonexistent_file(self):
        """``detect_office_format`` raises ``OfficeFileError`` for a missing file."""
        from mcp_office_tools.server import detect_office_format

        missing_path = "/nonexistent/file.docx"
        with pytest.raises(OfficeFileError):
            await detect_office_format(missing_path)
|
|
|
|
|
|
class TestDocumentHealth:
    """Error-path tests for ``analyze_document_health``."""

    @pytest.mark.asyncio
    async def test_analyze_document_health_nonexistent_file(self):
        """``analyze_document_health`` raises ``OfficeFileError`` for a missing file."""
        from mcp_office_tools.server import analyze_document_health

        missing_path = "/nonexistent/file.docx"
        with pytest.raises(OfficeFileError):
            await analyze_document_health(missing_path)
|
|
|
|
|
|
class TestUtilityFunctions:
    """Tests for the private health-scoring helpers of the server module."""

    def test_calculate_health_score(self):
        """A valid, non-legacy, simple document scores exactly 10."""
        from mcp_office_tools.server import _calculate_health_score

        # Hand-built inputs describing a document with no problems.
        clean_validation = dict(
            is_valid=True,
            errors=[],
            warnings=[],
            password_protected=False,
        )
        modern_format = dict(
            is_legacy=False,
            structure={"estimated_complexity": "simple"},
        )

        health_score = _calculate_health_score(clean_validation, modern_format)

        assert isinstance(health_score, int)
        assert 1 <= health_score <= 10
        assert health_score == 10  # Perfect score for healthy document

    def test_get_health_recommendations(self):
        """A problem-free document yields a leading 'appears healthy' recommendation."""
        from mcp_office_tools.server import _get_health_recommendations

        # Hand-built inputs describing a document with no problems.
        clean_validation = dict(errors=[], password_protected=False)
        modern_format = dict(
            is_legacy=False,
            structure={"estimated_complexity": "simple"},
        )

        advice = _get_health_recommendations(clean_validation, modern_format)

        assert isinstance(advice, list)
        assert len(advice) > 0
        first_item = advice[0]
        assert "Document appears healthy" in first_item
|
|
|
|
|
|
if __name__ == "__main__":
    # Propagate pytest's exit code so that running this file directly exits
    # non-zero when any test fails, instead of always exiting with 0.
    raise SystemExit(pytest.main([__file__]))