mcp-office-tools/tests/test_word_mixin.py
Ryan Malloy 0748eec48d Fix FastMCP stdio server import
- Use app.run_stdio_async() instead of deprecated stdio_server import
- Aligns with FastMCP 2.11.3 API
- Server now starts correctly with uv run mcp-office-tools
- Maintains all MCPMixin functionality and tool registration
2025-09-26 15:49:00 -06:00

381 lines
18 KiB
Python

"""Focused tests for WordMixin functionality.
This module tests the WordMixin in isolation, focusing on:
- Word-specific tool functionality
- Markdown conversion capabilities
- Chapter and bookmark extraction
- Parameter validation for Word-specific features
"""
import pytest
from unittest.mock import AsyncMock, MagicMock, patch
from pathlib import Path
from fastmcp import FastMCP
# FastMCP testing - using direct tool access
from mcp_office_tools.mixins.word import WordMixin
from mcp_office_tools.utils import OfficeFileError
class TestWordMixinRegistration:
    """Registration checks: building a WordMixin wires its tool into the app."""

    def test_mixin_initialization(self):
        """A new WordMixin keeps the app it was given and registers one tool."""
        server = FastMCP("Test Word")
        word_mixin = WordMixin(server)

        assert word_mixin.app == server
        # Exactly one Word tool is expected in the app's tool table.
        tool_count = len(server._tools)
        assert tool_count == 1

    def test_tool_names_registered(self):
        """The Word-specific tool name is present among the registered tools."""
        server = FastMCP("Test Word")
        WordMixin(server)

        registered = set(server._tools.keys())
        # Subset check: other tools may be registered alongside the Word one.
        assert {"convert_to_markdown"} <= registered
class TestConvertToMarkdown:
    """Test convert_to_markdown tool functionality.

    Most tests replace path resolution, file validation and format
    detection with mocks via ``@patch`` decorators, and swap the mixin's
    private helpers with ``patch.object`` for the duration of the call.

    NOTE(review): the ``@patch`` targets name the modules where the helpers
    are defined (``utils.validation`` / ``utils.file_detection``). If
    WordMixin binds them with ``from ... import``, the patches would need to
    target the mixin's module instead -- confirm against the WordMixin source.
    """

    @pytest.fixture
    def mixin(self):
        """Create a WordMixin bound to a fresh FastMCP app."""
        app = FastMCP("Test")
        return WordMixin(app)

    @pytest.mark.asyncio
    async def test_convert_to_markdown_nonexistent_file(self, mixin):
        """A path that does not exist must raise OfficeFileError."""
        with pytest.raises(OfficeFileError):
            await mixin.convert_to_markdown("/nonexistent/file.docx")

    @pytest.mark.asyncio
    @patch('mcp_office_tools.utils.validation.resolve_office_file_path')
    @patch('mcp_office_tools.utils.validation.validate_office_file')
    @patch('mcp_office_tools.utils.file_detection.detect_format')
    async def test_convert_to_markdown_validation_failure(
        self, mock_detect, mock_validate, mock_resolve, mixin
    ):
        """A validation result with ``is_valid: False`` raises OfficeFileError.

        The expected message embeds the first validation error.
        """
        mock_resolve.return_value = "/test.docx"
        mock_validate.return_value = {
            "is_valid": False,
            "errors": ["File is password protected"],
        }

        with pytest.raises(OfficeFileError, match="Invalid file: File is password protected"):
            await mixin.convert_to_markdown("/test.docx")

    @pytest.mark.asyncio
    @patch('mcp_office_tools.utils.validation.resolve_office_file_path')
    @patch('mcp_office_tools.utils.validation.validate_office_file')
    @patch('mcp_office_tools.utils.file_detection.detect_format')
    async def test_convert_to_markdown_non_word_document(
        self, mock_detect, mock_validate, mock_resolve, mixin
    ):
        """A valid file detected as category ``excel`` is rejected."""
        mock_resolve.return_value = "/test.xlsx"
        mock_validate.return_value = {"is_valid": True, "errors": []}
        mock_detect.return_value = {
            "category": "excel",
            "extension": ".xlsx",
            "format_name": "Excel",
        }

        with pytest.raises(OfficeFileError, match="Markdown conversion currently only supports Word documents"):
            await mixin.convert_to_markdown("/test.xlsx")

    @pytest.mark.asyncio
    @patch('mcp_office_tools.utils.validation.resolve_office_file_path')
    @patch('mcp_office_tools.utils.validation.validate_office_file')
    @patch('mcp_office_tools.utils.file_detection.detect_format')
    async def test_convert_to_markdown_docx_success(
        self, mock_detect, mock_validate, mock_resolve, mixin
    ):
        """A mocked DOCX conversion yields markdown, metadata and processing info."""
        # File-level checks all succeed and report a Word document.
        mock_resolve.return_value = "/test.docx"
        mock_validate.return_value = {"is_valid": True, "errors": []}
        mock_detect.return_value = {
            "category": "word",
            "extension": ".docx",
            "format_name": "Word Document",
        }

        # Replace the mixin's internal helpers so only the orchestration runs.
        with patch.object(mixin, '_analyze_document_size') as mock_analyze, \
                patch.object(mixin, '_get_processing_recommendation') as mock_recommendation, \
                patch.object(mixin, '_convert_docx_to_markdown') as mock_convert:
            mock_analyze.return_value = {
                "estimated_pages": 5,
                "estimated_size": "medium",
                "has_images": True,
                "has_complex_formatting": False,
            }
            mock_recommendation.return_value = {
                "recommendation": "proceed",
                "message": "Document size is manageable for full conversion",
            }
            mock_convert.return_value = {
                "markdown": "# Test Document\n\nThis is test content.",
                "images": [],
                "metadata": {"conversion_method": "python-docx"},
                "processing_notes": [],
            }

            result = await mixin.convert_to_markdown("/test.docx")

        # Verify structure
        assert "markdown" in result
        assert "metadata" in result
        assert "processing_info" in result

        # Verify content
        assert "# Test Document" in result["markdown"]
        assert result["metadata"]["format"] == "Word Document"
        assert "conversion_time" in result["metadata"]

    @pytest.mark.asyncio
    @patch('mcp_office_tools.utils.validation.resolve_office_file_path')
    @patch('mcp_office_tools.utils.validation.validate_office_file')
    @patch('mcp_office_tools.utils.file_detection.detect_format')
    async def test_convert_to_markdown_parameter_handling(
        self, mock_detect, mock_validate, mock_resolve, mixin
    ):
        """Passing every optional parameter still results in one converter call."""
        mock_resolve.return_value = "/test.docx"
        mock_validate.return_value = {"is_valid": True, "errors": []}
        mock_detect.return_value = {"category": "word", "extension": ".docx", "format_name": "Word"}

        with patch.object(mixin, '_analyze_document_size') as mock_analyze, \
                patch.object(mixin, '_get_processing_recommendation') as mock_recommendation, \
                patch.object(mixin, '_parse_page_range') as mock_parse_range, \
                patch.object(mixin, '_convert_docx_to_markdown') as mock_convert:
            mock_analyze.return_value = {"estimated_pages": 10}
            mock_recommendation.return_value = {"recommendation": "proceed"}
            mock_parse_range.return_value = [1, 2, 3, 4, 5]
            mock_convert.return_value = {
                "markdown": "# Test",
                "images": [],
                "metadata": {},
                "processing_notes": [],
            }

            # Exercise the tool with every parameter set to a non-default-looking value.
            await mixin.convert_to_markdown(
                file_path="/test.docx",
                include_images=False,
                image_mode="files",
                max_image_size=512000,
                preserve_structure=False,
                page_range="1-5",
                bookmark_name="Chapter1",
                chapter_name="Introduction",
                summary_only=False,
                output_dir="/output",
            )

        # The converter must have been invoked exactly once for this call.
        mock_convert.assert_called_once()
        # Note: since bookmark_name is provided, page_numbers should be None
        # (bookmark takes precedence over page_range). That precedence is
        # asserted in test_convert_to_markdown_bookmark_priority; the
        # converter's argument layout is not inspected here.

    @pytest.mark.asyncio
    @patch('mcp_office_tools.utils.validation.resolve_office_file_path')
    @patch('mcp_office_tools.utils.validation.validate_office_file')
    @patch('mcp_office_tools.utils.file_detection.detect_format')
    async def test_convert_to_markdown_bookmark_priority(
        self, mock_detect, mock_validate, mock_resolve, mixin
    ):
        """With both page_range and bookmark_name given, page parsing is skipped."""
        mock_resolve.return_value = "/test.docx"
        mock_validate.return_value = {"is_valid": True, "errors": []}
        mock_detect.return_value = {"category": "word", "extension": ".docx", "format_name": "Word"}

        # The size/recommendation helpers are patched with default mocks; only
        # the page-range parser and the converter are inspected or configured.
        with patch.object(mixin, '_analyze_document_size'), \
                patch.object(mixin, '_get_processing_recommendation'), \
                patch.object(mixin, '_parse_page_range') as mock_parse_range, \
                patch.object(mixin, '_convert_docx_to_markdown') as mock_convert:
            mock_convert.return_value = {
                "markdown": "# Chapter Content",
                "images": [],
                "metadata": {},
                "processing_notes": [],
            }

            # Call with both page_range and bookmark_name
            await mixin.convert_to_markdown(
                "/test.docx",
                page_range="1-10",
                bookmark_name="Chapter1",
            )

        # Page range parsing must NOT run because the bookmark takes priority.
        mock_parse_range.assert_not_called()

    @pytest.mark.asyncio
    @patch('mcp_office_tools.utils.validation.resolve_office_file_path')
    @patch('mcp_office_tools.utils.validation.validate_office_file')
    @patch('mcp_office_tools.utils.file_detection.detect_format')
    async def test_convert_to_markdown_summary_mode(
        self, mock_detect, mock_validate, mock_resolve, mixin
    ):
        """summary_only=True returns a result carrying metadata and processing info."""
        mock_resolve.return_value = "/test.docx"
        mock_validate.return_value = {"is_valid": True, "errors": []}
        mock_detect.return_value = {"category": "word", "extension": ".docx", "format_name": "Word"}

        with patch.object(mixin, '_analyze_document_size') as mock_analyze, \
                patch.object(mixin, '_get_processing_recommendation') as mock_recommendation:
            mock_analyze.return_value = {
                "estimated_pages": 25,
                "estimated_size": "large",
                "has_images": True,
            }
            mock_recommendation.return_value = {
                "recommendation": "summary_recommended",
                "message": "Large document - summary mode recommended",
            }

            result = await mixin.convert_to_markdown(
                "/test.docx",
                summary_only=True,
            )

        # Verify that summary information is returned
        assert "metadata" in result
        assert "processing_info" in result
        # NOTE(review): in summary mode, conversion should not happen, but the
        # converter is not patched here so this test does not assert it.
class TestWordSpecificHelpers:
    """Test Word-specific helper methods (_parse_page_range and
    _get_processing_recommendation) by calling them directly on the mixin."""

    @pytest.fixture
    def mixin(self):
        """Create a WordMixin bound to a fresh FastMCP app."""
        app = FastMCP("Test")
        return WordMixin(app)

    def test_parse_page_range_single_page(self, mixin):
        """A single page number parses to a one-element list."""
        result = mixin._parse_page_range("5")
        assert result == [5]

    def test_parse_page_range_range(self, mixin):
        """A dash range expands to every page, both ends included."""
        result = mixin._parse_page_range("1-5")
        assert result == [1, 2, 3, 4, 5]

    def test_parse_page_range_complex(self, mixin):
        """Comma-separated pages and ranges combine into one ordered list."""
        result = mixin._parse_page_range("1,3,5-7,10")
        expected = [1, 3, 5, 6, 7, 10]
        assert result == expected

    # Each invalid spec is its own case so one failure cannot mask the other.
    @pytest.mark.parametrize(
        "page_range",
        [
            pytest.param("invalid", id="non-numeric"),
            pytest.param("10-5", id="end-before-start"),
        ],
    )
    def test_parse_page_range_invalid(self, mixin, page_range):
        """Invalid page range specs raise OfficeFileError."""
        with pytest.raises(OfficeFileError):
            mixin._parse_page_range(page_range)

    # Scenarios are parametrized so every one is reported independently
    # instead of stopping at the first failing assertion.
    @pytest.mark.parametrize(
        ("doc_analysis", "page_range", "summary_only", "expected"),
        [
            pytest.param(
                {"estimated_pages": 3, "estimated_size": "small"},
                "",
                False,
                "proceed",
                id="small-document",
            ),
            pytest.param(
                {"estimated_pages": 25, "estimated_size": "large"},
                "",
                False,
                "summary_recommended",
                id="large-without-page-range",
            ),
            pytest.param(
                {"estimated_pages": 25, "estimated_size": "large"},
                "1-5",
                False,
                "proceed",
                id="large-with-page-range",
            ),
            pytest.param(
                {"estimated_pages": 25, "estimated_size": "large"},
                "",
                True,
                "proceed",
                id="large-summary-requested",
            ),
        ],
    )
    def test_get_processing_recommendation(
        self, mixin, doc_analysis, page_range, summary_only, expected
    ):
        """The recommendation matches the expected value for each scenario.

        The three arguments are passed positionally, in the same order the
        helper is called elsewhere in this test module.
        """
        result = mixin._get_processing_recommendation(doc_analysis, page_range, summary_only)
        assert result["recommendation"] == expected
class TestDirectToolAccess:
    """Exercise the Word tool by pulling it straight out of the app's tool table."""

    @pytest.mark.asyncio
    async def test_tool_execution_direct(self):
        """Invoking the registered tool on a missing file raises OfficeFileError."""
        server = FastMCP("Test App")
        WordMixin(server)

        # Look the tool up by name and call it like a plain coroutine function.
        tool = server._tools["convert_to_markdown"]
        with pytest.raises(OfficeFileError):
            await tool(file_path="/nonexistent/file.docx")

    @pytest.mark.asyncio
    async def test_tool_parameter_validation_direct(self):
        """Invoking the tool on an .xlsx path with extra options raises OfficeFileError."""
        server = FastMCP("Test App")
        WordMixin(server)
        tool = server._tools["convert_to_markdown"]

        # Wrong file type for a Word tool, combined with several options.
        # NOTE(review): /test.xlsx is never created by this test, so the error
        # may come from path resolution rather than format validation -- confirm.
        call_kwargs = {
            "file_path": "/test.xlsx",
            "include_images": True,
            "image_mode": "base64",
            "preserve_structure": True,
        }
        with pytest.raises(OfficeFileError):
            await tool(**call_kwargs)
class TestLegacyWordSupport:
    """Checks for the legacy Word (.doc) conversion path."""

    @pytest.fixture
    def mixin(self):
        """Provide a WordMixin attached to a throwaway FastMCP app."""
        return WordMixin(FastMCP("Test"))

    @pytest.mark.asyncio
    @patch('mcp_office_tools.utils.validation.resolve_office_file_path')
    @patch('mcp_office_tools.utils.validation.validate_office_file')
    @patch('mcp_office_tools.utils.file_detection.detect_format')
    async def test_convert_legacy_doc_to_markdown(self, detect_mock, validate_mock, resolve_mock, mixin):
        """A mocked .doc conversion surfaces its markdown, metadata and notes."""
        # File-level checks succeed and report a legacy Word document.
        detect_mock.return_value = {
            "category": "word",
            "extension": ".doc",
            "format_name": "Word Document (Legacy)",
        }
        validate_mock.return_value = {"is_valid": True, "errors": []}
        resolve_mock.return_value = "/test.doc"

        converted = {
            "markdown": "# Legacy Document\n\nContent from .doc file",
            "images": [],
            "metadata": {"conversion_method": "legacy-parser"},
            "processing_notes": ["Converted from legacy format"],
        }

        # Swap the mixin's internal helpers, including the .doc converter.
        with patch.object(mixin, '_analyze_document_size') as size_mock, \
                patch.object(mixin, '_get_processing_recommendation') as advice_mock, \
                patch.object(mixin, '_convert_doc_to_markdown') as doc_convert_mock:
            size_mock.return_value = {"estimated_pages": 3}
            advice_mock.return_value = {"recommendation": "proceed"}
            doc_convert_mock.return_value = converted

            result = await mixin.convert_to_markdown("/test.doc")

        # The converter's output must be reflected in the tool's result.
        assert "# Legacy Document" in result["markdown"]
        metadata_text = str(result["metadata"])
        assert "legacy-parser" in metadata_text
        notes = result["processing_info"]["processing_notes"]
        assert len(notes) > 0
if __name__ == "__main__":
    # pytest.main returns the run's exit code; raise it as SystemExit so a
    # failing run is visible to the shell/CI instead of always exiting 0.
    raise SystemExit(pytest.main([__file__, "-v"]))