- Use app.run_stdio_async() instead of deprecated stdio_server import - Aligns with FastMCP 2.11.3 API - Server now starts correctly with uv run mcp-office-tools - Maintains all MCPMixin functionality and tool registration
381 lines
18 KiB
Python
381 lines
18 KiB
Python
"""Focused tests for WordMixin functionality.
|
|
|
|
This module tests the WordMixin in isolation, focusing on:
|
|
- Word-specific tool functionality
|
|
- Markdown conversion capabilities
|
|
- Chapter and bookmark extraction
|
|
- Parameter validation for Word-specific features
|
|
"""
|
|
|
|
import pytest
|
|
from unittest.mock import AsyncMock, MagicMock, patch
|
|
from pathlib import Path
|
|
|
|
from fastmcp import FastMCP
|
|
# FastMCP testing - using direct tool access
|
|
|
|
from mcp_office_tools.mixins.word import WordMixin
|
|
from mcp_office_tools.utils import OfficeFileError
|
|
|
|
|
|
class TestWordMixinRegistration:
    """Checks that constructing a WordMixin wires its tool into the app."""

    def test_mixin_initialization(self):
        """A new WordMixin keeps its app reference and registers one tool."""
        server = FastMCP("Test Word")
        word_mixin = WordMixin(server)

        # The mixin must hold on to the exact app it was built with.
        assert word_mixin.app == server

        tool_count = len(server._tools)
        assert tool_count == 1  # 1 word tool

    def test_tool_names_registered(self):
        """The Word-specific tool name is present in the app's tool table."""
        server = FastMCP("Test Word")
        WordMixin(server)

        registered = set(server._tools.keys())
        required = {"convert_to_markdown"}

        # Subset check: other tools may be registered alongside the Word one.
        assert required <= registered
|
|
|
|
|
|
class TestConvertToMarkdown:
    """Tests for the ``convert_to_markdown`` tool exposed by WordMixin.

    Most tests replace the path-resolution, validation and format-detection
    utilities, plus the mixin's private helpers, with mocks and then check
    how ``convert_to_markdown`` reacts.

    NOTE(review): the utility patches target the modules named in the
    constants below. If WordMixin binds those functions with
    ``from ... import``, the patches would have to target the mixin's own
    module to take effect -- confirm against the implementation.
    """

    # Dotted paths of the utility functions replaced by mocks in these tests.
    _RESOLVE_PATH = "mcp_office_tools.utils.validation.resolve_office_file_path"
    _VALIDATE_FILE = "mcp_office_tools.utils.validation.validate_office_file"
    _DETECT_FORMAT = "mcp_office_tools.utils.file_detection.detect_format"

    @pytest.fixture
    def mixin(self):
        """Create a WordMixin bound to a fresh FastMCP app."""
        app = FastMCP("Test")
        return WordMixin(app)

    @pytest.mark.asyncio
    async def test_convert_to_markdown_nonexistent_file(self, mixin):
        """A path that does not exist raises OfficeFileError."""
        with pytest.raises(OfficeFileError):
            await mixin.convert_to_markdown("/nonexistent/file.docx")

    @pytest.mark.asyncio
    @patch(_RESOLVE_PATH)
    @patch(_VALIDATE_FILE)
    @patch(_DETECT_FORMAT)
    async def test_convert_to_markdown_validation_failure(
        self, mock_detect, mock_validate, mock_resolve, mixin
    ):
        """A failed validation result is surfaced as OfficeFileError."""
        mock_resolve.return_value = "/test.docx"
        mock_validate.return_value = {
            "is_valid": False,
            "errors": ["File is password protected"],
        }

        with pytest.raises(OfficeFileError, match="Invalid file: File is password protected"):
            await mixin.convert_to_markdown("/test.docx")

    @pytest.mark.asyncio
    @patch(_RESOLVE_PATH)
    @patch(_VALIDATE_FILE)
    @patch(_DETECT_FORMAT)
    async def test_convert_to_markdown_non_word_document(
        self, mock_detect, mock_validate, mock_resolve, mixin
    ):
        """A valid file whose detected category is not Word is rejected."""
        mock_resolve.return_value = "/test.xlsx"
        mock_validate.return_value = {"is_valid": True, "errors": []}
        mock_detect.return_value = {
            "category": "excel",
            "extension": ".xlsx",
            "format_name": "Excel",
        }

        with pytest.raises(
            OfficeFileError,
            match="Markdown conversion currently only supports Word documents",
        ):
            await mixin.convert_to_markdown("/test.xlsx")

    @pytest.mark.asyncio
    @patch(_RESOLVE_PATH)
    @patch(_VALIDATE_FILE)
    @patch(_DETECT_FORMAT)
    async def test_convert_to_markdown_docx_success(
        self, mock_detect, mock_validate, mock_resolve, mixin
    ):
        """A mocked DOCX conversion yields markdown, metadata and processing info."""
        # Setup mocks
        mock_resolve.return_value = "/test.docx"
        mock_validate.return_value = {"is_valid": True, "errors": []}
        mock_detect.return_value = {
            "category": "word",
            "extension": ".docx",
            "format_name": "Word Document",
        }

        # Mock internal methods
        with patch.object(mixin, "_analyze_document_size") as mock_analyze, \
                patch.object(mixin, "_get_processing_recommendation") as mock_recommendation, \
                patch.object(mixin, "_convert_docx_to_markdown") as mock_convert:
            mock_analyze.return_value = {
                "estimated_pages": 5,
                "estimated_size": "medium",
                "has_images": True,
                "has_complex_formatting": False,
            }
            mock_recommendation.return_value = {
                "recommendation": "proceed",
                "message": "Document size is manageable for full conversion",
            }
            mock_convert.return_value = {
                "markdown": "# Test Document\n\nThis is test content.",
                "images": [],
                "metadata": {"conversion_method": "python-docx"},
                "processing_notes": [],
            }

            result = await mixin.convert_to_markdown("/test.docx")

            # Verify structure
            assert "markdown" in result
            assert "metadata" in result
            assert "processing_info" in result

            # Verify content
            assert "# Test Document" in result["markdown"]
            assert result["metadata"]["format"] == "Word Document"
            assert "conversion_time" in result["metadata"]

    @pytest.mark.asyncio
    async def test_convert_to_markdown_parameter_handling(self, mixin):
        """Every keyword argument is accepted and conversion runs exactly once.

        ``page_range`` and ``bookmark_name`` are both supplied; as in the
        bookmark-priority test, the page range must not be parsed.
        """
        # Mock all dependencies for parameter testing
        with patch(self._RESOLVE_PATH) as mock_resolve, \
                patch(self._VALIDATE_FILE) as mock_validate, \
                patch(self._DETECT_FORMAT) as mock_detect:
            mock_resolve.return_value = "/test.docx"
            mock_validate.return_value = {"is_valid": True, "errors": []}
            mock_detect.return_value = {
                "category": "word",
                "extension": ".docx",
                "format_name": "Word",
            }

            with patch.object(mixin, "_analyze_document_size") as mock_analyze, \
                    patch.object(mixin, "_get_processing_recommendation") as mock_recommendation, \
                    patch.object(mixin, "_parse_page_range") as mock_parse_range, \
                    patch.object(mixin, "_convert_docx_to_markdown") as mock_convert:
                mock_analyze.return_value = {"estimated_pages": 10}
                mock_recommendation.return_value = {"recommendation": "proceed"}
                mock_parse_range.return_value = [1, 2, 3, 4, 5]
                mock_convert.return_value = {
                    "markdown": "# Test",
                    "images": [],
                    "metadata": {},
                    "processing_notes": [],
                }

                # Test with specific parameters
                await mixin.convert_to_markdown(
                    file_path="/test.docx",
                    include_images=False,
                    image_mode="files",
                    max_image_size=512000,
                    preserve_structure=False,
                    page_range="1-5",
                    bookmark_name="Chapter1",
                    chapter_name="Introduction",
                    summary_only=False,
                    output_dir="/output",
                )

                # Conversion must have been attempted exactly once.
                mock_convert.assert_called_once()

                # Since bookmark_name is provided, the page range is ignored
                # (bookmark takes precedence over page_range).
                mock_parse_range.assert_not_called()

    @pytest.mark.asyncio
    async def test_convert_to_markdown_bookmark_priority(self, mixin):
        """Bookmark extraction takes priority over page ranges."""
        with patch(self._RESOLVE_PATH) as mock_resolve, \
                patch(self._VALIDATE_FILE) as mock_validate, \
                patch(self._DETECT_FORMAT) as mock_detect:
            mock_resolve.return_value = "/test.docx"
            mock_validate.return_value = {"is_valid": True, "errors": []}
            mock_detect.return_value = {
                "category": "word",
                "extension": ".docx",
                "format_name": "Word",
            }

            with patch.object(mixin, "_analyze_document_size"), \
                    patch.object(mixin, "_get_processing_recommendation"), \
                    patch.object(mixin, "_parse_page_range") as mock_parse_range, \
                    patch.object(mixin, "_convert_docx_to_markdown") as mock_convert:
                mock_convert.return_value = {
                    "markdown": "# Chapter Content",
                    "images": [],
                    "metadata": {},
                    "processing_notes": [],
                }

                # Call with both page_range and bookmark_name
                await mixin.convert_to_markdown(
                    "/test.docx",
                    page_range="1-10",
                    bookmark_name="Chapter1",
                )

                # Verify that page range parsing was NOT called
                # (because bookmark takes priority)
                mock_parse_range.assert_not_called()

    @pytest.mark.asyncio
    async def test_convert_to_markdown_summary_mode(self, mixin):
        """``summary_only=True`` still returns metadata and processing info."""
        with patch(self._RESOLVE_PATH) as mock_resolve, \
                patch(self._VALIDATE_FILE) as mock_validate, \
                patch(self._DETECT_FORMAT) as mock_detect:
            mock_resolve.return_value = "/test.docx"
            mock_validate.return_value = {"is_valid": True, "errors": []}
            mock_detect.return_value = {
                "category": "word",
                "extension": ".docx",
                "format_name": "Word",
            }

            with patch.object(mixin, "_analyze_document_size") as mock_analyze, \
                    patch.object(mixin, "_get_processing_recommendation") as mock_recommendation:
                mock_analyze.return_value = {
                    "estimated_pages": 25,
                    "estimated_size": "large",
                    "has_images": True,
                }
                mock_recommendation.return_value = {
                    "recommendation": "summary_recommended",
                    "message": "Large document - summary mode recommended",
                }

                result = await mixin.convert_to_markdown(
                    "/test.docx",
                    summary_only=True,
                )

                # Verify that summary information is returned
                assert "metadata" in result
                assert "processing_info" in result
                # In summary mode, conversion should not happen.
                # NOTE(review): nothing here asserts that; the converter is
                # not patched in this test -- confirm the implementation's
                # behaviour before adding such a check.
|
|
|
|
|
|
class TestWordSpecificHelpers:
    """Exercises WordMixin's private helper methods directly."""

    @pytest.fixture
    def mixin(self):
        """Provide a WordMixin attached to a throwaway FastMCP app."""
        return WordMixin(FastMCP("Test"))

    def test_parse_page_range_single_page(self, mixin):
        """A lone page number parses to a one-element list."""
        pages = mixin._parse_page_range("5")
        assert pages == [5]

    def test_parse_page_range_range(self, mixin):
        """A dashed span expands to every page it covers, inclusive."""
        pages = mixin._parse_page_range("1-5")
        assert pages == [1, 2, 3, 4, 5]

    def test_parse_page_range_complex(self, mixin):
        """Comma-separated singles and spans combine into one ordered list."""
        pages = mixin._parse_page_range("1,3,5-7,10")
        assert pages == [1, 3, 5, 6, 7, 10]

    def test_parse_page_range_invalid(self, mixin):
        """Malformed specifications raise OfficeFileError."""
        bad_specs = (
            "invalid",
            "10-5",  # End before start
        )
        for spec in bad_specs:
            with pytest.raises(OfficeFileError):
                mixin._parse_page_range(spec)

    def test_get_processing_recommendation(self, mixin):
        """The recommendation depends on size, page range and summary flag."""
        # Each row: (doc_analysis, page_range, summary_only, expected)
        scenarios = [
            # Small document - proceed normally
            ({"estimated_pages": 3, "estimated_size": "small"}, "", False, "proceed"),
            # Large document without page range - suggest summary
            ({"estimated_pages": 25, "estimated_size": "large"}, "", False, "summary_recommended"),
            # Large document with page range - proceed
            ({"estimated_pages": 25, "estimated_size": "large"}, "1-5", False, "proceed"),
            # Summary mode requested - proceed with summary
            ({"estimated_pages": 25, "estimated_size": "large"}, "", True, "proceed"),
        ]

        for analysis, page_range, summary_only, expected in scenarios:
            outcome = mixin._get_processing_recommendation(analysis, page_range, summary_only)
            assert outcome["recommendation"] == expected
|
|
|
|
|
|
class TestDirectToolAccess:
    """Invokes the registered Word tool straight from the app's tool table."""

    @pytest.mark.asyncio
    async def test_tool_execution_direct(self):
        """Calling the registered tool with a missing file raises OfficeFileError."""
        server = FastMCP("Test App")
        WordMixin(server)

        # Fetch the tool by name and call it without going through a client.
        tool = server._tools["convert_to_markdown"]

        # Error handling via direct access (nonexistent file)
        with pytest.raises(OfficeFileError):
            await tool(file_path="/nonexistent/file.docx")

    @pytest.mark.asyncio
    async def test_tool_parameter_validation_direct(self):
        """Calling the tool on an .xlsx path with extra options raises OfficeFileError."""
        server = FastMCP("Test App")
        WordMixin(server)

        tool = server._tools["convert_to_markdown"]

        # Various parameter combinations - the wrong file type should be caught.
        call_kwargs = {
            "file_path": "/test.xlsx",  # Wrong file type
            "include_images": True,
            "image_mode": "base64",
            "preserve_structure": True,
        }

        # Intended to trigger the format validation and raise OfficeFileError.
        # NOTE(review): /test.xlsx is never created by this test, so the error
        # may come from the missing-file check instead -- confirm.
        with pytest.raises(OfficeFileError):
            await tool(**call_kwargs)
|
|
|
|
|
|
class TestLegacyWordSupport:
    """Covers conversion of legacy ``.doc`` Word documents."""

    @pytest.fixture
    def mixin(self):
        """Provide a WordMixin attached to a throwaway FastMCP app."""
        return WordMixin(FastMCP("Test"))

    @pytest.mark.asyncio
    @patch("mcp_office_tools.utils.validation.resolve_office_file_path")
    @patch("mcp_office_tools.utils.validation.validate_office_file")
    @patch("mcp_office_tools.utils.file_detection.detect_format")
    async def test_convert_legacy_doc_to_markdown(
        self, mock_detect, mock_validate, mock_resolve, mixin
    ):
        """A ``.doc`` file is converted using the mocked legacy converter's output."""
        mock_resolve.return_value = "/test.doc"
        mock_validate.return_value = {"is_valid": True, "errors": []}
        mock_detect.return_value = {
            "category": "word",
            "extension": ".doc",
            "format_name": "Word Document (Legacy)",
        }

        # Stand-ins for the mixin's internal helpers used on the legacy path.
        analyze_patch = patch.object(mixin, "_analyze_document_size")
        recommend_patch = patch.object(mixin, "_get_processing_recommendation")
        convert_patch = patch.object(mixin, "_convert_doc_to_markdown")

        with analyze_patch as mock_analyze, \
                recommend_patch as mock_recommendation, \
                convert_patch as mock_convert:
            mock_analyze.return_value = {"estimated_pages": 3}
            mock_recommendation.return_value = {"recommendation": "proceed"}
            mock_convert.return_value = {
                "markdown": "# Legacy Document\n\nContent from .doc file",
                "images": [],
                "metadata": {"conversion_method": "legacy-parser"},
                "processing_notes": ["Converted from legacy format"],
            }

            result = await mixin.convert_to_markdown("/test.doc")

            # The converter's markdown and metadata must reach the caller.
            assert "# Legacy Document" in result["markdown"]
            assert "legacy-parser" in str(result["metadata"])

            # At least one processing note must be reported.
            notes = result["processing_info"]["processing_notes"]
            assert len(notes) > 0
|
|
|
|
|
|
if __name__ == "__main__":
    # Run this module's tests verbosely when executed as a script, and pass
    # pytest's exit code on to the shell so a failing run exits non-zero.
    raise SystemExit(pytest.main([__file__, "-v"]))