mcp-legacy-files/tests/test_detection.py
Ryan Malloy 572379d9aa 🎉 Complete Phase 2: WordPerfect processor implementation
 WordPerfect Production Support:
- Comprehensive WordPerfect processor with 5-layer fallback chain
- Support for WP 4.2, 5.0-5.1, 6.0+ (.wpd, .wp, .wp5, .wp6)
- libwpd integration (wpd2text, wpd2html, wpd2raw)
- Binary strings extraction and emergency parsing
- Password detection and encoding intelligence
- Document structure analysis and integrity checking

🏗️ Infrastructure Enhancements:
- Created comprehensive CLAUDE.md development guide
- Updated implementation status documentation
- Added WordPerfect processor test suite
- Enhanced format detection with WP magic signatures
- Production-ready with graceful dependency handling

📊 Project Status:
- 2/4 core processors complete (dBASE + WordPerfect)
- 25+ legacy format detection engine operational
- Phase 2 complete: Ready for Lotus 1-2-3 implementation

🤖 Generated with [Claude Code](https://claude.ai/code)

Co-Authored-By: Claude <noreply@anthropic.com>
2025-08-18 02:03:44 -06:00

133 lines
4.8 KiB
Python

"""
Tests for legacy format detection.
"""
import pytest
import tempfile
import os
from pathlib import Path
from mcp_legacy_files.core.detection import LegacyFormatDetector, FormatInfo
class TestLegacyFormatDetector:
    """Exercise ``LegacyFormatDetector`` against small synthetic sample files.

    Sample files are written into pytest's per-test ``tmp_path`` directory.
    Each file is therefore fully written and closed before the detector opens
    it, and pytest removes it afterwards without hand-written cleanup code.
    """

    @pytest.fixture
    def detector(self):
        """Return a fresh ``LegacyFormatDetector`` for each test."""
        return LegacyFormatDetector()

    @pytest.fixture
    def mock_dbase_file(self, tmp_path):
        """Write a 32-byte dBASE III style header; return its path as ``str``."""
        header = bytearray(32)
        header[0] = 0x03  # dBASE III version byte
        header[1:4] = [24, 1, 1]  # last-update date bytes (YY, MM, DD)
        header[4:8] = (10).to_bytes(4, 'little')  # record count: 10
        header[8:10] = (65).to_bytes(2, 'little')  # declared header length
        header[10:12] = (50).to_bytes(2, 'little')  # declared record length

        # Only the fixed 32-byte prefix is written: no field descriptors or
        # record data follow, even though the header declares them.
        sample = tmp_path / 'sample.dbf'
        sample.write_bytes(bytes(header))
        return str(sample)

    @pytest.fixture
    def mock_wordperfect_file(self, tmp_path):
        """Write a file starting with the WordPerfect magic; return its path."""
        sample = tmp_path / 'sample.wpd'
        # 0xFF followed by ASCII "WPC", then 100 bytes of zero padding.
        sample.write_bytes(b'\xFF\x57\x50\x43' + b'\x00' * 100)
        return str(sample)

    @pytest.mark.asyncio
    async def test_detect_dbase_format(self, detector, mock_dbase_file):
        """A dBASE header is reported as a high-confidence legacy database."""
        format_info = await detector.detect_format(mock_dbase_file)

        assert format_info.format_family == "dbase"
        assert format_info.is_legacy_format
        assert format_info.confidence > 0.9  # Should have high confidence
        assert "dBASE" in format_info.format_name
        assert format_info.category == "database"

    @pytest.mark.asyncio
    async def test_detect_wordperfect_format(self, detector, mock_wordperfect_file):
        """A WordPerfect signature is reported as a legacy word-processing file."""
        format_info = await detector.detect_format(mock_wordperfect_file)

        assert format_info.format_family == "wordperfect"
        assert format_info.is_legacy_format
        assert format_info.confidence > 0.9
        assert "WordPerfect" in format_info.format_name
        assert format_info.category == "word_processing"

    @pytest.mark.asyncio
    async def test_detect_nonexistent_file(self, detector):
        """A missing path yields the "File Not Found" result with zero confidence."""
        format_info = await detector.detect_format("/nonexistent/file.dbf")

        assert format_info.format_name == "File Not Found"
        assert format_info.confidence == 0.0

    @pytest.mark.asyncio
    async def test_detect_unknown_format(self, detector, tmp_path):
        """Plain text with an unrecognised extension is not a legacy format."""
        # Written and closed before detection so the detector can reopen it
        # by name on every platform.
        sample = tmp_path / 'sample.unknown'
        sample.write_bytes(b"This is not a legacy format")

        format_info = await detector.detect_format(str(sample))

        assert not format_info.is_legacy_format
        assert format_info.format_name == "Unknown Format"

    @pytest.mark.asyncio
    async def test_get_supported_formats(self, detector):
        """The supported-format listing includes dBASE and WordPerfect entries."""
        formats = await detector.get_supported_formats()

        assert len(formats) > 0
        assert any(fmt['format_family'] == 'dbase' for fmt in formats)
        assert any(fmt['format_family'] == 'wordperfect' for fmt in formats)

        # Spot-check the structure of the first few entries only.
        expected_keys = ('extension', 'format_name', 'format_family', 'category', 'era')
        for fmt in formats[:3]:
            for key in expected_keys:
                assert key in fmt

    def test_magic_signatures_loaded(self, detector):
        """The detector exposes magic signatures for both tested families."""
        assert len(detector.magic_signatures) > 0
        assert 'dbase' in detector.magic_signatures
        assert 'wordperfect' in detector.magic_signatures

    def test_extension_mappings_loaded(self, detector):
        """The detector maps ``.dbf`` and ``.wpd``; ``.dbf`` is legacy dBASE."""
        assert len(detector.extension_mappings) > 0
        assert '.dbf' in detector.extension_mappings
        assert '.wpd' in detector.extension_mappings

        # Check mapping structure
        dbf_mapping = detector.extension_mappings['.dbf']
        assert dbf_mapping['format_family'] == 'dbase'
        assert dbf_mapping['legacy']