✅ WordPerfect Production Support:
- Comprehensive WordPerfect processor with 5-layer fallback chain
- Support for WP 4.2, 5.0-5.1, 6.0+ (.wpd, .wp, .wp5, .wp6)
- libwpd integration (wpd2text, wpd2html, wpd2raw)
- Binary strings extraction and emergency parsing
- Password detection and encoding intelligence
- Document structure analysis and integrity checking

🏗️ Infrastructure Enhancements:
- Created comprehensive CLAUDE.md development guide
- Updated implementation status documentation
- Added WordPerfect processor test suite
- Enhanced format detection with WP magic signatures
- Production-ready with graceful dependency handling

📊 Project Status:
- 2/4 core processors complete (dBASE + WordPerfect)
- Detection engine for 25+ legacy formats operational
- Phase 2 complete: Ready for Lotus 1-2-3 implementation

🤖 Generated with [Claude Code](https://claude.ai/code)

Co-Authored-By: Claude <noreply@anthropic.com>
133 lines
4.8 KiB
Python
133 lines
4.8 KiB
Python
"""
|
|
Tests for legacy format detection.
|
|
"""
|
|
|
|
import pytest
|
|
import tempfile
|
|
import os
|
|
from pathlib import Path
|
|
|
|
from mcp_legacy_files.core.detection import LegacyFormatDetector, FormatInfo
|
|
|
|
class TestLegacyFormatDetector:
    """Test legacy format detection capabilities.

    File-based tests build their sample files under pytest's ``tmp_path``
    so that the files are closed before the detector opens them and are
    cleaned up by pytest instead of by hand-written teardown code.
    """

    @pytest.fixture
    def detector(self):
        """Return a fresh ``LegacyFormatDetector`` for each test."""
        return LegacyFormatDetector()

    @pytest.fixture
    def mock_dbase_file(self, tmp_path):
        """Create a mock dBASE file and return its path as a string.

        Only the 32-byte fixed header is written; no field descriptors or
        records follow it, even though the header declares 10 records.
        """
        # dBASE III header
        header = bytearray(32)
        header[0] = 0x03  # dBASE III version
        # Last-update date bytes (YY, MM, DD).
        # NOTE(review): the original comment read "2024-01-01"; DBF headers
        # conventionally store YY as years since 1900, which would make 24
        # mean 1924 -- confirm the intended value.
        header[1:4] = [24, 1, 1]
        header[4:8] = (10).to_bytes(4, 'little')  # 10 records
        header[8:10] = (65).to_bytes(2, 'little')  # Header length
        header[10:12] = (50).to_bytes(2, 'little')  # Record length

        dbf_path = tmp_path / "mock.dbf"
        dbf_path.write_bytes(bytes(header))
        return str(dbf_path)

    @pytest.fixture
    def mock_wordperfect_file(self, tmp_path):
        """Create a mock WordPerfect file and return its path as a string."""
        # WordPerfect signature: 0xFF followed by ASCII "WPC", then padding.
        header = b'\xFF\x57\x50\x43' + b'\x00' * 100

        wpd_path = tmp_path / "mock.wpd"
        wpd_path.write_bytes(header)
        return str(wpd_path)

    @pytest.mark.asyncio
    async def test_detect_dbase_format(self, detector, mock_dbase_file):
        """Test dBASE format detection."""
        format_info = await detector.detect_format(mock_dbase_file)

        assert format_info.format_family == "dbase"
        assert format_info.is_legacy_format is True
        assert format_info.confidence > 0.9  # Should have high confidence
        assert "dBASE" in format_info.format_name
        assert format_info.category == "database"

    @pytest.mark.asyncio
    async def test_detect_wordperfect_format(self, detector, mock_wordperfect_file):
        """Test WordPerfect format detection."""
        format_info = await detector.detect_format(mock_wordperfect_file)

        assert format_info.format_family == "wordperfect"
        assert format_info.is_legacy_format is True
        assert format_info.confidence > 0.9
        assert "WordPerfect" in format_info.format_name
        assert format_info.category == "word_processing"

    @pytest.mark.asyncio
    async def test_detect_nonexistent_file(self, detector):
        """Test detection of non-existent file."""
        format_info = await detector.detect_format("/nonexistent/file.dbf")

        assert format_info.format_name == "File Not Found"
        assert format_info.confidence == 0.0

    @pytest.mark.asyncio
    async def test_detect_unknown_format(self, detector, tmp_path):
        """Test detection of unknown format."""
        # Write and close the file before detection: reopening a still-open
        # NamedTemporaryFile by name is not portable across platforms.
        unknown_path = tmp_path / "sample.unknown"
        unknown_path.write_bytes(b"This is not a legacy format")

        format_info = await detector.detect_format(str(unknown_path))

        assert format_info.is_legacy_format is False
        assert format_info.format_name == "Unknown Format"

    @pytest.mark.asyncio
    async def test_get_supported_formats(self, detector):
        """Test getting list of supported formats."""
        formats = await detector.get_supported_formats()

        assert len(formats) > 0
        assert any(fmt['format_family'] == 'dbase' for fmt in formats)
        assert any(fmt['format_family'] == 'wordperfect' for fmt in formats)

        # Check format structure
        required_keys = ('extension', 'format_name', 'format_family', 'category', 'era')
        for fmt in formats[:3]:  # Check first few
            for key in required_keys:
                assert key in fmt

    def test_magic_signatures_loaded(self, detector):
        """Test that magic signatures are properly loaded."""
        assert len(detector.magic_signatures) > 0
        assert 'dbase' in detector.magic_signatures
        assert 'wordperfect' in detector.magic_signatures

    def test_extension_mappings_loaded(self, detector):
        """Test that extension mappings are properly loaded."""
        assert len(detector.extension_mappings) > 0
        assert '.dbf' in detector.extension_mappings
        assert '.wpd' in detector.extension_mappings

        # Check mapping structure
        dbf_mapping = detector.extension_mappings['.dbf']
        assert dbf_mapping['format_family'] == 'dbase'
        assert dbf_mapping['legacy'] is True