mcp-legacy-files/examples/test_lotus123_processor.py

#!/usr/bin/env python3
"""
Test Lotus 1-2-3 processor implementation without requiring actual WK1/WK3/WK4 files.

This test verifies:
1. Lotus 1-2-3 processor initialization
2. Processing chain detection
3. File structure analysis capabilities
4. Binary parsing functionality
5. Error handling and fallback systems
"""

import sys
import os
import tempfile
import struct
from pathlib import Path

# Make the project's ``src`` directory (sibling of this file's parent directory)
# importable when the script is run straight from a checkout.
# abspath() guards against a relative ``__file__``: without it, running the
# script from inside its own directory can reduce both dirname() calls to ''
# and leave a bare 'src' entry that is resolved against the working directory.
sys.path.insert(0, os.path.join(os.path.dirname(os.path.dirname(os.path.abspath(__file__))), 'src'))

def create_mock_lotus_file(format_type: str = "wk1") -> str:
    """Write a small synthetic Lotus 1-2-3 style file and return its path.

    The file starts with the magic signature registered for ``format_type``.
    For ``"wk1"``, ``"wk3"`` and ``"wk4"`` the signature is followed by a BOF
    record, four mock cell records (integer, number, label, formula) and an
    EOF record. Every other name (``"wks"``, ``"symphony"``, or an unknown
    value) gets a flat body of zero padding plus three NUL-terminated strings.

    Args:
        format_type: Format name; it is also used verbatim as the file
            extension. Names without a registered signature fall back to the
            ``"wk1"`` signature but still receive the flat body.

    Returns:
        Path of the temporary file. The file is not deleted automatically;
        the caller is responsible for removing it.

    Raises:
        OSError: If the temporary file cannot be created or written. A
            partially written file is removed before the error propagates.
    """
    # Lotus 1-2-3 magic signatures
    signatures = {
        "wks": b"\x0E\x00\x1A\x00",  # Lotus 1-2-3 Release 1A
        "wk1": b"\x00\x00\x02\x00\x06\x04\x06\x00",  # Release 2.x
        "wk3": b"\x00\x00\x1A\x00\x02\x04\x04\x00",  # Release 3.x
        "wk4": b"\x00\x00\x1A\x00\x05\x05\x04\x00",  # Release 4.x
        "symphony": b"\xFF\x00\x02\x00\x04\x04\x05\x00",  # Symphony
    }

    # Assemble the whole payload in memory first so that nothing can fail
    # halfway through while a file handle is open.
    parts = [signatures.get(format_type, signatures["wk1"])]

    if format_type in ("wk1", "wk3", "wk4"):
        # BOF record: type=0x00, length=0x02, followed by two version bytes
        parts.append(struct.pack('<HH', 0x00, 0x02))
        parts.append(b'\x04\x04')

        # Mock cell records as (record type, record body) pairs
        mock_cells = [
            # INTEGER cell at A1 (col=0, row=0): value=42
            (0x0F, struct.pack('<BBHB', 0, 0, 0, 0xFF) + struct.pack('<h', 42)),

            # NUMBER cell at B1 (col=1, row=0): value=3.14159
            (0x10, struct.pack('<BBHB', 1, 0, 0, 0xFF) + struct.pack('<d', 3.14159)),

            # LABEL cell at C1 (col=2, row=0): "Hello Lotus"
            (0x11, struct.pack('<BBHB', 2, 0, 0, 0x27) + b'Hello Lotus\x00'),

            # FORMULA cell at A2 (col=0, row=1): value=85 (42+43)
            (0x12, struct.pack('<BBHB', 0, 1, 0, 0xFF) + struct.pack('<d', 85.0) + b'\x05\x00\x00\x00\x00'),
        ]

        # Each record is framed as little-endian <type:uint16><length:uint16><body>
        for record_type, record_data in mock_cells:
            parts.append(struct.pack('<HH', record_type, len(record_data)))
            parts.append(record_data)

        # EOF record
        parts.append(struct.pack('<HH', 0x01, 0x00))
    else:
        # WKS, Symphony and unrecognised names: flat, record-less body
        parts.append(b'\x00' * 50)  # Padding
        parts.append(b'Sample WKS Data\x00')
        parts.append(b'Row 1, Col 1\x00')
        parts.append(b'123.45\x00')

    payload = b"".join(parts)

    # delete=False: the path must outlive the handle so the caller can reopen it.
    temp_file = tempfile.NamedTemporaryFile(mode='wb', suffix=f'.{format_type}', delete=False)
    try:
        # The context manager closes the handle even if the write fails.
        with temp_file:
            temp_file.write(payload)
    except BaseException:
        # Do not leave a truncated file behind; re-raise the original error.
        os.unlink(temp_file.name)
        raise
    return temp_file.name

async def test_lotus123_processor():
    """Run five checks against the Lotus 1-2-3 processor and print a report.

    The checks, in order:
      1. Processor construction and presence of ``"binary_parser"`` in the
         processing chain.
      2. Structure analysis of mock files for five format names (passes when
         at least three succeed).
      3. Direct invocation of the binary parser on a mock WK1 file.
      4. Parsing of hand-built integer, number, label and formula cell
         records (passes when at least three succeed).
      5. Encoding detection per format variant (passes when at least four of
         five match the expected code page).

    Each check handles its own exceptions, so a failure in one does not stop
    the others. Temporary mock files are removed whether or not a check
    raises.

    Returns:
        bool: ``False`` if the processor module cannot be imported; otherwise
        ``True`` when at least 80% of the checks passed.
    """

    def _discard(path):
        """Best-effort removal of a temporary mock file (``None`` is ignored)."""
        if path is None:
            return
        try:
            os.unlink(path)
        except OSError:
            # Cleanup must never turn a finished check into a failure.
            pass

    print("🏛️  Lotus 1-2-3 Processor Test")
    print("=" * 60)

    success_count = 0
    total_tests = 0

    # Only the import itself is guarded here; every check below has its own
    # handler. Lotus123FileInfo is not used afterwards, but importing it makes
    # the run fail early if the name is missing from the module.
    try:
        from mcp_legacy_files.processors.lotus123 import Lotus123Processor, Lotus123FileInfo
    except ImportError as e:
        print(f"❌ Could not import Lotus 1-2-3 processor: {e}")
        return False

    # Test 1: Processor initialization
    # If construction fails, ``processor`` stays unbound and each later check
    # reports the resulting error through its own exception handler.
    total_tests += 1
    print("\n📋 Test 1: Processor Initialization")
    try:
        processor = Lotus123Processor()
        processing_chain = processor.get_processing_chain()

        print("✅ Lotus 1-2-3 processor initialized")
        print(f"   Processing chain: {processing_chain}")
        print(f"   Available methods: {len(processing_chain)}")

        # Check supported versions (only the first three are listed)
        print(f"   Supported versions: {len(processor.supported_versions)}")
        for signature, version in list(processor.supported_versions.items())[:3]:
            print(f"     {version}: {signature.hex()}")

        # Verify fallback chain includes binary parser
        if "binary_parser" in processing_chain:
            print("   ✅ Emergency binary parser available")
            success_count += 1
        else:
            print("   ❌ Missing emergency fallback")

    except Exception as e:
        print(f"❌ Processor initialization failed: {e}")

    # Test 2: File structure analysis
    total_tests += 1
    print("\n📋 Test 2: File Structure Analysis")

    # Test with different Lotus formats
    test_formats = ["wks", "wk1", "wk3", "wk4", "symphony"]
    format_results = {}

    for format_type in test_formats:
        # Reset per iteration so cleanup never targets a previous format's file.
        mock_file = None
        try:
            mock_file = create_mock_lotus_file(format_type)

            # Test structure analysis
            file_info = await processor._analyze_lotus_structure(mock_file)

            if file_info:
                format_results[format_type] = "✅"
                print(f"   ✅ {format_type.upper()}: {file_info.version}")
                print(f"      Variant: {file_info.format_variant}")
                print(f"      Size: {file_info.file_size} bytes")
                print(f"      Encoding: {file_info.encoding}")
                print(f"      Worksheets: {file_info.worksheet_count}")
            else:
                format_results[format_type] = "❌"
                print(f"   ❌ {format_type.upper()}: Structure analysis failed")

        except Exception as e:
            format_results[format_type] = "❌"
            print(f"   ❌ {format_type.upper()}: Error - {e}")
        finally:
            _discard(mock_file)

    # Count successful format analyses
    successful_formats = sum(1 for result in format_results.values() if result == "✅")
    if successful_formats >= 3:  # At least 3 out of 5 formats working
        success_count += 1

    # Test 3: Binary parser functionality
    total_tests += 1
    print("\n📋 Test 3: Binary Parser Functionality")

    mock_file = None
    try:
        # Create a WK1 file with structured data for binary parsing
        mock_file = create_mock_lotus_file("wk1")
        file_info = await processor._analyze_lotus_structure(mock_file)

        if file_info:
            # Test binary parsing method directly
            result = await processor._process_with_binary_parser(
                mock_file, file_info, preserve_formatting=True
            )

            if result and result.success:
                print("   ✅ Binary parser: Success")
                print(f"      Method used: {result.method_used}")
                print(f"      Text length: {len(result.text_content or '')}")

                if result.structured_content:
                    data = result.structured_content.get("data", [])
                    print(f"      Cells extracted: {len(data)}")

                    # Check if we got expected cell types
                    if data:
                        cell_types = [cell.get("type") for cell in data if isinstance(cell, dict)]
                        unique_types = set(cell_types)
                        print(f"      Cell types found: {list(unique_types)}")

                success_count += 1
            else:
                print(f"   ❌ Binary parser failed: {result.error_message if result else 'No result'}")
        else:
            print("   ❌ Could not analyze file for binary parsing")

    except Exception as e:
        print(f"❌ Binary parser test failed: {e}")
    finally:
        # Runs on the exception path too, so the mock file is never leaked.
        _discard(mock_file)

    # Test 4: Cell parsing functions
    total_tests += 1
    print("\n📋 Test 4: Cell Parsing Functions")

    try:
        # Test integer cell parsing
        int_record = struct.pack('<BBHB', 0, 0, 0, 0xFF) + struct.pack('<h', 123)
        int_cell = processor._parse_integer_cell(int_record)

        # Test number cell parsing
        num_record = struct.pack('<BBHB', 1, 0, 0, 0xFF) + struct.pack('<d', 456.789)
        num_cell = processor._parse_number_cell(num_record)

        # Test label cell parsing
        label_record = struct.pack('<BBHB', 2, 0, 0, 0x27) + b'Test Label\x00'
        label_cell = processor._parse_label_cell(label_record, "cp437")

        # Test formula cell parsing
        formula_record = struct.pack('<BBHB', 0, 1, 0, 0xFF) + struct.pack('<d', 579.0) + b'\x05\x00\x00\x00\x00'
        formula_cell = processor._parse_formula_cell(formula_record)

        parsing_results = []
        if int_cell and int_cell.get("type") == "integer" and int_cell.get("value") == 123:
            parsing_results.append("✅ Integer")
        else:
            parsing_results.append("❌ Integer")

        # Floats are compared with a tolerance rather than for exact equality.
        if num_cell and num_cell.get("type") == "number" and abs(num_cell.get("value", 0) - 456.789) < 0.001:
            parsing_results.append("✅ Number")
        else:
            parsing_results.append("❌ Number")

        if label_cell and label_cell.get("type") == "label" and "Test Label" in str(label_cell.get("value", "")):
            parsing_results.append("✅ Label")
        else:
            parsing_results.append("❌ Label")

        # Only the reported type is checked for formulas, not the value.
        if formula_cell and formula_cell.get("type") == "formula":
            parsing_results.append("✅ Formula")
        else:
            parsing_results.append("❌ Formula")

        print(f"   Cell parsing results: {' | '.join(parsing_results)}")

        # Success if at least 3 out of 4 cell types work
        successful_parsing = sum(1 for result in parsing_results if result.startswith("✅"))
        if successful_parsing >= 3:
            success_count += 1

    except Exception as e:
        print(f"❌ Cell parsing test failed: {e}")

    # Test 5: Encoding detection
    total_tests += 1
    print("\n📋 Test 5: Encoding Detection")

    try:
        # Expected code page for each format variant
        format_encodings = {
            "wks": "cp437",
            "wk1": "cp437",
            "wk3": "cp850",
            "wk4": "cp1252",
            "symphony": "cp437"
        }

        encoding_tests_passed = 0
        for format_variant, expected_encoding in format_encodings.items():
            detected_encoding = processor._detect_lotus_encoding(format_variant)
            if detected_encoding == expected_encoding:
                print(f"   ✅ {format_variant.upper()}: {detected_encoding}")
                encoding_tests_passed += 1
            else:
                print(f"   ❌ {format_variant.upper()}: Expected {expected_encoding}, got {detected_encoding}")

        if encoding_tests_passed >= 4:  # At least 4 out of 5 encodings correct
            success_count += 1

    except Exception as e:
        print(f"❌ Encoding detection test failed: {e}")

    # Summary
    print("\n" + "=" * 60)
    print("🏆 Lotus 1-2-3 Processor Test Results:")
    print(f"   Tests passed: {success_count}/{total_tests}")
    print(f"   Success rate: {(success_count/total_tests)*100:.1f}%")

    if success_count == total_tests:
        print("   🎉 All tests passed! Lotus 1-2-3 processor ready for use.")
    elif success_count >= total_tests * 0.8:
        print("   ✅ Most tests passed. Lotus 1-2-3 processor functional with some limitations.")
    else:
        print("   ⚠️  Several tests failed. Lotus 1-2-3 processor needs attention.")

    print("\n💡 Next Steps:")
    print("   • Install Gnumeric for best Lotus 1-2-3 support:")
    print("     sudo apt-get install gnumeric")
    print("   • Or install LibreOffice for alternative processing:")
    print("     sudo apt-get install libreoffice-calc")
    print("   • Test with real Lotus 1-2-3 files from your archives")
    print("   • Verify spreadsheet formulas and formatting preservation")

    # The script's exit status is derived from this 80% threshold.
    return success_count >= total_tests * 0.8

if __name__ == "__main__":
    # Script entry point: drive the async test to completion and translate its
    # boolean verdict into a process exit status (0 = passed, 1 = failed).
    import asyncio

    exit_status = 1
    if asyncio.run(test_lotus123_processor()):
        exit_status = 0
    sys.exit(exit_status)