# ✅ WordPerfect Production Support:
# - Comprehensive WordPerfect processor with 5-layer fallback chain
# - Support for WP 4.2, 5.0-5.1, 6.0+ (.wpd, .wp, .wp5, .wp6)
# - libwpd integration (wpd2text, wpd2html, wpd2raw)
# - Binary strings extraction and emergency parsing
# - Password detection and encoding intelligence
# - Document structure analysis and integrity checking
#
# 🏗️ Infrastructure Enhancements:
# - Created comprehensive CLAUDE.md development guide
# - Updated implementation status documentation
# - Added WordPerfect processor test suite
# - Enhanced format detection with WP magic signatures
# - Production-ready with graceful dependency handling
#
# 📊 Project Status:
# - 2/4 core processors complete (dBASE + WordPerfect)
# - 25+ legacy format detection engine operational
# - Phase 2 complete: Ready for Lotus 1-2-3 implementation
#
# 🤖 Generated with [Claude Code](https://claude.ai/code)
# Co-Authored-By: Claude <noreply@anthropic.com>
#
# 245 lines · 6.2 KiB · TOML
# Build configuration: the distribution is built through setuptools'
# PEP 517 backend. setuptools 61.0 is the floor because that release is the
# first to read package metadata from the [project] table.
[build-system]
requires = ["setuptools>=61.0", "wheel"]
build-backend = "setuptools.build_meta"

# Core distribution metadata (PEP 621).
[project]
name = "mcp-legacy-files"
version = "0.1.0"
description = "The Ultimate Vintage Document Processing Powerhouse for AI - Transform 25+ legacy formats into modern intelligence"
authors = [
    { name = "MCP Legacy Files Team", email = "legacy@mcp.dev" },
]
readme = "README.md"
license = { text = "MIT" }
keywords = [
    "mcp",
    "legacy",
    "vintage",
    "documents",
    "dbase",
    "wordperfect",
    "lotus123",
    "appleworks",
    "hypercard",
    "ai",
    "processing",
]
classifiers = [
    "Development Status :: 4 - Beta",
    "Intended Audience :: Developers",
    "Intended Audience :: End Users/Desktop",
    "License :: OSI Approved :: MIT License",
    "Operating System :: OS Independent",
    "Programming Language :: Python :: 3",
    "Programming Language :: Python :: 3.11",
    "Programming Language :: Python :: 3.12",
    "Topic :: Office/Business",
    "Topic :: Text Processing",
    "Topic :: Database",
    "Topic :: Scientific/Engineering :: Information Analysis",
]
# Keep in step with the Python version classifiers above and with the
# py311 / 3.11 targets configured for black, ruff and mypy below.
requires-python = ">=3.11"

# Runtime requirements installed with the base package (PEP 508 strings).
# Heavier or format-specific libraries belong in the optional extras instead.
dependencies = [
    # FastMCP framework
    "fastmcp>=0.5.0",

    # Core async libraries
    "asyncio-throttle>=1.0.2",
    "aiofiles>=23.2.0",
    "aiohttp>=3.9.0",

    # Data processing
    "pandas>=2.1.0",
    "numpy>=1.24.0",

    # Legacy format processing - Core libraries
    "dbfread>=2.0.7",  # dBASE file reading
    "simpledbf>=0.2.6",  # Alternative dBASE reader

    # Text processing and AI
    "python-magic>=0.4.27",  # File type detection
    "chardet>=5.2.0",  # Character encoding detection
    "beautifulsoup4>=4.12.0",  # Text cleaning

    # Caching and performance
    "diskcache>=5.6.3",  # Intelligent disk caching
    "python-dateutil>=2.8.2",  # Date parsing for vintage files

    # Logging and monitoring
    "structlog>=23.2.0",  # Structured logging
    "rich>=13.7.0",  # Rich terminal output

    # Configuration and utilities
    "pydantic>=2.5.0",  # Data validation
    "click>=8.1.7",  # CLI interface
    "typer>=0.9.0",  # Modern CLI framework
]

# Optional extras; each key below is selectable as
# `mcp-legacy-files[<extra>]` at install time.
[project.optional-dependencies]
# Legacy format processing libraries
legacy-full = [
    # WordPerfect processing
    "python-docx>=1.1.0",  # For modern conversion fallbacks

    # Spreadsheet processing
    "openpyxl>=3.1.0",  # Excel format fallbacks
    "xlrd>=2.0.1",  # Legacy Excel reading

    # Archive processing
    "py7zr>=0.21.0",  # 7-Zip archives
    "rarfile>=4.1",  # RAR archives

    # Mac format processing
    "biplist>=1.0.3",  # Binary plist processing
    "macholib>=1.16.3",  # Mac binary analysis
]

# AI and machine learning
ai-enhanced = [
    "transformers>=4.36.0",  # HuggingFace transformers
    "torch>=2.1.0",  # PyTorch for AI models
    "scikit-learn>=1.3.0",  # ML utilities
    "spacy>=3.7.0",  # NLP processing
]

# Development dependencies: the test runner and plugins plus the
# formatter, linter and type checker configured under [tool.*] below.
dev = [
    "pytest>=7.4.0",
    "pytest-asyncio>=0.21.0",
    "pytest-cov>=4.1.0",
    "black>=23.12.0",
    "ruff>=0.1.8",
    "mypy>=1.8.0",
    "pre-commit>=3.6.0",
]

# Enterprise features
enterprise = [
    "prometheus-client>=0.19.0",  # Metrics collection
    "opentelemetry-api>=1.21.0",  # Observability
    "cryptography>=41.0.0",  # Security features
    "psutil>=5.9.0",  # System monitoring
]

# Project links published with the package metadata.
[project.urls]
Homepage = "https://github.com/MCP/mcp-legacy-files"
Documentation = "https://github.com/MCP/mcp-legacy-files/blob/main/README.md"
Repository = "https://github.com/MCP/mcp-legacy-files"
Issues = "https://github.com/MCP/mcp-legacy-files/issues"
Changelog = "https://github.com/MCP/mcp-legacy-files/blob/main/CHANGELOG.md"

# Console entry points, written as `command = "module:callable"`.
[project.scripts]
mcp-legacy-files = "mcp_legacy_files.server:main"
legacy-files-cli = "mcp_legacy_files.cli:main"

# src layout: setuptools discovers importable packages under ./src.
[tool.setuptools.packages.find]
where = ["src"]

# Non-Python files shipped inside the mcp_legacy_files package.
# Globs are relative to the package directory.
[tool.setuptools.package-data]
mcp_legacy_files = [
    "data/*.json",
    "data/signatures/*.dat",
    "templates/*.json",
]

# Black code formatter
[tool.black]
line-length = 88
target-version = ['py311']
include = '\.pyi?$'
# Multi-line literal string so the regex needs no escaping; Black compiles
# multi-line patterns in verbose mode, so the inline comment and the
# indentation are ignored by the regex engine.
extend-exclude = '''
/(
  # directories
  \.eggs
  | \.git
  | \.hg
  | \.mypy_cache
  | \.tox
  | \.venv
  | build
  | dist
)/
'''

# Ruff linter
[tool.ruff]
target-version = "py311"
line-length = 88

# Rule selection lives under `lint`: Ruff deprecated the top-level
# `select` / `ignore` / `per-file-ignores` settings in favour of this table.
[tool.ruff.lint]
select = [
    "E",  # pycodestyle errors
    "W",  # pycodestyle warnings
    "F",  # pyflakes
    "I",  # isort
    "B",  # flake8-bugbear
    "C4",  # flake8-comprehensions
    "UP",  # pyupgrade
]
ignore = [
    "E501",  # line too long, handled by black
    "B008",  # do not perform function calls in argument defaults
    "C901",  # too complex
]

[tool.ruff.lint.per-file-ignores]
# F401 = unused import; allowed in package __init__ files.
"__init__.py" = ["F401"]

# MyPy type checker
[tool.mypy]
python_version = "3.11"

# Definitions must be fully annotated; bodies are checked either way.
disallow_untyped_defs = true
disallow_incomplete_defs = true
disallow_untyped_decorators = true
check_untyped_defs = true
no_implicit_optional = true
strict_equality = true

# Diagnostics
warn_return_any = true
warn_unused_configs = true
warn_redundant_casts = true
warn_unused_ignores = true
warn_no_return = true
warn_unreachable = true

# Suppress missing-import errors for these third-party modules.
# Patterns match *import* names, not distribution names: the python-magic
# distribution is imported as `magic`, so that is the name listed here.
[[tool.mypy.overrides]]
module = [
    "dbfread.*",
    "simpledbf.*",
    "magic.*",
    "diskcache.*",
]
ignore_missing_imports = true

# Pytest configuration
[tool.pytest.ini_options]
minversion = "7.0"
addopts = [
    "-ra",
    # Unregistered markers and unknown config keys are errors, so every
    # marker used in the test suite must be declared in `markers` below.
    "--strict-markers",
    "--strict-config",
    # Coverage options are provided by pytest-cov (listed in the dev extra).
    "--cov=mcp_legacy_files",
    "--cov-report=term-missing",
    "--cov-report=html",
    "--cov-report=xml",
]
testpaths = ["tests"]
# Option provided by pytest-asyncio (listed in the dev extra).
asyncio_mode = "auto"
markers = [
    "slow: marks tests as slow (deselect with '-m \"not slow\"')",
    "integration: marks tests as integration tests",
    "legacy_format: marks tests that require legacy format test files",
]

# Coverage configuration
[tool.coverage.run]
source = ["src"]
branch = true
# Test code and package __init__ files are left out of the measurement.
omit = [
    "*/tests/*",
    "*/test_*.py",
    "*/__init__.py",
]

# Lines matching any of these regexes are excluded from the coverage report.
# The two patterns that contain backslashes are written as literal strings
# (single quotes) so they need no TOML escaping.
[tool.coverage.report]
exclude_lines = [
    "pragma: no cover",
    "def __repr__",
    "if self.debug:",
    "if settings.DEBUG",
    "raise AssertionError",
    "raise NotImplementedError",
    "if 0:",
    "if __name__ == .__main__.:",
    'class .*\bProtocol\):',
    '@(abc\.)?abstractmethod',
]