Complete architecture cleanup - eliminated duplicate server files:
- Deleted server_monolithic.py (2249 lines)
- Deleted server_legacy.py (2209 lines)

New utility modules created:
- utils/word_processing.py - Word extraction/conversion (preserves page range fixes)
- utils/excel_processing.py - Excel extraction
- utils/powerpoint_processing.py - PowerPoint extraction
- utils/processing.py - Universal helpers (parse_page_range, health checks, etc.)

Updated mixins to import from utils instead of server_monolithic.
Entry point remains server.py (48 lines) using mixin architecture.

All 53 tests pass. Coverage improved from 11% to 22% by removing duplicate code.
95 lines
1.9 KiB
Python
95 lines
1.9 KiB
Python
"""Utility modules for MCP Office Tools."""

# Re-export helpers from the submodules so they can be imported directly
# from this package rather than from each individual module.

# Validation
from .validation import (
    OfficeFileError,
    validate_office_file,
    validate_office_path,
    get_supported_extensions,
    get_format_info,
    detect_file_format,
    is_url,
    download_office_file,
)

# File detection
from .file_detection import (
    detect_format,
    classify_document_type,
)

# Caching
from .caching import (
    OfficeFileCache,
    get_cache,
    resolve_office_file_path,
)

# Decorators
from .decorators import (
    resolve_field_defaults,
    handle_office_errors,
)

# Shared constants and format-independent helpers
from .processing import (
    TEMP_DIR,
    DEBUG,
    _extract_basic_metadata,
    _calculate_health_score,
    _get_health_recommendations,
    _smart_truncate_content,
    _parse_page_range,
    _get_processing_recommendation,
)

# Word extraction / conversion helpers
from .word_processing import (
    _extract_word_text,
    _extract_word_images,
    _extract_word_metadata,
    _convert_docx_to_markdown,
    _convert_docx_with_python_docx,
    _convert_doc_to_markdown,
    _get_ultra_fast_summary,
    _find_bookmark_content_range,
    _find_chapter_content_range,
    _get_available_headings,
    _has_page_break,
    _analyze_document_size,
    _paragraph_to_markdown,
    _table_to_markdown,
    _html_to_markdown,
    _extract_markdown_structure,
)

# Excel extraction helpers
from .excel_processing import (
    _extract_excel_text,
    _extract_excel_images,
    _extract_excel_metadata,
)

# PowerPoint extraction helpers
from .powerpoint_processing import (
    _extract_powerpoint_text,
    _extract_powerpoint_images,
    _extract_powerpoint_metadata,
)

# Names exported by ``from <package> import *``.
#
# NOTE(review): TEMP_DIR, DEBUG and the underscore-prefixed helpers imported
# above are not listed here, so they are only reachable through an explicit
# ``from <package> import name`` -- confirm that this omission is intended.
__all__ = [
    # Validation
    "OfficeFileError",
    "validate_office_file",
    "validate_office_path",
    "get_supported_extensions",
    "get_format_info",
    "detect_file_format",
    "is_url",
    "download_office_file",

    # File detection
    "detect_format",
    "classify_document_type",

    # Caching
    "OfficeFileCache",
    "get_cache",
    "resolve_office_file_path",

    # Decorators
    "resolve_field_defaults",
    "handle_office_errors",
]