Ryan Malloy af6aadf559 Refactor: Extract processing logic into utility modules
Complete architecture cleanup - eliminated duplicate server files:
- Deleted server_monolithic.py (2249 lines)
- Deleted server_legacy.py (2209 lines)

New utility modules created:
- utils/word_processing.py - Word extraction/conversion (preserves page range fixes)
- utils/excel_processing.py - Excel extraction
- utils/powerpoint_processing.py - PowerPoint extraction
- utils/processing.py - Universal helpers (parse_page_range, health checks, etc.)

Updated mixins to import from utils instead of server_monolithic.
Entry point remains server.py (48 lines) using mixin architecture.

All 53 tests pass. Coverage improved from 11% to 22% by removing duplicate code.
2026-01-11 05:08:18 -07:00

95 lines
1.9 KiB
Python

"""Utility modules for MCP Office Tools."""
from .validation import (
OfficeFileError,
validate_office_file,
validate_office_path,
get_supported_extensions,
get_format_info,
detect_file_format,
is_url,
download_office_file
)
from .file_detection import (
detect_format,
classify_document_type
)
from .caching import (
OfficeFileCache,
get_cache,
resolve_office_file_path
)
from .decorators import (
resolve_field_defaults,
handle_office_errors
)
from .processing import (
TEMP_DIR,
DEBUG,
_extract_basic_metadata,
_calculate_health_score,
_get_health_recommendations,
_smart_truncate_content,
_parse_page_range,
_get_processing_recommendation,
)
from .word_processing import (
_extract_word_text,
_extract_word_images,
_extract_word_metadata,
_convert_docx_to_markdown,
_convert_docx_with_python_docx,
_convert_doc_to_markdown,
_get_ultra_fast_summary,
_find_bookmark_content_range,
_find_chapter_content_range,
_get_available_headings,
_has_page_break,
_analyze_document_size,
_paragraph_to_markdown,
_table_to_markdown,
_html_to_markdown,
_extract_markdown_structure,
)
from .excel_processing import (
_extract_excel_text,
_extract_excel_images,
_extract_excel_metadata,
)
from .powerpoint_processing import (
_extract_powerpoint_text,
_extract_powerpoint_images,
_extract_powerpoint_metadata,
)
# Explicit list of the names this package re-exports.
#
# Every name imported at the top of this module is listed, grouped by the
# submodule it comes from, so that each import is a declared re-export rather
# than an apparently unused import.  The underscore-prefixed helpers are
# listed on purpose: a star-import skips underscore names unless they appear
# in ``__all__``, and listing them keeps this declaration in sync with the
# import statements above.
__all__ = [
    # Validation
    "OfficeFileError",
    "validate_office_file",
    "validate_office_path",
    "get_supported_extensions",
    "get_format_info",
    "detect_file_format",
    "is_url",
    "download_office_file",
    # File detection
    "detect_format",
    "classify_document_type",
    # Caching
    "OfficeFileCache",
    "get_cache",
    "resolve_office_file_path",
    # Decorators
    "resolve_field_defaults",
    "handle_office_errors",
    # Processing (universal helpers)
    "TEMP_DIR",
    "DEBUG",
    "_extract_basic_metadata",
    "_calculate_health_score",
    "_get_health_recommendations",
    "_smart_truncate_content",
    "_parse_page_range",
    "_get_processing_recommendation",
    # Word processing
    "_extract_word_text",
    "_extract_word_images",
    "_extract_word_metadata",
    "_convert_docx_to_markdown",
    "_convert_docx_with_python_docx",
    "_convert_doc_to_markdown",
    "_get_ultra_fast_summary",
    "_find_bookmark_content_range",
    "_find_chapter_content_range",
    "_get_available_headings",
    "_has_page_break",
    "_analyze_document_size",
    "_paragraph_to_markdown",
    "_table_to_markdown",
    "_html_to_markdown",
    "_extract_markdown_structure",
    # Excel processing
    "_extract_excel_text",
    "_extract_excel_images",
    "_extract_excel_metadata",
    # PowerPoint processing
    "_extract_powerpoint_text",
    "_extract_powerpoint_images",
    "_extract_powerpoint_metadata",
]