Implement cursor-based pagination system for large document processing
- Add comprehensive pagination infrastructure based on MCP Playwright patterns
- Integrate automatic pagination into convert_to_markdown tool for documents >25k tokens
- Support cursor-based navigation with session isolation and security
- Prevent MCP token limit errors for massive documents (200+ pages)
- Maintain document structure and context across paginated sections
- Add configurable page sizes, return_all bypass, and intelligent token estimation
- Enable seamless navigation through extremely dense documents that exceed limits by 100x
parent 0748eec48d
commit 1ad2abb617
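
In practice the tool drives itself: the first call returns the first page plus a cursor, and the client re-calls with that cursor until has_more is false. A minimal sketch of that loop (call_tool stands in for whatever invocation API your MCP client exposes, and file_path is an assumed parameter name; run inside an async context):

result = await call_tool("convert_to_markdown", {"file_path": "huge_report.docx"})
while True:
    print(result["markdown"])
    pagination = result.get("pagination", {})
    if not pagination.get("has_more"):
        break
    # Continue from the server-issued cursor, reusing the same session
    result = await call_tool("convert_to_markdown", {
        "file_path": "huge_report.docx",
        "cursor_id": pagination["cursor_id"],
        "session_id": result["metadata"]["session_id"],
    })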

src/mcp_office_tools/mixins/word.py
@@ -2,12 +2,13 @@
import os
import time
-from typing import Any
+from typing import Any, Optional

from fastmcp.contrib.mcp_mixin import MCPMixin, mcp_tool
from pydantic import Field

from ..utils import OfficeFileError, resolve_office_file_path, validate_office_file, detect_format
+from ..pagination import paginate_document_conversion, PaginationParams


class WordMixin(MCPMixin):
@@ -15,7 +16,7 @@ class WordMixin(MCPMixin):

    @mcp_tool(
        name="convert_to_markdown",
-        description="Convert Office documents to Markdown format with intelligent processing recommendations. ⚠️ RECOMMENDED WORKFLOW FOR LARGE DOCUMENTS (>5 pages): 1. First call: Use summary_only=true to get document overview and structure 2. Then: Use page_range (e.g., '1-10', '15-25') to process specific sections. This prevents response size errors and provides efficient processing. Small documents (<5 pages) can be processed without page_range restrictions."
+        description="Convert Office documents to Markdown format with intelligent processing and automatic pagination for large documents. ⚠️ LARGE DOCUMENT HANDLING: Documents exceeding 25k tokens are automatically paginated into manageable sections. Use cursor_id to continue through pages. For massive documents (200+ pages), pagination prevents token limit errors while preserving document structure and context."
    )
    async def convert_to_markdown(
        self,
@@ -28,7 +29,12 @@ class WordMixin(MCPMixin):
        bookmark_name: str = Field(default="", description="Extract content for a specific bookmark/chapter (e.g., 'Chapter1_Start'). More reliable than page ranges."),
        chapter_name: str = Field(default="", description="Extract content for a chapter by heading text (e.g., 'Chapter 1', 'Introduction'). Works when bookmarks aren't available."),
        summary_only: bool = Field(default=False, description="Return only metadata and truncated summary. STRONGLY RECOMMENDED for large docs (>10 pages)"),
-        output_dir: str = Field(default="", description="Output directory for image files (if image_mode='files')")
+        output_dir: str = Field(default="", description="Output directory for image files (if image_mode='files')"),
+        # Pagination parameters
+        limit: int = Field(default=50, description="Maximum number of document sections to return per page"),
+        cursor_id: Optional[str] = Field(default=None, description="Cursor ID for pagination continuation"),
+        session_id: Optional[str] = Field(default=None, description="Session ID for pagination isolation"),
+        return_all: bool = Field(default=False, description="Return entire document bypassing pagination (WARNING: may exceed token limits)")
    ) -> dict[str, Any]:
        start_time = time.time()

@@ -76,7 +82,59 @@ class WordMixin(MCPMixin):
            preserve_structure, page_numbers, summary_only, output_dir
        )

-        # Build result based on mode
+        # Check if pagination is needed
+        markdown_content = markdown_result["content"]
+        estimated_tokens = len(markdown_content) // 4  # Rough token estimation
+
+        # Generate session ID if not provided
+        if not session_id:
+            session_id = f"word-{int(time.time())}-{os.getpid()}"
+
+        # Create pagination parameters
+        pagination_params = PaginationParams(
+            limit=limit,
+            cursor_id=cursor_id,
+            session_id=session_id,
+            return_all=return_all
+        )
+
+        # Apply pagination if content is large or pagination is explicitly requested
+        # Skip pagination only if return_all=True AND no cursor_id AND content is manageable
+        should_paginate = (cursor_id or estimated_tokens > 25000 or (not return_all and estimated_tokens > 8000))
+
+        if should_paginate:
+            paginated_result = paginate_document_conversion(
+                tool_name="convert_to_markdown",
+                document_path=local_path,
+                markdown_content=markdown_content,
+                params=pagination_params,
+                session_id=session_id,
+                total_estimated_tokens=estimated_tokens
+            )
+
+            # If pagination was applied, return the paginated result
+            if "pagination" in paginated_result:
+                # Add metadata to the paginated result
+                paginated_result["metadata"] = {
+                    "original_file": os.path.basename(local_path),
+                    "format": format_info["format_name"],
+                    "conversion_method": markdown_result["method_used"],
+                    "conversion_time": round(time.time() - start_time, 3),
+                    "summary_only": summary_only,
+                    "document_analysis": doc_analysis,
+                    "processing_recommendation": processing_recommendation,
+                    "session_id": session_id
+                }
+
+                # Add additional metadata from original result
+                if "images" in markdown_result:
+                    paginated_result["metadata"]["images_found"] = len(markdown_result["images"])
+                if "structure" in markdown_result:
+                    paginated_result["metadata"]["structure_preserved"] = bool(markdown_result["structure"])
+
+                return paginated_result
+
+        # Build result based on mode (non-paginated or bypass pagination)
        result = {
            "metadata": {
                "original_file": os.path.basename(local_path),
@@ -85,7 +143,9 @@ class WordMixin(MCPMixin):
                "conversion_time": round(time.time() - start_time, 3),
                "summary_only": summary_only,
                "document_analysis": doc_analysis,
-                "processing_recommendation": processing_recommendation
+                "processing_recommendation": processing_recommendation,
+                "session_id": session_id,
+                "estimated_tokens": estimated_tokens
            }
        }
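
The should_paginate condition in the diff above reduces to a small heuristic worth stating on its own. A sketch restating the diff's logic (the ~4 characters per token approximation and both thresholds come straight from the code):

from typing import Optional

def needs_pagination(markdown: str, cursor_id: Optional[str], return_all: bool) -> bool:
    """Mirror of the commit's should_paginate condition."""
    estimated_tokens = len(markdown) // 4  # rough: ~4 chars per token
    return bool(
        cursor_id  # explicit continuation always paginates
        or estimated_tokens > 25000  # hard MCP response limit
        or (not return_all and estimated_tokens > 8000)  # conservative default
    )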

src/mcp_office_tools/pagination.py (new file, 494 lines)
@@ -0,0 +1,494 @@
"""Document Pagination System for MCP Office Tools.

Implements cursor-based pagination for large Office documents to prevent
MCP token limit overflows while maintaining document context and structure.
"""

import time
import uuid
from dataclasses import dataclass, field
from typing import Any, Dict, List, Optional, Callable, TypeVar
from datetime import datetime, timedelta

T = TypeVar('T')


@dataclass
class DocumentCursor:
    """Cursor state for document pagination."""

    id: str
    session_id: str
    tool_name: str
    document_path: str
    query_fingerprint: str
    position: Dict[str, Any]
    created_at: datetime
    expires_at: datetime
    last_accessed: datetime
    items_fetched: int = 0
    performance_metrics: Dict[str, Any] = field(default_factory=lambda: {
        "avg_fetch_time_ms": 0,
        "total_fetches": 0,
        "optimal_chunk_size": 50
    })


@dataclass
class PaginationParams:
    """Standard pagination parameters for Office tools."""

    limit: int = 50
    cursor_id: Optional[str] = None
    session_id: Optional[str] = None
    return_all: bool = False
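
# Illustrative usage: a first request typically sends PaginationParams(limit=50);
# a continuation re-sends the same query plus the server-issued cursor, e.g.
# PaginationParams(limit=50, cursor_id="a1b2c3d4e5f6", session_id="word-1700000000-42"),
# where both identifiers are made-up examples.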


@dataclass
class DocumentSection:
    """Represents a section of a document for pagination."""

    content: str
    section_type: str  # 'paragraph', 'heading', 'table', 'image'
    position: int
    metadata: Dict[str, Any] = field(default_factory=dict)


class DocumentPaginationManager:
    """Manages cursor-based pagination for Office documents."""

    def __init__(self):
        self._cursors: Dict[str, DocumentCursor] = {}
        self._max_tokens_per_response = 25000  # MCP limit
        self._default_page_size = 50

    def create_cursor(
        self,
        session_id: str,
        tool_name: str,
        document_path: str,
        query_params: Dict[str, Any],
        initial_position: Dict[str, Any]
    ) -> str:
        """Create a new cursor for document pagination."""

        cursor_id = str(uuid.uuid4())[:12]
        now = datetime.now()

        # Create query fingerprint for consistency checking
        query_fingerprint = self._create_query_fingerprint(query_params)

        cursor = DocumentCursor(
            id=cursor_id,
            session_id=session_id,
            tool_name=tool_name,
            document_path=document_path,
            query_fingerprint=query_fingerprint,
            position=initial_position,
            created_at=now,
            expires_at=now + timedelta(hours=24),
            last_accessed=now
        )

        self._cursors[cursor_id] = cursor
        return cursor_id

    def get_cursor(self, cursor_id: str, session_id: str) -> Optional[DocumentCursor]:
        """Retrieve and validate a cursor."""

        cursor = self._cursors.get(cursor_id)
        if not cursor:
            return None

        # Validate session access
        if cursor.session_id != session_id:
            raise ValueError(f"Cursor {cursor_id} not accessible from session {session_id}")

        # Check expiration
        if cursor.expires_at < datetime.now():
            self._cursors.pop(cursor_id, None)
            return None

        # Update access time
        cursor.last_accessed = datetime.now()
        return cursor

    def update_cursor_position(
        self,
        cursor_id: str,
        new_position: Dict[str, Any],
        items_count: int
    ) -> None:
        """Update cursor position after successful fetch."""

        cursor = self._cursors.get(cursor_id)
        if cursor:
            cursor.position = new_position
            cursor.items_fetched += items_count
            cursor.last_accessed = datetime.now()

    def invalidate_cursor(self, cursor_id: str) -> None:
        """Remove a cursor (when pagination complete)."""
        self._cursors.pop(cursor_id, None)

    def cleanup_expired_cursors(self) -> None:
        """Remove expired cursors."""
        now = datetime.now()
        expired = [cid for cid, cursor in self._cursors.items() if cursor.expires_at < now]
        for cid in expired:
            self._cursors.pop(cid)

    def _create_query_fingerprint(self, params: Dict[str, Any]) -> str:
        """Create fingerprint for query parameters consistency."""
        # Exclude pagination-specific params
        filtered_params = {
            k: v for k, v in params.items()
            if k not in ['limit', 'cursor_id', 'session_id', 'return_all']
        }
        # Sort for consistent fingerprinting
        sorted_params = dict(sorted(filtered_params.items()))
        return str(hash(str(sorted_params)))
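
    # Note: str(hash(...)) above is salted per interpreter process (PYTHONHASHSEED),
    # so fingerprints are stable only within one server process. That is enough here
    # because cursors live in this manager's in-memory _cursors dict; a multi-process
    # deployment would need a deterministic digest instead, e.g.
    # hashlib.sha256(str(sorted_params).encode()).hexdigest() (an alternative, not used here).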

    def estimate_response_tokens(self, content: str) -> int:
        """Estimate token count for content (rough approximation)."""
        return len(content) // 4  # Rough token estimation


class DocumentSectionExtractor:
    """Extracts document sections with intelligent chunking."""

    def __init__(self, max_tokens_per_section: int = 1000):
        self.max_tokens_per_section = max_tokens_per_section

    def extract_sections(
        self,
        markdown_content: str,
        start_position: int = 0,
        limit: int = 50
    ) -> List[DocumentSection]:
        """Extract document sections for pagination."""

        sections = []
        lines = markdown_content.split('\n')
        current_section = []
        current_tokens = 0
        position = start_position
        sections_created = 0

        for line_idx, line in enumerate(lines[start_position:], start_position):
            if sections_created >= limit:
                break

            line_tokens = len(line) // 4  # Rough estimation

            # Check if this line would exceed token limit
            if current_tokens + line_tokens > self.max_tokens_per_section and current_section:
                # Create section from accumulated content
                section_content = '\n'.join(current_section)
                section_type = self._detect_section_type(section_content)

                sections.append(DocumentSection(
                    content=section_content,
                    section_type=section_type,
                    position=position,
                    metadata={
                        "start_line": position,
                        "end_line": line_idx - 1,
                        "estimated_tokens": current_tokens
                    }
                ))

                # Reset for next section
                current_section = []
                current_tokens = 0
                position = line_idx
                sections_created += 1

            # Add line to current section
            current_section.append(line)
            current_tokens += line_tokens

        # Add final section if there's remaining content
        if current_section and sections_created < limit:
            section_content = '\n'.join(current_section)
            section_type = self._detect_section_type(section_content)

            sections.append(DocumentSection(
                content=section_content,
                section_type=section_type,
                position=position,
                metadata={
                    "start_line": position,
                    "end_line": len(lines) - 1,
                    "estimated_tokens": current_tokens
                }
            ))

        return sections
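
    # Note: sections are capped near max_tokens_per_section using the same
    # len // 4 estimate, but individual lines are never split, so one very long
    # line (a base64 image, a wide table row) can still push a section past the cap.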

    def _detect_section_type(self, content: str) -> str:
        """Detect the primary type of content in a section."""
        content = content.strip()

        if content.startswith('#'):
            return 'heading'
        elif '|' in content and '---' in content:
            return 'table'
        elif content.startswith('!['):
            return 'image'
        elif content.startswith('- ') or content.startswith('* ') or content.startswith('1. '):
            return 'list'
        elif content.startswith('>'):
            return 'quote'
        elif content.startswith('```'):
            return 'code'
        else:
            return 'paragraph'


def paginate_document_conversion(
    tool_name: str,
    document_path: str,
    markdown_content: str,
    params: PaginationParams,
    session_id: str,
    total_estimated_tokens: int
) -> Dict[str, Any]:
    """
    Apply pagination to document conversion results.

    Args:
        tool_name: Name of the tool requesting pagination
        document_path: Path to the source document
        markdown_content: Full markdown content to paginate
        params: Pagination parameters
        session_id: Session identifier
        total_estimated_tokens: Estimated tokens for full content

    Returns:
        Paginated response with cursor information
    """

    # Use the shared module-level manager so cursors created on one page are
    # still there when the next page is requested; a fresh manager per call
    # would lose all cursor state between requests.
    manager = global_pagination_manager
    extractor = DocumentSectionExtractor()

    # Check if user wants to bypass pagination
    if params.return_all:
        return _handle_bypass_pagination(
            markdown_content,
            total_estimated_tokens,
            tool_name
        )

    # Determine if this is a fresh query or cursor continuation
    if not params.cursor_id:
        return _handle_fresh_pagination(
            manager, extractor, tool_name, document_path,
            markdown_content, params, session_id, total_estimated_tokens
        )
    else:
        return _handle_cursor_continuation(
            manager, extractor, tool_name, document_path,
            markdown_content, params, session_id
        )


def _handle_fresh_pagination(
    manager: DocumentPaginationManager,
    extractor: DocumentSectionExtractor,
    tool_name: str,
    document_path: str,
    markdown_content: str,
    params: PaginationParams,
    session_id: str,
    total_estimated_tokens: int
) -> Dict[str, Any]:
    """Handle first page of pagination."""

    # Extract first page of sections
    sections = extractor.extract_sections(
        markdown_content,
        start_position=0,
        limit=params.limit
    )

    page_content = '\n\n'.join(section.content for section in sections)
    page_tokens = manager.estimate_response_tokens(page_content)

    # Check if there's more content for pagination
    total_lines = len(markdown_content.split('\n'))
    last_position = sections[-1].metadata["end_line"] if sections else 0
    has_more = last_position < total_lines - 1

    cursor_id = None
    if has_more:
        # Create cursor for continuation
        query_params = {
            k: v for k, v in params.__dict__.items()
            if k not in ['cursor_id', 'limit', 'return_all']
        }

        cursor_id = manager.create_cursor(
            session_id=session_id,
            tool_name=tool_name,
            document_path=document_path,
            query_params=query_params,
            initial_position={"last_line": last_position, "total_lines": total_lines}
        )

    return {
        "markdown": page_content,
        "pagination": {
            "page": 1,
            "total_sections": len(sections),
            "estimated_total_tokens": total_estimated_tokens,
            "page_tokens": page_tokens,
            "has_more": has_more,
            "cursor_id": cursor_id,
            "progress": f"{len(sections)} sections on page 1"
        },
        "metadata": {
            "content_truncated": has_more,
            "sections_included": [
                {
                    "type": section.section_type,
                    "position": section.position,
                    "tokens": section.metadata.get("estimated_tokens", 0)
                }
                for section in sections
            ]
        }
    }


def _handle_cursor_continuation(
    manager: DocumentPaginationManager,
    extractor: DocumentSectionExtractor,
    tool_name: str,
    document_path: str,
    markdown_content: str,
    params: PaginationParams,
    session_id: str
) -> Dict[str, Any]:
    """Handle continuation with existing cursor."""

    cursor = manager.get_cursor(params.cursor_id, session_id)
    if not cursor:
        # Cursor expired or invalid, start fresh
        return {
            "error": "Cursor expired or invalid. Please start a fresh query.",
            "suggestion": f"Use: {tool_name}({{...same_params, cursor_id: null}})"
        }

    # Continue from cursor position
    start_position = cursor.position["last_line"] + 1
    total_lines = cursor.position["total_lines"]

    if start_position >= total_lines:
        # End of document reached
        manager.invalidate_cursor(cursor.id)
        return {
            "markdown": "",
            "pagination": {
                "page": "final",
                "message": "End of document reached",
                "total_fetched": cursor.items_fetched,
                "has_more": False
            }
        }

    # Extract next page
    sections = extractor.extract_sections(
        markdown_content,
        start_position=start_position,
        limit=params.limit
    )

    if not sections:
        # No more content
        manager.invalidate_cursor(cursor.id)
        return {
            "markdown": "",
            "pagination": {
                "page": "final",
                "message": "No more content available",
                "has_more": False
            }
        }

    page_content = '\n\n'.join(section.content for section in sections)
    page_tokens = manager.estimate_response_tokens(page_content)

    # Update cursor position
    last_position = sections[-1].metadata["end_line"]
    has_more = last_position < total_lines - 1

    if has_more:
        manager.update_cursor_position(
            cursor.id,
            {"last_line": last_position, "total_lines": total_lines},
            len(sections)
        )
        next_cursor_id = cursor.id
    else:
        manager.invalidate_cursor(cursor.id)
        next_cursor_id = None

    current_page = (cursor.items_fetched // params.limit) + 2  # +2: page 1 never updates the cursor

    return {
        "markdown": page_content,
        "pagination": {
            "page": current_page,
            "total_sections": len(sections),
            "page_tokens": page_tokens,
            "has_more": has_more,
            "cursor_id": next_cursor_id,
            "total_fetched": cursor.items_fetched + len(sections),
            "progress": f"{len(sections)} sections on page {current_page}"
        },
        "metadata": {
            "content_truncated": has_more,
            "sections_included": [
                {
                    "type": section.section_type,
                    "position": section.position,
                    "tokens": section.metadata.get("estimated_tokens", 0)
                }
                for section in sections
            ]
        }
    }


def _handle_bypass_pagination(
    markdown_content: str,
    total_estimated_tokens: int,
    tool_name: str
) -> Dict[str, Any]:
    """Handle bypass pagination request with warnings."""

    # Escalate the warning marker for extremely large responses
    warning_level = "🚨" if total_estimated_tokens > 100000 else "⚠️"

    return {
        "markdown": markdown_content,
        "warning": f"{warning_level} PAGINATION BYPASSED - Large response (~{total_estimated_tokens:,} tokens)",
        "recommendations": [
            f"Consider using pagination: {tool_name}({{...same_params, return_all: false, limit: 25}})",
            "This response may exceed MCP client token limits",
            "Content may be truncated by the MCP client"
        ],
        "metadata": {
            "content_truncated": False,
            "pagination_bypassed": True,
            "estimated_tokens": total_estimated_tokens
        }
    }


# Global pagination manager instance
global_pagination_manager = DocumentPaginationManager()
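
Taken together, pagination.py can be driven directly, which is the quickest way to sanity-check the cursor flow outside the MCP server. A minimal sketch, assuming only the module above (the path, session id, and synthetic content are made up; continuation works because paginate_document_conversion uses the shared module-level manager):

# demo_pagination.py - walk a synthetic document through every page
from mcp_office_tools.pagination import PaginationParams, paginate_document_conversion

# Synthetic markdown large enough to trigger pagination (far over the 25k limit)
markdown = "\n".join(f"Paragraph {i}: " + "lorem ipsum " * 40 for i in range(2000))

session_id = "demo-session"   # made-up session identifier
cursor_id = None
page = 0

while True:
    result = paginate_document_conversion(
        tool_name="convert_to_markdown",
        document_path="/tmp/demo.docx",   # illustrative path; never opened here
        markdown_content=markdown,
        params=PaginationParams(limit=25, cursor_id=cursor_id),
        session_id=session_id,
        total_estimated_tokens=len(markdown) // 4,
    )
    page += 1
    pagination = result.get("pagination", {})
    print(f"page {page}: ~{pagination.get('page_tokens', 0)} tokens, has_more={pagination.get('has_more')}")
    if not pagination.get("has_more"):
        break
    cursor_id = pagination["cursor_id"]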

test_pagination.py (new file, 64 lines)
@@ -0,0 +1,64 @@
#!/usr/bin/env python3
"""Test pagination system for MCP Office Tools convert_to_markdown."""

import inspect
import sys


def test_pagination():
    """Test the pagination system integration."""

    print("🔧 Testing MCP Office Tools Pagination Integration")
    print("=" * 60)

    try:
        # Import the server components
        from mcp_office_tools.server import app
        from mcp_office_tools.mixins.word import WordMixin
        from mcp_office_tools.pagination import DocumentPaginationManager, paginate_document_conversion

        print("✅ Successfully imported all pagination components:")
        print("   • DocumentPaginationManager")
        print("   • paginate_document_conversion")
        print("   • WordMixin with pagination")

        # Check if WordMixin has the convert_to_markdown method
        word_mixin = WordMixin()
        convert_method = getattr(word_mixin, 'convert_to_markdown', None)

        if convert_method:
            print("✅ Found convert_to_markdown method")

            # Check method signature for pagination parameters
            sig = inspect.signature(convert_method)
            pagination_params = []
            for param_name, param in sig.parameters.items():
                if param_name in ['limit', 'cursor_id', 'session_id', 'return_all']:
                    pagination_params.append(param_name)

            print(f"✅ Pagination parameters found: {', '.join(pagination_params)}")

        else:
            print("❌ convert_to_markdown method not found")
            return False

        print("\n🎯 Pagination System Integration Complete!")
        print("📊 Features:")
        print("   • Automatic large document detection (>25k tokens)")
        print("   • Cursor-based navigation through document sections")
        print("   • Session-isolated pagination state")
        print("   • Configurable page sizes and limits")
        print("   • Bypass option for small documents")
        print("   • Token estimation and response size management")

        return True

    except ImportError as e:
        print(f"❌ Import error: {e}")
        return False
    except Exception as e:
        print(f"❌ Unexpected error: {e}")
        return False


if __name__ == "__main__":
    success = test_pagination()
    sys.exit(0 if success else 1)
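
The script above only verifies wiring; a functional check of the pagination math itself could be sketched along these lines (synthetic content, no real document involved):

def test_pagination_round_trip():
    """Walk a synthetic oversized document through every page (sketch)."""
    from mcp_office_tools.pagination import PaginationParams, paginate_document_conversion

    markdown = "\n".join("word " * 100 for _ in range(1000))  # ~125k estimated tokens
    session_id, cursor_id, pages = "test-session", None, 0

    while True:
        result = paginate_document_conversion(
            tool_name="convert_to_markdown",
            document_path="synthetic.docx",  # illustrative; the module never opens it
            markdown_content=markdown,
            params=PaginationParams(limit=20, cursor_id=cursor_id),
            session_id=session_id,
            total_estimated_tokens=len(markdown) // 4,
        )
        assert "error" not in result
        pages += 1
        if not result["pagination"]["has_more"]:
            break
        cursor_id = result["pagination"]["cursor_id"]

    assert pages > 1  # the document really was split across pages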