From 6fb76d87601253d70f6e8829b3535d6a02f13345 Mon Sep 17 00:00:00 2001
From: Ryan Malloy <ryan@supported.systems>
Date: Sun, 11 Jan 2026 10:23:47 -0700
Subject: [PATCH] Add MCP resources documentation and fix section format suffix

- Document MCP resource system in README with URI patterns, format
  suffixes, range syntax, and section detection strategies
- Add index_document to Universal Tools table
- Update architecture section to include resources.py
- Fix section:// resource to support .md/.txt/.html format suffixes
  (matching chapter:// behavior)
---
 README.md                      | 98 +++++++++++++++++++++++++++++++++-
 src/mcp_office_tools/server.py |  9 ++--
 2 files changed, 101 insertions(+), 6 deletions(-)

diff --git a/README.md b/README.md
index 6a7f512..fb3ce60 100644
--- a/README.md
+++ b/README.md
@@ -75,6 +75,7 @@ claude mcp add office-tools "uvx mcp-office-tools"
 | `detect_office_format` | Identify format, version, encryption status |
 | `analyze_document_health` | Check integrity, corruption, password protection |
 | `get_supported_formats` | List all supported file extensions |
+| `index_document` | Scan document and create resource URIs for on-demand fetching |
 
 ### Word Tools
 
@@ -124,6 +125,62 @@ Here's what works and what's "good enough" — legacy formats from Office 97-200
 
 ---
 
+## 🔗 MCP Resources
+
+Instead of returning entire documents in tool responses, you can index a document once and fetch content on demand via URI-based resources. This keeps context windows manageable when working with large files.
+
+### How It Works
+
+1. **Index the document** — `index_document` scans the file and returns URIs
+2. **Fetch what you need** — Request specific chapters, sheets, slides, or images by URI
+3. **Format on demand** — Append `.txt` or `.html` to a resource URI to get plain text or HTML instead of the default Markdown
+
+### Resource URI Patterns
+
+| URI Pattern | Description | Example |
+|-------------|-------------|---------|
+| `chapter://{doc_id}/{n}` | Single chapter/section | `chapter://abc123/3` |
+| `chapters://{doc_id}/{range}` | Multiple chapters | `chapters://abc123/1-5` |
+| `section://{doc_id}/{n}` | Section by heading style | `section://abc123/2` |
+| `paragraph://{doc_id}/{ch}/{p}` | Specific paragraph | `paragraph://abc123/3/7` |
+| `sheet://{doc_id}/{name}` | Excel sheet as markdown table | `sheet://abc123/Revenue` |
+| `slide://{doc_id}/{n}` | PowerPoint slide | `slide://abc123/5` |
+| `slides://{doc_id}/{range}` | Multiple slides | `slides://abc123/1,3,5` |
+| `image://{doc_id}/{n}` | Embedded image | `image://abc123/0` |
+
+### Format Suffixes
+
+Append a format suffix to the resource ID at the end of the URI (for example on `chapter://` and `section://` resources) to convert on the fly:
+
+| Suffix | Output |
+|--------|--------|
+| `.md` (default) | Markdown |
+| `.txt` | Plain text (no formatting) |
+| `.html` | Basic HTML |
+
+Examples:
+- `chapter://abc123/3` → Markdown (default)
+- `chapter://abc123/3.txt` → Plain text
+- `chapter://abc123/3.html` → HTML
+
+### Range Syntax
+
+Fetch multiple items at once:
+- `1-5` → Items 1 through 5
+- `1,3,5` → Specific items
+- `1-3,7,9-10` → Mixed ranges
+
+### Section Detection
+
+The indexer detects document structure automatically:
+
+1. **Heading 1 styles** (primary) — Business docs, manuals, technical documents
+2. **"Chapter X" text patterns** (fallback) — Books, manuscripts, narratives
+
+Pass `text_patterns_only=True` to `index_document` to skip heading style detection for documents with messy formatting.
+
+---
+
 ## 🎯 MCP Prompts
 
 Pre-built workflows that chain multiple tools together. Use these as starting points:
@@ -283,6 +340,42 @@ result = await extract_text("https://example.com/report.docx")
 # Cache expires after 1 hour by default
 ```
 
+### Index Document for On-Demand Resource Fetching
+
+```python
+# Index the document - returns URIs for all content
+result = await index_document("novel.docx")
+
+# Returns:
+# {
+#   "doc_id": "56036b0f171a",
+#   "resources": {
+#     "chapter": [
+#       {"id": "1", "title": "Chapter 1: The Beginning", "uri": "chapter://56036b0f171a/1"},
+#       {"id": "2", "title": "Chapter 2: Rising Action", "uri": "chapter://56036b0f171a/2"},
+#       ...
+#     ],
+#     "image": [
+#       {"id": "0", "uri": "image://56036b0f171a/0"},
+#       ...
+#     ]
+#   }
+# }
+
+# Now fetch specific content via MCP resources:
+# - chapter://56036b0f171a/1      → Chapter 1 as markdown
+# - chapter://56036b0f171a/1.txt  → Chapter 1 as plain text
+# - chapters://56036b0f171a/1-3   → Chapters 1-3 combined
+# - image://56036b0f171a/0        → First embedded image
+
+# Works with Excel and PowerPoint too:
+await index_document("data.xlsx")
+# → sheet://abc123/Revenue, sheet://abc123/Expenses, ...
+
+await index_document("presentation.pptx")
+# → slide://def456/1, slide://def456/2, ...
+```
+
 ---
 
 ## 🧪 Testing
@@ -311,9 +404,10 @@ The mixin pattern keeps things modular — universal tools work on everything, f
 ```
 mcp-office-tools/
 ├── src/mcp_office_tools/
-│   ├── server.py              # FastMCP server entry point
+│   ├── server.py              # FastMCP server + resource templates
+│   ├── resources.py           # Resource store for on-demand content
 │   ├── mixins/
-│   │   ├── universal.py       # Format-agnostic tools
+│   │   ├── universal.py       # Format-agnostic tools (incl. index_document)
 │   │   ├── word.py            # Word-specific tools
 │   │   ├── excel.py           # Excel-specific tools
 │   │   └── powerpoint.py      # PowerPoint tools (WIP)
diff --git a/src/mcp_office_tools/server.py b/src/mcp_office_tools/server.py
index 7b554dc..4586623 100644
--- a/src/mcp_office_tools/server.py
+++ b/src/mcp_office_tools/server.py
@@ -196,14 +196,15 @@ def get_chapter_resource(doc_id: str, resource_id: str) -> str:
     "section://{doc_id}/{resource_id}",
     mime_type="text/markdown",
     name="document_section",
-    description="Section from a document as Markdown"
+    description="Section from a document. Supports format suffixes: section://doc/2.md, section://doc/2.txt, section://doc/2.html"
 )
 def get_section_resource(doc_id: str, resource_id: str) -> str:
-    """Retrieve a section as markdown."""
-    resource = resource_store.get(doc_id, "section", resource_id)
+    """Retrieve a section with optional format conversion."""
+    section_id, fmt = _parse_format_suffix(resource_id)
+    resource = resource_store.get(doc_id, "section", section_id)
     if resource is None:
         raise ValueError(f"Section not found: section://{doc_id}/{resource_id}")
-    return resource.data
+    return _convert_markdown_to_format(resource.data, fmt)
 
 
 @app.resource(