claude-hooks/lib/context_monitor.py

#!/usr/bin/env python3
"""Context Monitor - Token estimation and backup trigger system"""

import json
import time
from datetime import datetime, timedelta
from pathlib import Path
from typing import Dict, Any, Optional

# Support both package-relative and direct script imports
try:
    from .models import BackupDecision
except ImportError:
    from models import BackupDecision


class ContextMonitor:
    """Monitors conversation context and predicts token usage"""

    def __init__(self, storage_path: str = ".claude_hooks"):
        self.storage_path = Path(storage_path)
        self.storage_path.mkdir(parents=True, exist_ok=True)

        self.session_start = datetime.now()
        self.prompt_count = 0
        self.estimated_tokens = 0
        self.tool_executions = 0
        self.file_operations = 0

        # Token estimation constants (conservative estimates)
        self.TOKENS_PER_CHAR = 0.25   # Average for English text
        self.TOOL_OVERHEAD = 200      # Tokens per tool call
        self.SYSTEM_OVERHEAD = 500    # Base conversation overhead
        self.MAX_CONTEXT = 200000     # Claude's context limit

        # Backup thresholds
        self.backup_threshold = 0.85
        self.emergency_threshold = 0.95

        # Error tracking
        self.estimation_errors = 0
        self.max_errors = 5
        self._last_good_estimate = 0.5

        # Load previous session state if available
        self._load_session_state()

    def estimate_prompt_tokens(self, prompt_data: Dict[str, Any]) -> int:
        """Estimate tokens in user prompt"""
        try:
            prompt_text = prompt_data.get("prompt", "")
            # Basic character count estimation
            base_tokens = len(prompt_text) * self.TOKENS_PER_CHAR
            # Add overhead for system prompts, context, etc.
            overhead_tokens = self.SYSTEM_OVERHEAD
            return int(base_tokens + overhead_tokens)
        except Exception:
            # Fallback estimation
            return 1000
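
    # Worked example (illustrative figures, not taken from the original source):
    # a 2,000-character prompt estimates to 2000 * 0.25 = 500 text tokens,
    # plus SYSTEM_OVERHEAD (500) for system prompt and framing, so
    # estimate_prompt_tokens() returns roughly 1,000 tokens.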

    def estimate_conversation_tokens(self) -> int:
        """Estimate total conversation tokens"""
        try:
            # Base conversation context
            base_tokens = self.estimated_tokens
            # Add tool execution overhead
            tool_tokens = self.tool_executions * self.TOOL_OVERHEAD
            # Add file operation overhead (file contents in context)
            file_tokens = self.file_operations * 1000  # Average file size
            # Conversation history grows over time
            history_tokens = self.prompt_count * 300  # Average response size

            total = base_tokens + tool_tokens + file_tokens + history_tokens
            return min(total, self.MAX_CONTEXT)
        except Exception:
            return self._handle_estimation_failure()
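
    # Worked example (illustrative figures): after 20 prompts averaging the
    # estimate above (estimated_tokens ~= 20,000), 30 tool calls, and 10 file
    # operations, the total is 20000 + 30*200 + 10*1000 + 20*300 = 42,000
    # tokens, or about 21% of the 200,000-token budget.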

    def get_context_usage_ratio(self) -> float:
        """Get estimated context usage as ratio (0.0 to 1.0)"""
        try:
            estimated = self.estimate_conversation_tokens()
            ratio = min(1.0, estimated / self.MAX_CONTEXT)

            # Reset error counter on success
            self.estimation_errors = 0
            self._last_good_estimate = ratio
            return ratio
        except Exception:
            self.estimation_errors += 1

            # Too many errors - use conservative fallback
            if self.estimation_errors >= self.max_errors:
                return 0.7  # Conservative threshold

            # Single error - use last known good value
            return self._last_good_estimate

    def should_trigger_backup(self, threshold: Optional[float] = None) -> bool:
        """Check if backup should be triggered"""
        try:
            if threshold is None:
                threshold = self.backup_threshold

            usage = self.get_context_usage_ratio()

            # Edge case: very early in session
            if self.prompt_count < 2:
                return False

            # Edge case: already near context limit
            if usage > self.emergency_threshold:
                # Emergency backup - don't wait for other conditions
                return True

            # Session duration and complexity factors
            session_hours = (datetime.now() - self.session_start).total_seconds() / 3600
            complexity_factor = (self.tool_executions + self.file_operations) / 20

            # Trigger earlier for complex sessions
            adjusted_threshold = threshold - (complexity_factor * 0.1)

            # Multiple trigger conditions
            return (
                usage > adjusted_threshold or
                session_hours > 2.0 or
                (usage > 0.7 and session_hours > 1.0)
            )
        except Exception:
            # When in doubt, back up (better safe than sorry)
            return True
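
    # Worked example (illustrative figures): with 15 tool executions and
    # 5 file operations, complexity_factor = (15 + 5) / 20 = 1.0, so the
    # default 0.85 threshold is lowered to 0.85 - 0.1 = 0.75; a session at
    # 78% estimated usage would then trigger a backup.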

    def update_from_prompt(self, prompt_data: Dict[str, Any]):
        """Update estimates when user submits prompt"""
        try:
            self.prompt_count += 1
            prompt_tokens = self.estimate_prompt_tokens(prompt_data)
            self.estimated_tokens += prompt_tokens

            # Save state periodically
            if self.prompt_count % 5 == 0:
                self._save_session_state()
        except Exception:
            pass  # Don't let tracking errors break the system

    def update_from_tool_use(self, tool_data: Dict[str, Any]):
        """Update estimates when tools are used"""
        try:
            self.tool_executions += 1
            tool_name = tool_data.get("tool", "")

            # File operations add content to context
            if tool_name in ["Read", "Edit", "Write", "Glob", "MultiEdit"]:
                self.file_operations += 1

                # Large outputs add to context
                parameters = tool_data.get("parameters", {})
                if "file_path" in parameters:
                    self.estimated_tokens += 500  # Estimated file content

            # Save state periodically
            if self.tool_executions % 10 == 0:
                self._save_session_state()
        except Exception:
            pass  # Don't let tracking errors break the system

    def check_backup_triggers(self, hook_event: str, data: Dict[str, Any]) -> BackupDecision:
        """Check all backup trigger conditions"""
        try:
            # Context-based triggers
            if self.should_trigger_backup():
                usage = self.get_context_usage_ratio()
                urgency = "high" if usage > self.emergency_threshold else "medium"
                return BackupDecision(
                    should_backup=True,
                    reason="context_threshold",
                    urgency=urgency,
                    metadata={"usage_ratio": usage}
                )

            # Activity-based triggers
            if self._should_backup_by_activity():
                return BackupDecision(
                    should_backup=True,
                    reason="activity_threshold",
                    urgency="medium"
                )

            # Critical operation triggers
            if self._is_critical_operation(data):
                return BackupDecision(
                    should_backup=True,
                    reason="critical_operation",
                    urgency="high"
                )

            return BackupDecision(should_backup=False, reason="no_trigger")
        except Exception:
            # If trigger checking fails, err on the side of safety
            return BackupDecision(
                should_backup=True,
                reason="trigger_check_failed",
                urgency="medium"
            )
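
    # Usage sketch (hypothetical hook payload, not from the original source):
    # on a fresh monitor, calling check_backup_triggers("PostToolUse",
    # {"tool": "Bash", "parameters": {"command": "git commit -m 'wip'"}})
    # returns a BackupDecision with reason="critical_operation" and
    # urgency="high", since the context and activity checks don't fire yet.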

    def _should_backup_by_activity(self) -> bool:
        """Activity-based backup triggers"""
        # Backup after significant file modifications
        if self.file_operations % 10 == 0 and self.file_operations > 0:
            return True

        # Backup after many tool executions
        if self.tool_executions % 25 == 0 and self.tool_executions > 0:
            return True

        return False

    def _is_critical_operation(self, data: Dict[str, Any]) -> bool:
        """Detect operations that should trigger immediate backup"""
        tool = data.get("tool", "")
        params = data.get("parameters", {})

        if tool == "Bash":
            command = params.get("command", "").lower()

            # Git operations
            if any(git_cmd in command for git_cmd in ["git commit", "git push", "git merge"]):
                return True

            # Package installations
            if any(pkg_cmd in command for pkg_cmd in ["npm install", "pip install", "cargo install"]):
                return True

        # Major file operations
        if tool in ["Write", "MultiEdit"]:
            content = params.get("content", "")
            if len(content) > 5000:  # Large file changes
                return True

        return False

    def _handle_estimation_failure(self) -> int:
        """Fallback estimation when primary method fails"""
        # Method 1: Time-based estimation
        session_duration = (datetime.now() - self.session_start).total_seconds() / 3600
        if session_duration > 1.0:  # 1 hour = likely high usage
            return int(self.MAX_CONTEXT * 0.8)

        # Method 2: Activity-based estimation
        total_activity = self.tool_executions + self.file_operations
        if total_activity > 50:  # High activity = likely high context
            return int(self.MAX_CONTEXT * 0.75)

        # Method 3: Conservative default
        return int(self.MAX_CONTEXT * 0.5)

    def _save_session_state(self):
        """Save current session state to disk"""
        try:
            state_file = self.storage_path / "session_state.json"
            state = {
                "session_start": self.session_start.isoformat(),
                "prompt_count": self.prompt_count,
                "estimated_tokens": self.estimated_tokens,
                "tool_executions": self.tool_executions,
                "file_operations": self.file_operations,
                "last_updated": datetime.now().isoformat()
            }

            with open(state_file, 'w') as f:
                json.dump(state, f, indent=2)
        except Exception:
            pass  # Don't let state saving errors break the system
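
    # The resulting session_state.json looks like this (illustrative values):
    # {
    #   "session_start": "2025-07-19T18:00:00",
    #   "prompt_count": 15,
    #   "estimated_tokens": 18500,
    #   "tool_executions": 30,
    #   "file_operations": 10,
    #   "last_updated": "2025-07-19T19:25:00"
    # }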

    def _load_session_state(self):
        """Load previous session state if available"""
        try:
            state_file = self.storage_path / "session_state.json"
            if state_file.exists():
                with open(state_file, 'r') as f:
                    state = json.load(f)

                # Only load if session is recent (within last hour)
                last_updated = datetime.fromisoformat(state["last_updated"])
                if datetime.now() - last_updated < timedelta(hours=1):
                    self.prompt_count = state.get("prompt_count", 0)
                    self.estimated_tokens = state.get("estimated_tokens", 0)
                    self.tool_executions = state.get("tool_executions", 0)
                    self.file_operations = state.get("file_operations", 0)
        except Exception:
            pass  # If loading fails, start fresh

    def get_session_summary(self) -> Dict[str, Any]:
        """Get current session summary"""
        return {
            "session_duration": str(datetime.now() - self.session_start),
            "prompt_count": self.prompt_count,
            "tool_executions": self.tool_executions,
            "file_operations": self.file_operations,
            "estimated_tokens": self.estimate_conversation_tokens(),
            "context_usage_ratio": self.get_context_usage_ratio(),
            "should_backup": self.should_trigger_backup()
        }
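

# Minimal usage sketch (illustrative, not part of the original module). It
# drives the monitor with synthetic prompt and tool events; the payload
# shapes mirror what the methods above expect, and all values are made up.
# Assumes BackupDecision exposes `should_backup` and `reason` attributes,
# matching the constructor keywords used above.
if __name__ == "__main__":
    monitor = ContextMonitor(storage_path=".claude_hooks_demo")

    # Simulate a few user prompts and file-reading tool calls
    for i in range(5):
        monitor.update_from_prompt({"prompt": "Refactor the parser module " * 20})
        monitor.update_from_tool_use({
            "tool": "Read",
            "parameters": {"file_path": f"src/module_{i}.py"}
        })

    # A git commit counts as a critical operation, so this should decide to back up
    decision = monitor.check_backup_triggers(
        "PostToolUse",
        {"tool": "Bash", "parameters": {"command": "git commit -m 'checkpoint'"}}
    )
    print(f"should_backup={decision.should_backup}, reason={decision.reason}")
    print(json.dumps(monitor.get_session_summary(), indent=2))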