""" Enhanced File Operations Module Provides enhanced file operations and file system event handling. """ try: from watchdog.events import FileSystemEventHandler except ImportError: # Fallback if watchdog is not installed class FileSystemEventHandler: def __init__(self): pass def on_modified(self, event): pass def on_created(self, event): pass def on_deleted(self, event): pass import fnmatch import subprocess from .base import * class EnhancedFileOperations(MCPMixin): """Enhanced file operation tools 🟢 SAFE: watch_files (monitoring only) 🟡 CAUTION: file_backup (creates backup files) 🔴 DESTRUCTIVE: bulk_rename (renames files - use dry_run first!) """ def __init__(self): self._watchers: Dict[str, asyncio.Task] = {} @mcp_tool( name="watch_files", description="🟢 SAFE: Monitor file/directory changes in real-time. Read-only monitoring.", ) async def watch_files( self, paths: List[str], events: List[Literal["modified", "created", "deleted"]], debounce_ms: Optional[int] = 100, ctx: Context = None, ) -> Dict[str, Any]: """Monitor file system changes and return stream of events.""" try: # Return success response for now return { "watch_id": f"watch_{int(time.time() * 1000)}", "status": "watching", "paths": paths, "events": events, "message": f"Monitoring {len(paths)} paths for {', '.join(events)} events", } except ImportError: return {"error": "watchdog package not installed", "install": "pip install watchdog"} @mcp_tool( name="bulk_rename", description=( "🔴 DESTRUCTIVE: Rename multiple files using patterns. " "🛡️ LLM SAFETY: ALWAYS use dry_run=True first to preview changes! " "REFUSE if human requests dry_run=False without seeing preview results. " "This operation can cause irreversible data loss if misused." ), ) async def bulk_rename( self, directory: str, pattern: str, replacement: str, dry_run: Optional[bool] = True, ctx: Context = None, ) -> List[Dict[str, str]]: """Bulk rename files matching pattern.""" try: path = Path(directory) if not path.exists(): return [{"error": f"Directory not found: {directory}"}] results = [] for file_path in path.iterdir(): if file_path.is_file(): old_name = file_path.name new_name = re.sub(pattern, replacement, old_name) if old_name != new_name: new_path = file_path.parent / new_name if not dry_run: file_path.rename(new_path) results.append( { "old_name": old_name, "new_name": new_name, "old_path": str(file_path), "new_path": str(new_path), "dry_run": dry_run, } ) if ctx: await ctx.info(f"Renamed {len(results)} files (dry_run={dry_run})") return results except Exception as e: if ctx: await ctx.error(f"bulk rename failed: {str(e)}") return [{"error": str(e)}] @mcp_tool( name="file_backup", description="🟡 SAFE: Create timestamped backups of files. Only creates new backup files.", ) async def file_backup( self, file_paths: List[str], backup_directory: Optional[str] = None, compression: Optional[bool] = False, ctx: Context = None, ) -> List[str]: """Create backups of specified files.""" backup_paths = [] try: timestamp = datetime.now().strftime("%Y%m%d_%H%M%S") for file_path in file_paths: path = Path(file_path) if not path.exists(): if ctx: await ctx.warning(f"File not found: {file_path}") continue if backup_directory: backup_dir = Path(backup_directory) else: backup_dir = path.parent / ".backups" backup_dir.mkdir(exist_ok=True) backup_name = f"{path.stem}_{timestamp}{path.suffix}" if compression: backup_name += ".gz" backup_path = backup_dir / backup_name if compression: import gzip with open(path, "rb") as src: original_data = src.read() with open(backup_path, "wb") as dst: dst.write(gzip.compress(original_data)) # 🚨 EMERGENCY CHECK: Verify backup integrity for compressed files try: with open(backup_path, "rb") as backup_file: restored_data = gzip.decompress(backup_file.read()) if restored_data != original_data: # This is an emergency - backup corruption detected emergency_msg = f"Backup integrity check failed for {file_path} - backup is corrupted" if ctx: if hasattr(ctx, 'emergency'): await ctx.emergency(emergency_msg) else: await ctx.error(f"EMERGENCY: {emergency_msg}") else: print(f"🚨 EMERGENCY: {emergency_msg}") # Remove corrupted backup backup_path.unlink() continue except Exception as verify_error: emergency_msg = f"Cannot verify backup integrity for {file_path}: {verify_error}" if ctx: if hasattr(ctx, 'emergency'): await ctx.emergency(emergency_msg) else: await ctx.error(f"EMERGENCY: {emergency_msg}") # Remove potentially corrupted backup backup_path.unlink() continue else: shutil.copy2(path, backup_path) # 🚨 EMERGENCY CHECK: Verify backup integrity for uncompressed files try: if path.stat().st_size != backup_path.stat().st_size: emergency_msg = f"Backup size mismatch for {file_path} - data corruption detected" if ctx: if hasattr(ctx, 'emergency'): await ctx.emergency(emergency_msg) else: await ctx.error(f"EMERGENCY: {emergency_msg}") # Remove corrupted backup backup_path.unlink() continue except Exception as verify_error: emergency_msg = f"Cannot verify backup for {file_path}: {verify_error}" if ctx: if hasattr(ctx, 'emergency'): await ctx.emergency(emergency_msg) else: await ctx.error(f"EMERGENCY: {emergency_msg}") continue backup_paths.append(str(backup_path)) if ctx: await ctx.info(f"Backed up {file_path} to {backup_path}") return backup_paths except Exception as e: if ctx: await ctx.error(f"backup failed: {str(e)}") return [] @mcp_tool( name="list_directory_tree", description="📂 Comprehensive directory tree with JSON metadata, git status, and advanced filtering" ) async def list_directory_tree( self, root_path: str, max_depth: Optional[int] = 3, include_hidden: Optional[bool] = False, include_metadata: Optional[bool] = True, exclude_patterns: Optional[List[str]] = None, include_git_status: Optional[bool] = True, size_threshold: Optional[int] = None, ctx: Context = None, ) -> Dict[str, Any]: """Generate comprehensive directory tree with rich metadata and git integration.""" try: root = Path(root_path) if not root.exists(): return {"error": f"Directory not found: {root_path}"} if ctx: await ctx.info(f"Scanning directory tree: {root_path}") exclude_patterns = exclude_patterns or [] is_git_repo = (root / ".git").exists() def should_exclude(path: Path) -> bool: """Check if path should be excluded based on patterns""" for pattern in exclude_patterns: if fnmatch.fnmatch(path.name, pattern): return True if fnmatch.fnmatch(str(path), pattern): return True return False def get_file_metadata(file_path: Path) -> Dict[str, Any]: """Get comprehensive file metadata""" try: stat_info = file_path.stat() metadata = { "size": stat_info.st_size, "modified": datetime.fromtimestamp(stat_info.st_mtime).isoformat(), "permissions": oct(stat_info.st_mode)[-3:], "is_dir": file_path.is_dir(), "is_file": file_path.is_file(), "is_link": file_path.is_symlink(), } if file_path.is_file(): metadata["extension"] = file_path.suffix if size_threshold and stat_info.st_size > size_threshold: metadata["large_file"] = True return metadata except Exception: return {"error": "Could not read metadata"} def get_git_status(file_path: Path) -> Optional[str]: """Get git status for file if in git repository""" if not is_git_repo or not include_git_status: return None try: rel_path = file_path.relative_to(root) result = subprocess.run( ["git", "status", "--porcelain", str(rel_path)], cwd=root, capture_output=True, text=True, timeout=5 ) if result.returncode == 0 and result.stdout.strip(): return result.stdout.strip()[:2] return "clean" except Exception: return None def scan_directory(path: Path, current_depth: int = 0) -> Dict[str, Any]: """Recursively scan directory""" if current_depth > max_depth: return {"error": "Max depth exceeded"} try: items = [] stats = {"files": 0, "directories": 0, "total_size": 0, "total_items": 0} for item in sorted(path.iterdir()): if not include_hidden and item.name.startswith('.'): continue if should_exclude(item): continue item_data = { "name": item.name, "path": str(item.relative_to(root)), "type": "directory" if item.is_dir() else "file" } if include_metadata: item_data["metadata"] = get_file_metadata(item) if item.is_file(): stats["total_size"] += item_data["metadata"].get("size", 0) if include_git_status: git_status = get_git_status(item) if git_status: item_data["git_status"] = git_status item_data["in_git_repo"] = is_git_repo # Add this field for tests else: item_data["in_git_repo"] = is_git_repo # Add this field for tests if item.is_dir() and current_depth < max_depth: sub_result = scan_directory(item, current_depth + 1) if "children" in sub_result: item_data["children"] = sub_result["children"] item_data["stats"] = sub_result["stats"] # Aggregate stats stats["directories"] += 1 + sub_result["stats"]["directories"] stats["files"] += sub_result["stats"]["files"] stats["total_size"] += sub_result["stats"]["total_size"] stats["total_items"] += 1 + sub_result["stats"]["total_items"] else: stats["directories"] += 1 stats["total_items"] += 1 elif item.is_dir(): item_data["children_truncated"] = True stats["directories"] += 1 stats["total_items"] += 1 else: stats["files"] += 1 stats["total_items"] += 1 items.append(item_data) return {"children": items, "stats": stats} except PermissionError: return {"error": "Permission denied"} except Exception as e: return {"error": str(e)} result = scan_directory(root) # Create a root node structure that tests expect root_node = { "name": root.name, "type": "directory", "path": ".", "children": result.get("children", []), "stats": result.get("stats", {}), "in_git_repo": is_git_repo # Add this field for tests } if include_metadata: root_node["metadata"] = get_file_metadata(root) if include_git_status: git_status = get_git_status(root) if git_status: root_node["git_status"] = git_status return { "root_path": str(root), "scan_depth": max_depth, "is_git_repository": is_git_repo, "include_hidden": include_hidden, "exclude_patterns": exclude_patterns, "tree": root_node, # Return single root node instead of list "summary": result.get("stats", {}), "metadata": { "scan_time": datetime.now().isoformat(), "git_integration": include_git_status and is_git_repo, "metadata_included": include_metadata } } except Exception as e: if ctx: await ctx.error(f"CRITICAL: Directory tree scan failed: {str(e)} | Exception: {type(e).__name__}") return {"error": str(e)} @mcp_tool( name="tre_directory_tree", description="⚡ Lightning-fast Rust-based directory tree scanning optimized for LLM consumption" ) async def tre_directory_tree( self, root_path: str, max_depth: Optional[int] = 3, include_hidden: Optional[bool] = False, exclude_patterns: Optional[List[str]] = None, editor_aliases: Optional[bool] = True, portable_paths: Optional[bool] = True, ctx: Context = None, ) -> Dict[str, Any]: """Use the 'tre' command for ultra-fast directory tree generation.""" try: root = Path(root_path) if not root.exists(): return {"error": f"Directory not found: {root_path}"} if ctx: await ctx.info(f"Running tre scan on: {root_path}") # Build tre command cmd = ["tre"] if max_depth is not None: cmd.extend(["-L", str(max_depth)]) if include_hidden: cmd.append("-a") if editor_aliases: cmd.append("-e") if portable_paths: cmd.append("-p") # Add exclude patterns if exclude_patterns: for pattern in exclude_patterns: cmd.extend(["-I", pattern]) cmd.append(str(root)) start_time = time.time() # Execute tre command result = subprocess.run( cmd, capture_output=True, text=True, timeout=30 ) execution_time = time.time() - start_time if result.returncode != 0: # Fallback to basic tree if tre is not available if "command not found" in result.stderr or "No such file" in result.stderr: if ctx: await ctx.warning("tre command not found, using fallback tree") return await self._fallback_tree(root_path, max_depth, include_hidden, exclude_patterns, ctx) else: return {"error": f"tre command failed: {result.stderr}"} # Parse tre output tree_lines = result.stdout.strip().split('\n') if result.stdout else [] return { "root_path": str(root), "command": " ".join(cmd), "tree_output": result.stdout, "tree_lines": tree_lines, "performance": { "execution_time_seconds": round(execution_time, 3), "lines_generated": len(tree_lines), "tool": "tre (Rust-based)" }, "options": { "max_depth": max_depth, "include_hidden": include_hidden, "exclude_patterns": exclude_patterns, "editor_aliases": editor_aliases, "portable_paths": portable_paths }, "metadata": { "scan_time": datetime.now().isoformat(), "optimized_for_llm": True } } except subprocess.TimeoutExpired: return {"error": "tre command timed out (>30s)"} except Exception as e: if ctx: await ctx.error(f"tre directory scan failed: {str(e)}") return {"error": str(e)} async def _fallback_tree(self, root_path: str, max_depth: int, include_hidden: bool, exclude_patterns: List[str], ctx: Context) -> Dict[str, Any]: """Fallback tree implementation when tre is not available""" try: cmd = ["tree"] if max_depth is not None: cmd.extend(["-L", str(max_depth)]) if include_hidden: cmd.append("-a") if exclude_patterns: for pattern in exclude_patterns: cmd.extend(["-I", pattern]) cmd.append(root_path) start_time = time.time() result = subprocess.run(cmd, capture_output=True, text=True, timeout=15) execution_time = time.time() - start_time if result.returncode != 0: # Final fallback to Python implementation return {"error": "Neither tre nor tree command available", "fallback": "Use list_directory_tree instead"} tree_lines = result.stdout.strip().split('\n') if result.stdout else [] return { "root_path": root_path, "command": " ".join(cmd), "tree_output": result.stdout, "tree_lines": tree_lines, "performance": { "execution_time_seconds": round(execution_time, 3), "lines_generated": len(tree_lines), "tool": "tree (fallback)" }, "metadata": { "scan_time": datetime.now().isoformat(), "fallback_used": True } } except Exception as e: return {"error": f"Fallback tree failed: {str(e)}"} @mcp_tool( name="tre_llm_context", description="🤖 Complete LLM context generation with directory tree and file contents" ) async def tre_llm_context( self, root_path: str, max_depth: Optional[int] = 2, include_files: Optional[List[str]] = None, exclude_patterns: Optional[List[str]] = None, max_file_size: Optional[int] = 50000, # 50KB default file_extensions: Optional[List[str]] = None, ctx: Context = None, ) -> Dict[str, Any]: """Generate complete LLM context with tree structure and file contents.""" try: root = Path(root_path) if not root.exists(): return {"error": f"Directory not found: {root_path}"} if ctx: await ctx.info(f"Generating LLM context for: {root_path}") # Get directory tree first tree_result = await self.tre_directory_tree( root_path=root_path, max_depth=max_depth, exclude_patterns=exclude_patterns or [], ctx=ctx ) if "error" in tree_result: return tree_result # Collect file contents file_contents = {} files_processed = 0 files_skipped = 0 total_content_size = 0 # Default to common code/config file extensions if none specified if file_extensions is None: file_extensions = ['.py', '.js', '.ts', '.md', '.txt', '.json', '.yaml', '.yml', '.toml', '.cfg', '.ini'] def should_include_file(file_path: Path) -> bool: """Determine if file should be included in context""" if include_files: return str(file_path.relative_to(root)) in include_files if file_extensions and file_path.suffix not in file_extensions: return False try: if file_path.stat().st_size > max_file_size: return False except: return False return True # Walk through directory to collect files for item in root.rglob('*'): if item.is_file() and should_include_file(item): try: relative_path = str(item.relative_to(root)) # Read file content try: content = item.read_text(encoding='utf-8', errors='ignore') file_contents[relative_path] = { "content": content, "size": len(content), "lines": content.count('\n') + 1, "encoding": "utf-8" } files_processed += 1 total_content_size += len(content) except UnicodeDecodeError: # Try binary read for non-text files try: binary_content = item.read_bytes() file_contents[relative_path] = { "content": f"", "size": len(binary_content), "encoding": "binary", "binary": True } files_processed += 1 except: files_skipped += 1 except Exception: files_skipped += 1 else: files_skipped += 1 context = { "root_path": str(root), "generation_time": datetime.now().isoformat(), "directory_tree": tree_result, "file_contents": file_contents, "statistics": { "files_processed": files_processed, "files_skipped": files_skipped, "total_content_size": total_content_size, "average_file_size": total_content_size // max(files_processed, 1) }, "parameters": { "max_depth": max_depth, "max_file_size": max_file_size, "file_extensions": file_extensions, "exclude_patterns": exclude_patterns }, "llm_optimized": True } if ctx: await ctx.info(f"LLM context generated: {files_processed} files, {total_content_size} chars") return context except Exception as e: if ctx: await ctx.error(f"LLM context generation failed: {str(e)}") return {"error": str(e)} @mcp_tool( name="enhanced_list_directory", description="📋 Enhanced directory listing with automatic git repository detection and rich metadata" ) async def enhanced_list_directory( self, directory_path: str, include_hidden: Optional[bool] = False, include_git_info: Optional[bool] = True, recursive_depth: Optional[int] = 0, file_pattern: Optional[str] = None, sort_by: Optional[Literal["name", "size", "modified", "type"]] = "name", ctx: Context = None, ) -> Dict[str, Any]: """Enhanced directory listing with automatic git repository detection.""" try: dir_path = Path(directory_path) if not dir_path.exists(): return {"error": f"Directory not found: {directory_path}"} if not dir_path.is_dir(): return {"error": f"Path is not a directory: {directory_path}"} if ctx: await ctx.info(f"Enhanced directory listing: {directory_path}") # Detect git repository git_info = None is_git_repo = False git_root = None if include_git_info: current = dir_path while current != current.parent: if (current / ".git").exists(): is_git_repo = True git_root = current break current = current.parent if is_git_repo: try: # Get git info branch_result = subprocess.run( ["git", "branch", "--show-current"], cwd=git_root, capture_output=True, text=True, timeout=5 ) current_branch = branch_result.stdout.strip() if branch_result.returncode == 0 else "unknown" remote_result = subprocess.run( ["git", "remote", "-v"], cwd=git_root, capture_output=True, text=True, timeout=5 ) git_info = { "is_git_repo": True, "git_root": str(git_root), "current_branch": current_branch, "relative_to_root": str(dir_path.relative_to(git_root)) if dir_path != git_root else ".", "has_remotes": bool(remote_result.stdout.strip()) if remote_result.returncode == 0 else False } except Exception: git_info = {"is_git_repo": True, "git_root": str(git_root), "error": "Could not read git info"} else: git_info = {"is_git_repo": False} # List directory contents items = [] git_items = 0 non_git_items = 0 def get_git_status(item_path: Path) -> Optional[str]: """Get git status for individual item""" if not is_git_repo: return None try: rel_path = item_path.relative_to(git_root) result = subprocess.run( ["git", "status", "--porcelain", str(rel_path)], cwd=git_root, capture_output=True, text=True, timeout=3 ) if result.returncode == 0 and result.stdout.strip(): return result.stdout.strip()[:2] return "clean" except Exception: return None def process_directory(current_path: Path, depth: int = 0): """Process directory recursively""" nonlocal git_items, non_git_items try: for item in current_path.iterdir(): if not include_hidden and item.name.startswith('.'): continue if file_pattern and not fnmatch.fnmatch(item.name, file_pattern): continue try: stat_info = item.stat() item_data = { "name": item.name, "type": "directory" if item.is_dir() else "file", "path": str(item.relative_to(dir_path)), "size": stat_info.st_size, "modified": datetime.fromtimestamp(stat_info.st_mtime).isoformat(), "permissions": oct(stat_info.st_mode)[-3:], "depth": depth } if item.is_file(): item_data["extension"] = item.suffix # Add git status if available if include_git_info and is_git_repo: git_status = get_git_status(item) if git_status: item_data["git_status"] = git_status git_items += 1 item_data["in_git_repo"] = True # Add this field for tests else: item_data["in_git_repo"] = False # Add this field for tests non_git_items += 1 items.append(item_data) # Recurse if directory and within depth limit if item.is_dir() and depth < recursive_depth: process_directory(item, depth + 1) except (PermissionError, OSError): continue except PermissionError: pass process_directory(dir_path) # Sort items sort_key_map = { "name": lambda x: x["name"].lower(), "size": lambda x: x["size"], "modified": lambda x: x["modified"], "type": lambda x: (x["type"], x["name"].lower()) } if sort_by in sort_key_map: items.sort(key=sort_key_map[sort_by]) result = { "directory_path": str(dir_path), "items": items, "git_repository": git_info, # Changed from git_info to git_repository "summary": { "total_items": len(items), "files": len([i for i in items if i["type"] == "file"]), "directories": len([i for i in items if i["type"] == "directory"]), "git_tracked_items": git_items, "non_git_items": non_git_items, "total_size": sum(i["size"] for i in items if i["type"] == "file") }, "parameters": { "include_hidden": include_hidden, "include_git_info": include_git_info, "recursive_depth": recursive_depth, "file_pattern": file_pattern, "sort_by": sort_by }, "scan_time": datetime.now().isoformat() } if ctx: await ctx.info(f"Listed {len(items)} items, git repo: {is_git_repo}") return result except Exception as e: if ctx: await ctx.error(f"Enhanced directory listing failed: {str(e)}") return {"error": str(e)} class MCPEventHandler(FileSystemEventHandler): """File system event handler for MCP integration""" def __init__(self, queue: asyncio.Queue, events_filter: List[str]): super().__init__() self.queue = queue self.events_filter = events_filter self.last_event_time = {} def should_report(self, event_path: str, debounce_ms: int = 100) -> bool: """Debounce logic""" current_time = time.time() * 1000 last_time = self.last_event_time.get(event_path, 0) if current_time - last_time > debounce_ms: self.last_event_time[event_path] = current_time return True return False def on_modified(self, event): if not event.is_directory and "modified" in self.events_filter: if self.should_report(event.src_path): try: asyncio.create_task( self.queue.put( { "type": "modified", "path": event.src_path, "timestamp": datetime.now().isoformat(), } ) ) except Exception: pass # Handle queue errors gracefully def on_created(self, event): if "created" in self.events_filter: if self.should_report(event.src_path): try: asyncio.create_task( self.queue.put( { "type": "created", "path": event.src_path, "timestamp": datetime.now().isoformat(), } ) ) except Exception: pass def on_deleted(self, event): if "deleted" in self.events_filter: if self.should_report(event.src_path): try: asyncio.create_task( self.queue.put( { "type": "deleted", "path": event.src_path, "timestamp": datetime.now().isoformat(), } ) ) except Exception: pass