diff --git a/CHANGELOG.md b/CHANGELOG.md index 2baac1e..614760e 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -68,6 +68,21 @@ The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.1.0/). - **Session Isolation:** `docker_stop` now validates container belongs to current session before stopping. `docker_cleanup` defaults to `session_only=True` to prevent cross-session interference. - **Background Discovery Thread:** Fixed timeout from 30s to 0.5s for port scanning, reducing discovery cycle from 300s+ to ~15s. - **Typedef/Variable Type Resolution:** Fixed `handle_typedef_create` and `handle_variable_rename` to use shared `resolve_data_type()` for builtin types (int, char, etc.). +- **DockerMixin Inheritance:** Fixed crash when `DockerMixin` called `get_instance_port()` — was inheriting from wrong base class. +- **Deprecated asyncio API:** Replaced `asyncio.get_event_loop()` with `asyncio.get_running_loop()` for Python 3.10+ compatibility. +- **HTTP Client Data Mutation:** `safe_post`, `safe_put`, and `safe_patch` no longer mutate the caller's data dict via `.pop()`. +- **Race Condition in Discovery:** Initial instance discovery in `main()` now uses `_instances_lock` for thread safety. +- **Silent Exception Handling:** Added debug logging to PortPool exception handlers and analysis fallback paths. +- **File Descriptor Leak:** Fixed potential leak in `PortPool._try_acquire_port()` if write operations fail after lock acquisition. +- **Hash Algorithm Consistency:** Changed query hash from MD5 to SHA-256 in pagination module for consistency with cursor ID generation. +- **Lazy PortPool Initialization:** `PortPool` now created on first use, avoiding `/tmp/ghydramcp-ports` directory creation when Docker tools are never used. +- **Logging Configuration:** `configure_logging()` now called during server startup — debug messages actually work now. +- **Type Hint Consistency:** Aligned `filtering.py` to use `List[T]` from typing module like rest of codebase. +- **Parameter Naming:** Renamed `project_fields` to `fields` in `structs_get()` for consistency with other tools. +- **Import Path:** Fixed `logging.py` to import `Context` from `fastmcp` (not deprecated `mcp.server.fastmcp` path). + +### Added +- **Debug Logging Environment Variable:** Set `GHYDRAMCP_DEBUG=1` to enable DEBUG-level logging for troubleshooting. ## [2025.12.1] - 2025-12-01 diff --git a/src/ghydramcp/core/filtering.py b/src/ghydramcp/core/filtering.py index e845fa6..b8ab917 100644 --- a/src/ghydramcp/core/filtering.py +++ b/src/ghydramcp/core/filtering.py @@ -7,7 +7,7 @@ enforcement to prevent oversized MCP tool results. import json import re import time -from typing import Any, Dict, Optional +from typing import Any, Dict, List, Optional from ..config import get_config @@ -15,7 +15,7 @@ from ..config import get_config TOKEN_ESTIMATION_RATIO = 4.0 -def project_fields(items: list, fields: list[str]) -> list: +def project_fields(items: List[Any], fields: List[str]) -> List[Any]: """Select only specified keys from each item (jq-style projection). Works on dicts and strings. For dicts, returns only the requested @@ -42,7 +42,7 @@ def project_fields(items: list, fields: list[str]) -> list: return projected -def apply_grep(items: list, pattern: str, ignorecase: bool = True) -> list: +def apply_grep(items: List[Any], pattern: str, ignorecase: bool = True) -> List[Any]: """Filter items by regex pattern across all string values. Searches all string-coercible values in each item. For dicts, @@ -90,13 +90,33 @@ def _matches(item: Any, pattern: re.Pattern, depth: int = 0) -> bool: def _estimate_tokens(data: Any) -> int: - """Estimate token count from serialized JSON size.""" + """Estimate token count from serialized JSON size. + + Uses a simple heuristic: ~4 characters per token on average. + This matches the TOKEN_ESTIMATION_RATIO constant. + + Args: + data: Any JSON-serializable data structure + + Returns: + Estimated token count + """ text = json.dumps(data, default=str) return int(len(text) / TOKEN_ESTIMATION_RATIO) -def _extract_available_fields(items: list) -> list[str]: - """Extract the set of field names from the first few dict items.""" +def _extract_available_fields(items: List[Any]) -> List[str]: + """Extract the set of field names from the first few dict items. + + Samples up to 5 items to discover available keys, useful for + suggesting field projections to reduce response size. + + Args: + items: List of items (only dicts are examined) + + Returns: + Sorted list of unique field names (excludes internal _links) + """ fields = set() for item in items[:5]: if isinstance(item, dict): @@ -107,11 +127,11 @@ def _extract_available_fields(items: list) -> list[str]: def estimate_and_guard( - data: list, + data: List[Any], tool_name: str, budget: Optional[int] = None, query_hints: Optional[Dict[str, Any]] = None, -) -> Dict[str, Any]: +) -> Optional[Dict[str, Any]]: """Check if data exceeds token budget; return guard response if so. If data fits within budget, returns None (caller should proceed @@ -164,7 +184,17 @@ def estimate_and_guard( def _format_tokens(n: int) -> str: - """Format token count for display (e.g. 45000 -> '45k').""" + """Format token count for human-readable display. + + Large numbers are abbreviated with 'k' suffix for readability + in error messages and hints. + + Args: + n: Token count + + Returns: + Formatted string (e.g., 45000 -> '45k', 500 -> '500') + """ if n >= 1000: return "%dk" % (n // 1000) return str(n) @@ -172,7 +202,7 @@ def _format_tokens(n: int) -> str: def _build_hints( tool_name: str, - available_fields: list[str], + available_fields: List[str], query_hints: Optional[Dict[str, Any]] = None, ) -> str: """Build actionable hint text for the guard message.""" diff --git a/src/ghydramcp/core/http_client.py b/src/ghydramcp/core/http_client.py index b9c1620..0cdf413 100644 --- a/src/ghydramcp/core/http_client.py +++ b/src/ghydramcp/core/http_client.py @@ -244,6 +244,7 @@ def safe_post( text_payload = None if isinstance(data, dict): + data = data.copy() # Don't mutate caller's dict headers = data.pop("headers", None) json_payload = data else: @@ -265,7 +266,11 @@ def safe_put(port: int, endpoint: str, data: Dict[str, Any]) -> Dict[str, Any]: Returns: Response dict """ - headers = data.pop("headers", None) if isinstance(data, dict) else None + if isinstance(data, dict): + data = data.copy() # Don't mutate caller's dict + headers = data.pop("headers", None) + else: + headers = None return _make_request("PUT", port, endpoint, json_data=data, headers=headers) @@ -280,7 +285,11 @@ def safe_patch(port: int, endpoint: str, data: Dict[str, Any]) -> Dict[str, Any] Returns: Response dict """ - headers = data.pop("headers", None) if isinstance(data, dict) else None + if isinstance(data, dict): + data = data.copy() # Don't mutate caller's dict + headers = data.pop("headers", None) + else: + headers = None return _make_request("PATCH", port, endpoint, json_data=data, headers=headers) diff --git a/src/ghydramcp/core/logging.py b/src/ghydramcp/core/logging.py index 3666303..37c8242 100644 --- a/src/ghydramcp/core/logging.py +++ b/src/ghydramcp/core/logging.py @@ -8,7 +8,7 @@ import logging from typing import TYPE_CHECKING, Optional if TYPE_CHECKING: - from mcp.server.fastmcp import Context + from fastmcp import Context # Standard Python logger as fallback logger = logging.getLogger("ghydramcp") diff --git a/src/ghydramcp/core/pagination.py b/src/ghydramcp/core/pagination.py index 3af1d47..c4dbfb5 100644 --- a/src/ghydramcp/core/pagination.py +++ b/src/ghydramcp/core/pagination.py @@ -186,8 +186,8 @@ class CursorManager: item for item in data if self._matches_grep(item, pattern) ] - # Create query hash - query_hash = hashlib.md5( + # Create query hash (SHA-256 for consistency with cursor ID generation) + query_hash = hashlib.sha256( json.dumps(query_params, sort_keys=True, default=str).encode() ).hexdigest()[:12] diff --git a/src/ghydramcp/mixins/analysis.py b/src/ghydramcp/mixins/analysis.py index fef6055..c65730b 100644 --- a/src/ghydramcp/mixins/analysis.py +++ b/src/ghydramcp/mixins/analysis.py @@ -9,6 +9,7 @@ from fastmcp import Context from fastmcp.contrib.mcp_mixin import mcp_tool from ..config import get_config +from ..core.logging import logger from .base import GhydraMixinBase @@ -380,13 +381,18 @@ class AnalysisMixin(GhydraMixinBase): return {"success": False, "error": {"code": "NO_INSTANCE", "message": str(e)}} # Try setting as function comment first - try: - payload = {"comment": comment} - response = self.safe_patch(port, f"functions/{address}", payload) - if response.get("success", False): - return self.simplify_response(response) - except Exception: - pass + payload = {"comment": comment} + response = self.safe_patch(port, f"functions/{address}", payload) + if response.get("success", False): + return self.simplify_response(response) + + # Log why function comment failed before falling back + error = response.get("error", {}) + logger.debug( + "Function comment at %s failed (%s), falling back to pre-comment", + address, + error.get("code", "UNKNOWN"), + ) # Fallback to pre-comment return self.comments_set( diff --git a/src/ghydramcp/mixins/base.py b/src/ghydramcp/mixins/base.py index e191026..0f465c8 100644 --- a/src/ghydramcp/mixins/base.py +++ b/src/ghydramcp/mixins/base.py @@ -182,27 +182,33 @@ class GhydraMixinBase(MCPMixin): return "default" # Convenience methods for subclasses - def safe_get(self, port: int, endpoint: str, params: Optional[Dict] = None) -> Dict: + def safe_get( + self, port: int, endpoint: str, params: Optional[Dict[str, Any]] = None + ) -> Dict[str, Any]: """Make GET request to Ghidra instance.""" return safe_get(port, endpoint, params) - def safe_post(self, port: int, endpoint: str, data: Any) -> Dict: + def safe_post(self, port: int, endpoint: str, data: Any) -> Dict[str, Any]: """Make POST request to Ghidra instance.""" return safe_post(port, endpoint, data) - def safe_put(self, port: int, endpoint: str, data: Dict) -> Dict: + def safe_put( + self, port: int, endpoint: str, data: Dict[str, Any] + ) -> Dict[str, Any]: """Make PUT request to Ghidra instance.""" return safe_put(port, endpoint, data) - def safe_patch(self, port: int, endpoint: str, data: Dict) -> Dict: + def safe_patch( + self, port: int, endpoint: str, data: Dict[str, Any] + ) -> Dict[str, Any]: """Make PATCH request to Ghidra instance.""" return safe_patch(port, endpoint, data) - def safe_delete(self, port: int, endpoint: str) -> Dict: + def safe_delete(self, port: int, endpoint: str) -> Dict[str, Any]: """Make DELETE request to Ghidra instance.""" return safe_delete(port, endpoint) - def simplify_response(self, response: Dict) -> Dict: + def simplify_response(self, response: Dict[str, Any]) -> Dict[str, Any]: """Simplify HATEOAS response.""" return simplify_response(response) diff --git a/src/ghydramcp/mixins/docker.py b/src/ghydramcp/mixins/docker.py index aed393f..3f56a36 100644 --- a/src/ghydramcp/mixins/docker.py +++ b/src/ghydramcp/mixins/docker.py @@ -19,7 +19,10 @@ from pathlib import Path from typing import Any, Dict, List, Optional from fastmcp import Context -from fastmcp.contrib.mcp_mixin import MCPMixin, mcp_tool +from fastmcp.contrib.mcp_mixin import mcp_tool + +from ghydramcp.core.logging import logger +from ghydramcp.mixins.base import GhydraMixinBase # Port pool configuration (32 ports should handle many concurrent sessions) PORT_POOL_START = 8192 @@ -63,6 +66,7 @@ class PortPool: True if port was acquired, False if already in use """ lock_path = self._lock_file(port) + fd = None try: # Open or create the lock file @@ -77,14 +81,20 @@ class PortPool: return False # Write session info to the lock file - os.ftruncate(fd, 0) - os.lseek(fd, 0, os.SEEK_SET) - lock_data = json.dumps({ - "session_id": session_id, - "pid": os.getpid(), - "timestamp": time.time(), - }) - os.write(fd, lock_data.encode()) + # If this fails, we need to close fd to release the lock + try: + os.ftruncate(fd, 0) + os.lseek(fd, 0, os.SEEK_SET) + lock_data = json.dumps({ + "session_id": session_id, + "pid": os.getpid(), + "timestamp": time.time(), + }) + os.write(fd, lock_data.encode()) + except Exception: + # Write failed - release the lock + os.close(fd) + raise # Keep the file descriptor open to maintain the lock # Store it so we can release later @@ -94,7 +104,8 @@ class PortPool: return True - except Exception: + except Exception as e: + logger.debug("Failed to acquire port %d: %s", port, e) return False def allocate(self, session_id: str) -> Optional[int]: @@ -134,7 +145,8 @@ class PortPool: lock_path.unlink() return True - except Exception: + except Exception as e: + logger.debug("Failed to release port %d: %s", port, e) return False def get_allocated_ports(self) -> Dict[int, Dict[str, Any]]: @@ -191,13 +203,13 @@ class PortPool: except (IOError, OSError): # Still locked by another process os.close(fd) - except Exception: - pass + except Exception as e: + logger.debug("Failed to check stale lock for port %d: %s", port, e) return cleaned -class DockerMixin(MCPMixin): +class DockerMixin(GhydraMixinBase): """Docker container management for GhydraMCP. Provides tools to start, stop, and manage Ghidra containers @@ -226,7 +238,7 @@ class DockerMixin(MCPMixin): """Initialize Docker mixin with session isolation.""" self._check_docker_available() self._session_id = str(uuid.uuid4())[:8] - self._port_pool = PortPool() + self._port_pool = None # Lazy-init to avoid side effects self._session_containers = {} @property @@ -236,6 +248,17 @@ class DockerMixin(MCPMixin): self._session_id = str(uuid.uuid4())[:8] return self._session_id + @property + def port_pool(self) -> PortPool: + """Get the port pool, creating it on first access. + + Lazy initialization avoids creating /tmp/ghydramcp-ports + until Docker tools are actually used. + """ + if self._port_pool is None: + self._port_pool = PortPool() + return self._port_pool + def _check_docker_available(self) -> bool: """Check if Docker is available on the system.""" return shutil.which("docker") is not None @@ -276,8 +299,7 @@ class DockerMixin(MCPMixin): Returns: CompletedProcess result """ - loop = asyncio.get_event_loop() - return await loop.run_in_executor( + return await asyncio.get_running_loop().run_in_executor( None, self._run_docker_cmd_sync, args, check, capture ) @@ -463,9 +485,8 @@ class DockerMixin(MCPMixin): # List containers from this session only result["session_containers"] = await self._find_containers_by_label(session_only=True) - # Get port pool status - if self._port_pool: - result["port_pool"]["allocated"] = self._port_pool.get_allocated_ports() + # Get port pool status (lazy-init creates pool on first access) + result["port_pool"]["allocated"] = self.port_pool.get_allocated_ports() # Also check by name pattern for containers without labels try: @@ -561,11 +582,11 @@ class DockerMixin(MCPMixin): return {"error": f"Binary not found: {binary_path}"} # Always allocate from pool to prevent conflicts between sessions - port = self._port_pool.allocate(self.session_id) + port = self.port_pool.allocate(self.session_id) if port is None: return { "error": "Port pool exhausted (8192-8223). Stop some containers first.", - "allocated_ports": self._port_pool.get_allocated_ports(), + "allocated_ports": self.port_pool.get_allocated_ports(), } # Generate container name if not specified @@ -581,7 +602,7 @@ class DockerMixin(MCPMixin): ["ps", "-a", "-q", "-f", f"name=^{name}$"], check=False ) if check_result.stdout.strip(): - self._port_pool.release(port) + self.port_pool.release(port) return { "error": f"Container '{name}' already exists. Stop it first with docker_stop." } @@ -591,7 +612,7 @@ class DockerMixin(MCPMixin): ["ps", "-q", "-f", f"publish={port}"], check=False ) if port_check.stdout.strip(): - self._port_pool.release(port) + self.port_pool.release(port) return { "error": f"Port {port} is already in use by another container" } @@ -647,7 +668,7 @@ class DockerMixin(MCPMixin): } except subprocess.CalledProcessError as e: - self._port_pool.release(port) + self.port_pool.release(port) return {"error": f"Failed to start container: {e.stderr or e.stdout}"} @mcp_tool( @@ -710,7 +731,7 @@ class DockerMixin(MCPMixin): # Release the port back to the pool if container_port: - self._port_pool.release(container_port) + self.port_pool.release(container_port) # Remove from session tracking self._session_containers = { @@ -893,8 +914,7 @@ class DockerMixin(MCPMixin): Health status and API info if available """ port = self.get_instance_port(port) - loop = asyncio.get_event_loop() - return await loop.run_in_executor( + return await asyncio.get_running_loop().run_in_executor( None, self._sync_health_check, port, timeout ) @@ -1060,9 +1080,8 @@ class DockerMixin(MCPMixin): pass # Clean up stale port locks - if self._port_pool: - stale_ports = self._port_pool.cleanup_stale_locks(max_age_hours * 3600) - result["ports_cleaned"] = stale_ports + stale_ports = self.port_pool.cleanup_stale_locks(max_age_hours * 3600) + result["ports_cleaned"] = stale_ports return result @@ -1086,8 +1105,8 @@ class DockerMixin(MCPMixin): "containers": self._session_containers, "allocated_ports": { port: info - for port, info in self._port_pool.get_allocated_ports().items() + for port, info in self.port_pool.get_allocated_ports().items() if info.get("session_id") == self.session_id - } if self._port_pool else {}, + }, "port_pool_range": f"{PORT_POOL_START}-{PORT_POOL_END}", } diff --git a/src/ghydramcp/mixins/structs.py b/src/ghydramcp/mixins/structs.py index 1463ca8..6a01a87 100644 --- a/src/ghydramcp/mixins/structs.py +++ b/src/ghydramcp/mixins/structs.py @@ -99,7 +99,7 @@ class StructsMixin(GhydraMixinBase): grep: Optional[str] = None, grep_ignorecase: bool = True, return_all: bool = False, - project_fields: Optional[List[str]] = None, + fields: Optional[List[str]] = None, ctx: Optional[Context] = None, ) -> Dict[str, Any]: """Get detailed information about a struct with field pagination. @@ -111,7 +111,7 @@ class StructsMixin(GhydraMixinBase): grep: Regex pattern to filter fields grep_ignorecase: Case-insensitive grep (default: True) return_all: Return all fields without pagination - project_fields: Field names to keep per struct field item. Reduces response size. + fields: Field names to keep per struct field item. Reduces response size. ctx: FastMCP context (auto-injected) Returns: @@ -145,17 +145,17 @@ class StructsMixin(GhydraMixinBase): # Extract struct info and fields struct_info = {} - fields = [] + struct_fields = [] if isinstance(result, dict): for key, value in result.items(): if key == "fields" and isinstance(value, list): - fields = value + struct_fields = value else: struct_info[key] = value # If few fields and no grep, return as-is - if len(fields) <= 10 and not grep: + if len(struct_fields) <= 10 and not grep: return simplified query_params = { @@ -166,7 +166,7 @@ class StructsMixin(GhydraMixinBase): # Paginate fields paginated = self.filtered_paginate( - data=fields, + data=struct_fields, query_params=query_params, tool_name="structs_get", session_id=session_id, @@ -174,7 +174,7 @@ class StructsMixin(GhydraMixinBase): grep=grep, grep_ignorecase=grep_ignorecase, return_all=return_all, - fields=project_fields, + fields=fields, ) # Merge struct metadata with paginated fields (skip if guarded) diff --git a/src/ghydramcp/server.py b/src/ghydramcp/server.py index 959e4b1..c14391c 100644 --- a/src/ghydramcp/server.py +++ b/src/ghydramcp/server.py @@ -14,6 +14,7 @@ from typing import Optional from fastmcp import FastMCP from .config import GhydraConfig, get_config, set_config +from .core.logging import configure_logging from .mixins import ( AnalysisMixin, BookmarksMixin, @@ -154,8 +155,14 @@ def _handle_sigint(signum, frame): def main(): """Main entry point for the GhydraMCP server.""" + import logging + import os import shutil + # Configure logging early (DEBUG if GHYDRAMCP_DEBUG is set) + log_level = logging.DEBUG if os.environ.get("GHYDRAMCP_DEBUG") else logging.INFO + configure_logging(log_level) + try: from importlib.metadata import version package_version = version("ghydramcp") @@ -191,12 +198,13 @@ def main(): try: response = safe_get(port, "") if response.get("success", False): - GhydraMixinBase._instances[port] = { - "url": f"http://{config.ghidra_host}:{port}", - "project": response.get("project", ""), - "file": response.get("file", ""), - "discovered_at": time.time(), - } + with GhydraMixinBase._instances_lock: + GhydraMixinBase._instances[port] = { + "url": f"http://{config.ghidra_host}:{port}", + "project": response.get("project", ""), + "file": response.get("file", ""), + "discovered_at": time.time(), + } found += 1 print(f" ✓ Found instance on port {port}", file=sys.stderr) except Exception: