fix: Address code review issues across core modules
Some checks are pending
Build Ghidra Plugin / build (push) Waiting to run

- http_client: Defensive copy before .pop() to avoid mutating caller's dict
- analysis.py: Add debug logging for fallback paths instead of silently swallowing exceptions
- docker.py: Add debug logging to PortPool exception handlers
- docker.py: Fix file descriptor leak in _try_acquire_port with inner try/except
- docker.py: Lazy PortPool initialization via property to avoid side effects
- server.py: Wrap initial discovery in _instances_lock for thread safety
- server.py: Call configure_logging() at startup with GHYDRAMCP_DEBUG support
- pagination.py: Use SHA-256 instead of MD5 for query hash consistency
- base.py: Add proper type annotations (Dict[str, Any])
- filtering.py: Use List[str] from typing for consistency
- filtering.py: Add docstrings to private helper methods
- structs.py: Rename project_fields param to fields for API consistency
- logging.py: Fix import path from deprecated mcp.server.fastmcp to fastmcp
This commit is contained in:
Ryan Malloy 2026-02-06 04:50:47 -07:00
parent 04f3011413
commit d1750cb339
10 changed files with 167 additions and 74 deletions

View File

@ -68,6 +68,21 @@ The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.1.0/).
- **Session Isolation:** `docker_stop` now validates container belongs to current session before stopping. `docker_cleanup` defaults to `session_only=True` to prevent cross-session interference.
- **Background Discovery Thread:** Fixed timeout from 30s to 0.5s for port scanning, reducing discovery cycle from 300s+ to ~15s.
- **Typedef/Variable Type Resolution:** Fixed `handle_typedef_create` and `handle_variable_rename` to use shared `resolve_data_type()` for builtin types (int, char, etc.).
- **DockerMixin Inheritance:** Fixed crash when `DockerMixin` called `get_instance_port()` — it was inheriting from the wrong base class.
- **Deprecated asyncio API:** Replaced `asyncio.get_event_loop()` with `asyncio.get_running_loop()` for Python 3.10+ compatibility.
- **HTTP Client Data Mutation:** `safe_post`, `safe_put`, and `safe_patch` no longer mutate the caller's data dict via `.pop()`.
- **Race Condition in Discovery:** Initial instance discovery in `main()` now uses `_instances_lock` for thread safety.
- **Silent Exception Handling:** Added debug logging to PortPool exception handlers and analysis fallback paths.
- **File Descriptor Leak:** Fixed potential leak in `PortPool._try_acquire_port()` if write operations fail after lock acquisition.
- **Hash Algorithm Consistency:** Changed query hash from MD5 to SHA-256 in pagination module for consistency with cursor ID generation.
- **Lazy PortPool Initialization:** `PortPool` now created on first use, avoiding `/tmp/ghydramcp-ports` directory creation when Docker tools are never used.
- **Logging Configuration:** `configure_logging()` is now called during server startup, so debug messages are actually emitted.
- **Type Hint Consistency:** Aligned `filtering.py` to use `List[T]` from typing module like rest of codebase.
- **Parameter Naming:** Renamed `project_fields` to `fields` in `structs_get()` for consistency with other tools.
- **Import Path:** Fixed `logging.py` to import `Context` from `fastmcp` (not deprecated `mcp.server.fastmcp` path).
### Added
- **Debug Logging Environment Variable:** Set `GHYDRAMCP_DEBUG=1` to enable DEBUG-level logging for troubleshooting.
## [2025.12.1] - 2025-12-01

View File

@ -7,7 +7,7 @@ enforcement to prevent oversized MCP tool results.
import json
import re
import time
from typing import Any, Dict, Optional
from typing import Any, Dict, List, Optional
from ..config import get_config
@ -15,7 +15,7 @@ from ..config import get_config
TOKEN_ESTIMATION_RATIO = 4.0
def project_fields(items: list, fields: list[str]) -> list:
def project_fields(items: List[Any], fields: List[str]) -> List[Any]:
"""Select only specified keys from each item (jq-style projection).
Works on dicts and strings. For dicts, returns only the requested
@ -42,7 +42,7 @@ def project_fields(items: list, fields: list[str]) -> list:
return projected
def apply_grep(items: list, pattern: str, ignorecase: bool = True) -> list:
def apply_grep(items: List[Any], pattern: str, ignorecase: bool = True) -> List[Any]:
"""Filter items by regex pattern across all string values.
Searches all string-coercible values in each item. For dicts,
@ -90,13 +90,33 @@ def _matches(item: Any, pattern: re.Pattern, depth: int = 0) -> bool:
def _estimate_tokens(data: Any) -> int:
"""Estimate token count from serialized JSON size."""
"""Estimate token count from serialized JSON size.
Uses a simple heuristic: ~4 characters per token on average.
This matches the TOKEN_ESTIMATION_RATIO constant.
Args:
data: Any JSON-serializable data structure
Returns:
Estimated token count
"""
text = json.dumps(data, default=str)
return int(len(text) / TOKEN_ESTIMATION_RATIO)
def _extract_available_fields(items: list) -> list[str]:
"""Extract the set of field names from the first few dict items."""
def _extract_available_fields(items: List[Any]) -> List[str]:
"""Extract the set of field names from the first few dict items.
Samples up to 5 items to discover available keys, useful for
suggesting field projections to reduce response size.
Args:
items: List of items (only dicts are examined)
Returns:
Sorted list of unique field names (excludes internal _links)
"""
fields = set()
for item in items[:5]:
if isinstance(item, dict):
@ -107,11 +127,11 @@ def _extract_available_fields(items: list) -> list[str]:
def estimate_and_guard(
data: list,
data: List[Any],
tool_name: str,
budget: Optional[int] = None,
query_hints: Optional[Dict[str, Any]] = None,
) -> Dict[str, Any]:
) -> Optional[Dict[str, Any]]:
"""Check if data exceeds token budget; return guard response if so.
If data fits within budget, returns None (caller should proceed
@ -164,7 +184,17 @@ def estimate_and_guard(
def _format_tokens(n: int) -> str:
"""Format token count for display (e.g. 45000 -> '45k')."""
"""Format token count for human-readable display.
Large numbers are abbreviated with 'k' suffix for readability
in error messages and hints.
Args:
n: Token count
Returns:
Formatted string (e.g., 45000 -> '45k', 500 -> '500')
"""
if n >= 1000:
return "%dk" % (n // 1000)
return str(n)
@ -172,7 +202,7 @@ def _format_tokens(n: int) -> str:
def _build_hints(
tool_name: str,
available_fields: list[str],
available_fields: List[str],
query_hints: Optional[Dict[str, Any]] = None,
) -> str:
"""Build actionable hint text for the guard message."""

View File

@ -244,6 +244,7 @@ def safe_post(
text_payload = None
if isinstance(data, dict):
data = data.copy() # Don't mutate caller's dict
headers = data.pop("headers", None)
json_payload = data
else:
@ -265,7 +266,11 @@ def safe_put(port: int, endpoint: str, data: Dict[str, Any]) -> Dict[str, Any]:
Returns:
Response dict
"""
headers = data.pop("headers", None) if isinstance(data, dict) else None
if isinstance(data, dict):
data = data.copy() # Don't mutate caller's dict
headers = data.pop("headers", None)
else:
headers = None
return _make_request("PUT", port, endpoint, json_data=data, headers=headers)
@ -280,7 +285,11 @@ def safe_patch(port: int, endpoint: str, data: Dict[str, Any]) -> Dict[str, Any]
Returns:
Response dict
"""
headers = data.pop("headers", None) if isinstance(data, dict) else None
if isinstance(data, dict):
data = data.copy() # Don't mutate caller's dict
headers = data.pop("headers", None)
else:
headers = None
return _make_request("PATCH", port, endpoint, json_data=data, headers=headers)

View File

@ -8,7 +8,7 @@ import logging
from typing import TYPE_CHECKING, Optional
if TYPE_CHECKING:
from mcp.server.fastmcp import Context
from fastmcp import Context
# Standard Python logger as fallback
logger = logging.getLogger("ghydramcp")

View File

@ -186,8 +186,8 @@ class CursorManager:
item for item in data if self._matches_grep(item, pattern)
]
# Create query hash
query_hash = hashlib.md5(
# Create query hash (SHA-256 for consistency with cursor ID generation)
query_hash = hashlib.sha256(
json.dumps(query_params, sort_keys=True, default=str).encode()
).hexdigest()[:12]

View File

@ -9,6 +9,7 @@ from fastmcp import Context
from fastmcp.contrib.mcp_mixin import mcp_tool
from ..config import get_config
from ..core.logging import logger
from .base import GhydraMixinBase
@ -380,13 +381,18 @@ class AnalysisMixin(GhydraMixinBase):
return {"success": False, "error": {"code": "NO_INSTANCE", "message": str(e)}}
# Try setting as function comment first
try:
payload = {"comment": comment}
response = self.safe_patch(port, f"functions/{address}", payload)
if response.get("success", False):
return self.simplify_response(response)
except Exception:
pass
payload = {"comment": comment}
response = self.safe_patch(port, f"functions/{address}", payload)
if response.get("success", False):
return self.simplify_response(response)
# Log why function comment failed before falling back
error = response.get("error", {})
logger.debug(
"Function comment at %s failed (%s), falling back to pre-comment",
address,
error.get("code", "UNKNOWN"),
)
# Fallback to pre-comment
return self.comments_set(

View File

@ -182,27 +182,33 @@ class GhydraMixinBase(MCPMixin):
return "default"
# Convenience methods for subclasses
def safe_get(self, port: int, endpoint: str, params: Optional[Dict] = None) -> Dict:
def safe_get(
self, port: int, endpoint: str, params: Optional[Dict[str, Any]] = None
) -> Dict[str, Any]:
"""Make GET request to Ghidra instance."""
return safe_get(port, endpoint, params)
def safe_post(self, port: int, endpoint: str, data: Any) -> Dict:
def safe_post(self, port: int, endpoint: str, data: Any) -> Dict[str, Any]:
"""Make POST request to Ghidra instance."""
return safe_post(port, endpoint, data)
def safe_put(self, port: int, endpoint: str, data: Dict) -> Dict:
def safe_put(
self, port: int, endpoint: str, data: Dict[str, Any]
) -> Dict[str, Any]:
"""Make PUT request to Ghidra instance."""
return safe_put(port, endpoint, data)
def safe_patch(self, port: int, endpoint: str, data: Dict) -> Dict:
def safe_patch(
self, port: int, endpoint: str, data: Dict[str, Any]
) -> Dict[str, Any]:
"""Make PATCH request to Ghidra instance."""
return safe_patch(port, endpoint, data)
def safe_delete(self, port: int, endpoint: str) -> Dict:
def safe_delete(self, port: int, endpoint: str) -> Dict[str, Any]:
"""Make DELETE request to Ghidra instance."""
return safe_delete(port, endpoint)
def simplify_response(self, response: Dict) -> Dict:
def simplify_response(self, response: Dict[str, Any]) -> Dict[str, Any]:
"""Simplify HATEOAS response."""
return simplify_response(response)

View File

@ -19,7 +19,10 @@ from pathlib import Path
from typing import Any, Dict, List, Optional
from fastmcp import Context
from fastmcp.contrib.mcp_mixin import MCPMixin, mcp_tool
from fastmcp.contrib.mcp_mixin import mcp_tool
from ghydramcp.core.logging import logger
from ghydramcp.mixins.base import GhydraMixinBase
# Port pool configuration (32 ports should handle many concurrent sessions)
PORT_POOL_START = 8192
@ -63,6 +66,7 @@ class PortPool:
True if port was acquired, False if already in use
"""
lock_path = self._lock_file(port)
fd = None
try:
# Open or create the lock file
@ -77,14 +81,20 @@ class PortPool:
return False
# Write session info to the lock file
os.ftruncate(fd, 0)
os.lseek(fd, 0, os.SEEK_SET)
lock_data = json.dumps({
"session_id": session_id,
"pid": os.getpid(),
"timestamp": time.time(),
})
os.write(fd, lock_data.encode())
# If this fails, we need to close fd to release the lock
try:
os.ftruncate(fd, 0)
os.lseek(fd, 0, os.SEEK_SET)
lock_data = json.dumps({
"session_id": session_id,
"pid": os.getpid(),
"timestamp": time.time(),
})
os.write(fd, lock_data.encode())
except Exception:
# Write failed - release the lock
os.close(fd)
raise
# Keep the file descriptor open to maintain the lock
# Store it so we can release later
@ -94,7 +104,8 @@ class PortPool:
return True
except Exception:
except Exception as e:
logger.debug("Failed to acquire port %d: %s", port, e)
return False
def allocate(self, session_id: str) -> Optional[int]:
@ -134,7 +145,8 @@ class PortPool:
lock_path.unlink()
return True
except Exception:
except Exception as e:
logger.debug("Failed to release port %d: %s", port, e)
return False
def get_allocated_ports(self) -> Dict[int, Dict[str, Any]]:
@ -191,13 +203,13 @@ class PortPool:
except (IOError, OSError):
# Still locked by another process
os.close(fd)
except Exception:
pass
except Exception as e:
logger.debug("Failed to check stale lock for port %d: %s", port, e)
return cleaned
class DockerMixin(MCPMixin):
class DockerMixin(GhydraMixinBase):
"""Docker container management for GhydraMCP.
Provides tools to start, stop, and manage Ghidra containers
@ -226,7 +238,7 @@ class DockerMixin(MCPMixin):
"""Initialize Docker mixin with session isolation."""
self._check_docker_available()
self._session_id = str(uuid.uuid4())[:8]
self._port_pool = PortPool()
self._port_pool = None # Lazy-init to avoid side effects
self._session_containers = {}
@property
@ -236,6 +248,17 @@ class DockerMixin(MCPMixin):
self._session_id = str(uuid.uuid4())[:8]
return self._session_id
@property
def port_pool(self) -> PortPool:
"""Get the port pool, creating it on first access.
Lazy initialization avoids creating /tmp/ghydramcp-ports
until Docker tools are actually used.
"""
if self._port_pool is None:
self._port_pool = PortPool()
return self._port_pool
def _check_docker_available(self) -> bool:
"""Check if Docker is available on the system."""
return shutil.which("docker") is not None
@ -276,8 +299,7 @@ class DockerMixin(MCPMixin):
Returns:
CompletedProcess result
"""
loop = asyncio.get_event_loop()
return await loop.run_in_executor(
return await asyncio.get_running_loop().run_in_executor(
None, self._run_docker_cmd_sync, args, check, capture
)
@ -463,9 +485,8 @@ class DockerMixin(MCPMixin):
# List containers from this session only
result["session_containers"] = await self._find_containers_by_label(session_only=True)
# Get port pool status
if self._port_pool:
result["port_pool"]["allocated"] = self._port_pool.get_allocated_ports()
# Get port pool status (lazy-init creates pool on first access)
result["port_pool"]["allocated"] = self.port_pool.get_allocated_ports()
# Also check by name pattern for containers without labels
try:
@ -561,11 +582,11 @@ class DockerMixin(MCPMixin):
return {"error": f"Binary not found: {binary_path}"}
# Always allocate from pool to prevent conflicts between sessions
port = self._port_pool.allocate(self.session_id)
port = self.port_pool.allocate(self.session_id)
if port is None:
return {
"error": "Port pool exhausted (8192-8223). Stop some containers first.",
"allocated_ports": self._port_pool.get_allocated_ports(),
"allocated_ports": self.port_pool.get_allocated_ports(),
}
# Generate container name if not specified
@ -581,7 +602,7 @@ class DockerMixin(MCPMixin):
["ps", "-a", "-q", "-f", f"name=^{name}$"], check=False
)
if check_result.stdout.strip():
self._port_pool.release(port)
self.port_pool.release(port)
return {
"error": f"Container '{name}' already exists. Stop it first with docker_stop."
}
@ -591,7 +612,7 @@ class DockerMixin(MCPMixin):
["ps", "-q", "-f", f"publish={port}"], check=False
)
if port_check.stdout.strip():
self._port_pool.release(port)
self.port_pool.release(port)
return {
"error": f"Port {port} is already in use by another container"
}
@ -647,7 +668,7 @@ class DockerMixin(MCPMixin):
}
except subprocess.CalledProcessError as e:
self._port_pool.release(port)
self.port_pool.release(port)
return {"error": f"Failed to start container: {e.stderr or e.stdout}"}
@mcp_tool(
@ -710,7 +731,7 @@ class DockerMixin(MCPMixin):
# Release the port back to the pool
if container_port:
self._port_pool.release(container_port)
self.port_pool.release(container_port)
# Remove from session tracking
self._session_containers = {
@ -893,8 +914,7 @@ class DockerMixin(MCPMixin):
Health status and API info if available
"""
port = self.get_instance_port(port)
loop = asyncio.get_event_loop()
return await loop.run_in_executor(
return await asyncio.get_running_loop().run_in_executor(
None, self._sync_health_check, port, timeout
)
@ -1060,9 +1080,8 @@ class DockerMixin(MCPMixin):
pass
# Clean up stale port locks
if self._port_pool:
stale_ports = self._port_pool.cleanup_stale_locks(max_age_hours * 3600)
result["ports_cleaned"] = stale_ports
stale_ports = self.port_pool.cleanup_stale_locks(max_age_hours * 3600)
result["ports_cleaned"] = stale_ports
return result
@ -1086,8 +1105,8 @@ class DockerMixin(MCPMixin):
"containers": self._session_containers,
"allocated_ports": {
port: info
for port, info in self._port_pool.get_allocated_ports().items()
for port, info in self.port_pool.get_allocated_ports().items()
if info.get("session_id") == self.session_id
} if self._port_pool else {},
},
"port_pool_range": f"{PORT_POOL_START}-{PORT_POOL_END}",
}

View File

@ -99,7 +99,7 @@ class StructsMixin(GhydraMixinBase):
grep: Optional[str] = None,
grep_ignorecase: bool = True,
return_all: bool = False,
project_fields: Optional[List[str]] = None,
fields: Optional[List[str]] = None,
ctx: Optional[Context] = None,
) -> Dict[str, Any]:
"""Get detailed information about a struct with field pagination.
@ -111,7 +111,7 @@ class StructsMixin(GhydraMixinBase):
grep: Regex pattern to filter fields
grep_ignorecase: Case-insensitive grep (default: True)
return_all: Return all fields without pagination
project_fields: Field names to keep per struct field item. Reduces response size.
fields: Field names to keep per struct field item. Reduces response size.
ctx: FastMCP context (auto-injected)
Returns:
@ -145,17 +145,17 @@ class StructsMixin(GhydraMixinBase):
# Extract struct info and fields
struct_info = {}
fields = []
struct_fields = []
if isinstance(result, dict):
for key, value in result.items():
if key == "fields" and isinstance(value, list):
fields = value
struct_fields = value
else:
struct_info[key] = value
# If few fields and no grep, return as-is
if len(fields) <= 10 and not grep:
if len(struct_fields) <= 10 and not grep:
return simplified
query_params = {
@ -166,7 +166,7 @@ class StructsMixin(GhydraMixinBase):
# Paginate fields
paginated = self.filtered_paginate(
data=fields,
data=struct_fields,
query_params=query_params,
tool_name="structs_get",
session_id=session_id,
@ -174,7 +174,7 @@ class StructsMixin(GhydraMixinBase):
grep=grep,
grep_ignorecase=grep_ignorecase,
return_all=return_all,
fields=project_fields,
fields=fields,
)
# Merge struct metadata with paginated fields (skip if guarded)

View File

@ -14,6 +14,7 @@ from typing import Optional
from fastmcp import FastMCP
from .config import GhydraConfig, get_config, set_config
from .core.logging import configure_logging
from .mixins import (
AnalysisMixin,
BookmarksMixin,
@ -154,8 +155,14 @@ def _handle_sigint(signum, frame):
def main():
"""Main entry point for the GhydraMCP server."""
import logging
import os
import shutil
# Configure logging early (DEBUG if GHYDRAMCP_DEBUG is set)
log_level = logging.DEBUG if os.environ.get("GHYDRAMCP_DEBUG") else logging.INFO
configure_logging(log_level)
try:
from importlib.metadata import version
package_version = version("ghydramcp")
@ -191,12 +198,13 @@ def main():
try:
response = safe_get(port, "")
if response.get("success", False):
GhydraMixinBase._instances[port] = {
"url": f"http://{config.ghidra_host}:{port}",
"project": response.get("project", ""),
"file": response.get("file", ""),
"discovered_at": time.time(),
}
with GhydraMixinBase._instances_lock:
GhydraMixinBase._instances[port] = {
"url": f"http://{config.ghidra_host}:{port}",
"project": response.get("project", ""),
"file": response.get("file", ""),
"discovered_at": time.time(),
}
found += 1
print(f" ✓ Found instance on port {port}", file=sys.stderr)
except Exception: