diff --git a/docker/entrypoint.sh b/docker/entrypoint.sh index ba80818..225f154 100755 --- a/docker/entrypoint.sh +++ b/docker/entrypoint.sh @@ -44,10 +44,13 @@ case "${MCGHIDRA_MODE}" in echo " docker run -p 8192:8192 -v ./samples:/binaries -e PROJECT_NAME=malware mcghidra /binaries/sample.exe" echo "" echo "Environment variables:" - echo " MCGHIDRA_PORT - HTTP API port (default: 8192)" - echo " MCGHIDRA_MAXMEM - Max JVM heap (default: 2G)" - echo " PROJECT_NAME - Ghidra project name (default: MCGhidra)" - echo " PROJECT_DIR - Project directory (default: /projects)" + echo " MCGHIDRA_PORT - HTTP API port (default: 8192)" + echo " MCGHIDRA_MAXMEM - Max JVM heap (default: 2G)" + echo " PROJECT_NAME - Ghidra project name (default: MCGhidra)" + echo " PROJECT_DIR - Project directory (default: /projects)" + echo " GHIDRA_LANGUAGE - Processor language ID (e.g., ARM:LE:32:v4t)" + echo " GHIDRA_BASE_ADDRESS - Base address for raw binaries (e.g., 0x00000000)" + echo " GHIDRA_LOADER - Loader type (e.g., BinaryLoader for raw firmware)" echo "" echo "Starting in wait mode..." echo "Container will stay running for debugging or manual operation." @@ -81,6 +84,36 @@ case "${MCGHIDRA_MODE}" in -postScript "MCGhidraServer.py" "${MCGHIDRA_PORT}" ) + # Optional: processor/language for raw binaries + if [ -n "${GHIDRA_LANGUAGE}" ]; then + if ! echo "${GHIDRA_LANGUAGE}" | grep -qE '^[A-Za-z0-9_]+:[A-Z]{2}:[0-9]+:[A-Za-z0-9._-]+$'; then + echo "ERROR: Invalid GHIDRA_LANGUAGE format: ${GHIDRA_LANGUAGE}" + echo "Expected: ARCH:ENDIAN:SIZE:VARIANT (e.g., ARM:LE:32:v4t)" + exit 1 + fi + ANALYZE_ARGS+=(-processor "${GHIDRA_LANGUAGE}") + fi + + # Optional: base address + if [ -n "${GHIDRA_BASE_ADDRESS}" ]; then + if ! 
echo "${GHIDRA_BASE_ADDRESS}" | grep -qE '^(0x)?[0-9a-fA-F]+$'; then
+                echo "ERROR: Invalid GHIDRA_BASE_ADDRESS format: ${GHIDRA_BASE_ADDRESS}"
+                echo "Expected hex: 0x00000000 or 00000000"
+                exit 1
+            fi
+            ANALYZE_ARGS+=(-loader-baseAddr "${GHIDRA_BASE_ADDRESS}")
+        fi
+
+        # Optional: explicit loader (e.g., BinaryLoader for raw firmware)
+        if [ -n "${GHIDRA_LOADER}" ]; then
+            if ! echo "${GHIDRA_LOADER}" | grep -qE '^[A-Za-z0-9_]+$'; then
+                echo "ERROR: Invalid GHIDRA_LOADER format: ${GHIDRA_LOADER}"
+                echo "Expected alphanumeric name (e.g., BinaryLoader)"
+                exit 1
+            fi
+            ANALYZE_ARGS+=(-loader "${GHIDRA_LOADER}")
+        fi
+
         # Add any extra arguments passed
         ANALYZE_ARGS+=("$@")
 
diff --git a/pyproject.toml b/pyproject.toml
index 3dd893f..f947709 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -1,6 +1,6 @@
 [project]
 name = "mcghidra"
-version = "2026.3.6"
+version = "2026.3.6.1"
 description = "Reverse engineering bridge: multi-instance Ghidra plugin with HATEOAS REST API and MCP server for decompilation, analysis & binary manipulation"
 readme = "README.md"
 requires-python = ">=3.11"
diff --git a/src/mcghidra/mixins/docker.py b/src/mcghidra/mixins/docker.py
index d6c01ad..ae4c319 100644
--- a/src/mcghidra/mixins/docker.py
+++ b/src/mcghidra/mixins/docker.py
@@ -11,6 +11,7 @@ import asyncio
 import fcntl
 import json
 import os
+import re
 import shutil
 import subprocess
 import time
@@ -378,6 +379,28 @@ class DockerMixin(MCGhidraMixinBase):
             f"{self.LABEL_PREFIX}.pid": str(os.getpid()),
         }
 
+    @staticmethod
+    def _validate_ghidra_language(language: str) -> bool:
+        """Validate Ghidra language ID format (e.g., ARM:LE:32:v4t); the whole string must match."""
+        return bool(re.fullmatch(r'^[A-Za-z0-9_]+:[A-Z]{2}:[0-9]+:[A-Za-z0-9._-]+$', language))
+
+    @staticmethod
+    def _validate_hex_address(address: str) -> bool:
+        """Validate hex address (e.g., 0x00000000 or 00000000) that fits in 64 bits; the whole string must match."""
+        if not re.fullmatch(r'^(0x)?[0-9a-fA-F]+$', address):
+            return False
+        addr_str = address[2:] if address.startswith("0x") else address
+        try:
+            val = int(addr_str, 16)
+            return 0 <= val <= 0xFFFFFFFFFFFFFFFF
+        except ValueError:
+            return False
+
+    @staticmethod
+    def _validate_loader_name(loader: str) -> bool:
+        """Validate Ghidra loader name (alphanumeric + underscore); the whole string must match."""
+        return bool(re.fullmatch(r'^[A-Za-z0-9_]+$', loader))
+
     async def _find_containers_by_label(
         self,
         label_filter: Optional[str] = None,
@@ -553,6 +576,9 @@ class DockerMixin(MCGhidraMixinBase):
         binary_path: str,
         memory: str = "2G",
         name: Optional[str] = None,
+        language: Optional[str] = None,
+        base_address: Optional[str] = None,
+        loader: Optional[str] = None,
         ctx: Optional[Context] = None,
     ) -> Dict[str, Any]:
         """Start a MCGhidra Docker container for binary analysis.
@@ -569,6 +595,9 @@ class DockerMixin(MCGhidraMixinBase):
             binary_path: Path to the binary file to analyze
             memory: Max JVM heap memory (default: 2G)
             name: Container name (auto-generated if not specified)
+            language: Ghidra processor language ID for raw binaries (e.g., "ARM:LE:32:v4t")
+            base_address: Base address for raw binaries (e.g., "0x00000000")
+            loader: Ghidra loader type (e.g., "BinaryLoader"). Auto-set when language is specified.
Returns: Container info including ID, name, port, and API URL @@ -641,6 +670,38 @@ class DockerMixin(MCGhidraMixinBase): for k, v in labels.items(): label_args.extend(["-l", f"{k}={v}"]) + # Validate firmware import parameters + if language and not self._validate_ghidra_language(language): + self.port_pool.release(port) + return { + "error": f"Invalid language format: {language}", + "hint": "Expected ARCH:ENDIAN:SIZE:VARIANT (e.g., ARM:LE:32:v4t)", + } + if base_address and not self._validate_hex_address(base_address): + self.port_pool.release(port) + return { + "error": f"Invalid base address: {base_address}", + "hint": "Expected hex format: 0x00000000 or 00000000", + } + if loader and not self._validate_loader_name(loader): + self.port_pool.release(port) + return { + "error": f"Invalid loader name: {loader}", + "hint": "Expected alphanumeric name (e.g., BinaryLoader)", + } + + # Build environment variable arguments + env_args = ["-e", f"MCGHIDRA_MAXMEM={memory}"] + if language: + env_args.extend(["-e", f"GHIDRA_LANGUAGE={language}"]) + # Auto-set BinaryLoader when language is explicitly specified + if not loader: + loader = "BinaryLoader" + if base_address: + env_args.extend(["-e", f"GHIDRA_BASE_ADDRESS={base_address}"]) + if loader: + env_args.extend(["-e", f"GHIDRA_LOADER={loader}"]) + # Start the container run_result = await self._run_docker_cmd( [ @@ -652,8 +713,7 @@ class DockerMixin(MCGhidraMixinBase): f"{port}:8192", "-v", f"{binary_file.parent}:/binaries:ro", - "-e", - f"MCGHIDRA_MAXMEM={memory}", + *env_args, *label_args, "mcghidra:latest", f"/binaries/{binary_file.name}", @@ -663,14 +723,19 @@ class DockerMixin(MCGhidraMixinBase): container_id = run_result.stdout.strip() # Track the container in this session - self._session_containers[container_id] = { + session_info: Dict[str, Any] = { "name": name, "port": port, "binary": str(binary_file), "memory": memory, } + if language: + session_info["language"] = language + if base_address: + 
session_info["base_address"] = base_address + self._session_containers[container_id] = session_info - return { + result_info: Dict[str, Any] = { "success": True, "session_id": self.session_id, "container_id": container_id[:12], @@ -684,6 +749,11 @@ class DockerMixin(MCGhidraMixinBase): f"Use docker_logs('{name}') to monitor progress." ), } + if language: + result_info["language"] = language + if base_address: + result_info["base_address"] = base_address + return result_info except subprocess.CalledProcessError as e: if port is not None: @@ -968,6 +1038,9 @@ class DockerMixin(MCGhidraMixinBase): async def docker_auto_start( self, binary_path: str, + language: Optional[str] = None, + base_address: Optional[str] = None, + loader: Optional[str] = None, ctx: Optional[Context] = None, ) -> Dict[str, Any]: """Automatically start a Docker container with intelligent port allocation. @@ -985,6 +1058,9 @@ class DockerMixin(MCGhidraMixinBase): Args: binary_path: Path to the binary to analyze + language: Ghidra processor language ID for raw binaries (e.g., "ARM:LE:32:v4t") + base_address: Base address for raw binaries (e.g., "0x00000000") + loader: Ghidra loader type (e.g., "BinaryLoader"). Auto-set when language is specified. Returns: Instance connection info with session ID and port details. @@ -1031,7 +1107,11 @@ class DockerMixin(MCGhidraMixinBase): # Start a new container (port auto-allocated from pool) start_result = await self.docker_start( - binary_path=binary_path, ctx=ctx + binary_path=binary_path, + language=language, + base_address=base_address, + loader=loader, + ctx=ctx, ) if not start_result.get("success"): diff --git a/uv.lock b/uv.lock index f2d2a32..1e173fb 100644 --- a/uv.lock +++ b/uv.lock @@ -572,7 +572,7 @@ wheels = [ [[package]] name = "mcghidra" -version = "2026.2.11" +version = "2026.3.6.1" source = { editable = "." } dependencies = [ { name = "fastmcp" },