diff --git a/.gitignore b/.gitignore
index c745086..b0b9227 100644
--- a/.gitignore
+++ b/.gitignore
@@ -50,3 +50,10 @@
 hs_err_pid*
 replay_pid*
 /__pycache__
+
+# Docker volumes and binaries
+/binaries/*
+!/binaries/.gitkeep
+
+# Ghidra source (fetched separately)
+/ghidra-src/
diff --git a/docker/Dockerfile b/docker/Dockerfile
index f5b4ade..2a9b1db 100644
--- a/docker/Dockerfile
+++ b/docker/Dockerfile
@@ -110,20 +110,16 @@ RUN mkdir -p /opt/ghidra/Ghidra/Extensions \
 RUN mkdir -p /projects /binaries /home/ghidra/.ghidra \
     && chown -R ghidra:ghidra /projects /binaries /home/ghidra
 
-# Copy GhydraMCP scripts to the BSim module's scripts directory
-# BSim is a working feature module with proper OSGi bundle configuration for scripts
-COPY docker/GhydraMCPServer.java /opt/ghidra/Ghidra/Features/BSim/ghidra_scripts/
-COPY docker/ImportRawARM.java /opt/ghidra/Ghidra/Features/BSim/ghidra_scripts/
-COPY docker/TestScript.java /opt/ghidra/Ghidra/Features/BSim/ghidra_scripts/
+# Copy GhydraMCP scripts (Python server, Java import helper) to the user scripts directory
+# Python/Jython scripts don't require OSGi bundle registration - they work without issue
+RUN mkdir -p /home/ghidra/ghidra_scripts
+COPY docker/GhydraMCPServer.py /home/ghidra/ghidra_scripts/
+COPY docker/ImportRawARM.java /home/ghidra/ghidra_scripts/
 
-# Set proper ownership, permissions, and timestamp to match Ghidra installation
-# Ghidra appears to validate scripts by timestamp - newer files may be rejected
-RUN chown ghidra:ghidra /opt/ghidra/Ghidra/Features/BSim/ghidra_scripts/GhydraMCPServer.java \
-    /opt/ghidra/Ghidra/Features/BSim/ghidra_scripts/ImportRawARM.java \
-    /opt/ghidra/Ghidra/Features/BSim/ghidra_scripts/TestScript.java \
-    && touch -t 202508261420 /opt/ghidra/Ghidra/Features/BSim/ghidra_scripts/GhydraMCPServer.java \
-    && touch -t 202508261420 /opt/ghidra/Ghidra/Features/BSim/ghidra_scripts/ImportRawARM.java \
-    && touch -t 202508261420 /opt/ghidra/Ghidra/Features/BSim/ghidra_scripts/TestScript.java
+# Set ownership and permissions; the build fails if either step fails
+RUN chown -R ghidra:ghidra /home/ghidra/ghidra_scripts \
+    && chmod 755 /home/ghidra/ghidra_scripts/*.py \
+        /home/ghidra/ghidra_scripts/*.java
 
 # Copy entrypoint script (755 so ghidra user can read and execute)
 COPY docker/entrypoint.sh /entrypoint.sh
diff --git a/docker/ImportRawARM.java b/docker/ImportRawARM.java
new file mode 100644
index 0000000..60fdb69
--- /dev/null
+++ b/docker/ImportRawARM.java
@@ -0,0 +1,218 @@
+// Import and analyze raw ARM firmware binary
+// This script imports a raw binary file with specified ARM processor and load address
+// @author GhydraMCP
+// @category Binary.Import
+// @keybinding
+// @menupath
+// @toolbar
+
+import ghidra.app.script.GhidraScript;
+import ghidra.app.util.bin.ByteArrayProvider;
+import ghidra.app.util.importer.MessageLog;
+import ghidra.app.util.opinion.BinaryLoader;
+import ghidra.app.util.opinion.LoadSpec;
+import ghidra.framework.model.DomainFile;
+import ghidra.framework.model.DomainFolder;
+import ghidra.program.model.address.Address;
+import ghidra.program.model.lang.LanguageCompilerSpecPair;
+import ghidra.program.model.listing.Program;
+import ghidra.util.task.TaskMonitor;
+
+import java.io.File;
+import java.io.IOException;
+import java.nio.file.Files;
+import java.util.List;
+
+/**
+ * Ghidra script that imports a raw firmware image with BinaryLoader at a caller-supplied
+ * base address and language ID, marks the load address as an entry point, saves the
+ * program, runs auto-analysis, and makes it the current program for later scripts.
+ */
+public class ImportRawARM extends GhidraScript {
+
+    /**
+     * Script entry point. Arguments: binary path, hex load address, and an optional
+     * language ID (default ARM:LE:32:v5t). Prints usage when fewer than two are given.
+     * Exceptions are reported with printerr instead of being rethrown.
+     */
+    @Override
+    public void run() throws Exception {
+        String[] args = getScriptArgs();
+
+        if (args.length < 2) {
+            println("========================================");
+            println(" Import Raw ARM Firmware Binary");
+            println("========================================");
+            println("");
+            println("Usage: -postScript ImportRawARM.java <binary_path> <load_address> [language_id]");
+            println("");
+            println("Arguments:");
+            println(" binary_path - Path to raw binary file");
+            println(" load_address - Base address in hex (e.g., 0x00000000)");
+            println(" language_id - ARM language ID (default: ARM:LE:32:v5t)");
+            println("");
+            println("Common ARM Language IDs:");
+            println(" ARM:LE:32:v4 - ARMv4 little-endian");
+            println(" ARM:LE:32:v5 - ARMv5 little-endian");
+            println(" ARM:LE:32:v5t - ARMv5T little-endian (Thumb)");
+            println(" ARM:LE:32:v6 - ARMv6 little-endian");
+            println(" ARM:LE:32:v7 - ARMv7 little-endian");
+            println(" ARM:LE:32:Cortex - ARM Cortex");
+            println(" ARM:BE:32:v5t - ARMv5T big-endian");
+            println("");
+            println("Example:");
+            println(" analyzeHeadless /projects MyProject \\");
+            println(" -postScript ImportRawARM.java /binaries/firmware.bin 0x00000000 ARM:LE:32:v5t");
+            println("");
+            return;
+        }
+
+        String binaryPath = args[0];
+        String loadAddressStr = args[1];
+        String languageIDStr = args.length > 2 ? args[2] : "ARM:LE:32:v5t";
+
+        // Parse load address as hexadecimal; the "0x" prefix is optional
+        long loadAddress;
+        try {
+            String hexDigits = loadAddressStr.toLowerCase().startsWith("0x")
+                ? loadAddressStr.substring(2)
+                : loadAddressStr;
+            loadAddress = Long.parseLong(hexDigits, 16);
+        } catch (NumberFormatException e) {
+            printerr("ERROR: Invalid load address (expected hex, e.g. 0x00000000): " + loadAddressStr);
+            return;
+        }
+
+        File binaryFile = new File(binaryPath);
+        if (!binaryFile.exists()) {
+            printerr("ERROR: Binary file not found: " + binaryPath);
+            return;
+        }
+
+        long fileSize = binaryFile.length();
+
+        println("========================================");
+        println(" Importing Raw ARM Firmware");
+        println("========================================");
+        println(" Binary: " + binaryFile.getName());
+        println(" Size: " + fileSize + " bytes (0x" + Long.toHexString(fileSize) + ")");
+        println(" Load Address: 0x" + String.format("%08X", loadAddress));
+        println(" Language: " + languageIDStr);
+        println("========================================");
+        println("");
+
+        try {
+            // Read the whole binary into memory and wrap it for the loader
+            byte[] bytes = Files.readAllBytes(binaryFile.toPath());
+            ByteArrayProvider provider = new ByteArrayProvider(bytes);
+
+            // Sanity-check the language ID: it must have at least four ':'-separated fields
+            String[] parts = languageIDStr.split(":");
+            if (parts.length < 4) {
+                printerr("ERROR: Invalid language ID format. Expected format: ARCH:ENDIAN:SIZE:VARIANT");
+                printerr(" Example: ARM:LE:32:v5t");
+                return;
+            }
+
+            // Pair the requested language with the "default" compiler spec
+            LanguageCompilerSpecPair lcsPair = new LanguageCompilerSpecPair(languageIDStr, "default");
+
+            // Use BinaryLoader to import
+            BinaryLoader loader = new BinaryLoader();
+
+            // Create load spec with our language and the requested base address
+            LoadSpec loadSpec = new LoadSpec(loader, loadAddress, lcsPair, false);
+
+            // Programs are created in the root folder of the current project
+            DomainFolder rootFolder = state.getProject().getProjectData().getRootFolder();
+
+            String programName = binaryFile.getName();
+
+            println("Creating program: " + programName);
+            println("Using language: " + languageIDStr);
+            println("");
+
+            // Import using BinaryLoader; loader diagnostics are collected in the MessageLog
+            MessageLog log = new MessageLog();
+            List programs = loader.load( // NOTE(review): raw List, yet get(0) is assigned to Program below - confirm element type
+                provider,
+                programName,
+                rootFolder,
+                loadSpec,
+                List.of(), // no loader options
+                log,
+                this, // consumer
+                monitor
+            );
+
+            if (programs == null || programs.isEmpty()) {
+                printerr("ERROR: Failed to load binary");
+                println("");
+                println("Loader messages:");
+                println(log.toString());
+                return;
+            }
+
+            Program program = programs.get(0);
+
+            println("Import successful!");
+            println(" Program: " + program.getName());
+            println(" Base Address: " + program.getImageBase());
+            println(" Memory blocks: " + program.getMemory().getBlocks().length);
+            println("");
+
+            // Set entry point at load address
+            Address entryAddr = program.getAddressFactory().getDefaultAddressSpace().getAddress(loadAddress);
+
+            int txId = program.startTransaction("Set Entry Point");
+            try {
+                program.getSymbolTable().addExternalEntryPoint(entryAddr);
+                program.getSymbolTable().createLabel(entryAddr, "entry",
+                    ghidra.program.model.symbol.SourceType.ANALYSIS);
+                program.endTransaction(txId, true);
+
+                println("Entry point set at: " + entryAddr);
+                println("");
+            } catch (Exception e) {
+                program.endTransaction(txId, false);
+                printerr("Warning: Could not set entry point: " + e.getMessage());
+            }
+
+            // Save the program; a failed save is only a warning and analysis still runs
+            try {
+                program.save("ARM firmware import", monitor);
+                println("Program saved successfully");
+                println("");
+            } catch (Exception e) {
+                printerr("Warning: Could not save program: " + e.getMessage());
+            }
+
+            // Run analysis
+            println("Starting auto-analysis...");
+            println("(This may take a while for large binaries)");
+            println("");
+
+            analyzeAll(program);
+
+            println("");
+            println("========================================");
+            println(" Import Complete!");
+            println("========================================");
+            println(" Program: " + program.getName());
+            println(" Functions found: " + program.getFunctionManager().getFunctionCount());
+            println(" Defined data: " + program.getListing().getNumDefinedData());
+            println("========================================");
+            println("");
+
+            // Set as current program for subsequent scripts
+            state.setCurrentProgram(program);
+
+        } catch (IOException e) {
+            printerr("ERROR: Failed to read binary file: " + e.getMessage());
+            e.printStackTrace();
+        } catch (Exception e) {
+            printerr("ERROR: Import failed: " + e.getMessage());
+            e.printStackTrace();
+        }
+    }
+}
diff --git a/pyproject.toml b/pyproject.toml
index 871f90a..defa090 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -1,21 +1,42 @@
 [project]
 name = "ghydramcp"
-version = "2025.12.1"
+version = "2025.12.3"
 description = "AI-assisted reverse engineering bridge: a multi-instance Ghidra plugin exposed via a HATEOAS REST API plus an MCP Python bridge for decompilation, analysis & binary manipulation"
 readme = "README.md"
 requires-python = ">=3.11"
+authors = [
+    {name = "Ryan Malloy", email = "ryan@supported.systems"}
+]
 dependencies = [
     "mcp>=1.22.0",
     "requests>=2.32.3",
+    "fastmcp>=2.0.0",
 ]
 
 [project.scripts]
-ghydramcp = "bridge_mcp_hydra:main"
+ghydramcp = "ghydramcp:main"
 
 [build-system]
 requires = ["hatchling"]
 build-backend = "hatchling.build"
 
 [tool.hatch.build.targets.wheel]
-packages = ["."]
-only-include = ["bridge_mcp_hydra.py"]
+packages = ["src/ghydramcp"]
+
+[tool.hatch.build]
+sources = ["src"]
+
+[tool.hatch.build.targets.sdist]
+include = [
+    "/src",
+    "/README.md",
+    "/pyproject.toml",
+]
+
+[tool.ruff]
+line-length = 100
+target-version = "py311"
+
+[tool.ruff.lint]
+select = ["E", "F", "I", "W"]
+ignore = ["E501"] # Line too long - handled by formatter
diff --git a/src/main/resources/Module.manifest b/src/main/resources/Module.manifest
index 8be1e50..2906516 100644
--- a/src/main/resources/Module.manifest
+++ b/src/main/resources/Module.manifest
@@ -1,3 +1,9 @@
-Manifest-Version: 1.0
-GHIDRA_MODULE_NAME: GhydraMCP
-GHIDRA_MODULE_DESC: A multi-headed REST interface for Ghidra for use with MCP agents.
+# GhydraMCP Module Manifest
+#
+# This file lists third-party libraries bundled with this extension and their licenses.
+# Module metadata (name, description, version) is defined in extension.properties.
+#
+# Format: MODULE FILE LICENSE: lib/filename.jar License Name
+#
+# Currently, GhydraMCP has no bundled third-party libraries.
+# Gson is provided by Ghidra itself.