Ryan Malloy 3818599b94 Fix port allocation to ignore stopped instances
Port allocator was counting stopped QEMU instances as occupying ports,
preventing new starts after stop-all. Only running instances now block
port reuse. Stopped instances with flash images on disk are preserved
for esp_qemu_flash; truly stale entries are purged.
2026-01-29 16:23:36 -07:00

622 lines
24 KiB
Python

"""
QEMU Emulation Manager Component
Manages Espressif QEMU fork instances for virtual ESP32 device emulation.
Each instance exposes a virtual serial port over TCP that esptool can connect
to via socket://localhost:PORT, making QEMU devices transparent to all
existing flash/chip operations.
Boot modes:
- "normal": Boots from flash (runs firmware). Use for testing app behavior.
- "download": GPIO strap forces ROM into serial bootloader mode.
esptool can connect and flash/read/identify the chip just like real hardware.
"""
import asyncio
import binascii
import logging
import time
from dataclasses import dataclass, field
from pathlib import Path
from typing import Any
from fastmcp import Context, FastMCP
from ..config import ESPToolServerConfig
logger = logging.getLogger(__name__)
# Chip type to QEMU machine/binary/efuse mapping.
# Derived from ESP-IDF's tools/idf_py_actions/qemu_ext.py.
# Note: ESP32-S2 is not supported by the Espressif QEMU fork (no machine type exists).
#
# Per-chip keys, as consumed by QemuManager._start_impl:
#   machine               value passed to QEMU's "-machine" option
#   arch                  selects the QEMU binary ("xtensa" or "riscv")
#   memory                optional; passed to "-m" when present (esp32c3 has none)
#   efuse_device          driver name that the eFuse drive is attached to
#   wdt_driver            driver name on which "wdt_disable" is set
#   download_strap        driver name on which "strap_mode" is set in download mode
#   download_strap_value  the "strap_mode" value used in download mode
#   default_efuse         bytes written to a new eFuse image file when none exists
CHIP_MACHINES: dict[str, dict[str, Any]] = {
    "esp32": {
        "machine": "esp32",
        "arch": "xtensa",
        "memory": "4M",
        "efuse_device": "nvram.esp32.efuse",
        "wdt_driver": "timer.esp32.timg",
        "download_strap": "esp32.gpio",
        "download_strap_value": "0x0f",
        "default_efuse": binascii.unhexlify(
            "00000000000000000000000000800000000000000000100000000000000000000000000000000000"
            "00000000000000000000000000000000000000000000000000000000000000000000000000000000"
            "00000000000000000000000000000000000000000000000000000000000000000000000000000000"
            "00000000"
        ),
    },
    "esp32s3": {
        "machine": "esp32s3",
        "arch": "xtensa",
        "memory": "4M",
        "efuse_device": "nvram.esp32c3.efuse",  # QEMU-201: shares esp32c3 efuse device
        "wdt_driver": "timer.esp32s3.timg",
        "download_strap": "esp32s3.gpio",
        "download_strap_value": "0x07",
        # Same blob expression as the esp32c3 entry below.
        "default_efuse": binascii.unhexlify(
            "00000000000000000000000000000000000000000000000000000000000000000000000000000c00"
            + "00" * 920
            + "000000000000000000000000000000000000000000000000"
        ),
    },
    "esp32c3": {
        "machine": "esp32c3",
        "arch": "riscv",
        "efuse_device": "nvram.esp32c3.efuse",
        "wdt_driver": "timer.esp32c3.timg",
        "download_strap": "esp32c3.gpio",
        "download_strap_value": "0x02",
        # NOTE(review): the zero padding length (920 bytes) is assumed to match
        # the blob in qemu_ext.py -- confirm against upstream.
        "default_efuse": binascii.unhexlify(
            "00000000000000000000000000000000000000000000000000000000000000000000000000000c00"
            + "00" * 920
            + "000000000000000000000000000000000000000000000000"
        ),
    },
}
@dataclass
class QemuInstance:
"""Tracks a running QEMU process"""
instance_id: str
chip_type: str
tcp_port: int
flash_image: Path
flash_size_mb: int
process: asyncio.subprocess.Process | None = None
started_at: float = 0.0
pid: int | None = None
extra_args: list[str] = field(default_factory=list)
boot_mode: str = "normal"
efuse_image: Path | None = None
@property
def socket_uri(self) -> str:
return f"socket://localhost:{self.tcp_port}"
@property
def is_running(self) -> bool:
return self.process is not None and self.process.returncode is None
class QemuManager:
    """Manages QEMU ESP32 emulation instances"""

    def __init__(self, app: FastMCP, config: ESPToolServerConfig) -> None:
        """Set up an empty instance registry and register the QEMU tools.

        Args:
            app: FastMCP application the tools are registered on.
            config: Server configuration read for QEMU binary paths, the
                base port, and the instance limit.
        """
        # Registry state: records keyed by instance ID, plus the counter
        # that _generate_id() turns into "qemu-N" identifiers.
        self._next_id = 1
        self.instances: dict[str, QemuInstance] = {}

        self.config = config
        self.app = app

        # Runs last because tool registration reads self.app.
        self._register_tools()
def _get_qemu_binary(self, arch: str) -> str | None:
"""Get the QEMU binary path for the given architecture"""
if arch == "xtensa":
return self.config.qemu_xtensa_path
elif arch == "riscv":
return self.config.qemu_riscv_path
return None
def _allocate_port(self) -> int | None:
"""Find the next available TCP port for a QEMU instance"""
used_ports = {inst.tcp_port for inst in self.instances.values() if inst.is_running}
for offset in range(self.config.qemu_max_instances):
port = self.config.qemu_base_port + offset
if port not in used_ports:
return port
return None
def _generate_id(self) -> str:
instance_id = f"qemu-{self._next_id}"
self._next_id += 1
return instance_id
def _register_tools(self) -> None:
    """Register QEMU management tools with FastMCP

    Five tools are registered on ``self.app``: esp_qemu_start,
    esp_qemu_stop, esp_qemu_list, esp_qemu_status and esp_qemu_flash.
    Each is a thin async wrapper that forwards its arguments to the
    matching ``_*_impl`` method, where the actual logic lives.

    NOTE(review): the wrappers' signatures and docstrings are presumably
    what FastMCP exposes as the tool schema/description -- confirm before
    rewording them.
    """

    # Launch a new virtual device.
    @self.app.tool("esp_qemu_start")
    async def qemu_start(
        context: Context,
        chip_type: str = "esp32",
        flash_image: str | None = None,
        flash_size_mb: int = 4,
        tcp_port: int | None = None,
        boot_mode: str = "download",
        extra_args: list[str] | None = None,
    ) -> dict[str, Any]:
        """Start a virtual ESP device using QEMU emulation. No physical hardware needed.
        Returns a socket URI (socket://localhost:PORT) that works with all
        esptool operations: esp_detect_chip, esp_flash_firmware, esp_scan_ports, etc.
        Virtual devices also appear automatically in esp_scan_ports results.
        Boot modes:
        - "download" (default): Device starts in serial bootloader. Use this for
        esptool interactions like flashing, chip identification, and flash reading.
        - "normal": Device boots from flash and runs firmware. Use this to observe
        application output after flashing.
        Typical workflow:
        1. esp_qemu_start (download mode) -> get socket URI
        2. esp_flash_firmware with socket URI -> flash your firmware
        3. esp_qemu_stop -> stop the instance
        4. esp_qemu_start (normal mode, same flash image) -> boot firmware
        Args:
        chip_type: Target chip (esp32, esp32s3, esp32c3)
        flash_image: Path to existing flash image file. Creates a blank erased
        flash (all 0xFF) if not specified.
        flash_size_mb: Flash size in MB when creating blank images (default: 4)
        tcp_port: TCP port for virtual serial (auto-assigned from pool if not specified)
        boot_mode: "download" for esptool interaction (default), "normal" to boot from flash
        extra_args: Additional QEMU command-line arguments
        """
        return await self._start_impl(
            context, chip_type, flash_image, flash_size_mb, tcp_port, boot_mode, extra_args
        )

    # Stop one instance, or every instance when no ID is given.
    @self.app.tool("esp_qemu_stop")
    async def qemu_stop(
        context: Context, instance_id: str | None = None
    ) -> dict[str, Any]:
        """Stop a running QEMU virtual device. Terminates the QEMU process and frees the TCP port.
        The flash image is preserved on disk, so the instance can be restarted
        with esp_qemu_start using the same flash_image path (e.g., to switch
        from download mode to normal boot mode after flashing).
        Args:
        instance_id: Instance ID to stop (stops all running instances if not specified)
        """
        return await self._stop_impl(context, instance_id)

    # Enumerate every tracked instance, running or stopped.
    @self.app.tool("esp_qemu_list")
    async def qemu_list(context: Context) -> dict[str, Any]:
        """List all QEMU virtual device instances with their status, chip type, port, and boot mode.
        Returns running and stopped instances. Use this to find instance IDs
        for esp_qemu_stop or esp_qemu_status, or to get socket URIs for esptool operations."""
        return await self._list_impl(context)

    # Detailed view of a single instance.
    @self.app.tool("esp_qemu_status")
    async def qemu_status(
        context: Context, instance_id: str | None = None
    ) -> dict[str, Any]:
        """Get detailed status of a QEMU virtual device including uptime, PID, socket URI,
        boot mode, and flash/efuse image paths.
        Args:
        instance_id: Instance to inspect (returns first running instance if not specified)
        """
        return await self._status_impl(context, instance_id)

    # Offline patching of a stopped instance's flash image file.
    @self.app.tool("esp_qemu_flash")
    async def qemu_flash(
        context: Context,
        instance_id: str,
        firmware_path: str,
        address: str = "0x0",
    ) -> dict[str, Any]:
        """Write a firmware binary directly into a QEMU instance's flash image file.
        This is an offline operation — the instance must be stopped first.
        It patches the raw flash image at the given offset, then you can restart
        with esp_qemu_start in "normal" boot mode to run the firmware.
        For most use cases, prefer using esp_flash_firmware with the instance's
        socket URI while it's running in download mode — that uses esptool's
        full flash protocol including verification. Use this tool only when you
        need direct image manipulation (e.g., pre-loading a merged binary).
        Args:
        instance_id: Target QEMU instance (must be stopped)
        firmware_path: Path to firmware binary to write into the flash image
        address: Flash address offset as hex string (default: "0x0")
        """
        return await self._flash_impl(context, instance_id, firmware_path, address)
async def _start_impl(
    self,
    context: Context,
    chip_type: str,
    flash_image: str | None,
    flash_size_mb: int,
    tcp_port: int | None,
    boot_mode: str = "download",
    extra_args: list[str] | None = None,
) -> dict[str, Any]:
    """Validate a start request, prepare its images, and launch QEMU.

    Args:
        context: FastMCP request context (not used by this method).
        chip_type: Chip name; case, "-" and "_" are ignored when matching
            it against ``CHIP_MACHINES``.
        flash_image: Existing flash image to boot from. When falsy, a
            per-chip/per-port image under ``resources/qemu`` is used and
            created blank if it does not exist yet.
        flash_size_mb: Size in MB of a newly created blank image.
        tcp_port: Explicit TCP port for the virtual serial line, or
            ``None`` to take the first free port from the pool.
        boot_mode: ``"download"`` or ``"normal"``.
        extra_args: Extra arguments appended to the QEMU command line.

    Returns:
        A dict with ``success`` True plus the instance details, or
        ``success`` False plus an ``error`` message. Failures are reported
        through the dict rather than raised.
    """
    if boot_mode not in ("download", "normal"):
        return {
            "success": False,
            "error": f"Invalid boot_mode: {boot_mode}. Use 'download' or 'normal'.",
        }

    # Validate chip type
    chip_key = chip_type.lower().replace("-", "").replace("_", "")
    if chip_key not in CHIP_MACHINES:
        return {
            "success": False,
            "error": f"Unsupported chip type: {chip_type}",
            "supported_chips": list(CHIP_MACHINES.keys()),
        }
    machine_info = CHIP_MACHINES[chip_key]

    qemu_binary = self._get_qemu_binary(machine_info["arch"])
    if not qemu_binary or not Path(qemu_binary).exists():
        return {
            "success": False,
            "error": f"QEMU binary not found for {machine_info['arch']} architecture",
            "hint": "Install via: python3 $IDF_PATH/tools/idf_tools.py install qemu-xtensa qemu-riscv32",
        }

    # Check instance limit
    running = sum(1 for inst in self.instances.values() if inst.is_running)
    if running >= self.config.qemu_max_instances:
        return {
            "success": False,
            "error": f"Maximum QEMU instances reached ({self.config.qemu_max_instances})",
            "running_instances": running,
        }

    # Allocate port
    if tcp_port is None:
        tcp_port = self._allocate_port()
        if tcp_port is None:
            return {"success": False, "error": "No available TCP ports"}
    else:
        # Port 0 would make QEMU bind an arbitrary port while the reported
        # socket URI still says ":0"; out-of-range values can never bind.
        if not 1 <= tcp_port <= 65535:
            return {
                "success": False,
                "error": f"Invalid TCP port: {tcp_port} (must be 1-65535)",
            }
        # Check port not already in use by us
        used_ports = {inst.tcp_port for inst in self.instances.values() if inst.is_running}
        if tcp_port in used_ports:
            return {"success": False, "error": f"Port {tcp_port} already in use"}

    # Prepare flash and efuse images. Filesystem failures (permissions,
    # full disk, ...) are reported as a structured error like every other
    # failure of this method.
    resources_dir = Path(__file__).parent.parent / "resources" / "qemu"
    efuse_path: Path | None = None
    try:
        resources_dir.mkdir(parents=True, exist_ok=True)

        if flash_image:
            flash_path = Path(flash_image)
            if not flash_path.exists():
                return {"success": False, "error": f"Flash image not found: {flash_image}"}
        else:
            flash_path = resources_dir / f"flash_{chip_key}_{tcp_port}.bin"
            if not flash_path.exists():
                # A non-positive size would leave a zero-byte image behind
                # that every later start on this port would silently reuse.
                if flash_size_mb < 1:
                    return {
                        "success": False,
                        "error": f"Invalid flash_size_mb: {flash_size_mb} (must be at least 1)",
                    }
                _create_blank_flash(flash_path, flash_size_mb)

        # Prepare efuse image (required for download mode and proper chip identification)
        if "default_efuse" in machine_info:
            efuse_path = resources_dir / f"efuse_{chip_key}_{tcp_port}.bin"
            if not efuse_path.exists():
                efuse_path.write_bytes(machine_info["default_efuse"])
    except OSError as e:
        return {"success": False, "error": f"Failed to prepare QEMU images: {e}"}

    instance_id = self._generate_id()

    # Build QEMU command
    cmd = [
        qemu_binary,
        "-nographic",
        "-monitor", "none",
        "-machine", machine_info["machine"],
    ]
    # Add memory size if specified
    if "memory" in machine_info:
        cmd.extend(["-m", machine_info["memory"]])
    # Flash image drive
    cmd.extend(["-drive", f"file={flash_path},if=mtd,format=raw"])
    # eFuse emulation (enables chip_id, MAC, and revision reporting)
    if efuse_path and "efuse_device" in machine_info:
        cmd.extend([
            "-drive", f"file={efuse_path},if=none,format=raw,id=efuse",
            "-global", f"driver={machine_info['efuse_device']},property=drive,value=efuse",
        ])
    # Disable watchdog timer (QEMU timing doesn't match real hardware)
    if "wdt_driver" in machine_info:
        cmd.extend([
            "-global", f"driver={machine_info['wdt_driver']},property=wdt_disable,value=true",
        ])
    # Download mode: GPIO strap tells ROM to enter serial bootloader
    if boot_mode == "download" and "download_strap" in machine_info:
        cmd.extend([
            "-global",
            f"driver={machine_info['download_strap']},property=strap_mode,value={machine_info['download_strap_value']}",
        ])
    # TCP serial port
    cmd.extend(["-serial", f"tcp::{tcp_port},server,nowait"])
    if extra_args:
        cmd.extend(extra_args)

    proc: asyncio.subprocess.Process | None = None
    registered = False
    try:
        proc = await asyncio.create_subprocess_exec(
            *cmd,
            stdout=asyncio.subprocess.PIPE,
            stderr=asyncio.subprocess.PIPE,
        )
        # Brief pause to let QEMU bind the TCP port
        await asyncio.sleep(0.5)
        if proc.returncode is not None:
            stderr = (await proc.stderr.read()).decode() if proc.stderr else ""
            return {
                "success": False,
                "error": f"QEMU exited immediately (code {proc.returncode})",
                "stderr": stderr[:500],
            }

        instance = QemuInstance(
            instance_id=instance_id,
            chip_type=chip_key,
            tcp_port=tcp_port,
            flash_image=flash_path,
            flash_size_mb=flash_size_mb,
            process=proc,
            started_at=time.time(),
            pid=proc.pid,
            extra_args=extra_args or [],
            boot_mode=boot_mode,
            efuse_image=efuse_path,
        )
        self.instances[instance_id] = instance
        registered = True
        logger.info(
            f"Started QEMU {chip_key} instance {instance_id} on port {tcp_port} "
            f"(PID {proc.pid}, mode={boot_mode})"
        )
        return {
            "success": True,
            "instance_id": instance_id,
            "chip_type": chip_key,
            "tcp_port": tcp_port,
            "socket_uri": instance.socket_uri,
            "flash_image": str(flash_path),
            "boot_mode": boot_mode,
            "pid": proc.pid,
            "hint": (
                f"Use port='{instance.socket_uri}' with other esp_ tools to interact with this virtual device"
                if boot_mode == "download"
                else f"Instance booting from flash. Connect to serial output at {instance.socket_uri}"
            ),
        }
    except FileNotFoundError:
        return {"success": False, "error": f"QEMU binary not found: {qemu_binary}"}
    except Exception as e:
        return {"success": False, "error": f"Failed to start QEMU: {e}"}
    finally:
        # If QEMU was spawned but never registered (an error, or the call
        # was cancelled during the startup pause), kill it so it does not
        # linger untracked while holding the TCP port.
        if proc is not None and not registered and proc.returncode is None:
            try:
                proc.kill()
            except ProcessLookupError:
                pass
async def _stop_impl(
    self, context: Context, instance_id: str | None
) -> dict[str, Any]:
    """Stop the named instance, or every instance when no ID is given.

    After stopping, records that are not running and whose flash image no
    longer exists on disk are dropped from the registry.

    Returns:
        ``stopped`` lists the IDs whose process was actually terminated
        and ``remaining`` counts instances still running. An unknown ID
        yields ``success`` False together with the known IDs.
    """
    if instance_id:
        target = self.instances.get(instance_id)
        if target is None:
            return {
                "success": False,
                "error": f"Instance not found: {instance_id}",
                "available": list(self.instances.keys()),
            }
        targets = [target]
    else:
        targets = list(self.instances.values())

    outcomes = []
    for record in targets:
        outcomes.append(await self._kill_instance(record))

    # Purge stopped instances that have no flash image on disk (nothing to reuse)
    doomed = [
        key
        for key, record in self.instances.items()
        if not (record.is_running or record.flash_image.exists())
    ]
    for key in doomed:
        del self.instances[key]

    still_running = sum(1 for record in self.instances.values() if record.is_running)
    return {
        "success": True,
        "stopped": list(filter(None, outcomes)),
        "remaining": still_running,
    }
async def _kill_instance(self, instance: QemuInstance) -> str | None:
    """Stop the instance's QEMU process if it is still alive.

    The process is asked to terminate and given 5 seconds to exit; after
    that it is killed and waited for.

    Args:
        instance: Record whose ``process`` should be stopped.

    Returns:
        The instance ID when a live process was found and stopped, or
        ``None`` when there was no process or it had already exited.
    """
    proc = instance.process
    if proc is None or proc.returncode is not None:
        return None

    try:
        proc.terminate()
    except ProcessLookupError:
        # The child exited between the returncode check and the signal.
        # Nothing is left to signal; wait() below returns promptly.
        pass

    try:
        await asyncio.wait_for(proc.wait(), timeout=5.0)
    except asyncio.TimeoutError:
        try:
            proc.kill()
        except ProcessLookupError:
            pass
        await proc.wait()

    logger.info(f"Stopped QEMU instance {instance.instance_id}")
    return instance.instance_id
async def _list_impl(self, context: Context) -> dict[str, Any]:
    """Summarise every tracked instance, running or stopped.

    Each entry carries the instance ID, chip type, TCP port, socket URI,
    boot mode, running flag, PID and uptime (0 for stopped instances).
    The boot mode is included because the esp_qemu_list tool description
    promises it.

    Returns:
        A dict with the per-instance entries under ``instances`` plus
        ``total``, ``running`` and ``max_instances`` counters.
    """
    now = time.time()
    instances_info = [
        {
            "instance_id": inst.instance_id,
            "chip_type": inst.chip_type,
            "tcp_port": inst.tcp_port,
            "socket_uri": inst.socket_uri,
            "boot_mode": inst.boot_mode,
            "running": inst.is_running,
            "pid": inst.pid,
            "uptime_seconds": round(now - inst.started_at, 1) if inst.is_running else 0,
        }
        for inst in self.instances.values()
    ]
    return {
        "success": True,
        "instances": instances_info,
        "total": len(instances_info),
        "running": sum(1 for info in instances_info if info["running"]),
        "max_instances": self.config.qemu_max_instances,
    }
async def _status_impl(
    self, context: Context, instance_id: str | None
) -> dict[str, Any]:
    """Return the detailed status of one instance.

    Args:
        context: FastMCP request context (not used by this method).
        instance_id: Instance to inspect. When falsy, the first running
            instance is reported.

    Returns:
        The instance's fields, including the flash and eFuse image paths
        (``efuse_image`` is ``None`` when the instance has no eFuse
        image), or ``success`` False with the known instance IDs when no
        matching instance exists.
    """
    if instance_id:
        instance = self.instances.get(instance_id)
    else:
        # Pick first running instance
        instance = next(
            (inst for inst in self.instances.values() if inst.is_running), None
        )

    if not instance:
        return {
            "success": False,
            "error": "No instance found" if not instance_id else f"Instance not found: {instance_id}",
            "available": list(self.instances.keys()),
        }

    return {
        "success": True,
        "instance_id": instance.instance_id,
        "chip_type": instance.chip_type,
        "machine": CHIP_MACHINES.get(instance.chip_type, {}).get("machine"),
        "tcp_port": instance.tcp_port,
        "socket_uri": instance.socket_uri,
        "flash_image": str(instance.flash_image),
        # The esp_qemu_status tool description promises the eFuse path too.
        "efuse_image": str(instance.efuse_image) if instance.efuse_image else None,
        "flash_size_mb": instance.flash_size_mb,
        "boot_mode": instance.boot_mode,
        "running": instance.is_running,
        "pid": instance.pid,
        "started_at": instance.started_at,
        "uptime_seconds": round(time.time() - instance.started_at, 1) if instance.is_running else 0,
        "extra_args": instance.extra_args,
    }
async def _flash_impl(
    self,
    context: Context,
    instance_id: str,
    firmware_path: str,
    address: str,
) -> dict[str, Any]:
    """Write a firmware binary into a stopped instance's flash image.

    Args:
        context: FastMCP request context (not used by this method).
        instance_id: ID of the target instance; it must not be running.
        firmware_path: Path of the binary to write.
        address: Byte offset into the flash image, as a ``0x``-prefixed
            hex string (prefix case-insensitive) or a decimal string.
            Must not be negative.

    Returns:
        ``success`` True with the bytes written and the resolved address,
        or ``success`` False with an ``error`` message.
    """
    instance = self.instances.get(instance_id)
    if not instance:
        return {"success": False, "error": f"Instance not found: {instance_id}"}
    if instance.is_running:
        return {
            "success": False,
            "error": "Instance must be stopped before flashing. Use esp_qemu_stop first.",
        }

    fw_path = Path(firmware_path)
    if not fw_path.exists():
        return {"success": False, "error": f"Firmware not found: {firmware_path}"}

    try:
        text = address.strip()
        offset = int(text, 16) if text.lower().startswith("0x") else int(text)
    except ValueError:
        return {"success": False, "error": f"Invalid address: {address}"}
    # A negative offset would otherwise be treated as a from-the-end index
    # and land the firmware in the wrong place.
    if offset < 0:
        return {
            "success": False,
            "error": f"Invalid address: {address} (must not be negative)",
        }

    flash_path = instance.flash_image
    if not flash_path.exists():
        return {"success": False, "error": f"Flash image missing: {flash_path}"}

    try:
        firmware_data = fw_path.read_bytes()
        flash_size = flash_path.stat().st_size
        end = offset + len(firmware_data)
        if end > flash_size:
            return {
                "success": False,
                "error": f"Firmware ({len(firmware_data)} bytes at offset {offset:#x}) exceeds flash size ({flash_size} bytes)",
            }
        # Patch in place: only the target range is touched, so a failed
        # write cannot truncate the rest of the image, and the image is
        # never loaded into memory.
        with flash_path.open("r+b") as image:
            image.seek(offset)
            image.write(firmware_data)
    except OSError as e:
        return {"success": False, "error": f"Flash write failed: {e}"}

    return {
        "success": True,
        "instance_id": instance_id,
        "firmware_path": firmware_path,
        "address": f"0x{offset:08x}",
        "bytes_written": len(firmware_data),
        "flash_image": str(flash_path),
        "hint": "Use esp_qemu_start to restart the instance with the new firmware",
    }
def get_running_ports(self) -> list[dict[str, Any]]:
"""Return socket URIs of running instances for scan integration"""
return [
{
"port": inst.socket_uri,
"chip_type": inst.chip_type,
"instance_id": inst.instance_id,
"source": "qemu",
}
for inst in self.instances.values()
if inst.is_running
]
async def health_check(self) -> dict[str, Any]:
"""Component health check"""
return {
"status": "healthy",
"qemu_xtensa_available": bool(
self.config.qemu_xtensa_path and Path(self.config.qemu_xtensa_path).exists()
),
"qemu_riscv_available": bool(
self.config.qemu_riscv_path and Path(self.config.qemu_riscv_path).exists()
),
"running_instances": sum(1 for i in self.instances.values() if i.is_running),
"max_instances": self.config.qemu_max_instances,
}
async def shutdown(self) -> None:
"""Gracefully stop all instances on server shutdown"""
for inst in list(self.instances.values()):
await self._kill_instance(inst)
self.instances.clear()
def _create_blank_flash(path: Path, size_mb: int) -> None:
    """Write a flash image of ``size_mb`` MiB filled with 0xFF bytes.

    0xFF is the erased state of NOR flash. Missing parent directories are
    created, and an existing file at ``path`` is overwritten.
    """
    erased_mib = b"\xff" * (1024 * 1024)
    path.parent.mkdir(parents=True, exist_ok=True)
    with path.open("wb") as image:
        # Emit one 1 MiB block at a time so the whole image is never held
        # in memory at once.
        image.writelines(erased_mib for _ in range(size_mb))