Ryan Malloy 9d232305c6 Implement firmware builder, OTA manager, and production tools
Replace all remaining stub implementations with real functionality:

- firmware_builder: elf2image conversion and image-info analysis
- ota_manager: package creation (zip+manifest), HTTP deploy via curl,
  rollback by erasing otadata partition
- production_tools: factory programming (erase/flash/verify pipeline),
  batch parallel programming, QC test suites (basic + extended)
2026-01-31 09:02:34 -07:00

336 lines
11 KiB
Python

"""
OTA Manager Component
Handles Over-The-Air update operations including package creation,
deployment, rollback, and update management.
"""
import asyncio
import hashlib
import json
import logging
import os
import struct
import tempfile
import time
import zipfile
from pathlib import Path
from typing import Any

from fastmcp import Context, FastMCP

from ..config import ESPToolServerConfig
logger = logging.getLogger(__name__)
class OTAManager:
    """ESP Over-The-Air update management.

    Registers the ``esp_ota_package_create``, ``esp_ota_deploy`` and
    ``esp_ota_rollback`` tools on the FastMCP app passed to the constructor.
    The operations report their outcome as a dict carrying a boolean
    ``success`` key and, on failure, an ``error`` message.
    """

    # Prefix of one partition-table entry, little-endian:
    # magic (u16), type (u8), subtype (u8), offset (u32), size (u32).
    _ENTRY_HEADER = struct.Struct("<HBBII")
    _ENTRY_SIZE = 32
    _ENTRY_MAGIC = 0x50AA  # bytes AA 50 read as a little-endian u16
    _END_MARKER = 0xFFFF  # erased flash: no further entries
    _TYPE_DATA = 0x01
    _SUBTYPE_OTA = 0x00

    def __init__(self, app: FastMCP, config: ESPToolServerConfig) -> None:
        """Store the app/config and register the OTA tools on ``app``."""
        self.app = app
        self.config = config
        self._register_tools()

    @staticmethod
    def _decode(data: bytes | None) -> str:
        """Decode subprocess output, replacing undecodable bytes.

        Tool output is not guaranteed to be valid UTF-8; a strict decode
        would turn a finished command into a spurious failure.
        """
        return (data or b"").decode(errors="replace")

    @staticmethod
    async def _kill_process(proc: Any) -> None:
        """Kill ``proc`` if it is still running and wait for it to exit."""
        if proc is None or proc.returncode is not None:
            return
        try:
            proc.kill()
        except ProcessLookupError:
            # The process exited between the returncode check and the kill.
            pass
        await proc.wait()

    @staticmethod
    def _remove_quietly(path: str | None) -> None:
        """Best-effort removal of a temporary file; failures are ignored."""
        if path is None:
            return
        try:
            os.unlink(path)
        except OSError:
            # Cleanup only: a leftover temp file must not mask the real result.
            pass

    @classmethod
    def _find_otadata(cls, raw: bytes) -> tuple[int, int] | None:
        """Locate the otadata partition in a raw partition-table dump.

        Scans ``raw`` in 32-byte entries. Scanning stops at the first entry
        whose magic is 0xFFFF; entries with any other unexpected magic are
        skipped.

        Returns:
            ``(offset, size)`` of the first entry with type 0x01 (data) and
            subtype 0x00 (ota), or ``None`` when there is no such entry.
        """
        last_start = len(raw) - cls._ENTRY_SIZE
        for pos in range(0, last_start + 1, cls._ENTRY_SIZE):
            magic, ptype, subtype, offset, size = cls._ENTRY_HEADER.unpack_from(raw, pos)
            if magic == cls._END_MARKER:
                break
            if magic != cls._ENTRY_MAGIC:
                continue
            if ptype == cls._TYPE_DATA and subtype == cls._SUBTYPE_OTA:
                return offset, size
        return None

    async def _run_esptool(
        self,
        port: str,
        args: list[str],
        timeout: float = 30.0,
    ) -> dict[str, Any]:
        """Run esptool as an async subprocess.

        Args:
            port: Serial port passed to esptool via ``--port``.
            args: Remaining esptool arguments (sub-command and its options).
            timeout: Seconds to wait for the process before killing it.

        Returns:
            ``{"success": True, "output": ...}`` with stdout followed by
            stderr when the exit code is 0, otherwise
            ``{"success": False, "error": ...}``. For a non-zero exit the
            error is the stripped output truncated to 500 characters.
        """
        cmd = [self.config.esptool_path, "--port", port, *args]
        proc = None
        try:
            proc = await asyncio.create_subprocess_exec(
                *cmd,
                stdout=asyncio.subprocess.PIPE,
                stderr=asyncio.subprocess.PIPE,
            )
            stdout, stderr = await asyncio.wait_for(proc.communicate(), timeout=timeout)
        except asyncio.TimeoutError:
            await self._kill_process(proc)
            return {"success": False, "error": f"Timeout after {timeout}s"}
        except FileNotFoundError:
            return {"success": False, "error": f"esptool not found at {self.config.esptool_path}"}
        except Exception as e:
            await self._kill_process(proc)
            return {"success": False, "error": str(e)}

        output = self._decode(stdout) + self._decode(stderr)
        if proc.returncode != 0:
            return {"success": False, "error": output.strip()[:500]}
        return {"success": True, "output": output}

    def _register_tools(self) -> None:
        """Register OTA management tools on ``self.app``."""

        @self.app.tool("esp_ota_package_create")
        async def create_ota_package(
            context: Context, firmware_path: str, version: str, output_path: str
        ) -> dict[str, Any]:
            """Create OTA update package"""
            return await self._package_create_impl(context, firmware_path, version, output_path)

        @self.app.tool("esp_ota_deploy")
        async def deploy_ota_update(
            context: Context, package_path: str, target_url: str
        ) -> dict[str, Any]:
            """Deploy OTA update to device"""
            return await self._deploy_impl(context, package_path, target_url)

        @self.app.tool("esp_ota_rollback")
        async def rollback_ota(context: Context, port: str | None = None) -> dict[str, Any]:
            """Rollback to previous firmware version"""
            return await self._rollback_impl(context, port)

    async def _package_create_impl(
        self,
        context: Context,
        firmware_path: str,
        version: str,
        output_path: str,
    ) -> dict[str, Any]:
        """Create an OTA update package (zip with firmware + manifest).

        The package contains:
        - firmware.bin: The raw application binary
        - manifest.json: Metadata (version, SHA-256, size, timestamp)

        Args:
            context: Tool invocation context (not used here).
            firmware_path: Path of the firmware binary to package.
            version: Version string recorded in the manifest.
            output_path: Path of the zip file to write.

        Returns:
            On success, a dict with ``output_path``, ``package_size_bytes``
            and the ``manifest``; otherwise ``success: False`` and ``error``.
        """
        fw = Path(firmware_path)
        if not fw.exists():
            return {"success": False, "error": f"Firmware file not found: {firmware_path}"}
        if not fw.is_file():
            # exists() is also true for directories, which cannot be read as bytes.
            return {"success": False, "error": f"Firmware path is not a file: {firmware_path}"}
        try:
            fw_data = fw.read_bytes()
        except OSError as e:
            return {"success": False, "error": f"Failed to read firmware: {e}"}

        manifest = {
            "version": version,
            "firmware_name": fw.name,
            "firmware_size": len(fw_data),
            "firmware_sha256": hashlib.sha256(fw_data).hexdigest(),
            "created_at": time.strftime("%Y-%m-%dT%H:%M:%SZ", time.gmtime()),
        }

        out = Path(output_path)
        try:
            with zipfile.ZipFile(out, "w", compression=zipfile.ZIP_DEFLATED) as zf:
                zf.writestr("firmware.bin", fw_data)
                zf.writestr("manifest.json", json.dumps(manifest, indent=2))
            package_size = out.stat().st_size
        except OSError as e:
            return {"success": False, "error": f"Failed to create package: {e}"}

        return {
            "success": True,
            "output_path": str(out),
            "package_size_bytes": package_size,
            "manifest": manifest,
        }

    async def _deploy_impl(
        self,
        context: Context,
        package_path: str,
        target_url: str,
    ) -> dict[str, Any]:
        """Deploy an OTA package to a device via HTTP POST.

        Extracts firmware.bin from the package and POSTs it to the
        device's OTA endpoint (e.g. http://192.168.1.100/ota/update).
        The target device must be running an HTTP OTA server (like
        esp_https_ota or a custom handler).

        When the package manifest carries ``firmware_sha256``, the extracted
        firmware is checked against it and nothing is sent on a mismatch.

        Args:
            context: Tool invocation context (not used here).
            package_path: Path of a zip package containing ``firmware.bin``.
            target_url: URL the firmware is POSTed to.

        Returns:
            A dict with ``success`` (true only for a 2xx status),
            ``target_url``, ``http_status`` and ``firmware_size_bytes``, plus
            ``version`` when a manifest is present; on failure, ``error``.
        """
        pkg = Path(package_path)
        if not pkg.exists():
            return {"success": False, "error": f"Package not found: {package_path}"}

        # Extract firmware (and the optional manifest) from the package.
        try:
            with zipfile.ZipFile(pkg, "r") as zf:
                names = zf.namelist()
                if "firmware.bin" not in names:
                    return {"success": False, "error": "Package missing firmware.bin"}
                fw_data = zf.read("firmware.bin")
                manifest = None
                if "manifest.json" in names:
                    manifest = json.loads(zf.read("manifest.json"))
        except zipfile.BadZipFile:
            return {"success": False, "error": "Invalid zip package"}
        except (OSError, ValueError) as e:
            # ValueError covers a manifest that is not valid JSON/UTF-8.
            return {"success": False, "error": f"Failed to read package: {e}"}

        if manifest is not None and not isinstance(manifest, dict):
            return {"success": False, "error": "Package manifest is not a JSON object"}

        # Refuse to upload firmware that does not match its own manifest.
        expected_sha = manifest.get("firmware_sha256") if manifest else None
        if isinstance(expected_sha, str) and expected_sha:
            actual_sha = hashlib.sha256(fw_data).hexdigest()
            if actual_sha != expected_sha.strip().lower():
                return {
                    "success": False,
                    "error": (
                        f"Firmware SHA-256 mismatch: manifest says {expected_sha}, "
                        f"package contains {actual_sha}"
                    ),
                }

        # Stage the firmware in a temp file that curl can read with "@path".
        tmp_path = None
        try:
            with tempfile.NamedTemporaryFile(suffix=".bin", delete=False) as tmp:
                tmp_path = tmp.name
                tmp.write(fw_data)
        except OSError as e:
            self._remove_quietly(tmp_path)
            return {"success": False, "error": f"Failed to stage firmware: {e}"}

        # POST firmware to device
        # Using curl as an async subprocess since it's universally available
        # and handles HTTP/HTTPS without Python dependency issues
        proc = None
        try:
            proc = await asyncio.create_subprocess_exec(
                "curl",
                "--silent",
                "--show-error",
                "--max-time", "120",
                "--write-out", "%{http_code}",
                "--output", os.devnull,
                "--data-binary", f"@{tmp_path}",
                "--header", "Content-Type: application/octet-stream",
                # Passed via --url so a value starting with "-" is never
                # interpreted as a curl option.
                "--url", target_url,
                stdout=asyncio.subprocess.PIPE,
                stderr=asyncio.subprocess.PIPE,
            )
            stdout, stderr = await asyncio.wait_for(proc.communicate(), timeout=130.0)
        except asyncio.TimeoutError:
            # Do not leave curl running after giving up on it.
            await self._kill_process(proc)
            return {"success": False, "error": "OTA deploy timed out (130s)", "target_url": target_url}
        except FileNotFoundError:
            return {"success": False, "error": "curl not found — required for OTA deploy"}
        finally:
            self._remove_quietly(tmp_path)

        http_code = self._decode(stdout).strip()
        curl_error = self._decode(stderr).strip()
        if proc.returncode != 0:
            return {
                "success": False,
                "error": f"HTTP request failed: {curl_error}",
                "target_url": target_url,
            }

        status_ok = http_code.startswith("2")
        result: dict[str, Any] = {
            "success": status_ok,
            "target_url": target_url,
            "http_status": http_code,
            "firmware_size_bytes": len(fw_data),
        }
        if manifest:
            result["version"] = manifest.get("version")
        if not status_ok:
            result["error"] = f"Device returned HTTP {http_code}"
        return result

    async def _rollback_impl(
        self,
        context: Context,
        port: str | None,
    ) -> dict[str, Any]:
        """Rollback OTA by erasing the otadata partition.

        When the otadata partition is erased (all 0xFF), the bootloader
        falls back to the factory app or ota_0 — effectively rolling back
        to the first-flashed firmware. This works because the otadata
        partition tracks which OTA slot is active.

        For more precise control, use esp_partition_analyze to find the
        otadata offset, then esp_flash_erase to clear just that region.

        Args:
            context: Tool invocation context (not used here).
            port: Serial port of the device; required.

        Returns:
            On success, a dict with ``port``, ``otadata_offset``,
            ``otadata_size`` (hex strings) and a ``message``; otherwise
            ``success: False`` with ``error``.
        """
        if not port:
            return {"success": False, "error": "Port is required for OTA rollback"}

        # Read the partition table directly with esptool to find otadata.
        # The temp file only reserves a path for esptool to write into.
        with tempfile.NamedTemporaryFile(suffix=".bin", delete=False) as tmp:
            tmp_path = tmp.name
        try:
            # Read partition table from 0x8000
            result = await self._run_esptool(
                port,
                ["read-flash", "0x8000", "0xC00", tmp_path],
                timeout=60.0,
            )
            if not result["success"]:
                return {
                    "success": False,
                    "error": f"Cannot read partition table: {result['error']}",
                    "port": port,
                }
            try:
                raw = Path(tmp_path).read_bytes()
            except OSError as e:
                return {
                    "success": False,
                    "error": f"Cannot read partition table dump: {e}",
                    "port": port,
                }
        finally:
            self._remove_quietly(tmp_path)

        # Find otadata partition (type=data/0x01, subtype=ota/0x00)
        otadata = self._find_otadata(raw)
        if otadata is None:
            return {
                "success": False,
                "error": "No otadata partition found — device may not use OTA layout",
                "port": port,
            }
        otadata_offset, otadata_size = otadata

        # Erase the otadata region
        result = await self._run_esptool(
            port,
            [
                "erase-region",
                f"0x{otadata_offset:x}",
                f"0x{otadata_size:x}",
            ],
            timeout=30.0,
        )
        if not result["success"]:
            return {
                "success": False,
                "error": f"Failed to erase otadata: {result['error']}",
                "port": port,
            }

        return {
            "success": True,
            "port": port,
            "otadata_offset": f"0x{otadata_offset:x}",
            "otadata_size": f"0x{otadata_size:x}",
            "message": (
                "OTA data partition erased. On next boot, the device will "
                "fall back to the factory app or ota_0 slot."
            ),
        }

    async def health_check(self) -> dict[str, Any]:
        """Component health check"""
        return {"status": "healthy", "note": "OTA manager ready"}