MCP server for AI-assisted debugging of DOS binaries via the GDB remote protocol.

Features:
- GDB remote protocol client for DOSBox-X debugging
- 16 debugging tools: launch, attach, breakpoint management, registers, memory read/write, disassemble, step, continue, etc.
- Docker container with DOSBox-X for a consistent environment
- Support for DOS segment:offset addressing
- Comprehensive test suite (49 tests)

Primary use case: reverse engineering the unpublished Bezier algorithm in RIPTERM.EXE for the RIPscrip graphics protocol project.
287 lines
9.0 KiB
Python
287 lines
9.0 KiB
Python
"""Tests for utility functions."""
|
|
|
|
import pytest
|
|
|
|
from dosbox_mcp.utils import (
|
|
calculate_checksum,
|
|
decode_hex,
|
|
encode_hex,
|
|
escape_binary,
|
|
format_address,
|
|
hexdump,
|
|
parse_address,
|
|
parse_registers_x86,
|
|
parse_stop_reply,
|
|
signal_name,
|
|
unescape_binary,
|
|
)
|
|
|
|
|
|
class TestParseAddress:
    """Checks parse_address against each address notation used in this suite."""

    def test_segment_offset_format(self):
        """Upper-case segment:offset strings map to the expected linear value."""
        cases = (
            ("1000:0100", 0x10100),
            ("F000:FFF0", 0xFFFF0),  # BIOS reset vector
            ("0000:0000", 0x00000),
        )
        for text, linear in cases:
            assert parse_address(text) == linear

    def test_segment_offset_lowercase(self):
        """Lower-case hex digits are accepted in segment:offset form."""
        video_memory = 0xA0000
        assert parse_address("a000:0000") == video_memory

    def test_flat_hex_with_prefix(self):
        """Addresses written with a leading 0x are read as hex."""
        for text, linear in (("0x12345", 0x12345), ("0xFFFF0", 0xFFFF0)):
            assert parse_address(text) == linear

    def test_flat_hex_with_suffix(self):
        """Addresses written with a trailing h are read as hex."""
        for text, linear in (("12345h", 0x12345), ("FFFF0h", 0xFFFF0)):
            assert parse_address(text) == linear

    def test_plain_hex(self):
        """Undecorated digit strings are treated as hex, not decimal."""
        for text, linear in (("12345", 0x12345), ("100", 0x100)):
            assert parse_address(text) == linear

    def test_decimal_format(self):
        """A leading # switches the parser to decimal."""
        one_megabyte = 1048576
        assert parse_address("#65536") == 65536
        assert parse_address("#1048576") == one_megabyte

    def test_whitespace_handling(self):
        """Surrounding whitespace does not affect the parsed value."""
        padded = " 1000:0100 "
        assert parse_address(padded) == 0x10100

    def test_invalid_segment_offset(self):
        """More than one colon is rejected with ValueError."""
        too_many_parts = "1000:2000:3000"
        with pytest.raises(ValueError):
            parse_address(too_many_parts)
|
|
|
|
|
|
class TestFormatAddress:
    """Checks the three output styles accepted by format_address."""

    def test_flat_format(self):
        """The "flat" style yields five zero-padded hex digits for these inputs."""
        for linear, text in ((0x10100, "10100"), (0x00100, "00100")):
            assert format_address(linear, "flat") == text

    def test_segoff_format(self):
        """The "segoff" style yields a string containing a colon separator."""
        # NOTE(review): presumably the canonical (minimal-offset) form is
        # produced, but only the presence of the separator is verified here.
        formatted = format_address(0x10100, "segoff")
        assert ":" in formatted

    def test_both_format(self):
        """The "both" style contains the flat digits and a colon separator."""
        formatted = format_address(0x10100, "both")
        for fragment in ("10100", ":"):
            assert fragment in formatted
|
|
|
|
|
|
class TestChecksum:
    """Tests for GDB checksum calculation.

    Every expectation is a two-digit lower-case hex string equal to the sum
    of the payload's character codes modulo 256.
    """

    def test_simple_checksum(self):
        """A single character checksums to its own character code."""
        # 'g' = 0x67 = 103
        assert calculate_checksum("g") == "67"

    def test_command_checksum(self):
        """Single-character GDB commands checksum to their character codes."""
        commands = (
            ("?", "3f"),  # "?" = 0x3F = 63
            ("c", "63"),  # "c" = 0x63 = 99
            ("s", "73"),  # "s" = 0x73 = 115
        )
        for command, checksum in commands:
            assert calculate_checksum(command) == checksum

    def test_checksum_wrapping(self):
        """Sums above 255 wrap around modulo 256."""
        # 300 * ord("A") = 300 * 65 = 19500
        # 19500 = 76 * 256 + 44, so the wrapped checksum is 44 = 0x2c.
        payload = "A" * 300
        # Pinned as a literal so the expectation does not simply repeat the
        # formula under test.
        assert calculate_checksum(payload) == "2c"
|
|
|
|
|
|
class TestHexEncoding:
    """Checks encode_hex / decode_hex individually and as an inverse pair."""

    def test_encode_hex(self):
        """Bytes become two lower-case hex digits each."""
        for raw, text in ((b"\x00\x01\x02", "000102"), (b"ABC", "414243")):
            assert encode_hex(raw) == text

    def test_decode_hex(self):
        """Hex digit pairs become the corresponding bytes."""
        for text, raw in (("000102", b"\x00\x01\x02"), ("414243", b"ABC")):
            assert decode_hex(text) == raw

    def test_roundtrip(self):
        """Decoding an encoded buffer gives back the original bytes."""
        opcodes = b"\x90\x90\xcc\xcd\x21"  # NOP NOP INT3 INT 21
        encoded = encode_hex(opcodes)
        assert decode_hex(encoded) == opcodes
|
|
|
|
|
|
class TestBinaryEscaping:
    """Checks escape_binary / unescape_binary on special and ordinary bytes."""

    def test_escape_special_chars(self):
        """Each of $, #, } and * is replaced by 0x7d plus the byte XOR 0x20."""
        specials = b"$#}*"  # 0x24, 0x23, 0x7d, 0x2a
        expected = b"".join(bytes((0x7D, byte ^ 0x20)) for byte in specials)
        assert escape_binary(specials) == expected

    def test_escape_normal_chars(self):
        """Bytes outside the special set pass through untouched."""
        plain = b"ABC123"
        assert escape_binary(plain) == plain

    def test_unescape(self):
        """A 0x7d-prefixed byte is restored by XOR with 0x20."""
        escaped_dollar = b"\x7d\x04"  # 0x04 ^ 0x20 == 0x24 ($)
        assert unescape_binary(escaped_dollar) == b"$"

    def test_escape_unescape_roundtrip(self):
        """Unescaping an escaped buffer gives back the original bytes."""
        mixed = b"$#}*AB"  # four special bytes followed by two ordinary ones
        escaped = escape_binary(mixed)
        assert unescape_binary(escaped) == mixed
|
|
|
|
|
|
class TestParseStopReply:
    """Checks the (type, info) pair parse_stop_reply returns per reply kind."""

    def test_signal_reply(self):
        """An S reply is reported as a signal with its number decoded."""
        kind, details = parse_stop_reply("S05")
        assert kind == "signal"
        assert details["signal"] == 5  # SIGTRAP

    def test_signal_with_info(self):
        """A T reply carries the signal plus its key:value fields."""
        kind, details = parse_stop_reply("T05thread:01;")
        assert kind == "signal"
        assert details["signal"] == 5
        assert details["thread"] == "01"

    def test_exit_reply(self):
        """A W reply is reported as an exit with its status code."""
        kind, details = parse_stop_reply("W00")
        assert kind == "exit"
        assert details["code"] == 0

    def test_terminated_reply(self):
        """An X reply is reported as termination by a signal."""
        kind, details = parse_stop_reply("X09")
        assert kind == "terminated"
        assert details["signal"] == 9  # SIGKILL

    def test_empty_reply(self):
        """An empty reply is classified as unknown."""
        kind, _ = parse_stop_reply("")
        assert kind == "unknown"

    def test_unknown_reply(self):
        """An unrecognised reply is classified as unknown and keeps a raw entry."""
        kind, details = parse_stop_reply("QQQ")
        assert kind == "unknown"
        assert "raw" in details
|
|
|
|
|
|
class TestParseRegisters:
    """Tests for parsing x86 register dump."""

    def test_parse_registers(self):
        """Test parsing register hex dump.

        The mock dump holds sixteen 32-bit values, each written as eight hex
        digits in little-endian byte order, in the order listed below.
        """
        # Single source of truth for the mock dump: the hex text is generated
        # from these values, so the documented values cannot drift away from
        # the data actually fed to the parser.
        register_values = (
            ("eax", 0x12345678),
            ("ecx", 0),
            ("edx", 0),
            ("ebx", 0),
            ("esp", 0x1000),
            ("ebp", 0),
            ("esi", 0),
            ("edi", 0),
            ("eip", 0x1000),
            ("eflags", 0x202),
            ("cs", 0x100),
            ("ss", 0x200),
            ("ds", 0x300),
            ("es", 0x400),
            ("fs", 0),
            ("gs", 0),
        )

        # Little-endian hex for each register, e.g. 0x12345678 -> "78563412".
        hex_data = "".join(
            value.to_bytes(4, "little").hex() for _, value in register_values
        )

        regs = parse_registers_x86(hex_data)

        assert regs["eax"] == 0x12345678
        assert regs["ecx"] == 0
        assert regs["esp"] == 0x1000
        assert regs["eip"] == 0x1000
        assert regs["eflags"] == 0x202
        assert regs["cs"] == 0x100
        assert regs["ds"] == 0x300
|
|
|
|
|
|
class TestSignalNames:
    """Checks signal_name for recognised and unrecognised signal numbers."""

    def test_known_signals(self):
        """Recognised numbers map to their symbolic names."""
        known = ((5, "SIGTRAP"), (11, "SIGSEGV"), (9, "SIGKILL"))
        for number, label in known:
            assert signal_name(number) == label

    def test_unknown_signal(self):
        """An unrecognised number falls back to SIG followed by the number."""
        unrecognised = 99
        assert signal_name(unrecognised) == "SIG99"
|
|
|
|
|
|
class TestHexdump:
    """Checks the address, hex and ASCII parts of hexdump output."""

    def test_simple_hexdump(self):
        """Printable data shows its address, hex bytes and ASCII column."""
        rendered = hexdump(b"Hello, World!", address=0x100)
        fragments = (
            "00100",
            "48 65 6c 6c",  # "Hell"
            "|Hello, World!|",
        )
        for fragment in fragments:
            assert fragment in rendered

    def test_hexdump_with_unprintable(self):
        """Unprintable bytes appear as dots in the ASCII column."""
        rendered = hexdump(b"\x00\x01\x02ABC\xff", address=0)
        for fragment in ("00 01 02", "|...ABC.|"):
            assert fragment in rendered

    def test_hexdump_multiline(self):
        """Thirty-two bytes at width 16 produce exactly two output lines."""
        payload = bytes(range(32))
        rows = hexdump(payload, width=16).strip().split('\n')
        assert len(rows) == 2
|