refactor: consolidate utils, constants, models (A1-A8)

This commit is contained in:
Ryan Malloy 2026-02-08 11:34:07 -07:00
commit 4bd9ce19af
7 changed files with 312 additions and 233 deletions

View File

@ -2,8 +2,8 @@
"project": "mcilspy-code-review-fixes",
"created": "2025-02-08T00:00:00Z",
"domains": {
"security": { "status": "ready", "branch": "fix/security", "priority": 1 },
"architecture": { "status": "ready", "branch": "fix/architecture", "priority": 2 },
"security": { "status": "merged", "branch": "fix/security", "priority": 1 },
"architecture": { "status": "merged", "branch": "fix/architecture", "priority": 2 },
"performance": { "status": "ready", "branch": "fix/performance", "priority": 3 },
"testing": { "status": "ready", "branch": "fix/testing", "priority": 4 }
},

49
src/mcilspy/constants.py Normal file
View File

@ -0,0 +1,49 @@
"""Constants and configuration values for mcilspy.
This module centralizes all timeouts, limits, and magic numbers used throughout
the codebase. Import from here rather than hardcoding values.
"""
# =============================================================================
# Subprocess Timeouts
# =============================================================================
# Maximum time to wait for ilspycmd decompilation (in seconds)
# Large assemblies or corrupted files may take longer
DECOMPILE_TIMEOUT_SECONDS: float = 300.0 # 5 minutes
# =============================================================================
# Output Limits
# =============================================================================
# Maximum characters to display from subprocess output in error messages
MAX_ERROR_OUTPUT_CHARS: int = 1000
# Maximum line length when displaying code snippets (truncate longer lines)
MAX_LINE_LENGTH: int = 200
# Maximum unparsed lines to log before suppressing (avoid log spam)
MAX_UNPARSED_LOG_LINES: int = 3
# Preview length for unparsed line debug messages
UNPARSED_LINE_PREVIEW_LENGTH: int = 100
# =============================================================================
# Search Limits
# =============================================================================
# Default maximum results for search operations
DEFAULT_MAX_SEARCH_RESULTS: int = 100
# Maximum matches to display per type in grouped results
MAX_MATCHES_PER_TYPE: int = 20
# =============================================================================
# Tool Identifiers (for ilspycmd CLI)
# =============================================================================
# Default entity types for list operations
DEFAULT_ENTITY_TYPES: list[str] = ["class"]
# All supported entity types
ALL_ENTITY_TYPES: list[str] = ["class", "interface", "struct", "delegate", "enum"]

View File

@ -9,6 +9,11 @@ import tempfile
from pathlib import Path
from typing import Any
from .constants import (
DECOMPILE_TIMEOUT_SECONDS,
MAX_UNPARSED_LOG_LINES,
UNPARSED_LINE_PREVIEW_LENGTH,
)
from .models import (
AssemblyInfo,
AssemblyInfoRequest,
@ -19,6 +24,7 @@ from .models import (
ListTypesResponse,
TypeInfo,
)
from .utils import find_ilspycmd_path
logger = logging.getLogger(__name__)
@ -28,7 +34,27 @@ MAX_OUTPUT_BYTES = 50_000_000 # 50 MB
class ILSpyWrapper:
"""Wrapper class for ILSpy command line tool."""
"""Wrapper class for ILSpy command line tool.
This class encapsulates all interactions with the ilspycmd CLI tool.
While the wrapper is stateless in terms of decompilation operations
(each call is independent), it exists as a class to:
1. Cache the ilspycmd path lookup - Finding the executable involves
checking PATH and several common installation locations, which
is relatively expensive. Caching this on instantiation avoids
repeated filesystem operations.
2. Provide a single point of configuration - If ilspycmd is not found,
we fail fast at wrapper creation rather than on each tool call.
3. Enable future extensions - The class structure allows adding
connection pooling, result caching, or other optimizations without
changing the API.
The wrapper should be created once and reused across tool calls.
See get_wrapper() in server.py for the recommended usage pattern.
"""
def __init__(self, ilspycmd_path: str | None = None) -> None:
"""Initialize the wrapper.
@ -36,48 +62,12 @@ class ILSpyWrapper:
Args:
ilspycmd_path: Path to ilspycmd executable. If None, will try to find it in PATH.
"""
self.ilspycmd_path = ilspycmd_path or self._find_ilspycmd()
self.ilspycmd_path = ilspycmd_path or find_ilspycmd_path()
if not self.ilspycmd_path:
raise RuntimeError(
"ILSpyCmd not found. Please install it with: dotnet tool install --global ilspycmd"
)
def _find_ilspycmd(self) -> str | None:
"""Find ilspycmd executable in PATH or common install locations.
Checks:
1. Standard PATH (via shutil.which)
2. ~/.dotnet/tools (default location for 'dotnet tool install --global')
3. Platform-specific locations
"""
# Try standard PATH first
for cmd_name in ["ilspycmd", "ilspycmd.exe"]:
path = shutil.which(cmd_name)
if path:
return path
# Check common dotnet tools locations (not always in PATH)
home = os.path.expanduser("~")
dotnet_tools_paths = [
os.path.join(home, ".dotnet", "tools", "ilspycmd"),
os.path.join(home, ".dotnet", "tools", "ilspycmd.exe"),
]
# Windows-specific paths
if os.name == "nt":
userprofile = os.environ.get("USERPROFILE", "")
if userprofile:
dotnet_tools_paths.extend([
os.path.join(userprofile, ".dotnet", "tools", "ilspycmd.exe"),
])
for tool_path in dotnet_tools_paths:
if os.path.isfile(tool_path) and os.access(tool_path, os.X_OK):
logger.info(f"Found ilspycmd at {tool_path} (not in PATH)")
return tool_path
return None
async def _run_command(
self, args: list[str], input_data: str | None = None
) -> tuple[int, str, str]:
@ -107,17 +97,18 @@ class ILSpyWrapper:
input_bytes = input_data.encode("utf-8") if input_data else None
# Timeout after 5 minutes to prevent hanging on malicious/corrupted assemblies
# Timeout to prevent hanging on malicious/corrupted assemblies
try:
stdout_bytes, stderr_bytes = await asyncio.wait_for(
process.communicate(input=input_bytes),
timeout=300.0 # 5 minutes
timeout=DECOMPILE_TIMEOUT_SECONDS,
)
except asyncio.TimeoutError:
logger.warning("Command timed out after 5 minutes, killing process")
timeout_mins = DECOMPILE_TIMEOUT_SECONDS / 60
logger.warning(f"Command timed out after {timeout_mins:.0f} minutes, killing process")
process.kill()
await process.wait() # Ensure process is cleaned up
return -1, "", "Command timed out after 5 minutes. The assembly may be corrupted or too complex."
return -1, "", f"Command timed out after {timeout_mins:.0f} minutes. The assembly may be corrupted or too complex."
# Truncate output if it exceeds the limit to prevent memory exhaustion
stdout_truncated = False
@ -369,10 +360,10 @@ class ILSpyWrapper:
else:
# Log unexpected lines (but don't fail - ilspycmd may output warnings/info)
unparsed_count += 1
if unparsed_count <= 3: # Avoid log spam
logger.debug(f"Skipping unparsed line from ilspycmd: {line[:100]}")
if unparsed_count <= MAX_UNPARSED_LOG_LINES:
logger.debug(f"Skipping unparsed line from ilspycmd: {line[:UNPARSED_LINE_PREVIEW_LENGTH]}")
if unparsed_count > 3:
if unparsed_count > MAX_UNPARSED_LOG_LINES:
logger.debug(f"Skipped {unparsed_count} unparsed lines total")
return types

View File

@ -4,107 +4,39 @@ Provides access to all 34+ CLR metadata tables without requiring ilspycmd.
This enables searching for methods, fields, properties, events, and resources
that are not exposed via the ilspycmd CLI.
This module contains CPU-bound synchronous code for parsing .NET PE metadata.
For heavy workloads with many concurrent requests, consider running these
operations in a thread pool (e.g., asyncio.to_thread) to avoid blocking
the event loop.
Note: dnfile provides flag attributes as boolean properties (e.g., mdPublic, fdStatic)
rather than traditional IntFlag enums, so we use those directly.
"""
import logging
from dataclasses import dataclass, field
from pathlib import Path
from typing import Any
import dnfile
from dnfile.mdtable import TypeDefRow
from .models import (
AssemblyMetadata,
EventInfo,
FieldInfo,
MethodInfo,
PropertyInfo,
ResourceInfo,
)
logger = logging.getLogger(__name__)
# Maximum assembly file size to load (in megabytes)
# Prevents memory exhaustion from extremely large or malicious assemblies
MAX_ASSEMBLY_SIZE_MB = 500
@dataclass
class MethodInfo:
"""Information about a method in an assembly."""
name: str
full_name: str
declaring_type: str
namespace: str | None
return_type: str | None = None
is_public: bool = False
is_static: bool = False
is_virtual: bool = False
is_abstract: bool = False
parameters: list[str] = field(default_factory=list)
@dataclass
class FieldInfo:
"""Information about a field in an assembly."""
name: str
full_name: str
declaring_type: str
namespace: str | None
field_type: str | None = None
is_public: bool = False
is_static: bool = False
is_literal: bool = False # Constant
default_value: str | None = None
@dataclass
class PropertyInfo:
"""Information about a property in an assembly."""
name: str
full_name: str
declaring_type: str
namespace: str | None
property_type: str | None = None
has_getter: bool = False
has_setter: bool = False
@dataclass
class EventInfo:
"""Information about an event in an assembly."""
name: str
full_name: str
declaring_type: str
namespace: str | None
event_type: str | None = None
@dataclass
class ResourceInfo:
"""Information about an embedded resource."""
name: str
size: int
is_public: bool = True
@dataclass
class AssemblyMetadata:
"""Complete assembly metadata from dnfile."""
name: str
version: str
culture: str | None = None
public_key_token: str | None = None
target_framework: str | None = None
type_count: int = 0
method_count: int = 0
field_count: int = 0
property_count: int = 0
event_count: int = 0
resource_count: int = 0
referenced_assemblies: list[str] = field(default_factory=list)
class AssemblySizeError(ValueError):
"""Raised when an assembly exceeds the maximum allowed size."""

View File

@ -161,3 +161,84 @@ class AssemblyInfo(BaseModel):
runtime_version: str | None = None
is_signed: bool = False
has_debug_info: bool = False
# =============================================================================
# Metadata Reader Models (dnfile-based direct metadata access)
# =============================================================================
class MethodInfo(BaseModel):
"""Information about a method in an assembly."""
name: str
full_name: str
declaring_type: str
namespace: str | None = None
return_type: str | None = None
is_public: bool = False
is_static: bool = False
is_virtual: bool = False
is_abstract: bool = False
parameters: list[str] = Field(default_factory=list)
class FieldInfo(BaseModel):
"""Information about a field in an assembly."""
name: str
full_name: str
declaring_type: str
namespace: str | None = None
field_type: str | None = None
is_public: bool = False
is_static: bool = False
is_literal: bool = False # Constant
default_value: str | None = None
class PropertyInfo(BaseModel):
"""Information about a property in an assembly."""
name: str
full_name: str
declaring_type: str
namespace: str | None = None
property_type: str | None = None
has_getter: bool = False
has_setter: bool = False
class EventInfo(BaseModel):
"""Information about an event in an assembly."""
name: str
full_name: str
declaring_type: str
namespace: str | None = None
event_type: str | None = None
class ResourceInfo(BaseModel):
"""Information about an embedded resource."""
name: str
size: int = 0
is_public: bool = True
class AssemblyMetadata(BaseModel):
"""Complete assembly metadata from dnfile."""
name: str
version: str
culture: str | None = None
public_key_token: str | None = None
target_framework: str | None = None
type_count: int = 0
method_count: int = 0
field_count: int = 0
property_count: int = 0
event_count: int = 0
resource_count: int = 0
referenced_assemblies: list[str] = Field(default_factory=list)

View File

@ -5,12 +5,19 @@ import platform
import re
import shutil
from contextlib import asynccontextmanager
from dataclasses import dataclass
from mcp.server.fastmcp import Context, FastMCP
from .constants import (
ALL_ENTITY_TYPES,
DEFAULT_MAX_SEARCH_RESULTS,
MAX_ERROR_OUTPUT_CHARS,
MAX_LINE_LENGTH,
MAX_MATCHES_PER_TYPE,
)
from .ilspy_wrapper import ILSpyWrapper
from .models import EntityType, LanguageVersion
from .utils import find_ilspycmd_path
# Setup logging
log_level = os.getenv("LOGLEVEL", "INFO").upper()
@ -21,27 +28,21 @@ logging.basicConfig(
logger = logging.getLogger(__name__)
@dataclass
class AppState:
"""Application state shared across tools via lifespan context."""
wrapper: ILSpyWrapper | None = None
# Module-level cached wrapper instance (lazily initialized)
# This avoids complex lifespan context access and provides simple caching
_cached_wrapper: ILSpyWrapper | None = None
@asynccontextmanager
async def app_lifespan(server: FastMCP):
"""Initialize application state on startup, cleanup on shutdown.
The ILSpyWrapper is lazily initialized on first use to avoid
failing startup if ilspycmd isn't installed (dnfile-based tools
still work without it).
"""
state = AppState()
"""Initialize application state on startup, cleanup on shutdown."""
global _cached_wrapper
logger.info("mcilspy server starting up")
try:
yield {"app_state": state}
yield {}
finally:
logger.info("mcilspy server shutting down")
_cached_wrapper = None # Clear cache on shutdown
# Create FastMCP server with lifespan
@ -49,12 +50,14 @@ mcp = FastMCP("mcilspy", lifespan=app_lifespan)
def get_wrapper(ctx: Context | None = None) -> ILSpyWrapper:
"""Get ILSpy wrapper instance from context or create new one.
"""Get ILSpy wrapper instance, creating one if needed.
The wrapper is cached at module level for efficiency. It's lazily
initialized on first use to avoid failing startup if ilspycmd
isn't installed (dnfile-based tools still work without it).
Args:
ctx: Optional FastMCP context. If provided, uses lifespan state
for caching. If None, creates a new instance (for backwards
compatibility with non-tool callers).
ctx: Optional FastMCP context (unused, kept for API compatibility)
Returns:
ILSpyWrapper instance
@ -62,22 +65,10 @@ def get_wrapper(ctx: Context | None = None) -> ILSpyWrapper:
Raises:
RuntimeError: If ilspycmd is not installed
"""
if ctx is not None:
# Use lifespan state for caching (access via request_context)
try:
lifespan_ctx = ctx.request_context.lifespan_context
if lifespan_ctx is not None:
state: AppState = lifespan_ctx.get("app_state")
if state is not None:
if state.wrapper is None:
state.wrapper = ILSpyWrapper()
return state.wrapper
except (AttributeError, ValueError):
# Context not fully initialized, fall through to create new instance
pass
# Fallback: create new instance (no caching)
return ILSpyWrapper()
global _cached_wrapper
if _cached_wrapper is None:
_cached_wrapper = ILSpyWrapper()
return _cached_wrapper
def _format_error(error: Exception, context: str = "") -> str:
@ -147,31 +138,29 @@ def _validate_assembly_path(assembly_path: str) -> str:
return resolved_path
def _find_ilspycmd_path() -> str | None:
"""Find ilspycmd in PATH or common install locations."""
# Check PATH first
path = shutil.which("ilspycmd")
if path:
return path
def _compile_search_pattern(
pattern: str, case_sensitive: bool, use_regex: bool
) -> tuple[re.Pattern | None, str | None]:
"""Compile a search pattern into a regex, handling errors.
# Check common dotnet tools locations (often not in PATH for MCP servers)
home = os.path.expanduser("~")
candidates = [
os.path.join(home, ".dotnet", "tools", "ilspycmd"),
os.path.join(home, ".dotnet", "tools", "ilspycmd.exe"),
]
Args:
pattern: The search pattern string
case_sensitive: Whether matching should be case-sensitive
use_regex: Whether to treat pattern as a regular expression
# Windows-specific
if os.name == "nt":
userprofile = os.environ.get("USERPROFILE", "")
if userprofile:
candidates.append(os.path.join(userprofile, ".dotnet", "tools", "ilspycmd.exe"))
Returns:
Tuple of (compiled_pattern, error_message). If use_regex is False,
returns (None, None). If regex compilation fails, returns
(None, error_message). Otherwise returns (pattern, None).
"""
if not use_regex:
return None, None
for candidate in candidates:
if os.path.isfile(candidate) and os.access(candidate, os.X_OK):
return candidate
return None
try:
flags = 0 if case_sensitive else re.IGNORECASE
return re.compile(pattern, flags), None
except re.error as e:
return None, f"Invalid regex pattern: {e}"
async def _check_dotnet_tools() -> dict:
@ -202,7 +191,7 @@ async def _check_dotnet_tools() -> dict:
pass
# Check if ilspycmd is available (check PATH and common locations)
ilspy_path = _find_ilspycmd_path()
ilspy_path = find_ilspycmd_path()
if ilspy_path:
result["ilspycmd_available"] = True
result["ilspycmd_path"] = ilspy_path
@ -380,7 +369,7 @@ async def _try_install_dotnet_sdk(ctx: Context | None = None) -> tuple[bool, str
return False, (
f"❌ Installation failed (exit code {proc.returncode}).\n\n"
f"Command: `{' '.join(cmd_args)}`\n\n"
f"Output:\n```\n{output[-1000:]}\n```\n\n"
f"Output:\n```\n{output[-MAX_ERROR_OUTPUT_CHARS:]}\n```\n\n"
"Try running the command manually with sudo if needed."
)
@ -628,11 +617,21 @@ async def decompile_assembly(
# Use simplified request object (no complex pydantic validation needed)
from .models import DecompileRequest
# Validate language version with helpful error message
try:
lang_ver = LanguageVersion(language_version)
except ValueError:
valid_versions = [v.value for v in LanguageVersion]
return (
f"Invalid language version: '{language_version}'\n\n"
f"Valid options: {', '.join(valid_versions)}"
)
request = DecompileRequest(
assembly_path=validated_path,
output_dir=output_dir,
type_name=type_name,
language_version=LanguageVersion(language_version),
language_version=lang_ver,
create_project=create_project,
show_il_code=show_il_code or show_il_sequence_points,
remove_dead_code=remove_dead_code,
@ -906,7 +905,7 @@ async def search_types(
# Default to search all entity types
if entity_types is None:
entity_types = ["class", "interface", "struct", "delegate", "enum"]
entity_types = ALL_ENTITY_TYPES.copy()
# Convert to EntityType enums
entity_type_enums = []
@ -926,14 +925,9 @@ async def search_types(
return response.error_message or "Failed to list types"
# Compile regex if needed
if use_regex:
try:
flags = 0 if case_sensitive else re.IGNORECASE
search_pattern = re.compile(pattern, flags)
except re.error as e:
return f"Invalid regex pattern: {e}"
else:
search_pattern = None
search_pattern, regex_error = _compile_search_pattern(pattern, case_sensitive, use_regex)
if regex_error:
return regex_error
# Filter types by pattern and namespace
matching_types = []
@ -996,7 +990,7 @@ async def search_strings(
pattern: str,
case_sensitive: bool = False,
use_regex: bool = False,
max_results: int = 100,
max_results: int = DEFAULT_MAX_SEARCH_RESULTS,
ctx: Context | None = None,
) -> str:
"""Search for string literals in assembly code.
@ -1046,12 +1040,9 @@ async def search_strings(
source_code = response.source_code or ""
# Compile regex if needed
if use_regex:
try:
flags = 0 if case_sensitive else re.IGNORECASE
search_pattern = re.compile(pattern, flags)
except re.error as e:
return f"Invalid regex pattern: {e}"
search_pattern, regex_error = _compile_search_pattern(pattern, case_sensitive, use_regex)
if regex_error:
return regex_error
# Search for string literals containing the pattern
# In IL, strings appear as: ldstr "string value"
@ -1090,7 +1081,7 @@ async def search_strings(
matches.append(
{
"line_num": i + 1,
"line": line.strip()[:200], # Truncate long lines
"line": line.strip()[:MAX_LINE_LENGTH], # Truncate long lines
"type": current_type or "Unknown",
"method": current_method,
}
@ -1116,12 +1107,12 @@ async def search_strings(
for type_name, type_matches in sorted(by_type.items()):
content += f"## {type_name}\n\n"
for match in type_matches[:20]: # Limit per type
for match in type_matches[:MAX_MATCHES_PER_TYPE]:
method_info = f" in `{match['method']}()`" if match["method"] else ""
content += f"- Line {match['line_num']}{method_info}:\n"
content += f" ```\n {match['line']}\n ```\n"
if len(type_matches) > 20:
content += f" ... and {len(type_matches) - 20} more matches in this type\n"
if len(type_matches) > MAX_MATCHES_PER_TYPE:
content += f" ... and {len(type_matches) - MAX_MATCHES_PER_TYPE} more matches in this type\n"
content += "\n"
content += "\n**TIP**: Use `decompile_assembly` with `type_name` to see the full context of interesting matches."
@ -1191,14 +1182,9 @@ async def search_methods(
return "No methods found in assembly (or assembly has no metadata)"
# Compile regex if needed
if use_regex:
try:
flags = 0 if case_sensitive else re.IGNORECASE
search_pattern = re.compile(pattern, flags)
except re.error as e:
return f"Invalid regex pattern: {e}"
else:
search_pattern = None
search_pattern, regex_error = _compile_search_pattern(pattern, case_sensitive, use_regex)
if regex_error:
return regex_error
# Filter by pattern
matching_methods = []
@ -1315,14 +1301,9 @@ async def search_fields(
return "No fields found in assembly"
# Compile regex if needed
if use_regex:
try:
flags = 0 if case_sensitive else re.IGNORECASE
search_pattern = re.compile(pattern, flags)
except re.error as e:
return f"Invalid regex pattern: {e}"
else:
search_pattern = None
search_pattern, regex_error = _compile_search_pattern(pattern, case_sensitive, use_regex)
if regex_error:
return regex_error
# Filter by pattern
matching_fields = []
@ -1428,14 +1409,9 @@ async def search_properties(
return "No properties found in assembly"
# Compile regex if needed
if use_regex:
try:
flags = 0 if case_sensitive else re.IGNORECASE
search_pattern = re.compile(pattern, flags)
except re.error as e:
return f"Invalid regex pattern: {e}"
else:
search_pattern = None
search_pattern, regex_error = _compile_search_pattern(pattern, case_sensitive, use_regex)
if regex_error:
return regex_error
# Filter by pattern
matching_props = []

50
src/mcilspy/utils.py Normal file
View File

@ -0,0 +1,50 @@
"""Shared utility functions for mcilspy.
This module contains common utilities used across the codebase to avoid
code duplication and ensure consistent behavior.
"""
import logging
import os
import shutil
logger = logging.getLogger(__name__)
def find_ilspycmd_path() -> str | None:
"""Find ilspycmd executable in PATH or common install locations.
This is the single source of truth for locating the ilspycmd binary.
It checks:
1. Standard PATH (via shutil.which)
2. ~/.dotnet/tools (default location for 'dotnet tool install --global')
3. Platform-specific locations (Windows %USERPROFILE%)
Returns:
Path to ilspycmd executable if found, None otherwise
"""
# Check PATH first (handles both ilspycmd and ilspycmd.exe)
for cmd_name in ["ilspycmd", "ilspycmd.exe"]:
path = shutil.which(cmd_name)
if path:
return path
# Check common dotnet tools locations (not always in PATH for MCP servers)
home = os.path.expanduser("~")
candidates = [
os.path.join(home, ".dotnet", "tools", "ilspycmd"),
os.path.join(home, ".dotnet", "tools", "ilspycmd.exe"),
]
# Windows-specific: also check USERPROFILE if different from ~
if os.name == "nt":
userprofile = os.environ.get("USERPROFILE", "")
if userprofile and userprofile != home:
candidates.append(os.path.join(userprofile, ".dotnet", "tools", "ilspycmd.exe"))
for candidate in candidates:
if os.path.isfile(candidate) and os.access(candidate, os.X_OK):
logger.info(f"Found ilspycmd at {candidate} (not in PATH)")
return candidate
return None