mcptesta/examples/templates/stress_template.yaml
Ryan Malloy bea4a2e5d3 Initial release: MCPTesta v1.0.0 🧪
Community-driven testing excellence for the MCP ecosystem

MCPTesta is a comprehensive testing framework for FastMCP servers that brings
scientific rigor and enterprise-grade capabilities to MCP protocol testing.

🎯 Core Features:
• Comprehensive FastMCP server testing with advanced protocol support
• Parallel execution with intelligent dependency resolution
• Flexible CLI and YAML configuration system
• Rich reporting: console, HTML, JSON, and JUnit formats
• Advanced MCP protocol features: notifications, cancellation, progress tracking
• Production-ready Docker environment with caddy-docker-proxy integration

🧪 Advanced Testing Capabilities:
• Multi-transport support (stdio, SSE, WebSocket)
• Authentication testing (Bearer tokens, OAuth flows)
• Stress testing and performance validation
• Memory profiling and leak detection
• CI/CD integration with comprehensive reporting

🎨 Professional Assets:
• Complete logo package with lab experiment theme
• Comprehensive documentation with Diátaxis framework
• Community-focused branding and messaging
• Multi-platform favicon and social media assets

📚 Documentation:
• Getting started tutorials and comprehensive guides
• Complete CLI and YAML reference documentation
• Architecture explanations and testing strategies
• Team collaboration and security compliance guides

🚀 Ready for:
• Community contributions and external development
• Enterprise deployment and production use
• Integration with existing FastMCP workflows
• Extension and customization for specific needs

Built with modern Python practices using uv, FastMCP, and Starlight documentation.
Designed for developers who demand scientific precision in their testing tools.

Repository: https://git.supported.systems/mcp/mcptesta
Documentation: https://mcptesta.l.supported.systems

# MCPTesta Stress Testing Configuration Template
#
# Specialized template for comprehensive stress testing and performance validation.
# Designed to push FastMCP servers to their limits and identify bottlenecks.
#
# Stress Testing Categories:
# - Load testing with various patterns
# - Performance benchmarking
# - Resource exhaustion testing
# - Concurrency and parallelism limits
# - Memory and CPU pressure testing
# - Network stress and bandwidth testing

# Configuration optimized for stress testing
config:
  parallel_workers: 16              # High concurrency for stress testing
  output_directory: "./stress_test_results"
  output_format: "all"
  global_timeout: 1800              # 30 minutes for long-running stress tests
  max_concurrent_operations: 100

  # Stress-testing-specific features
  enable_stress_testing: true
  enable_memory_profiling: true
  enable_performance_profiling: true
  enable_resource_monitoring: true

  features:
    test_notifications: true
    test_cancellation: true
    test_progress: true
    test_sampling: true

  # Aggressive retry policy for stress conditions
  retry_policy:
    max_retries: 1                  # Minimal retries to avoid masking stress failures
    backoff_factor: 1.0
    retry_on_errors: ["ConnectionError"]

  # Performance monitoring configuration
  monitoring:
    enable_real_time_metrics: true
    metrics_collection_interval: 1  # Collect metrics every second
    performance_thresholds:
      max_latency_ms: 5000          # Allow higher latency under stress
      max_memory_mb: 2048
      max_cpu_percent: 95
    resource_sampling_rate: 0.1     # Sample 10% of operations for detailed metrics
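
# Note: the thresholds above are deliberately loose so that stress runs are not
# aborted early. How the framework reacts when a threshold is exceeded (flagging
# vs. failing the run) is not specified here, so treat them as reporting limits
# rather than hard guarantees. A tighter, purely illustrative variant for
# SLA-style validation (hypothetical values, not derived from the baseline suite)
# might look like:
#
#   monitoring:
#     performance_thresholds:
#       max_latency_ms: 500
#       max_memory_mb: 1024
#       max_cpu_percent: 80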

# Multiple server instances for distributed load testing
servers:
  - name: "stress_target_1"
    command: "${STRESS_SERVER_1_CMD:python -m my_fastmcp_server --performance-mode --instance 1}"
    transport: "stdio"
    timeout: 60
    enabled: true
    env_vars:
      PERFORMANCE_MODE: "true"
      MAX_CONNECTIONS: "1000"
      BUFFER_SIZE: "65536"
      GC_THRESHOLD: "high"

  - name: "stress_target_2"
    command: "${STRESS_SERVER_2_CMD:python -m my_fastmcp_server --performance-mode --instance 2}"
    transport: "stdio"
    timeout: 60
    enabled: true
    env_vars:
      PERFORMANCE_MODE: "true"
      INSTANCE_ID: "2"

  - name: "stress_target_3"
    command: "${STRESS_SERVER_3_CMD:python -m my_fastmcp_server --performance-mode --instance 3}"
    transport: "stdio"
    timeout: 60
    enabled: false                  # Enable for multi-instance testing
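
# The ${NAME:default} placeholders above resolve NAME from the "variables" section
# at the end of this file (or, presumably, from the environment), falling back to
# the text after the colon when NAME is unset. To run a distributed three-instance
# test, set "enabled: true" on stress_target_3 and point STRESS_SERVER_3_CMD at your
# server command, for example (a hypothetical variant mirroring the defaults above):
#
#   STRESS_SERVER_3_CMD: "python -m my_fastmcp_server --performance-mode --max-connections 2000 --instance 3"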

# Comprehensive stress testing suites
test_suites:
  - name: "Baseline Performance Measurement"
    description: "Establish performance baseline before stress testing"
    enabled: true
    tags: ["baseline", "performance"]
    parallel: false                 # Sequential for accurate baseline
    timeout: 600                    # Covers the sequential tests below (120 + 300 + 90 seconds)

    tests:
      - name: "single_operation_latency"
        description: "Measure single operation latency"
        test_type: "tool_call"
        target: "echo"
        parameters:
          message: "baseline_test"
        retry_count: 1000           # Multiple samples for statistical significance
        timeout: 120
        tags: ["latency", "baseline"]

      - name: "throughput_measurement"
        description: "Measure maximum throughput"
        test_type: "tool_call"
        target: "echo"
        parameters:
          message: "throughput_test"
        retry_count: 10000
        enable_progress: true
        timeout: 300
        tags: ["throughput", "baseline"]

      - name: "resource_usage_baseline"
        description: "Measure baseline resource usage"
        test_type: "tool_call"
        target: "resource_monitor"
        parameters:
          duration: 60
          metrics: ["cpu", "memory", "io", "network"]
        timeout: 90
        tags: ["resources", "baseline"]
- name: "Load Pattern Testing"
description: "Test various load patterns and traffic shapes"
enabled: true
tags: ["load", "patterns"]
parallel: true
timeout: 900
tests:
- name: "constant_load_test"
description: "Sustained constant load testing"
test_type: "tool_call"
target: "echo"
parameters:
message: "constant_load_${ITERATION}"
retry_count: 50000 # 50k operations
timeout: 600
tags: ["constant", "sustained"]
- name: "spike_load_test"
description: "Sudden traffic spike testing"
test_type: "tool_call"
target: "spike_handler"
parameters:
spike_factor: 10
spike_duration: 30
baseline_rps: 100
enable_progress: true
timeout: 120
tags: ["spike", "burst"]
- name: "ramp_up_test"
description: "Gradual load ramp-up testing"
test_type: "tool_call"
target: "ramp_processor"
parameters:
start_rps: 1
end_rps: 1000
ramp_duration: 300
hold_duration: 60
enable_progress: true
timeout: 480
tags: ["ramp", "gradual"]
- name: "oscillating_load_test"
description: "Oscillating load pattern testing"
test_type: "tool_call"
target: "oscillator"
parameters:
min_rps: 10
max_rps: 500
period_seconds: 60
cycles: 10
enable_progress: true
timeout: 720
tags: ["oscillating", "variable"]
- name: "Concurrency Stress Testing"
description: "High concurrency and parallelism stress testing"
enabled: true
tags: ["concurrency", "parallel"]
parallel: true
timeout: 600
tests:
- name: "maximum_concurrent_connections"
description: "Test maximum concurrent connection limits"
test_type: "tool_call"
target: "connection_holder"
parameters:
hold_duration: 120
connection_type: "persistent"
retry_count: 1000 # Attempt 1000 concurrent connections
timeout: 180
tags: ["connections", "limits"]
- name: "thread_pool_exhaustion"
description: "Test thread pool exhaustion and recovery"
test_type: "tool_call"
target: "thread_consumer"
parameters:
threads_to_consume: 500
hold_duration: 60
timeout: 120
tags: ["threads", "exhaustion"]
- name: "async_operation_flood"
description: "Flood server with async operations"
test_type: "tool_call"
target: "async_processor"
parameters:
async_operations: 10000
operation_type: "concurrent"
enable_progress: true
timeout: 300
tags: ["async", "flood"]
- name: "request_queue_overflow"
description: "Test request queue overflow handling"
test_type: "tool_call"
target: "queue_filler"
parameters:
queue_size_target: 100000
overflow_strategy: "backpressure"
timeout: 180
tags: ["queue", "overflow"]
- name: "Memory Stress Testing"
description: "Memory-intensive operations and pressure testing"
enabled: true
tags: ["memory", "stress"]
parallel: true
timeout: 800
tests:
- name: "large_payload_processing"
description: "Process increasingly large payloads"
test_type: "tool_call"
target: "payload_processor"
parameters:
payload_sizes: ["1MB", "10MB", "100MB", "500MB"]
processing_type: "memory_intensive"
enable_progress: true
timeout: 600
tags: ["payload", "large"]
- name: "memory_leak_detection"
description: "Long-running test to detect memory leaks"
test_type: "tool_call"
target: "memory_allocator"
parameters:
allocation_pattern: "incremental"
test_duration: 1800 # 30 minutes
leak_detection: true
enable_progress: true
timeout: 2000
tags: ["leaks", "long_running"]
- name: "garbage_collection_pressure"
description: "Create GC pressure and measure impact"
test_type: "tool_call"
target: "gc_stress_tester"
parameters:
allocation_rate: "high"
object_lifetime: "mixed"
gc_frequency_target: 100
timeout: 300
tags: ["gc", "pressure"]
- name: "out_of_memory_recovery"
description: "Test OOM recovery mechanisms"
test_type: "tool_call"
target: "oom_simulator"
parameters:
memory_limit: "512MB"
allocation_strategy: "aggressive"
recovery_validation: true
expected_error: "out of memory"
timeout: 120
tags: ["oom", "recovery"]
- name: "CPU Intensive Stress Testing"
description: "CPU-bound operations and computational stress"
enabled: true
tags: ["cpu", "computational"]
parallel: true
timeout: 600
tests:
- name: "cpu_bound_operations"
description: "CPU-intensive computational tasks"
test_type: "tool_call"
target: "cpu_intensive_task"
parameters:
operation_type: "prime_calculation"
complexity: "high"
iterations: 1000000
retry_count: 10 # Multiple CPU-bound tasks
timeout: 300
tags: ["cpu_bound", "computation"]
- name: "algorithm_complexity_test"
description: "Test algorithmic complexity under load"
test_type: "tool_call"
target: "algorithm_tester"
parameters:
algorithms: ["sorting", "searching", "graph_traversal"]
input_sizes: [1000, 10000, 100000]
complexity_analysis: true
enable_progress: true
timeout: 400
tags: ["algorithms", "complexity"]
- name: "multi_core_utilization"
description: "Test multi-core CPU utilization"
test_type: "tool_call"
target: "parallel_processor"
parameters:
cores_to_utilize: "all"
workload_distribution: "balanced"
cpu_affinity: "round_robin"
timeout: 240
tags: ["multicore", "utilization"]
- name: "I/O Stress Testing"
description: "Intensive I/O operations and bandwidth testing"
enabled: true
tags: ["io", "bandwidth"]
parallel: true
timeout: 700
tests:
- name: "disk_io_stress"
description: "Intensive disk I/O operations"
test_type: "tool_call"
target: "disk_io_tester"
parameters:
io_pattern: "random_write"
file_size: "1GB"
block_size: "4KB"
concurrent_operations: 100
enable_progress: true
timeout: 600
tags: ["disk", "io"]
- name: "network_bandwidth_test"
description: "Network bandwidth saturation testing"
test_type: "tool_call"
target: "bandwidth_tester"
parameters:
data_volume: "10GB"
connection_count: 50
transfer_pattern: "bulk"
enable_progress: true
timeout: 400
tags: ["network", "bandwidth"]
- name: "file_descriptor_exhaustion"
description: "Test file descriptor limit handling"
test_type: "tool_call"
target: "fd_consumer"
parameters:
target_fd_count: 10000
fd_type: "mixed"
cleanup_strategy: "gradual"
timeout: 180
tags: ["file_descriptors", "limits"]
- name: "Error Handling Under Stress"
description: "Error handling and recovery under stress conditions"
enabled: true
tags: ["errors", "recovery", "stress"]
parallel: true
timeout: 400
tests:
- name: "error_flood_test"
description: "Flood server with error-inducing requests"
test_type: "tool_call"
target: "error_generator"
parameters:
error_types: ["invalid_params", "timeout", "resource_unavailable"]
error_rate: 0.5 # 50% error rate
total_operations: 10000
timeout: 300
tags: ["errors", "flood"]
- name: "cascading_failure_stress"
description: "Test cascading failure handling under stress"
test_type: "tool_call"
target: "cascade_simulator"
parameters:
initial_failure_rate: 0.1
cascade_probability: 0.3
recovery_time: 30
timeout: 240
tags: ["cascading", "failures"]
- name: "timeout_storm_test"
description: "Multiple simultaneous timeout scenarios"
test_type: "tool_call"
target: "timeout_generator"
parameters:
timeout_patterns: ["random", "burst", "gradual"]
concurrent_timeouts: 100
timeout: 180
tags: ["timeouts", "storm"]
- name: "Resource Exhaustion Testing"
description: "Systematic resource exhaustion and recovery testing"
enabled: true
tags: ["resources", "exhaustion"]
parallel: true
timeout: 900
tests:
- name: "connection_pool_exhaustion"
description: "Exhaust connection pool resources"
test_type: "tool_call"
target: "connection_exhaustor"
parameters:
pool_size: 100
hold_duration: 300
exhaustion_strategy: "gradual"
timeout: 400
tags: ["connections", "pool"]
- name: "buffer_overflow_test"
description: "Test buffer overflow handling"
test_type: "tool_call"
target: "buffer_tester"
parameters:
buffer_sizes: ["64KB", "1MB", "10MB"]
overflow_data: "random"
safety_mechanisms: true
timeout: 180
tags: ["buffers", "overflow"]
- name: "cache_thrashing_test"
description: "Induce cache thrashing and measure impact"
test_type: "tool_call"
target: "cache_thrasher"
parameters:
cache_size: "100MB"
working_set: "1GB"
access_pattern: "random"
timeout: 300
tags: ["cache", "thrashing"]
- name: "Long Duration Stability Testing"
description: "Extended duration stability and endurance testing"
enabled: true
tags: ["stability", "endurance", "soak"]
parallel: false # Sequential for stability testing
timeout: 7200 # 2 hours
tests:
- name: "soak_test_24h"
description: "24-hour soak test simulation"
test_type: "tool_call"
target: "soak_tester"
parameters:
duration: 3600 # 1 hour for demo (would be 86400 for full 24h)
operations_per_minute: 60
stability_monitoring: true
enable_progress: true
timeout: 3900
tags: ["soak", "24h", "stability"]
- name: "resource_leak_detection"
description: "Long-running resource leak detection"
test_type: "tool_call"
target: "leak_detector"
parameters:
monitoring_duration: 1800 # 30 minutes
leak_types: ["memory", "connections", "file_handles"]
detection_threshold: 0.05 # 5% growth threshold
enable_progress: true
timeout: 2000
tags: ["leaks", "monitoring"]

# Stress testing specific variables
variables:
  # Server configurations optimized for stress testing
  STRESS_SERVER_1_CMD: "python -m my_fastmcp_server --performance-mode --max-connections 1000 --instance 1"
  STRESS_SERVER_2_CMD: "python -m my_fastmcp_server --performance-mode --max-connections 1000 --instance 2"
  STRESS_SERVER_3_CMD: "python -m my_fastmcp_server --performance-mode --max-connections 1000 --instance 3"

  # Load testing parameters
  MAX_RPS: "10000"
  STRESS_DURATION: "1800"           # 30 minutes
  RAMP_DURATION: "300"              # 5 minutes

  # Resource limits for testing
  MAX_MEMORY_MB: "2048"
  MAX_CPU_PERCENT: "95"
  MAX_CONNECTIONS: "1000"
  MAX_FILE_DESCRIPTORS: "10000"

  # Payload sizes for testing
  SMALL_PAYLOAD: "1KB"
  MEDIUM_PAYLOAD: "1MB"
  LARGE_PAYLOAD: "100MB"
  XLARGE_PAYLOAD: "500MB"

  # Test iteration counters
  ITERATION: "0"
  BATCH_ID: "stress_batch_1"
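
# ITERATION is referenced by test parameters above (message: "constant_load_${ITERATION}"
# in the constant load test). BATCH_ID and the payload-size and resource-limit variables
# are not used by the suites in this template; they are left here as convenient knobs for
# your own tests, assuming ${...} substitution works in test parameters the same way it
# does in the server commands.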

# Stress Testing Execution Guide:
#
# 1. Baseline Establishment:
# - Always run baseline tests first
# - Document performance metrics before stress testing
# - Establish SLA thresholds
#
# 2. Progressive Load Testing:
# - Start with lower loads and increase gradually
# - Monitor resource utilization continuously
# - Identify breaking points and bottlenecks
#
# 3. Resource Monitoring:
# - Enable all profiling and monitoring features
# - Watch for memory leaks, CPU spikes, I/O bottlenecks
# - Monitor system metrics beyond application metrics
#
# 4. Failure Analysis:
# - Document failure modes and recovery patterns
# - Test error handling under stress conditions
# - Validate graceful degradation mechanisms
#
# 5. Long Duration Testing:
# - Run soak tests to detect stability issues
# - Monitor for gradual resource leaks
# - Validate system behavior over extended periods
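#
# A progressive run order that follows steps 1-5 above, reusing the invocations
# shown in the examples below (a sketch; adjust tags and worker counts to your
# environment):
#
#   mcptesta yaml stress_config.yaml --tag baseline
#   mcptesta yaml stress_config.yaml --tag load --tag patterns
#   mcptesta yaml stress_config.yaml --tag memory --enable-memory-profiling
#   mcptesta yaml stress_config.yaml --tag stability --tag endurance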
#
# Execution Examples:
#
# Full stress test suite:
# mcptesta yaml stress_config.yaml --parallel 16 --timeout 7200
#
# Memory-focused stress testing:
# mcptesta yaml stress_config.yaml --tag memory --enable-memory-profiling
#
# Load pattern testing only:
# mcptesta yaml stress_config.yaml --tag load --tag patterns
#
# Long duration stability testing:
# mcptesta yaml stress_config.yaml --tag stability --tag endurance
#
# CPU stress testing:
# mcptesta yaml stress_config.yaml --tag cpu --tag computational --parallel 8
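#
# Other subsets follow the same pattern; for instance, I/O and bandwidth testing only
# (assuming tag filtering behaves as in the examples above):
# mcptesta yaml stress_config.yaml --tag io --tag bandwidth --parallel 8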