llm-fusion-mcp/health-check.sh
Ryan Malloy c335ba0e1e
Some checks are pending
🚀 LLM Fusion MCP - CI/CD Pipeline / 🔍 Code Quality & Testing (3.10) (push) Waiting to run
🚀 LLM Fusion MCP - CI/CD Pipeline / 🔍 Code Quality & Testing (3.11) (push) Waiting to run
🚀 LLM Fusion MCP - CI/CD Pipeline / 🔍 Code Quality & Testing (3.12) (push) Waiting to run
🚀 LLM Fusion MCP - CI/CD Pipeline / 🛡️ Security Scanning (push) Blocked by required conditions
🚀 LLM Fusion MCP - CI/CD Pipeline / 🐳 Docker Build & Push (push) Blocked by required conditions
🚀 LLM Fusion MCP - CI/CD Pipeline / 🎉 Create Release (push) Blocked by required conditions
🚀 LLM Fusion MCP - CI/CD Pipeline / 📢 Deployment Notification (push) Blocked by required conditions
Initial commit: LLM Fusion MCP Server
- Unified access to 4 major LLM providers (Gemini, OpenAI, Anthropic, Grok)
- Real-time streaming support across all providers
- Multimodal capabilities (text, images, audio)
- Intelligent document processing with smart chunking
- Production-ready with health monitoring and error handling
- Full OpenAI ecosystem integration (Assistants, DALL-E, Whisper)
- Vector embeddings and semantic similarity
- Session-based API key management
- Built with FastMCP and modern Python tooling

🤖 Generated with [Claude Code](https://claude.ai/code)

Co-Authored-By: Claude <noreply@anthropic.com>
2025-09-05 05:47:51 -06:00

224 lines
5.8 KiB
Bash

#!/bin/bash
# LLM Fusion MCP - Health Check & Monitoring Script
#
# Checks the service's Docker container and HTTP health endpoint, either once
# or in a continuous monitoring loop (see the command dispatch at the bottom).
set -e

# Configuration
# Name used to find the container and to restart the compose service.
SERVICE_NAME="llm-fusion-mcp"
# The help text documents the following three as environment variables, so a
# value already present in the environment wins; the literal is only the
# fallback used when the variable is unset or empty.
HEALTH_ENDPOINT="${HEALTH_ENDPOINT:-http://localhost:8000/health}"
TIMEOUT="${TIMEOUT:-10}"
CHECK_INTERVAL="${CHECK_INTERVAL:-30}"
# Colors
# Stored as literal backslash sequences; they become real escape bytes only
# when a print helper emits them with escape processing enabled.
NC="\033[0m"        # reset
RED="\033[0;31m"
GREEN="\033[0;32m"
YELLOW="\033[1;33m"
BLUE="\033[0;34m"
# Print a status line prefixed with the current local timestamp.
# Globals:   BLUE, NC (read)
# Arguments: $1 - message text
# Outputs:   one line on stdout; backslash escapes in the prefix and in the
#            message are expanded
print_status() {
  printf '%b\n' "${BLUE}[$(date '+%Y-%m-%d %H:%M:%S')]${NC} $1"
}
# Print a message tagged [SUCCESS].
# Globals:   GREEN, NC (read)
# Arguments: $1 - message text
# Outputs:   one line on stdout; backslash escapes are expanded
print_success() {
  printf '%b\n' "${GREEN}[SUCCESS]${NC} $1"
}
# Print a message tagged [WARNING].
# Globals:   YELLOW, NC (read)
# Arguments: $1 - message text
# Outputs:   one line on stdout; backslash escapes are expanded
print_warning() {
  printf '%b\n' "${YELLOW}[WARNING]${NC} $1"
}
# Print a message tagged [ERROR].
# Globals:   RED, NC (read)
# Arguments: $1 - message text
# Outputs:   one line on stdout; backslash escapes are expanded
print_error() {
  printf '%b\n' "${RED}[ERROR]${NC} $1"
}
# Health check function
#
# Request an HTTP endpoint and report whether it answered with status 200.
# Globals:   TIMEOUT (read) - curl --max-time in seconds; 10 if unset or empty
# Arguments: $1 - URL to request
# Returns:   0 if the HTTP status is exactly 200, 1 otherwise (including any
#            curl failure)
check_health() {
  local endpoint=$1
  local response
  local http_code

  # curl appends "HTTPSTATUS:<code>" after the response body. When curl exits
  # non-zero the fallback marker is appended as well, so the final marker in
  # the captured text is then 000.
  response=$(curl -s -w "HTTPSTATUS:%{http_code}" --max-time "${TIMEOUT:-10}" "$endpoint" 2>/dev/null || echo "HTTPSTATUS:000")

  # Keep only what follows the LAST marker. Scanning the whole response for
  # the marker would also pick up any "HTTPSTATUS:<digits>" text that happens
  # to appear in the body and turn a healthy 200 into a failure.
  http_code=${response##*HTTPSTATUS:}

  if [ "$http_code" = "200" ]; then
    return 0
  fi
  return 1
}
# Docker container check
#
# Succeeds when `docker ps`, filtered by SERVICE_NAME and running status,
# lists a line containing the service name.
# Globals:   SERVICE_NAME (read)
# Returns:   0 if such a line is found, 1 otherwise
check_container() {
  docker ps --filter "name=${SERVICE_NAME}" --filter "status=running" \
    | grep -q "$SERVICE_NAME" && return 0
  return 1
}
# System resource check
#
# Report CPU and memory usage of the service container from `docker stats`.
# Globals:   SERVICE_NAME (read)
# Outputs:   stdout - "CPU: <n>%, Memory: <usage>" when stats are available,
#                     nothing otherwise
#            stderr - a print_warning line when CPU usage is above 80%
# Returns:   0 always
check_resources() {
  local container_id
  # The name filter may match more than one container; inspect the first so
  # that a single ID is handed to `docker stats`.
  container_id=$(docker ps -q --filter "name=${SERVICE_NAME}" | head -n 1)
  [ -n "$container_id" ] || return 0

  local stats
  # "table" output starts with a header row; the last line is the data row.
  stats=$(docker stats --no-stream --format "table {{.CPUPerc}}\t{{.MemUsage}}" "$container_id" 2>/dev/null | tail -n 1)
  [ -n "$stats" ] || return 0

  local cpu_usage memory_usage
  cpu_usage=$(echo "$stats" | awk '{print $1}' | tr -d '%')
  memory_usage=$(echo "$stats" | awk '{print $2}')
  echo "CPU: ${cpu_usage}%, Memory: ${memory_usage}"

  # Alert if CPU > 80%. The value is compared only when it is a plain decimal
  # number, and awk (already required above) does the floating-point
  # comparison so no extra tool is needed. The warning goes to stderr because
  # callers capture stdout as the usage summary.
  if [[ "$cpu_usage" =~ ^[0-9]+([.][0-9]+)?$ ]] \
    && awk -v cpu="$cpu_usage" 'BEGIN { exit !(cpu > 80) }'; then
    print_warning "High CPU usage: ${cpu_usage}%" >&2
  fi
  return 0
}
# Provider connectivity check
#
# Query "${HEALTH_ENDPOINT}/providers" and report how many providers the
# response marks as configured.
# Globals:   HEALTH_ENDPOINT, TIMEOUT (read)
# Outputs:   "Active providers: <count>" when the response contains a true
#            "success" field; otherwise a print_warning line
check_providers() {
  local response
  # A failed request is treated like an empty JSON object.
  response=$(curl -s --max-time "$TIMEOUT" "${HEALTH_ENDPOINT}/providers" 2>/dev/null || echo "{}")

  # Whitespace around the colon is insignificant in JSON, so accept both
  # `"success":true` and `"success": true`.
  if grep -Eq '"success"[[:space:]]*:[[:space:]]*true' <<<"$response"; then
    local provider_count
    # One output line per match; wc -l therefore counts the matches and, unlike
    # grep -c, exits 0 even when there are none.
    provider_count=$(grep -Eo '"configured"[[:space:]]*:[[:space:]]*true' <<<"$response" | wc -l)
    # Arithmetic expansion drops the padding some wc implementations emit.
    echo "Active providers: $((provider_count))"
  else
    print_warning "Provider health check failed"
  fi
}
# Main monitoring function
#
# Print a health report in an endless loop, pausing CHECK_INTERVAL seconds
# between reports. The loop has no exit condition of its own; it runs until
# the script is interrupted or exits.
# Globals:   SERVICE_NAME, HEALTH_ENDPOINT, CHECK_INTERVAL (read)
# Outputs:   report lines on stdout via the print_* helpers
run_monitor() {
  print_status "Starting LLM Fusion MCP health monitoring..."

  while true; do
    echo ""
    print_status "=== Health Check Report ==="

    # Container status
    if check_container; then
      print_success "✅ Container is running"

      # Resource usage
      local resource_info
      resource_info=$(check_resources)
      if [ -n "$resource_info" ]; then
        print_status "📊 Resource usage: $resource_info"
      fi
    else
      print_error "❌ Container is not running"
      print_status "Attempting to restart..."
      docker-compose restart "$SERVICE_NAME" || print_error "Failed to restart container"
      # Give the container a moment, then start a fresh report instead of
      # probing an endpoint that cannot be up yet.
      sleep 10
      continue
    fi

    # Health endpoint check
    if check_health "$HEALTH_ENDPOINT"; then
      print_success "✅ Health endpoint responding"
    else
      print_error "❌ Health endpoint not responding"
    fi

    # Provider check
    print_status "🔍 Checking AI providers..."
    check_providers

    # Disk space check
    # -P selects the POSIX output layout: one line per filesystem with the
    # percentage in column 5. Only that column is used.
    local disk_usage
    disk_usage=$(df -P . | tail -1 | awk '{print $5}' | tr -d '%')
    if ! [[ "$disk_usage" =~ ^[0-9]+$ ]]; then
      # Never feed a non-numeric value to the integer comparison below.
      print_warning "⚠️ Unable to determine disk usage"
    elif [ "$disk_usage" -gt 85 ]; then
      print_warning "⚠️ Low disk space: ${disk_usage}% used"
    else
      print_status "💾 Disk usage: ${disk_usage}%"
    fi

    # Log file size check
    if [ -d "./logs" ]; then
      local log_size
      # The pipeline's status is cut's, so a failing du cannot be detected
      # with `||`; fall back on empty output instead.
      log_size=$(du -sh ./logs 2>/dev/null | cut -f1)
      log_size=${log_size:-N/A}
      print_status "📝 Log directory size: $log_size"
    fi

    print_status "Next check in ${CHECK_INTERVAL} seconds..."
    sleep "$CHECK_INTERVAL"
  done
}
# One-time health check
#
# Run the container and health-endpoint checks once, stopping at the first
# failure, then print resource usage and provider status.
# Globals:   HEALTH_ENDPOINT (read)
# Outputs:   report lines on stdout via the print_* helpers
# Returns:   1 if the container or the health endpoint check fails, else 0
run_check() {
  print_status "Running one-time health check..."

  # Container check
  if ! check_container; then
    print_error "❌ Container Status: Not Running"
    return 1
  fi
  print_success "✅ Container Status: Running"

  # Health endpoint
  if ! check_health "$HEALTH_ENDPOINT"; then
    print_error "❌ Health Endpoint: Failed"
    return 1
  fi
  print_success "✅ Health Endpoint: OK"

  # Resource usage (line is omitted when no stats were produced)
  local usage_summary
  usage_summary=$(check_resources)
  if [ -n "$usage_summary" ]; then
    print_status "📊 Resource Usage: $usage_summary"
  fi

  # Provider check
  check_providers

  print_success "🎉 All checks passed!"
  return 0
}
# Usage information
#
# Print the help text (commands and environment variables) to stdout.
# Each argument to printf below becomes one output line.
show_usage() {
  printf '%s\n' \
    "LLM Fusion MCP Health Check Script" \
    "" \
    "Usage: $0 [COMMAND]" \
    "" \
    "Commands:" \
    " check Run one-time health check" \
    " monitor Start continuous monitoring" \
    " help Show this help message" \
    "" \
    "Environment Variables:" \
    " HEALTH_ENDPOINT Health check URL (default: http://localhost:8000/health)" \
    " CHECK_INTERVAL Monitoring interval in seconds (default: 30)" \
    " TIMEOUT HTTP timeout in seconds (default: 10)"
}
# Main script logic
#
# Dispatch on the first command-line argument; "check" is assumed when no
# argument is given. An unrecognised command prints an error plus the usage
# text and exits with status 1.
main() {
  local command="${1:-check}"

  case "$command" in
    "check")
      run_check
      ;;
    "monitor")
      run_monitor
      ;;
    "help"|"-h"|"--help")
      show_usage
      ;;
    *)
      print_error "Unknown command: $1"
      show_usage
      exit 1
      ;;
  esac
}

main "$@"