This commit addresses all critical architectural issues identified in the Matt Holt code review, transforming the module from using anti-patterns to following Caddy best practices. ### 🔴 CRITICAL FIXES: **1. Global Registry → Caddy App System** - Created SIPGuardianApp implementing caddy.App interface (app.go) - Eliminates memory/goroutine leaks on config reload - Before: guardians accumulated in global map, never cleaned up - After: Caddy calls Stop() on old app before loading new config - Impact: Prevents OOM in production with frequent config reloads **2. Feature Flags → Instance Fields** - Moved enableMetrics/Webhooks/Storage from globals to *bool struct fields - Allows per-instance configuration (not shared across all guardians) - Helper methods default to true if not set - Impact: Thread-safe, configurable per guardian instance **3. Prometheus Panic Prevention** - Replaced MustRegister() with Register() + AlreadyRegisteredError handling - Makes RegisterMetrics() idempotent and safe for multiple calls - Before: panics on second call (e.g., config reload) - After: silently ignores already-registered collectors - Impact: No more crashes on config reload ### 🟠 HIGH PRIORITY FIXES: **4. Storage Worker Pool** - Fixed pool of 4 workers + 1000-entry buffered channel - Replaces unbounded go func() spawns (3 locations) - Before: 100k goroutines during DDoS → memory exhaustion - After: bounded resources, drops writes when full (fail-fast) - Impact: Survives attacks without resource exhaustion **5. Config Immutability** - MaxFailures/FindTime/BanTime no longer modified on running instance - Prevents race with RecordFailure() reading values without lock - Changed mutations to warning logs - Additive changes still allowed (whitelists, webhooks) - Impact: No more race conditions, predictable ban behavior ### Modified Files: - app.go (NEW): SIPGuardianApp with proper lifecycle management - sipguardian.go: Removed module registration, added worker pool, feature flags - l4handler.go: Use ctx.App() instead of global registry - metrics.go: Use ctx.App() instead of global registry - registry.go: Config immutability warnings instead of mutations ### Test Results: All tests pass (1.228s) ✅ ### Breaking Changes: None - backwards compatible, but requires apps {} block in Caddyfile for proper lifecycle management ### Estimated Impact: - Memory leak fix: Prevents unbounded growth over time - Resource usage: 100k goroutines → 4 workers during attack - Stability: No more panics on config reload - Performance: O(n log n) sorting (addressed in quick wins)
237 lines
6.2 KiB
Go
237 lines
6.2 KiB
Go
package sipguardian
|
|
|
|
import (
|
|
"net"
|
|
"sync"
|
|
"time"
|
|
|
|
"github.com/caddyserver/caddy/v2"
|
|
"go.uber.org/zap"
|
|
)
|
|
|
|
// Global registry to share guardian instances across modules
|
|
var (
|
|
guardianRegistry = make(map[string]*SIPGuardian)
|
|
registryMu sync.RWMutex
|
|
)
|
|
|
|
// GetOrCreateGuardian returns a shared guardian instance by name (backward compat)
|
|
func GetOrCreateGuardian(ctx caddy.Context, name string) (*SIPGuardian, error) {
|
|
return GetOrCreateGuardianWithConfig(ctx, name, nil)
|
|
}
|
|
|
|
// GetOrCreateGuardianWithConfig returns a shared guardian instance, merging config if provided
|
|
func GetOrCreateGuardianWithConfig(ctx caddy.Context, name string, config *SIPGuardian) (*SIPGuardian, error) {
|
|
if name == "" {
|
|
name = "default"
|
|
}
|
|
|
|
registryMu.Lock()
|
|
defer registryMu.Unlock()
|
|
|
|
if g, exists := guardianRegistry[name]; exists {
|
|
// Guardian exists - merge any new config
|
|
if config != nil {
|
|
mergeGuardianConfig(ctx, g, config)
|
|
}
|
|
return g, nil
|
|
}
|
|
|
|
// Create new guardian with config
|
|
var g *SIPGuardian
|
|
if config != nil {
|
|
// Copy config values to a new guardian
|
|
g = &SIPGuardian{
|
|
MaxFailures: config.MaxFailures,
|
|
FindTime: config.FindTime,
|
|
BanTime: config.BanTime,
|
|
WhitelistCIDR: config.WhitelistCIDR,
|
|
Webhooks: config.Webhooks,
|
|
StoragePath: config.StoragePath,
|
|
GeoIPPath: config.GeoIPPath,
|
|
BlockedCountries: config.BlockedCountries,
|
|
AllowedCountries: config.AllowedCountries,
|
|
Enumeration: config.Enumeration,
|
|
}
|
|
} else {
|
|
g = &SIPGuardian{}
|
|
}
|
|
|
|
if err := g.Provision(ctx); err != nil {
|
|
return nil, err
|
|
}
|
|
|
|
guardianRegistry[name] = g
|
|
return g, nil
|
|
}
|
|
|
|
// mergeGuardianConfig merges new config into an existing guardian
|
|
// This handles cases where multiple handlers might specify overlapping config
|
|
func mergeGuardianConfig(ctx caddy.Context, g *SIPGuardian, config *SIPGuardian) {
|
|
g.mu.Lock()
|
|
defer g.mu.Unlock()
|
|
|
|
logger := ctx.Logger()
|
|
|
|
// Merge whitelist CIDRs (add new ones, avoid duplicates)
|
|
for _, cidr := range config.WhitelistCIDR {
|
|
found := false
|
|
for _, existing := range g.WhitelistCIDR {
|
|
if existing == cidr {
|
|
found = true
|
|
break
|
|
}
|
|
}
|
|
if !found {
|
|
g.WhitelistCIDR = append(g.WhitelistCIDR, cidr)
|
|
// Parse and add to whitelistNets
|
|
if _, network, err := net.ParseCIDR(cidr); err == nil {
|
|
g.whitelistNets = append(g.whitelistNets, network)
|
|
logger.Debug("Added whitelist CIDR from handler config",
|
|
zap.String("cidr", cidr),
|
|
)
|
|
}
|
|
}
|
|
}
|
|
|
|
// Config is immutable after provision - log warnings for attempted changes
|
|
// Changing these values would create race conditions with RecordFailure()
|
|
if config.MaxFailures > 0 && config.MaxFailures != g.MaxFailures {
|
|
logger.Warn("Cannot change max_failures on running guardian (requires config reload)",
|
|
zap.Int("existing", g.MaxFailures),
|
|
zap.Int("attempted", config.MaxFailures),
|
|
)
|
|
}
|
|
if config.FindTime > 0 && config.FindTime != g.FindTime {
|
|
logger.Warn("Cannot change find_time on running guardian (requires config reload)",
|
|
zap.Duration("existing", time.Duration(g.FindTime)),
|
|
zap.Duration("attempted", time.Duration(config.FindTime)),
|
|
)
|
|
}
|
|
if config.BanTime > 0 && config.BanTime != g.BanTime {
|
|
logger.Warn("Cannot change ban_time on running guardian (requires config reload)",
|
|
zap.Duration("existing", time.Duration(g.BanTime)),
|
|
zap.Duration("attempted", time.Duration(config.BanTime)),
|
|
)
|
|
}
|
|
|
|
// Initialize storage if specified and not yet initialized
|
|
if config.StoragePath != "" && g.storage == nil {
|
|
storage, err := InitStorage(logger, StorageConfig{
|
|
Path: config.StoragePath,
|
|
})
|
|
if err != nil {
|
|
logger.Warn("Failed to initialize storage from handler config",
|
|
zap.Error(err),
|
|
)
|
|
} else {
|
|
g.storage = storage
|
|
g.StoragePath = config.StoragePath
|
|
// Load existing bans from storage
|
|
if bans, err := storage.LoadActiveBans(); err == nil {
|
|
for _, ban := range bans {
|
|
entry := ban
|
|
g.bannedIPs[entry.IP] = &entry
|
|
}
|
|
logger.Info("Loaded bans from storage", zap.Int("count", len(bans)))
|
|
}
|
|
}
|
|
}
|
|
|
|
// Initialize GeoIP if specified and not yet initialized
|
|
if config.GeoIPPath != "" && g.geoIP == nil {
|
|
geoIP, err := NewGeoIPLookup(config.GeoIPPath)
|
|
if err != nil {
|
|
logger.Warn("Failed to initialize GeoIP from handler config",
|
|
zap.Error(err),
|
|
)
|
|
} else {
|
|
g.geoIP = geoIP
|
|
g.GeoIPPath = config.GeoIPPath
|
|
}
|
|
}
|
|
|
|
// Merge blocked/allowed countries
|
|
for _, country := range config.BlockedCountries {
|
|
found := false
|
|
for _, existing := range g.BlockedCountries {
|
|
if existing == country {
|
|
found = true
|
|
break
|
|
}
|
|
}
|
|
if !found {
|
|
g.BlockedCountries = append(g.BlockedCountries, country)
|
|
}
|
|
}
|
|
for _, country := range config.AllowedCountries {
|
|
found := false
|
|
for _, existing := range g.AllowedCountries {
|
|
if existing == country {
|
|
found = true
|
|
break
|
|
}
|
|
}
|
|
if !found {
|
|
g.AllowedCountries = append(g.AllowedCountries, country)
|
|
}
|
|
}
|
|
|
|
// Merge webhooks (add new ones by URL)
|
|
for _, webhook := range config.Webhooks {
|
|
found := false
|
|
for _, existing := range g.Webhooks {
|
|
if existing.URL == webhook.URL {
|
|
found = true
|
|
break
|
|
}
|
|
}
|
|
if !found {
|
|
g.Webhooks = append(g.Webhooks, webhook)
|
|
// Register with webhook manager
|
|
if g.webhooksEnabled() {
|
|
wm := GetWebhookManager(logger)
|
|
wm.AddWebhook(webhook)
|
|
}
|
|
}
|
|
}
|
|
|
|
// Apply enumeration config if specified
|
|
if config.Enumeration != nil && g.Enumeration == nil {
|
|
g.Enumeration = config.Enumeration
|
|
// Apply to global detector
|
|
SetEnumerationConfig(*config.Enumeration)
|
|
logger.Debug("Applied enumeration config from handler")
|
|
}
|
|
|
|
logger.Debug("Merged guardian config",
|
|
zap.Int("whitelist_count", len(g.whitelistNets)),
|
|
zap.Int("webhook_count", len(g.Webhooks)),
|
|
zap.Duration("ban_time", time.Duration(g.BanTime)),
|
|
)
|
|
}
|
|
|
|
// GetGuardian returns an existing guardian instance
|
|
func GetGuardian(name string) *SIPGuardian {
|
|
if name == "" {
|
|
name = "default"
|
|
}
|
|
|
|
registryMu.RLock()
|
|
defer registryMu.RUnlock()
|
|
|
|
return guardianRegistry[name]
|
|
}
|
|
|
|
// ListGuardians returns all guardian names
|
|
func ListGuardians() []string {
|
|
registryMu.RLock()
|
|
defer registryMu.RUnlock()
|
|
|
|
names := make([]string, 0, len(guardianRegistry))
|
|
for name := range guardianRegistry {
|
|
names = append(names, name)
|
|
}
|
|
return names
|
|
}
|