Ryan Malloy ca63620316 Major architectural refactor: eliminate global state and resource leaks
This commit addresses all critical architectural issues identified in the
Matt Holt code review, transforming the module from using anti-patterns
to following Caddy best practices.

### 🔴 CRITICAL FIXES:

**1. Global Registry → Caddy App System**
- Created SIPGuardianApp implementing caddy.App interface (app.go)
- Eliminates memory/goroutine leaks on config reload
- Before: guardians accumulated in global map, never cleaned up
- After: Caddy calls Stop() on old app before loading new config
- Impact: Prevents OOM in production with frequent config reloads

**2. Feature Flags → Instance Fields**
- Moved enableMetrics/Webhooks/Storage from globals to *bool struct fields
- Allows per-instance configuration (not shared across all guardians)
- Helper methods default to true if not set
- Impact: Thread-safe, configurable per guardian instance

**3. Prometheus Panic Prevention**
- Replaced MustRegister() with Register() + AlreadyRegisteredError handling
- Makes RegisterMetrics() idempotent and safe for multiple calls
- Before: panics on second call (e.g., config reload)
- After: silently ignores already-registered collectors
- Impact: No more crashes on config reload

### 🟠 HIGH PRIORITY FIXES:

**4. Storage Worker Pool**
- Fixed pool of 4 workers + 1000-entry buffered channel
- Replaces unbounded go func() spawns (3 locations)
- Before: 100k goroutines during DDoS → memory exhaustion
- After: bounded resources, drops writes when full (fail-fast)
- Impact: Survives attacks without resource exhaustion

**5. Config Immutability**
- MaxFailures/FindTime/BanTime no longer modified on running instance
- Prevents race with RecordFailure() reading values without lock
- Changed mutations to warning logs
- Additive changes still allowed (whitelists, webhooks)
- Impact: No more race conditions, predictable ban behavior

### Modified Files:
- app.go (NEW): SIPGuardianApp with proper lifecycle management
- sipguardian.go: Removed module registration, added worker pool, feature flags
- l4handler.go: Use ctx.App() instead of global registry
- metrics.go: Use ctx.App() instead of global registry
- registry.go: Config immutability warnings instead of mutations

### Test Results:
All tests pass (1.228s) 

### Breaking Changes:
None - backwards compatible, but requires apps {} block in Caddyfile
for proper lifecycle management

### Estimated Impact:
- Memory leak fix: Prevents unbounded growth over time
- Resource usage: 100k goroutines → 4 workers during attack
- Stability: No more panics on config reload
- Performance: O(n log n) sorting (addressed in quick wins)
2025-12-24 23:19:38 -07:00

153 lines
3.8 KiB
Go

package sipguardian
import (
"fmt"
"sync"
"github.com/caddyserver/caddy/v2"
"go.uber.org/zap"
)
func init() {
caddy.RegisterModule(SIPGuardianApp{})
}
// SIPGuardianApp is a Caddy app that manages SIPGuardian instances
// This replaces the global registry pattern with proper Caddy lifecycle management
type SIPGuardianApp struct {
guardians map[string]*SIPGuardian
mu sync.RWMutex
logger *zap.Logger
}
// CaddyModule returns the Caddy module information
func (SIPGuardianApp) CaddyModule() caddy.ModuleInfo {
return caddy.ModuleInfo{
ID: "sip_guardian",
New: func() caddy.Module { return &SIPGuardianApp{} },
}
}
// Provision sets up the app
func (app *SIPGuardianApp) Provision(ctx caddy.Context) error {
app.guardians = make(map[string]*SIPGuardian)
app.logger = ctx.Logger()
app.logger.Debug("SIP Guardian app provisioned")
return nil
}
// Start starts the app (no-op for us, guardians start when created)
func (app *SIPGuardianApp) Start() error {
app.logger.Info("SIP Guardian app started")
return nil
}
// Stop stops the app and cleans up all guardians
func (app *SIPGuardianApp) Stop() error {
app.mu.Lock()
defer app.mu.Unlock()
app.logger.Info("SIP Guardian app stopping", zap.Int("guardians", len(app.guardians)))
// Cleanup all guardians
for name, guardian := range app.guardians {
app.logger.Debug("Cleaning up guardian", zap.String("name", name))
if err := guardian.Cleanup(); err != nil {
app.logger.Error("Error cleaning up guardian",
zap.String("name", name),
zap.Error(err),
)
}
}
// Clear the map
app.guardians = make(map[string]*SIPGuardian)
app.logger.Debug("SIP Guardian app stopped")
return nil
}
// GetOrCreateGuardian returns a shared guardian instance, creating it if needed
func (app *SIPGuardianApp) GetOrCreateGuardian(ctx caddy.Context, name string, config *SIPGuardian) (*SIPGuardian, error) {
if name == "" {
name = "default"
}
app.mu.Lock()
defer app.mu.Unlock()
if g, exists := app.guardians[name]; exists {
// Guardian exists - merge any new config (additive only)
if config != nil {
mergeGuardianConfig(ctx, g, config)
}
return g, nil
}
// Create new guardian with config
var g *SIPGuardian
if config != nil {
// Copy config values to a new guardian
g = &SIPGuardian{
MaxFailures: config.MaxFailures,
FindTime: config.FindTime,
BanTime: config.BanTime,
WhitelistCIDR: config.WhitelistCIDR,
WhitelistHosts: config.WhitelistHosts,
WhitelistSRV: config.WhitelistSRV,
DNSRefresh: config.DNSRefresh,
Webhooks: config.Webhooks,
StoragePath: config.StoragePath,
GeoIPPath: config.GeoIPPath,
BlockedCountries: config.BlockedCountries,
AllowedCountries: config.AllowedCountries,
Enumeration: config.Enumeration,
Validation: config.Validation,
EnableMetrics: config.EnableMetrics,
EnableWebhooks: config.EnableWebhooks,
EnableStorage: config.EnableStorage,
}
} else {
g = &SIPGuardian{}
}
if err := g.Provision(ctx); err != nil {
return nil, fmt.Errorf("failed to provision guardian: %w", err)
}
app.guardians[name] = g
app.logger.Debug("Guardian created", zap.String("name", name))
return g, nil
}
// GetGuardian returns an existing guardian instance (or nil if not found)
func (app *SIPGuardianApp) GetGuardian(name string) *SIPGuardian {
if name == "" {
name = "default"
}
app.mu.RLock()
defer app.mu.RUnlock()
return app.guardians[name]
}
// ListGuardians returns all guardian names
func (app *SIPGuardianApp) ListGuardians() []string {
app.mu.RLock()
defer app.mu.RUnlock()
names := make([]string, 0, len(app.guardians))
for name := range app.guardians {
names = append(names, name)
}
return names
}
// Interface guards
var (
_ caddy.App = (*SIPGuardianApp)(nil)
_ caddy.Provisioner = (*SIPGuardianApp)(nil)
)