Hamilton M8: a compromised TSIG key — or a misconfigured client retrying forever — must not be able to drive unbounded UPDATE traffic. Each UPDATE costs disk IOPS, a git commit, and a slot in the SOA serial counter (now 9999/day per zone). Without a cap, a few hours of runaway traffic could exhaust the SOA serial counter and brick the zone for the day.

Implementation: per-key token bucket in ratelimit.go. Default 100 tokens / 60 seconds. New keys start full so legitimate clients see no delay at boot. Refill is continuous, capped at the burst value.

Configurable in Corefile:

    rate-limit off                      # disable entirely
    rate-limit <burst> <period-secs>    # e.g., rate-limit 200 60

Enforcement runs in ServeDNS after TSIG verification — a request that fails auth doesn't consume a token (and a forged TSIG can't be used to deny service to a real key holder, since it never reaches the rate check).

100/min is well above ACME's needs: a worst-case full-renewal storm across our ~84 zones emits maybe 200 UPDATEs total over several minutes. Anything beyond is suspicious by definition.

New tests cover: first-call allowed, burst exhaustion, refill behavior, per-key isolation, refill-cap (no idle-accumulation overflow).
83 lines
2.1 KiB
Go
83 lines
2.1 KiB
Go
package rfc2136
|
|
|
|
import (
|
|
"sync"
|
|
"time"
|
|
)
|
|
|
|
// Defaults for the per-key token bucket (Hamilton M8).
//
// Rationale: neither a compromised TSIG key nor a misconfigured client
// stuck in a retry loop may generate unbounded UPDATE traffic, because
// every UPDATE costs disk IOPS, a git commit, and one slot of the SOA
// serial counter (9999/day per zone). 100 UPDATEs per minute per key
// leaves ample headroom for legitimate ACME use — a full renewal storm
// across our ~84 zones might emit ~200 UPDATEs in total, spread over
// several minutes — so anything above that rate is treated as suspicious.
const (
	// defaultRateBurst is the bucket capacity: the maximum number of
	// tokens a single key can hold.
	defaultRateBurst = 100

	// defaultRatePeriod is the refill window: the time an empty bucket
	// needs to refill completely.
	defaultRatePeriod = time.Minute
)
|
|
|
|
// rateLimiter is a goroutine-safe per-key token bucket. The zero value
|
|
// is unusable; construct via newRateLimiter.
|
|
type rateLimiter struct {
|
|
mu sync.Mutex
|
|
buckets map[string]*bucket
|
|
burst float64 // max tokens
|
|
period time.Duration // time to fully refill
|
|
}
|
|
|
|
// bucket holds the token balance of a single key.
type bucket struct {
	// tokens is the current balance. It is fractional because refill is
	// computed continuously from elapsed time rather than in whole steps.
	tokens float64

	// lastRefill is the timestamp up to which refill has already been
	// credited to tokens.
	lastRefill time.Time
}
|
|
|
|
func newRateLimiter(burst int, period time.Duration) *rateLimiter {
|
|
if burst <= 0 {
|
|
burst = defaultRateBurst
|
|
}
|
|
if period <= 0 {
|
|
period = defaultRatePeriod
|
|
}
|
|
return &rateLimiter{
|
|
buckets: make(map[string]*bucket),
|
|
burst: float64(burst),
|
|
period: period,
|
|
}
|
|
}
|
|
|
|
// allow attempts to take one token for `key`. Returns true if a token
|
|
// was available, false otherwise. New keys start full (burst tokens).
|
|
//
|
|
// Refill is continuous: tokens accumulate at burst/period per second.
|
|
// The bucket caps at burst tokens.
|
|
func (rl *rateLimiter) allow(key string, now time.Time) bool {
|
|
rl.mu.Lock()
|
|
defer rl.mu.Unlock()
|
|
|
|
b, ok := rl.buckets[key]
|
|
if !ok {
|
|
// First time we see this key — start the bucket full so
|
|
// legitimate clients don't see refill delays at boot.
|
|
rl.buckets[key] = &bucket{
|
|
tokens: rl.burst - 1,
|
|
lastRefill: now,
|
|
}
|
|
return true
|
|
}
|
|
|
|
// Refill: tokens earned since last access.
|
|
elapsed := now.Sub(b.lastRefill).Seconds()
|
|
earned := elapsed * (rl.burst / rl.period.Seconds())
|
|
b.tokens += earned
|
|
if b.tokens > rl.burst {
|
|
b.tokens = rl.burst
|
|
}
|
|
b.lastRefill = now
|
|
|
|
if b.tokens >= 1.0 {
|
|
b.tokens -= 1.0
|
|
return true
|
|
}
|
|
return false
|
|
}
|