M8: per-key UPDATE rate limiting (token bucket)
Hamilton M8: a compromised TSIG key — or a misconfigured client retrying forever — must not be able to drive unbounded UPDATE traffic. Each UPDATE costs disk IOPS, a git commit, and a slot in the SOA serial counter (now 9999/day per zone). Without a cap, a few hours of runaway traffic could exhaust the SOA serial counter and brick the zone for the day.

Implementation: per-key token bucket in ratelimit.go. Default 100 tokens / 60 seconds. New keys start full so legitimate clients see no delay at boot. Refill is continuous, capped at the burst value.

Configurable in Corefile:

    rate-limit off                      # disable entirely
    rate-limit <burst> <period-secs>    # e.g., rate-limit 200 60

Enforcement runs in ServeDNS after TSIG verification — a request that fails auth doesn't consume a token (and a forged TSIG can't be used to deny service to a real key holder, since we never reached the rate check).

100/min is well above ACME's needs: a worst-case full-renewal storm across our ~84 zones emits maybe 200 UPDATEs total over several minutes. Anything beyond is suspicious by definition.

New tests covering: first-call allowed, burst exhaustion, refill behavior, per-key isolation, refill-cap (no idle-accumulation overflow).
This commit is contained in:
parent
6ab2b6af6d
commit
8d1477350a
20
plugin.go
20
plugin.go
@ -19,6 +19,8 @@ package rfc2136
|
|||||||
|
|
||||||
import (
|
import (
|
||||||
"context"
|
"context"
|
||||||
|
"strings"
|
||||||
|
"time"
|
||||||
|
|
||||||
"github.com/coredns/coredns/plugin"
|
"github.com/coredns/coredns/plugin"
|
||||||
"github.com/miekg/dns"
|
"github.com/miekg/dns"
|
||||||
@ -62,6 +64,11 @@ type RFC2136 struct {
|
|||||||
// zones holds per-zone file handlers, keyed by canonical zone name.
|
// zones holds per-zone file handlers, keyed by canonical zone name.
|
||||||
// Populated in setup; mutexes live inside each zoneFile.
|
// Populated in setup; mutexes live inside each zoneFile.
|
||||||
zones map[string]*zoneFile
|
zones map[string]*zoneFile
|
||||||
|
|
||||||
|
// rateLimit caps UPDATE traffic per TSIG key (Hamilton M8). nil
|
||||||
|
// disables rate limiting (test mode, or insecure deployments).
|
||||||
|
// Populated in setup() once TSIG keys are known.
|
||||||
|
rateLimit *rateLimiter
|
||||||
}
|
}
|
||||||
|
|
||||||
// Name implements plugin.Handler.
|
// Name implements plugin.Handler.
|
||||||
@ -88,6 +95,19 @@ func (p *RFC2136) ServeDNS(ctx context.Context, w dns.ResponseWriter, r *dns.Msg
|
|||||||
_ = w.WriteMsg(resp)
|
_ = w.WriteMsg(resp)
|
||||||
return dns.RcodeRefused, nil
|
return dns.RcodeRefused, nil
|
||||||
}
|
}
|
||||||
|
// Hamilton M8: per-key rate limit. TSIG just authenticates the
|
||||||
|
// sender — it doesn't prove the sender's behavior is sane. A
|
||||||
|
// compromised key or a runaway client must not be able to
|
||||||
|
// exhaust disk/git/serial-counter resources.
|
||||||
|
if p.rateLimit != nil {
|
||||||
|
if tsig := r.IsTsig(); tsig != nil && !p.rateLimit.allow(strings.ToLower(tsig.Hdr.Name), time.Now()) {
|
||||||
|
log.Warningf("UPDATE rate-limited for key %q", tsig.Hdr.Name)
|
||||||
|
resp := new(dns.Msg)
|
||||||
|
resp.SetRcode(r, dns.RcodeRefused)
|
||||||
|
_ = w.WriteMsg(resp)
|
||||||
|
return dns.RcodeRefused, nil
|
||||||
|
}
|
||||||
|
}
|
||||||
return p.handleUpdate(w, r, true)
|
return p.handleUpdate(w, r, true)
|
||||||
}
|
}
|
||||||
return plugin.NextOrFailure(p.Name(), p.Next, ctx, w, r)
|
return plugin.NextOrFailure(p.Name(), p.Next, ctx, w, r)
|
||||||
|
|||||||
82
ratelimit.go
Normal file
82
ratelimit.go
Normal file
@ -0,0 +1,82 @@
|
|||||||
|
package rfc2136
|
||||||
|
|
||||||
|
import (
|
||||||
|
"sync"
|
||||||
|
"time"
|
||||||
|
)
|
||||||
|
|
||||||
|
// Per-key token bucket. Hamilton M8: a compromised TSIG key — or a
// misconfigured client retrying forever — must not be able to drive
// unbounded UPDATE traffic. Each UPDATE costs disk IOPS, a git commit,
// and a slot in the SOA serial counter (9999/day per zone). 100
// UPDATEs/minute per key is well above any legitimate ACME workflow
// (a full renewal storm across our ~84 zones might emit ~200 UPDATEs
// total over several minutes); anything beyond is suspicious.
const (
	defaultRateBurst  = 100         // max tokens a bucket can hold
	defaultRatePeriod = time.Minute // time for an empty bucket to refill completely
)

// rateLimiter is a goroutine-safe per-key token bucket. The zero value
// is unusable (its bucket map is nil); construct via newRateLimiter.
//
// Buckets are created lazily on first use of a key and are never
// evicted by this type.
type rateLimiter struct {
	mu      sync.Mutex         // guards buckets and every bucket stored in it
	buckets map[string]*bucket // per-key state, created on first allow() for a key
	burst   float64            // max tokens
	period  time.Duration      // time to fully refill
}

// bucket is the token-bucket state for a single key.
type bucket struct {
	tokens     float64   // tokens currently available, never above burst
	lastRefill time.Time // latest instant up to which refill has been credited
}

// newRateLimiter returns a limiter that allows bursts of up to `burst`
// requests per key and refills an empty bucket completely over `period`.
// A non-positive burst or period falls back to defaultRateBurst or
// defaultRatePeriod respectively.
func newRateLimiter(burst int, period time.Duration) *rateLimiter {
	if burst <= 0 {
		burst = defaultRateBurst
	}
	if period <= 0 {
		period = defaultRatePeriod
	}
	return &rateLimiter{
		buckets: make(map[string]*bucket),
		burst:   float64(burst),
		period:  period,
	}
}

// allow attempts to take one token for `key` at time `now`. It returns
// true if a token was available (and consumes it), false otherwise.
// New keys start full (burst tokens), so the first call for a key is
// always allowed.
//
// Refill is continuous: tokens accumulate at burst/period per second
// and the bucket caps at burst tokens.
//
// Timestamps do not have to arrive in order. Callers sample `now`
// before this method acquires the lock, so two concurrent requests can
// present slightly out-of-order times. A `now` that is not later than
// the last credited instant earns nothing and leaves lastRefill alone;
// it must never subtract tokens or rewind the refill clock.
func (rl *rateLimiter) allow(key string, now time.Time) bool {
	rl.mu.Lock()
	defer rl.mu.Unlock()

	b, ok := rl.buckets[key]
	if !ok {
		// First time we see this key — start the bucket full (minus the
		// token this call consumes) so legitimate clients don't see
		// refill delays at boot.
		rl.buckets[key] = &bucket{
			tokens:     rl.burst - 1,
			lastRefill: now,
		}
		return true
	}

	// Refill: credit tokens earned since the last credited instant, but
	// only when time actually moved forward.
	if elapsed := now.Sub(b.lastRefill); elapsed > 0 {
		b.tokens += elapsed.Seconds() * (rl.burst / rl.period.Seconds())
		if b.tokens > rl.burst {
			b.tokens = rl.burst
		}
		b.lastRefill = now
	}

	if b.tokens < 1.0 {
		return false
	}
	b.tokens -= 1.0
	return true
}
|
||||||
83
ratelimit_test.go
Normal file
83
ratelimit_test.go
Normal file
@ -0,0 +1,83 @@
|
|||||||
|
package rfc2136
|
||||||
|
|
||||||
|
import (
|
||||||
|
"testing"
|
||||||
|
"time"
|
||||||
|
)
|
||||||
|
|
||||||
|
func TestRateLimiter_FirstCallAllowed(t *testing.T) {
|
||||||
|
rl := newRateLimiter(5, time.Minute)
|
||||||
|
now := time.Now()
|
||||||
|
if !rl.allow("key-a", now) {
|
||||||
|
t.Errorf("first call for new key must be allowed")
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
func TestRateLimiter_BurstExhausts(t *testing.T) {
|
||||||
|
rl := newRateLimiter(3, time.Minute)
|
||||||
|
now := time.Now()
|
||||||
|
// First 3 calls succeed.
|
||||||
|
for i := 0; i < 3; i++ {
|
||||||
|
if !rl.allow("key-a", now) {
|
||||||
|
t.Fatalf("call %d should be allowed (burst=3)", i+1)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
// 4th immediately after burst should be denied (no time elapsed
|
||||||
|
// for refill).
|
||||||
|
if rl.allow("key-a", now) {
|
||||||
|
t.Errorf("4th call exceeded burst; should be denied")
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
func TestRateLimiter_RefillsOverTime(t *testing.T) {
|
||||||
|
// burst=2, period=1s → refill rate is 2 tokens/sec.
|
||||||
|
rl := newRateLimiter(2, time.Second)
|
||||||
|
t0 := time.Now()
|
||||||
|
if !rl.allow("k", t0) {
|
||||||
|
t.Fatal("call 1")
|
||||||
|
}
|
||||||
|
if !rl.allow("k", t0) {
|
||||||
|
t.Fatal("call 2")
|
||||||
|
}
|
||||||
|
if rl.allow("k", t0) {
|
||||||
|
t.Fatal("call 3 should be denied; bucket empty")
|
||||||
|
}
|
||||||
|
// Advance time by 500ms — should refill ~1 token.
|
||||||
|
if !rl.allow("k", t0.Add(500*time.Millisecond)) {
|
||||||
|
t.Errorf("expected refill after 500ms")
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
func TestRateLimiter_PerKeyIsolation(t *testing.T) {
|
||||||
|
rl := newRateLimiter(2, time.Minute)
|
||||||
|
now := time.Now()
|
||||||
|
// Exhaust key-a.
|
||||||
|
rl.allow("key-a", now)
|
||||||
|
rl.allow("key-a", now)
|
||||||
|
if rl.allow("key-a", now) {
|
||||||
|
t.Fatal("key-a still has tokens; setup wrong")
|
||||||
|
}
|
||||||
|
// key-b is independent — must still be allowed.
|
||||||
|
if !rl.allow("key-b", now) {
|
||||||
|
t.Errorf("key-b was rate-limited despite no prior use")
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// TestRateLimiter_DoesNotOverflow guards against refill math
|
||||||
|
// accumulating beyond burst (which would let an attacker burst more
|
||||||
|
// after a long idle period than the configured cap).
|
||||||
|
func TestRateLimiter_DoesNotOverflow(t *testing.T) {
|
||||||
|
rl := newRateLimiter(5, time.Second)
|
||||||
|
t0 := time.Now()
|
||||||
|
rl.allow("k", t0) // create bucket
|
||||||
|
// Advance time 1 hour. Refill should cap at burst=5.
|
||||||
|
tFuture := t0.Add(time.Hour)
|
||||||
|
for i := 0; i < 5; i++ {
|
||||||
|
if !rl.allow("k", tFuture) {
|
||||||
|
t.Fatalf("post-idle call %d should be allowed (cap=5)", i+1)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
if rl.allow("k", tFuture) {
|
||||||
|
t.Errorf("post-idle call 6 should be denied; cap exceeded")
|
||||||
|
}
|
||||||
|
}
|
||||||
37
setup.go
37
setup.go
@ -6,6 +6,7 @@ import (
|
|||||||
"os"
|
"os"
|
||||||
"path/filepath"
|
"path/filepath"
|
||||||
"strconv"
|
"strconv"
|
||||||
|
"time"
|
||||||
|
|
||||||
"github.com/coredns/caddy"
|
"github.com/coredns/caddy"
|
||||||
"github.com/coredns/coredns/core/dnsserver"
|
"github.com/coredns/coredns/core/dnsserver"
|
||||||
@ -164,6 +165,13 @@ func parse(c *caddy.Controller) (*RFC2136, error) {
|
|||||||
// Per-zone git author overrides. Defaults are applied later.
|
// Per-zone git author overrides. Defaults are applied later.
|
||||||
var gitAuthorName, gitAuthorEmail string
|
var gitAuthorName, gitAuthorEmail string
|
||||||
|
|
||||||
|
// Rate-limit config (Hamilton M8). Defaults are
|
||||||
|
// defaultRateBurst/defaultRatePeriod from ratelimit.go; an explicit
|
||||||
|
// `rate-limit <burst> <period-seconds>` directive overrides.
|
||||||
|
rateBurst := defaultRateBurst
|
||||||
|
ratePeriod := defaultRatePeriod
|
||||||
|
rateLimitEnabled := true
|
||||||
|
|
||||||
for c.Next() {
|
for c.Next() {
|
||||||
args := c.RemainingArgs()
|
args := c.RemainingArgs()
|
||||||
if len(args) < 1 {
|
if len(args) < 1 {
|
||||||
@ -235,6 +243,30 @@ func parse(c *caddy.Controller) (*RFC2136, error) {
|
|||||||
gitAuthorName = gArgs[0]
|
gitAuthorName = gArgs[0]
|
||||||
gitAuthorEmail = gArgs[1]
|
gitAuthorEmail = gArgs[1]
|
||||||
|
|
||||||
|
case "rate-limit":
|
||||||
|
rArgs := c.RemainingArgs()
|
||||||
|
switch len(rArgs) {
|
||||||
|
case 1:
|
||||||
|
if rArgs[0] == "off" || rArgs[0] == "false" || rArgs[0] == "no" {
|
||||||
|
rateLimitEnabled = false
|
||||||
|
break
|
||||||
|
}
|
||||||
|
return nil, c.Errf("rate-limit single-arg form must be 'off'; for limits use 'rate-limit <burst> <period-seconds>'")
|
||||||
|
case 2:
|
||||||
|
b, err := strconv.ParseUint(rArgs[0], 10, 31)
|
||||||
|
if err != nil || b < 1 {
|
||||||
|
return nil, c.Errf("rate-limit burst must be positive integer, got %q", rArgs[0])
|
||||||
|
}
|
||||||
|
pSec, err := strconv.ParseUint(rArgs[1], 10, 31)
|
||||||
|
if err != nil || pSec < 1 {
|
||||||
|
return nil, c.Errf("rate-limit period must be positive integer seconds, got %q", rArgs[1])
|
||||||
|
}
|
||||||
|
rateBurst = int(b)
|
||||||
|
ratePeriod = time.Duration(pSec) * time.Second
|
||||||
|
default:
|
||||||
|
return nil, c.Errf("rate-limit takes 'off' OR '<burst> <period-seconds>', got %d args", len(rArgs))
|
||||||
|
}
|
||||||
|
|
||||||
default:
|
default:
|
||||||
return nil, c.Errf("unknown directive: %s", c.Val())
|
return nil, c.Errf("unknown directive: %s", c.Val())
|
||||||
}
|
}
|
||||||
@ -248,6 +280,11 @@ func parse(c *caddy.Controller) (*RFC2136, error) {
|
|||||||
return nil, c.Err("zones-dir is required")
|
return nil, c.Err("zones-dir is required")
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Construct rate limiter if enabled.
|
||||||
|
if rateLimitEnabled {
|
||||||
|
p.rateLimit = newRateLimiter(rateBurst, ratePeriod)
|
||||||
|
}
|
||||||
|
|
||||||
// Build zoneFile handles for each declared zone.
|
// Build zoneFile handles for each declared zone.
|
||||||
p.zones = make(map[string]*zoneFile, len(p.Zones))
|
p.zones = make(map[string]*zoneFile, len(p.Zones))
|
||||||
for _, z := range p.Zones {
|
for _, z := range p.Zones {
|
||||||
|
|||||||
Loading…
x
Reference in New Issue
Block a user