package rfc2136 import ( "encoding/base64" "fmt" "os" "path/filepath" "strconv" "time" "github.com/coredns/caddy" "github.com/coredns/coredns/core/dnsserver" "github.com/coredns/coredns/plugin" clog "github.com/coredns/coredns/plugin/pkg/log" "github.com/miekg/dns" ) // log is the package logger, scoped so messages are prefixed `[rfc2136]`. var log = clog.NewWithPlugin("rfc2136") func init() { plugin.Register("rfc2136", setup) // SECURITY/OPERATIONAL NOTE — process-global mutation: // // miekg/dns's default MsgAcceptFunc rejects UPDATE opcode messages // with NOTIMP at the wire layer, before any plugin sees them // (acceptfunc.go: "Don't allow dynamic updates, because then the // sections can contain a whole bunch of RRs"). CoreDNS 1.14.3 // constructs its dns.Server instances without exposing a // per-server MsgAcceptFunc to plugins (see // coredns/core/dnsserver/server.go:159 — the dns.Server struct is // hardcoded), so to accept UPDATE opcodes anywhere we must // override the package-level default. We do that here. // // Consequences operators should understand: // // 1. The override is PROCESS-WIDE. Every CoreDNS server block in // this binary will accept UPDATE opcodes at the wire layer, // not just the one(s) where `rfc2136` is configured. Other // blocks will pass the UPDATE through their plugin chains; // since no plugin in those chains handles UPDATE, the request // falls off the end of the chain and CoreDNS returns // FormatError. No state is mutated — but the wire-layer // gatekeeping moved into the plugin chain. // // 2. The actual security boundary is TSIG verification, which // happens in this plugin's ServeDNS (checkTSIG) AND inside // handleUpdate (assertAuthenticated). Defense in depth: any // state-mutating path re-verifies, so a future refactor that // adds a new caller cannot accidentally skip auth. // // 3. If you remove `rfc2136` from your Corefile and reload via // SIGUSR1, this global is NOT restored. Restart the process // to fully revert. // // The mitigation matrix is: (a) loud comment here, (b) startup // INFO log listing the zones this plugin owns, (c) TSIG re-check // inside handleUpdate. The architecturally clean fix would be // upstream support in CoreDNS for per-Config MsgAcceptFunc — when // that lands, delete this whole stanza. dns.DefaultMsgAcceptFunc = msgAcceptFunc } // msgAcceptFunc mirrors miekg/dns's defaultMsgAcceptFunc but additionally // allows OpcodeUpdate. For UPDATE messages, the conservative Ancount/ // Nscount limits in the default function don't apply -- per RFC 2136 // those sections (Prerequisite / Update) can carry many RRs. func msgAcceptFunc(dh dns.Header) dns.MsgAcceptAction { // Responses are silently ignored regardless of opcode (default behaviour). if isResponse := dh.Bits&0x8000 != 0; isResponse { return dns.MsgIgnore } opcode := int(dh.Bits>>11) & 0xF switch opcode { case dns.OpcodeQuery, dns.OpcodeNotify, dns.OpcodeUpdate: // allowed default: return dns.MsgRejectNotImplemented } if dh.Qdcount != 1 { return dns.MsgReject } // UPDATE messages legitimately carry multiple RRs in the // Prerequisite (Ancount) and Update (Nscount) sections -- skip the // "exactly 1" check that the default function applies for queries. if opcode != dns.OpcodeUpdate { if dh.Ancount > 1 { return dns.MsgReject } if dh.Nscount > 1 { return dns.MsgReject } } if dh.Arcount > 2 { return dns.MsgReject } return dns.MsgAccept } // setup is invoked by the CoreDNS plugin registry once per Corefile // `rfc2136` directive. It parses the directive, validates that each // declared zone has a corresponding file in zones-dir, registers // TSIG keys with the underlying dns.Server, and links the handler // into the plugin chain. func setup(c *caddy.Controller) error { p, err := parse(c) if err != nil { return plugin.Error("rfc2136", err) } if err := p.validateZoneFiles(); err != nil { return plugin.Error("rfc2136", err) } cfg := dnsserver.GetConfig(c) // Register TSIG keys with the underlying dns.Server so miekg/dns // auto-verifies incoming signatures. We then just inspect the // result via dns.ResponseWriter.TsigStatus() in our UPDATE handler. if len(p.TSIGKeys) > 0 { if cfg.TsigSecret == nil { cfg.TsigSecret = make(map[string]string) } for name, key := range p.TSIGKeys { cfg.TsigSecret[name] = base64.StdEncoding.EncodeToString(key.Secret) } } cfg.AddPlugin(func(next plugin.Handler) plugin.Handler { p.Next = next return p }) log.Infof("ready: zones=%v keys=%d ttl=%d dir=%q auto-commit=%t", p.Zones, len(p.TSIGKeys), p.TTL, p.ZonesDir, p.AutoCommit) // Surface the global MsgAcceptFunc override for operator audit — // if a sibling server block on this process doesn't expect UPDATE // opcodes to traverse it, the operator should know they will. log.Infof("dns.DefaultMsgAcceptFunc was overridden process-wide to permit OpcodeUpdate; state mutation requires TSIG verification on this plugin's zones=%v only", p.Zones) return nil } // parse reads a single `rfc2136 [...] { ... }` block. // // Grammar: // // rfc2136 [...] { // zones-dir ; required // tsig-key ; may repeat // ttl ; default 60 // auto-commit ; default true // git-author ; optional // } func parse(c *caddy.Controller) (*RFC2136, error) { p := &RFC2136{ TSIGKeys: make(map[string]tsigKey), TTL: DefaultTTL, AutoCommit: true, } // Per-zone git author overrides. Defaults are applied later. var gitAuthorName, gitAuthorEmail string // Rate-limit config (Hamilton M8). Defaults are // defaultRateBurst/defaultRatePeriod from ratelimit.go; an explicit // `rate-limit ` directive overrides. rateBurst := defaultRateBurst ratePeriod := defaultRatePeriod rateLimitEnabled := true for c.Next() { args := c.RemainingArgs() if len(args) < 1 { return nil, c.ArgErr() } for _, z := range args { p.Zones = append(p.Zones, plugin.Host(z).NormalizeExact()...) } for c.NextBlock() { switch c.Val() { case "zones-dir": dArgs := c.RemainingArgs() if len(dArgs) != 1 { return nil, c.ArgErr() } p.ZonesDir = dArgs[0] case "tsig-key": kArgs := c.RemainingArgs() if len(kArgs) != 3 { return nil, c.Errf("tsig-key requires 3 args (name algorithm secret), got %d", len(kArgs)) } keyName := canonicalKeyName(kArgs[0]) algo, err := parseTSIGAlgorithm(kArgs[1]) if err != nil { return nil, c.Err(err.Error()) } secret, err := decodeTSIGSecret(kArgs[2]) if err != nil { return nil, c.Errf("tsig-key %q: %v", keyName, err) } if _, exists := p.TSIGKeys[keyName]; exists { return nil, c.Errf("duplicate tsig-key %q", keyName) } p.TSIGKeys[keyName] = tsigKey{Algorithm: algo, Secret: secret} case "ttl": tArgs := c.RemainingArgs() if len(tArgs) != 1 { return nil, c.ArgErr() } ttl, err := strconv.ParseUint(tArgs[0], 10, 32) if err != nil { return nil, c.Errf("ttl must be a non-negative integer: %v", err) } p.TTL = uint32(ttl) case "auto-commit": aArgs := c.RemainingArgs() if len(aArgs) != 1 { return nil, c.ArgErr() } switch aArgs[0] { case "true", "yes", "on": p.AutoCommit = true case "false", "no", "off": p.AutoCommit = false default: return nil, c.Errf("auto-commit must be true|false, got %q", aArgs[0]) } case "git-author": gArgs := c.RemainingArgs() if len(gArgs) != 2 { return nil, c.Errf("git-author requires 2 args (name email), got %d", len(gArgs)) } gitAuthorName = gArgs[0] gitAuthorEmail = gArgs[1] case "notify": nArgs := c.RemainingArgs() if len(nArgs) < 1 { return nil, c.Errf("notify requires at least one secondary (host or host:port)") } p.NotifyTargets = append(p.NotifyTargets, nArgs...) case "rate-limit": rArgs := c.RemainingArgs() switch len(rArgs) { case 1: if rArgs[0] == "off" || rArgs[0] == "false" || rArgs[0] == "no" { rateLimitEnabled = false break } return nil, c.Errf("rate-limit single-arg form must be 'off'; for limits use 'rate-limit '") case 2: b, err := strconv.ParseUint(rArgs[0], 10, 31) if err != nil || b < 1 { return nil, c.Errf("rate-limit burst must be positive integer, got %q", rArgs[0]) } pSec, err := strconv.ParseUint(rArgs[1], 10, 31) if err != nil || pSec < 1 { return nil, c.Errf("rate-limit period must be positive integer seconds, got %q", rArgs[1]) } rateBurst = int(b) ratePeriod = time.Duration(pSec) * time.Second default: return nil, c.Errf("rate-limit takes 'off' OR ' ', got %d args", len(rArgs)) } default: return nil, c.Errf("unknown directive: %s", c.Val()) } } } if len(p.Zones) == 0 { return nil, c.Err("at least one zone must be specified") } if p.ZonesDir == "" { return nil, c.Err("zones-dir is required") } // Construct rate limiter if enabled. if rateLimitEnabled { p.rateLimit = newRateLimiter(rateBurst, ratePeriod) } // Build zoneFile handles for each declared zone. p.zones = make(map[string]*zoneFile, len(p.Zones)) for _, z := range p.Zones { // Trailing dot → filename. supported.systems. → supported.systems.zone stem := z if l := len(stem); l > 0 && stem[l-1] == '.' { stem = stem[:l-1] } path := filepath.Join(p.ZonesDir, stem+".zone") zf := openZoneFile(path, z) zf.AutoCommit = p.AutoCommit if gitAuthorName != "" { zf.GitAuthorName = gitAuthorName } if gitAuthorEmail != "" { zf.GitAuthorEmail = gitAuthorEmail } p.zones[z] = zf } return p, nil } // validateZoneFiles ensures every configured zone has an accessible // AND parseable file on disk at the expected path. Catches both typos // (file missing) and corrupt zone content at CoreDNS startup rather // than on the first UPDATE — the operator gets an immediate signal // instead of discovering the breakage minutes later when ACME fires. // // Hamilton M4: the previous version only stat()'d the file. A zone // with a syntax error sailed through startup, then the first UPDATE // returned SERVFAIL with no startup-time signal. We now run the same // loadRRs + assertSingleApexSOA path the UPDATE handler uses, so any // parse-time or SOA-invariant failure surfaces at startup. func (p *RFC2136) validateZoneFiles() error { for zone, zf := range p.zones { st, err := os.Stat(zf.Path) if err != nil { return fmt.Errorf("zone %q: file not accessible at %s: %w", zone, zf.Path, err) } if st.IsDir() { return fmt.Errorf("zone %q: %s is a directory, expected a regular file", zone, zf.Path) } if _, _, err := zf.loadRRs(); err != nil { return fmt.Errorf("zone %q at %s: %w", zone, zf.Path, err) } } return nil }