coredns-rfc2136/setup.go
Ryan Malloy 6ab2b6af6d H6/H7/M3/M4/M7: hardening + behavior documentation
H6 — TSIG replay-window test. New TestCheckTSIG_BadStatus_Refused
verifies that when miekg/dns reports a TSIG verification failure via
ResponseWriter.TsigStatus (the channel for fudge-window violations,
bad MACs, expired timestamps), our plugin refuses. The fudge tolerance
itself is miekg/dns's default (300s); documented in tsig.go so
operators know the dependency.

H7 — No-op UPDATE policy: documented explicitly in update.go. We do
NOT bump the SOA on a no-op (deduped) UPDATE — forcing downstream
secondaries to AXFR identical content wastes bandwidth and contradicts
RFC 2136's intent. Callers wanting to force a serial bump can send a
throwaway add+delete pair (touch-UPDATE pattern).

M3 — Delete-by-exact-match ignores TTL and class per RFC 2136 §2.5.4.
The previous rr.String() comparison included TTL, so an UPDATE with
CLASS=NONE TTL=0 (the protocol-required encoding for a delete) failed
to match stored RRs at CLASS=IN with non-zero TTL. Now we normalize
both sides (TTL=0, class=IN) before invoking dns.IsDuplicate.

M4 — validateZoneFiles now actually parses each zone at startup
(loadRRs invocation). Previously it only stat()'d the file; corrupt
zone content sailed through startup and produced SERVFAIL on the first
UPDATE with no startup-time signal. Combined with H3+H4's invariant
checks, this turns silent zone corruption into immediate startup
failure.

M7 — Commit-message sanitization. RR names are attacker-controlled
(TSIG only authenticates the sender; the payload is hostile by
default). Control characters in commit messages could inject newlines
into git log or ANSI sequences into downstream log renderers. New
sanitizeForCommitMessage escapes \n, \r, \t, and other C0 controls.

New tests:
- TestCheckTSIG_BadStatus_Refused (H6)
- TestUpdate_DeleteRR_IgnoresTTL (M3)
- TestSanitizeForCommitMessage (M7)
2026-05-22 21:29:13 -06:00

300 lines
9.5 KiB
Go

package rfc2136
import (
"encoding/base64"
"fmt"
"os"
"path/filepath"
"strconv"
"github.com/coredns/caddy"
"github.com/coredns/coredns/core/dnsserver"
"github.com/coredns/coredns/plugin"
clog "github.com/coredns/coredns/plugin/pkg/log"
"github.com/miekg/dns"
)
// log is the package-level logger for this plugin. It is created with
// clog.NewWithPlugin("rfc2136") so messages are prefixed `[rfc2136]`.
var log = clog.NewWithPlugin("rfc2136")
func init() {
plugin.Register("rfc2136", setup)
// SECURITY/OPERATIONAL NOTE — process-global mutation:
//
// miekg/dns's default MsgAcceptFunc rejects UPDATE opcode messages
// with NOTIMP at the wire layer, before any plugin sees them
// (acceptfunc.go: "Don't allow dynamic updates, because then the
// sections can contain a whole bunch of RRs"). CoreDNS 1.14.3
// constructs its dns.Server instances without exposing a
// per-server MsgAcceptFunc to plugins (see
// coredns/core/dnsserver/server.go:159 — the dns.Server struct is
// hardcoded), so to accept UPDATE opcodes anywhere we must
// override the package-level default. We do that here.
//
// Consequences operators should understand:
//
// 1. The override is PROCESS-WIDE. Every CoreDNS server block in
// this binary will accept UPDATE opcodes at the wire layer,
// not just the one(s) where `rfc2136` is configured. Other
// blocks will pass the UPDATE through their plugin chains;
// since no plugin in those chains handles UPDATE, the request
// falls off the end of the chain and CoreDNS returns
// FormatError. No state is mutated — but the wire-layer
// gatekeeping moved into the plugin chain.
//
// 2. The actual security boundary is TSIG verification, which
// happens in this plugin's ServeDNS (checkTSIG) AND inside
// handleUpdate (assertAuthenticated). Defense in depth: any
// state-mutating path re-verifies, so a future refactor that
// adds a new caller cannot accidentally skip auth.
//
// 3. If you remove `rfc2136` from your Corefile and reload via
// SIGUSR1, this global is NOT restored. Restart the process
// to fully revert.
//
// The mitigation matrix is: (a) loud comment here, (b) startup
// INFO log listing the zones this plugin owns, (c) TSIG re-check
// inside handleUpdate. The architecturally clean fix would be
// upstream support in CoreDNS for per-Config MsgAcceptFunc — when
// that lands, delete this whole stanza.
dns.DefaultMsgAcceptFunc = msgAcceptFunc
}
// msgAcceptFunc is a drop-in replacement for miekg/dns's
// defaultMsgAcceptFunc that also lets OpcodeUpdate through.
//
// Decisions, in order:
//   - a message with the QR (response) bit set is ignored;
//   - any opcode other than Query, Notify or Update is rejected with
//     NOTIMP;
//   - Qdcount must be exactly 1;
//   - for non-UPDATE opcodes, Ancount and Nscount may each be at most 1.
//     UPDATE skips this limit because, per RFC 2136, those sections
//     (Prerequisite / Update) can carry many RRs;
//   - Arcount may be at most 2, for every opcode.
func msgAcceptFunc(dh dns.Header) dns.MsgAcceptAction {
	const (
		qrFlag      = 0x8000 // QR bit of the header flags word
		opcodeShift = 11     // opcode occupies the four bits above bit 11
		opcodeMask  = 0xF
	)

	// Responses are silently ignored regardless of opcode (default behaviour).
	if dh.Bits&qrFlag != 0 {
		return dns.MsgIgnore
	}

	isUpdate := false
	switch int(dh.Bits>>opcodeShift) & opcodeMask {
	case dns.OpcodeUpdate:
		isUpdate = true
	case dns.OpcodeQuery, dns.OpcodeNotify:
		// allowed, subject to the section-count limits below
	default:
		return dns.MsgRejectNotImplemented
	}

	switch {
	case dh.Qdcount != 1:
		return dns.MsgReject
	case !isUpdate && (dh.Ancount > 1 || dh.Nscount > 1):
		// The default function's "at most one RR" limit on these
		// sections applies to queries and notifies only.
		return dns.MsgReject
	case dh.Arcount > 2:
		return dns.MsgReject
	}
	return dns.MsgAccept
}
// setup is the entry point the CoreDNS plugin registry calls for a
// Corefile `rfc2136` directive. It:
//
//  1. parses the directive (parse),
//  2. checks that every declared zone has a usable file (validateZoneFiles),
//  3. copies the configured TSIG keys into the server config so
//     miekg/dns verifies incoming signatures itself, and
//  4. links the handler into the plugin chain.
//
// Any parse or validation failure is returned wrapped by plugin.Error.
func setup(c *caddy.Controller) error {
	handler, err := parse(c)
	if err == nil {
		err = handler.validateZoneFiles()
	}
	if err != nil {
		return plugin.Error("rfc2136", err)
	}

	conf := dnsserver.GetConfig(c)

	// Hand the TSIG secrets to the underlying dns.Server. The UPDATE
	// handler later only inspects dns.ResponseWriter.TsigStatus() for
	// the verification result. The secret map is created only when
	// there is at least one key to store.
	if len(handler.TSIGKeys) > 0 && conf.TsigSecret == nil {
		conf.TsigSecret = make(map[string]string)
	}
	for keyName, k := range handler.TSIGKeys {
		conf.TsigSecret[keyName] = base64.StdEncoding.EncodeToString(k.Secret)
	}

	conf.AddPlugin(func(next plugin.Handler) plugin.Handler {
		handler.Next = next
		return handler
	})

	log.Infof("ready: zones=%v keys=%d ttl=%d dir=%q auto-commit=%t",
		handler.Zones, len(handler.TSIGKeys), handler.TTL, handler.ZonesDir, handler.AutoCommit)

	// Make the global MsgAcceptFunc override visible in the logs: an
	// operator whose sibling server blocks do not expect UPDATE opcodes
	// should know those opcodes now traverse them.
	log.Infof("dns.DefaultMsgAcceptFunc was overridden process-wide to permit OpcodeUpdate; state mutation requires TSIG verification on this plugin's zones=%v only", handler.Zones)

	return nil
}
// parse reads the `rfc2136 <zone> [<zone>...] { ... }` directive(s) from
// the controller and returns the configured plugin instance.
//
// Grammar:
//
//	rfc2136 <zone> [<zone>...] {
//	    zones-dir   <path>                              ; required
//	    tsig-key    <name> <algorithm> <base64-secret>  ; may repeat
//	    ttl         <seconds>                           ; default 60 (DefaultTTL), max 2147483647
//	    auto-commit <true|false>                        ; default true
//	    git-author  <name> <email>                      ; optional
//	}
//
// auto-commit also accepts yes/on and no/off.
//
// Errors are returned when: the directive names no zone, a sub-directive
// has the wrong number of arguments, a TSIG algorithm or secret fails to
// parse, a TSIG key name repeats, ttl is not an integer in
// [0, 2147483647], auto-commit has an unrecognized value, an unknown
// sub-directive appears, or zones-dir is missing.
//
// On success every normalized zone gets a zoneFile handle at
// <zones-dir>/<zone without trailing dot>.zone. This function does not
// stat or read those files itself.
func parse(c *caddy.Controller) (*RFC2136, error) {
	// RFC 2181 §8 limits a TTL to 31 bits; receivers should treat a value
	// with the top bit set as zero. Reject such values here so a
	// misconfigured large TTL does not silently become "do not cache".
	const maxTTL = 1<<31 - 1

	p := &RFC2136{
		TSIGKeys:   make(map[string]tsigKey),
		TTL:        DefaultTTL,
		AutoCommit: true,
	}

	// Git author override. It applies to every zone handled by this
	// plugin instance, not to individual zones. Empty values leave
	// whatever openZoneFile set untouched.
	var gitAuthorName, gitAuthorEmail string

	for c.Next() {
		args := c.RemainingArgs()
		if len(args) < 1 {
			return nil, c.ArgErr()
		}
		for _, z := range args {
			p.Zones = append(p.Zones, plugin.Host(z).NormalizeExact()...)
		}

		for c.NextBlock() {
			switch c.Val() {
			case "zones-dir":
				dArgs := c.RemainingArgs()
				if len(dArgs) != 1 {
					return nil, c.ArgErr()
				}
				p.ZonesDir = dArgs[0]

			case "tsig-key":
				kArgs := c.RemainingArgs()
				if len(kArgs) != 3 {
					return nil, c.Errf("tsig-key requires 3 args (name algorithm secret), got %d", len(kArgs))
				}
				keyName := canonicalKeyName(kArgs[0])
				algo, err := parseTSIGAlgorithm(kArgs[1])
				if err != nil {
					return nil, c.Err(err.Error())
				}
				secret, err := decodeTSIGSecret(kArgs[2])
				if err != nil {
					return nil, c.Errf("tsig-key %q: %v", keyName, err)
				}
				// Duplicates are compared on the canonical name.
				if _, exists := p.TSIGKeys[keyName]; exists {
					return nil, c.Errf("duplicate tsig-key %q", keyName)
				}
				p.TSIGKeys[keyName] = tsigKey{Algorithm: algo, Secret: secret}

			case "ttl":
				tArgs := c.RemainingArgs()
				if len(tArgs) != 1 {
					return nil, c.ArgErr()
				}
				ttl, err := strconv.ParseUint(tArgs[0], 10, 32)
				if err != nil {
					return nil, c.Errf("ttl must be a non-negative integer: %v", err)
				}
				if ttl > maxTTL {
					return nil, c.Errf("ttl %d exceeds the RFC 2181 maximum of %d", ttl, maxTTL)
				}
				p.TTL = uint32(ttl)

			case "auto-commit":
				aArgs := c.RemainingArgs()
				if len(aArgs) != 1 {
					return nil, c.ArgErr()
				}
				switch aArgs[0] {
				case "true", "yes", "on":
					p.AutoCommit = true
				case "false", "no", "off":
					p.AutoCommit = false
				default:
					return nil, c.Errf("auto-commit must be true|false, got %q", aArgs[0])
				}

			case "git-author":
				gArgs := c.RemainingArgs()
				if len(gArgs) != 2 {
					return nil, c.Errf("git-author requires 2 args (name email), got %d", len(gArgs))
				}
				gitAuthorName = gArgs[0]
				gitAuthorEmail = gArgs[1]

			default:
				return nil, c.Errf("unknown directive: %s", c.Val())
			}
		}
	}

	if len(p.Zones) == 0 {
		return nil, c.Err("at least one zone must be specified")
	}
	if p.ZonesDir == "" {
		return nil, c.Err("zones-dir is required")
	}

	// Build zoneFile handles for each declared zone.
	p.zones = make(map[string]*zoneFile, len(p.Zones))
	for _, z := range p.Zones {
		// Trailing dot → filename. supported.systems. → supported.systems.zone
		stem := z
		if l := len(stem); l > 0 && stem[l-1] == '.' {
			stem = stem[:l-1]
		}
		path := filepath.Join(p.ZonesDir, stem+".zone")

		zf := openZoneFile(path, z)
		zf.AutoCommit = p.AutoCommit
		if gitAuthorName != "" {
			zf.GitAuthorName = gitAuthorName
		}
		if gitAuthorEmail != "" {
			zf.GitAuthorEmail = gitAuthorEmail
		}
		p.zones[z] = zf
	}
	return p, nil
}
// validateZoneFiles checks, for every configured zone, that the zone file
// exists, is a regular file, and loads without error. It returns the
// first failure found, wrapped with the zone name and path; nil means
// every zone passed.
//
// Running this at CoreDNS startup catches both typos (file missing) and
// corrupt zone content immediately, rather than on the first UPDATE —
// the operator gets a signal at startup instead of discovering the
// breakage minutes later when ACME fires.
//
// Hamilton M4: the previous version only stat()'d the file. A zone with
// a syntax error sailed through startup, then the first UPDATE returned
// SERVFAIL with no startup-time signal. The file is now loaded through
// zf.loadRRs, so whatever that loader rejects fails startup too.
//
// Anything that is not a regular file (directory, FIFO, socket, device
// node) is rejected before loading. os.Stat follows symlinks, so a
// symlink to a regular file is accepted.
func (p *RFC2136) validateZoneFiles() error {
	for zone, zf := range p.zones {
		info, err := os.Stat(zf.Path)
		if err != nil {
			return fmt.Errorf("zone %q: file not accessible at %s: %w", zone, zf.Path, err)
		}

		switch mode := info.Mode(); {
		case mode.IsDir():
			return fmt.Errorf("zone %q: %s is a directory, expected a regular file", zone, zf.Path)
		case !mode.IsRegular():
			// Reading a FIFO can block startup; other special files
			// cannot hold zone data either.
			return fmt.Errorf("zone %q: %s is not a regular file (mode %s)", zone, zf.Path, mode.Type())
		}

		// Only the error matters here; the loaded records are discarded.
		if _, _, err := zf.loadRRs(); err != nil {
			return fmt.Errorf("zone %q at %s: %w", zone, zf.Path, err)
		}
	}
	return nil
}