H6 — TSIG replay-window test. New TestCheckTSIG_BadStatus_Refused verifies that when miekg/dns reports a TSIG verification failure via ResponseWriter.TsigStatus (the channel for fudge-window violations, bad MACs, expired timestamps), our plugin refuses. The fudge tolerance itself is miekg/dns's default (300s); documented in tsig.go so operators know the dependency. H7 — No-op UPDATE policy: documented explicitly in update.go. We do NOT bump the SOA on a no-op (deduped) UPDATE — forcing downstream secondaries to AXFR identical content wastes bandwidth and contradicts RFC 2136's intent. Callers wanting to force a serial bump can send a throwaway add+delete pair (touch-UPDATE pattern). M3 — Delete-by-exact-match ignores TTL and class per RFC 2136 §2.5.4. The previous rr.String() comparison included TTL, so an UPDATE with CLASS=NONE TTL=0 (the protocol-required encoding for a delete) failed to match stored RRs at CLASS=IN with non-zero TTL. Now we normalize both sides (TTL=0, class=IN) before invoking dns.IsDuplicate. M4 — validateZoneFiles now actually parses each zone at startup (loadRRs invocation). Previously it only stat()'d the file; corrupt zone content sailed through startup and produced SERVFAIL on the first UPDATE with no startup-time signal. Combined with H3+H4's invariant checks, this turns silent zone corruption into immediate startup failure. M7 — Commit-message sanitization. RR names are attacker-controlled (TSIG only authenticates the sender; the payload is hostile by default). Control characters in commit messages could inject newlines into git log or ANSI sequences into downstream log renderers. New sanitizeForCommitMessage escapes \n, \r, \t, and other C0 controls. New tests: - TestCheckTSIG_BadStatus_Refused (H6) - TestUpdate_DeleteRR_IgnoresTTL (M3) - TestSanitizeForCommitMessage (M7)
300 lines
9.5 KiB
Go
300 lines
9.5 KiB
Go
package rfc2136
|
|
|
|
import (
|
|
"encoding/base64"
|
|
"fmt"
|
|
"os"
|
|
"path/filepath"
|
|
"strconv"
|
|
|
|
"github.com/coredns/caddy"
|
|
"github.com/coredns/coredns/core/dnsserver"
|
|
"github.com/coredns/coredns/plugin"
|
|
clog "github.com/coredns/coredns/plugin/pkg/log"
|
|
"github.com/miekg/dns"
|
|
)
|
|
|
|
// log is the package logger, scoped so messages are prefixed `[rfc2136]`.
|
|
var log = clog.NewWithPlugin("rfc2136")
|
|
|
|
func init() {
|
|
plugin.Register("rfc2136", setup)
|
|
|
|
// SECURITY/OPERATIONAL NOTE — process-global mutation:
|
|
//
|
|
// miekg/dns's default MsgAcceptFunc rejects UPDATE opcode messages
|
|
// with NOTIMP at the wire layer, before any plugin sees them
|
|
// (acceptfunc.go: "Don't allow dynamic updates, because then the
|
|
// sections can contain a whole bunch of RRs"). CoreDNS 1.14.3
|
|
// constructs its dns.Server instances without exposing a
|
|
// per-server MsgAcceptFunc to plugins (see
|
|
// coredns/core/dnsserver/server.go:159 — the dns.Server struct is
|
|
// hardcoded), so to accept UPDATE opcodes anywhere we must
|
|
// override the package-level default. We do that here.
|
|
//
|
|
// Consequences operators should understand:
|
|
//
|
|
// 1. The override is PROCESS-WIDE. Every CoreDNS server block in
|
|
// this binary will accept UPDATE opcodes at the wire layer,
|
|
// not just the one(s) where `rfc2136` is configured. Other
|
|
// blocks will pass the UPDATE through their plugin chains;
|
|
// since no plugin in those chains handles UPDATE, the request
|
|
// falls off the end of the chain and CoreDNS returns
|
|
// FormatError. No state is mutated — but the wire-layer
|
|
// gatekeeping moved into the plugin chain.
|
|
//
|
|
// 2. The actual security boundary is TSIG verification, which
|
|
// happens in this plugin's ServeDNS (checkTSIG) AND inside
|
|
// handleUpdate (assertAuthenticated). Defense in depth: any
|
|
// state-mutating path re-verifies, so a future refactor that
|
|
// adds a new caller cannot accidentally skip auth.
|
|
//
|
|
// 3. If you remove `rfc2136` from your Corefile and reload via
|
|
// SIGUSR1, this global is NOT restored. Restart the process
|
|
// to fully revert.
|
|
//
|
|
// The mitigation matrix is: (a) loud comment here, (b) startup
|
|
// INFO log listing the zones this plugin owns, (c) TSIG re-check
|
|
// inside handleUpdate. The architecturally clean fix would be
|
|
// upstream support in CoreDNS for per-Config MsgAcceptFunc — when
|
|
// that lands, delete this whole stanza.
|
|
dns.DefaultMsgAcceptFunc = msgAcceptFunc
|
|
}
|
|
|
|
// msgAcceptFunc mirrors miekg/dns's defaultMsgAcceptFunc but additionally
|
|
// allows OpcodeUpdate. For UPDATE messages, the conservative Ancount/
|
|
// Nscount limits in the default function don't apply -- per RFC 2136
|
|
// those sections (Prerequisite / Update) can carry many RRs.
|
|
func msgAcceptFunc(dh dns.Header) dns.MsgAcceptAction {
|
|
// Responses are silently ignored regardless of opcode (default behaviour).
|
|
if isResponse := dh.Bits&0x8000 != 0; isResponse {
|
|
return dns.MsgIgnore
|
|
}
|
|
|
|
opcode := int(dh.Bits>>11) & 0xF
|
|
switch opcode {
|
|
case dns.OpcodeQuery, dns.OpcodeNotify, dns.OpcodeUpdate:
|
|
// allowed
|
|
default:
|
|
return dns.MsgRejectNotImplemented
|
|
}
|
|
|
|
if dh.Qdcount != 1 {
|
|
return dns.MsgReject
|
|
}
|
|
|
|
// UPDATE messages legitimately carry multiple RRs in the
|
|
// Prerequisite (Ancount) and Update (Nscount) sections -- skip the
|
|
// "exactly 1" check that the default function applies for queries.
|
|
if opcode != dns.OpcodeUpdate {
|
|
if dh.Ancount > 1 {
|
|
return dns.MsgReject
|
|
}
|
|
if dh.Nscount > 1 {
|
|
return dns.MsgReject
|
|
}
|
|
}
|
|
|
|
if dh.Arcount > 2 {
|
|
return dns.MsgReject
|
|
}
|
|
return dns.MsgAccept
|
|
}
|
|
|
|
// setup is invoked by the CoreDNS plugin registry once per Corefile
|
|
// `rfc2136` directive. It parses the directive, validates that each
|
|
// declared zone has a corresponding file in zones-dir, registers
|
|
// TSIG keys with the underlying dns.Server, and links the handler
|
|
// into the plugin chain.
|
|
func setup(c *caddy.Controller) error {
|
|
p, err := parse(c)
|
|
if err != nil {
|
|
return plugin.Error("rfc2136", err)
|
|
}
|
|
if err := p.validateZoneFiles(); err != nil {
|
|
return plugin.Error("rfc2136", err)
|
|
}
|
|
|
|
cfg := dnsserver.GetConfig(c)
|
|
|
|
// Register TSIG keys with the underlying dns.Server so miekg/dns
|
|
// auto-verifies incoming signatures. We then just inspect the
|
|
// result via dns.ResponseWriter.TsigStatus() in our UPDATE handler.
|
|
if len(p.TSIGKeys) > 0 {
|
|
if cfg.TsigSecret == nil {
|
|
cfg.TsigSecret = make(map[string]string)
|
|
}
|
|
for name, key := range p.TSIGKeys {
|
|
cfg.TsigSecret[name] = base64.StdEncoding.EncodeToString(key.Secret)
|
|
}
|
|
}
|
|
|
|
cfg.AddPlugin(func(next plugin.Handler) plugin.Handler {
|
|
p.Next = next
|
|
return p
|
|
})
|
|
|
|
log.Infof("ready: zones=%v keys=%d ttl=%d dir=%q auto-commit=%t",
|
|
p.Zones, len(p.TSIGKeys), p.TTL, p.ZonesDir, p.AutoCommit)
|
|
// Surface the global MsgAcceptFunc override for operator audit —
|
|
// if a sibling server block on this process doesn't expect UPDATE
|
|
// opcodes to traverse it, the operator should know they will.
|
|
log.Infof("dns.DefaultMsgAcceptFunc was overridden process-wide to permit OpcodeUpdate; state mutation requires TSIG verification on this plugin's zones=%v only", p.Zones)
|
|
return nil
|
|
}
|
|
|
|
// parse reads a single `rfc2136 <zone> [<zone>...] { ... }` block.
|
|
//
|
|
// Grammar:
|
|
//
|
|
// rfc2136 <zone> [<zone>...] {
|
|
// zones-dir <path> ; required
|
|
// tsig-key <name> <algorithm> <base64-secret> ; may repeat
|
|
// ttl <seconds> ; default 60
|
|
// auto-commit <true|false> ; default true
|
|
// git-author <name> <email> ; optional
|
|
// }
|
|
func parse(c *caddy.Controller) (*RFC2136, error) {
|
|
p := &RFC2136{
|
|
TSIGKeys: make(map[string]tsigKey),
|
|
TTL: DefaultTTL,
|
|
AutoCommit: true,
|
|
}
|
|
|
|
// Per-zone git author overrides. Defaults are applied later.
|
|
var gitAuthorName, gitAuthorEmail string
|
|
|
|
for c.Next() {
|
|
args := c.RemainingArgs()
|
|
if len(args) < 1 {
|
|
return nil, c.ArgErr()
|
|
}
|
|
for _, z := range args {
|
|
p.Zones = append(p.Zones, plugin.Host(z).NormalizeExact()...)
|
|
}
|
|
|
|
for c.NextBlock() {
|
|
switch c.Val() {
|
|
|
|
case "zones-dir":
|
|
dArgs := c.RemainingArgs()
|
|
if len(dArgs) != 1 {
|
|
return nil, c.ArgErr()
|
|
}
|
|
p.ZonesDir = dArgs[0]
|
|
|
|
case "tsig-key":
|
|
kArgs := c.RemainingArgs()
|
|
if len(kArgs) != 3 {
|
|
return nil, c.Errf("tsig-key requires 3 args (name algorithm secret), got %d", len(kArgs))
|
|
}
|
|
keyName := canonicalKeyName(kArgs[0])
|
|
algo, err := parseTSIGAlgorithm(kArgs[1])
|
|
if err != nil {
|
|
return nil, c.Err(err.Error())
|
|
}
|
|
secret, err := decodeTSIGSecret(kArgs[2])
|
|
if err != nil {
|
|
return nil, c.Errf("tsig-key %q: %v", keyName, err)
|
|
}
|
|
if _, exists := p.TSIGKeys[keyName]; exists {
|
|
return nil, c.Errf("duplicate tsig-key %q", keyName)
|
|
}
|
|
p.TSIGKeys[keyName] = tsigKey{Algorithm: algo, Secret: secret}
|
|
|
|
case "ttl":
|
|
tArgs := c.RemainingArgs()
|
|
if len(tArgs) != 1 {
|
|
return nil, c.ArgErr()
|
|
}
|
|
ttl, err := strconv.ParseUint(tArgs[0], 10, 32)
|
|
if err != nil {
|
|
return nil, c.Errf("ttl must be a non-negative integer: %v", err)
|
|
}
|
|
p.TTL = uint32(ttl)
|
|
|
|
case "auto-commit":
|
|
aArgs := c.RemainingArgs()
|
|
if len(aArgs) != 1 {
|
|
return nil, c.ArgErr()
|
|
}
|
|
switch aArgs[0] {
|
|
case "true", "yes", "on":
|
|
p.AutoCommit = true
|
|
case "false", "no", "off":
|
|
p.AutoCommit = false
|
|
default:
|
|
return nil, c.Errf("auto-commit must be true|false, got %q", aArgs[0])
|
|
}
|
|
|
|
case "git-author":
|
|
gArgs := c.RemainingArgs()
|
|
if len(gArgs) != 2 {
|
|
return nil, c.Errf("git-author requires 2 args (name email), got %d", len(gArgs))
|
|
}
|
|
gitAuthorName = gArgs[0]
|
|
gitAuthorEmail = gArgs[1]
|
|
|
|
default:
|
|
return nil, c.Errf("unknown directive: %s", c.Val())
|
|
}
|
|
}
|
|
}
|
|
|
|
if len(p.Zones) == 0 {
|
|
return nil, c.Err("at least one zone must be specified")
|
|
}
|
|
if p.ZonesDir == "" {
|
|
return nil, c.Err("zones-dir is required")
|
|
}
|
|
|
|
// Build zoneFile handles for each declared zone.
|
|
p.zones = make(map[string]*zoneFile, len(p.Zones))
|
|
for _, z := range p.Zones {
|
|
// Trailing dot → filename. supported.systems. → supported.systems.zone
|
|
stem := z
|
|
if l := len(stem); l > 0 && stem[l-1] == '.' {
|
|
stem = stem[:l-1]
|
|
}
|
|
path := filepath.Join(p.ZonesDir, stem+".zone")
|
|
zf := openZoneFile(path, z)
|
|
zf.AutoCommit = p.AutoCommit
|
|
if gitAuthorName != "" {
|
|
zf.GitAuthorName = gitAuthorName
|
|
}
|
|
if gitAuthorEmail != "" {
|
|
zf.GitAuthorEmail = gitAuthorEmail
|
|
}
|
|
p.zones[z] = zf
|
|
}
|
|
|
|
return p, nil
|
|
}
|
|
|
|
// validateZoneFiles ensures every configured zone has an accessible
|
|
// AND parseable file on disk at the expected path. Catches both typos
|
|
// (file missing) and corrupt zone content at CoreDNS startup rather
|
|
// than on the first UPDATE — the operator gets an immediate signal
|
|
// instead of discovering the breakage minutes later when ACME fires.
|
|
//
|
|
// Hamilton M4: the previous version only stat()'d the file. A zone
|
|
// with a syntax error sailed through startup, then the first UPDATE
|
|
// returned SERVFAIL with no startup-time signal. We now run the same
|
|
// loadRRs + assertSingleApexSOA path the UPDATE handler uses, so any
|
|
// parse-time or SOA-invariant failure surfaces at startup.
|
|
func (p *RFC2136) validateZoneFiles() error {
|
|
for zone, zf := range p.zones {
|
|
st, err := os.Stat(zf.Path)
|
|
if err != nil {
|
|
return fmt.Errorf("zone %q: file not accessible at %s: %w", zone, zf.Path, err)
|
|
}
|
|
if st.IsDir() {
|
|
return fmt.Errorf("zone %q: %s is a directory, expected a regular file", zone, zf.Path)
|
|
}
|
|
if _, _, err := zf.loadRRs(); err != nil {
|
|
return fmt.Errorf("zone %q at %s: %w", zone, zf.Path, err)
|
|
}
|
|
}
|
|
return nil
|
|
}
|