H6 — TSIG replay-window test. New TestCheckTSIG_BadStatus_Refused verifies that when miekg/dns reports a TSIG verification failure via ResponseWriter.TsigStatus (the channel for fudge-window violations, bad MACs, and expired timestamps), our plugin refuses the request. The fudge tolerance itself is miekg/dns's default (300s); this is documented in tsig.go so operators know about the dependency.

H7 — No-op UPDATE policy: documented explicitly in update.go. We do NOT bump the SOA on a no-op (deduped) UPDATE — forcing downstream secondaries to AXFR identical content wastes bandwidth and contradicts RFC 2136's intent. Callers wanting to force a serial bump can send a throwaway add+delete pair (touch-UPDATE pattern).

M3 — Delete-by-exact-match ignores TTL and class per RFC 2136 §2.5.4. The previous rr.String() comparison included TTL, so an UPDATE with CLASS=NONE TTL=0 (the protocol-required encoding for a delete) failed to match stored RRs at CLASS=IN with non-zero TTL. Now we normalize both sides (TTL=0, class=IN) before invoking dns.IsDuplicate.

M4 — validateZoneFiles now actually parses each zone at startup (loadRRs invocation). Previously it only stat()'d the file; corrupt zone content sailed through startup and produced SERVFAIL on the first UPDATE with no startup-time signal. Combined with H3+H4's invariant checks, this turns silent zone corruption into immediate startup failure.

M7 — Commit-message sanitization. RR names are attacker-controlled (TSIG only authenticates the sender; the payload is hostile by default). Control characters in commit messages could inject newlines into git log or ANSI sequences into downstream log renderers. New sanitizeForCommitMessage escapes \n, \r, \t, and other C0 controls.

New tests:
- TestCheckTSIG_BadStatus_Refused (H6)
- TestUpdate_DeleteRR_IgnoresTTL (M3)
- TestSanitizeForCommitMessage (M7)
377 lines
13 KiB
Go
377 lines
13 KiB
Go
package rfc2136
|
|
|
|
import (
|
|
"fmt"
|
|
"path/filepath"
|
|
"strings"
|
|
"time"
|
|
|
|
"github.com/miekg/dns"
|
|
)
|
|
|
|
// handleUpdate implements the RFC 2136 UPDATE opcode against the
|
|
// on-disk zone file.
|
|
//
|
|
// Sequence per UPDATE message:
|
|
// 1. Validate the Zone section (RFC 2136 §2.3): must be exactly one
|
|
// SOA-typed record whose name is a zone we manage.
|
|
// 2. Acquire the zone file's mutex.
|
|
// 3. Load the file's RRs into memory.
|
|
// 4. Check each prerequisite (§3.2) against the loaded RRs. First
|
|
// failure short-circuits with the spec's rcode.
|
|
// 5. Apply each update RR (§3.4.2) to the in-memory slice.
|
|
// 6. Bump the SOA serial (CalVer YYYYMMDDNN).
|
|
// 7. Atomic write to disk (temp file + rename).
|
|
// 8. Optionally `git add && git commit` for audit trail.
|
|
//
|
|
// Steps 3-7 happen under the zone-file mutex. If 8 fails we log but
|
|
// don't roll back (the on-disk state is authoritative; lost commits
|
|
// can be re-staged via `git add` later).
|
|
//
|
|
// SECURITY CONTRACT — the `verified` parameter:
|
|
//
|
|
// handleUpdate mutates zone files on disk. The caller MUST set
|
|
// verified=true only after successfully validating the message's TSIG
|
|
// signature against a configured key. ServeDNS does this. A
|
|
// verified=false invocation is treated as an unauthenticated attempt
|
|
// and refused — preserving the security boundary even if a future
|
|
// internal caller (NOTIFY relay, admin RPC, refactor) reaches this
|
|
// function without going through the wire-level TSIG check.
|
|
//
|
|
// Tests that exercise post-auth logic pass verified=true. Tests that
|
|
// exercise auth rejection pass verified=false.
|
|
//
|
|
// This is defense-in-depth: ServeDNS already verifies; we re-assert at
|
|
// the function boundary so the security property survives refactors.
|
|
func (p *RFC2136) handleUpdate(w dns.ResponseWriter, r *dns.Msg, verified bool) (int, error) {
|
|
resp := new(dns.Msg)
|
|
resp.SetReply(r)
|
|
if verified {
|
|
// Only sign responses we authorize. Signing rejections leaks
|
|
// attestation that the named key exists on this server (see M9
|
|
// in the Hamilton review). Unauthorized callers get an
|
|
// unsigned Refused.
|
|
signResponseIfSigned(resp, r)
|
|
} else {
|
|
log.Warningf("handleUpdate refused: caller did not assert TSIG verification — possible internal bypass attempt")
|
|
return p.updateResp(w, resp, dns.RcodeRefused)
|
|
}
|
|
|
|
// 1. Validate the Zone section.
|
|
if len(r.Question) != 1 {
|
|
log.Debugf("UPDATE rejected: expected 1 Zone record, got %d", len(r.Question))
|
|
return p.updateResp(w, resp, dns.RcodeFormatError)
|
|
}
|
|
zoneQ := r.Question[0]
|
|
if zoneQ.Qtype != dns.TypeSOA {
|
|
log.Debugf("UPDATE rejected: Zone section type=%d, want SOA", zoneQ.Qtype)
|
|
return p.updateResp(w, resp, dns.RcodeFormatError)
|
|
}
|
|
zone := p.findZone(zoneQ.Name)
|
|
if zone == "" {
|
|
log.Debugf("UPDATE rejected: zone %q not authoritative", zoneQ.Name)
|
|
return p.updateResp(w, resp, dns.RcodeNotAuth)
|
|
}
|
|
zf, ok := p.zones[zone]
|
|
if !ok {
|
|
log.Errorf("UPDATE rejected: no zone file handle for %q (setup bug?)", zone)
|
|
return p.updateResp(w, resp, dns.RcodeServerFailure)
|
|
}
|
|
|
|
zf.mu.Lock()
|
|
defer zf.mu.Unlock()
|
|
|
|
// 3. Load the current zone contents (with file-identity snapshot).
|
|
rrs, snap, err := zf.loadRRs()
|
|
if err != nil {
|
|
log.Errorf("UPDATE failed: %v", err)
|
|
return p.updateResp(w, resp, dns.RcodeServerFailure)
|
|
}
|
|
|
|
// 4. Check prerequisites.
|
|
for _, rr := range r.Answer {
|
|
rcode := checkPrereq(zone, rrs, rr)
|
|
if rcode != dns.RcodeSuccess {
|
|
log.Debugf("UPDATE prereq failed: %s → rcode=%d", rr.String(), rcode)
|
|
return p.updateResp(w, resp, rcode)
|
|
}
|
|
}
|
|
|
|
// 5. Apply updates. Build a fresh RR slice rather than mutating in
|
|
// place — that way a partial application can't leave the slice in
|
|
// a half-modified state if an early update fails.
|
|
updated := rrs
|
|
changed := false
|
|
for _, rr := range r.Ns {
|
|
next, rcode, modified := applyUpdate(zone, p.TTL, updated, rr)
|
|
if rcode != dns.RcodeSuccess {
|
|
return p.updateResp(w, resp, rcode)
|
|
}
|
|
updated = next
|
|
if modified {
|
|
changed = true
|
|
}
|
|
}
|
|
|
|
if !changed {
|
|
// UPDATE was a valid no-op (e.g. only contained adds for RRs
|
|
// that were already present, deduped away per RFC 2136
|
|
// §3.4.2.2). Return NOERROR without rewriting the file or
|
|
// bumping the SOA serial.
|
|
//
|
|
// H7 — Policy decision documented:
|
|
//
|
|
// We DO NOT bump the SOA serial on no-op UPDATEs. Rationale:
|
|
// - DNS-wise, nothing changed. Forcing downstream secondaries
|
|
// (HE) to do an AXFR pull just to re-fetch identical content
|
|
// wastes bandwidth and is not what RFC 2136 implies.
|
|
// - The wire-visible cert-issuance chain for ACME does not
|
|
// depend on the second-UPDATE's serial bump — once the first
|
|
// UPDATE landed, the SOA already advanced and the auto plugin
|
|
// reloaded; subsequent identical UPDATEs are spurious and
|
|
// should be silent.
|
|
// - Caddy's caddy-dns/rfc2136 client treats NOERROR-no-bump as
|
|
// "yes I have your record" — which is the truthful answer.
|
|
//
|
|
// If a caller wants to force a serial bump for some reason, they
|
|
// can send a touch-UPDATE that adds-then-deletes a throwaway
|
|
// record. That's an explicit, intentional pattern and is
|
|
// supported.
|
|
return p.updateResp(w, resp, dns.RcodeSuccess)
|
|
}
|
|
|
|
// 6. Bump SOA serial.
|
|
now := time.Now()
|
|
if err := bumpSerial(updated, now); err != nil {
|
|
log.Errorf("UPDATE failed: %v", err)
|
|
return p.updateResp(w, resp, dns.RcodeServerFailure)
|
|
}
|
|
|
|
// 6b. Concurrent-modification check (Hamilton H1). Before clobbering
|
|
// the on-disk file, verify nothing changed it out from under us
|
|
// between loadRRs and now. The per-zone mutex serializes us against
|
|
// other in-process UPDATEs, but external editors (rsync push,
|
|
// manual edit, `git checkout`) can race in any time. If the file
|
|
// changed, refuse with SERVFAIL so Caddy retries on a fresh load.
|
|
if err := zf.checkUnchanged(snap); err != nil {
|
|
log.Warningf("UPDATE refused: %v", err)
|
|
return p.updateResp(w, resp, dns.RcodeServerFailure)
|
|
}
|
|
|
|
// 7. Atomic write.
|
|
if err := zf.writeAtomic(updated, now); err != nil {
|
|
log.Errorf("UPDATE write failed: %v", err)
|
|
return p.updateResp(w, resp, dns.RcodeServerFailure)
|
|
}
|
|
|
|
// 8. Auto-commit. Failure to commit means the file is correct but
|
|
// the git audit trail diverges (Hamilton H2). We log at ERROR with
|
|
// structured detail so operators discover the divergence; recovery
|
|
// is `git -C <zonesDir> status` + `git add` + manual commit. We do
|
|
// NOT roll back the file write — by the time the commit fails, the
|
|
// auto plugin may have already noticed the new mtime, and rolling
|
|
// back creates more races than it solves.
|
|
msg := summarizeUpdate(zone, r.Ns)
|
|
if err := zf.commit(msg); err != nil {
|
|
log.Errorf("git auto-commit failed; zone file is correct but audit trail diverged: zone=%s path=%s err=%v — recover with `git -C %s status` + manual commit",
|
|
zone, zf.Path, err, filepath.Dir(zf.Path))
|
|
}
|
|
|
|
log.Infof("UPDATE applied: zone=%s prereqs=%d updates=%d msg=%q",
|
|
zone, len(r.Answer), len(r.Ns), msg)
|
|
return p.updateResp(w, resp, dns.RcodeSuccess)
|
|
}
|
|
|
|
// updateResp writes the response and returns the rcode/err pair for ServeDNS.
|
|
func (p *RFC2136) updateResp(w dns.ResponseWriter, resp *dns.Msg, rcode int) (int, error) {
|
|
resp.Rcode = rcode
|
|
_ = w.WriteMsg(resp)
|
|
return rcode, nil
|
|
}
|
|
|
|
// findZone returns the longest matching configured zone for qname, or
|
|
// "" if qname is outside all configured zones.
|
|
func (p *RFC2136) findZone(qname string) string {
|
|
qname = canon(qname)
|
|
var best string
|
|
for _, z := range p.Zones {
|
|
if qname == z || strings.HasSuffix(qname, "."+z) {
|
|
if len(z) > len(best) {
|
|
best = z
|
|
}
|
|
}
|
|
}
|
|
return best
|
|
}
|
|
|
|
// checkPrereq evaluates one record from the Prerequisite section
|
|
// against the loaded RR slice. Returns dns.RcodeSuccess if satisfied,
|
|
// or the spec rcode otherwise (§3.2).
|
|
func checkPrereq(zone string, rrs []dns.RR, rr dns.RR) int {
|
|
hdr := rr.Header()
|
|
name := canon(hdr.Name)
|
|
if !inZone(name, zone) {
|
|
return dns.RcodeNotZone
|
|
}
|
|
switch hdr.Class {
|
|
case dns.ClassANY:
|
|
if hdr.Rrtype == dns.TypeANY {
|
|
if !nameExistsIn(rrs, name) {
|
|
return dns.RcodeNameError
|
|
}
|
|
return dns.RcodeSuccess
|
|
}
|
|
if len(lookupIn(rrs, name, hdr.Rrtype)) == 0 {
|
|
return dns.RcodeNXRrset
|
|
}
|
|
return dns.RcodeSuccess
|
|
|
|
case dns.ClassNONE:
|
|
if hdr.Rrtype == dns.TypeANY {
|
|
if nameExistsIn(rrs, name) {
|
|
return dns.RcodeYXDomain
|
|
}
|
|
return dns.RcodeSuccess
|
|
}
|
|
if len(lookupIn(rrs, name, hdr.Rrtype)) > 0 {
|
|
return dns.RcodeYXRrset
|
|
}
|
|
return dns.RcodeSuccess
|
|
|
|
default:
|
|
// CLASS = zone class with rdata. Exact value-match prereqs
|
|
// (§3.2.5). Not used by Caddy/caddy-dns/rfc2136; treating as
|
|
// satisfied for now. v2 can implement value-prereq if a real
|
|
// caller needs it.
|
|
log.Debugf("prereq with rdata-match semantics not implemented; treating as satisfied")
|
|
return dns.RcodeSuccess
|
|
}
|
|
}
|
|
|
|
// applyUpdate handles one record in the Update section per §3.4.2.
|
|
// Returns the (possibly mutated) RR slice, an rcode (Success unless
|
|
// the update was rejected), and a flag indicating whether the slice
|
|
// was actually modified (to avoid no-op file rewrites).
|
|
func applyUpdate(zone string, defaultTTL uint32, rrs []dns.RR, rr dns.RR) ([]dns.RR, int, bool) {
|
|
hdr := rr.Header()
|
|
name := canon(hdr.Name)
|
|
if !inZone(name, zone) {
|
|
return rrs, dns.RcodeNotZone, false
|
|
}
|
|
|
|
switch hdr.Class {
|
|
case dns.ClassANY:
|
|
if hdr.Rrtype == dns.TypeANY {
|
|
// Wipe the whole name. Refuse apex wipes — that would
|
|
// destroy SOA + NS bedrock.
|
|
if isApex(name, zone) {
|
|
log.Debugf("apex wipe refused: %s", name)
|
|
return rrs, dns.RcodeRefused, false
|
|
}
|
|
before := len(rrs)
|
|
rrs = removeNameFrom(rrs, name)
|
|
return rrs, dns.RcodeSuccess, len(rrs) != before
|
|
}
|
|
// Apex SOA/NS removal refused for the same reason.
|
|
if isApex(name, zone) && (hdr.Rrtype == dns.TypeSOA || hdr.Rrtype == dns.TypeNS) {
|
|
log.Debugf("apex %s removal refused", dns.TypeToString[hdr.Rrtype])
|
|
return rrs, dns.RcodeRefused, false
|
|
}
|
|
before := len(rrs)
|
|
rrs = removeRRsetFrom(rrs, name, hdr.Rrtype)
|
|
return rrs, dns.RcodeSuccess, len(rrs) != before
|
|
|
|
case dns.ClassNONE:
|
|
// Refuse to delete apex SOA/NS by exact-RR match.
|
|
if isApex(name, zone) && (hdr.Rrtype == dns.TypeSOA || hdr.Rrtype == dns.TypeNS) {
|
|
return rrs, dns.RcodeRefused, false
|
|
}
|
|
before := len(rrs)
|
|
rrs = removeRRFrom(rrs, rr)
|
|
return rrs, dns.RcodeSuccess, len(rrs) != before
|
|
|
|
default:
|
|
// Apex SOA/NS adds refused — those are managed by the zone-file
|
|
// owner, not by dynamic updates.
|
|
if isApex(name, zone) && (hdr.Rrtype == dns.TypeSOA || hdr.Rrtype == dns.TypeNS) {
|
|
log.Debugf("apex %s add refused", dns.TypeToString[hdr.Rrtype])
|
|
return rrs, dns.RcodeRefused, false
|
|
}
|
|
if hdr.Ttl == 0 {
|
|
hdr.Ttl = defaultTTL
|
|
}
|
|
before := len(rrs)
|
|
rrs = addRRTo(rrs, rr)
|
|
return rrs, dns.RcodeSuccess, len(rrs) != before
|
|
}
|
|
}
|
|
|
|
// summarizeUpdate produces a one-line commit message describing the
|
|
// UPDATE for git history. The output is sanitized — Hamilton M7 — to
|
|
// prevent attacker-controlled RR names (TSIG just authenticates the
|
|
// sender; the payload is still attacker-controlled) from injecting
|
|
// control characters into git log, log aggregators, or any downstream
|
|
// renderer that interprets ANSI/newlines.
|
|
func summarizeUpdate(zone string, updates []dns.RR) string {
|
|
var msg string
|
|
if len(updates) == 1 {
|
|
msg = fmt.Sprintf("rfc2136 %s: %s", zone, oneLineOp(updates[0]))
|
|
} else {
|
|
msg = fmt.Sprintf("rfc2136 %s: %d operations", zone, len(updates))
|
|
}
|
|
return sanitizeForCommitMessage(msg)
|
|
}
|
|
|
|
// sanitizeForCommitMessage returns s with every control character
// replaced by a printable escape, so that attacker-supplied text cannot
// inject line breaks or terminal escape sequences into git log or into
// downstream log renderers.
//
// Replacements:
//   - newline, carriage return, tab        -> \n, \r, \t
//   - other C0 controls (< U+0020), DEL    -> \xNN
//   - C1 controls (U+0080..U+009F)         -> \xNN
//   - U+2028 and U+2029 (line/paragraph)   -> \u2028, \u2029
//
// The C1 range matters because U+009B is the single-character form of
// the ANSI CSI introducer, and U+0085 (NEL) is a line break for some
// consumers; U+2028/U+2029 are likewise treated as line breaks by
// Unicode-aware renderers. All other runes are copied unchanged. Bytes
// that are not valid UTF-8 come out as U+FFFD, as produced by ranging
// over the string. Backslashes already in s are not escaped, so the
// output is meant for display and is not reversible.
func sanitizeForCommitMessage(s string) string {
	var b strings.Builder
	b.Grow(len(s))
	for _, r := range s {
		switch {
		case r == '\n':
			b.WriteString(`\n`)
		case r == '\r':
			b.WriteString(`\r`)
		case r == '\t':
			b.WriteString(`\t`)
		case r < 0x20, r == 0x7f, r >= 0x80 && r <= 0x9f:
			// Remaining C0 controls, DEL, and the C1 block.
			fmt.Fprintf(&b, `\x%02x`, r)
		case r == 0x2028, r == 0x2029:
			// Unicode line and paragraph separators.
			fmt.Fprintf(&b, `\u%04x`, r)
		default:
			b.WriteRune(r)
		}
	}
	return b.String()
}
|
|
|
|
// oneLineOp returns a short human-readable description of a single
|
|
// update RR for inclusion in commit messages.
|
|
func oneLineOp(rr dns.RR) string {
|
|
hdr := rr.Header()
|
|
name := strings.TrimSuffix(canon(hdr.Name), ".")
|
|
ttype := dns.TypeToString[hdr.Rrtype]
|
|
switch hdr.Class {
|
|
case dns.ClassANY:
|
|
if hdr.Rrtype == dns.TypeANY {
|
|
return fmt.Sprintf("delete all %s", name)
|
|
}
|
|
return fmt.Sprintf("delete %s %s", ttype, name)
|
|
case dns.ClassNONE:
|
|
return fmt.Sprintf("delete-rr %s %s", ttype, name)
|
|
default:
|
|
return fmt.Sprintf("add %s %s", ttype, name)
|
|
}
|
|
}
|
|
|
|
// inZone reports whether name is the zone apex or lies beneath it.
//
// Both arguments are compared as given, so callers are expected to pass
// them in the same canonical, fully-qualified form. Below the apex the
// test is a suffix match on "." + zone, which keeps the match on a label
// boundary ("badexample.com." is not inside "example.com.").
//
// The root zone "." is handled explicitly: the generic suffix would be
// "..", which no name ends with, so every fully-qualified name is
// recognized as being inside the root instead.
func inZone(name, zone string) bool {
	if name == zone {
		return true
	}
	if zone == "." {
		return strings.HasSuffix(name, ".")
	}
	return strings.HasSuffix(name, "."+zone)
}
|
|
|
|
// isApex reports whether name is exactly the zone's own name, i.e. the
// apex rather than something beneath it. The comparison is a plain
// string equality, so both arguments must already be in the same
// canonical form.
func isApex(name, zone string) bool {
	return zone == name
}
|