diff --git a/README.md b/README.md index f594c7a..6943d7c 100644 --- a/README.md +++ b/README.md @@ -33,17 +33,24 @@ messages — zero static zone-file churn. ## Status -**Phase 1 (skeleton)**: compiles, registers with CoreDNS, parses the -Corefile directive. Does not yet handle UPDATE messages or serve any -records. ServeDNS is a pass-through. See `phases.md` for the roadmap. +**v2026.05.22.2**: production-ready. Handles UPDATE messages against +file-backed zones, TSIG-authenticates, bumps SOA serial in CalVer +YYMMDD*10000+NNNN form, atomically writes the zone file, optionally +git-commits each change for an audit trail. Designed to coexist with +CoreDNS's `auto` plugin (which serves queries from the same zone files +on its reload cycle). ## Configuration ``` rfc2136 [...] { - tsig-key - ttl - persist + zones-dir # required + tsig-key # may repeat + ttl # default 60 + auto-commit # default true + git-author # optional + rate-limit # default 100 / 60s + rate-limit off # disable rate-limit } ``` @@ -52,14 +59,96 @@ Example: ``` .:53 auth.example.com { rfc2136 auth.example.com { + zones-dir /var/lib/coredns/zones tsig-key acme-key. hmac-sha256 BASE64SECRET== ttl 60 + auto-commit true + git-author "coredns-rfc2136" "rfc2136@coredns.example.com" + } + auto { + directory /var/lib/coredns/zones (.*)\.zone {1} + reload 30s } errors log } ``` +## Operational constraints + +A few behaviors operators should know before relying on this plugin: + +### Single-process atomicity only + +The per-zone mutex serializes UPDATEs *within one CoreDNS process*. It +does NOT coordinate with external file edits. If you `rsync` a zone +file from a workstation while the plugin is mid-UPDATE, you get a +race. The plugin defends against this with a snapshot-and-recheck: +loadRRs captures (mtime, size), and immediately before writing back, +we re-stat; if the file changed, the UPDATE is refused with SERVFAIL +and the client (Caddy etc.) retries on a fresh load. The window is +narrow but non-zero. + +**Recommendation**: don't rsync zone files into a directory the plugin +is actively writing to. If you must, expect occasional SERVFAILs that +resolve on retry. + +### MsgAcceptFunc is process-global + +CoreDNS 1.14.3 doesn't expose a per-Config `MsgAcceptFunc`, so this +plugin overrides the miekg/dns package-level default at `init()` time. +**Every server block** in the process will accept the UPDATE opcode +at the wire layer — but only blocks with `rfc2136` in their plugin +chain do anything useful with it (others pass through and return +FormatError). The actual security boundary is TSIG, enforced both in +`ServeDNS` and as a defense-in-depth check inside `handleUpdate`. + +### No-op UPDATEs do not bump the SOA serial + +If an UPDATE adds an RR that's already present (deduped per RFC 2136 +§3.4.2.2) or deletes one that doesn't exist, the file is not rewritten +and the SOA serial is not advanced. We return NOERROR. Downstream +secondaries are not asked to AXFR for a no-change. + +If you need to force a serial bump (rare), send a touch-UPDATE: add a +throwaway RR then delete it. + +### SOA invariants are enforced strictly + +`loadRRs` refuses zone files with: zero SOAs, multiple SOAs, or an +SOA whose owner doesn't match the zone apex. Both at startup (via +`validateZoneFiles`) and on every UPDATE. Zone-file corruption fails +loud at boot rather than mysteriously on first ACME activity. + +### Serial counter rolls over at NNNN=9999 + +Format is `YYMMDD*10000 + NNNN`. At NNNN=9999, the next bump rolls to +the next encoded day with NNNN=0001. On heavy days the encoded date +drifts ahead of wall time; on quiet days it catches back up. Monotonic +ordering (the only DNS requirement) holds. uint32 won't wrap for ~117 +years at full 10000/day burn. + +### TSIG replay window is miekg/dns's default (currently 300s) + +The fudge window enforced by miekg/dns's TsigVerify is what gates +replay. If miekg/dns ever changes its default, this plugin's behavior +changes with it. A future enhancement is `tsig-fudge` as a Corefile +directive. + +### Git commit failure is logged at ERROR, not rolled back + +If `git commit` fails after a successful `writeAtomic`, the zone file +is correct but the audit trail diverges. We log at ERROR with a +recovery hint (`git -C status` + manual commit). We do NOT roll +back the file write — the auto plugin may have already noticed the +new mtime, and rolling back creates more races than it solves. + +### Per-key rate limit + +UPDATE traffic is token-bucket capped per TSIG key. Default 100 +UPDATEs per 60 seconds. ACME storms are well within this; anything +beyond is suspicious. Tune via `rate-limit `. + ## Building This plugin is consumed by a custom CoreDNS build via `plugin.cfg`: diff --git a/update.go b/update.go index 139e3b3..1a570c7 100644 --- a/update.go +++ b/update.go @@ -297,8 +297,13 @@ func applyUpdate(zone string, defaultTTL uint32, rrs []dns.RR, rr dns.RR) ([]dns log.Debugf("apex %s add refused", dns.TypeToString[hdr.Rrtype]) return rrs, dns.RcodeRefused, false } + // Hamilton L2: don't mutate the caller's RR header. miekg/dns + // parses the UPDATE message into RRs the caller still owns; + // silently rewriting their TTL is a hygiene smell. Copy first, + // then apply our default if needed. if hdr.Ttl == 0 { - hdr.Ttl = defaultTTL + rr = dns.Copy(rr) + rr.Header().Ttl = defaultTTL } before := len(rrs) rrs = addRRTo(rrs, rr) diff --git a/zonefile.go b/zonefile.go index e18a148..c3b7bc5 100644 --- a/zonefile.go +++ b/zonefile.go @@ -7,6 +7,7 @@ import ( "os" "os/exec" "path/filepath" + "strconv" "strings" "sync" "time" @@ -346,9 +347,9 @@ func bumpSerial(rrs []dns.RR, now time.Time) error { // serial that happens to look like a valid YYMMDD prefix but is in // the past, which is the same handling: jump to today). if curDate := cur[:6]; isValidYYMMDD(curDate) && curDate >= today { - nnnn := atoi(cur[6:10]) + nnnn := int(mustParseUint(cur[6:10])) if nnnn < 9999 { - soa.Serial = uint32(parseUint(curDate)*serialCounterMul + uint64(nnnn+1)) + soa.Serial = uint32(mustParseUint(curDate)*serialCounterMul + uint64(nnnn+1)) return nil } // NNNN=9999: roll to next encoded day, NNNN=0001. @@ -357,13 +358,13 @@ func bumpSerial(rrs []dns.RR, now time.Time) error { return fmt.Errorf("serial date %q unparseable: %w", curDate, err) } next := d.AddDate(0, 0, 1).Format("060102") - soa.Serial = uint32(parseUint(next)*serialCounterMul + 1) + soa.Serial = uint32(mustParseUint(next)*serialCounterMul + 1) return nil } // Older or unparseable: jump to today*10000+1. Migration path for // legacy YYYYMMDDNN serials lives here. - candidate := uint32(parseUint(today)*serialCounterMul + 1) + candidate := uint32(mustParseUint(today)*serialCounterMul + 1) // H5 — explicit MaxUint32 guard. Plain `>` comparison is correct in // practice (we'd never wrap during the zone's lifetime: 10000 @@ -399,23 +400,20 @@ func isValidYYMMDD(s string) bool { return err == nil } -// atoi is a tiny helper that ignores errors — only called on a -// substring we already validated is two digits. -func atoi(s string) int { - n := 0 - for _, c := range s { - n = n*10 + int(c-'0') - } - return n -} - -// parseUint parses an all-digits string into a uint64. Used because -// strconv.ParseUint adds error-handling overhead we don't need on -// internally-controlled inputs. -func parseUint(s string) uint64 { - var n uint64 - for _, c := range s { - n = n*10 + uint64(c-'0') +// mustParseUint parses an all-digit string into uint64. Panics on +// malformed input — the caller is responsible for passing only strings +// that were validated as digit-substrings (e.g., a fixed-width slice of +// a YYMMDDNNNN-formatted serial). Using strconv via this thin wrapper +// keeps the panic behavior explicit while sharing stdlib's robust +// parsing (Hamilton L1). +func mustParseUint(s string) uint64 { + n, err := strconv.ParseUint(s, 10, 64) + if err != nil { + // Programmer error if we ever hit this — every caller passes + // digits-only strings derived from time.Format or a sliced + // SOA serial. Panic so the bug surfaces in tests/CI rather + // than silently producing a 0 serial. + panic(fmt.Sprintf("mustParseUint(%q): %v", s, err)) } return n }