package rfc2136

import (
	"context"
	"fmt"
	"math"
	"os"
	"os/exec"
	"path/filepath"
	"strconv"
	"strings"
	"sync"
	"time"

	"github.com/miekg/dns"
)

// gitCommandTimeout caps the time we are willing to wait on git when we
// shell out to it. If the process hangs (NFS stall, gpg-sign prompt,
// broken pre-commit hook waiting on stdin, etc.) we must not block the
// caller — which holds the per-zone mutex — forever. 10s is generous
// for a local-disk repo.
//
// Note: commit derives a single context from this value and uses it for
// both `git add` and `git commit`, so the bound applies to the pair.
const gitCommandTimeout = 10 * time.Second

// fileSnapshot captures the file-identity fingerprint at the moment we
// last read the zone file. We compare it to the live stat just before
// writing back to detect concurrent modification (rsync push, manual
// edit, `git checkout`). If anything changed the file out from under
// us, we refuse the UPDATE — Caddy will retry, and the next loadRRs
// will see the updated state.
type fileSnapshot struct {
	mtime time.Time // modification time reported by stat at read time
	size  int64     // size in bytes reported by stat at read time
}

// matches returns true if `info` reports the same mtime and size we
// captured. Inode comparison would be stricter, but mtime+size is
// sufficient for the failure modes we care about (rename-over-original
// edits change both).
//
// mtime is compared with time.Time.Equal, so it is the instant that
// matters, not the location or monotonic reading. A rewrite that keeps
// both the size and the timestamp is, by construction, not detected.
func (s fileSnapshot) matches(info os.FileInfo) bool {
	return s.mtime.Equal(info.ModTime()) && s.size == info.Size()
}

// zoneFile is a file-backed authority for a single DNS zone. Replaces
// the Phase-1.3 in-memory recordStore.
//
// On every UPDATE, the file is read fully into memory as parsed RRs,
// the requested adds/deletes are applied to that slice, the SOA serial
// is bumped (CalVer YYMMDD*10000+NNNN style, see bumpSerial), and the
// file is rewritten via an atomic temp-file rename. CoreDNS's `auto`
// plugin notices the mtime change within its reload interval (~30s)
// and re-serves the zone. HE eventually pulls on its SOA refresh.
//
// Concurrency: per-zone mutex serializes RFC 2136 UPDATEs against
// each other and against the plugin's own reads. It does NOT protect
// against external editors (e.g.
a human running an editor while the // plugin is mid-write); that's the operator's responsibility, and // the typical mitigation is to do manual edits when no UPDATEs are // in flight (or just accept the rare race — the worst case is one // lost manual edit, easily restored from git). type zoneFile struct { mu sync.Mutex // Path is the absolute path to the zone file on disk. Path string // Origin is the canonical (lowercase, trailing dot) zone apex. Origin string // AutoCommit, when true, runs `git add && git commit ...` // after every successful write. Defaults to true (per the chosen // architecture: every dynamic update should leave a git trail). AutoCommit bool // GitAuthorName and GitAuthorEmail are passed to `git commit` // via -c user.name and -c user.email so the commits are // attributable without depending on the system git config. GitAuthorName string GitAuthorEmail string } // canon normalises a DNS name to the store's internal form: lowercase // with trailing dot. miekg/dns sometimes hands us names without the // trailing dot; passing through this once at the boundary keeps every // lookup, every comparison consistent. func canon(name string) string { return strings.ToLower(dns.Fqdn(name)) } // openZoneFile prepares a zoneFile handle. Does NOT read or parse the // file; that happens lazily in each operation (so the file's content // is always fresh and we never serve a stale snapshot). func openZoneFile(path, origin string) *zoneFile { return &zoneFile{ Path: path, Origin: canon(origin), AutoCommit: true, GitAuthorName: "coredns-rfc2136", GitAuthorEmail: "rfc2136@coredns", } } // loadRRs reads the zone file and parses it into an RR slice via // miekg/dns's zone parser. The parser handles $ORIGIN, $TTL, multi-line // SOA, comments, includes, etc. // // Returns (rrs, snapshot, error). The snapshot fingerprints the file // identity at read time so a subsequent writeIfUnchanged can detect // concurrent modification. 
// // Hamilton H4 — strict-parse validation: a single malformed line could // otherwise produce a partial parse where parser.Err() returns nil but // some records silently went missing. To catch this, we enforce a // post-parse invariant: exactly one SOA RR, and that SOA's name equals // the configured zone origin. A zone file that's been partially eaten // by the parser usually loses its SOA along the way — checking SOA // presence catches both H4 (silent truncation) and H3 (multi-SOA or // wrong-apex SOA) with a single guard. func (z *zoneFile) loadRRs() ([]dns.RR, fileSnapshot, error) { f, err := os.Open(z.Path) if err != nil { return nil, fileSnapshot{}, fmt.Errorf("open %s: %w", z.Path, err) } defer f.Close() info, err := f.Stat() if err != nil { return nil, fileSnapshot{}, fmt.Errorf("stat %s: %w", z.Path, err) } snap := fileSnapshot{mtime: info.ModTime(), size: info.Size()} parser := dns.NewZoneParser(f, z.Origin, z.Path) parser.SetDefaultTTL(3600) var rrs []dns.RR for rr, ok := parser.Next(); ok; rr, ok = parser.Next() { rrs = append(rrs, rr) } if err := parser.Err(); err != nil { return nil, snap, fmt.Errorf("parse %s: %w", z.Path, err) } if len(rrs) == 0 { return nil, snap, fmt.Errorf("%s: zero RRs parsed", z.Path) } // H3/H4 invariant: exactly one SOA, anchored at the zone origin. // Refuse to operate on a zone file whose SOA structure is wrong — // any subsequent bumpSerial or write would compound the damage. if err := assertSingleApexSOA(rrs, z.Origin); err != nil { return nil, snap, fmt.Errorf("zone %s integrity check failed: %w", z.Path, err) } return rrs, snap, nil } // assertSingleApexSOA enforces that rrs contains exactly one SOA and // that its owner matches the zone origin. Returns an error otherwise. // This is the H3+H4 zone-integrity invariant. 
func assertSingleApexSOA(rrs []dns.RR, origin string) error { origin = canon(origin) var soas []*dns.SOA for _, rr := range rrs { if s, ok := rr.(*dns.SOA); ok { soas = append(soas, s) } } switch len(soas) { case 0: return fmt.Errorf("no SOA record found (expected one at %q)", origin) case 1: if canon(soas[0].Hdr.Name) != origin { return fmt.Errorf("SOA owner is %q, expected zone apex %q", soas[0].Hdr.Name, origin) } return nil default: names := make([]string, len(soas)) for i, s := range soas { names[i] = s.Hdr.Name } return fmt.Errorf("multiple SOA records found (%d): %s", len(soas), strings.Join(names, ", ")) } } // checkUnchanged returns nil if the on-disk file still matches the // captured snapshot. If the file has been modified (mtime or size // differs), returns an error — the caller should refuse the UPDATE // rather than clobber the external change. func (z *zoneFile) checkUnchanged(snap fileSnapshot) error { info, err := os.Stat(z.Path) if err != nil { return fmt.Errorf("stat %s: %w", z.Path, err) } if !snap.matches(info) { return fmt.Errorf("concurrent modification detected on %s: mtime %s/%s size %d/%d", z.Path, snap.mtime.Format(time.RFC3339Nano), info.ModTime().Format(time.RFC3339Nano), snap.size, info.Size()) } return nil } // Lookup returns RRs in `rrs` matching (name, rtype). Both name and // the RR header names are canonicalised for the comparison. Pass-by- // slice rather than holding state means we can let the caller batch // multiple operations against one snapshot of the file. func lookupIn(rrs []dns.RR, name string, rtype uint16) []dns.RR { name = canon(name) var out []dns.RR for _, rr := range rrs { hdr := rr.Header() if canon(hdr.Name) == name && hdr.Rrtype == rtype { out = append(out, rr) } } return out } // nameExistsIn reports whether any RR's owner equals name (canonical). 
func nameExistsIn(rrs []dns.RR, name string) bool { name = canon(name) for _, rr := range rrs { if canon(rr.Header().Name) == name { return true } } return false } // removeRRsetFrom returns rrs minus every RR matching (name, rtype). func removeRRsetFrom(rrs []dns.RR, name string, rtype uint16) []dns.RR { name = canon(name) out := rrs[:0:0] for _, rr := range rrs { hdr := rr.Header() if canon(hdr.Name) == name && hdr.Rrtype == rtype { continue } out = append(out, rr) } return out } // removeNameFrom returns rrs minus every RR with the given owner name. func removeNameFrom(rrs []dns.RR, name string) []dns.RR { name = canon(name) out := rrs[:0:0] for _, rr := range rrs { if canon(rr.Header().Name) == name { continue } out = append(out, rr) } return out } // removeRRFrom returns rrs minus the single RR matching the given one // by owner + type + rdata. // // Hamilton M3: per RFC 2136 §2.5.4, a delete-by-exact-match UPDATE // carries CLASS=NONE and TTL=0 as protocol flags, not as match // criteria. The target must match a stored RR by owner+type+rdata // alone. We normalize both sides to the same class + TTL before // invoking dns.IsDuplicate so the comparison is correct. func removeRRFrom(rrs []dns.RR, target dns.RR) []dns.RR { targetN := normalizeForCompare(target) out := rrs[:0:0] matched := false for _, rr := range rrs { if !matched && dns.IsDuplicate(normalizeForCompare(rr), targetN) { matched = true continue } out = append(out, rr) } return out } // addRRTo appends rr to rrs unless an identical RR already exists // (de-dupe semantics per RFC 2136 §3.4.2.2). Same normalization as // removeRRFrom — dedupe is TTL- and class-insensitive in the comparison // (though the stored RR retains its original TTL/class). 
func addRRTo(rrs []dns.RR, rr dns.RR) []dns.RR { rrN := normalizeForCompare(rr) for _, existing := range rrs { if dns.IsDuplicate(normalizeForCompare(existing), rrN) { return rrs } } return append(rrs, rr) } // normalizeForCompare returns a copy of rr with TTL=0 and class=IN so // dns.IsDuplicate can be used to compare by (owner, type, rdata) alone. // Required by RFC 2136 §2.5.4's "TTL and CLASS are flags, not match // criteria" semantics. func normalizeForCompare(rr dns.RR) dns.RR { n := dns.Copy(rr) n.Header().Ttl = 0 n.Header().Class = dns.ClassINET return n } // serialCounterMul is the multiplier between the date prefix and the // counter in our SOA-serial encoding. The format is YYMMDD*10000 + NNNN, // giving 10000 bumps/day (NNNN ∈ [0001, 9999]). The 2-digit year keeps // the maximum within uint32 (the RFC 1035 ceiling for SOA serials): for // 2026-05-22, max serial 2,605,229,999 is well below 2^32-1=4,294,967,295. // A 4-digit year (e.g., 20260522*10000) would overflow uint32. const serialCounterMul = 10000 // bumpSerial advances the SOA's serial in CalVer YYMMDD*10000+NNNN form. // // Behaviour: // - If cur encodes today (or a future-encoded date from a prior NNNN // rollover), increment NNNN. On NNNN=9999, roll forward to the next // encoded day with NNNN=0001. The encoded date drifts ahead of wall // time during heavy churn and catches back up on quiet days; serial // numbers stay strictly monotonic, which is the only DNS hard // requirement. // - Otherwise (older serial; including legacy YYYYMMDDNN-format serials // left over from before this format change), jump to today*10000+1. // Legacy serials migrate automatically here: a value like 2026052299 // (~2.026B) is numerically smaller than today's new-format minimum // 2605220001 (~2.605B), so it falls to this branch and gets rewritten // in-place. The new value is strictly greater, so AXFR receivers (HE // et al.) treat it as a normal forward bump and pull cleanly. 
// // The SOA is found by type (there should be exactly one); mutated in // place. The returned slice is the same slice with the SOA's serial // updated. func bumpSerial(rrs []dns.RR, now time.Time) error { var soa *dns.SOA for _, rr := range rrs { if s, ok := rr.(*dns.SOA); ok { soa = s break } } if soa == nil { return fmt.Errorf("zone has no SOA record") } today := now.UTC().Format("060102") // YYMMDD cur := fmt.Sprintf("%010d", soa.Serial) // Try the new-format read: cur[:6] is YYMMDD, cur[6:10] is NNNN. // We only honour this when the encoded date is today or later — an // older encoded date means a normal new-day reset (or a legacy // serial that happens to look like a valid YYMMDD prefix but is in // the past, which is the same handling: jump to today). if curDate := cur[:6]; isValidYYMMDD(curDate) && curDate >= today { nnnn := int(mustParseUint(cur[6:10])) if nnnn < 9999 { soa.Serial = uint32(mustParseUint(curDate)*serialCounterMul + uint64(nnnn+1)) return nil } // NNNN=9999: roll to next encoded day, NNNN=0001. d, err := time.Parse("060102", curDate) if err != nil { return fmt.Errorf("serial date %q unparseable: %w", curDate, err) } next := d.AddDate(0, 0, 1).Format("060102") soa.Serial = uint32(mustParseUint(next)*serialCounterMul + 1) return nil } // Older or unparseable: jump to today*10000+1. Migration path for // legacy YYYYMMDDNN serials lives here. candidate := uint32(mustParseUint(today)*serialCounterMul + 1) // H5 — explicit MaxUint32 guard. Plain `>` comparison is correct in // practice (we'd never wrap during the zone's lifetime: 10000 // bumps/day × 365 days × ~117 years = ~427M, well under 2^32). The // real failure mode we must prevent is wrap-to-0: if soa.Serial // somehow reached MaxUint32 (hand-edit, fuzz, or a future code path // we haven't written), `soa.Serial++` would wrap to 0, and // downstream secondaries per RFC 1982 treat 0-after-MaxUint32 as // "older" — they refuse to AXFR, and the zone goes dark. 
Loud // refusal forces the operator to manually reset the serial, // instead of silently bricking the zone. if candidate <= soa.Serial { if soa.Serial == math.MaxUint32 { return fmt.Errorf("SOA serial at uint32 max (%d) — refusing to wrap to 0; operator must reset zone serial manually (see RFC 1982 §3.2)", soa.Serial) } // Defensive monotonic advance for the unusual "current serial // is already > today's new-format minimum" case (e.g., a // hand-edit set it to a far-future value). soa.Serial++ return nil } soa.Serial = candidate return nil } // isValidYYMMDD reports whether s is a 6-character YYMMDD string with a // valid month and day. Year is any 2-digit value (00-99). func isValidYYMMDD(s string) bool { if len(s) != 6 { return false } _, err := time.Parse("060102", s) return err == nil } // mustParseUint parses an all-digit string into uint64. Panics on // malformed input — the caller is responsible for passing only strings // that were validated as digit-substrings (e.g., a fixed-width slice of // a YYMMDDNNNN-formatted serial). Using strconv via this thin wrapper // keeps the panic behavior explicit while sharing stdlib's robust // parsing (Hamilton L1). func mustParseUint(s string) uint64 { n, err := strconv.ParseUint(s, 10, 64) if err != nil { // Programmer error if we ever hit this — every caller passes // digits-only strings derived from time.Format or a sliced // SOA serial. Panic so the bug surfaces in tests/CI rather // than silently producing a 0 serial. panic(fmt.Sprintf("mustParseUint(%q): %v", s, err)) } return n } // writeAtomic serializes rrs to a temp file in the same directory as // z.Path, then renames over the destination. POSIX guarantees atomic // rename on local filesystems, so a partial write can never leave a // corrupt zone file on disk. // // Format: one RR per line, tab-separated owner/TTL/class/type/rdata. 
// Comments and multi-line SOA formatting from the original file are // NOT preserved (v1 limitation; sophisticated comment preservation can // land in v2). A short header line is emitted with the write timestamp // and the plugin name, so it's obvious in `git log` what touched the // file. func (z *zoneFile) writeAtomic(rrs []dns.RR, now time.Time) error { dir := filepath.Dir(z.Path) tmp, err := os.CreateTemp(dir, ".rfc2136-*.zone") if err != nil { return fmt.Errorf("create temp: %w", err) } tmpPath := tmp.Name() // Best-effort cleanup if we fail before the rename. defer func() { if tmpPath != "" { _ = os.Remove(tmpPath) } }() header := fmt.Sprintf("; Auto-written by coredns-rfc2136 on %s\n; Zone: %s\n$ORIGIN %s\n", now.UTC().Format(time.RFC3339), z.Origin, z.Origin) if _, err := tmp.WriteString(header); err != nil { _ = tmp.Close() return fmt.Errorf("write header: %w", err) } for _, rr := range rrs { if _, err := tmp.WriteString(rr.String() + "\n"); err != nil { _ = tmp.Close() return fmt.Errorf("write rr: %w", err) } } if err := tmp.Sync(); err != nil { _ = tmp.Close() return fmt.Errorf("sync: %w", err) } if err := tmp.Close(); err != nil { return fmt.Errorf("close: %w", err) } if err := os.Rename(tmpPath, z.Path); err != nil { return fmt.Errorf("rename %s -> %s: %w", tmpPath, z.Path, err) } tmpPath = "" // suppress cleanup; rename consumed it return nil } // commit stages and commits the zone file via git. Runs from the // repository directory inferred from the zone file's parent. Returns // nil silently if AutoCommit is false. Returns an error if the commit // fails; the caller decides whether to roll back the file write. // // Both git invocations run under a context with a hard timeout // (gitCommandTimeout). If git hangs (NFS stall, gpg-sign prompt, // pre-commit hook waiting on stdin), we kill it rather than block the // caller's per-zone mutex indefinitely. ACME storms must not be able // to wedge the plugin via git getting stuck. 
func (z *zoneFile) commit(message string) error { if !z.AutoCommit { return nil } // We run git from the directory containing the zone file. git will // walk upward to find the .git dir. dir := filepath.Dir(z.Path) ctx, cancel := context.WithTimeout(context.Background(), gitCommandTimeout) defer cancel() // `git add` first; if file is already in the index, no harm done. add := exec.CommandContext(ctx, "git", "-C", dir, "add", "--", z.Path, ) if out, err := add.CombinedOutput(); err != nil { return fmt.Errorf("git add failed: %w: %s", err, strings.TrimSpace(string(out))) } commit := exec.CommandContext(ctx, "git", "-C", dir, "-c", "user.name="+z.GitAuthorName, "-c", "user.email="+z.GitAuthorEmail, "commit", "-q", "-m", message, "--", z.Path, ) if out, err := commit.CombinedOutput(); err != nil { return fmt.Errorf("git commit failed: %w: %s", err, strings.TrimSpace(string(out))) } return nil }