coredns-rfc2136/zonefile.go
Ryan Malloy 8466f08780 Widen SOA serial counter: YYMMDDNNNN, 10000 bumps/day
The previous YYYYMMDDNN encoding capped at NN=99 (100 bumps/day) and
hard-failed UPDATEs once the day's counter was exhausted — confirmed
in production on 2026-05-22 when ACME activity across the
supported.systems zone hit the cap and SERVFAILed every subsequent UPDATE.

New format: YYMMDD*10000+NNNN. With 4-digit NNNN we get 10000/day, and
dropping the century keeps a 2026-dated serial (2,605,229,999 max) under
uint32's 4,294,967,295 ceiling. A 4-digit year (e.g., 20260522*10000)
would overflow uint32 — RFC 1035's SOA serial type bounds this.

Three behavior changes:

1. On NNNN=9999, roll forward to the next encoded day with NNNN=0001
   rather than erroring. The encoded date drifts ahead of wall time on
   heavy churn days and catches up on quiet days; monotonic ordering
   (the only DNS requirement) holds.

2. Future-encoded serials (from a prior rollover) are honoured — the
   previous "older date" branch downgraded them back to today*100+1,
   producing a backwards serial. This bug also tripped a manual
   workaround on the same day. Now: future encoded dates bump their
   own NNNN.

3. Legacy YYYYMMDDNN serials migrate automatically on first bump. A
   value like 2026052299 (~2.026B) is numerically smaller than today's
   new-format minimum 2605220001 (~2.605B), so the older-or-unparseable
   branch fires and rewrites in place. New > old, so AXFR receivers
   treat it as a clean forward bump.

Tests cover same-day, rollover, future-encoded no-regress, legacy
migration, non-CalVer reset, and no-SOA error.
2026-05-22 11:51:45 -06:00

374 lines
12 KiB
Go

package rfc2136
import (
"fmt"
"os"
"os/exec"
"path/filepath"
"strings"
"sync"
"time"
"github.com/miekg/dns"
)
// zoneFile is a file-backed authority for a single DNS zone. Replaces
// the Phase-1.3 in-memory recordStore.
//
// On every UPDATE, the file is read fully into memory as parsed RRs,
// the requested adds/deletes are applied to that slice, the SOA serial
// is bumped (CalVer YYMMDD*10000+NNNN, see bumpSerial), and the file is
// rewritten via an atomic temp-file rename. CoreDNS's `auto` plugin
// notices the mtime change within its reload interval (~30s) and
// re-serves the zone. HE eventually pulls on its SOA refresh.
//
// Concurrency: per-zone mutex serializes RFC 2136 UPDATEs against
// each other and against the plugin's own reads. It does NOT protect
// against external editors (e.g. a human running an editor while the
// plugin is mid-write); that's the operator's responsibility, and
// the typical mitigation is to do manual edits when no UPDATEs are
// in flight (or just accept the rare race — the worst case is one
// lost manual edit, easily restored from git).
type zoneFile struct {
// mu is the per-zone mutex described above.
// NOTE(review): none of the methods in this part of the file lock it;
// presumably the UPDATE handler holds it around load/modify/write —
// confirm against the caller.
mu sync.Mutex
// Path is the absolute path to the zone file on disk.
Path string
// Origin is the canonical (lowercase, trailing dot) zone apex.
Origin string
// AutoCommit, when true, runs `git add <path> && git commit ...`
// after every successful write. Defaults to true (per the chosen
// architecture: every dynamic update should leave a git trail).
AutoCommit bool
// GitAuthorName and GitAuthorEmail are passed to `git commit`
// via -c user.name and -c user.email so the commits are
// attributable without depending on the system git config.
GitAuthorName string
GitAuthorEmail string
}
// canon converts a DNS name into the one spelling this file compares
// names in: fully qualified (trailing dot, via dns.Fqdn) and lowercase.
// Names coming out of miekg/dns do not always carry the trailing dot, so
// every lookup and comparison funnels both sides through here first.
func canon(name string) string {
	fqdn := dns.Fqdn(name)
	return strings.ToLower(fqdn)
}
// openZoneFile builds a zoneFile handle for the zone stored at path with
// apex origin (canonicalised via canon). The file itself is not opened
// or parsed here; each operation reads it afresh, so a stale snapshot is
// never served.
//
// The handle starts with AutoCommit enabled and a fixed git identity of
// "coredns-rfc2136" <rfc2136@coredns>.
func openZoneFile(path, origin string) *zoneFile {
	zf := new(zoneFile)
	zf.Path = path
	zf.Origin = canon(origin)
	zf.AutoCommit = true
	zf.GitAuthorName = "coredns-rfc2136"
	zf.GitAuthorEmail = "rfc2136@coredns"
	return zf
}
// loadRRs opens z.Path and runs it through miekg/dns's zone parser,
// returning every RR in file order. The parser is seeded with z.Origin
// and a default TTL of 3600 for records that do not carry one.
//
// Errors: the file cannot be opened, the parser reports a syntax error,
// or the file parses cleanly but yields no records at all (an empty zone
// is treated as a failure rather than returned as an empty slice).
//
// NOTE(review): the parser is used with its default settings; whether
// $INCLUDE directives are honoured depends on those defaults — confirm
// against the miekg/dns ZoneParser documentation before relying on it.
func (z *zoneFile) loadRRs() ([]dns.RR, error) {
	fh, err := os.Open(z.Path)
	if err != nil {
		return nil, fmt.Errorf("open %s: %w", z.Path, err)
	}
	defer fh.Close()

	zp := dns.NewZoneParser(fh, z.Origin, z.Path)
	zp.SetDefaultTTL(3600)

	var parsed []dns.RR
	for {
		rr, more := zp.Next()
		if !more {
			break
		}
		parsed = append(parsed, rr)
	}

	// Next() stops on both EOF and error; Err() tells the two apart.
	switch perr := zp.Err(); {
	case perr != nil:
		return nil, fmt.Errorf("parse %s: %w", z.Path, perr)
	case len(parsed) == 0:
		return nil, fmt.Errorf("%s: zero RRs parsed", z.Path)
	}
	return parsed, nil
}
// lookupIn returns the RRs in rrs whose owner equals name and whose type
// equals rtype, in their original order. Owner names on both sides are
// compared in canonical form (see canon). The result is nil when nothing
// matches.
//
// It works on a caller-supplied slice rather than on zoneFile state so
// that several operations can be batched against one snapshot of the
// file.
func lookupIn(rrs []dns.RR, name string, rtype uint16) []dns.RR {
	owner := canon(name)
	var matches []dns.RR
	for i := range rrs {
		h := rrs[i].Header()
		if h.Rrtype != rtype || canon(h.Name) != owner {
			continue
		}
		matches = append(matches, rrs[i])
	}
	return matches
}
// nameExistsIn reports whether at least one RR in rrs is owned by name,
// regardless of type. Both sides are compared in canonical form.
func nameExistsIn(rrs []dns.RR, name string) bool {
	owner := canon(name)
	for i := range rrs {
		if canon(rrs[i].Header().Name) == owner {
			return true
		}
	}
	return false
}
// removeRRsetFrom returns a copy of rrs without the RRset identified by
// (name, rtype): every RR whose canonical owner equals name and whose
// type equals rtype is dropped; all others are kept in order. The input
// slice's backing array is not written to.
func removeRRsetFrom(rrs []dns.RR, name string, rtype uint16) []dns.RR {
	owner := canon(name)
	// Zero-length, zero-capacity view: the first append allocates a fresh
	// backing array instead of overwriting rrs.
	kept := rrs[:0:0]
	for _, rr := range rrs {
		if h := rr.Header(); h.Rrtype != rtype || canon(h.Name) != owner {
			kept = append(kept, rr)
		}
	}
	return kept
}
// removeNameFrom returns a copy of rrs without any RR owned by name,
// whatever its type. Owners are compared in canonical form; surviving
// RRs keep their order and the input's backing array is not written to.
func removeNameFrom(rrs []dns.RR, name string) []dns.RR {
	owner := canon(name)
	// Zero-capacity view forces append to allocate rather than alias rrs.
	kept := rrs[:0:0]
	for _, rr := range rrs {
		if canon(rr.Header().Name) != owner {
			kept = append(kept, rr)
		}
	}
	return kept
}
// removeRRFrom returns a copy of rrs without the first RR that matches
// target by owner, class, type and rdata. TTL is deliberately NOT part of
// the match: RFC 2136 §3.4.2.4 deletes on NAME/TYPE/RDATA only, and a
// "delete an RR" update carries TTL 0 on the wire, so it must still hit a
// stored record with a real TTL.
//
// Matching uses dns.IsDuplicate, which per the miekg/dns documentation
// compares header data and rdata while excluding the TTL. The previous
// rr.String() comparison also compared TTL, so a delete whose TTL
// differed from the stored RR silently removed nothing.
//
// Only the first match is removed; any further matches are kept. The
// input's backing array is not written to. If nothing matches, the
// result has the same contents as rrs.
func removeRRFrom(rrs []dns.RR, target dns.RR) []dns.RR {
	// Zero-capacity view forces append to allocate rather than alias rrs.
	out := rrs[:0:0]
	matched := false
	for _, rr := range rrs {
		if !matched && dns.IsDuplicate(rr, target) {
			matched = true
			continue
		}
		out = append(out, rr)
	}
	return out
}
// addRRTo adds rr to rrs with the duplicate handling of RFC 2136
// §3.4.2.2: when the zone already holds an RR with the same owner,
// class, type and rdata, the zone RR is replaced by the update RR rather
// than a second copy being appended.
//
// Duplicates are detected with dns.IsDuplicate, which per the miekg/dns
// documentation compares header data and rdata while excluding the TTL.
// The previous rr.String() comparison included the TTL, so re-adding an
// existing record with a different TTL left two RRs with identical rdata
// (and mixed TTLs) in the zone.
//
// Outcomes:
//   - duplicate with the same TTL: rrs is returned unchanged;
//   - duplicate with a different TTL: a copy of rrs is returned with the
//     old RR replaced by rr in the same position (the caller's slice is
//     not modified);
//   - no duplicate: rr is appended.
func addRRTo(rrs []dns.RR, rr dns.RR) []dns.RR {
	for i, existing := range rrs {
		if !dns.IsDuplicate(existing, rr) {
			continue
		}
		if existing.Header().Ttl == rr.Header().Ttl {
			// Already present exactly as requested.
			return rrs
		}
		// Same record, new TTL: swap it in on a copy so the caller's
		// original slice keeps its contents.
		out := make([]dns.RR, len(rrs))
		copy(out, rrs)
		out[i] = rr
		return out
	}
	return append(rrs, rr)
}
// serialCounterMul is the multiplier between the date prefix and the
// counter in our SOA-serial encoding. The format is YYMMDD*10000 + NNNN.
// bumpSerial starts each encoded day at NNNN=0001 and rolls over to the
// next encoded day after NNNN=9999, so one encoded day holds up to 9999
// bumps. The 2-digit year keeps the maximum within uint32 (the RFC 1035
// ceiling for SOA serials): for 2026-05-22, max serial 2,605,229,999 is
// well below 2^32-1=4,294,967,295.
// A 4-digit year (e.g., 20260522*10000) would overflow uint32.
//
// The 2-digit-year encoding itself only fits in uint32 through encoded
// date 42-12-31 (4,212,319,999); 43-01-01 would encode to 4,301,010,001,
// which exceeds 2^32-1.
const serialCounterMul = 10000
// bumpSerial advances the serial of the first SOA record found in rrs,
// using the CalVer encoding YYMMDD*10000+NNNN (dates taken in UTC from
// now). The SOA is mutated in place; nothing but an error is returned.
//
// The current serial is read as ten zero-padded decimal digits: the
// first six are the encoded date, the last four the counter NNNN.
//
//   - Encoded date is a valid YYMMDD and is today or later (later happens
//     after a previous NNNN rollover pushed the date ahead of wall time):
//     NNNN is incremented. At NNNN=9999 the serial instead moves to the
//     following calendar day with NNNN=0001.
//   - Anything else (an older date, or digits that are not a valid date,
//     which includes legacy YYYYMMDDNN serials): jump to today*10000+1.
//     If that would not be greater than the current serial, the serial
//     is simply incremented by one so it never moves backwards.
//
// Returns an error when rrs contains no SOA, or when the encoded date
// cannot be re-parsed during rollover.
//
// NOTE(review): the uint32 conversions truncate silently once
// YYMMDD*10000 exceeds 2^32-1, i.e. for encoded dates from 43-01-01 on.
func bumpSerial(rrs []dns.RR, now time.Time) error {
	var soa *dns.SOA
	for i := range rrs {
		if found, isSOA := rrs[i].(*dns.SOA); isSOA {
			soa = found
			break
		}
	}
	if soa == nil {
		return fmt.Errorf("zone has no SOA record")
	}

	todayDate := now.UTC().Format("060102") // YYMMDD
	digits := fmt.Sprintf("%010d", soa.Serial)
	encodedDate, counter := digits[:6], atoi(digits[6:10])

	// Fixed-width digit strings compare the same way lexically and
	// numerically, so >= on the strings is a date comparison.
	currentOrFuture := isValidYYMMDD(encodedDate) && encodedDate >= todayDate

	switch {
	case currentOrFuture && counter < 9999:
		// Room left in this encoded day: bump the counter.
		soa.Serial = uint32(parseUint(encodedDate)*serialCounterMul + uint64(counter+1))

	case currentOrFuture:
		// Counter exhausted: move to the next encoded day, NNNN=0001.
		day, err := time.Parse("060102", encodedDate)
		if err != nil {
			return fmt.Errorf("serial date %q unparseable: %w", encodedDate, err)
		}
		following := day.AddDate(0, 0, 1).Format("060102")
		soa.Serial = uint32(parseUint(following)*serialCounterMul + 1)

	default:
		// Older or non-date serial: reset to today's first serial, unless
		// that would regress (e.g. a hand-edited far-future value), in
		// which case just step forward by one.
		fresh := uint32(parseUint(todayDate)*serialCounterMul + 1)
		if fresh > soa.Serial {
			soa.Serial = fresh
		} else {
			soa.Serial++
		}
	}
	return nil
}
// isValidYYMMDD reports whether s is exactly six characters long and
// parses as a calendar date under the layout "060102" (two-digit year,
// month, day). Impossible dates such as month 13 or February 30 are
// rejected by the parser; any two-digit year is accepted.
func isValidYYMMDD(s string) bool {
	if len(s) == 6 {
		if _, err := time.Parse("060102", s); err == nil {
			return true
		}
	}
	return false
}
// atoi folds the characters of s into a base-10 int, left to right,
// without any validation or error reporting. It is meant for substrings
// already known to consist of ASCII digits (bumpSerial passes the
// four-digit counter cut from a zero-padded serial); for any other input
// the result is meaningless. The empty string yields 0.
func atoi(s string) int {
	var total int
	for _, digit := range s {
		total *= 10
		total += int(digit - '0')
	}
	return total
}
// parseUint folds an all-digit string into a uint64, left to right. It
// performs no validation and reports no errors: callers only pass digit
// strings they produced themselves, so strconv.ParseUint's error
// handling would be unused. The empty string yields 0.
func parseUint(s string) uint64 {
	var total uint64
	for _, digit := range s {
		total *= 10
		total += uint64(digit - '0')
	}
	return total
}
// writeAtomic serializes rrs to a temp file in the same directory as
// z.Path, syncs it, then renames it over the destination. POSIX
// guarantees atomic rename on local filesystems, so a partial write can
// never leave a corrupt zone file on disk.
//
// Permissions: os.CreateTemp always creates its file with mode 0600, and
// rename installs the temp file's mode along with its contents. To avoid
// silently tightening the zone file on the first dynamic update (which
// can lock out a reader running as a different user), the permission
// bits of the existing z.Path are copied onto the temp file before the
// rename. If z.Path does not exist yet, or the chmod fails, the temp
// file's default mode is kept. Ownership is not adjusted.
//
// Format: a short comment header (write timestamp from now in UTC, zone
// name) and an $ORIGIN line, followed by one RR per line as produced by
// rr.String(). Comments and multi-line SOA formatting from the original
// file are NOT preserved (v1 limitation; sophisticated comment
// preservation can land in v2).
//
// On any failure before the rename the temp file is removed and z.Path
// is left untouched.
func (z *zoneFile) writeAtomic(rrs []dns.RR, now time.Time) error {
	dir := filepath.Dir(z.Path)
	tmp, err := os.CreateTemp(dir, ".rfc2136-*.zone")
	if err != nil {
		return fmt.Errorf("create temp: %w", err)
	}
	tmpPath := tmp.Name()
	// Best-effort cleanup if we fail before the rename.
	defer func() {
		if tmpPath != "" {
			_ = os.Remove(tmpPath)
		}
	}()

	// Carry the current file's permission bits over to the replacement.
	// Deliberately best-effort: a failure here leaves us with the 0600
	// default, which is exactly the pre-existing behaviour, so it is not
	// worth failing the UPDATE over.
	if info, statErr := os.Stat(z.Path); statErr == nil {
		_ = tmp.Chmod(info.Mode().Perm())
	}

	header := fmt.Sprintf("; Auto-written by coredns-rfc2136 on %s\n; Zone: %s\n$ORIGIN %s\n",
		now.UTC().Format(time.RFC3339), z.Origin, z.Origin)
	if _, err := tmp.WriteString(header); err != nil {
		_ = tmp.Close()
		return fmt.Errorf("write header: %w", err)
	}
	for _, rr := range rrs {
		if _, err := tmp.WriteString(rr.String() + "\n"); err != nil {
			_ = tmp.Close()
			return fmt.Errorf("write rr: %w", err)
		}
	}
	// Flush file contents to stable storage before the rename makes them
	// visible under the real name.
	if err := tmp.Sync(); err != nil {
		_ = tmp.Close()
		return fmt.Errorf("sync: %w", err)
	}
	if err := tmp.Close(); err != nil {
		return fmt.Errorf("close: %w", err)
	}
	if err := os.Rename(tmpPath, z.Path); err != nil {
		return fmt.Errorf("rename %s -> %s: %w", tmpPath, z.Path, err)
	}
	tmpPath = "" // suppress cleanup; rename consumed it
	return nil
}
// commit records the current state of the zone file in git: `git add`
// followed by `git commit -q -m message`, both limited to z.Path and
// both run with `-C <directory containing z.Path>` (git then locates the
// enclosing repository by walking upward). The commit is made with
// user.name / user.email taken from GitAuthorName / GitAuthorEmail via
// `-c`, so it does not depend on the system git configuration.
//
// When AutoCommit is false this is a no-op returning nil. Otherwise the
// first failing git invocation is returned as an error that includes
// git's combined stdout/stderr; the caller decides whether to roll back
// the file write.
func (z *zoneFile) commit(message string) error {
	if !z.AutoCommit {
		return nil
	}

	repoDir := filepath.Dir(z.Path)
	steps := []struct {
		verb string
		args []string
	}{
		// Staging first is harmless when the file is already tracked.
		{"add", []string{
			"-C", repoDir,
			"add", "--", z.Path,
		}},
		{"commit", []string{
			"-C", repoDir,
			"-c", "user.name=" + z.GitAuthorName,
			"-c", "user.email=" + z.GitAuthorEmail,
			"commit", "-q", "-m", message, "--", z.Path,
		}},
	}

	for _, step := range steps {
		output, err := exec.Command("git", step.args...).CombinedOutput()
		if err != nil {
			return fmt.Errorf("git %s failed: %w: %s", step.verb, err, strings.TrimSpace(string(output)))
		}
	}
	return nil
}