H3+H4 — Zone SOA invariant. After parsing, loadRRs enforces:
exactly one SOA, owned by the zone apex. Catches three failure modes
with a single guard:
- Missing SOA (H4): a malformed line earlier in the file may have
tripped miekg/dns's ZoneParser into dropping records without
reporting an error via parser.Err(). If the SOA went missing, we
refuse rather than treat the partial parse as authoritative.
- Multiple SOAs (H3): zone files with accidental duplicate SOA
records produce inconsistent zone state visible to AXFR clients.
The old code's first-match SOA-bump would silently propagate the
inconsistency. Now we refuse.
- Non-apex SOA (H3): an SOA whose owner doesn't match the zone
origin is either a parse error or a hand-edit mistake; bumping
it would leave the real apex unchanged. Now we refuse.
assertSingleApexSOA returns a descriptive error so the failure mode
is actionable from logs alone.
H5 — MaxUint32 guard in bumpSerial. The old "+1 defensive advance"
branch would wrap to 0 if soa.Serial == MaxUint32, and downstream
secondaries per RFC 1982 §3.2 treat 0-after-MaxUint32 as "older"
(they refuse to AXFR and the zone goes dark). Now we explicitly check
and refuse with a loud message; operator must reset the serial
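manually. Practical reach is near zero for our deployment (a plain counter
at 10000 bumps/day would need roughly 1,170 years to exhaust uint32; note
that the YYMMDD*10000+NNNN date encoding itself only fits in uint32 for
dates through 2042-12-31), but the defensive ceiling matters for fuzz,
hand-edit, or future code-path errors.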
The full RFC 1982 wraparound-aware comparison was prototyped but
removed: it broke the legacy-format migration case where a tiny
non-CalVer serial (e.g., 12345) is "more than 2^31 distant" from a
new-format serial (~2.6B), which RFC 1982 reads as "going backwards"
and would block migration. Naive `>` is correct in practice; the
MaxUint32 case is the only real failure mode worth guarding.
New tests:
- TestBumpSerial_MaxUint32_RefusesWrap
- TestLoadRRs_NoSOA_Refused
- TestLoadRRs_MultipleSOAs_Refused
- TestLoadRRs_NonApexSOA_Refused
495 lines
17 KiB
Go
495 lines
17 KiB
Go
package rfc2136
|
||
|
||
import (
|
||
"context"
|
||
"fmt"
|
||
"math"
|
||
"os"
|
||
"os/exec"
|
||
"path/filepath"
|
||
"strings"
|
||
"sync"
|
||
"time"
|
||
|
||
"github.com/miekg/dns"
|
||
)
|
||
|
||
// gitCommandTimeout bounds the time commit may spend shelling out to
// git. A wedged git process (NFS stall, gpg-sign prompt, a pre-commit
// hook blocked on stdin, ...) must never stall the caller, which is
// expected to be holding the per-zone mutex. Ten seconds is generous
// for a repository on local disk.
const gitCommandTimeout time.Duration = 10 * time.Second
|
||
|
||
// fileSnapshot is the identity fingerprint of the zone file taken when
// it was last read: its modification time and its size. loadRRs
// produces one and checkUnchanged compares it against a fresh stat just
// before a write-back, so that an external change (rsync push, manual
// edit, `git checkout`) is detected and the UPDATE refused instead of
// clobbering it.
type fileSnapshot struct {
	mtime time.Time // modification time observed at read time
	size  int64     // file length in bytes observed at read time
}

// matches reports whether info still shows the size and modification
// time recorded in s. Inode identity is deliberately not compared:
// mtime+size is considered sufficient here, because rename-over-original
// edits change both.
func (s fileSnapshot) matches(info os.FileInfo) bool {
	if s.size != info.Size() {
		return false
	}
	return s.mtime.Equal(info.ModTime())
}
|
||
|
||
// zoneFile is the file-backed authority for one DNS zone; it supersedes
// the Phase-1.3 in-memory recordStore.
//
// Intended UPDATE cycle: parse the whole file into RRs, apply the
// requested adds/deletes to that slice, bump the SOA serial, and rewrite
// the file through an atomic temp-file rename. CoreDNS's `auto` plugin
// is expected to pick up the mtime change on its reload interval and
// re-serve the zone; secondaries follow on their SOA refresh.
//
// Concurrency: mu is the per-zone mutex meant to serialise RFC 2136
// UPDATEs against one another and against the plugin's own reads. It
// offers no protection against external editors touching the file while
// a write is in progress — that remains the operator's responsibility
// (worst case: one lost manual edit, recoverable from git).
type zoneFile struct {
	mu sync.Mutex

	// Path is the absolute on-disk location of the zone file.
	Path string

	// Origin is the zone apex in canonical form (lowercase, trailing dot).
	Origin string

	// AutoCommit enables a `git add` + `git commit` of the zone file
	// after each successful write, so every dynamic update leaves a git
	// trail. openZoneFile turns it on.
	AutoCommit bool

	// GitAuthorName and GitAuthorEmail are handed to `git commit` as
	// -c user.name / -c user.email, making commits attributable without
	// relying on the system git configuration.
	GitAuthorName, GitAuthorEmail string
}
|
||
|
||
// canon normalises a DNS name to the store's internal form: lowercase
|
||
// with trailing dot. miekg/dns sometimes hands us names without the
|
||
// trailing dot; passing through this once at the boundary keeps every
|
||
// lookup, every comparison consistent.
|
||
func canon(name string) string {
|
||
return strings.ToLower(dns.Fqdn(name))
|
||
}
|
||
|
||
// openZoneFile prepares a zoneFile handle. Does NOT read or parse the
|
||
// file; that happens lazily in each operation (so the file's content
|
||
// is always fresh and we never serve a stale snapshot).
|
||
func openZoneFile(path, origin string) *zoneFile {
|
||
return &zoneFile{
|
||
Path: path,
|
||
Origin: canon(origin),
|
||
AutoCommit: true,
|
||
GitAuthorName: "coredns-rfc2136",
|
||
GitAuthorEmail: "rfc2136@coredns",
|
||
}
|
||
}
|
||
|
||
// loadRRs reads the zone file and parses it into an RR slice via
|
||
// miekg/dns's zone parser. The parser handles $ORIGIN, $TTL, multi-line
|
||
// SOA, comments, includes, etc.
|
||
//
|
||
// Returns (rrs, snapshot, error). The snapshot fingerprints the file
|
||
// identity at read time so a subsequent writeIfUnchanged can detect
|
||
// concurrent modification.
|
||
//
|
||
// Hamilton H4 — strict-parse validation: a single malformed line could
|
||
// otherwise produce a partial parse where parser.Err() returns nil but
|
||
// some records silently went missing. To catch this, we enforce a
|
||
// post-parse invariant: exactly one SOA RR, and that SOA's name equals
|
||
// the configured zone origin. A zone file that's been partially eaten
|
||
// by the parser usually loses its SOA along the way — checking SOA
|
||
// presence catches both H4 (silent truncation) and H3 (multi-SOA or
|
||
// wrong-apex SOA) with a single guard.
|
||
func (z *zoneFile) loadRRs() ([]dns.RR, fileSnapshot, error) {
|
||
f, err := os.Open(z.Path)
|
||
if err != nil {
|
||
return nil, fileSnapshot{}, fmt.Errorf("open %s: %w", z.Path, err)
|
||
}
|
||
defer f.Close()
|
||
|
||
info, err := f.Stat()
|
||
if err != nil {
|
||
return nil, fileSnapshot{}, fmt.Errorf("stat %s: %w", z.Path, err)
|
||
}
|
||
snap := fileSnapshot{mtime: info.ModTime(), size: info.Size()}
|
||
|
||
parser := dns.NewZoneParser(f, z.Origin, z.Path)
|
||
parser.SetDefaultTTL(3600)
|
||
|
||
var rrs []dns.RR
|
||
for rr, ok := parser.Next(); ok; rr, ok = parser.Next() {
|
||
rrs = append(rrs, rr)
|
||
}
|
||
if err := parser.Err(); err != nil {
|
||
return nil, snap, fmt.Errorf("parse %s: %w", z.Path, err)
|
||
}
|
||
if len(rrs) == 0 {
|
||
return nil, snap, fmt.Errorf("%s: zero RRs parsed", z.Path)
|
||
}
|
||
|
||
// H3/H4 invariant: exactly one SOA, anchored at the zone origin.
|
||
// Refuse to operate on a zone file whose SOA structure is wrong —
|
||
// any subsequent bumpSerial or write would compound the damage.
|
||
if err := assertSingleApexSOA(rrs, z.Origin); err != nil {
|
||
return nil, snap, fmt.Errorf("zone %s integrity check failed: %w", z.Path, err)
|
||
}
|
||
|
||
return rrs, snap, nil
|
||
}
|
||
|
||
// assertSingleApexSOA enforces that rrs contains exactly one SOA and
|
||
// that its owner matches the zone origin. Returns an error otherwise.
|
||
// This is the H3+H4 zone-integrity invariant.
|
||
func assertSingleApexSOA(rrs []dns.RR, origin string) error {
|
||
origin = canon(origin)
|
||
var soas []*dns.SOA
|
||
for _, rr := range rrs {
|
||
if s, ok := rr.(*dns.SOA); ok {
|
||
soas = append(soas, s)
|
||
}
|
||
}
|
||
switch len(soas) {
|
||
case 0:
|
||
return fmt.Errorf("no SOA record found (expected one at %q)", origin)
|
||
case 1:
|
||
if canon(soas[0].Hdr.Name) != origin {
|
||
return fmt.Errorf("SOA owner is %q, expected zone apex %q", soas[0].Hdr.Name, origin)
|
||
}
|
||
return nil
|
||
default:
|
||
names := make([]string, len(soas))
|
||
for i, s := range soas {
|
||
names[i] = s.Hdr.Name
|
||
}
|
||
return fmt.Errorf("multiple SOA records found (%d): %s", len(soas), strings.Join(names, ", "))
|
||
}
|
||
}
|
||
|
||
// checkUnchanged returns nil if the on-disk file still matches the
|
||
// captured snapshot. If the file has been modified (mtime or size
|
||
// differs), returns an error — the caller should refuse the UPDATE
|
||
// rather than clobber the external change.
|
||
func (z *zoneFile) checkUnchanged(snap fileSnapshot) error {
|
||
info, err := os.Stat(z.Path)
|
||
if err != nil {
|
||
return fmt.Errorf("stat %s: %w", z.Path, err)
|
||
}
|
||
if !snap.matches(info) {
|
||
return fmt.Errorf("concurrent modification detected on %s: mtime %s/%s size %d/%d",
|
||
z.Path, snap.mtime.Format(time.RFC3339Nano), info.ModTime().Format(time.RFC3339Nano),
|
||
snap.size, info.Size())
|
||
}
|
||
return nil
|
||
}
|
||
|
||
// Lookup returns RRs in `rrs` matching (name, rtype). Both name and
|
||
// the RR header names are canonicalised for the comparison. Pass-by-
|
||
// slice rather than holding state means we can let the caller batch
|
||
// multiple operations against one snapshot of the file.
|
||
func lookupIn(rrs []dns.RR, name string, rtype uint16) []dns.RR {
|
||
name = canon(name)
|
||
var out []dns.RR
|
||
for _, rr := range rrs {
|
||
hdr := rr.Header()
|
||
if canon(hdr.Name) == name && hdr.Rrtype == rtype {
|
||
out = append(out, rr)
|
||
}
|
||
}
|
||
return out
|
||
}
|
||
|
||
// nameExistsIn reports whether any RR's owner equals name (canonical).
|
||
func nameExistsIn(rrs []dns.RR, name string) bool {
|
||
name = canon(name)
|
||
for _, rr := range rrs {
|
||
if canon(rr.Header().Name) == name {
|
||
return true
|
||
}
|
||
}
|
||
return false
|
||
}
|
||
|
||
// removeRRsetFrom returns rrs minus every RR matching (name, rtype).
|
||
func removeRRsetFrom(rrs []dns.RR, name string, rtype uint16) []dns.RR {
|
||
name = canon(name)
|
||
out := rrs[:0:0]
|
||
for _, rr := range rrs {
|
||
hdr := rr.Header()
|
||
if canon(hdr.Name) == name && hdr.Rrtype == rtype {
|
||
continue
|
||
}
|
||
out = append(out, rr)
|
||
}
|
||
return out
|
||
}
|
||
|
||
// removeNameFrom returns rrs minus every RR with the given owner name.
|
||
func removeNameFrom(rrs []dns.RR, name string) []dns.RR {
|
||
name = canon(name)
|
||
out := rrs[:0:0]
|
||
for _, rr := range rrs {
|
||
if canon(rr.Header().Name) == name {
|
||
continue
|
||
}
|
||
out = append(out, rr)
|
||
}
|
||
return out
|
||
}
|
||
|
||
// removeRRFrom returns rrs minus the single RR matching the given one
|
||
// by owner + type + rdata. String() comparison covers rdata exactness.
|
||
func removeRRFrom(rrs []dns.RR, target dns.RR) []dns.RR {
|
||
targetStr := target.String()
|
||
out := rrs[:0:0]
|
||
matched := false
|
||
for _, rr := range rrs {
|
||
if !matched && rr.String() == targetStr {
|
||
matched = true
|
||
continue
|
||
}
|
||
out = append(out, rr)
|
||
}
|
||
return out
|
||
}
|
||
|
||
// addRRTo appends rr to rrs unless an identical RR already exists
|
||
// (de-dupe semantics per RFC 2136 §3.4.2.2).
|
||
func addRRTo(rrs []dns.RR, rr dns.RR) []dns.RR {
|
||
target := rr.String()
|
||
for _, existing := range rrs {
|
||
if existing.String() == target {
|
||
return rrs
|
||
}
|
||
}
|
||
return append(rrs, rr)
|
||
}
|
||
|
||
// serialCounterMul separates the date prefix from the per-day counter
// in the SOA serial encoding YYMMDD*10000 + NNNN, which leaves room for
// NNNN in [0001, 9999] on each encoded day.
//
// A 2-digit year is used because a 4-digit one cannot fit: 20260522 *
// 10000 is far beyond the uint32 range of an SOA serial. With two
// digits, 2026-05-22 tops out at 2,605,229,999, below 2^32-1 =
// 4,294,967,295. The headroom is finite, though: 421231*10000+9999 =
// 4,212,319,999 still fits, while 430101*10000 = 4,301,010,000 does
// not, so the encoding is only representable for dates through
// 2042-12-31.
const serialCounterMul = 10_000
|
||
|
||
// bumpSerial advances the SOA's serial in CalVer YYMMDD*10000+NNNN form.
|
||
//
|
||
// Behaviour:
|
||
// - If cur encodes today (or a future-encoded date from a prior NNNN
|
||
// rollover), increment NNNN. On NNNN=9999, roll forward to the next
|
||
// encoded day with NNNN=0001. The encoded date drifts ahead of wall
|
||
// time during heavy churn and catches back up on quiet days; serial
|
||
// numbers stay strictly monotonic, which is the only DNS hard
|
||
// requirement.
|
||
// - Otherwise (older serial; including legacy YYYYMMDDNN-format serials
|
||
// left over from before this format change), jump to today*10000+1.
|
||
// Legacy serials migrate automatically here: a value like 2026052299
|
||
// (~2.026B) is numerically smaller than today's new-format minimum
|
||
// 2605220001 (~2.605B), so it falls to this branch and gets rewritten
|
||
// in-place. The new value is strictly greater, so AXFR receivers (HE
|
||
// et al.) treat it as a normal forward bump and pull cleanly.
|
||
//
|
||
// The SOA is found by type (there should be exactly one); mutated in
|
||
// place. The returned slice is the same slice with the SOA's serial
|
||
// updated.
|
||
func bumpSerial(rrs []dns.RR, now time.Time) error {
|
||
var soa *dns.SOA
|
||
for _, rr := range rrs {
|
||
if s, ok := rr.(*dns.SOA); ok {
|
||
soa = s
|
||
break
|
||
}
|
||
}
|
||
if soa == nil {
|
||
return fmt.Errorf("zone has no SOA record")
|
||
}
|
||
|
||
today := now.UTC().Format("060102") // YYMMDD
|
||
cur := fmt.Sprintf("%010d", soa.Serial)
|
||
|
||
// Try the new-format read: cur[:6] is YYMMDD, cur[6:10] is NNNN.
|
||
// We only honour this when the encoded date is today or later — an
|
||
// older encoded date means a normal new-day reset (or a legacy
|
||
// serial that happens to look like a valid YYMMDD prefix but is in
|
||
// the past, which is the same handling: jump to today).
|
||
if curDate := cur[:6]; isValidYYMMDD(curDate) && curDate >= today {
|
||
nnnn := atoi(cur[6:10])
|
||
if nnnn < 9999 {
|
||
soa.Serial = uint32(parseUint(curDate)*serialCounterMul + uint64(nnnn+1))
|
||
return nil
|
||
}
|
||
// NNNN=9999: roll to next encoded day, NNNN=0001.
|
||
d, err := time.Parse("060102", curDate)
|
||
if err != nil {
|
||
return fmt.Errorf("serial date %q unparseable: %w", curDate, err)
|
||
}
|
||
next := d.AddDate(0, 0, 1).Format("060102")
|
||
soa.Serial = uint32(parseUint(next)*serialCounterMul + 1)
|
||
return nil
|
||
}
|
||
|
||
// Older or unparseable: jump to today*10000+1. Migration path for
|
||
// legacy YYYYMMDDNN serials lives here.
|
||
candidate := uint32(parseUint(today)*serialCounterMul + 1)
|
||
|
||
// H5 — explicit MaxUint32 guard. Plain `>` comparison is correct in
|
||
// practice (we'd never wrap during the zone's lifetime: 10000
|
||
// bumps/day × 365 days × ~117 years = ~427M, well under 2^32). The
|
||
// real failure mode we must prevent is wrap-to-0: if soa.Serial
|
||
// somehow reached MaxUint32 (hand-edit, fuzz, or a future code path
|
||
// we haven't written), `soa.Serial++` would wrap to 0, and
|
||
// downstream secondaries per RFC 1982 treat 0-after-MaxUint32 as
|
||
// "older" — they refuse to AXFR, and the zone goes dark. Loud
|
||
// refusal forces the operator to manually reset the serial,
|
||
// instead of silently bricking the zone.
|
||
if candidate <= soa.Serial {
|
||
if soa.Serial == math.MaxUint32 {
|
||
return fmt.Errorf("SOA serial at uint32 max (%d) — refusing to wrap to 0; operator must reset zone serial manually (see RFC 1982 §3.2)", soa.Serial)
|
||
}
|
||
// Defensive monotonic advance for the unusual "current serial
|
||
// is already > today's new-format minimum" case (e.g., a
|
||
// hand-edit set it to a far-future value).
|
||
soa.Serial++
|
||
return nil
|
||
}
|
||
soa.Serial = candidate
|
||
return nil
|
||
}
|
||
|
||
// isValidYYMMDD reports whether s is exactly six characters long and
// parses as a date in Go's "060102" (YYMMDD) layout, i.e. it has a
// valid month and a valid day for that month. Any two-digit year is
// accepted.
func isValidYYMMDD(s string) bool {
	if len(s) == 6 {
		_, perr := time.Parse("060102", s)
		return perr == nil
	}
	return false
}
|
||
|
||
// atoi converts a string of decimal digits to an int. It performs no
// validation and reports no errors: each character is folded in as
// (c - '0'), so it must only be called on input already known to be
// all digits (bumpSerial passes the four-digit NNNN counter). An empty
// string yields 0.
func atoi(s string) int {
	total := 0
	for _, digit := range s {
		total *= 10
		total += int(digit - '0')
	}
	return total
}
|
||
|
||
// parseUint converts a string of decimal digits to a uint64. Like atoi
// it does no validation and has no error path, which is acceptable only
// because its inputs are generated internally (formatted dates). An
// empty string yields 0.
func parseUint(s string) uint64 {
	total := uint64(0)
	for _, digit := range s {
		total *= 10
		total += uint64(digit - '0')
	}
	return total
}
|
||
|
||
// writeAtomic serializes rrs to a temp file in the same directory as
|
||
// z.Path, then renames over the destination. POSIX guarantees atomic
|
||
// rename on local filesystems, so a partial write can never leave a
|
||
// corrupt zone file on disk.
|
||
//
|
||
// Format: one RR per line, tab-separated owner/TTL/class/type/rdata.
|
||
// Comments and multi-line SOA formatting from the original file are
|
||
// NOT preserved (v1 limitation; sophisticated comment preservation can
|
||
// land in v2). A short header line is emitted with the write timestamp
|
||
// and the plugin name, so it's obvious in `git log` what touched the
|
||
// file.
|
||
func (z *zoneFile) writeAtomic(rrs []dns.RR, now time.Time) error {
|
||
dir := filepath.Dir(z.Path)
|
||
tmp, err := os.CreateTemp(dir, ".rfc2136-*.zone")
|
||
if err != nil {
|
||
return fmt.Errorf("create temp: %w", err)
|
||
}
|
||
tmpPath := tmp.Name()
|
||
// Best-effort cleanup if we fail before the rename.
|
||
defer func() {
|
||
if tmpPath != "" {
|
||
_ = os.Remove(tmpPath)
|
||
}
|
||
}()
|
||
|
||
header := fmt.Sprintf("; Auto-written by coredns-rfc2136 on %s\n; Zone: %s\n$ORIGIN %s\n",
|
||
now.UTC().Format(time.RFC3339), z.Origin, z.Origin)
|
||
if _, err := tmp.WriteString(header); err != nil {
|
||
_ = tmp.Close()
|
||
return fmt.Errorf("write header: %w", err)
|
||
}
|
||
|
||
for _, rr := range rrs {
|
||
if _, err := tmp.WriteString(rr.String() + "\n"); err != nil {
|
||
_ = tmp.Close()
|
||
return fmt.Errorf("write rr: %w", err)
|
||
}
|
||
}
|
||
|
||
if err := tmp.Sync(); err != nil {
|
||
_ = tmp.Close()
|
||
return fmt.Errorf("sync: %w", err)
|
||
}
|
||
if err := tmp.Close(); err != nil {
|
||
return fmt.Errorf("close: %w", err)
|
||
}
|
||
if err := os.Rename(tmpPath, z.Path); err != nil {
|
||
return fmt.Errorf("rename %s -> %s: %w", tmpPath, z.Path, err)
|
||
}
|
||
tmpPath = "" // suppress cleanup; rename consumed it
|
||
return nil
|
||
}
|
||
|
||
// commit stages and commits the zone file via git. Runs from the
|
||
// repository directory inferred from the zone file's parent. Returns
|
||
// nil silently if AutoCommit is false. Returns an error if the commit
|
||
// fails; the caller decides whether to roll back the file write.
|
||
//
|
||
// Both git invocations run under a context with a hard timeout
|
||
// (gitCommandTimeout). If git hangs (NFS stall, gpg-sign prompt,
|
||
// pre-commit hook waiting on stdin), we kill it rather than block the
|
||
// caller's per-zone mutex indefinitely. ACME storms must not be able
|
||
// to wedge the plugin via git getting stuck.
|
||
func (z *zoneFile) commit(message string) error {
|
||
if !z.AutoCommit {
|
||
return nil
|
||
}
|
||
// We run git from the directory containing the zone file. git will
|
||
// walk upward to find the .git dir.
|
||
dir := filepath.Dir(z.Path)
|
||
ctx, cancel := context.WithTimeout(context.Background(), gitCommandTimeout)
|
||
defer cancel()
|
||
// `git add` first; if file is already in the index, no harm done.
|
||
add := exec.CommandContext(ctx, "git",
|
||
"-C", dir,
|
||
"add", "--", z.Path,
|
||
)
|
||
if out, err := add.CombinedOutput(); err != nil {
|
||
return fmt.Errorf("git add failed: %w: %s", err, strings.TrimSpace(string(out)))
|
||
}
|
||
commit := exec.CommandContext(ctx, "git",
|
||
"-C", dir,
|
||
"-c", "user.name="+z.GitAuthorName,
|
||
"-c", "user.email="+z.GitAuthorEmail,
|
||
"commit", "-q", "-m", message, "--", z.Path,
|
||
)
|
||
if out, err := commit.CombinedOutput(); err != nil {
|
||
return fmt.Errorf("git commit failed: %w: %s", err, strings.TrimSpace(string(out)))
|
||
}
|
||
return nil
|
||
}
|