H1 — Concurrent-modification detection. loadRRs now returns a fileSnapshot capturing (mtime, size) at read time. handleUpdate calls zf.checkUnchanged(snap) immediately before writeAtomic. If anything modified the file between load and write — rsync push, manual edit, `git checkout` — the UPDATE is refused with SERVFAIL. Caddy retries with a fresh load. Protects against the CLAUDE.md-documented rsync workflow racing the plugin. H2 — Git commit-failure policy. The previous code logged at WARN and continued, breaking the documented "file + git both updated" contract. Now logs at ERROR with structured fields (zone, path, error, recovery command) so operators discover the divergence. We do NOT roll back the file write: by the time the commit fails, the auto plugin may have already noticed the new mtime and reloaded; rolling back creates more races than it solves. Recovery is `git -C <dir> status` + manual commit. M1 — exec.CommandContext with 10s timeout on git invocations. If git hangs (NFS stall, gpg-sign prompt, broken pre-commit hook waiting on stdin), the per-zone mutex would otherwise be held forever and queue all subsequent UPDATEs. gitCommandTimeout caps the hang. M2 deferred. Dropping the separate `git add` cleanly requires either `-a` (wrong scope: auto-stages all tracked modifications) or `--include` (still needs prior staging). The race window between add and commit is theoretical for our setup (single-writer plugin + occasional `git status`). M1's timeout already mitigates the worst hang case. New tests: - TestZoneFile_CheckUnchanged_DetectsExternalModification (H1)
435 lines
14 KiB
Go
435 lines
14 KiB
Go
package rfc2136
|
|
|
|
import (
|
|
"context"
|
|
"fmt"
|
|
"os"
|
|
"os/exec"
|
|
"path/filepath"
|
|
"strings"
|
|
"sync"
|
|
"time"
|
|
|
|
"github.com/miekg/dns"
|
|
)
|
|
|
|
// gitCommandTimeout caps how long we are willing to wait on git when we
// shell out to it. If the process hangs (NFS stall, gpg-sign prompt,
// broken pre-commit hook waiting on stdin, etc.) we must not block the
// caller — which holds the per-zone mutex — forever. 10s is generous
// for a local-disk repo.
//
// commit derives a single context from this value and uses it for both
// `git add` and `git commit`, so the pair shares one 10s budget rather
// than getting 10s each.
const gitCommandTimeout = 10 * time.Second
|
|
|
|
// fileSnapshot is the fingerprint of the zone file taken when it was
// last read: modification time plus byte size. Just before writing the
// file back, the fingerprint is compared with a fresh stat; a mismatch
// means something else (rsync push, manual edit, `git checkout`)
// touched the file in between, and the UPDATE is refused so the next
// loadRRs starts from the new content.
type fileSnapshot struct {
	mtime time.Time
	size  int64
}

// matches reports whether info still describes the file this snapshot
// was taken from, i.e. both the size and the modification time are
// unchanged. Inodes are deliberately not compared: mtime+size already
// catches the rename-over-original style of edit, which changes both.
func (s fileSnapshot) matches(info os.FileInfo) bool {
	if s.size != info.Size() {
		return false
	}
	return s.mtime.Equal(info.ModTime())
}
|
|
|
|
// zoneFile is a file-backed authority for a single DNS zone. Replaces
// the Phase-1.3 in-memory recordStore.
//
// On every UPDATE, the file is read fully into memory as parsed RRs,
// the requested adds/deletes are applied to that slice, the SOA serial
// is bumped (CalVer YYMMDD*10000+NNNN, see bumpSerial), and the file is
// rewritten via an atomic temp-file rename. CoreDNS's `auto` plugin
// notices the mtime change within its reload interval (~30s) and
// re-serves the zone. HE eventually pulls on its SOA refresh.
//
// Concurrency: the per-zone mutex serializes RFC 2136 UPDATEs against
// each other and against the plugin's own reads. It does NOT lock out
// external editors (e.g. a human running an editor while the plugin is
// mid-write). checkUnchanged lets a caller detect an external change
// made between loadRRs and the write-back, but a change landing in the
// gap between that check and the rename is still overwritten; the
// worst case is one lost manual edit, easily restored from git.
type zoneFile struct {
	// mu is the per-zone mutex described above. None of the methods in
	// this file lock it themselves; presumably the UPDATE handler holds
	// it across load → write → commit — confirm against the caller.
	mu sync.Mutex

	// Path is the absolute path to the zone file on disk.
	Path string

	// Origin is the canonical (lowercase, trailing dot) zone apex.
	Origin string

	// AutoCommit, when true, runs `git add <path> && git commit ...`
	// after every successful write. openZoneFile sets it to true (per
	// the chosen architecture: every dynamic update should leave a git
	// trail); the Go zero value is false, so a zoneFile built by hand
	// does not commit unless this is set.
	AutoCommit bool

	// GitAuthorName and GitAuthorEmail are passed to `git commit`
	// via -c user.name and -c user.email so the commits are
	// attributable without depending on the system git config.
	GitAuthorName  string
	GitAuthorEmail string
}
|
|
|
|
// canon normalises a DNS name to the store's internal form: lowercase
|
|
// with trailing dot. miekg/dns sometimes hands us names without the
|
|
// trailing dot; passing through this once at the boundary keeps every
|
|
// lookup, every comparison consistent.
|
|
func canon(name string) string {
|
|
return strings.ToLower(dns.Fqdn(name))
|
|
}
|
|
|
|
// openZoneFile prepares a zoneFile handle. Does NOT read or parse the
|
|
// file; that happens lazily in each operation (so the file's content
|
|
// is always fresh and we never serve a stale snapshot).
|
|
func openZoneFile(path, origin string) *zoneFile {
|
|
return &zoneFile{
|
|
Path: path,
|
|
Origin: canon(origin),
|
|
AutoCommit: true,
|
|
GitAuthorName: "coredns-rfc2136",
|
|
GitAuthorEmail: "rfc2136@coredns",
|
|
}
|
|
}
|
|
|
|
// loadRRs reads the zone file and parses it into an RR slice via
|
|
// miekg/dns's zone parser. The parser handles $ORIGIN, $TTL, multi-line
|
|
// SOA, comments, includes, etc.
|
|
//
|
|
// Returns (rrs, snapshot, error). The snapshot fingerprints the file
|
|
// identity at read time so a subsequent writeIfUnchanged can detect
|
|
// concurrent modification.
|
|
func (z *zoneFile) loadRRs() ([]dns.RR, fileSnapshot, error) {
|
|
f, err := os.Open(z.Path)
|
|
if err != nil {
|
|
return nil, fileSnapshot{}, fmt.Errorf("open %s: %w", z.Path, err)
|
|
}
|
|
defer f.Close()
|
|
|
|
info, err := f.Stat()
|
|
if err != nil {
|
|
return nil, fileSnapshot{}, fmt.Errorf("stat %s: %w", z.Path, err)
|
|
}
|
|
snap := fileSnapshot{mtime: info.ModTime(), size: info.Size()}
|
|
|
|
parser := dns.NewZoneParser(f, z.Origin, z.Path)
|
|
parser.SetDefaultTTL(3600)
|
|
|
|
var rrs []dns.RR
|
|
for rr, ok := parser.Next(); ok; rr, ok = parser.Next() {
|
|
rrs = append(rrs, rr)
|
|
}
|
|
if err := parser.Err(); err != nil {
|
|
return nil, snap, fmt.Errorf("parse %s: %w", z.Path, err)
|
|
}
|
|
if len(rrs) == 0 {
|
|
return nil, snap, fmt.Errorf("%s: zero RRs parsed", z.Path)
|
|
}
|
|
return rrs, snap, nil
|
|
}
|
|
|
|
// checkUnchanged returns nil if the on-disk file still matches the
|
|
// captured snapshot. If the file has been modified (mtime or size
|
|
// differs), returns an error — the caller should refuse the UPDATE
|
|
// rather than clobber the external change.
|
|
func (z *zoneFile) checkUnchanged(snap fileSnapshot) error {
|
|
info, err := os.Stat(z.Path)
|
|
if err != nil {
|
|
return fmt.Errorf("stat %s: %w", z.Path, err)
|
|
}
|
|
if !snap.matches(info) {
|
|
return fmt.Errorf("concurrent modification detected on %s: mtime %s/%s size %d/%d",
|
|
z.Path, snap.mtime.Format(time.RFC3339Nano), info.ModTime().Format(time.RFC3339Nano),
|
|
snap.size, info.Size())
|
|
}
|
|
return nil
|
|
}
|
|
|
|
// Lookup returns RRs in `rrs` matching (name, rtype). Both name and
|
|
// the RR header names are canonicalised for the comparison. Pass-by-
|
|
// slice rather than holding state means we can let the caller batch
|
|
// multiple operations against one snapshot of the file.
|
|
func lookupIn(rrs []dns.RR, name string, rtype uint16) []dns.RR {
|
|
name = canon(name)
|
|
var out []dns.RR
|
|
for _, rr := range rrs {
|
|
hdr := rr.Header()
|
|
if canon(hdr.Name) == name && hdr.Rrtype == rtype {
|
|
out = append(out, rr)
|
|
}
|
|
}
|
|
return out
|
|
}
|
|
|
|
// nameExistsIn reports whether any RR's owner equals name (canonical).
|
|
func nameExistsIn(rrs []dns.RR, name string) bool {
|
|
name = canon(name)
|
|
for _, rr := range rrs {
|
|
if canon(rr.Header().Name) == name {
|
|
return true
|
|
}
|
|
}
|
|
return false
|
|
}
|
|
|
|
// removeRRsetFrom returns rrs minus every RR matching (name, rtype).
|
|
func removeRRsetFrom(rrs []dns.RR, name string, rtype uint16) []dns.RR {
|
|
name = canon(name)
|
|
out := rrs[:0:0]
|
|
for _, rr := range rrs {
|
|
hdr := rr.Header()
|
|
if canon(hdr.Name) == name && hdr.Rrtype == rtype {
|
|
continue
|
|
}
|
|
out = append(out, rr)
|
|
}
|
|
return out
|
|
}
|
|
|
|
// removeNameFrom returns rrs minus every RR with the given owner name.
|
|
func removeNameFrom(rrs []dns.RR, name string) []dns.RR {
|
|
name = canon(name)
|
|
out := rrs[:0:0]
|
|
for _, rr := range rrs {
|
|
if canon(rr.Header().Name) == name {
|
|
continue
|
|
}
|
|
out = append(out, rr)
|
|
}
|
|
return out
|
|
}
|
|
|
|
// removeRRFrom returns rrs minus the single RR matching the given one
|
|
// by owner + type + rdata. String() comparison covers rdata exactness.
|
|
func removeRRFrom(rrs []dns.RR, target dns.RR) []dns.RR {
|
|
targetStr := target.String()
|
|
out := rrs[:0:0]
|
|
matched := false
|
|
for _, rr := range rrs {
|
|
if !matched && rr.String() == targetStr {
|
|
matched = true
|
|
continue
|
|
}
|
|
out = append(out, rr)
|
|
}
|
|
return out
|
|
}
|
|
|
|
// addRRTo appends rr to rrs unless an identical RR already exists
|
|
// (de-dupe semantics per RFC 2136 §3.4.2.2).
|
|
func addRRTo(rrs []dns.RR, rr dns.RR) []dns.RR {
|
|
target := rr.String()
|
|
for _, existing := range rrs {
|
|
if existing.String() == target {
|
|
return rrs
|
|
}
|
|
}
|
|
return append(rrs, rr)
|
|
}
|
|
|
|
// serialCounterMul is the multiplier between the date prefix and the
// counter in our SOA-serial encoding. The format is YYMMDD*10000 + NNNN.
// bumpSerial starts each day at NNNN=0001 and rolls to the next encoded
// day after 9999, i.e. 9999 bumps per encoded day. The 2-digit year
// keeps the value within uint32 (the RFC 1035 ceiling for SOA serials)
// for now: for 2026-05-22 the max serial 2,605,229,999 is well below
// 2^32-1 = 4,294,967,295. The headroom ends with 2042-12-31
// (4,212,319,999); 430101*10000 = 4,301,010,000 no longer fits.
// A 4-digit year (e.g., 20260522*10000) would overflow uint32 today.
const serialCounterMul = 10000
|
|
|
|
// bumpSerial advances the SOA's serial in CalVer YYMMDD*10000+NNNN form.
|
|
//
|
|
// Behaviour:
|
|
// - If cur encodes today (or a future-encoded date from a prior NNNN
|
|
// rollover), increment NNNN. On NNNN=9999, roll forward to the next
|
|
// encoded day with NNNN=0001. The encoded date drifts ahead of wall
|
|
// time during heavy churn and catches back up on quiet days; serial
|
|
// numbers stay strictly monotonic, which is the only DNS hard
|
|
// requirement.
|
|
// - Otherwise (older serial; including legacy YYYYMMDDNN-format serials
|
|
// left over from before this format change), jump to today*10000+1.
|
|
// Legacy serials migrate automatically here: a value like 2026052299
|
|
// (~2.026B) is numerically smaller than today's new-format minimum
|
|
// 2605220001 (~2.605B), so it falls to this branch and gets rewritten
|
|
// in-place. The new value is strictly greater, so AXFR receivers (HE
|
|
// et al.) treat it as a normal forward bump and pull cleanly.
|
|
//
|
|
// The SOA is found by type (there should be exactly one); mutated in
|
|
// place. The returned slice is the same slice with the SOA's serial
|
|
// updated.
|
|
func bumpSerial(rrs []dns.RR, now time.Time) error {
|
|
var soa *dns.SOA
|
|
for _, rr := range rrs {
|
|
if s, ok := rr.(*dns.SOA); ok {
|
|
soa = s
|
|
break
|
|
}
|
|
}
|
|
if soa == nil {
|
|
return fmt.Errorf("zone has no SOA record")
|
|
}
|
|
|
|
today := now.UTC().Format("060102") // YYMMDD
|
|
cur := fmt.Sprintf("%010d", soa.Serial)
|
|
|
|
// Try the new-format read: cur[:6] is YYMMDD, cur[6:10] is NNNN.
|
|
// We only honour this when the encoded date is today or later — an
|
|
// older encoded date means a normal new-day reset (or a legacy
|
|
// serial that happens to look like a valid YYMMDD prefix but is in
|
|
// the past, which is the same handling: jump to today).
|
|
if curDate := cur[:6]; isValidYYMMDD(curDate) && curDate >= today {
|
|
nnnn := atoi(cur[6:10])
|
|
if nnnn < 9999 {
|
|
soa.Serial = uint32(parseUint(curDate)*serialCounterMul + uint64(nnnn+1))
|
|
return nil
|
|
}
|
|
// NNNN=9999: roll to next encoded day, NNNN=0001.
|
|
d, err := time.Parse("060102", curDate)
|
|
if err != nil {
|
|
return fmt.Errorf("serial date %q unparseable: %w", curDate, err)
|
|
}
|
|
next := d.AddDate(0, 0, 1).Format("060102")
|
|
soa.Serial = uint32(parseUint(next)*serialCounterMul + 1)
|
|
return nil
|
|
}
|
|
|
|
// Older or unparseable: jump to today*10000+1. Migration path for
|
|
// legacy YYYYMMDDNN serials lives here.
|
|
candidate := uint32(parseUint(today)*serialCounterMul + 1)
|
|
if candidate <= soa.Serial {
|
|
// Defensive: don't regress. If something has somehow
|
|
// provisioned a serial >= today's new-format candidate (e.g.,
|
|
// far-future serial from a hand-edit), just +1 to advance.
|
|
soa.Serial++
|
|
return nil
|
|
}
|
|
soa.Serial = candidate
|
|
return nil
|
|
}
|
|
|
|
// isValidYYMMDD reports whether s is exactly six bytes long and parses
// as a calendar date in YYMMDD layout. Any two-digit year is accepted;
// month and day must form a real date (so "230229" is rejected).
func isValidYYMMDD(s string) bool {
	if len(s) == 6 {
		if _, err := time.Parse("060102", s); err == nil {
			return true
		}
	}
	return false
}
|
|
|
|
// atoi converts a string of decimal digits to an int without any
// validation or error reporting. It is only meant for substrings that
// are already known to be all digits (bumpSerial passes the four-digit
// NNNN part of a zero-padded serial); anything else yields a
// meaningless number.
func atoi(s string) int {
	total := 0
	for _, digit := range s {
		total *= 10
		total += int(digit - '0')
	}
	return total
}
|
|
|
|
// parseUint converts a string of decimal digits to a uint64. It skips
// the validation and error return of strconv.ParseUint because it is
// only fed internally produced digit strings (formatted dates and
// serial prefixes); non-digit input yields a meaningless number and
// overflow is not detected.
func parseUint(s string) uint64 {
	var total uint64
	for _, digit := range s {
		total *= 10
		total += uint64(digit - '0')
	}
	return total
}
|
|
|
|
// writeAtomic serializes rrs to a temp file in the same directory as
|
|
// z.Path, then renames over the destination. POSIX guarantees atomic
|
|
// rename on local filesystems, so a partial write can never leave a
|
|
// corrupt zone file on disk.
|
|
//
|
|
// Format: one RR per line, tab-separated owner/TTL/class/type/rdata.
|
|
// Comments and multi-line SOA formatting from the original file are
|
|
// NOT preserved (v1 limitation; sophisticated comment preservation can
|
|
// land in v2). A short header line is emitted with the write timestamp
|
|
// and the plugin name, so it's obvious in `git log` what touched the
|
|
// file.
|
|
func (z *zoneFile) writeAtomic(rrs []dns.RR, now time.Time) error {
|
|
dir := filepath.Dir(z.Path)
|
|
tmp, err := os.CreateTemp(dir, ".rfc2136-*.zone")
|
|
if err != nil {
|
|
return fmt.Errorf("create temp: %w", err)
|
|
}
|
|
tmpPath := tmp.Name()
|
|
// Best-effort cleanup if we fail before the rename.
|
|
defer func() {
|
|
if tmpPath != "" {
|
|
_ = os.Remove(tmpPath)
|
|
}
|
|
}()
|
|
|
|
header := fmt.Sprintf("; Auto-written by coredns-rfc2136 on %s\n; Zone: %s\n$ORIGIN %s\n",
|
|
now.UTC().Format(time.RFC3339), z.Origin, z.Origin)
|
|
if _, err := tmp.WriteString(header); err != nil {
|
|
_ = tmp.Close()
|
|
return fmt.Errorf("write header: %w", err)
|
|
}
|
|
|
|
for _, rr := range rrs {
|
|
if _, err := tmp.WriteString(rr.String() + "\n"); err != nil {
|
|
_ = tmp.Close()
|
|
return fmt.Errorf("write rr: %w", err)
|
|
}
|
|
}
|
|
|
|
if err := tmp.Sync(); err != nil {
|
|
_ = tmp.Close()
|
|
return fmt.Errorf("sync: %w", err)
|
|
}
|
|
if err := tmp.Close(); err != nil {
|
|
return fmt.Errorf("close: %w", err)
|
|
}
|
|
if err := os.Rename(tmpPath, z.Path); err != nil {
|
|
return fmt.Errorf("rename %s -> %s: %w", tmpPath, z.Path, err)
|
|
}
|
|
tmpPath = "" // suppress cleanup; rename consumed it
|
|
return nil
|
|
}
|
|
|
|
// commit stages and commits the zone file via git. Runs from the
|
|
// repository directory inferred from the zone file's parent. Returns
|
|
// nil silently if AutoCommit is false. Returns an error if the commit
|
|
// fails; the caller decides whether to roll back the file write.
|
|
//
|
|
// Both git invocations run under a context with a hard timeout
|
|
// (gitCommandTimeout). If git hangs (NFS stall, gpg-sign prompt,
|
|
// pre-commit hook waiting on stdin), we kill it rather than block the
|
|
// caller's per-zone mutex indefinitely. ACME storms must not be able
|
|
// to wedge the plugin via git getting stuck.
|
|
func (z *zoneFile) commit(message string) error {
|
|
if !z.AutoCommit {
|
|
return nil
|
|
}
|
|
// We run git from the directory containing the zone file. git will
|
|
// walk upward to find the .git dir.
|
|
dir := filepath.Dir(z.Path)
|
|
ctx, cancel := context.WithTimeout(context.Background(), gitCommandTimeout)
|
|
defer cancel()
|
|
// `git add` first; if file is already in the index, no harm done.
|
|
add := exec.CommandContext(ctx, "git",
|
|
"-C", dir,
|
|
"add", "--", z.Path,
|
|
)
|
|
if out, err := add.CombinedOutput(); err != nil {
|
|
return fmt.Errorf("git add failed: %w: %s", err, strings.TrimSpace(string(out)))
|
|
}
|
|
commit := exec.CommandContext(ctx, "git",
|
|
"-C", dir,
|
|
"-c", "user.name="+z.GitAuthorName,
|
|
"-c", "user.email="+z.GitAuthorEmail,
|
|
"commit", "-q", "-m", message, "--", z.Path,
|
|
)
|
|
if out, err := commit.CombinedOutput(); err != nil {
|
|
return fmt.Errorf("git commit failed: %w: %s", err, strings.TrimSpace(string(out)))
|
|
}
|
|
return nil
|
|
}
|