H3/H4/H5: zone-integrity invariants

H3+H4 — Zone SOA invariant. After parsing, loadRRs enforces:
exactly one SOA, owned by the zone apex. Catches three failure modes
with a single guard:

  - Missing SOA (H4): a malformed line earlier in the file may have
    tripped miekg/dns's ZoneParser into dropping records without
    reporting an error via parser.Err(). If the SOA went missing, we
    refuse rather than treat the partial parse as authoritative.

  - Multiple SOAs (H3): zone files with accidental duplicate SOA
    records produce inconsistent zone state visible to AXFR clients.
    The old code's first-match SOA-bump would silently propagate the
    inconsistency. Now we refuse.

  - Non-apex SOA (H3): an SOA whose owner doesn't match the zone
    origin is either a parse error or a hand-edit mistake; bumping
    it would leave the real apex unchanged. Now we refuse.

assertSingleApexSOA returns a descriptive error so the failure mode
is actionable from logs alone.

H5 — MaxUint32 guard in bumpSerial. The old "+1 defensive advance"
branch would wrap to 0 if soa.Serial == MaxUint32, and downstream
secondaries per RFC 1982 §3.2 treat 0-after-MaxUint32 as "older"
(they refuse to AXFR and the zone goes dark). Now we explicitly check
and refuse with a loud message; operator must reset the serial
manually. Practical reach is zero for our deployment (10000 bumps/day
× 117 years would still fit uint32) but the defensive ceiling matters
for fuzz, hand-edit, or future code-path errors.

The full RFC 1982 wraparound-aware comparison was prototyped but
removed: it broke the legacy-format migration case where a tiny
non-CalVer serial (e.g., 12345) is "more than 2^31 distant" from a
new-format serial (~2.6B), which RFC 1982 reads as "going backwards"
and would block migration. Naive `>` is correct in practice; the
MaxUint32 case is the only real failure mode worth guarding.

New tests:
- TestBumpSerial_MaxUint32_RefusesWrap
- TestLoadRRs_NoSOA_Refused
- TestLoadRRs_MultipleSOAs_Refused
- TestLoadRRs_NonApexSOA_Refused
This commit is contained in:
Ryan Malloy 2026-05-22 21:25:35 -06:00
parent 93ed180d8f
commit d9dad01798
2 changed files with 138 additions and 3 deletions

View File

@ -3,6 +3,7 @@ package rfc2136
import (
"context"
"fmt"
"math"
"os"
"os/exec"
"path/filepath"
@ -104,6 +105,15 @@ func openZoneFile(path, origin string) *zoneFile {
// Returns (rrs, snapshot, error). The snapshot fingerprints the file
// identity at read time so a subsequent writeIfUnchanged can detect
// concurrent modification.
//
// Hamilton H4 — strict-parse validation: a single malformed line could
// otherwise produce a partial parse where parser.Err() returns nil but
// some records silently went missing. To catch this, we enforce a
// post-parse invariant: exactly one SOA RR, and that SOA's name equals
// the configured zone origin. A zone file that's been partially eaten
// by the parser usually loses its SOA along the way — checking SOA
// presence catches both H4 (silent truncation) and H3 (multi-SOA or
// wrong-apex SOA) with a single guard.
func (z *zoneFile) loadRRs() ([]dns.RR, fileSnapshot, error) {
f, err := os.Open(z.Path)
if err != nil {
@ -130,9 +140,45 @@ func (z *zoneFile) loadRRs() ([]dns.RR, fileSnapshot, error) {
if len(rrs) == 0 {
return nil, snap, fmt.Errorf("%s: zero RRs parsed", z.Path)
}
// H3/H4 invariant: exactly one SOA, anchored at the zone origin.
// Refuse to operate on a zone file whose SOA structure is wrong —
// any subsequent bumpSerial or write would compound the damage.
if err := assertSingleApexSOA(rrs, z.Origin); err != nil {
return nil, snap, fmt.Errorf("zone %s integrity check failed: %w", z.Path, err)
}
return rrs, snap, nil
}
// assertSingleApexSOA enforces that rrs contains exactly one SOA and
// that its owner matches the zone origin. Returns an error otherwise.
// This is the H3+H4 zone-integrity invariant.
func assertSingleApexSOA(rrs []dns.RR, origin string) error {
origin = canon(origin)
var soas []*dns.SOA
for _, rr := range rrs {
if s, ok := rr.(*dns.SOA); ok {
soas = append(soas, s)
}
}
switch len(soas) {
case 0:
return fmt.Errorf("no SOA record found (expected one at %q)", origin)
case 1:
if canon(soas[0].Hdr.Name) != origin {
return fmt.Errorf("SOA owner is %q, expected zone apex %q", soas[0].Hdr.Name, origin)
}
return nil
default:
names := make([]string, len(soas))
for i, s := range soas {
names[i] = s.Hdr.Name
}
return fmt.Errorf("multiple SOA records found (%d): %s", len(soas), strings.Join(names, ", "))
}
}
// checkUnchanged returns nil if the on-disk file still matches the
// captured snapshot. If the file has been modified (mtime or size
// differs), returns an error — the caller should refuse the UPDATE
@ -299,10 +345,24 @@ func bumpSerial(rrs []dns.RR, now time.Time) error {
// Older or unparseable: jump to today*10000+1. Migration path for
// legacy YYYYMMDDNN serials lives here.
candidate := uint32(parseUint(today)*serialCounterMul + 1)
// H5 — explicit MaxUint32 guard. Plain `>` comparison is correct in
// practice (we'd never wrap during the zone's lifetime: 10000
// bumps/day × 365 days × ~117 years = ~427M, well under 2^32). The
// real failure mode we must prevent is wrap-to-0: if soa.Serial
// somehow reached MaxUint32 (hand-edit, fuzz, or a future code path
// we haven't written), `soa.Serial++` would wrap to 0, and
// downstream secondaries per RFC 1982 treat 0-after-MaxUint32 as
// "older" — they refuse to AXFR, and the zone goes dark. Loud
// refusal forces the operator to manually reset the serial,
// instead of silently bricking the zone.
if candidate <= soa.Serial {
// Defensive: don't regress. If something has somehow
// provisioned a serial >= today's new-format candidate (e.g.,
// far-future serial from a hand-edit), just +1 to advance.
if soa.Serial == math.MaxUint32 {
return fmt.Errorf("SOA serial at uint32 max (%d) — refusing to wrap to 0; operator must reset zone serial manually (see RFC 1982 §3.2)", soa.Serial)
}
// Defensive monotonic advance for the unusual "current serial
// is already > today's new-format minimum" case (e.g., a
// hand-edit set it to a far-future value).
soa.Serial++
return nil
}

View File

@ -1,6 +1,7 @@
package rfc2136
import (
"math"
"os"
"path/filepath"
"strings"
@ -220,6 +221,80 @@ func TestBumpSerial_NonCalVerFormat_ResetsToToday(t *testing.T) {
}
}
// TestBumpSerial_MaxUint32_RefusesWrap covers H5: the defensive
// branch must not wrap soa.Serial to 0. Wrap-to-0 makes downstream
// secondaries treat the zone as reset per RFC 1982 and refuse to AXFR
// the new value, taking the zone dark.
func TestBumpSerial_MaxUint32_RefusesWrap(t *testing.T) {
rrs := []dns.RR{
&dns.SOA{Hdr: dns.RR_Header{Name: "example.com.", Rrtype: dns.TypeSOA},
Serial: math.MaxUint32},
}
now := time.Date(2026, 5, 22, 1, 0, 0, 0, time.UTC)
if err := bumpSerial(rrs, now); err == nil {
t.Errorf("bumpSerial accepted MaxUint32; expected refusal to prevent wrap-to-0")
}
if got := rrs[0].(*dns.SOA).Serial; got == 0 {
t.Errorf("Serial wrapped to 0 (catastrophic): %d", got)
}
}
// TestLoadRRs_NoSOA_Refused covers H4: a zone file that's missing its
// SOA (because the parser ate it on a malformed prior line, or because
// it was never there) must fail loadRRs rather than be silently treated
// as a valid empty-of-SOA zone.
func TestLoadRRs_NoSOA_Refused(t *testing.T) {
dir := t.TempDir()
path := filepath.Join(dir, "noSoA.example.com.zone")
content := "$ORIGIN noSoA.example.com.\nfoo.noSoA.example.com. 60 IN A 192.0.2.1\n"
if err := os.WriteFile(path, []byte(content), 0644); err != nil {
t.Fatalf("write fixture: %v", err)
}
zf := openZoneFile(path, "noSoA.example.com.")
if _, _, err := zf.loadRRs(); err == nil {
t.Errorf("loadRRs accepted SOA-less zone file; expected refusal")
}
}
// TestLoadRRs_MultipleSOAs_Refused covers H3: two SOAs at the apex
// produce inconsistent zone state visible to AXFR clients. We refuse
// to operate on such a file rather than bumping only the first SOA.
func TestLoadRRs_MultipleSOAs_Refused(t *testing.T) {
dir := t.TempDir()
path := filepath.Join(dir, "dupe.example.com.zone")
content := `$ORIGIN dupe.example.com.
dupe.example.com. 3600 IN SOA ns.example. admin.example. 1 60 60 60 60
dupe.example.com. 3600 IN SOA ns.example. admin.example. 2 60 60 60 60
dupe.example.com. 3600 IN NS ns.example.
`
if err := os.WriteFile(path, []byte(content), 0644); err != nil {
t.Fatalf("write fixture: %v", err)
}
zf := openZoneFile(path, "dupe.example.com.")
if _, _, err := zf.loadRRs(); err == nil {
t.Errorf("loadRRs accepted dual-SOA zone; expected refusal")
}
}
// TestLoadRRs_NonApexSOA_Refused covers H3: an SOA owned by a name
// other than the zone apex is a parse-error red flag and must be
// refused, not silently bumped.
func TestLoadRRs_NonApexSOA_Refused(t *testing.T) {
dir := t.TempDir()
path := filepath.Join(dir, "weird.example.com.zone")
content := `$ORIGIN weird.example.com.
sub.weird.example.com. 3600 IN SOA ns.example. admin.example. 1 60 60 60 60
weird.example.com. 3600 IN NS ns.example.
`
if err := os.WriteFile(path, []byte(content), 0644); err != nil {
t.Fatalf("write fixture: %v", err)
}
zf := openZoneFile(path, "weird.example.com.")
if _, _, err := zf.loadRRs(); err == nil {
t.Errorf("loadRRs accepted non-apex SOA; expected refusal")
}
}
func TestBumpSerial_NoSOA_ReturnsError(t *testing.T) {
rrs := []dns.RR{mustRR(t, `foo.example.com. 60 IN A 192.0.2.1`)}
now := time.Now()