Send DNS NOTIFY to secondaries after every UPDATE

Per RFC 1996, a master that mutates a zone SHOULD notify its
secondaries so they can immediately AXFR rather than wait for their
next SOA-refresh poll. Without this, propagation lag from UPDATE to
public DNS is bounded by the secondary's refresh interval (300s for
us) — which is borderline for ACME validation timing.

New Corefile directive:
    notify <host[:port]> [<host[:port]>...]

Targets accept bare hostnames (port 53 default), host:port, or
[ipv6]:port. The same list applies to every zone in the rfc2136
block.

Implementation: fire-and-forget UDP per target, each in its own
goroutine, capped by a 2s timeout. The UPDATE response to the client
is never held pending NOTIFY acks (RFC 1996 §4 explicitly decouples
them). Failures log at DEBUG only — a briefly-unreachable secondary
is normal and would otherwise spam logs.

Retires the external scripts/notify-secondaries.py workflow for any
deployment that wires the directive: secondaries now hear about
changes within seconds of the UPDATE landing, no cron or manual
invocation needed.

New tests:
- TestSendNotify_DeliversToTarget — packet arrives, opcode + zone correct
- TestSendNotify_NoTargets_NoCrash — empty list short-circuits
- TestSendNotify_BadTarget_LogsButDoesNotBlock — fire-and-forget timing
- TestNotifyOne_AppendsDefaultPort — host vs host:port normalization
This commit is contained in:
Ryan Malloy 2026-05-23 00:54:45 -06:00
parent 89993ca207
commit 7367401734
6 changed files with 241 additions and 0 deletions

View File

@ -51,6 +51,7 @@ rfc2136 <zone> [<zone>...] {
git-author <name> <email> # optional
rate-limit <burst> <period-seconds> # default 100 / 60s
rate-limit off # disable rate-limit
notify <host[:port]> [<host[:port]>...] # NOTIFY secondaries on every UPDATE
}
```
@ -149,6 +150,34 @@ UPDATE traffic is token-bucket capped per TSIG key. Default 100
UPDATEs per 60 seconds. ACME storms are well within this; anything
beyond is suspicious. Tune via `rate-limit <burst> <period>`.
### NOTIFY to secondaries (optional)
After every successful UPDATE, the plugin can fire DNS NOTIFY (RFC
1996) to a list of secondary servers. This collapses propagation lag
from "up to the secondary's SOA refresh interval" (often 300s) to
"a few seconds" — secondaries that receive NOTIFY do an immediate SOA
poll and AXFR if changed.
Configure with the `notify` directive:
```
rfc2136 example.com {
zones-dir /zones
tsig-key ...
notify ns2.example.com ns3.example.com 216.218.130.2
}
```
Semantics:
- Targets may be `host`, `host:port`, or `[ipv6]:port`. Default port is 53.
- Fire-and-forget: each target gets its own goroutine with a 2s timeout.
The UPDATE response to the client is **not** held pending NOTIFY acks
(RFC 1996 §4 decouples them).
- Failures log at DEBUG only — a briefly-unreachable secondary is
normal and would otherwise spam the logs.
- A missed NOTIFY does no harm: the secondary catches up on its next SOA refresh.
- The same target list applies to every zone in the block.
## Building
This plugin is consumed by a custom CoreDNS build via `plugin.cfg`:

62
notify.go Normal file
View File

@ -0,0 +1,62 @@
package rfc2136
import (
"net"
"time"
"github.com/miekg/dns"
)
// Tunables for outbound NOTIFY delivery.
const (
	// notifyTimeout is the upper bound on a single NOTIFY exchange.
	// It is handed to the DNS client as its overall timeout, so a slow
	// or blackholed secondary is abandoned after this long instead of
	// tying up its sender indefinitely. A secondary that misses the
	// NOTIFY still converges through its own SOA refresh polling, so
	// giving up early is harmless; 2s is ample for a healthy peer to
	// answer over UDP.
	notifyTimeout = 2 * time.Second

	// defaultNotifyPort is the port joined onto any target that was
	// configured without one. In practice NOTIFY is sent to port 53.
	defaultNotifyPort = "53"
)
// sendNotify announces a change to `zone` by starting one DNS NOTIFY
// (RFC 1996) sender per entry in `targets`, then returns immediately.
//
// Every target runs in its own goroutine, so one slow or blackholed
// secondary cannot delay delivery to the others. Nothing waits for
// those goroutines: the caller is free to answer the UPDATE client
// right away, and a secondary that never hears the NOTIFY simply
// picks up the new serial on its next refresh poll.
//
// Delivery failures are reported by the sender at Debug level only;
// NOTIFY is best-effort and louder logging would flood the operator
// whenever a secondary is intermittently unreachable.
//
// A nil or empty `targets` is a no-op: the loop body never runs and
// no goroutine is started.
func sendNotify(zone string, targets []string) {
	for i := 0; i < len(targets); i++ {
		go notifyOne(zone, targets[i])
	}
}
// notifyOne sends one NOTIFY packet to `target` for `zone`. Target
// can be "host" (default port 53), "host:port", or "[ipv6]:port".
func notifyOne(zone, target string) {
addr := target
if _, _, err := net.SplitHostPort(addr); err != nil {
addr = net.JoinHostPort(addr, defaultNotifyPort)
}
msg := new(dns.Msg)
msg.SetNotify(dns.Fqdn(zone))
c := &dns.Client{Net: "udp", Timeout: notifyTimeout}
_, _, err := c.Exchange(msg, addr)
if err != nil {
log.Debugf("NOTIFY %s → %s failed: %v", zone, addr, err)
return
}
log.Debugf("NOTIFY %s → %s ok", zone, addr)
}

129
notify_test.go Normal file
View File

@ -0,0 +1,129 @@
package rfc2136
import (
"net"
"sync"
"testing"
"time"
"github.com/miekg/dns"
)
// testNotifyListener starts a UDP listener on an ephemeral 127.0.0.1
// port that behaves like a minimal secondary: it decodes each DNS
// message it receives, remembers the most recent one, and sends back
// a reply so the sender's exchange completes instead of waiting out
// its timeout.
//
// It returns the listener's host:port (usable as a NOTIFY target) and
// a getter for the last successfully decoded message (nil until one
// arrives). The listener is shut down through t.Cleanup.
func testNotifyListener(t *testing.T) (addr string, getLast func() *dns.Msg) {
	t.Helper()
	conn, err := net.ListenUDP("udp", &net.UDPAddr{IP: net.IPv4(127, 0, 0, 1), Port: 0})
	if err != nil {
		t.Fatalf("ListenUDP: %v", err)
	}

	var mu sync.Mutex
	var last *dns.Msg
	done := make(chan struct{})

	go func() {
		buf := make([]byte, 512)
		for {
			// Short deadline so the loop re-checks `done` regularly.
			conn.SetReadDeadline(time.Now().Add(500 * time.Millisecond))
			n, peer, err := conn.ReadFromUDP(buf)
			if err != nil {
				select {
				case <-done:
					return
				default:
					continue
				}
			}
			msg := new(dns.Msg)
			if unpackErr := msg.Unpack(buf[:n]); unpackErr != nil {
				continue
			}
			mu.Lock()
			last = msg
			mu.Unlock()

			// Acknowledge the message. Without a reply the sender
			// blocks until its own timeout expires, which slows any
			// test that calls the sender synchronously and keeps
			// sender goroutines alive after the test has finished.
			ack := new(dns.Msg)
			ack.SetReply(msg)
			if wire, packErr := ack.Pack(); packErr == nil {
				// Best-effort: a failed write only means the sender
				// falls back to timing out, as it did before.
				_, _ = conn.WriteToUDP(wire, peer)
			}
		}
	}()

	t.Cleanup(func() {
		close(done)
		conn.Close()
	})

	return conn.LocalAddr().String(), func() *dns.Msg {
		mu.Lock()
		defer mu.Unlock()
		return last
	}
}
// TestSendNotify_DeliversToTarget checks that sendNotify puts a
// well-formed NOTIFY on the wire: it must reach the listener with the
// NOTIFY opcode, the fully-qualified zone as its only question, and
// the AA flag set.
func TestSendNotify_DeliversToTarget(t *testing.T) {
	target, lastMsg := testNotifyListener(t)
	sendNotify("auth.example.com", []string{target})

	// The send happens on a background goroutine, so poll the listener
	// (every 20ms, for at most 1s) until the packet shows up.
	var got *dns.Msg
	giveUp := time.Now().Add(1 * time.Second)
	for got == nil && time.Now().Before(giveUp) {
		if got = lastMsg(); got == nil {
			time.Sleep(20 * time.Millisecond)
		}
	}
	if got == nil {
		t.Fatal("NOTIFY never arrived at target within 1s")
	}

	if got.Opcode != dns.OpcodeNotify {
		t.Errorf("Opcode = %d, want OpcodeNotify (%d)", got.Opcode, dns.OpcodeNotify)
	}
	if len(got.Question) != 1 || got.Question[0].Name != "auth.example.com." {
		t.Errorf("Question = %+v, want one entry with name auth.example.com.", got.Question)
	}
	if !got.Authoritative {
		t.Errorf("AA flag not set on NOTIFY")
	}
}
// TestSendNotify_NoTargets_NoCrash feeds sendNotify both flavours of
// "no targets" (a nil slice and an empty one). There is nothing to
// assert on; the test passes if neither call panics.
func TestSendNotify_NoTargets_NoCrash(t *testing.T) {
	for _, none := range [][]string{nil, {}} {
		sendNotify("auth.example.com", none)
	}
}
// TestSendNotify_BadTarget_LogsButDoesNotBlock verifies the
// fire-and-forget contract: handing sendNotify a target that nothing
// is listening on must not delay the caller. The background sender is
// left to fail on its own.
func TestSendNotify_BadTarget_LogsButDoesNotBlock(t *testing.T) {
	// Generous ceiling for "returned immediately".
	const budget = 100 * time.Millisecond
	unreachable := []string{"127.0.0.1:1"}

	began := time.Now()
	sendNotify("auth.example.com", unreachable)
	elapsed := time.Since(began)

	if elapsed > budget {
		t.Errorf("sendNotify blocked %v on unreachable target; expected fire-and-forget", elapsed)
	}
}
// TestNotifyOne_AppendsDefaultPort checks that notifyOne delivers to a
// target given in explicit host:port form, i.e. that the port
// defaulting logic leaves an already-complete address alone.
//
// NOTE: the bare-host form (which falls back to port 53) is NOT
// exercised here. Doing so would need a listener bound to port 53,
// which requires elevated privileges, so that branch is currently
// untested.
func TestNotifyOne_AppendsDefaultPort(t *testing.T) {
	addr, getLast := testNotifyListener(t)
	// Sanity check: the listener address really is in host:port form.
	if _, _, err := net.SplitHostPort(addr); err != nil {
		t.Fatalf("split: %v", err)
	}

	// notifyOne is synchronous: by the time it returns the packet has
	// been written, but the listener goroutine may not have stored it
	// yet. Poll briefly rather than relying on one fixed sleep.
	notifyOne("first.example.com", addr)

	var m *dns.Msg
	deadline := time.Now().Add(1 * time.Second)
	for time.Now().Before(deadline) {
		if m = getLast(); m != nil {
			break
		}
		time.Sleep(20 * time.Millisecond)
	}
	if m == nil || len(m.Question) == 0 || m.Question[0].Name != "first.example.com." {
		t.Errorf("host:port form did not deliver: %+v", m)
	}
}

View File

@ -69,6 +69,13 @@ type RFC2136 struct {
// disables rate limiting (test mode, or insecure deployments).
// Populated in setup() once TSIG keys are known.
rateLimit *rateLimiter
// NotifyTargets is the list of secondary servers (host[:port]) to
// send DNS NOTIFY messages to after every successful UPDATE.
// Default port 53. Empty list = no NOTIFY (secondaries rely on
// their own SOA-refresh polling). Configured via the `notify`
// directive in the Corefile.
NotifyTargets []string
}
// Name implements plugin.Handler.

View File

@ -243,6 +243,13 @@ func parse(c *caddy.Controller) (*RFC2136, error) {
gitAuthorName = gArgs[0]
gitAuthorEmail = gArgs[1]
case "notify":
nArgs := c.RemainingArgs()
if len(nArgs) < 1 {
return nil, c.Errf("notify requires at least one secondary (host or host:port)")
}
p.NotifyTargets = append(p.NotifyTargets, nArgs...)
case "rate-limit":
rArgs := c.RemainingArgs()
switch len(rArgs) {

View File

@ -179,6 +179,13 @@ func (p *RFC2136) handleUpdate(w dns.ResponseWriter, r *dns.Msg, verified bool)
log.Infof("UPDATE applied: zone=%s prereqs=%d updates=%d msg=%q",
zone, len(r.Answer), len(r.Ns), msg)
// Fire NOTIFY to configured secondaries (RFC 1996). Non-blocking:
// each target gets its own goroutine, capped by notifyTimeout. The
// UPDATE response to the client is not held on these acks — RFC
// 1996 §4 explicitly decouples them.
sendNotify(zone, p.NotifyTargets)
return p.updateResp(w, resp, dns.RcodeSuccess)
}