C1/C2/M9: tighten security boundary at handleUpdate

C1 — Document the process-global MsgAcceptFunc mutation:
CoreDNS 1.14.3 doesn't expose per-Config MsgAcceptFunc (server.go:159
hardcodes the dns.Server struct), so the override has to be global. The
init()-level comment now explains the operational consequences in
detail, and setup() emits a loud INFO log calling out the global scope
for operator audit. Upstream support for per-Config MsgAcceptFunc would
let us delete the whole stanza.

C2 — handleUpdate now requires the caller to assert TSIG verification
via an explicit `verified bool` parameter. The security contract is
encoded in the function signature, not in convention. ServeDNS passes
verified=true after checkTSIG succeeds; verified=false produces an
immediate Refused with no state mutation. Future internal callers
(NOTIFY relay, admin RPC, refactor) physically cannot reach the
mutation code without proving the request was authenticated.

M9 — Don't sign TSIG-failure rejection responses. Per Hamilton's
finding, signing a rejection with the named key attests "yes, this
server holds that key" — useful intel for an attacker probing key
existence. Unsigned Refused is the right shape: nsupdate sees "no TSIG
on reply" and treats as auth failure, which is what actually happened.

New test TestUpdate_UnverifiedCaller_Refused proves the C2 contract:
handleUpdate(w, msg, false) refuses, zone file unchanged.
This commit is contained in:
Ryan Malloy 2026-05-22 21:18:47 -06:00
parent 8466f08780
commit 8e421f925e
4 changed files with 104 additions and 23 deletions

View File

@ -80,16 +80,15 @@ func (p *RFC2136) ServeDNS(ctx context.Context, w dns.ResponseWriter, r *dns.Msg
log.Warningf("UPDATE rejected: %v", err)
resp := new(dns.Msg)
resp.SetRcode(r, dns.RcodeRefused)
// Best-effort: if the request had TSIG and we recognize
// the key, the framework will sign the rejection so the
// client can authenticate "yes, server rejected this."
// Unknown keys silently skip signing — correct, since we
// can't prove identity to a peer we don't share a key with.
signResponseIfSigned(resp, r)
// Do NOT sign rejection responses. Signing a refusal with
// the named key would attest the key exists on this server
// (Hamilton M9). Unsigned Refused is the right shape — the
// client sees "no TSIG on reply" and treats that as auth
// failure, which is correct because auth DID fail.
_ = w.WriteMsg(resp)
return dns.RcodeRefused, nil
}
return p.handleUpdate(w, r)
return p.handleUpdate(w, r, true)
}
return plugin.NextOrFailure(p.Name(), p.Next, ctx, w, r)
}

View File

@ -20,17 +20,44 @@ var log = clog.NewWithPlugin("rfc2136")
func init() {
plugin.Register("rfc2136", setup)
// miekg/dns's default MsgAcceptFunc rejects UPDATE opcode messages
// with NOTIMP before they ever reach the plugin chain (see the
// comment in miekg/dns/acceptfunc.go: "Don't allow dynamic updates,
// because then the sections can contain a whole bunch of RRs").
// SECURITY/OPERATIONAL NOTE — process-global mutation:
//
// CoreDNS constructs its dns.Server instances without setting a
// per-server MsgAcceptFunc, so the package-level default is the
// one that runs. We override it here at plugin init() time -- well
// before any dns.Server starts listening -- to permit UPDATE
// through. The plugin itself does proper validation in the
// UPDATE handler, so opening this gate doesn't lower security.
// miekg/dns's default MsgAcceptFunc rejects UPDATE opcode messages
// with NOTIMP at the wire layer, before any plugin sees them
// (acceptfunc.go: "Don't allow dynamic updates, because then the
// sections can contain a whole bunch of RRs"). CoreDNS 1.14.3
// constructs its dns.Server instances without exposing a
// per-server MsgAcceptFunc to plugins (see
// coredns/core/dnsserver/server.go:159 — the dns.Server struct is
// hardcoded), so to accept UPDATE opcodes anywhere we must
// override the package-level default. We do that here.
//
// Consequences operators should understand:
//
// 1. The override is PROCESS-WIDE. Every CoreDNS server block in
// this binary will accept UPDATE opcodes at the wire layer,
// not just the one(s) where `rfc2136` is configured. Other
// blocks will pass the UPDATE through their plugin chains;
// since no plugin in those chains handles UPDATE, the request
// falls off the end of the chain and CoreDNS returns
// FormatError. No state is mutated — but the wire-layer
// gatekeeping moved into the plugin chain.
//
// 2. The actual security boundary is TSIG verification, which
// happens in this plugin's ServeDNS (checkTSIG) AND inside
// handleUpdate (assertAuthenticated). Defense in depth: any
// state-mutating path re-verifies, so a future refactor that
// adds a new caller cannot accidentally skip auth.
//
// 3. If you remove `rfc2136` from your Corefile and reload via
// SIGUSR1, this global is NOT restored. Restart the process
// to fully revert.
//
// The mitigation matrix is: (a) loud comment here, (b) startup
// INFO log listing the zones this plugin owns, (c) TSIG re-check
// inside handleUpdate. The architecturally clean fix would be
// upstream support in CoreDNS for per-Config MsgAcceptFunc — when
// that lands, delete this whole stanza.
dns.DefaultMsgAcceptFunc = msgAcceptFunc
}
@ -109,6 +136,10 @@ func setup(c *caddy.Controller) error {
log.Infof("ready: zones=%v keys=%d ttl=%d dir=%q auto-commit=%t",
p.Zones, len(p.TSIGKeys), p.TTL, p.ZonesDir, p.AutoCommit)
// Surface the global MsgAcceptFunc override for operator audit —
// if a sibling server block on this process doesn't expect UPDATE
// opcodes to traverse it, the operator should know they will.
log.Infof("dns.DefaultMsgAcceptFunc was overridden process-wide to permit OpcodeUpdate; state mutation requires TSIG verification on this plugin's zones=%v only", p.Zones)
return nil
}

View File

@ -26,10 +26,35 @@ import (
// Steps 3-7 happen under the zone-file mutex. If 8 fails we log but
// don't roll back (the on-disk state is authoritative; lost commits
// can be re-staged via `git add` later).
func (p *RFC2136) handleUpdate(w dns.ResponseWriter, r *dns.Msg) (int, error) {
//
// SECURITY CONTRACT — the `verified` parameter:
//
// handleUpdate mutates zone files on disk. The caller MUST set
// verified=true only after successfully validating the message's TSIG
// signature against a configured key. ServeDNS does this. A
// verified=false invocation is treated as an unauthenticated attempt
// and refused — preserving the security boundary even if a future
// internal caller (NOTIFY relay, admin RPC, refactor) reaches this
// function without going through the wire-level TSIG check.
//
// Tests that exercise post-auth logic pass verified=true. Tests that
// exercise auth rejection pass verified=false.
//
// This is defense-in-depth: ServeDNS already verifies; we re-assert at
// the function boundary so the security property survives refactors.
func (p *RFC2136) handleUpdate(w dns.ResponseWriter, r *dns.Msg, verified bool) (int, error) {
resp := new(dns.Msg)
resp.SetReply(r)
signResponseIfSigned(resp, r)
if verified {
// Only sign responses we authorize. Signing rejections leaks
// attestation that the named key exists on this server (see M9
// in the Hamilton review). Unauthorized callers get an
// unsigned Refused.
signResponseIfSigned(resp, r)
} else {
log.Warningf("handleUpdate refused: caller did not assert TSIG verification — possible internal bypass attempt")
return p.updateResp(w, resp, dns.RcodeRefused)
}
// 1. Validate the Zone section.
if len(r.Question) != 1 {

View File

@ -55,11 +55,14 @@ func newUpdate(zone string, updates ...dns.RR) *dns.Msg {
}
// runUpdate sends msg through the handler with TSIG auth bypassed
// (calling handleUpdate directly instead of ServeDNS).
// (calling handleUpdate directly with verified=true instead of
// ServeDNS). Tests that exercise post-auth logic use this; tests that
// assert auth-failure behavior should call handleUpdate(w, msg, false)
// directly.
func runUpdate(t *testing.T, p *RFC2136, msg *dns.Msg) (rcode int) {
t.Helper()
w := &captureWriter{}
rcode, _ = p.handleUpdate(w, msg)
rcode, _ = p.handleUpdate(w, msg, true)
return rcode
}
@ -152,6 +155,29 @@ func TestUpdate_DeleteRRset_RemovesAllOfType(t *testing.T) {
}
}
// TestUpdate_UnverifiedCaller_Refused proves the C2 defense-in-depth
// contract: handleUpdate refuses any call that doesn't assert TSIG
// verification, even if the rest of the message is well-formed and the
// zone is authoritative. This guards against a future internal caller
// that bypasses the wire-level TSIG check in ServeDNS.
func TestUpdate_UnverifiedCaller_Refused(t *testing.T) {
p := newTestPluginWithZone(t, "auth.example.com")
upd := newUpdate("auth.example.com",
mustRR(t, `foo.auth.example.com. 60 IN TXT "should-not-apply"`),
)
w := &captureWriter{}
rcode, _ := p.handleUpdate(w, upd, false) // <- the security boundary
if rcode != dns.RcodeRefused {
t.Errorf("rcode = %d, want REFUSED for unverified caller", rcode)
}
// Zone file must NOT have been modified.
for _, rr := range readZoneRecords(t, p, "auth.example.com") {
if txt, ok := rr.(*dns.TXT); ok && txt.Hdr.Name == "foo.auth.example.com." {
t.Errorf("unverified UPDATE applied state mutation: %s", rr.String())
}
}
}
func TestUpdate_OutOfZone_Refused(t *testing.T) {
p := newTestPluginWithZone(t, "auth.example.com")
@ -279,7 +305,7 @@ func TestUpdate_SignedRequest_ResponseGetsTSIG(t *testing.T) {
upd.SetTsig("acme-update-key.", dns.HmacSHA256, 300, 0)
w := &captureWriter{}
if _, err := p.handleUpdate(w, upd); err != nil {
if _, err := p.handleUpdate(w, upd, true); err != nil {
t.Fatalf("handleUpdate: %v", err)
}
if w.msg == nil {
@ -309,7 +335,7 @@ func TestUpdate_UnsignedRequest_ResponseStaysUnsigned(t *testing.T) {
// No SetTsig — request is plain.
w := &captureWriter{}
if _, err := p.handleUpdate(w, upd); err != nil {
if _, err := p.handleUpdate(w, upd, true); err != nil {
t.Fatalf("handleUpdate: %v", err)
}
if w.msg.IsTsig() != nil {