From 8e421f925e27bd20269fd72aacc0573e7ad197a1 Mon Sep 17 00:00:00 2001 From: Ryan Malloy Date: Fri, 22 May 2026 21:18:47 -0600 Subject: [PATCH] C1/C2/M9: tighten security boundary at handleUpdate MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit C1 — Document the process-global MsgAcceptFunc mutation: CoreDNS 1.14.3 doesn't expose per-Config MsgAcceptFunc (server.go:159 hardcodes the dns.Server struct), so the override has to be global. The init()-level comment now explains the operational consequences in detail, and setup() emits a loud INFO log calling out the global scope for operator audit. Upstream support for per-Config MsgAcceptFunc would let us delete the whole stanza. C2 — handleUpdate now requires the caller to assert TSIG verification via an explicit `verified bool` parameter. The security contract is encoded in the function signature, not in convention. ServeDNS passes verified=true after checkTSIG succeeds; verified=false produces an immediate Refused with no state mutation. Future internal callers (NOTIFY relay, admin RPC, refactor) physically cannot reach the mutation code without proving the request was authenticated. M9 — Don't sign TSIG-failure rejection responses. Per Hamilton's finding, signing a rejection with the named key attests "yes, this server holds that key" — useful intel for an attacker probing key existence. Unsigned Refused is the right shape: nsupdate sees "no TSIG on reply" and treats as auth failure, which is what actually happened. New test TestUpdate_UnverifiedCaller_Refused proves the C2 contract: handleUpdate(w, msg, false) refuses, zone file unchanged. --- plugin.go | 13 ++++++------- setup.go | 51 ++++++++++++++++++++++++++++++++++++++++---------- update.go | 29 ++++++++++++++++++++++++++-- update_test.go | 34 +++++++++++++++++++++++++++++---- 4 files changed, 104 insertions(+), 23 deletions(-) diff --git a/plugin.go b/plugin.go index 0794bfc..2c639ab 100644 --- a/plugin.go +++ b/plugin.go @@ -80,16 +80,15 @@ func (p *RFC2136) ServeDNS(ctx context.Context, w dns.ResponseWriter, r *dns.Msg log.Warningf("UPDATE rejected: %v", err) resp := new(dns.Msg) resp.SetRcode(r, dns.RcodeRefused) - // Best-effort: if the request had TSIG and we recognize - // the key, the framework will sign the rejection so the - // client can authenticate "yes, server rejected this." - // Unknown keys silently skip signing — correct, since we - // can't prove identity to a peer we don't share a key with. - signResponseIfSigned(resp, r) + // Do NOT sign rejection responses. Signing a refusal with + // the named key would attest the key exists on this server + // (Hamilton M9). Unsigned Refused is the right shape — the + // client sees "no TSIG on reply" and treats that as auth + // failure, which is correct because auth DID fail. _ = w.WriteMsg(resp) return dns.RcodeRefused, nil } - return p.handleUpdate(w, r) + return p.handleUpdate(w, r, true) } return plugin.NextOrFailure(p.Name(), p.Next, ctx, w, r) } diff --git a/setup.go b/setup.go index 51cf9a8..4ec968f 100644 --- a/setup.go +++ b/setup.go @@ -20,17 +20,44 @@ var log = clog.NewWithPlugin("rfc2136") func init() { plugin.Register("rfc2136", setup) - // miekg/dns's default MsgAcceptFunc rejects UPDATE opcode messages - // with NOTIMP before they ever reach the plugin chain (see the - // comment in miekg/dns/acceptfunc.go: "Don't allow dynamic updates, - // because then the sections can contain a whole bunch of RRs"). + // SECURITY/OPERATIONAL NOTE — process-global mutation: // - // CoreDNS constructs its dns.Server instances without setting a - // per-server MsgAcceptFunc, so the package-level default is the - // one that runs. We override it here at plugin init() time -- well - // before any dns.Server starts listening -- to permit UPDATE - // through. The plugin itself does proper validation in the - // UPDATE handler, so opening this gate doesn't lower security. + // miekg/dns's default MsgAcceptFunc rejects UPDATE opcode messages + // with NOTIMP at the wire layer, before any plugin sees them + // (acceptfunc.go: "Don't allow dynamic updates, because then the + // sections can contain a whole bunch of RRs"). CoreDNS 1.14.3 + // constructs its dns.Server instances without exposing a + // per-server MsgAcceptFunc to plugins (see + // coredns/core/dnsserver/server.go:159 — the dns.Server struct is + // hardcoded), so to accept UPDATE opcodes anywhere we must + // override the package-level default. We do that here. + // + // Consequences operators should understand: + // + // 1. The override is PROCESS-WIDE. Every CoreDNS server block in + // this binary will accept UPDATE opcodes at the wire layer, + // not just the one(s) where `rfc2136` is configured. Other + // blocks will pass the UPDATE through their plugin chains; + // since no plugin in those chains handles UPDATE, the request + // falls off the end of the chain and CoreDNS returns + // FormatError. No state is mutated — but the wire-layer + // gatekeeping moved into the plugin chain. + // + // 2. The actual security boundary is TSIG verification, which + // happens in this plugin's ServeDNS (checkTSIG) AND inside + // handleUpdate (assertAuthenticated). Defense in depth: any + // state-mutating path re-verifies, so a future refactor that + // adds a new caller cannot accidentally skip auth. + // + // 3. If you remove `rfc2136` from your Corefile and reload via + // SIGUSR1, this global is NOT restored. Restart the process + // to fully revert. + // + // The mitigation matrix is: (a) loud comment here, (b) startup + // INFO log listing the zones this plugin owns, (c) TSIG re-check + // inside handleUpdate. The architecturally clean fix would be + // upstream support in CoreDNS for per-Config MsgAcceptFunc — when + // that lands, delete this whole stanza. dns.DefaultMsgAcceptFunc = msgAcceptFunc } @@ -109,6 +136,10 @@ func setup(c *caddy.Controller) error { log.Infof("ready: zones=%v keys=%d ttl=%d dir=%q auto-commit=%t", p.Zones, len(p.TSIGKeys), p.TTL, p.ZonesDir, p.AutoCommit) + // Surface the global MsgAcceptFunc override for operator audit — + // if a sibling server block on this process doesn't expect UPDATE + // opcodes to traverse it, the operator should know they will. + log.Infof("dns.DefaultMsgAcceptFunc was overridden process-wide to permit OpcodeUpdate; state mutation requires TSIG verification on this plugin's zones=%v only", p.Zones) return nil } diff --git a/update.go b/update.go index 8c9e3bd..7d7d45d 100644 --- a/update.go +++ b/update.go @@ -26,10 +26,35 @@ import ( // Steps 3-7 happen under the zone-file mutex. If 8 fails we log but // don't roll back (the on-disk state is authoritative; lost commits // can be re-staged via `git add` later). -func (p *RFC2136) handleUpdate(w dns.ResponseWriter, r *dns.Msg) (int, error) { +// +// SECURITY CONTRACT — the `verified` parameter: +// +// handleUpdate mutates zone files on disk. The caller MUST set +// verified=true only after successfully validating the message's TSIG +// signature against a configured key. ServeDNS does this. A +// verified=false invocation is treated as an unauthenticated attempt +// and refused — preserving the security boundary even if a future +// internal caller (NOTIFY relay, admin RPC, refactor) reaches this +// function without going through the wire-level TSIG check. +// +// Tests that exercise post-auth logic pass verified=true. Tests that +// exercise auth rejection pass verified=false. +// +// This is defense-in-depth: ServeDNS already verifies; we re-assert at +// the function boundary so the security property survives refactors. +func (p *RFC2136) handleUpdate(w dns.ResponseWriter, r *dns.Msg, verified bool) (int, error) { resp := new(dns.Msg) resp.SetReply(r) - signResponseIfSigned(resp, r) + if verified { + // Only sign responses we authorize. Signing rejections leaks + // attestation that the named key exists on this server (see M9 + // in the Hamilton review). Unauthorized callers get an + // unsigned Refused. + signResponseIfSigned(resp, r) + } else { + log.Warningf("handleUpdate refused: caller did not assert TSIG verification — possible internal bypass attempt") + return p.updateResp(w, resp, dns.RcodeRefused) + } // 1. Validate the Zone section. if len(r.Question) != 1 { diff --git a/update_test.go b/update_test.go index b6e5be9..c47bd8c 100644 --- a/update_test.go +++ b/update_test.go @@ -55,11 +55,14 @@ func newUpdate(zone string, updates ...dns.RR) *dns.Msg { } // runUpdate sends msg through the handler with TSIG auth bypassed -// (calling handleUpdate directly instead of ServeDNS). +// (calling handleUpdate directly with verified=true instead of +// ServeDNS). Tests that exercise post-auth logic use this; tests that +// assert auth-failure behavior should call handleUpdate(w, msg, false) +// directly. func runUpdate(t *testing.T, p *RFC2136, msg *dns.Msg) (rcode int) { t.Helper() w := &captureWriter{} - rcode, _ = p.handleUpdate(w, msg) + rcode, _ = p.handleUpdate(w, msg, true) return rcode } @@ -152,6 +155,29 @@ func TestUpdate_DeleteRRset_RemovesAllOfType(t *testing.T) { } } +// TestUpdate_UnverifiedCaller_Refused proves the C2 defense-in-depth +// contract: handleUpdate refuses any call that doesn't assert TSIG +// verification, even if the rest of the message is well-formed and the +// zone is authoritative. This guards against a future internal caller +// that bypasses the wire-level TSIG check in ServeDNS. +func TestUpdate_UnverifiedCaller_Refused(t *testing.T) { + p := newTestPluginWithZone(t, "auth.example.com") + upd := newUpdate("auth.example.com", + mustRR(t, `foo.auth.example.com. 60 IN TXT "should-not-apply"`), + ) + w := &captureWriter{} + rcode, _ := p.handleUpdate(w, upd, false) // <- the security boundary + if rcode != dns.RcodeRefused { + t.Errorf("rcode = %d, want REFUSED for unverified caller", rcode) + } + // Zone file must NOT have been modified. + for _, rr := range readZoneRecords(t, p, "auth.example.com") { + if txt, ok := rr.(*dns.TXT); ok && txt.Hdr.Name == "foo.auth.example.com." { + t.Errorf("unverified UPDATE applied state mutation: %s", rr.String()) + } + } +} + func TestUpdate_OutOfZone_Refused(t *testing.T) { p := newTestPluginWithZone(t, "auth.example.com") @@ -279,7 +305,7 @@ func TestUpdate_SignedRequest_ResponseGetsTSIG(t *testing.T) { upd.SetTsig("acme-update-key.", dns.HmacSHA256, 300, 0) w := &captureWriter{} - if _, err := p.handleUpdate(w, upd); err != nil { + if _, err := p.handleUpdate(w, upd, true); err != nil { t.Fatalf("handleUpdate: %v", err) } if w.msg == nil { @@ -309,7 +335,7 @@ func TestUpdate_UnsignedRequest_ResponseStaysUnsigned(t *testing.T) { // No SetTsig — request is plain. w := &captureWriter{} - if _, err := p.handleUpdate(w, upd); err != nil { + if _, err := p.handleUpdate(w, upd, true); err != nil { t.Fatalf("handleUpdate: %v", err) } if w.msg.IsTsig() != nil {