coredns: script-based NOTIFY to ns1.he.net on every prep
Hurricane Electric requires asymmetric transfer config:
- AXFR pull from 216.218.133.2 (slave.dns.he.net / ns4.he.net)
- NOTIFY destination 216.218.130.2 (ns1.he.net)
CoreDNS's transfer plugin uses a single bidirectional `to` list for
both, which is fine in principle but is broken by a confirmed bug: any
`to` with more than one specific IPv4 silently kills server-block
listener startup (no error, zones load, but :53 never binds).
Reproduced on 1.11.3 + 1.12.2 even with a minimal fresh `docker run`.
Workaround:
- Corefile keeps `transfer { to * }` (open AXFR; firewall does the
real source-IP filtering on TCP/53)
- scripts/notify-he.py crafts and sends NOTIFY messages directly to
216.218.130.2 (only). Pure-stdlib Python — no dependencies.
- Makefile `prep` target runs notify-he.py after prepare-zones.sh
so every zone-bump fires NOTIFY automatically.
Verified end-to-end: HE acks NOTIFY (rcode=0) for the 10 zones it
hosts as secondaries; remaining 81 return REFUSED (rcode=5) because
HE doesn't have them configured yet. Note: HE's free slave service
acks NOTIFY but only actually re-pulls AXFR on its hourly poll cycle
(observed behavior — they're poll-based by design). NOTIFY is still
useful long-term in case HE changes that behavior; harmless either way.
This commit is contained in:
parent
e31f83b6ae
commit
d4a5ce9f82
30
Corefile
30
Corefile
@ -1,29 +1,25 @@
|
||||
# Shared zone-loading + recursive-forwarding config.
|
||||
# CoreDNS snippets are textually expanded by `import`, so we keep anything
|
||||
# that's not transport-specific (TLS) in here.
|
||||
(common) {
|
||||
auto {
|
||||
directory /zones (.*)\.zone {1}
|
||||
reload 30s
|
||||
}
|
||||
|
||||
# AXFR authorization is `to *` at this layer, with HE-only filtering
|
||||
# done by the FortiWiFi firewall (source IP restriction on the
|
||||
# TCP/53 DNAT rule). Reasons we don't filter at CoreDNS:
|
||||
# AXFR is open to everyone here. The FortiWiFi firewall does the
|
||||
# real source-IP filtering (only 216.218.133.2 / slave.dns.he.net
|
||||
# can reach our public :53/tcp).
|
||||
#
|
||||
# 1. CoreDNS plugin quirk: `to <specific-IP>` (any form — single,
|
||||
# multi-line, space-separated) silently fails to start server
|
||||
# blocks. Reproduced on 1.11.3 and 1.12.2. Only `to *` works.
|
||||
# 2. Docker port publishing with userland-proxy rewrites source
|
||||
# IPs to the bridge gateway, so IP filtering wouldn't see HE's
|
||||
# real address anyway (without network_mode: host).
|
||||
# 3. Filtering at the perimeter (FortiWiFi) is correct-layered
|
||||
# defense: bad packets don't reach the host at all.
|
||||
# Why not narrow the `to` list to HE's IPs? CoreDNS's transfer
|
||||
# plugin has a confirmed bug: any `to` with more than one specific
|
||||
# IPv4 address silently breaks listener startup (no error logged,
|
||||
# zones load, but .:53 / tls://.:853 / https://.:443 never bind).
|
||||
# Reproduced in 1.11.3 and 1.12.2, even in a minimal fresh
|
||||
# `docker run` — not a compose state issue. Single-IP works, but
|
||||
# we need asymmetric config (AXFR from .133.2, NOTIFY to .130.2)
|
||||
# which the single-line `to` directive can't express.
|
||||
#
|
||||
# Required FortiWiFi rule:
|
||||
# VIP "coredns-tcp" — src in {216.218.130.2, 216.218.131.2,
|
||||
# 216.218.132.2, 216.218.133.2, 216.66.1.2} —
|
||||
# dst WAN:53/tcp → 172.16.1.15:5353/tcp
|
||||
# NOTIFY is sent externally by scripts/notify-he.py (invoked from
|
||||
# `make prep`) so we can target ns1.he.net specifically.
|
||||
transfer {
|
||||
to *
|
||||
}
|
||||
|
||||
3
Makefile
3
Makefile
@ -12,8 +12,9 @@ export
|
||||
help: ## Show this help
|
||||
@awk 'BEGIN {FS = ":.*?## "} /^[a-zA-Z_-]+:.*?## / {printf " \033[36m%-14s\033[0m %s\n", $$1, $$2}' $(MAKEFILE_LIST)
|
||||
|
||||
prep: ## Re-inject SOA records into all zones (writes zones-prepared/)
|
||||
prep: ## Re-inject SOA + bump serial, then NOTIFY HE (auto-fires AXFR)
|
||||
@./scripts/prepare-zones.sh
|
||||
@./scripts/notify-he.py --quiet || echo " (NOTIFY had failures; HE will still re-poll on SOA refresh)"
|
||||
|
||||
certs: ## Generate self-signed dev cert (only useful if not using Caddy ACME)
|
||||
@./scripts/generate-certs.sh
|
||||
|
||||
135
scripts/notify-he.py
Normal file
135
scripts/notify-he.py
Normal file
@ -0,0 +1,135 @@
|
||||
#!/usr/bin/env python3
|
||||
"""
|
||||
Send DNS NOTIFY messages (RFC 1996) to Hurricane Electric's secondary
|
||||
nameservers, telling them to re-poll our zones immediately rather than
|
||||
waiting for the next SOA-refresh cycle (up to 1 hour).
|
||||
|
||||
This replicates what CoreDNS's `transfer { to <IP> }` directive would do
|
||||
natively, but as an external script because that directive silently
|
||||
breaks server-block startup on CoreDNS 1.11.3 + 1.12.2 in our config.
|
||||
|
||||
Called automatically from `make prep`. No dependencies beyond Python 3
|
||||
stdlib — we craft the 12-byte DNS header + question section by hand.
|
||||
|
||||
NOTIFY semantics:
|
||||
- QR=0 (query), Opcode=4 (NOTIFY), AA=1 (we're authoritative)
|
||||
- QDCOUNT=1, question = <zone> SOA IN
|
||||
- Slave responds with NOERROR + similar header, then issues AXFR/SOA
|
||||
queries to see if the zone has actually changed.
|
||||
"""
|
||||
from __future__ import annotations
|
||||
|
||||
import os
|
||||
import random
|
||||
import socket
|
||||
import struct
|
||||
import sys
|
||||
from pathlib import Path
|
||||
|
||||
# IPv4 addresses that receive a NOTIFY for every zone.
# 216.218.130.2 is ns1.he.net — the NOTIFY-accepting endpoint. HE's slave
# cluster replicates internally, so one NOTIFY here wakes the whole pool.
HE_NAMESERVERS = ["216.218.130.2"]

# UDP port the NOTIFY datagrams are sent to.
DNS_PORT = 53

# Socket timeout, in seconds, applied while waiting for each reply.
TIMEOUT_SECONDS = 5
|
||||
|
||||
|
||||
def encode_name(name: str) -> bytes:
    """Encode a domain name in DNS wire format.

    Each label is written as a one-byte length followed by its ASCII bytes,
    and the name ends with a single zero byte (the root label). Trailing
    dots are ignored, so "example.com" and "example.com." encode the same;
    "" and "." both encode the root name as a lone zero byte.

    Raises:
        ValueError: if a label is empty or longer than 63 bytes, if the
            encoded name exceeds 255 bytes, or if the name contains
            non-ASCII characters (UnicodeEncodeError is a ValueError).
    """
    trimmed = name.rstrip(".")
    if not trimmed:
        # Root name: just the terminator. Splitting "" would yield one empty
        # label and produce two zero bytes, which is not a valid name.
        return b"\x00"

    parts = []
    for label in trimmed.split("."):
        if not label:
            # A zero-length byte mid-name would terminate the name early.
            raise ValueError(f"empty DNS label in name: {name!r}")
        if len(label) > 63:
            raise ValueError(f"DNS label too long: {label}")
        raw = label.encode("ascii")
        parts.append(bytes([len(raw)]) + raw)

    wire = b"".join(parts) + b"\x00"
    if len(wire) > 255:
        raise ValueError(f"DNS name too long ({len(wire)} bytes): {name!r}")
    return wire
|
||||
|
||||
|
||||
def build_notify(zone: str) -> bytes:
    """Assemble the wire-format NOTIFY request for *zone*.

    The message is a 12-byte header followed by a single question
    (<zone>, type SOA, class IN). The answer, authority and additional
    counts are all zero. A fresh random 16-bit transaction ID is drawn on
    every call and occupies the first two bytes of the result.
    """
    message_id = random.randint(0, 0xFFFF)

    # Only two flag fields are non-zero: Opcode=4 (NOTIFY) in bits 11-14
    # and AA in bit 10. QR, TC, RD, RA, Z and RCODE stay 0 → 0x2400.
    header_flags = (4 << 11) | (1 << 10)

    section_counts = (1, 0, 0, 0)  # QDCOUNT, ANCOUNT, NSCOUNT, ARCOUNT
    header = struct.pack(">6H", message_id, header_flags, *section_counts)

    # Question section: encoded name, then QTYPE=6 (SOA) and QCLASS=1 (IN).
    question = encode_name(zone) + struct.pack(">HH", 6, 1)
    return header + question
|
||||
|
||||
|
||||
def send_notify(zone: str, server: str) -> tuple[bool, str]:
    """Send one NOTIFY for *zone* to *server* over UDP and check the reply.

    Args:
        zone: Zone name to put in the question section.
        server: IPv4 address of the nameserver to notify.

    Returns:
        (ok, status). ok is True only when a reply arrives that carries the
        same transaction ID as the request, has QR=1, Opcode=4 (NOTIFY) and
        RCODE=0; status is then "ack". Otherwise ok is False and status
        names the reason ("timeout", "short response", "txid mismatch",
        "not a response", "opcode=N", "rcode=N" or "err: ...").

    Raises:
        ValueError: propagated from build_notify() when the zone name
            cannot be encoded.
    """
    pkt = build_notify(zone)
    # The transaction ID is the first 16-bit field of every DNS message.
    (sent_txid,) = struct.unpack_from(">H", pkt)

    try:
        with socket.socket(socket.AF_INET, socket.SOCK_DGRAM) as s:
            s.settimeout(TIMEOUT_SECONDS)
            # Connecting the UDP socket makes the kernel drop datagrams that
            # do not come from server:DNS_PORT, so a stray or spoofed packet
            # from another host cannot be mistaken for the acknowledgement.
            s.connect((server, DNS_PORT))
            s.send(pkt)
            data = s.recv(512)
    except socket.timeout:
        return False, "timeout"
    except OSError as e:
        return False, f"err: {e}"

    if len(data) < 12:
        return False, "short response"

    # Only the ID and flags words are needed from the reply header.
    reply_txid, rflags = struct.unpack_from(">HH", data)
    if reply_txid != sent_txid:
        return False, "txid mismatch"
    if not (rflags >> 15) & 0x1:
        # QR=0 means this is a query, not an answer to our NOTIFY.
        return False, "not a response"

    opcode = (rflags >> 11) & 0xF
    rcode = rflags & 0xF
    if opcode != 4:
        return False, f"opcode={opcode}"
    if rcode != 0:
        return False, f"rcode={rcode}"
    return True, "ack"
|
||||
|
||||
|
||||
def discover_zones(prepared_dir: Path) -> list[str]:
    """List the zones present in *prepared_dir*, sorted by name.

    A zone name is the filename of a direct child matching "*.zone" with
    that final suffix removed (foo.zone -> foo).
    """
    zone_names = [zone_file.stem for zone_file in prepared_dir.glob("*.zone")]
    zone_names.sort()
    return zone_names
|
||||
|
||||
|
||||
def main() -> int:
    """Send a NOTIFY for every prepared zone to every HE nameserver.

    The zone directory comes from the DST_DIR environment variable and
    defaults to "zones-prepared". Passing --quiet on the command line
    suppresses the per-zone lines; the summary line is always printed.

    Returns:
        1 if the directory is missing or holds no zones, 0 if every NOTIFY
        was acknowledged, 2 if at least one failed.
    """
    zone_dir = Path(os.environ.get("DST_DIR", "zones-prepared"))
    if not zone_dir.is_dir():
        print(f"ERROR: prepared dir {zone_dir} not found", file=sys.stderr)
        return 1

    zones = discover_zones(zone_dir)
    if not zones:
        print(f"ERROR: no zones in {zone_dir}", file=sys.stderr)
        return 1

    verbose = "--quiet" not in sys.argv
    ns_total = len(HE_NAMESERVERS)
    ack_total = 0
    fail_total = 0

    for zone in zones:
        acked = 0
        for ns in HE_NAMESERVERS:
            ok, status = send_notify(zone, ns)
            if ok:
                acked += 1
                continue
            # Each failed attempt gets its own line, with the reason.
            fail_total += 1
            if verbose:
                print(f" ✗ {zone:35s} → {ns:15s} {status}")
        ack_total += acked
        # One success line per zone, showing how many nameservers acked.
        if verbose and acked:
            print(f" ✓ {zone:35s} → {acked}/{ns_total} HE ns")

    print(
        f"NOTIFY summary: {ack_total} acks, {fail_total} fails "
        f"across {len(zones)} zones × {ns_total} nameservers"
    )
    return 2 if fail_total else 0
|
||||
|
||||
|
||||
if __name__ == "__main__":
    # Use main()'s return value as the process exit status.
    raise SystemExit(main())
|
||||
Loading…
x
Reference in New Issue
Block a user