coredns/scripts/notify-he.py
Ryan Malloy d4a5ce9f82 coredns: script-based NOTIFY to ns1.he.net on every prep
Hurricane Electric requires asymmetric transfer config:
  - AXFR pull from 216.218.133.2 (slave.dns.he.net / ns4.he.net)
  - NOTIFY destination 216.218.130.2 (ns1.he.net)

CoreDNS's transfer plugin uses a single bidirectional `to` list for
both, which is fine in principle but breaks in a confirmed bug: any
`to` with more than one specific IPv4 silently kills server-block
listener startup (no error, zones load, but :53 never binds).
Reproduced on 1.11.3 + 1.12.2 even with a minimal fresh `docker run`.

Workaround:
  - Corefile keeps `transfer { to * }` (open AXFR; firewall does the
    real source-IP filtering on TCP/53)
  - scripts/notify-he.py crafts and sends NOTIFY messages directly to
    216.218.130.2 (only). Pure-stdlib Python — no dependencies.
  - Makefile `prep` target runs notify-he.py after prepare-zones.sh
    so every zone-bump fires NOTIFY automatically.

Verified end-to-end: HE acks NOTIFY (rcode=0) for the 10 zones it
hosts as secondaries; remaining 81 return REFUSED (rcode=5) because
HE doesn't have them configured yet. Note: HE's free slave service
acks NOTIFY but only actually re-pulls AXFR on its hourly poll cycle
(observed behavior — they're poll-based by design). NOTIFY still
useful long-term in case HE changes that behavior; harmless either way.
2026-05-18 16:57:54 -06:00

136 lines
4.3 KiB
Python
Raw Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

#!/usr/bin/env python3
"""
Send DNS NOTIFY messages (RFC 1996) to Hurricane Electric's secondary
nameservers, telling them to re-poll our zones immediately rather than
waiting for the next SOA-refresh cycle (up to 1 hour).
This replicates what CoreDNS's `transfer { to <IP> }` directive would do
natively, but as an external script because that directive silently
breaks server-block startup on CoreDNS 1.11.3 + 1.12.2 in our config.
Called automatically from `make prep`. No dependencies beyond Python 3
stdlib — we craft the 12-byte DNS header + question section by hand.

NOTIFY semantics:
  - QR=0 (query), Opcode=4 (NOTIFY), AA=1 (we're authoritative)
  - QDCOUNT=1, question = <zone> SOA IN
  - Slave responds with NOERROR + similar header, then issues an SOA query
    (followed by AXFR/IXFR if the serial advanced) to see if the zone has
    actually changed.
"""
from __future__ import annotations
import os
import random
import socket
import struct
import sys
from pathlib import Path
# Servers that receive one NOTIFY per zone; main() iterates this list.
HE_NAMESERVERS = [
    "216.218.130.2",  # ns1.he.net — the NOTIFY-accepting endpoint
                      # (HE's slave cluster replicates internally; one
                      # NOTIFY here wakes the whole pool)
]
# Destination UDP port used by send_notify().
DNS_PORT = 53
# Socket timeout, in seconds, applied by send_notify() while waiting for a reply.
TIMEOUT_SECONDS = 5
def encode_name(name: str) -> bytes:
    """Encode a domain name in DNS wire format.

    The result is a sequence of length-prefixed labels followed by the
    zero-length root label. Trailing dots are ignored, so ``"example.com"``
    and ``"example.com."`` encode identically, and the root zone (``""`` or
    ``"."``) encodes as a single null byte.

    Args:
        name: Domain name in presentation format (ASCII labels separated
            by dots).

    Returns:
        The wire-format encoding of ``name``.

    Raises:
        ValueError: If a label is empty or longer than 63 octets, if the
            encoded name exceeds 255 octets, or if a label contains
            non-ASCII characters (``UnicodeEncodeError`` is a subclass of
            ``ValueError``).
    """
    trimmed = name.rstrip(".")
    if not trimmed:
        # Root zone: splitting "" would yield one empty label and produce
        # two null bytes, so emit the lone terminator directly.
        return b"\x00"

    encoded_labels = []
    for label in trimmed.split("."):
        if not label:
            # A zero-length label mid-name would be read as the terminator
            # and truncate the name on the wire.
            raise ValueError(f"empty DNS label in name: {name!r}")
        if len(label) > 63:
            raise ValueError(f"DNS label too long: {label}")
        encoded_labels.append(bytes([len(label)]) + label.encode("ascii"))

    wire = b"".join(encoded_labels) + b"\x00"
    if len(wire) > 255:
        raise ValueError(f"DNS name too long ({len(wire)} octets): {name!r}")
    return wire
def build_notify(zone: str) -> bytes:
    """Assemble the wire bytes of a NOTIFY request for ``zone``.

    The message is a 12-byte header (random 16-bit ID, Opcode=4 with the AA
    bit set, one question, no other records) followed by a single question:
    ``<zone> SOA IN``.
    """
    message_id = random.randint(0, 0xFFFF)

    # Flag word: QR=0, Opcode=4 (bits 11-14), AA=1 (bit 10), all else 0.
    opcode_notify = 4
    aa_bit = 1 << 10
    flag_word = (opcode_notify << 11) | aa_bit  # == 0x2400

    # ID, flags, QDCOUNT=1, ANCOUNT=0, NSCOUNT=0, ARCOUNT=0
    header = struct.pack(">6H", message_id, flag_word, 1, 0, 0, 0)

    # Question section: QNAME, then QTYPE=6 (SOA) and QCLASS=1 (IN).
    question = encode_name(zone) + struct.pack(">HH", 6, 1)
    return header + question
def send_notify(zone: str, server: str) -> tuple[bool, str]:
    """Send one NOTIFY for ``zone`` to ``server`` over UDP and check the reply.

    Args:
        zone: Zone name to announce.
        server: IPv4 address (or resolvable host) of the target nameserver.

    Returns:
        ``(ok, status)``. ``ok`` is True only when a reply arrives that
        carries our transaction ID, has the QR (response) bit set, echoes
        Opcode=4 and reports RCODE=0; ``status`` is then ``"ack"``.
        Otherwise ``status`` names the failure: ``"bad zone name: ..."``,
        ``"timeout"``, ``"err: ..."``, ``"short response"``,
        ``"txid mismatch"``, ``"not a response"``, ``"opcode=N"`` or
        ``"rcode=N"``.
    """
    try:
        pkt = build_notify(zone)
    except ValueError as e:
        # An unencodable zone name should fail this zone only, not abort
        # the whole run.
        return False, f"bad zone name: {e}"
    (txid,) = struct.unpack_from(">H", pkt)

    try:
        with socket.socket(socket.AF_INET, socket.SOCK_DGRAM) as s:
            s.settimeout(TIMEOUT_SECONDS)
            s.sendto(pkt, (server, DNS_PORT))
            data, _ = s.recvfrom(512)
    except socket.timeout:
        # Must precede OSError: socket.timeout is a subclass of it.
        return False, "timeout"
    except OSError as e:
        return False, f"err: {e}"

    if len(data) < 12:
        return False, "short response"

    # Only the first two header words (ID, flags) are needed.
    rid, rflags = struct.unpack_from(">HH", data)
    if rid != txid:
        # Not the answer to our request (stray or spoofed datagram).
        return False, "txid mismatch"
    if not rflags & 0x8000:
        # QR=0 means this is a query, not a response.
        return False, "not a response"
    opcode = (rflags >> 11) & 0xF
    rcode = rflags & 0xF
    if opcode != 4:
        return False, f"opcode={opcode}"
    if rcode != 0:
        return False, f"rcode={rcode}"
    return True, "ack"
def discover_zones(prepared_dir: Path) -> list[str]:
    """List the zones present in ``prepared_dir``, sorted by name.

    Each ``*.zone`` file directly inside the directory contributes its
    filename minus the final suffix (``foo.zone`` -> ``foo``).
    """
    zone_names = [zone_file.stem for zone_file in prepared_dir.glob("*.zone")]
    zone_names.sort()
    return zone_names
def main() -> int:
    """Send a NOTIFY for every prepared zone to every HE nameserver.

    The zone directory comes from the ``DST_DIR`` environment variable
    (default ``zones-prepared``). Passing ``--quiet`` on the command line
    suppresses the per-zone lines; the final summary is always printed.

    Returns:
        0 if every NOTIFY was acknowledged, 2 if any failed, 1 if the zone
        directory is missing or contains no zones.
    """
    zone_dir = Path(os.environ.get("DST_DIR", "zones-prepared"))
    if not zone_dir.is_dir():
        print(f"ERROR: prepared dir {zone_dir} not found", file=sys.stderr)
        return 1

    zone_names = discover_zones(zone_dir)
    if not zone_names:
        print(f"ERROR: no zones in {zone_dir}", file=sys.stderr)
        return 1

    verbose = "--quiet" not in sys.argv
    ns_total = len(HE_NAMESERVERS)
    acked = failed = 0

    for zone in zone_names:
        acked_here = 0
        for ns in HE_NAMESERVERS:
            ok, status = send_notify(zone, ns)
            if ok:
                acked_here += 1
                continue
            # Failures are reported one line per (zone, nameserver) pair.
            if verbose:
                print(f"{zone:35s}{ns:15s} {status}")
            failed += 1
        acked += acked_here
        # Acks are reported once per zone, as "<acked>/<total>".
        if acked_here and verbose:
            print(f"{zone:35s}{acked_here}/{ns_total} HE ns")

    print(
        f"NOTIFY summary: {acked} acks, {failed} fails "
        f"across {len(zone_names)} zones × {ns_total} nameservers"
    )
    return 2 if failed else 0
# Script entry point: main()'s return value becomes the process exit status.
if __name__ == "__main__":
    raise SystemExit(main())