From c1afe77b272aa7b8eed29aca07751130d0e20363 Mon Sep 17 00:00:00 2001
From: Ryan Malloy
Date: Thu, 14 May 2026 01:34:57 -0600
Subject: [PATCH] coredns: production Let's Encrypt cert via Caddy sidecar
 (DNS-01 + Vultr)
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Replaces the self-signed dev cert flow with a real LE prod cert for
dns.l.supported.systems, issued and auto-renewed by a Caddy sidecar
using DNS-01 challenge against the Vultr API.

Components:
- caddy/Dockerfile builds Caddy 2.10.0 with caddy-dns/vultr plugin via
  xcaddy. GOTOOLCHAIN=auto so xcaddy can fetch newer Go on demand when
  plugin versions advance their minimum Go.
- caddy/Caddyfile uses DNS-01 with explicit public resolvers (1.1.1.1,
  9.9.9.9, 1.0.0.1) for the propagation check. Without that, Docker's
  embedded DNS leaks the container into the host's split-horizon LAN
  DNS, which returns LAN IPs for ns1.vultr.com and the propagation
  check fails.
- docker-compose: caddy service shares ./caddy-data with coredns via a
  read-only subpath mount of ./caddy-data/caddy. Note that this mount
  covers Caddy's whole storage directory, not only certificates/: any
  acme/ directory (account private key) under it is NOT excluded by
  the mount itself. The healthcheck only opens up permissions on
  certificates/, so acme/ keeps whatever permissions Caddy gave it.
- Healthcheck doubles as a symlinker: maintains stable cert.pem /
  key.pem names at /data/caddy/ and chmods cert files + their dirs to
  be readable by CoreDNS's nonroot user. Flips to "healthy" only once
  the symlinks dereference (i.e. cert exists), gating CoreDNS start
  via depends_on: service_healthy.
- Corefile unchanged — same /etc/coredns/certs/cert.pem path; only the
  bind-mount source switches from ./certs to ./caddy-data/caddy.
- New Makefile target: tls-up orchestrates the bring-up sequence.

Cert is valid until Aug 12 2026.

Verified end-to-end:
  dig @127.0.0.1 -p 8853 +tls +tls-hostname=dns.l.supported.systems ...
  dig @127.0.0.1 -p 8443 +https +tls-hostname=dns.l.supported.systems ...
---
 .env               |  12 +++++
 .gitignore         |   5 +++
 Makefile           | 109 ++++++++++++++++++++++++++++++++-------------
 caddy/Caddyfile    |  40 +++++++++++++++++
 caddy/Dockerfile   |  14 ++++++
 docker-compose.yml |  47 ++++++++++++++++++-
 6 files changed, 195 insertions(+), 32 deletions(-)
 create mode 100644 caddy/Caddyfile
 create mode 100644 caddy/Dockerfile

diff --git a/.env b/.env
index 079ee15..b7d5452 100644
--- a/.env
+++ b/.env
@@ -16,3 +16,15 @@ DOT_PORT=8853
 # DoH (DNS-over-HTTPS, RFC 8484) — typically 443. Host port 8443
 # because Caddy already owns 443 on this host.
 DOH_PORT=8443
+
+# --- Production cert provisioning (Caddy sidecar + Let's Encrypt) ---
+# Hostname the cert is issued for. Its zone must be hosted on Vultr DNS:
+# the DNS-01 challenge record is created through the Vultr API.
+CADDY_HOSTNAME=dns.l.supported.systems
+
+# Contact email registered with Let's Encrypt for expiry notifications.
+ACME_EMAIL=rpm@malloys.us
+
+# VULTR_API_KEY is intentionally NOT stored here. Caddy reads it from
+# the shell environment via docker compose's variable interpolation —
+# export it in your shell (or in ~/.zshenv) before `make tls-up`.
diff --git a/.gitignore b/.gitignore
index b4fd195..14d1699 100644
--- a/.gitignore
+++ b/.gitignore
@@ -4,5 +4,10 @@ zones-prepared/*.zone
 # Self-signed certs (re-generated by scripts/generate-certs.sh)
 certs/*.pem
 
+# Caddy's runtime data: issued certs, ACME account keys, transient state.
+# NEVER commit — contains the ACME account private key.
+caddy-data/
+caddy-config/
+
 # Local-only env overrides
 .env.local
diff --git a/Makefile b/Makefile
index 99bb8d5..def0086 100644
--- a/Makefile
+++ b/Makefile
@@ -2,28 +2,68 @@ SHELL := /usr/bin/env bash
 
 COMPOSE := docker compose
 
-.PHONY: help prep certs up down restart logs ps test test-tls reload clean
+# Source .env so $(CADDY_HOSTNAME) etc. are available in recipes.
+include .env
+export
+
+.PHONY: help prep certs down restart logs logs-caddy ps test test-tls \
+	test-public reload clean clean-caddy tls-up cert-watch caddy-rebuild
 
 help: ## Show this help
-	@awk 'BEGIN {FS = ":.*?## "} /^[a-zA-Z_-]+:.*?## / {printf " \033[36m%-12s\033[0m %s\n", $$1, $$2}' $(MAKEFILE_LIST)
+	@awk 'BEGIN {FS = ":.*?## "} /^[a-zA-Z_-]+:.*?## / {printf " \033[36m%-14s\033[0m %s\n", $$1, $$2}' $(MAKEFILE_LIST)
 
 prep: ## Re-inject SOA records into all zones (writes zones-prepared/)
 	@./scripts/prepare-zones.sh
 
-certs: ## Generate self-signed cert for DoT/DoH (re-run with FORCE=1 to rotate)
+certs: ## Generate self-signed dev cert (only useful if not using Caddy ACME)
 	@./scripts/generate-certs.sh
 
-up: prep certs ## Start CoreDNS (prepares zones + ensures certs exist first)
-	$(COMPOSE) up -d
-	@sleep 2 && $(COMPOSE) logs --tail=20 coredns
+caddy-rebuild: ## Rebuild the Caddy image (after editing caddy/Dockerfile)
+	$(COMPOSE) build --no-cache caddy
 
-down: ## Stop & remove the container
+# ---------------------------------------------------------------------------
+# Production / Let's Encrypt flow
+# ---------------------------------------------------------------------------
+
+tls-up: prep ## Bring up Caddy → wait for cert → start CoreDNS (one command)
+	@if [ -z "$$VULTR_API_KEY" ]; then \
+		echo "ERROR: VULTR_API_KEY is not exported. Set it in your shell:"; \
+		echo " export VULTR_API_KEY=..."; \
+		exit 1; \
+	fi
+	@mkdir -p caddy-data caddy-config
+	$(COMPOSE) up -d caddy
+	@echo ""
+	@echo "Waiting for Caddy to provision cert for $(CADDY_HOSTNAME)..."
+	@echo "(DNS-01 via Vultr typically takes 30-90s; press Ctrl-C to abort)"
+	@for i in $$(seq 1 90); do \
+		if [ -e caddy-data/caddy/cert.pem ]; then \
+			echo ""; echo " ✓ cert ready after $$(( (i - 1) * 10 ))s"; break; \
+		fi; \
+		printf '.'; sleep 10; \
+	done
+	@test -e caddy-data/caddy/cert.pem || \
+		(echo ""; echo "FAILED — see logs: make logs-caddy"; exit 1)
+	$(COMPOSE) up -d coredns
+	@sleep 3 && $(COMPOSE) logs --tail=15 coredns
+
+cert-watch: ## Tail Caddy logs while it provisions the cert
+	$(COMPOSE) logs -f caddy
+
+logs-caddy: ## Tail Caddy logs
+	$(COMPOSE) logs -f caddy
+
+# ---------------------------------------------------------------------------
+# Day-to-day operations
+# ---------------------------------------------------------------------------
+
+down: ## Stop & remove all containers
 	$(COMPOSE) down
 
-restart: ## Restart CoreDNS (does not re-prep zones)
+restart: ## Restart CoreDNS (does not re-prep zones / re-issue cert)
 	$(COMPOSE) restart coredns
 
-reload: prep ## Re-prep zones; CoreDNS auto-plugin will pick changes up
+reload: prep ## Re-prep zones; CoreDNS auto-plugin picks changes up
 	@echo "Zones re-prepared. CoreDNS reloads files every 30s (auto plugin)."
 
 logs: ## Tail CoreDNS logs
@@ -32,28 +72,35 @@ logs: ## Tail CoreDNS logs
 ps: ## Show container status
 	$(COMPOSE) ps
 
-test: ## Smoke-test plain DNS (uses DNS_PORT from .env)
-	@. ./.env && echo "Querying acrazy.org @ 127.0.0.1:$$DNS_PORT (plain DNS)" && \
-		dig @127.0.0.1 -p $$DNS_PORT acrazy.org SOA +short && \
-		dig @127.0.0.1 -p $$DNS_PORT acrazy.org NS +short && \
-		dig @127.0.0.1 -p $$DNS_PORT or.acrazy.org A +short
+# ---------------------------------------------------------------------------
+# Smoke tests
+# ---------------------------------------------------------------------------
 
-test-tls: ## Smoke-test DoT + DoH (pins self-signed cert via +tls-ca)
-	@. ./.env && \
-		echo "=== DoT @ 127.0.0.1:$$DOT_PORT ===" && \
-		dig @127.0.0.1 -p $$DOT_PORT +tls +tls-ca=certs/cert.pem \
-			+tls-hostname=localhost acrazy.org SOA +short && \
-		echo "" && \
-		echo "=== DoH @ https://localhost:$$DOH_PORT/dns-query ===" && \
-		dig @localhost -p $$DOH_PORT +https +tls-ca=certs/cert.pem \
-			acrazy.org A +short && \
-		echo "" && \
-		echo "=== DoH via curl (raw wire-format) ===" && \
-		curl -sk --cacert certs/cert.pem \
-			-H 'accept: application/dns-message' \
-			--data-binary @<(printf '\x00\x00\x01\x20\x00\x01\x00\x00\x00\x00\x00\x00\x06acrazy\x03org\x00\x00\x01\x00\x01') \
-			-H 'content-type: application/dns-message' \
-			"https://localhost:$$DOH_PORT/dns-query" | xxd | head -5
+test: ## Smoke-test plain DNS
+	@echo "Querying acrazy.org @ 127.0.0.1:$(DNS_PORT) (plain DNS)"
+	@dig @127.0.0.1 -p $(DNS_PORT) acrazy.org SOA +short
+	@dig @127.0.0.1 -p $(DNS_PORT) acrazy.org NS +short
+	@dig @127.0.0.1 -p $(DNS_PORT) or.acrazy.org A +short
 
-clean: down ## Remove containers + prepared zones + certs
+test-tls: ## Smoke-test DoT + DoH against LOCAL endpoints (+tls-ca validates the cert via system CAs)
+	@echo "=== DoT @ 127.0.0.1:$(DOT_PORT), expecting cert for $(CADDY_HOSTNAME) ==="
+	@dig @127.0.0.1 -p $(DOT_PORT) +tls +tls-ca +tls-hostname=$(CADDY_HOSTNAME) \
+		acrazy.org SOA +short
+	@echo ""
+	@echo "=== DoH @ https://127.0.0.1:$(DOH_PORT)/dns-query, expecting cert for $(CADDY_HOSTNAME) ==="
+	@dig @127.0.0.1 -p $(DOH_PORT) +https +tls-ca +tls-hostname=$(CADDY_HOSTNAME) acrazy.org A +short
+
+test-public: ## Smoke-test using the public hostname (DoT/DoH ports must be open + DNS A record set)
+	@echo "=== DoT on public hostname @ port 853 ==="
+	@dig @$(CADDY_HOSTNAME) +tls +tls-ca cloudflare.com A +short
+	@echo "=== DoH on public hostname @ port 443 ==="
+	@dig @$(CADDY_HOSTNAME) +https +tls-ca cloudflare.com A +short
+
+clean: down ## Remove containers + prepared zones + dev self-signed certs
 	rm -rf zones-prepared/*.zone certs/*.pem
+
+clean-caddy: down ## Also wipe Caddy's data dir (forces re-issuance from scratch!)
+	@echo "About to delete caddy-data/ — this will force re-issuance from LE."
+	@echo "Hit Ctrl-C in 5s to abort..."
+	@sleep 5
+	rm -rf caddy-data caddy-config
diff --git a/caddy/Caddyfile b/caddy/Caddyfile
new file mode 100644
index 0000000..5d9ec5b
--- /dev/null
+++ b/caddy/Caddyfile
@@ -0,0 +1,40 @@
+# Caddy is used here purely as an ACME client + cert renewer for CoreDNS.
+# The HTTPS site is technically served (Caddy can't issue without a site
+# block), but we don't expose port 443 from this container — only the
+# cert files in /data/caddy/ are consumed by the CoreDNS sidecar.
+
+{
+	# Operator contact for Let's Encrypt; also used for expiry warnings.
+	email {$ACME_EMAIL}
+
+	# Skip the HTTP-to-HTTPS redirect server (we have nothing to redirect).
+	# Caddy still binds :443 inside the container for the cert site, which
+	# is fine because we don't publish those ports to the host.
+	auto_https disable_redirects
+}
+
+{$CADDY_HOSTNAME} {
+	tls {
+		# DNS-01 challenge via Vultr API. The token comes from the
+		# container's VULTR_API_KEY environment variable through the
+		# {env.*} placeholder, so the secret never appears in this file.
+		dns vultr {env.VULTR_API_KEY}
+
+		# Use PUBLIC resolvers for the propagation check, not Docker's
+		# embedded DNS. Without this, Caddy follows the container's
+		# resolv.conf → host's resolv.conf → local LAN resolvers, which
+		# on a split-horizon DNS setup will return LAN IPs for vultr.com
+		# nameservers and the propagation check fails with connection
+		# refused. Hitting 1.1.1.1 / 9.9.9.9 directly sidesteps it.
+		resolvers 1.1.1.1 9.9.9.9 1.0.0.1
+
+		# Vultr's NS propagation is generally fast (<30s) but LE checks
+		# multiple resolvers; cushion the wait to avoid flaky issuance.
+		propagation_delay 30s
+		propagation_timeout 300s
+	}
+
+	# A sensible response if anyone hits this on 443. Doubles as a
+	# "Caddy is alive" sanity check inside the compose network.
+	respond "CoreDNS DoT/DoH endpoint. DoT: port 853. DoH: /dns-query" 200
+}
diff --git a/caddy/Dockerfile b/caddy/Dockerfile
new file mode 100644
index 0000000..7adcd92
--- /dev/null
+++ b/caddy/Dockerfile
@@ -0,0 +1,14 @@
+# Custom Caddy build that bundles the Vultr DNS provider plugin.
+# Stock caddy:2 doesn't include DNS-provider modules — they're plugins.
+# xcaddy compiles them in at build time.
+FROM caddy:2.10.0-builder AS builder
+# The Caddy builder image bakes in Go 1.23, but caddy-dns/vultr now
+# requires Go >= 1.24. GOTOOLCHAIN=auto lets `go get` fetch a newer
+# toolchain on demand so we don't have to bump base images every time
+# a plugin's minimum Go version moves.
+ENV GOTOOLCHAIN=auto
+RUN xcaddy build \
+    --with github.com/caddy-dns/vultr
+
+FROM caddy:2.10.0
+COPY --from=builder /usr/bin/caddy /usr/bin/caddy
diff --git a/docker-compose.yml b/docker-compose.yml
index 52211b7..13dc2b6 100644
--- a/docker-compose.yml
+++ b/docker-compose.yml
@@ -1,9 +1,49 @@
 services:
+  # Caddy runs as a dedicated ACME client + cert renewer. It provisions
+  # a Let's Encrypt cert for ${CADDY_HOSTNAME} via DNS-01 (Vultr API)
+  # and persists it to ./caddy-data. CoreDNS reads from that same path
+  # read-only. The container's HTTP/HTTPS ports are NOT published — we
+  # only care about the cert files on disk.
+  caddy:
+    build: ./caddy
+    container_name: coredns-caddy
+    restart: unless-stopped
+    environment:
+      - CADDY_HOSTNAME=${CADDY_HOSTNAME}
+      - ACME_EMAIL=${ACME_EMAIL}
+      - VULTR_API_KEY=${VULTR_API_KEY:?VULTR_API_KEY must be exported in your shell}
+    volumes:
+      - ./caddy/Caddyfile:/etc/caddy/Caddyfile:ro
+      - ./caddy-data:/data
+      - ./caddy-config:/config
+    healthcheck:
+      # Two-jobs-in-one: (1) maintain stable filenames (cert.pem / key.pem)
+      # as symlinks into Caddy's hostname-keyed storage, so the Corefile
+      # doesn't have to encode the hostname. (2) Flip to "healthy" once
+      # the symlink dereferences successfully (i.e. Caddy has issued).
+      # Relative symlink targets so paths work the same from host or
+      # from any container mounting this directory.
+      test:
+        - "CMD-SHELL"
+        - >
+          ln -sf certificates/acme-v02.api.letsencrypt.org-directory/${CADDY_HOSTNAME}/${CADDY_HOSTNAME}.crt /data/caddy/cert.pem &&
+          ln -sf certificates/acme-v02.api.letsencrypt.org-directory/${CADDY_HOSTNAME}/${CADDY_HOSTNAME}.key /data/caddy/key.pem &&
+          chmod 755 /data/caddy &&
+          chmod -R a+rX /data/caddy/certificates 2>/dev/null;
+          test -e /data/caddy/cert.pem
+      interval: 10s
+      timeout: 3s
+      retries: 60 # ~10 min ceiling for initial issuance
+      start_period: 5s
+
   coredns:
     image: ${COREDNS_IMAGE}
     container_name: coredns
     restart: unless-stopped
     command: ["-conf", "/etc/coredns/Corefile"]
+    depends_on:
+      caddy:
+        condition: service_healthy
     ports:
       - "${DNS_PORT}:53/udp"
       - "${DNS_PORT}:53/tcp"
@@ -14,7 +54,12 @@ services:
     volumes:
       - ./Corefile:/etc/coredns/Corefile:ro
       - ./zones-prepared:/zones:ro
-      - ./certs:/etc/coredns/certs:ro
+      # Subpath mount of Caddy's data dir. The healthcheck maintains
+      # cert.pem / key.pem symlinks at the top of this tree, so CoreDNS
+      # sees stable filenames regardless of hostname. NOTE(review): this
+      # mounts all of /data/caddy, not just certificates/ — Caddy's ACME
+      # account key presumably lives under caddy/acme/ here; confirm.
+      - ./caddy-data/caddy:/etc/coredns/certs:ro
     healthcheck:
       test: ["CMD", "wget", "-qO-", "http://127.0.0.1:8080/health"]
       interval: 30s