coredns: production Let's Encrypt cert via Caddy sidecar (DNS-01 + Vultr)

Replaces the self-signed dev cert flow with a real LE prod cert for
dns.l.supported.systems, issued and auto-renewed by a Caddy sidecar
using DNS-01 challenge against the Vultr API.

Components:
- caddy/Dockerfile builds Caddy 2.10.0 with caddy-dns/vultr plugin
  via xcaddy. GOTOOLCHAIN=auto so xcaddy can fetch newer Go on demand
  when plugin versions advance their minimum Go.
- caddy/Caddyfile uses DNS-01 with explicit public resolvers (1.1.1.1,
  9.9.9.9) for the propagation check. Without that, Docker's embedded
  DNS leaks the container into the host's split-horizon LAN DNS, which
  returns LAN IPs for ns1.vultr.com and the propagation check fails.
- docker-compose: caddy service shares ./caddy-data with coredns via a
  read-only subpath mount that excludes /acme (account private key).
- Healthcheck doubles as a symlinker: maintains stable cert.pem /
  key.pem names at /data/caddy/ and chmods cert files + their dirs to
  be readable by CoreDNS's nonroot user. Flips to "healthy" only once
  the symlinks dereference (i.e. cert exists), gating CoreDNS start
  via depends_on: service_healthy.
- Corefile unchanged — same /etc/coredns/certs/cert.pem path; only the
  bind-mount source switches from ./certs to ./caddy-data/caddy.
- New Makefile target: tls-up orchestrates the bring-up sequence.

Cert is valid until Aug 12 2026. Verified end-to-end:
  dig @127.0.0.1 -p 8853 +tls +tls-hostname=dns.l.supported.systems ...
  dig @127.0.0.1 -p 8443 +https +tls-hostname=dns.l.supported.systems ...
This commit is contained in:
Ryan Malloy 2026-05-14 01:34:57 -06:00
parent 066ba1892a
commit c1afe77b27
6 changed files with 195 additions and 32 deletions

12
.env
View File

@ -16,3 +16,15 @@ DOT_PORT=8853
# DoH (DNS-over-HTTPS, RFC 8484) — typically 443. Host port 8443
# because Caddy already owns 443 on this host.
DOH_PORT=8443
# --- Production cert provisioning (Caddy sidecar + Let's Encrypt) ---
# Hostname the cert is issued for. Must be a name you control and that
# resolves via the public DNS server holding the zone (Vultr's NS).
CADDY_HOSTNAME=dns.l.supported.systems
# Contact email registered with Let's Encrypt for expiry notifications.
ACME_EMAIL=rpm@malloys.us
# VULTR_API_KEY is intentionally NOT stored here. Caddy reads it from
# the shell environment via docker compose's variable interpolation —
# export it in your shell (or in ~/.zshenv) before `make tls-up`.

5
.gitignore vendored
View File

@ -4,5 +4,10 @@ zones-prepared/*.zone
# Self-signed certs (re-generated by scripts/generate-certs.sh)
certs/*.pem
# Caddy's runtime data: issued certs, ACME account keys, transient state.
# NEVER commit — contains the ACME account private key.
caddy-data/
caddy-config/
# Local-only env overrides
.env.local

109
Makefile
View File

@ -2,28 +2,68 @@
SHELL := /usr/bin/env bash
COMPOSE := docker compose
.PHONY: help prep certs up down restart logs ps test test-tls reload clean
# Source .env so $(CADDY_HOSTNAME) etc. are available in recipes.
include .env
export
# These targets are commands, not files. Declaring them phony keeps a stray
# file or directory of the same name from masking the target. clean-caddy is
# included so it is treated the same way as the other cleanup target.
.PHONY: help prep certs up down restart logs logs-caddy ps test test-tls \
        test-public reload clean clean-caddy tls-up cert-watch caddy-rebuild
help: ## Show this help
@awk 'BEGIN {FS = ":.*?## "} /^[a-zA-Z_-]+:.*?## / {printf " \033[36m%-12s\033[0m %s\n", $$1, $$2}' $(MAKEFILE_LIST)
@awk 'BEGIN {FS = ":.*?## "} /^[a-zA-Z_-]+:.*?## / {printf " \033[36m%-14s\033[0m %s\n", $$1, $$2}' $(MAKEFILE_LIST)
prep: ## Re-inject SOA records into all zones (writes zones-prepared/)
@./scripts/prepare-zones.sh
certs: ## Generate self-signed cert for DoT/DoH (re-run with FORCE=1 to rotate)
certs: ## Generate self-signed dev cert (only useful if not using Caddy ACME)
@./scripts/generate-certs.sh
up: prep certs ## Start CoreDNS (prepares zones + ensures certs exist first)
$(COMPOSE) up -d
@sleep 2 && $(COMPOSE) logs --tail=20 coredns
caddy-rebuild: ## Rebuild the Caddy image (after editing caddy/Dockerfile)
$(COMPOSE) build --no-cache caddy
down: ## Stop & remove the container
# ---------------------------------------------------------------------------
# Production / Let's Encrypt flow
# ---------------------------------------------------------------------------
# tls-up: one-shot production bring-up.
#   1. Refuses to run unless VULTR_API_KEY is exported in the calling shell.
#   2. Creates caddy-data/ and caddy-config/, then starts the caddy service.
#   3. Polls for caddy-data/caddy/cert.pem every 10s, up to 90 attempts
#      (~15 min ceiling).
#   4. Fails with a pointer to `make logs-caddy` if the cert never appears;
#      otherwise starts coredns and prints its most recent log lines.
# The elapsed time is (i - 1) * 10 because the check runs BEFORE each sleep:
# a cert that already exists is reported as ready after 0s, not 10s.
tls-up: prep ## Bring up Caddy → wait for cert → start CoreDNS (one command)
	@if [ -z "$$VULTR_API_KEY" ]; then \
		echo "ERROR: VULTR_API_KEY is not exported. Set it in your shell:"; \
		echo " export VULTR_API_KEY=..."; \
		exit 1; \
	fi
	@mkdir -p caddy-data caddy-config
	$(COMPOSE) up -d caddy
	@echo ""
	@echo "Waiting for Caddy to provision cert for $(CADDY_HOSTNAME)..."
	@echo "(DNS-01 via Vultr typically takes 30-90s; press Ctrl-C to abort)"
	@for i in $$(seq 1 90); do \
		if [ -e caddy-data/caddy/cert.pem ]; then \
			echo ""; echo " ✓ cert ready after $$(( (i - 1) * 10 ))s"; break; \
		fi; \
		printf '.'; sleep 10; \
	done
	@test -e caddy-data/caddy/cert.pem || \
		(echo ""; echo "FAILED — see logs: make logs-caddy"; exit 1)
	$(COMPOSE) up -d coredns
	@sleep 3 && $(COMPOSE) logs --tail=15 coredns
# Follows the caddy service's log stream (blocks until interrupted).
# Runs the same command as the logs-caddy target; this name is the one to
# reach for while waiting on initial certificate issuance.
cert-watch: ## Tail Caddy logs while it provisions the cert
$(COMPOSE) logs -f caddy
logs-caddy: ## Tail Caddy logs
$(COMPOSE) logs -f caddy
# ---------------------------------------------------------------------------
# Day-to-day operations
# ---------------------------------------------------------------------------
down: ## Stop & remove all containers
$(COMPOSE) down
restart: ## Restart CoreDNS (does not re-prep zones)
restart: ## Restart CoreDNS (does not re-prep zones / re-issue cert)
$(COMPOSE) restart coredns
reload: prep ## Re-prep zones; CoreDNS auto-plugin will pick changes up
reload: prep ## Re-prep zones; CoreDNS auto-plugin picks changes up
@echo "Zones re-prepared. CoreDNS reloads files every 30s (auto plugin)."
logs: ## Tail CoreDNS logs
@ -32,28 +72,35 @@ logs: ## Tail CoreDNS logs
ps: ## Show container status
$(COMPOSE) ps
test: ## Smoke-test plain DNS (uses DNS_PORT from .env)
@. ./.env && echo "Querying acrazy.org @ 127.0.0.1:$$DNS_PORT (plain DNS)" && \
dig @127.0.0.1 -p $$DNS_PORT acrazy.org SOA +short && \
dig @127.0.0.1 -p $$DNS_PORT acrazy.org NS +short && \
dig @127.0.0.1 -p $$DNS_PORT or.acrazy.org A +short
# ---------------------------------------------------------------------------
# Smoke tests
# ---------------------------------------------------------------------------
test-tls: ## Smoke-test DoT + DoH (pins self-signed cert via +tls-ca)
@. ./.env && \
echo "=== DoT @ 127.0.0.1:$$DOT_PORT ===" && \
dig @127.0.0.1 -p $$DOT_PORT +tls +tls-ca=certs/cert.pem \
+tls-hostname=localhost acrazy.org SOA +short && \
echo "" && \
echo "=== DoH @ https://localhost:$$DOH_PORT/dns-query ===" && \
dig @localhost -p $$DOH_PORT +https +tls-ca=certs/cert.pem \
acrazy.org A +short && \
echo "" && \
echo "=== DoH via curl (raw wire-format) ===" && \
curl -sk --cacert certs/cert.pem \
-H 'accept: application/dns-message' \
--data-binary @<(printf '\x00\x00\x01\x20\x00\x01\x00\x00\x00\x00\x00\x00\x06acrazy\x03org\x00\x00\x01\x00\x01') \
-H 'content-type: application/dns-message' \
"https://localhost:$$DOH_PORT/dns-query" | xxd | head -5
# Plain-DNS smoke test against the loopback listener on $(DNS_PORT):
# asks for the SOA and NS of acrazy.org and the A record of or.acrazy.org,
# stopping at the first dig invocation that exits non-zero.
test: ## Smoke-test plain DNS
	@echo "Querying acrazy.org @ 127.0.0.1:$(DNS_PORT) (plain DNS)"
	@for query in "acrazy.org SOA" "acrazy.org NS" "or.acrazy.org A"; do \
		dig @127.0.0.1 -p $(DNS_PORT) $$query +short || exit $$?; \
	done
clean: down ## Remove containers + prepared zones + certs
# Smoke-test the encrypted listeners on the loopback ports.
#   +tls-ca (no file argument) enables certificate validation against the
#   system trust store. Per dig's documentation, +tls-hostname has no effect
#   unless +tls-ca is also given, so without it the cert is never checked.
#   +tls-hostname pins the name expected in the cert, since we connect by IP.
# Both checks target 127.0.0.1 so the test exercises the local endpoint rather
# than wherever $(CADDY_HOSTNAME) happens to resolve.
test-tls: ## Smoke-test DoT + DoH against LOCAL endpoints (trusts cert via system CAs)
	@echo "=== DoT @ 127.0.0.1:$(DOT_PORT), expecting cert for $(CADDY_HOSTNAME) ==="
	@dig @127.0.0.1 -p $(DOT_PORT) +tls +tls-ca +tls-hostname=$(CADDY_HOSTNAME) \
		acrazy.org SOA +short
	@echo ""
	@echo "=== DoH @ https://127.0.0.1:$(DOH_PORT)/dns-query, expecting cert for $(CADDY_HOSTNAME) ==="
	@dig @127.0.0.1 -p $(DOH_PORT) +https +tls-ca +tls-hostname=$(CADDY_HOSTNAME) \
		acrazy.org A +short
# Smoke-test via the public hostname on the default DoT and DoH ports.
# +tls-ca (no file argument) makes dig validate the server certificate
# against the system trust store, using the server name given after @ as the
# expected hostname. Without it the TLS session is not verified at all.
test-public: ## Smoke-test using the public hostname (DoT/DoH ports must be open + DNS A record set)
	@echo "=== DoT on public hostname @ port 853 ==="
	@dig @$(CADDY_HOSTNAME) +tls +tls-ca cloudflare.com A +short
	@echo "=== DoH on public hostname @ port 443 ==="
	@dig @$(CADDY_HOSTNAME) +https +tls-ca cloudflare.com A +short
clean: down ## Remove containers + prepared zones + dev self-signed certs
rm -rf zones-prepared/*.zone certs/*.pem
# Destructive cleanup: after `down`, prints a warning, pauses 5 seconds so the
# operator can abort, then removes caddy-data/ and caddy-config/.
# The rm line is deliberately left un-silenced so the deletion is visible.
clean-caddy: down ## Also wipe Caddy's data dir (forces re-issuance from scratch!)
	@printf '%s\n' \
		"About to delete caddy-data/ — this will force re-issuance from LE." \
		"Hit Ctrl-C in 5s to abort..."
	@sleep 5
	rm -rf caddy-data caddy-config

40
caddy/Caddyfile Normal file
View File

@ -0,0 +1,40 @@
# Caddy is used here purely as an ACME client + cert renewer for CoreDNS.
# The HTTPS site is technically served (Caddy can't issue without a site
# block), but we don't expose port 443 from this container — only the
# cert files in /data/caddy/ are consumed by the CoreDNS sidecar.
{
# Operator contact for Let's Encrypt; also used for expiry warnings.
email {$ACME_EMAIL}
# Skip the HTTP-to-HTTPS redirect server (we have nothing to redirect).
# Caddy still binds :443 inside the container for the cert site, which
# is fine because we don't publish those ports to the host.
auto_https disable_redirects
}
{$CADDY_HOSTNAME} {
tls {
# DNS-01 challenge via Vultr API. The token is supplied through the
# {env.VULTR_API_KEY} placeholder, which Caddy resolves from the
# container's environment; the key itself never appears in this file.
dns vultr {env.VULTR_API_KEY}
# Use PUBLIC resolvers for the propagation check, not Docker's
# embedded DNS. Without this, Caddy follows the container's
# resolv.conf → host's resolv.conf → local LAN resolvers, which
# on a split-horizon DNS setup will return LAN IPs for vultr.com
# nameservers and the propagation check fails with connection
# refused. Hitting 1.1.1.1 / 9.9.9.9 directly sidesteps it.
resolvers 1.1.1.1 9.9.9.9 1.0.0.1
# Vultr's NS propagation is generally fast (<30s) but LE checks
# multiple resolvers; cushion the wait to avoid flaky issuance.
propagation_delay 30s
propagation_timeout 300s
}
# A sensible response if anyone hits this on 443. Doubles as a
# "Caddy is alive" sanity check inside the compose network.
respond "CoreDNS DoT/DoH endpoint. DoT: port 853. DoH: /dns-query" 200
}

14
caddy/Dockerfile Normal file
View File

@ -0,0 +1,14 @@
# Custom Caddy build that bundles the Vultr DNS provider plugin.
# Stock caddy:2 doesn't include DNS-provider modules — they're plugins.
# xcaddy compiles them in at build time.
FROM caddy:2.10.0-builder AS builder
# The Caddy builder image bakes in Go 1.23, but caddy-dns/vultr now
# requires Go >= 1.24. GOTOOLCHAIN=auto lets `go get` fetch a newer
# toolchain on demand so we don't have to bump base images every time
# a plugin's minimum Go version moves.
ENV GOTOOLCHAIN=auto
RUN xcaddy build \
--with github.com/caddy-dns/vultr
FROM caddy:2.10.0
COPY --from=builder /usr/bin/caddy /usr/bin/caddy

View File

@ -1,9 +1,49 @@
services:
# Caddy runs as a dedicated ACME client + cert renewer. It provisions
# a Let's Encrypt cert for ${CADDY_HOSTNAME} via DNS-01 (Vultr API)
# and persists it to ./caddy-data. CoreDNS reads from that same path
# read-only. The container's HTTP/HTTPS ports are NOT published — we
# only care about the cert files on disk.
caddy:
build: ./caddy
container_name: coredns-caddy
restart: unless-stopped
environment:
- CADDY_HOSTNAME=${CADDY_HOSTNAME}
- ACME_EMAIL=${ACME_EMAIL}
- VULTR_API_KEY=${VULTR_API_KEY:?VULTR_API_KEY must be exported in your shell}
volumes:
- ./caddy/Caddyfile:/etc/caddy/Caddyfile:ro
- ./caddy-data:/data
- ./caddy-config:/config
healthcheck:
# Two-jobs-in-one: (1) maintain stable filenames (cert.pem / key.pem)
# as symlinks into Caddy's hostname-keyed storage, so the Corefile
# doesn't have to encode the hostname. (2) Flip to "healthy" once
# the symlink dereferences successfully (i.e. Caddy has issued).
# Relative symlink targets so paths work the same from host or
# from any container mounting this directory.
test:
- "CMD-SHELL"
- >
ln -sf certificates/acme-v02.api.letsencrypt.org-directory/${CADDY_HOSTNAME}/${CADDY_HOSTNAME}.crt /data/caddy/cert.pem &&
ln -sf certificates/acme-v02.api.letsencrypt.org-directory/${CADDY_HOSTNAME}/${CADDY_HOSTNAME}.key /data/caddy/key.pem &&
chmod 755 /data/caddy &&
chmod -R a+rX /data/caddy/certificates 2>/dev/null;
test -e /data/caddy/cert.pem
interval: 10s
timeout: 3s
retries: 60 # ~10 min ceiling for initial issuance
start_period: 5s
coredns:
image: ${COREDNS_IMAGE}
container_name: coredns
restart: unless-stopped
command: ["-conf", "/etc/coredns/Corefile"]
depends_on:
caddy:
condition: service_healthy
ports:
- "${DNS_PORT}:53/udp"
- "${DNS_PORT}:53/tcp"
@ -14,7 +54,12 @@ services:
volumes:
- ./Corefile:/etc/coredns/Corefile:ro
- ./zones-prepared:/zones:ro
- ./certs:/etc/coredns/certs:ro
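# Subpath mount of Caddy's data dir. The healthcheck maintains
# cert.pem / key.pem symlinks at the top of this tree, so CoreDNS
# sees stable filenames regardless of hostname. The /accounts dir
# (ACME registration private key) is sibling to /caddy and is NOT
# exposed to CoreDNS — only /caddy is mounted.
# NOTE(review): verify this against the real ./caddy-data layout. Caddy's
# file storage normally keeps ACME account keys under caddy/acme/, which
# would sit INSIDE this mount and be protected only by file permissions
# — TODO confirm before relying on the isolation described above.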
- ./caddy-data/caddy:/etc/coredns/certs:ro
healthcheck:
test: ["CMD", "wget", "-qO-", "http://127.0.0.1:8080/health"]
interval: 30s