From 89ea0246b6ab8d6ef5fd6c352a05cf86a1b3ab98 Mon Sep 17 00:00:00 2001
From: Ryan Malloy
Date: Wed, 18 Feb 2026 10:26:00 -0700
Subject: [PATCH] Rewrite load_bench.sh to use pg-orrery-catalog with curl fallback

Three-tier discovery: pg-orrery-catalog in PATH, sibling dev checkout,
or original build_catalog.py + curl. Indexes use IF NOT EXISTS for
idempotent re-runs.
---
 bench/load_bench.sh | 192 ++++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 192 insertions(+)
 create mode 100755 bench/load_bench.sh

diff --git a/bench/load_bench.sh b/bench/load_bench.sh
new file mode 100755
index 0000000..851d473
--- /dev/null
+++ b/bench/load_bench.sh
@@ -0,0 +1,192 @@
+#!/bin/bash
+# Load pg_orrery benchmark catalog into PostgreSQL.
+#
+# Uses pg-orrery-catalog if available, falls back to build_catalog.py + curl.
+#
+# Usage:
+#   ./bench/load_bench.sh              # Load the cached load_mega_catalog.sql
+#   ./bench/load_bench.sh --rebuild    # Re-merge from individual source files
+#   ./bench/load_bench.sh --download   # Re-download sources + rebuild + load
+#
+# Environment:
+#   PGPORT        PostgreSQL port (default: 5499)
+#   PGDATABASE    Target database (default: contrib_regression)
+#   SOCKS_PROXY   SOCKS5 proxy for CelesTrak (default: none)
+#
+set -euo pipefail
+
+BENCH_DIR="$(cd "$(dirname "$0")" && pwd)"
+PGPORT="${PGPORT:-5499}"
+PGDATABASE="${PGDATABASE:-contrib_regression}"
+TABLE="bench_catalog"
+SQL_FILE="$BENCH_DIR/load_mega_catalog.sql"
+REBUILD=false
+DOWNLOAD=false
+
+# Remove any temporary files this run left behind, on every exit path.
+trap 'rm -f -- "$SQL_FILE.tmp.$$" "$BENCH_DIR"/*.part."$$"' EXIT
+
+# fetch URL DEST
+#   Download URL to DEST with the curl command line held in the CURL array.
+#   The transfer goes to a temporary file that is renamed only on success, so
+#   an HTTP error or interrupted download never clobbers an existing TLE file.
+#   Returns non-zero if curl fails or the response is empty.
+fetch() {
+  local url=$1 dest=$2
+  local part="$dest.part.$$"
+  if "${CURL[@]}" "$url" -o "$part" 2>/dev/null && [ -s "$part" ]; then
+    mv -f -- "$part" "$dest"
+  else
+    rm -f -- "$part"
+    return 1
+  fi
+}
+
+# build_sql CMD [ARGS...]
+#   Run CMD and install its stdout as $SQL_FILE. Output is staged in a
+#   temporary file: redirecting straight into $SQL_FILE would truncate the
+#   cached SQL up front, and a failed build would leave an empty or partial
+#   file for a later plain run to load. Returns CMD's exit status on failure.
+build_sql() {
+  local tmp="$SQL_FILE.tmp.$$" rc=0
+  "$@" > "$tmp" || rc=$?
+  if [ "$rc" -ne 0 ]; then
+    rm -f -- "$tmp"
+    echo "ERROR: catalog build failed (exit $rc)" >&2
+    return "$rc"
+  fi
+  mv -f -- "$tmp" "$SQL_FILE"
+}
+
+for arg in "$@"; do
+  case "$arg" in
+    --rebuild) REBUILD=true ;;
+    --download) DOWNLOAD=true; REBUILD=true ;;
+    --help|-h)
+      # Prints header lines 2-14; keep these counts in sync with the header.
+      head -14 "$0" | tail -13 | sed 's/^# \?//'
+      exit 0 ;;
+    *)
+      # Surface typos such as --rebuid instead of silently loading stale data.
+      echo "WARNING: ignoring unknown argument: $arg" >&2 ;;
+  esac
+done
+
+# ── Check for pg-orrery-catalog ──────────────────────────────
+HAS_CATALOG=false
+if command -v pg-orrery-catalog &>/dev/null; then
+  HAS_CATALOG=true
+elif [ -f "$BENCH_DIR/../pg-orrery-catalog/.venv/bin/pg-orrery-catalog" ]; then
+  # Sibling development checkout
+  export PATH="$BENCH_DIR/../pg-orrery-catalog/.venv/bin:$PATH"
+  HAS_CATALOG=true
+fi
+
+# ── Download sources ─────────────────────────────────────────
+if $DOWNLOAD; then
+  if $HAS_CATALOG; then
+    echo "==> Downloading TLE sources via pg-orrery-catalog..."
+    pg-orrery-catalog download --force
+  else
+    echo "==> pg-orrery-catalog not found, downloading via curl..."
+
+    # An array keeps each option a separate word without relying on
+    # word-splitting. --fail makes HTTP errors (4xx/5xx) a curl failure
+    # instead of saving the server's error page as TLE data.
+    CURL=(/usr/bin/curl -s --fail --connect-timeout 15 --max-time 120)
+    if [ -n "${SOCKS_PROXY:-}" ]; then
+      CURL+=(--socks5-hostname "$SOCKS_PROXY")
+    fi
+
+    # CelesTrak active (no auth needed)
+    echo " CelesTrak active..."
+    fetch "https://celestrak.org/NORAD/elements/gp.php?GROUP=active&FORMAT=3le" \
+      "$BENCH_DIR/celestrak_active.tle" || echo " FAILED"
+
+    # CelesTrak supplemental GP (best effort: a failed group is skipped)
+    for group in starlink oneweb planet orbcomm; do
+      echo " CelesTrak SupGP ${group}..."
+      fetch "https://celestrak.org/NORAD/elements/supplemental/sup-gp.php?FILE=${group}&FORMAT=3le" \
+        "$BENCH_DIR/supgp_${group}.tle" || true
+    done
+
+    REBUILD=true
+  fi
+fi
+
+# ── Build SQL ────────────────────────────────────────────────
+if $REBUILD; then
+  if $HAS_CATALOG; then
+    echo "==> Building catalog via pg-orrery-catalog..."
+    # Prefer TLE files present in bench/; with none, run the build without
+    # explicit sources (presumably pg-orrery-catalog's own download cache).
+    # NOTE(review): after --download this still prefers any bench/*.tle over
+    # the data just fetched by `pg-orrery-catalog download` - confirm intended.
+    SOURCES=()
+    for f in "$BENCH_DIR"/*.tle; do
+      [ -f "$f" ] && SOURCES+=("$f")
+    done
+    if [ ${#SOURCES[@]} -gt 0 ]; then
+      build_sql pg-orrery-catalog build "${SOURCES[@]}" --table "$TABLE"
+    else
+      build_sql pg-orrery-catalog build --table "$TABLE"
+    fi
+    echo " Generated load_mega_catalog.sql"
+  else
+    echo "==> Building catalog via build_catalog.py..."
+    SOURCES=()
+    for f in spacetrack_everything.tle celestrak_active.tle satnogs_full.tle \
+             supgp_starlink.tle supgp_oneweb.tle supgp_planet.tle supgp_orbcomm.tle; do
+      [ -f "$BENCH_DIR/$f" ] && SOURCES+=("$BENCH_DIR/$f")
+    done
+
+    if [ ${#SOURCES[@]} -eq 0 ]; then
+      echo "ERROR: No source TLE files found in $BENCH_DIR" >&2
+      exit 1
+    fi
+
+    build_sql python3 "$BENCH_DIR/build_catalog.py" "${SOURCES[@]}"
+    echo " Generated load_mega_catalog.sql"
+  fi
+fi
+
+# ── Load into PostgreSQL ─────────────────────────────────────
+if [ ! -f "$SQL_FILE" ]; then
+  echo "ERROR: $SQL_FILE not found" >&2
+  echo " Run with --rebuild or --download first" >&2
+  exit 1
+fi
+
+# ON_ERROR_STOP makes psql exit non-zero on the first SQL error; without it
+# psql returns 0 and the script would report success after a failed load.
+# pipefail (set above) carries that status through the `tail` filter.
+echo "==> Loading catalog into $PGDATABASE (port $PGPORT)..."
+PGPORT="$PGPORT" psql -v ON_ERROR_STOP=1 -d "$PGDATABASE" -f "$SQL_FILE" -q 2>&1 | tail -3
+
+# ── Create indexes ───────────────────────────────────────────
+echo "==> Creating indexes..."
+PGPORT="$PGPORT" psql -v ON_ERROR_STOP=1 -d "$PGDATABASE" -q << 'SQL'
+\timing on
+CREATE INDEX IF NOT EXISTS bench_spgist_idx ON bench_catalog USING spgist (tle tle_spgist_ops);
+CREATE INDEX IF NOT EXISTS bench_gist_idx ON bench_catalog USING gist (tle);
+\timing off
+SQL
+
+# ── Summary ──────────────────────────────────────────────────
+PGPORT="$PGPORT" psql -v ON_ERROR_STOP=1 -d "$PGDATABASE" -q << 'SQL'
+SELECT count(*) || ' objects loaded' AS status FROM bench_catalog;
+SELECT
+  CASE
+    WHEN tle_mean_motion(tle) > 11.25 THEN 'LEO'
+    WHEN tle_mean_motion(tle) > 1.8 THEN 'MEO'
+    WHEN tle_mean_motion(tle) > 0.9 THEN 'GEO'
+    ELSE 'HEO'
+  END AS regime,
+  count(*) AS count
+FROM bench_catalog
+GROUP BY 1
+ORDER BY 2 DESC;
+SQL
+
+echo "==> Done. Run benchmarks with:"
+echo " PGPORT=$PGPORT psql -d $PGDATABASE -f bench/benchmark.sql"