#!/bin/bash
# Load pg_orrery benchmark catalog into PostgreSQL.
#
# Uses pg-orrery-catalog if available, falls back to pre-generated SQL.
#
# Usage:
#   ./bench/load_bench.sh              # Load from cached SQL or TLE files
#   ./bench/load_bench.sh --rebuild    # Re-merge from individual source files
#   ./bench/load_bench.sh --download   # Re-download sources + rebuild + load
#
# Environment:
#   PGPORT       PostgreSQL port (default: 5499)
#   PGDATABASE   Target database (default: contrib_regression)
#   SOCKS_PROXY  SOCKS5 proxy for CelesTrak (default: none)
#
set -euo pipefail

BENCH_DIR="$(cd "$(dirname "$0")" && pwd)"
PGPORT="${PGPORT:-5499}"
PGDATABASE="${PGDATABASE:-contrib_regression}"
TABLE="bench_catalog"
SQL_FILE="$BENCH_DIR/load_mega_catalog.sql"
REBUILD=false
DOWNLOAD=false

# Path of the in-flight temporary file (empty when there is none).
# Removed by the EXIT trap so an aborted run leaves no partial output behind.
TMP_FILE=""

cleanup() {
  if [[ -n "$TMP_FILE" ]]; then
    rm -f "$TMP_FILE"
  fi
}
trap cleanup EXIT

# Print the help text. Kept as a heredoc (rather than slicing the file header
# with head/tail/sed) so it does not depend on the header's line count or on
# GNU-only sed syntax. Keep in sync with the header comment above.
usage() {
  cat << 'EOF'
Load pg_orrery benchmark catalog into PostgreSQL.

Uses pg-orrery-catalog if available, falls back to pre-generated SQL.

Usage:
  ./bench/load_bench.sh              # Load from cached SQL or TLE files
  ./bench/load_bench.sh --rebuild    # Re-merge from individual source files
  ./bench/load_bench.sh --download   # Re-download sources + rebuild + load

Environment:
  PGPORT       PostgreSQL port (default: 5499)
  PGDATABASE   Target database (default: contrib_regression)
  SOCKS_PROXY  SOCKS5 proxy for CelesTrak (default: none)
EOF
}

# Run psql against the target database on the configured port.
# Arguments: extra psql arguments; stdin is passed through to psql.
run_psql() {
  PGPORT="$PGPORT" psql -d "$PGDATABASE" "$@"
}

# Download one URL to a destination file.
# The transfer goes to a temporary file that replaces the destination only
# when curl succeeds and the result is non-empty, so a failed download never
# overwrites a previously downloaded file.
# Globals:   CURL_CMD (read), TMP_FILE (written)
# Arguments: $1 - URL, $2 - destination path
# Returns:   0 on success, 1 on failure
fetch_tle() {
  local url=$1 dest=$2
  TMP_FILE="$dest.part.$$"
  if "${CURL_CMD[@]}" "$url" -o "$TMP_FILE" 2> /dev/null \
    && [[ -s "$TMP_FILE" ]]; then
    mv -f "$TMP_FILE" "$dest"
    TMP_FILE=""
    return 0
  fi
  rm -f "$TMP_FILE"
  TMP_FILE=""
  return 1
}

# Run a catalog build command and install its stdout as $SQL_FILE.
# Output is written to a temporary file and renamed only on success, so a
# failed build cannot leave a truncated SQL file that a later run (without
# --rebuild) would load.
# Globals:   SQL_FILE (read), TMP_FILE (written)
# Arguments: the build command and its arguments
# Exits with the build command's status on failure.
write_sql() {
  local rc
  TMP_FILE="$SQL_FILE.tmp.$$"
  if "$@" > "$TMP_FILE"; then
    mv -f "$TMP_FILE" "$SQL_FILE"
    TMP_FILE=""
    echo "    Generated load_mega_catalog.sql"
  else
    rc=$?
    rm -f "$TMP_FILE"
    TMP_FILE=""
    echo "ERROR: catalog build failed; $SQL_FILE left unchanged" >&2
    exit "$rc"
  fi
}

for arg in "$@"; do
  case "$arg" in
    --rebuild)
      REBUILD=true
      ;;
    --download)
      DOWNLOAD=true
      REBUILD=true
      ;;
    --help | -h)
      usage
      exit 0
      ;;
    *)
      # Unknown arguments are still tolerated, but no longer silently.
      echo "WARNING: ignoring unknown argument: $arg" >&2
      ;;
  esac
done

# ── Check for pg-orrery-catalog ──────────────────────────────

HAS_CATALOG=false
if command -v pg-orrery-catalog &> /dev/null; then
  HAS_CATALOG=true
elif [[ -f "$BENCH_DIR/../pg-orrery-catalog/.venv/bin/pg-orrery-catalog" ]]; then
  # Sibling development checkout
  export PATH="$BENCH_DIR/../pg-orrery-catalog/.venv/bin:$PATH"
  HAS_CATALOG=true
fi

# ── Download sources ─────────────────────────────────────────

if [[ "$DOWNLOAD" == true ]]; then
  if [[ "$HAS_CATALOG" == true ]]; then
    echo "==> Downloading TLE sources via pg-orrery-catalog..."
    pg-orrery-catalog download --force
  else
    echo "==> pg-orrery-catalog not found, downloading via curl..."

    # Build the curl command as an array so the proxy value is passed as a
    # single argument. --fail makes HTTP errors (4xx/5xx) a non-zero exit
    # instead of saving the error page as a .tle file.
    CURL_CMD=(/usr/bin/curl -s --fail --connect-timeout 15 --max-time 120)
    if [[ -n "${SOCKS_PROXY:-}" ]]; then
      CURL_CMD+=(--socks5-hostname "$SOCKS_PROXY")
    fi

    # Downloads are best-effort: a failure is reported and the script moves on
    # with whatever TLE files are already present.

    # CelesTrak active (no auth needed)
    echo "    CelesTrak active..."
    fetch_tle "https://celestrak.org/NORAD/elements/gp.php?GROUP=active&FORMAT=3le" \
      "$BENCH_DIR/celestrak_active.tle" || echo "    FAILED"

    # CelesTrak supplemental GP
    for group in starlink oneweb planet orbcomm; do
      echo "    CelesTrak SupGP ${group}..."
      fetch_tle "https://celestrak.org/NORAD/elements/supplemental/sup-gp.php?FILE=${group}&FORMAT=3le" \
        "$BENCH_DIR/supgp_${group}.tle" || echo "    FAILED"
    done
  fi
fi

# ── Build SQL ────────────────────────────────────────────────

if [[ "$REBUILD" == true ]]; then
  SOURCES=()
  if [[ "$HAS_CATALOG" == true ]]; then
    echo "==> Building catalog via pg-orrery-catalog..."
    # Any bench/*.tle files are passed explicitly. With none present, the
    # build runs without source arguments.
    # NOTE(review): presumably pg-orrery-catalog then uses its own download
    # cache — confirm against the tool.
    for f in "$BENCH_DIR"/*.tle; do
      if [[ -f "$f" ]]; then
        SOURCES+=("$f")
      fi
    done

    # Guarded expansion: an empty array under `set -u` is an error on older
    # bash versions.
    if ((${#SOURCES[@]} > 0)); then
      write_sql pg-orrery-catalog build "${SOURCES[@]}" --table "$TABLE"
    else
      write_sql pg-orrery-catalog build --table "$TABLE"
    fi
  else
    echo "==> Building catalog via build_catalog.py..."
    for f in spacetrack_everything.tle celestrak_active.tle satnogs_full.tle \
      supgp_starlink.tle supgp_oneweb.tle supgp_planet.tle supgp_orbcomm.tle; do
      if [[ -f "$BENCH_DIR/$f" ]]; then
        SOURCES+=("$BENCH_DIR/$f")
      fi
    done

    if ((${#SOURCES[@]} == 0)); then
      echo "ERROR: No source TLE files found in $BENCH_DIR" >&2
      exit 1
    fi

    write_sql python3 "$BENCH_DIR/build_catalog.py" "${SOURCES[@]}"
  fi
fi

# ── Load into PostgreSQL ─────────────────────────────────────

if [[ ! -f "$SQL_FILE" ]]; then
  echo "ERROR: $SQL_FILE not found" >&2
  echo "  Run with --rebuild or --download first" >&2
  exit 1
fi

echo "==> Loading catalog into $PGDATABASE (port $PGPORT)..."
# ON_ERROR_STOP makes psql exit non-zero on the first SQL error; combined with
# pipefail this aborts the script instead of indexing a partially loaded table.
# tail still shows the last lines of output, including the error message.
run_psql -v ON_ERROR_STOP=1 -f "$SQL_FILE" -q 2>&1 | tail -3

# ── Create indexes ───────────────────────────────────────────

echo "==> Creating indexes..."
# ON_ERROR_STOP is intentionally not set here: the two indexes are
# independent, so a failure creating one does not prevent the other from
# being attempted.
run_psql -q << 'SQL'
\timing on
CREATE INDEX IF NOT EXISTS bench_spgist_idx ON bench_catalog USING spgist (tle tle_spgist_ops);
CREATE INDEX IF NOT EXISTS bench_gist_idx ON bench_catalog USING gist (tle);
\timing off
SQL

# ── Summary ──────────────────────────────────────────────────

run_psql -q << 'SQL'
SELECT count(*) || ' objects loaded' AS status FROM bench_catalog;
SELECT
    CASE
        WHEN tle_mean_motion(tle) > 11.25 THEN 'LEO'
        WHEN tle_mean_motion(tle) > 1.8 THEN 'MEO'
        WHEN tle_mean_motion(tle) > 0.9 THEN 'GEO'
        ELSE 'HEO'
    END AS regime,
    count(*) AS count
FROM bench_catalog
GROUP BY 1
ORDER BY 2 DESC;
SQL

echo "==> Done. Run benchmarks with:"
echo "    PGPORT=$PGPORT psql -d $PGDATABASE -f bench/benchmark.sql"