#!/usr/bin/env python3
"""
Build a merged TLE catalog from multiple sources for pg_orrery benchmarks.

Usage:
    # Merge existing TLE files into SQL
    ./build_catalog.py bench/spacetrack_everything.tle bench/celestrak_active.tle ...

    # Pipe to psql
    ./build_catalog.py bench/*.tle | PGPORT=5499 psql -d contrib_regression

    # Or generate SQL file
    ./build_catalog.py bench/*.tle > bench/load_catalog.sql

Options:
    --table NAME, --table=NAME    Target table name (default: bench_catalog)

Deduplication: when the same NORAD ID appears in multiple files, the entry
with the newest epoch wins. This means CelesTrak SupGP data (fresher epochs)
automatically overrides stale Space-Track entries.

Alpha-5 NORAD IDs (T0002 etc.) are handled transparently — they parse into
integers >100,000 via the same logic as Bill Gray's get_el.c.
"""

import sys
import os
import re
from collections import OrderedDict

# Alpha-5 NORAD decoding — mirrors get_norad_number() in src/sgp4/get_el.c
_ALPHA5_SKIP = {'I', 'O'}  # skipped in Alpha-5 encoding


def decode_norad(s):
    """Decode a 5-character NORAD field to integer. Handles Alpha-5.

    A field starting with a digit is parsed as a plain integer. A field
    starting with an ASCII capital letter is treated as Alpha-5: the letter
    (with 'I' and 'O' skipped in the alphabet) supplies the leading value
    10..33 and the remaining characters supply the low four digits, so
    'A0000' -> 100000 and 'T0002' -> 270002.

    Returns None for an empty field or one that cannot be parsed.
    """
    s = s.strip()
    if not s:
        return None

    first = s[0]
    if first.isdigit():
        try:
            return int(s)
        except ValueError:
            return None

    # Only ASCII capitals are valid Alpha-5 prefixes; str.isupper() would
    # also admit non-ASCII letters and yield nonsense catalog numbers.
    if 'A' <= first <= 'Z':
        val = ord(first) - ord('A')
        # Close the gaps left by the two letters Alpha-5 leaves out.
        if first > 'I':
            val -= 1
        if first > 'O':
            val -= 1
        try:
            return val * 10000 + int(s[1:]) + 100000
        except ValueError:
            return None

    return None


def _epoch_sort_key(epoch):
    """Return a chronologically sortable key for a raw TLE epoch (YYDDD.ddd).

    TLE epochs carry a two-digit year: 57-99 mean 1957-1999 and 00-56 mean
    2000-2056. Comparing the raw floats would rank a 1999 epoch (99xxx) as
    newer than a 2024 epoch (24xxx), so 20xx epochs are lifted above 19xx.
    Values <= 0 (the 0.0 placeholder used for unparseable epochs) are left
    alone so they sort below every real epoch.
    """
    if 0.0 < epoch < 57000.0:
        return epoch + 100000.0
    return epoch


def _sql_escape(text):
    """Escape *text* for embedding inside a PostgreSQL E'...' string literal."""
    return text.replace('\\', '\\\\').replace("'", "''")


def parse_3le_file(filepath):
    """Parse a 3LE (or 2LE) file into a dict of norad_str -> (line1, line2, name, epoch).

    A record is a line starting with '1 ' immediately followed by a line
    starting with '2 '. If the line before the pair is non-empty and is not
    itself an element line, it is used (stripped) as the object name;
    otherwise the name is ''. ``epoch`` is the raw float read from line 1
    (0.0 when it cannot be parsed). When a NORAD ID occurs more than once in
    the file, the chronologically newest entry is kept.

    A missing file is reported on stderr and yields an empty dict.
    """
    objects = {}
    try:
        # Context manager so the handle is closed even on a read error.
        with open(filepath, errors='replace') as fh:
            lines = [raw.rstrip('\r\n') for raw in fh]
    except FileNotFoundError:
        print(f"# SKIP {filepath}: not found", file=sys.stderr)
        return objects

    total = len(lines)
    i = 0
    while i < total:
        line1 = lines[i]
        is_pair = (
            line1.startswith('1 ')
            and i + 1 < total
            and lines[i + 1].startswith('2 ')
        )
        if not is_pair:
            i += 1
            continue

        line2 = lines[i + 1]

        # Look back for name line (3LE format)
        name = ''
        if i > 0:
            prev = lines[i - 1]
            if prev and not prev.startswith(('1 ', '2 ')):
                name = prev.strip()

        # Both element lines are consumed whatever happens below.
        i += 2

        # Extract NORAD ID (works for both standard and Alpha-5)
        norad_int = decode_norad(line1[2:7])
        if norad_int is None:
            continue
        norad_str = str(norad_int)

        # Extract epoch (columns 19-32 of line 1); a short or garbled line
        # falls back to 0.0, which never displaces an existing entry.
        try:
            epoch = float(line1[18:32].strip())
        except ValueError:
            epoch = 0.0

        # Keep the entry with the newest epoch (century-aware comparison).
        current = objects.get(norad_str)
        if current is None or _epoch_sort_key(epoch) > _epoch_sort_key(current[3]):
            objects[norad_str] = (line1, line2, name, epoch)

    return objects


def main():
    """Merge the TLE files named on the command line and print SQL to stdout.

    Progress and per-file statistics go to stderr so stdout can be piped
    straight into psql. Exits with status 1 when called without arguments or
    when ``--table`` is given without a value.
    """
    if len(sys.argv) < 2:
        print(__doc__, file=sys.stderr)
        sys.exit(1)

    # Parse --table option; everything else is an input file
    table_name = 'bench_catalog'
    files = []
    args = sys.argv[1:]
    i = 0
    while i < len(args):
        arg = args[i]
        if arg == '--table':
            if i + 1 >= len(args):
                # Previously this fell through and was treated as a file name.
                print("error: --table requires a value", file=sys.stderr)
                sys.exit(1)
            table_name = args[i + 1]
            i += 2
        elif arg.startswith('--table='):
            table_name = arg.split('=', 1)[1]
            i += 1
        else:
            files.append(arg)
            i += 1

    # Merge all sources (later files override earlier for same NORAD ID if
    # newer epoch)
    mega = {}
    for filepath in files:
        objs = parse_3le_file(filepath)
        new = updated = 0
        for k, v in objs.items():
            if k not in mega:
                new += 1
                mega[k] = v
            elif _epoch_sort_key(v[3]) > _epoch_sort_key(mega[k][3]):
                updated += 1
                mega[k] = v
        basename = os.path.basename(filepath)
        print(f"-- {basename}: {len(objs)} objects ({new} new, {updated} updated)",
              file=sys.stderr)

    print(f"-- Total: {len(mega)} unique objects", file=sys.stderr)

    # Emit SQL
    print(f"-- pg_orrery benchmark catalog ({len(mega)} objects)")
    print(f"-- Generated from {len(files)} TLE source files")
    print(f"-- Sources: {', '.join(os.path.basename(f) for f in files)}")
    print()
    print(f"DROP TABLE IF EXISTS {table_name};")
    print(f"CREATE TABLE {table_name} (")
    print(" id serial,")
    print(" name text,")
    print(" tle tle")
    print(");")
    print()

    count = 0
    for norad_str in sorted(mega, key=int):
        line1, line2, name, _epoch = mega[norad_str]
        if not name:
            name = f'NORAD {norad_str}'
        # Both values are emitted as E'...' literals with backslashes and
        # quotes escaped, so the result does not depend on the server's
        # standard_conforming_strings setting and stray characters in a
        # malformed TLE file cannot break out of the literal.
        name_sql = _sql_escape(name)
        tle_str = f"{_sql_escape(line1)}\\n{_sql_escape(line2)}"
        print(f"INSERT INTO {table_name} (name, tle) VALUES (E'{name_sql}', E'{tle_str}');")
        count += 1

    print()
    print(f"-- Loaded {count} objects")


if __name__ == '__main__':
    main()