From 77ddec149ce7faa0cecb390dc1493bea6f280b04 Mon Sep 17 00:00:00 2001 From: Ryan Malloy Date: Tue, 24 Feb 2026 14:15:23 -0700 Subject: [PATCH] Add audio download script and real signal demo fetch_apollo_audio.py downloads Apollo 11 audio highlights from Archive.org and extracts clips using ffmpeg (48 kHz mono WAV). Supports --list, --clip, --all with idempotent downloads and progress reporting. real_signal_demo.py auto-discovers downloaded clips and runs them through the full USB downlink TX/RX chain (PCM telemetry + FM voice), saving recovered audio for comparison. Falls back to the bundled demo clip if no downloads exist. Also adds .gitignore to keep large audio files out of the repo while preserving the small apollo11_crew.wav demo clip. --- .gitignore | 17 ++ examples/fetch_apollo_audio.py | 289 +++++++++++++++++++++++++ examples/real_signal_demo.py | 379 +++++++++++++++++++++++++++++++++ 3 files changed, 685 insertions(+) create mode 100644 .gitignore create mode 100755 examples/fetch_apollo_audio.py create mode 100755 examples/real_signal_demo.py diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000..c7201a2 --- /dev/null +++ b/.gitignore @@ -0,0 +1,17 @@ +# Python +__pycache__/ +*.pyc +*.egg-info/ +dist/ +build/ + +# Audio files (large, downloaded on demand) +examples/audio/*.wav +examples/audio/*.flac +# Keep the existing small demo clip +!examples/audio/apollo11_crew.wav + +# Environment +.env +*.env.local +node_modules/ diff --git a/examples/fetch_apollo_audio.py b/examples/fetch_apollo_audio.py new file mode 100755 index 0000000..67cc837 --- /dev/null +++ b/examples/fetch_apollo_audio.py @@ -0,0 +1,289 @@ +#!/usr/bin/env python3 +""" +Fetch Apollo audio recordings from Archive.org. + +Downloads the Apollo 11 audio highlights compilation and extracts individual +clips using ffmpeg. Source material is from the Internet Archive's Apollo 11 +collection. 
+ +Clips are saved as 48 kHz mono WAV files in examples/audio/ for use with +the gr-apollo signal processing demos. + +Usage: + uv run python examples/fetch_apollo_audio.py --list + uv run python examples/fetch_apollo_audio.py --all + uv run python examples/fetch_apollo_audio.py --clip eagle_has_landed + uv run python examples/fetch_apollo_audio.py --clip liftoff --force +""" + +import argparse +import os +import shutil +import subprocess +import sys +import urllib.request + +# Output directory: examples/audio/ relative to this script +AUDIO_DIR = os.path.join(os.path.dirname(os.path.abspath(__file__)), "audio") + +# Source FLAC from the Internet Archive +FLAC_URL = "https://archive.org/download/Apollo11AudioHighlights/Apollo11Highlights.flac" +FLAC_FILENAME = "Apollo11Highlights.flac" + +# Clip definitions -- timestamps are approximate offsets into the highlights reel. +# The important thing is having a working extraction pipeline; timestamps can be +# refined once someone listens through the actual source file. 
# Clip table: clip name -> ffmpeg seek offset ("start"), length ("duration")
# and a human-readable description. Offsets are relative to the start of the
# highlights FLAC and are approximate.
#
# NOTE(review): "Houston, we've had a problem" is an Apollo 13 transmission;
# confirm that the Apollo 11 highlights reel really contains it at this offset
# before relying on the "houston_problem" clip.
CLIPS = {
    "liftoff": {
        "start": "00:00:05",
        "duration": "00:00:30",
        "description": "Apollo 11 liftoff",
    },
    "eagle_has_landed": {
        "start": "00:06:45",
        "duration": "00:00:30",
        "description": "The Eagle has landed",
    },
    "one_small_step": {
        "start": "00:15:30",
        "duration": "00:00:25",
        "description": "One small step for man",
    },
    "houston_problem": {
        "start": "00:20:00",
        "duration": "00:00:15",
        "description": "Houston, we've had a problem",
    },
    "splashdown": {
        "start": "00:42:00",
        "duration": "00:00:20",
        "description": "Splashdown",
    },
}


def _clip_output_path(output_dir, clip_name):
    """Return the WAV path that *clip_name* is written to inside *output_dir*."""
    return os.path.join(output_dir, f"apollo11_{clip_name}.wav")


def _remove_if_exists(path):
    """Delete *path* if it is present; used to discard partial output files."""
    if os.path.exists(path):
        os.remove(path)


def check_ffmpeg():
    """Exit with status 1 and install hints if ffmpeg is not on PATH."""
    if shutil.which("ffmpeg") is None:
        print("ERROR: ffmpeg not found on PATH.", file=sys.stderr)
        print("Install it with your package manager:", file=sys.stderr)
        print(" Arch: pacman -S ffmpeg", file=sys.stderr)
        print(" Debian: apt install ffmpeg", file=sys.stderr)
        print(" macOS: brew install ffmpeg", file=sys.stderr)
        sys.exit(1)


def _progress_hook(block_num, block_size, total_size):
    """Report download progress to stderr.

    Signature matches the ``reporthook`` callback of
    ``urllib.request.urlretrieve``: blocks transferred so far, block size in
    bytes, and total size in bytes (non-positive when the size is unknown).
    """
    downloaded = block_num * block_size
    if total_size > 0:
        # The last block is usually short, so clamp to 100 %.
        pct = min(100.0, downloaded * 100.0 / total_size)
        mb_down = downloaded / (1024 * 1024)
        mb_total = total_size / (1024 * 1024)
        bar_width = 40
        filled = int(bar_width * pct / 100.0)
        bar = "#" * filled + "-" * (bar_width - filled)
        sys.stderr.write(f"\r [{bar}] {pct:5.1f}% {mb_down:.1f}/{mb_total:.1f} MB")
        sys.stderr.flush()
    else:
        mb_down = downloaded / (1024 * 1024)
        sys.stderr.write(f"\r Downloaded {mb_down:.1f} MB (unknown total)")
        sys.stderr.flush()


def download_flac(output_dir, force=False):
    """Download the FLAC source file with progress reporting.

    The transfer goes to a ``.part`` file that is renamed into place only
    after it completes, so an interrupted or failed download can never be
    mistaken for a finished one on the next run, and a failed ``--force``
    re-download leaves any previously downloaded copy intact.

    Args:
        output_dir: Directory to store the FLAC in (created if missing).
        force: Re-download even if the file already exists.

    Returns:
        The path to the downloaded file, or None on failure.
    """
    os.makedirs(output_dir, exist_ok=True)
    flac_path = os.path.join(output_dir, FLAC_FILENAME)

    if os.path.exists(flac_path) and not force:
        size_mb = os.path.getsize(flac_path) / (1024 * 1024)
        print(f" FLAC already exists: {flac_path} ({size_mb:.1f} MB)")
        print(" Use --force to re-download.")
        return flac_path

    print(f" Downloading: {FLAC_URL}")
    print(f" Saving to: {flac_path}")
    print()

    part_path = flac_path + ".part"
    try:
        urllib.request.urlretrieve(FLAC_URL, part_path, reporthook=_progress_hook)
        sys.stderr.write("\n")
        sys.stderr.flush()
        os.replace(part_path, flac_path)
    except OSError as exc:
        # urllib's URLError / HTTPError / ContentTooShortError all derive
        # from OSError, so this also covers network failures.
        print(f"\n Download failed: {exc}", file=sys.stderr)
        return None
    finally:
        # Runs on failure and on Ctrl-C alike; after a successful rename the
        # partial file no longer exists and this is a no-op.
        _remove_if_exists(part_path)

    size_mb = os.path.getsize(flac_path) / (1024 * 1024)
    print(f" Downloaded {size_mb:.1f} MB")
    return flac_path


def extract_clip(flac_path, clip_name, clip_info, output_dir, force=False):
    """Extract a clip segment from the FLAC source using ffmpeg.

    Outputs a 48 kHz mono 16-bit WAV file named ``apollo11_<clip_name>.wav``.
    ffmpeg writes to a ``.part`` file that is renamed on success, so a failed
    run never leaves a truncated WAV behind (which would otherwise be treated
    as "already exists" next time) and never clobbers an existing good clip.

    Args:
        flac_path: Path to the source FLAC.
        clip_name: Key used to build the output file name.
        clip_info: Mapping with "start", "duration" and "description".
        output_dir: Directory for the WAV (created if missing).
        force: Re-extract even if the output already exists.

    Returns:
        True on success (or when the clip already exists), False on failure.
    """
    out_path = _clip_output_path(output_dir, clip_name)

    if os.path.exists(out_path) and not force:
        print(f" [{clip_name}] Already exists: {out_path}")
        return True

    os.makedirs(output_dir, exist_ok=True)
    part_path = out_path + ".part"

    cmd = [
        "ffmpeg",
        "-y",  # overwrite without asking
        "-ss", clip_info["start"],  # seek to start (before -i: input seek)
        "-t", clip_info["duration"],  # extract duration
        "-i", flac_path,  # input
        "-ac", "1",  # mono
        "-ar", "48000",  # 48 kHz
        "-sample_fmt", "s16",  # 16-bit
        "-f", "wav",  # explicit: the .part name has no .wav extension
        part_path,
    ]

    print(f" [{clip_name}] Extracting: {clip_info['description']}")
    print(f" start={clip_info['start']} duration={clip_info['duration']}")

    try:
        result = subprocess.run(cmd, capture_output=True)
    except OSError as exc:
        # ffmpeg missing or not executable; callers that skipped
        # check_ffmpeg() get a clean failure instead of a traceback.
        print(f" [{clip_name}] could not run ffmpeg: {exc}", file=sys.stderr)
        return False

    if result.returncode != 0:
        print(f" [{clip_name}] ffmpeg failed (exit {result.returncode}):", file=sys.stderr)
        stderr_text = result.stderr.decode("utf-8", errors="replace")
        # Print last few lines of ffmpeg output for diagnostics
        for line in stderr_text.strip().splitlines()[-5:]:
            print(f" {line}", file=sys.stderr)
        _remove_if_exists(part_path)
        return False

    os.replace(part_path, out_path)
    size_kb = os.path.getsize(out_path) / 1024
    print(f" -> {out_path} ({size_kb:.0f} KB)")
    return True


def list_clips():
    """Print available clip names, timestamps and descriptions."""
    print("Available clips:")
    print()
    max_name = max(len(n) for n in CLIPS)
    for name, info in CLIPS.items():
        print(f" {name:<{max_name}} {info['start']} ({info['duration']}) {info['description']}")
    print()
    print(f" {len(CLIPS)} clips defined.")
    print(" Extract with: --clip NAME or --all")


def main():
    """Command-line entry point: parse arguments, download, extract, report.

    Exits with status 1 on bad arguments, a failed download, or when any
    clip fails to extract.
    """
    parser = argparse.ArgumentParser(
        description="Fetch Apollo 11 audio from Archive.org and extract clips.",
        epilog="Clips are saved as 48 kHz mono WAV in examples/audio/.",
    )
    parser.add_argument(
        "--list",
        action="store_true",
        help="List available clip names and timestamps",
    )
    parser.add_argument(
        "--clip",
        metavar="NAME",
        help="Extract a specific clip by name",
    )
    parser.add_argument(
        "--all",
        action="store_true",
        help="Extract all defined clips",
    )
    parser.add_argument(
        "--keep-flac",
        action="store_true",
        help="Keep the downloaded FLAC file after extraction",
    )
    parser.add_argument(
        "--force",
        action="store_true",
        help="Re-download and re-extract even if files already exist",
    )
    parser.add_argument(
        "--output-dir",
        default=AUDIO_DIR,
        help=f"Output directory (default: {AUDIO_DIR})",
    )
    args = parser.parse_args()

    # --list doesn't need ffmpeg
    if args.list:
        list_clips()
        return

    # Everything else requires ffmpeg
    check_ffmpeg()

    # Validate arguments
    if not args.clip and not args.all:
        parser.print_help()
        print()
        print("Specify --clip NAME, --all, or --list.")
        sys.exit(1)

    if args.clip and args.clip not in CLIPS:
        print(f"Unknown clip: {args.clip}", file=sys.stderr)
        print(f"Available: {', '.join(CLIPS.keys())}", file=sys.stderr)
        sys.exit(1)

    # Determine which clips to extract (--all wins if both are given)
    clip_names = list(CLIPS) if args.all else [args.clip]

    print("=" * 60)
    print("Apollo 11 Audio Fetch")
    print("=" * 60)
    print()

    # The FLAC is deleted after extraction by default, so only fetch it when
    # at least one requested clip actually has to be (re)built. Without this
    # a second run would download the whole file again just to find out that
    # every clip already exists.
    needs_source = args.force or any(
        not os.path.exists(_clip_output_path(args.output_dir, name))
        for name in clip_names
    )

    print("Step 1: Download source FLAC")
    if needs_source:
        flac_path = download_flac(args.output_dir, force=args.force)
        if flac_path is None:
            sys.exit(1)
    else:
        flac_path = os.path.join(args.output_dir, FLAC_FILENAME)
        print(" All requested clips already exist; skipping download.")
        print(" Use --force to re-download and re-extract.")
    print()

    # Extract clips
    print(f"Step 2: Extract {len(clip_names)} clip(s)")
    print()

    ok_count = 0
    fail_count = 0
    for name in clip_names:
        success = extract_clip(flac_path, name, CLIPS[name], args.output_dir, force=args.force)
        if success:
            ok_count += 1
        else:
            fail_count += 1
    print()

    # Clean up FLAC unless --keep-flac
    if not args.keep_flac and os.path.exists(flac_path):
        size_mb = os.path.getsize(flac_path) / (1024 * 1024)
        os.remove(flac_path)
        print(f"Removed source FLAC ({size_mb:.1f} MB). Use --keep-flac to retain.")
    elif args.keep_flac and os.path.exists(flac_path):
        print(f"Kept source FLAC: {flac_path}")
    print()

    # Summary
    print("=" * 60)
    print(f" Extracted: {ok_count} Failed: {fail_count}")
    if ok_count > 0:
        print(f" Output: {args.output_dir}/apollo11_*.wav")
    print("=" * 60)

    if fail_count > 0:
        sys.exit(1)


if __name__ == "__main__":
    main()
"""
Apollo Real Signal Demo -- process downloaded Apollo recordings through USB.

Auto-discovers WAV files in examples/audio/ (from fetch_apollo_audio.py) and
runs them through the full USB downlink chain: transmit (NRZ + BPSK + voice FM
onto PM carrier) then receive (PCM frame recovery + voice demodulation).

This proves the gr-apollo signal chain works on real-world audio, not just
synthetic test tones.

Signal path (same as full_downlink_demo.py):
    TX:
        pcm_frame_source -> nrz -> bpsk_mod (1.024 MHz) --+
        audio_clip -> fm_voice_mod (1.25 MHz, +/-29kHz) ---+-> add -> pm_mod -> [signal]

    RX:
        [signal] -> usb_downlink_receiver -> PCM frames
        [signal] -> pm_demod -> voice_subcarrier_demod -> recovered audio

Usage:
    uv run python examples/real_signal_demo.py
    uv run python examples/real_signal_demo.py --clip eagle_has_landed
    uv run python examples/real_signal_demo.py --snr 25
    uv run python examples/real_signal_demo.py --clip liftoff --play
"""

import argparse
import glob
import os
import sys
import time
from math import gcd

import numpy as np
from gnuradio import blocks, gr
from scipy.io import wavfile
from scipy.signal import resample_poly

from apollo.bpsk_subcarrier_mod import bpsk_subcarrier_mod
from apollo.constants import (
    PCM_HIGH_BIT_RATE,
    PCM_HIGH_WORDS_PER_FRAME,
    PCM_SUBCARRIER_HZ,
    PCM_WORD_LENGTH,
    PM_PEAK_DEVIATION_RAD,
    SAMPLE_RATE_BASEBAND,
    VOICE_FM_DEVIATION_HZ,
    VOICE_SUBCARRIER_HZ,
)
from apollo.fm_voice_subcarrier_mod import fm_voice_subcarrier_mod
from apollo.nrz_encoder import nrz_encoder
from apollo.pcm_frame_source import pcm_frame_source
from apollo.pm_demod import pm_demod
from apollo.pm_mod import pm_mod
from apollo.usb_downlink_receiver import usb_downlink_receiver
from apollo.voice_subcarrier_demod import voice_subcarrier_demod

# Audio directory relative to this script
AUDIO_DIR = os.path.join(os.path.dirname(os.path.abspath(__file__)), "audio")

# Fallback clip if no downloaded audio exists
FALLBACK_CLIP = os.path.join(AUDIO_DIR, "apollo11_crew.wav")


def discover_clips():
    """Find WAV files in the audio directory.

    Returns a dict of {name: path} for all apollo11_*.wav files,
    excluding *_recovered.wav and *_fullchain.wav (our own output) and the
    bundled ``apollo11_crew.wav`` demo clip, which main() only uses as a
    fallback when nothing else is found.
    """
    prefix = "apollo11_"
    suffix = ".wav"
    clips = {}
    pattern = os.path.join(AUDIO_DIR, prefix + "*" + suffix)
    for path in sorted(glob.glob(pattern)):
        basename = os.path.basename(path)
        # Skip output files from previous runs
        if basename.endswith(("_recovered.wav", "_fullchain.wav")):
            continue
        # Extract clip name: apollo11_eagle_has_landed.wav -> eagle_has_landed.
        # Slice instead of str.replace so that a name which itself contains
        # "apollo11_" or ".wav" is not mangled.
        name = basename[len(prefix):-len(suffix)]
        # Skip the small demo clip unless it's the only option
        if name == "crew":
            continue
        clips[name] = path

    return clips


def load_and_upsample_audio(audio_path, sample_rate):
    """Load an audio file and upsample it to the baseband rate.

    Multi-channel files are reduced to their first channel. Integer PCM is
    scaled to [-1, 1); floating-point data is passed through unchanged.

    Args:
        audio_path: Path to a WAV file readable by scipy.io.wavfile.
        sample_rate: Target baseband sample rate in Hz (integer).

    Returns:
        Tuple ``(upsampled, duration, audio_rate)``: float32 samples at
        *sample_rate*, the input duration in seconds, and the intermediate
        audio rate (8000 Hz).
    """
    input_rate, audio_data = wavfile.read(audio_path)
    if audio_data.ndim > 1:
        audio_data = audio_data[:, 0]

    # Normalize to [-1, 1]
    if np.issubdtype(audio_data.dtype, np.unsignedinteger):
        # 8-bit WAV PCM is unsigned and centred on mid-scale; without
        # removing that offset the FM modulator sees a large DC-biased input.
        midpoint = (float(np.iinfo(audio_data.dtype).max) + 1.0) / 2.0
        audio_float = ((audio_data.astype(np.float64) - midpoint) / midpoint).astype(
            np.float32
        )
    elif np.issubdtype(audio_data.dtype, np.integer):
        # int16 -> /32768, int32 -> /2147483648
        full_scale = float(np.iinfo(audio_data.dtype).max) + 1.0
        audio_float = (audio_data.astype(np.float64) / full_scale).astype(np.float32)
    else:
        audio_float = audio_data.astype(np.float32)

    duration = len(audio_float) / input_rate

    # Resample to 8 kHz first (Apollo voice bandwidth)
    audio_rate = 8000
    if input_rate != audio_rate:
        g = gcd(audio_rate, input_rate)
        audio_float = resample_poly(
            audio_float, audio_rate // g, input_rate // g
        ).astype(np.float32)

    # Upsample to baseband
    g = gcd(sample_rate, audio_rate)
    upsampled = resample_poly(
        audio_float, sample_rate // g, audio_rate // g
    ).astype(np.float32)

    return upsampled, duration, audio_rate


def build_tx_signal(audio_samples, n_samples, sample_rate, snr_db):
    """Build the combined TX signal: PCM + voice -> PM modulation.

    Same manual assembly as full_downlink_demo.py so we can inject
    external audio into the voice channel.

    Args:
        audio_samples: Voice samples already at *sample_rate*.
        n_samples: Number of complex output samples to generate.
        sample_rate: Baseband sample rate in Hz.
        snr_db: SNR in dB for added complex AWGN, or None for a clean signal.

    Returns:
        The modulated signal as a numpy array taken from the vector sink.
    """
    tb = gr.top_block()

    # --- PCM telemetry path ---
    frame_src = pcm_frame_source(bit_rate=PCM_HIGH_BIT_RATE)
    nrz = nrz_encoder(bit_rate=PCM_HIGH_BIT_RATE, sample_rate=sample_rate)
    bpsk = bpsk_subcarrier_mod(
        subcarrier_freq=PCM_SUBCARRIER_HZ,
        sample_rate=sample_rate,
    )
    tb.connect(frame_src, nrz, bpsk)

    # --- Voice subcarrier path (real audio) ---
    voice_src = blocks.vector_source_f(audio_samples[:n_samples].tolist())
    voice_mod = fm_voice_subcarrier_mod(
        sample_rate=sample_rate,
        subcarrier_freq=VOICE_SUBCARRIER_HZ,
        fm_deviation=VOICE_FM_DEVIATION_HZ,
        audio_input=True,
    )
    # Scale voice relative to PCM: 1.68/2.2 per IMPL_SPEC
    voice_gain = blocks.multiply_const_ff(1.68 / 2.2)
    tb.connect(voice_src, voice_mod, voice_gain)

    # --- Sum subcarriers ---
    adder = blocks.add_ff(1)
    tb.connect(bpsk, (adder, 0))
    tb.connect(voice_gain, (adder, 1))

    # --- PM modulation ---
    pm = pm_mod(pm_deviation=PM_PEAK_DEVIATION_RAD, sample_rate=sample_rate)
    # head() bounds the run: it stops the flowgraph after n_samples items.
    head = blocks.head(gr.sizeof_gr_complex, n_samples)
    tb.connect(adder, pm, head)

    # --- Optional AWGN ---
    if snr_db is not None:
        import math

        # Noise power for the requested SNR, taking signal power as 1.0 and
        # splitting the noise equally between the I and Q components.
        noise_power = 1.0 / (10.0 ** (snr_db / 10.0))
        noise_amp = math.sqrt(noise_power / 2.0)
        noise = blocks.vector_source_c(
            (np.random.randn(n_samples) + 1j * np.random.randn(n_samples)).astype(
                np.complex64
            )
            * noise_amp
        )
        summer = blocks.add_cc(1)
        snk = blocks.vector_sink_c()
        tb.connect(head, (summer, 0))
        tb.connect(noise, (summer, 1))
        tb.connect(summer, snk)
    else:
        snk = blocks.vector_sink_c()
        tb.connect(head, snk)

    tb.run()
    return np.array(snk.data())


def receive_pcm(signal_data, sample_rate):
    """Run the PCM receive chain and return the message debug sink.

    The sink stores every message emitted on the receiver's "frames" port;
    callers read the count with ``num_messages()``.
    """
    tb = gr.top_block()
    src = blocks.vector_source_c(signal_data.tolist())
    rx = usb_downlink_receiver(
        sample_rate=sample_rate,
        bit_rate=PCM_HIGH_BIT_RATE,
        output_format="raw",
    )
    snk = blocks.message_debug()
    tb.connect(src, rx)
    tb.msg_connect(rx, "frames", snk, "store")
    tb.run()
    return snk


def receive_voice(signal_data, sample_rate, audio_rate=8000):
    """Run the voice receive chain and return recovered audio samples.

    Chain: PM demodulation -> voice subcarrier demodulation. Returns a
    float32 numpy array at *audio_rate*.
    """
    tb = gr.top_block()
    src = blocks.vector_source_c(signal_data.tolist())
    pm = pm_demod(sample_rate=sample_rate)
    voice = voice_subcarrier_demod(sample_rate=sample_rate, audio_rate=audio_rate)
    snk = blocks.vector_sink_f()
    tb.connect(src, pm, voice, snk)
    tb.run()
    return np.array(snk.data(), dtype=np.float32)


def process_clip(clip_name, clip_path, sample_rate, audio_rate, snr_db):
    """Process a single audio clip through the full TX/RX chain.

    Writes the recovered voice to ``apollo11_<clip_name>_recovered.wav`` in
    AUDIO_DIR (peak-normalized to 0.9 full scale, 16-bit).

    Returns a dict with stats about the processing: clip name, input and
    output paths, durations, recovered/expected PCM frame counts, the SNR
    description, and wall-clock times for TX, PCM RX and voice RX.
    """
    print(f" Loading: {clip_path}")
    audio_upsampled, duration, _ = load_and_upsample_audio(clip_path, sample_rate)
    print(f" Duration: {duration:.2f}s, {len(audio_upsampled):,} baseband samples")

    # Calculate frame timing
    bits_per_frame = PCM_HIGH_WORDS_PER_FRAME * PCM_WORD_LENGTH
    samples_per_frame = int(bits_per_frame * sample_rate / PCM_HIGH_BIT_RATE)
    n_frames = int(duration * 50) + 2  # 50 fps + margin
    n_samples = min(len(audio_upsampled), n_frames * samples_per_frame)

    snr_desc = f"{snr_db} dB" if snr_db is not None else "clean"
    print(f" TX: {n_samples:,} samples, ~{n_frames} PCM frames, SNR={snr_desc}")

    # === TRANSMIT ===
    t0 = time.time()
    signal = build_tx_signal(audio_upsampled, n_samples, sample_rate, snr_db)
    t_tx = time.time() - t0
    print(f" TX complete: {len(signal):,} complex samples ({t_tx:.1f}s)")

    # === RECEIVE: PCM ===
    t0 = time.time()
    frame_sink = receive_pcm(signal, sample_rate)
    t_pcm = time.time() - t0
    n_recovered_frames = frame_sink.num_messages()
    print(f" RX PCM: {n_recovered_frames} frames recovered ({t_pcm:.1f}s)")

    # === RECEIVE: Voice ===
    t0 = time.time()
    recovered_audio = receive_voice(signal, sample_rate, audio_rate)
    t_voice = time.time() - t0
    recovered_duration = len(recovered_audio) / audio_rate
    print(
        f" RX voice: {len(recovered_audio):,} samples,"
        f" {recovered_duration:.2f}s ({t_voice:.1f}s)"
    )

    # Normalize and save recovered audio
    output_path = os.path.join(AUDIO_DIR, f"apollo11_{clip_name}_recovered.wav")
    # np.max raises on an empty array, so treat "no samples recovered" as
    # silence and still write an (empty) WAV plus the stats below.
    peak = np.max(np.abs(recovered_audio)) if recovered_audio.size else 0.0
    if peak > 0:
        recovered_audio = recovered_audio / peak * 0.9
    recovered_int16 = (recovered_audio * 32767).astype(np.int16)
    wavfile.write(output_path, audio_rate, recovered_int16)
    print(f" Saved: {output_path}")

    return {
        "clip_name": clip_name,
        "input_path": clip_path,
        "output_path": output_path,
        "input_duration": duration,
        "recovered_duration": recovered_duration,
        "pcm_frames": n_recovered_frames,
        "expected_frames": n_frames,
        "snr": snr_desc,
        "time_tx": t_tx,
        "time_pcm": t_pcm,
        "time_voice": t_voice,
    }


def main():
    """Command-line entry point: pick a clip, run the chain, print a summary.

    Exits with status 1 when no audio is available or the requested clip is
    not among the discovered ones.
    """
    parser = argparse.ArgumentParser(
        description="Process real Apollo audio through the full USB downlink chain."
    )
    parser.add_argument(
        "--clip",
        metavar="NAME",
        default=None,
        help="Process a specific clip (default: first discovered)",
    )
    parser.add_argument(
        "--snr",
        type=float,
        default=None,
        help="Add AWGN noise at this SNR in dB",
    )
    parser.add_argument(
        "--play",
        action="store_true",
        help="Play recovered audio with aplay after processing",
    )
    args = parser.parse_args()

    sample_rate = int(SAMPLE_RATE_BASEBAND)
    audio_rate = 8000

    print("=" * 60)
    print("Apollo Real Signal Demo")
    print(" Full USB downlink: PCM telemetry + crew voice")
    print("=" * 60)
    print()

    # Discover available clips
    clips = discover_clips()

    if not clips:
        # Fall back to the bundled demo clip
        if os.path.exists(FALLBACK_CLIP):
            print(" No downloaded clips found. Using bundled demo clip.")
            clips = {"crew": FALLBACK_CLIP}
        else:
            print("No audio files found in examples/audio/.", file=sys.stderr)
            print("Run fetch_apollo_audio.py first:", file=sys.stderr)
            print(" uv run python examples/fetch_apollo_audio.py --all", file=sys.stderr)
            sys.exit(1)

    print(f" Found {len(clips)} clip(s): {', '.join(clips.keys())}")
    print()

    # Select which clip to process
    if args.clip:
        if args.clip not in clips:
            print(f"Clip not found: {args.clip}", file=sys.stderr)
            print(f"Available: {', '.join(clips.keys())}", file=sys.stderr)
            sys.exit(1)
        selected_name = args.clip
    else:
        # discover_clips() inserts in sorted path order, so this is the
        # alphabetically first clip.
        selected_name = next(iter(clips))

    selected_path = clips[selected_name]

    print(f"Processing: {selected_name}")
    print("-" * 60)

    stats = process_clip(selected_name, selected_path, sample_rate, audio_rate, args.snr)

    # === SUMMARY ===
    print()
    print("=" * 60)
    print("Summary")
    print("=" * 60)
    print(f" Clip: {stats['clip_name']}")
    print(f" Input duration: {stats['input_duration']:.2f}s")
    print(f" Recovered audio: {stats['recovered_duration']:.2f}s")
    pcm_f = stats['pcm_frames']
    exp_f = stats['expected_frames']
    print(f" PCM frames: {pcm_f} recovered (expected ~{exp_f})")
    print(f" SNR: {stats['snr']}")
    t_tx = stats['time_tx']
    t_pcm = stats['time_pcm']
    t_voice = stats['time_voice']
    print(
        f" Processing time: TX={t_tx:.1f}s"
        f" PCM-RX={t_pcm:.1f}s Voice-RX={t_voice:.1f}s"
    )
    print(f" Output: {stats['output_path']}")
    print("=" * 60)

    if args.play:
        import shutil
        import subprocess

        print()
        if shutil.which("aplay") is None:
            # aplay is ALSA-only; report instead of raising FileNotFoundError.
            print("aplay not found on PATH; cannot play audio.", file=sys.stderr)
            print(f"Recovered audio is at: {stats['output_path']}", file=sys.stderr)
        else:
            print("Playing recovered audio...")
            subprocess.run(["aplay", stats["output_path"]], check=False)
    else:
        print()
        print(f"Play recovered: aplay {stats['output_path']}")


if __name__ == "__main__":
    main()