Add audio download script and real signal demo

fetch_apollo_audio.py downloads Apollo 11 audio highlights from Archive.org
and extracts clips using ffmpeg (48 kHz mono WAV). Supports --list, --clip,
--all with idempotent downloads and progress reporting.

real_signal_demo.py auto-discovers downloaded clips and runs them through the
full USB downlink TX/RX chain (PCM telemetry + FM voice), saving recovered
audio for comparison. Falls back to the bundled demo clip if no downloads exist.

Also adds .gitignore to keep large audio files out of the repo while preserving
the small apollo11_crew.wav demo clip.
This commit is contained in:
Ryan Malloy 2026-02-24 14:15:23 -07:00
parent 04a17899fc
commit 77ddec149c
3 changed files with 685 additions and 0 deletions

17
.gitignore vendored Normal file
View File

@ -0,0 +1,17 @@
# Python
__pycache__/
*.pyc
# Packaging and build output
*.egg-info/
dist/
build/
# Audio files (large, downloaded on demand)
examples/audio/*.wav
examples/audio/*.flac
# Keep the existing small demo clip
# (this negation has to stay below the *.wav rule it overrides)
!examples/audio/apollo11_crew.wav
# Environment
.env
*.env.local
# Node.js dependency directory
node_modules/

289
examples/fetch_apollo_audio.py Executable file
View File

@ -0,0 +1,289 @@
#!/usr/bin/env python3
"""
Fetch Apollo audio recordings from Archive.org.
Downloads the Apollo 11 audio highlights compilation and extracts individual
clips using ffmpeg. Source material is from the Internet Archive's Apollo 11
collection.
Clips are saved as 48 kHz mono WAV files in examples/audio/ for use with
the gr-apollo signal processing demos.
Usage:
uv run python examples/fetch_apollo_audio.py --list
uv run python examples/fetch_apollo_audio.py --all
uv run python examples/fetch_apollo_audio.py --clip eagle_has_landed
uv run python examples/fetch_apollo_audio.py --clip liftoff --force
"""
import argparse
import os
import shutil
import subprocess
import sys
import urllib.request
# Output directory: examples/audio/ relative to this script
AUDIO_DIR = os.path.join(os.path.dirname(os.path.abspath(__file__)), "audio")
# Source FLAC from the Internet Archive
FLAC_URL = "https://archive.org/download/Apollo11AudioHighlights/Apollo11Highlights.flac"
# Filename the downloaded FLAC is stored under inside the output directory
FLAC_FILENAME = "Apollo11Highlights.flac"
# Clip definitions -- timestamps are approximate offsets into the highlights reel.
# The important thing is having a working extraction pipeline; timestamps can be
# refined once someone listens through the actual source file.
#
# Each key is the clip name accepted by --clip and embedded in the output
# filename (apollo11_<name>.wav). Per-clip fields:
#   "start"       -- HH:MM:SS offset passed to ffmpeg's -ss option
#   "duration"    -- HH:MM:SS length passed to ffmpeg's -t option
#   "description" -- human-readable label shown by --list and while extracting
CLIPS = {
"liftoff": {
"start": "00:00:05",
"duration": "00:00:30",
"description": "Apollo 11 liftoff",
},
"eagle_has_landed": {
"start": "00:06:45",
"duration": "00:00:30",
"description": "The Eagle has landed",
},
"one_small_step": {
"start": "00:15:30",
"duration": "00:00:25",
"description": "One small step for man",
},
"houston_problem": {
"start": "00:20:00",
"duration": "00:00:15",
"description": "Houston, we've had a problem",
},
"splashdown": {
"start": "00:42:00",
"duration": "00:00:20",
"description": "Splashdown",
},
}
def check_ffmpeg():
    """Abort the program with install hints when ffmpeg is not on PATH.

    Returns None when ffmpeg is found; otherwise prints the hints to stderr
    and exits with status 1.
    """
    if shutil.which("ffmpeg") is not None:
        return

    hint_lines = (
        "ERROR: ffmpeg not found on PATH.",
        "Install it with your package manager:",
        " Arch: pacman -S ffmpeg",
        " Debian: apt install ffmpeg",
        " macOS: brew install ffmpeg",
    )
    for hint in hint_lines:
        print(hint, file=sys.stderr)
    sys.exit(1)
def _progress_hook(block_num, block_size, total_size):
"""Report download progress to stderr."""
downloaded = block_num * block_size
if total_size > 0:
pct = min(100.0, downloaded * 100.0 / total_size)
mb_down = downloaded / (1024 * 1024)
mb_total = total_size / (1024 * 1024)
bar_width = 40
filled = int(bar_width * pct / 100.0)
bar = "#" * filled + "-" * (bar_width - filled)
sys.stderr.write(f"\r [{bar}] {pct:5.1f}% {mb_down:.1f}/{mb_total:.1f} MB")
sys.stderr.flush()
else:
mb_down = downloaded / (1024 * 1024)
sys.stderr.write(f"\r Downloaded {mb_down:.1f} MB (unknown total)")
sys.stderr.flush()
def download_flac(output_dir, force=False):
    """Download the FLAC source file with progress reporting.

    The data is first written to ``<name>.part`` and only renamed to the final
    filename once the transfer has finished. An interrupted or failed download
    therefore never leaves a truncated file that a later run would mistake for
    a complete one, and a failed ``force`` re-download keeps the previous copy.

    Args:
        output_dir: Directory to store the file in (created if missing).
        force: Re-download even when the file already exists.

    Returns:
        The path to the downloaded file, or None on failure.
    """
    os.makedirs(output_dir, exist_ok=True)
    flac_path = os.path.join(output_dir, FLAC_FILENAME)

    if os.path.exists(flac_path) and not force:
        size_mb = os.path.getsize(flac_path) / (1024 * 1024)
        print(f" FLAC already exists: {flac_path} ({size_mb:.1f} MB)")
        print(" Use --force to re-download.")
        return flac_path

    print(f" Downloading: {FLAC_URL}")
    print(f" Saving to: {flac_path}")
    print()

    part_path = flac_path + ".part"
    completed = False
    try:
        urllib.request.urlretrieve(FLAC_URL, part_path, reporthook=_progress_hook)
        sys.stderr.write("\n")
        sys.stderr.flush()
        # Atomic on the same filesystem: the final name appears only when complete.
        os.replace(part_path, flac_path)
        completed = True
    except OSError as exc:
        # urllib.error.URLError (and its ContentTooShortError subclass) derive
        # from OSError, so this single clause covers network and disk errors.
        print(f"\n Download failed: {exc}", file=sys.stderr)
        return None
    finally:
        # Also runs on KeyboardInterrupt, so Ctrl-C does not leave debris behind.
        if not completed:
            try:
                os.remove(part_path)
            except OSError:
                pass

    size_mb = os.path.getsize(flac_path) / (1024 * 1024)
    print(f" Downloaded {size_mb:.1f} MB")
    return flac_path
def extract_clip(flac_path, clip_name, clip_info, output_dir, force=False):
    """Extract a clip segment from the FLAC source using ffmpeg.

    Outputs a 48 kHz mono 16-bit WAV file named ``apollo11_<clip_name>.wav``.

    Args:
        flac_path: Path to the source FLAC file.
        clip_name: Clip identifier, used in the output filename and log lines.
        clip_info: Mapping with "start", "duration" and "description" strings.
        output_dir: Directory the WAV file is written to.
        force: Re-extract even when the output file already exists.

    Returns:
        True on success (or when the clip already exists), False on failure.
    """
    out_path = os.path.join(output_dir, f"apollo11_{clip_name}.wav")
    if os.path.exists(out_path) and not force:
        print(f" [{clip_name}] Already exists: {out_path}")
        return True

    cmd = [
        "ffmpeg",
        "-y",                         # overwrite without asking
        "-ss", clip_info["start"],    # seek to start
        "-t", clip_info["duration"],  # extract duration
        "-i", flac_path,              # input
        "-ac", "1",                   # mono
        "-ar", "48000",               # 48 kHz
        "-sample_fmt", "s16",         # 16-bit
        out_path,
    ]
    print(f" [{clip_name}] Extracting: {clip_info['description']}")
    print(f" start={clip_info['start']} duration={clip_info['duration']}")

    try:
        # stdin is detached so ffmpeg cannot swallow the caller's terminal input.
        result = subprocess.run(cmd, stdin=subprocess.DEVNULL, capture_output=True)
    except OSError as exc:
        # ffmpeg missing or not executable: report it like any other failure.
        print(f" [{clip_name}] could not run ffmpeg: {exc}", file=sys.stderr)
        return False

    if result.returncode != 0 or not os.path.exists(out_path):
        print(f" [{clip_name}] ffmpeg failed (exit {result.returncode}):", file=sys.stderr)
        stderr_text = result.stderr.decode("utf-8", errors="replace")
        # Print last few lines of ffmpeg output for diagnostics
        for line in stderr_text.strip().splitlines()[-5:]:
            print(f" {line}", file=sys.stderr)
        # Drop any partial output; otherwise the "already exists" shortcut
        # above would report this broken clip as done on the next run.
        try:
            os.remove(out_path)
        except OSError:
            pass
        return False

    size_kb = os.path.getsize(out_path) / 1024
    print(f" -> {out_path} ({size_kb:.0f} KB)")
    return True
def list_clips():
    """Print every clip in CLIPS with its start, duration and description."""
    print("Available clips:")
    print()

    # Pad names to the longest one so the columns line up.
    width = max(map(len, CLIPS))
    for clip_name in CLIPS:
        meta = CLIPS[clip_name]
        print(f" {clip_name:<{width}} {meta['start']} ({meta['duration']}) {meta['description']}")

    print()
    print(f" {len(CLIPS)} clips defined.")
    print(" Extract with: --clip NAME or --all")
def main():
    """Command-line entry point: download the source FLAC and extract clips.

    Exits with status 1 on invalid arguments, a missing ffmpeg, a failed
    download, or when at least one clip could not be extracted.
    """
    parser = argparse.ArgumentParser(
        description="Fetch Apollo 11 audio from Archive.org and extract clips.",
        epilog="Clips are saved as 48 kHz mono WAV in examples/audio/.",
    )
    parser.add_argument(
        "--list",
        action="store_true",
        help="List available clip names and timestamps",
    )
    parser.add_argument(
        "--clip",
        metavar="NAME",
        help="Extract a specific clip by name",
    )
    parser.add_argument(
        "--all",
        action="store_true",
        help="Extract all defined clips",
    )
    parser.add_argument(
        "--keep-flac",
        action="store_true",
        help="Keep the downloaded FLAC file after extraction",
    )
    parser.add_argument(
        "--force",
        action="store_true",
        help="Re-download and re-extract even if files already exist",
    )
    parser.add_argument(
        "--output-dir",
        default=AUDIO_DIR,
        help=f"Output directory (default: {AUDIO_DIR})",
    )
    args = parser.parse_args()

    # --list needs neither ffmpeg nor the network
    if args.list:
        list_clips()
        return

    # Validate arguments first so a typo is reported before anything else.
    if not args.clip and not args.all:
        parser.print_help()
        print()
        print("Specify --clip NAME, --all, or --list.")
        sys.exit(1)
    if args.clip and args.clip not in CLIPS:
        print(f"Unknown clip: {args.clip}", file=sys.stderr)
        print(f"Available: {', '.join(CLIPS.keys())}", file=sys.stderr)
        sys.exit(1)

    # Determine which clips to extract
    clip_names = list(CLIPS.keys()) if args.all else [args.clip]

    def clip_path(name):
        # Same naming scheme extract_clip() uses for its output file.
        return os.path.join(args.output_dir, f"apollo11_{name}.wav")

    # Clips that actually need work. The FLAC is deleted after extraction by
    # default, so without this check every re-run would download the whole
    # source again just to discover that all outputs are already present.
    pending = {
        name for name in clip_names
        if args.force or not os.path.exists(clip_path(name))
    }

    # ffmpeg is only required when something will be extracted
    if pending:
        check_ffmpeg()

    print("=" * 60)
    print("Apollo 11 Audio Fetch")
    print("=" * 60)
    print()

    # Download the source FLAC
    print("Step 1: Download source FLAC")
    if pending:
        flac_path = download_flac(args.output_dir, force=args.force)
        if flac_path is None:
            sys.exit(1)
    else:
        # Only needed for the clean-up step below.
        flac_path = os.path.join(args.output_dir, FLAC_FILENAME)
        print(" All requested clips already exist; download skipped.")
        print(" Use --force to re-download.")
    print()

    # Extract clips
    print(f"Step 2: Extract {len(clip_names)} clip(s)")
    print()
    ok_count = 0
    fail_count = 0
    for name in clip_names:
        if name in pending:
            success = extract_clip(
                flac_path, name, CLIPS[name], args.output_dir, force=args.force
            )
        else:
            print(f" [{name}] Already exists: {clip_path(name)}")
            success = True
        if success:
            ok_count += 1
        else:
            fail_count += 1
    print()

    # Clean up FLAC unless --keep-flac
    if os.path.exists(flac_path):
        if args.keep_flac:
            print(f"Kept source FLAC: {flac_path}")
        else:
            size_mb = os.path.getsize(flac_path) / (1024 * 1024)
            os.remove(flac_path)
            print(f"Removed source FLAC ({size_mb:.1f} MB). Use --keep-flac to retain.")
    print()

    # Summary
    print("=" * 60)
    print(f" Extracted: {ok_count} Failed: {fail_count}")
    if ok_count > 0:
        print(f" Output: {args.output_dir}/apollo11_*.wav")
    print("=" * 60)
    if fail_count > 0:
        sys.exit(1)


if __name__ == "__main__":
    main()

379
examples/real_signal_demo.py Executable file
View File

@ -0,0 +1,379 @@
#!/usr/bin/env python3
"""
Apollo Real Signal Demo -- process downloaded Apollo recordings through USB.
Auto-discovers WAV files in examples/audio/ (from fetch_apollo_audio.py) and
runs them through the full USB downlink chain: transmit (NRZ + BPSK + voice FM
onto PM carrier) then receive (PCM frame recovery + voice demodulation).
This proves the gr-apollo signal chain works on real-world audio, not just
synthetic test tones.
Signal path (same as full_downlink_demo.py):
TX:
pcm_frame_source -> nrz -> bpsk_mod (1.024 MHz) --+
audio_clip -> fm_voice_mod (1.25 MHz, +/-29kHz) ---+-> add -> pm_mod -> [signal]
RX:
[signal] -> usb_downlink_receiver -> PCM frames
[signal] -> pm_demod -> voice_subcarrier_demod -> recovered audio
Usage:
uv run python examples/real_signal_demo.py
uv run python examples/real_signal_demo.py --clip eagle_has_landed
uv run python examples/real_signal_demo.py --snr 25
uv run python examples/real_signal_demo.py --clip liftoff --play
"""
import argparse
import glob
import os
import sys
import time
from math import gcd
import numpy as np
from gnuradio import blocks, gr
from scipy.io import wavfile
from scipy.signal import resample_poly
from apollo.bpsk_subcarrier_mod import bpsk_subcarrier_mod
from apollo.constants import (
PCM_HIGH_BIT_RATE,
PCM_HIGH_WORDS_PER_FRAME,
PCM_SUBCARRIER_HZ,
PCM_WORD_LENGTH,
PM_PEAK_DEVIATION_RAD,
SAMPLE_RATE_BASEBAND,
VOICE_FM_DEVIATION_HZ,
VOICE_SUBCARRIER_HZ,
)
from apollo.fm_voice_subcarrier_mod import fm_voice_subcarrier_mod
from apollo.nrz_encoder import nrz_encoder
from apollo.pcm_frame_source import pcm_frame_source
from apollo.pm_demod import pm_demod
from apollo.pm_mod import pm_mod
from apollo.usb_downlink_receiver import usb_downlink_receiver
from apollo.voice_subcarrier_demod import voice_subcarrier_demod
# Audio directory relative to this script; input clips are read from here and
# recovered audio is written back into it
AUDIO_DIR = os.path.join(os.path.dirname(os.path.abspath(__file__)), "audio")
# Fallback clip if no downloaded audio exists
FALLBACK_CLIP = os.path.join(AUDIO_DIR, "apollo11_crew.wav")
def discover_clips(audio_dir=None):
    """Find input WAV files in the audio directory.

    Args:
        audio_dir: Directory to scan; defaults to AUDIO_DIR.

    Returns:
        A dict of {name: path} for all apollo11_*.wav files in sorted path
        order, excluding *_recovered.wav and *_fullchain.wav (our own output)
        and the bundled "crew" demo clip.
    """
    if audio_dir is None:
        audio_dir = AUDIO_DIR

    prefix = "apollo11_"
    suffix = ".wav"
    # Output files from previous runs
    generated_suffixes = ("_recovered.wav", "_fullchain.wav")

    # Escape the directory so characters such as "[" in its path are matched
    # literally instead of being read as glob syntax.
    pattern = os.path.join(glob.escape(audio_dir), prefix + "*" + suffix)

    clips = {}
    for path in sorted(glob.glob(pattern)):
        basename = os.path.basename(path)
        if basename.endswith(generated_suffixes):
            continue
        # apollo11_eagle_has_landed.wav -> eagle_has_landed
        # Slice rather than str.replace(), which would also strip the prefix
        # or ".wav" from the middle of a name.
        name = basename[len(prefix):-len(suffix)]
        # The small demo clip is skipped here; the caller falls back to it
        # when nothing else is found.
        if not name or name == "crew":
            continue
        clips[name] = path
    return clips
def load_and_upsample_audio(audio_path, sample_rate):
    """Load a WAV file and upsample it to the baseband rate.

    Multi-channel files use the first channel only. The audio is normalized to
    roughly [-1, 1], resampled to 8 kHz, then upsampled to ``sample_rate``.

    Args:
        audio_path: Path to the WAV file.
        sample_rate: Target baseband sample rate in Hz (integer).

    Returns:
        Tuple ``(upsampled, duration, audio_rate)``: the float32 baseband-rate
        samples, the input duration in seconds, and the intermediate audio
        rate (8000).
    """
    input_rate, audio_data = wavfile.read(audio_path)
    if audio_data.ndim > 1:
        audio_data = audio_data[:, 0]

    # Normalize to [-1, 1]
    if audio_data.dtype == np.uint8:
        # 8-bit WAV is unsigned with silence at 128, so re-centre before scaling.
        audio_float = (audio_data.astype(np.float32) - np.float32(128.0)) / np.float32(128.0)
    elif np.issubdtype(audio_data.dtype, np.integer):
        # Signed PCM: divide by full scale (32768 for int16, 2**31 for int32).
        full_scale = float(np.iinfo(audio_data.dtype).max) + 1.0
        audio_float = audio_data.astype(np.float32) / np.float32(full_scale)
    else:
        # Float WAV data is taken as already normalized.
        audio_float = audio_data.astype(np.float32)

    duration = len(audio_float) / input_rate

    # Resample to 8 kHz first (Apollo voice bandwidth)
    audio_rate = 8000
    if input_rate != audio_rate:
        g = gcd(audio_rate, input_rate)
        audio_float = resample_poly(
            audio_float, audio_rate // g, input_rate // g
        ).astype(np.float32)

    # Upsample to baseband
    g = gcd(sample_rate, audio_rate)
    upsampled = resample_poly(
        audio_float, sample_rate // g, audio_rate // g
    ).astype(np.float32)
    return upsampled, duration, audio_rate
def build_tx_signal(audio_samples, n_samples, sample_rate, snr_db):
    """Assemble and run the transmit flowgraph: PCM + voice -> PM modulation.

    Same manual assembly as full_downlink_demo.py so that external audio can
    be injected into the voice channel.

    Args:
        audio_samples: Voice samples already at ``sample_rate``.
        n_samples: Number of output samples to produce.
        sample_rate: Baseband sample rate in Hz.
        snr_db: SNR in dB for added complex Gaussian noise, or None for none.

    Returns:
        NumPy array with the complex samples collected by the sink.
    """
    import math

    flowgraph = gr.top_block()
    sink = blocks.vector_sink_c()

    # PCM telemetry branch: frames -> NRZ -> BPSK subcarrier
    pcm_frames = pcm_frame_source(bit_rate=PCM_HIGH_BIT_RATE)
    pcm_nrz = nrz_encoder(bit_rate=PCM_HIGH_BIT_RATE, sample_rate=sample_rate)
    pcm_bpsk = bpsk_subcarrier_mod(
        subcarrier_freq=PCM_SUBCARRIER_HZ,
        sample_rate=sample_rate,
    )
    flowgraph.connect(pcm_frames, pcm_nrz, pcm_bpsk)

    # Voice branch: real audio -> FM subcarrier -> level scaling
    audio_source = blocks.vector_source_f(audio_samples[:n_samples].tolist())
    voice_fm = fm_voice_subcarrier_mod(
        sample_rate=sample_rate,
        subcarrier_freq=VOICE_SUBCARRIER_HZ,
        fm_deviation=VOICE_FM_DEVIATION_HZ,
        audio_input=True,
    )
    # Voice level relative to PCM: 1.68/2.2 per IMPL_SPEC
    voice_scale = blocks.multiply_const_ff(1.68 / 2.2)
    flowgraph.connect(audio_source, voice_fm, voice_scale)

    # Sum both subcarriers
    subcarrier_sum = blocks.add_ff(1)
    flowgraph.connect(pcm_bpsk, (subcarrier_sum, 0))
    flowgraph.connect(voice_scale, (subcarrier_sum, 1))

    # Phase-modulate; head limits the run to n_samples
    modulator = pm_mod(pm_deviation=PM_PEAK_DEVIATION_RAD, sample_rate=sample_rate)
    limiter = blocks.head(gr.sizeof_gr_complex, n_samples)
    flowgraph.connect(subcarrier_sum, modulator, limiter)

    if snr_db is None:
        flowgraph.connect(limiter, sink)
    else:
        # Optional AWGN: noise power is set relative to a signal power of 1.0
        # and split evenly between the I and Q components.
        noise_power = 1.0 / (10.0 ** (snr_db / 10.0))
        noise_amp = math.sqrt(noise_power / 2.0)
        in_phase = np.random.randn(n_samples)
        quadrature = np.random.randn(n_samples)
        noise_source = blocks.vector_source_c(
            (in_phase + 1j * quadrature).astype(np.complex64) * noise_amp
        )
        noisy_sum = blocks.add_cc(1)
        flowgraph.connect(limiter, (noisy_sum, 0))
        flowgraph.connect(noise_source, (noisy_sum, 1))
        flowgraph.connect(noisy_sum, sink)

    flowgraph.run()
    return np.array(sink.data())
def receive_pcm(signal_data, sample_rate):
    """Run the PCM receive chain over ``signal_data``.

    Returns the ``blocks.message_debug`` sink that stored the messages from
    the receiver's "frames" port.
    """
    flowgraph = gr.top_block()
    frame_store = blocks.message_debug()

    source = blocks.vector_source_c(signal_data.tolist())
    receiver = usb_downlink_receiver(
        sample_rate=sample_rate,
        bit_rate=PCM_HIGH_BIT_RATE,
        output_format="raw",
    )

    flowgraph.connect(source, receiver)
    flowgraph.msg_connect(receiver, "frames", frame_store, "store")
    flowgraph.run()
    return frame_store
def receive_voice(signal_data, sample_rate, audio_rate=8000):
    """Run the voice receive chain over ``signal_data``.

    The signal goes through PM demodulation, then voice subcarrier
    demodulation. Returns the recovered audio as a float32 NumPy array.
    """
    flowgraph = gr.top_block()
    audio_sink = blocks.vector_sink_f()

    source = blocks.vector_source_c(signal_data.tolist())
    carrier_demod = pm_demod(sample_rate=sample_rate)
    voice_demod = voice_subcarrier_demod(sample_rate=sample_rate, audio_rate=audio_rate)

    flowgraph.connect(source, carrier_demod, voice_demod, audio_sink)
    flowgraph.run()

    recovered = audio_sink.data()
    return np.array(recovered, dtype=np.float32)
def process_clip(clip_name, clip_path, sample_rate, audio_rate, snr_db):
    """Process a single audio clip through the full TX/RX chain.

    Loads and upsamples the clip, builds the transmit signal, recovers PCM
    frames and voice from it, and writes the peak-normalized recovered audio
    to ``apollo11_<clip_name>_recovered.wav`` in AUDIO_DIR.

    Args:
        clip_name: Clip identifier, used in the output filename.
        clip_path: Path of the input WAV file.
        sample_rate: Baseband sample rate in Hz.
        audio_rate: Sample rate of the recovered audio in Hz.
        snr_db: SNR in dB for added noise, or None for a clean signal.

    Returns:
        A dict with stats about the processing (paths, durations, frame
        counts and per-stage timings).
    """
    print(f" Loading: {clip_path}")
    audio_upsampled, duration, _ = load_and_upsample_audio(clip_path, sample_rate)
    print(f" Duration: {duration:.2f}s, {len(audio_upsampled):,} baseband samples")

    # Calculate frame timing
    bits_per_frame = PCM_HIGH_WORDS_PER_FRAME * PCM_WORD_LENGTH
    samples_per_frame = int(bits_per_frame * sample_rate / PCM_HIGH_BIT_RATE)
    n_frames = int(duration * 50) + 2  # 50 fps + margin
    n_samples = min(len(audio_upsampled), n_frames * samples_per_frame)

    snr_desc = f"{snr_db} dB" if snr_db is not None else "clean"
    print(f" TX: {n_samples:,} samples, ~{n_frames} PCM frames, SNR={snr_desc}")

    # perf_counter() is monotonic, so the timings below cannot be skewed by
    # wall-clock adjustments during the run.

    # === TRANSMIT ===
    t0 = time.perf_counter()
    signal = build_tx_signal(audio_upsampled, n_samples, sample_rate, snr_db)
    t_tx = time.perf_counter() - t0
    print(f" TX complete: {len(signal):,} complex samples ({t_tx:.1f}s)")

    # === RECEIVE: PCM ===
    t0 = time.perf_counter()
    frame_sink = receive_pcm(signal, sample_rate)
    t_pcm = time.perf_counter() - t0
    n_recovered_frames = frame_sink.num_messages()
    print(f" RX PCM: {n_recovered_frames} frames recovered ({t_pcm:.1f}s)")

    # === RECEIVE: Voice ===
    t0 = time.perf_counter()
    recovered_audio = receive_voice(signal, sample_rate, audio_rate)
    t_voice = time.perf_counter() - t0
    recovered_duration = len(recovered_audio) / audio_rate
    print(
        f" RX voice: {len(recovered_audio):,} samples,"
        f" {recovered_duration:.2f}s ({t_voice:.1f}s)"
    )

    # Normalize and save recovered audio
    output_path = os.path.join(AUDIO_DIR, f"apollo11_{clip_name}_recovered.wav")
    # np.max() raises ValueError on an empty array, so an empty demodulator
    # output is treated as silence instead.
    peak = np.max(np.abs(recovered_audio)) if recovered_audio.size else 0.0
    if peak > 0:
        # Scale to 90% of full scale to leave headroom in the int16 output.
        recovered_audio = recovered_audio / peak * 0.9
    recovered_int16 = (recovered_audio * 32767).astype(np.int16)
    wavfile.write(output_path, audio_rate, recovered_int16)
    print(f" Saved: {output_path}")

    return {
        "clip_name": clip_name,
        "input_path": clip_path,
        "output_path": output_path,
        "input_duration": duration,
        "recovered_duration": recovered_duration,
        "pcm_frames": n_recovered_frames,
        "expected_frames": n_frames,
        "snr": snr_desc,
        "time_tx": t_tx,
        "time_pcm": t_pcm,
        "time_voice": t_voice,
    }
def main():
    """Command-line entry point: pick a clip, run it through TX/RX, report stats.

    Exits with status 1 when no audio is available or the requested clip is
    unknown.
    """
    parser = argparse.ArgumentParser(
        description="Process real Apollo audio through the full USB downlink chain."
    )
    parser.add_argument(
        "--clip",
        metavar="NAME",
        default=None,
        help="Process a specific clip (default: first discovered)",
    )
    parser.add_argument(
        "--snr",
        type=float,
        default=None,
        help="Add AWGN noise at this SNR in dB",
    )
    parser.add_argument(
        "--play",
        action="store_true",
        help="Play recovered audio with aplay after processing",
    )
    args = parser.parse_args()

    sample_rate = int(SAMPLE_RATE_BASEBAND)
    audio_rate = 8000

    print("=" * 60)
    print("Apollo Real Signal Demo")
    print(" Full USB downlink: PCM telemetry + crew voice")
    print("=" * 60)
    print()

    # Discover available clips
    clips = discover_clips()
    if not clips:
        # Fall back to the bundled demo clip
        if os.path.exists(FALLBACK_CLIP):
            print(" No downloaded clips found. Using bundled demo clip.")
            clips = {"crew": FALLBACK_CLIP}
        else:
            print("No audio files found in examples/audio/.", file=sys.stderr)
            print("Run fetch_apollo_audio.py first:", file=sys.stderr)
            print(" uv run python examples/fetch_apollo_audio.py --all", file=sys.stderr)
            sys.exit(1)
    elif args.clip == "crew" and os.path.exists(FALLBACK_CLIP):
        # discover_clips() hides the bundled clip once downloads exist;
        # honour an explicit request for it instead of rejecting it.
        clips["crew"] = FALLBACK_CLIP

    print(f" Found {len(clips)} clip(s): {', '.join(clips.keys())}")
    print()

    # Select which clip to process
    if args.clip:
        if args.clip not in clips:
            print(f"Clip not found: {args.clip}", file=sys.stderr)
            print(f"Available: {', '.join(clips.keys())}", file=sys.stderr)
            sys.exit(1)
        selected_name = args.clip
    else:
        selected_name = next(iter(clips))
    selected_path = clips[selected_name]

    print(f"Processing: {selected_name}")
    print("-" * 60)
    stats = process_clip(selected_name, selected_path, sample_rate, audio_rate, args.snr)

    # === SUMMARY ===
    print()
    print("=" * 60)
    print("Summary")
    print("=" * 60)
    print(f" Clip: {stats['clip_name']}")
    print(f" Input duration: {stats['input_duration']:.2f}s")
    print(f" Recovered audio: {stats['recovered_duration']:.2f}s")
    pcm_f = stats['pcm_frames']
    exp_f = stats['expected_frames']
    print(f" PCM frames: {pcm_f} recovered (expected ~{exp_f})")
    print(f" SNR: {stats['snr']}")
    t_tx = stats['time_tx']
    t_pcm = stats['time_pcm']
    t_voice = stats['time_voice']
    print(
        f" Processing time: TX={t_tx:.1f}s"
        f" PCM-RX={t_pcm:.1f}s Voice-RX={t_voice:.1f}s"
    )
    print(f" Output: {stats['output_path']}")
    print("=" * 60)

    if args.play:
        import shutil
        import subprocess

        print()
        if shutil.which("aplay") is None:
            # aplay may not be installed; report that instead of crashing
            # with FileNotFoundError after all the processing is done.
            print("aplay not found on PATH; cannot play audio.", file=sys.stderr)
            print(f"Recovered audio saved at: {stats['output_path']}", file=sys.stderr)
        else:
            print("Playing recovered audio...")
            subprocess.run(["aplay", stats["output_path"]], check=False)
    else:
        print()
        print(f"Play recovered: aplay {stats['output_path']}")


if __name__ == "__main__":
    main()