Add audio download script and real signal demo

fetch_apollo_audio.py downloads Apollo 11 audio highlights from Archive.org and extracts clips using ffmpeg (48 kHz mono WAV). Supports --list, --clip, --all with idempotent downloads and progress reporting. real_signal_demo.py auto-discovers downloaded clips and runs them through the full USB downlink TX/RX chain (PCM telemetry + FM voice), saving recovered audio for comparison. Falls back to the bundled demo clip if no downloads exist. Also adds .gitignore to keep large audio files out of the repo while preserving the small apollo11_crew.wav demo clip.
2026-02-24 14:15:23 -07:00 · 2026-02-24 14:15:23 -07:00 · 77ddec149c
commit 77ddec149c
parent 04a17899fc
3 changed files with 685 additions and 0 deletions
--- a/.gitignore
+++ b/.gitignore
@ -0,0 +1,17 @@
 # Python
 __pycache__/
 *.pyc
 *.egg-info/
 dist/
 build/
 # Audio files (large, downloaded on demand)
 examples/audio/*.wav
 examples/audio/*.flac
 # Keep the existing small demo clip
 !examples/audio/apollo11_crew.wav
 # Environment
 .env
 *.env.local
 node_modules/
--- a/examples/fetch_apollo_audio.py
+++ b/examples/fetch_apollo_audio.py
@ -0,0 +1,289 @@
 #!/usr/bin/env python3
 """
 Fetch Apollo audio recordings from Archive.org.
 Downloads the Apollo 11 audio highlights compilation and extracts individual
 clips using ffmpeg. Source material is from the Internet Archive's Apollo 11
 collection.
 Clips are saved as 48 kHz mono WAV files in examples/audio/ for use with
 the gr-apollo signal processing demos.
 Usage:
    uv run python examples/fetch_apollo_audio.py --list
    uv run python examples/fetch_apollo_audio.py --all
    uv run python examples/fetch_apollo_audio.py --clip eagle_has_landed
    uv run python examples/fetch_apollo_audio.py --clip liftoff --force
 """
 import argparse
 import os
 import shutil
 import subprocess
 import sys
 import urllib.request
 # Output directory: examples/audio/ relative to this script
 # (also the default for --output-dir).
 AUDIO_DIR = os.path.join(os.path.dirname(os.path.abspath(__file__)), "audio")
 # Source FLAC from the Internet Archive
 FLAC_URL = "https://archive.org/download/Apollo11AudioHighlights/Apollo11Highlights.flac"
 # Local filename the source FLAC is saved under inside the output directory.
 FLAC_FILENAME = "Apollo11Highlights.flac"
 # Clip definitions -- timestamps are approximate offsets into the highlights reel.
 # The important thing is having a working extraction pipeline; timestamps can be
 # refined once someone listens through the actual source file.
 #
 # Each key is the clip name accepted by --clip and embedded in the output
 # filename (apollo11_<name>.wav). Each value holds:
 #   "start"       -- seek offset handed to ffmpeg's -ss option
 #   "duration"    -- clip length handed to ffmpeg's -t option
 #   "description" -- human-readable label shown by --list and during extraction
 CLIPS = {
    "liftoff": {
        "start": "00:00:05",
        "duration": "00:00:30",
        "description": "Apollo 11 liftoff",
    },
    "eagle_has_landed": {
        "start": "00:06:45",
        "duration": "00:00:30",
        "description": "The Eagle has landed",
    },
    "one_small_step": {
        "start": "00:15:30",
        "duration": "00:00:25",
        "description": "One small step for man",
    },
    # NOTE(review): "Houston, we've had a problem" is an Apollo 13 transmission,
    # so it is unlikely to be present in an Apollo 11 highlights reel, and the
    # output would still be named apollo11_houston_problem.wav. Confirm what
    # this offset actually contains, then rename or drop the entry.
    "houston_problem": {
        "start": "00:20:00",
        "duration": "00:00:15",
        "description": "Houston, we've had a problem",
    },
    "splashdown": {
        "start": "00:42:00",
        "duration": "00:00:20",
        "description": "Splashdown",
    },
 }
 def check_ffmpeg():
    """Ensure an ffmpeg executable can be found on PATH.

    Returns None when ffmpeg is available. Otherwise prints an error plus
    per-platform install hints to stderr and exits the process with status 1.
    """
    if shutil.which("ffmpeg") is not None:
        return
    install_help = (
        "ERROR: ffmpeg not found on PATH.",
        "Install it with your package manager:",
        "  Arch:   pacman -S ffmpeg",
        "  Debian: apt install ffmpeg",
        "  macOS:  brew install ffmpeg",
    )
    for message in install_help:
        print(message, file=sys.stderr)
    sys.exit(1)
 def _progress_hook(block_num, block_size, total_size):
    """Report download progress to stderr."""
    downloaded = block_num * block_size
    if total_size > 0:
        pct = min(100.0, downloaded * 100.0 / total_size)
        mb_down = downloaded / (1024 * 1024)
        mb_total = total_size / (1024 * 1024)
        bar_width = 40
        filled = int(bar_width * pct / 100.0)
        bar = "#" * filled + "-" * (bar_width - filled)
        sys.stderr.write(f"\r  [{bar}] {pct:5.1f}%  {mb_down:.1f}/{mb_total:.1f} MB")
        sys.stderr.flush()
    else:
        mb_down = downloaded / (1024 * 1024)
        sys.stderr.write(f"\r  Downloaded {mb_down:.1f} MB (unknown total)")
        sys.stderr.flush()
 def download_flac(output_dir, force=False):
    """Download the FLAC source file with progress reporting.

    Args:
        output_dir: Directory to store the file in; created if missing.
        force: When True, download even if the file already exists.

    Returns the path to the downloaded (or already present) file, or None
    on failure.

    The transfer is written to a temporary ``.part`` file and only moved
    onto the final name once it has completed. An interrupted or failed
    download therefore never leaves a truncated FLAC that a later run would
    mistake for a finished one, and a failed ``force`` re-download keeps the
    previously downloaded copy intact.
    """
    os.makedirs(output_dir, exist_ok=True)
    flac_path = os.path.join(output_dir, FLAC_FILENAME)
    if os.path.exists(flac_path) and not force:
        size_mb = os.path.getsize(flac_path) / (1024 * 1024)
        print(f"  FLAC already exists: {flac_path} ({size_mb:.1f} MB)")
        print("  Use --force to re-download.")
        return flac_path
    print(f"  Downloading: {FLAC_URL}")
    print(f"  Saving to:   {flac_path}")
    print()
    part_path = flac_path + ".part"
    try:
        urllib.request.urlretrieve(FLAC_URL, part_path, reporthook=_progress_hook)
        sys.stderr.write("\n")
        sys.stderr.flush()
        # Publish the file only after the whole transfer succeeded.
        os.replace(part_path, flac_path)
    except (urllib.error.URLError, OSError) as exc:
        print(f"\n  Download failed: {exc}", file=sys.stderr)
        return None
    finally:
        # Runs on failure and on KeyboardInterrupt as well; after a
        # successful os.replace the partial file no longer exists.
        if os.path.exists(part_path):
            os.remove(part_path)
    size_mb = os.path.getsize(flac_path) / (1024 * 1024)
    print(f"  Downloaded {size_mb:.1f} MB")
    return flac_path
 def extract_clip(flac_path, clip_name, clip_info, output_dir, force=False):
    """Extract a clip segment from the FLAC source using ffmpeg.

    Args:
        flac_path: Path to the source FLAC file.
        clip_name: Clip identifier; the output is ``apollo11_<clip_name>.wav``.
        clip_info: Mapping with ``"start"``, ``"duration"`` and
            ``"description"`` entries.
        output_dir: Directory the WAV file is written to.
        force: When True, re-extract even if the output already exists.

    Outputs a 48 kHz mono 16-bit WAV file.
    Returns True on success (or when the clip already exists and ``force``
    is False), False on failure -- including when ffmpeg cannot be started.

    ffmpeg writes to a temporary ``.part`` file that is renamed onto the
    final path only after a zero exit status. A failed or interrupted run
    thus never leaves a partial WAV that a later run would report as
    "Already exists", and a failed ``force`` run keeps the previous clip.
    """
    out_path = os.path.join(output_dir, f"apollo11_{clip_name}.wav")
    if os.path.exists(out_path) and not force:
        print(f"  [{clip_name}] Already exists: {out_path}")
        return True
    tmp_path = out_path + ".part"
    cmd = [
        "ffmpeg",
        "-y",                        # overwrite without asking
        "-ss", clip_info["start"],   # seek to start
        "-t", clip_info["duration"], # extract duration
        "-i", flac_path,             # input
        "-ac", "1",                  # mono
        "-ar", "48000",              # 48 kHz
        "-sample_fmt", "s16",        # 16-bit
        "-f", "wav",                 # explicit: .part hides the extension
        tmp_path,
    ]
    print(f"  [{clip_name}] Extracting: {clip_info['description']}")
    print(f"    start={clip_info['start']}  duration={clip_info['duration']}")
    try:
        try:
            result = subprocess.run(cmd, capture_output=True)
        except OSError as exc:
            # ffmpeg missing or not executable: report it as a failed clip
            # rather than aborting with a traceback.
            print(f"  [{clip_name}] could not run ffmpeg: {exc}", file=sys.stderr)
            return False
        if result.returncode != 0:
            print(f"  [{clip_name}] ffmpeg failed (exit {result.returncode}):", file=sys.stderr)
            stderr_text = result.stderr.decode("utf-8", errors="replace")
            # Print last few lines of ffmpeg output for diagnostics
            for line in stderr_text.strip().splitlines()[-5:]:
                print(f"    {line}", file=sys.stderr)
            return False
        os.replace(tmp_path, out_path)
    finally:
        # Remove leftovers from a failed or interrupted run.
        if os.path.exists(tmp_path):
            os.remove(tmp_path)
    size_kb = os.path.getsize(out_path) / 1024
    print(f"    -> {out_path} ({size_kb:.0f} KB)")
    return True
 def list_clips():
    """Print every clip in CLIPS with its start, duration and description.

    Clip names are left-aligned and padded to the longest name so the
    columns line up; a count and a usage hint follow the table.
    """
    print("Available clips:")
    print()
    width = max(map(len, CLIPS))
    for clip in CLIPS:
        meta = CLIPS[clip]
        start, length, label = meta['start'], meta['duration'], meta['description']
        print(f"  {clip:<{width}}  {start} ({length})  {label}")
    print()
    total = len(CLIPS)
    print(f"  {total} clips defined.")
    print("  Extract with: --clip NAME  or  --all")
 def main():
    """Parse command-line options, download the source FLAC and extract clips.

    Flow:
      * ``--list`` prints the clip table and returns.
      * Arguments are validated before anything expensive happens.
      * When every requested clip already exists (and ``--force`` is not
        given) the function returns without downloading anything.
      * Otherwise ffmpeg is required, the FLAC is downloaded, each clip is
        extracted, and the FLAC is removed unless ``--keep-flac`` is given.

    Exits with status 1 when no action is requested, the clip name is
    unknown, ffmpeg is missing, the download fails, or any extraction fails.
    """
    parser = argparse.ArgumentParser(
        description="Fetch Apollo 11 audio from Archive.org and extract clips.",
        epilog="Clips are saved as 48 kHz mono WAV in examples/audio/.",
    )
    parser.add_argument(
        "--list",
        action="store_true",
        help="List available clip names and timestamps",
    )
    parser.add_argument(
        "--clip",
        metavar="NAME",
        help="Extract a specific clip by name",
    )
    parser.add_argument(
        "--all",
        action="store_true",
        help="Extract all defined clips",
    )
    parser.add_argument(
        "--keep-flac",
        action="store_true",
        help="Keep the downloaded FLAC file after extraction",
    )
    parser.add_argument(
        "--force",
        action="store_true",
        help="Re-download and re-extract even if files already exist",
    )
    parser.add_argument(
        "--output-dir",
        default=AUDIO_DIR,
        help=f"Output directory (default: {AUDIO_DIR})",
    )
    args = parser.parse_args()
    # --list doesn't need ffmpeg
    if args.list:
        list_clips()
        return
    # Validate arguments first so usage errors are reported even on a
    # machine without ffmpeg.
    if not args.clip and not args.all:
        parser.print_help()
        print()
        print("Specify --clip NAME, --all, or --list.")
        sys.exit(1)
    if args.clip and args.clip not in CLIPS:
        print(f"Unknown clip: {args.clip}", file=sys.stderr)
        print(f"Available: {', '.join(CLIPS.keys())}", file=sys.stderr)
        sys.exit(1)
    # Determine which clips to extract (--all wins when both are given)
    clip_names = list(CLIPS.keys()) if args.all else [args.clip]
    # The FLAC is deleted after each run by default, so without this check a
    # repeat run would download the whole file again only to discover that
    # every clip is already present.
    if not args.force:
        missing = [
            name
            for name in clip_names
            if not os.path.exists(os.path.join(args.output_dir, f"apollo11_{name}.wav"))
        ]
        if not missing:
            print(f"All {len(clip_names)} requested clip(s) already exist in {args.output_dir}.")
            print("Use --force to re-download and re-extract.")
            return
    # Everything from here on requires ffmpeg
    check_ffmpeg()
    print("=" * 60)
    print("Apollo 11 Audio Fetch")
    print("=" * 60)
    print()
    # Download the source FLAC
    print("Step 1: Download source FLAC")
    flac_path = download_flac(args.output_dir, force=args.force)
    if flac_path is None:
        sys.exit(1)
    print()
    # Extract clips
    print(f"Step 2: Extract {len(clip_names)} clip(s)")
    print()
    ok_count = 0
    fail_count = 0
    for name in clip_names:
        success = extract_clip(flac_path, name, CLIPS[name], args.output_dir, force=args.force)
        if success:
            ok_count += 1
        else:
            fail_count += 1
    print()
    # Clean up FLAC unless --keep-flac
    if not args.keep_flac and os.path.exists(flac_path):
        size_mb = os.path.getsize(flac_path) / (1024 * 1024)
        os.remove(flac_path)
        print(f"Removed source FLAC ({size_mb:.1f} MB). Use --keep-flac to retain.")
    elif args.keep_flac and os.path.exists(flac_path):
        print(f"Kept source FLAC: {flac_path}")
    print()
    # Summary (clips that already existed count as extracted)
    print("=" * 60)
    print(f"  Extracted: {ok_count}  Failed: {fail_count}")
    if ok_count > 0:
        print(f"  Output:    {args.output_dir}/apollo11_*.wav")
    print("=" * 60)
    if fail_count > 0:
        sys.exit(1)
 if __name__ == "__main__":
    main()
--- a/examples/real_signal_demo.py
+++ b/examples/real_signal_demo.py
@ -0,0 +1,379 @@
 #!/usr/bin/env python3
 """
 Apollo Real Signal Demo -- process downloaded Apollo recordings through USB.
 Auto-discovers WAV files in examples/audio/ (from fetch_apollo_audio.py) and
 runs them through the full USB downlink chain: transmit (NRZ + BPSK + voice FM
 onto PM carrier) then receive (PCM frame recovery + voice demodulation).
 This proves the gr-apollo signal chain works on real-world audio, not just
 synthetic test tones.
 Signal path (same as full_downlink_demo.py):
    TX:
      pcm_frame_source -> nrz -> bpsk_mod (1.024 MHz) --+
      audio_clip -> fm_voice_mod (1.25 MHz, +/-29kHz) ---+-> add -> pm_mod -> [signal]
    RX:
      [signal] -> usb_downlink_receiver -> PCM frames
      [signal] -> pm_demod -> voice_subcarrier_demod -> recovered audio
 Usage:
    uv run python examples/real_signal_demo.py
    uv run python examples/real_signal_demo.py --clip eagle_has_landed
    uv run python examples/real_signal_demo.py --snr 25
    uv run python examples/real_signal_demo.py --clip liftoff --play
 """
 import argparse
 import glob
 import os
 import sys
 import time
 from math import gcd
 import numpy as np
 from gnuradio import blocks, gr
 from scipy.io import wavfile
 from scipy.signal import resample_poly
 from apollo.bpsk_subcarrier_mod import bpsk_subcarrier_mod
 from apollo.constants import (
    PCM_HIGH_BIT_RATE,
    PCM_HIGH_WORDS_PER_FRAME,
    PCM_SUBCARRIER_HZ,
    PCM_WORD_LENGTH,
    PM_PEAK_DEVIATION_RAD,
    SAMPLE_RATE_BASEBAND,
    VOICE_FM_DEVIATION_HZ,
    VOICE_SUBCARRIER_HZ,
 )
 from apollo.fm_voice_subcarrier_mod import fm_voice_subcarrier_mod
 from apollo.nrz_encoder import nrz_encoder
 from apollo.pcm_frame_source import pcm_frame_source
 from apollo.pm_demod import pm_demod
 from apollo.pm_mod import pm_mod
 from apollo.usb_downlink_receiver import usb_downlink_receiver
 from apollo.voice_subcarrier_demod import voice_subcarrier_demod
 # Audio directory relative to this script; input clips are discovered here
 # and recovered audio is written back into the same directory.
 AUDIO_DIR = os.path.join(os.path.dirname(os.path.abspath(__file__)), "audio")
 # Fallback clip if no downloaded audio exists
 FALLBACK_CLIP = os.path.join(AUDIO_DIR, "apollo11_crew.wav")
 def discover_clips():
    """Find WAV files in the audio directory.

    Returns a dict of {name: path} for all apollo11_*.wav files in
    AUDIO_DIR, ordered by sorted path, where ``name`` is the filename with
    the ``apollo11_`` prefix and ``.wav`` suffix removed.

    Excluded:
      * ``*_recovered.wav`` and ``*_fullchain.wav`` (our own output), and
      * the bundled ``crew`` demo clip. It is always skipped here; falling
        back to it when nothing else exists is the caller's job.
    """
    prefix = "apollo11_"
    suffix = ".wav"
    output_suffixes = ("_recovered.wav", "_fullchain.wav")
    clips = {}
    pattern = os.path.join(AUDIO_DIR, f"{prefix}*{suffix}")
    for path in sorted(glob.glob(pattern)):
        basename = os.path.basename(path)
        # Skip output files from previous runs
        if basename.endswith(output_suffixes):
            continue
        # Extract clip name: apollo11_eagle_has_landed.wav -> eagle_has_landed.
        # Slice instead of str.replace so that only the leading prefix and
        # trailing extension are removed, not every occurrence in the name.
        name = basename[len(prefix):-len(suffix)]
        # The demo clip is never offered as a discovered clip
        if name == "crew":
            continue
        clips[name] = path
    return clips
 def load_and_upsample_audio(audio_path, sample_rate):
    """Load a WAV file and upsample it to the baseband rate.

    Args:
        audio_path: Path to the WAV file. Only the first channel of a
            multi-channel file is used.
        sample_rate: Target baseband sample rate in Hz (integer).

    Returns:
        A tuple ``(upsampled, duration, audio_rate)``: the float32 samples at
        ``sample_rate``, the input duration in seconds, and the intermediate
        audio rate (always 8000).

    Integer PCM is scaled to [-1, 1) according to its dtype. Unsigned data
    (8-bit WAV) is re-centred around zero first, because its silence level
    sits at mid-scale rather than at 0. Float data is passed through as-is.
    """
    input_rate, audio_data = wavfile.read(audio_path)
    if audio_data.ndim > 1:
        audio_data = audio_data[:, 0]
    # Normalize to [-1, 1]
    if np.issubdtype(audio_data.dtype, np.unsignedinteger):
        # e.g. uint8: 0..255 with silence at 128
        half_scale = (np.iinfo(audio_data.dtype).max + 1) / 2.0
        audio_float = (audio_data.astype(np.float32) - half_scale) / half_scale
    elif np.issubdtype(audio_data.dtype, np.integer):
        # int16 -> /32768, int32 -> /2147483648
        full_scale = float(np.iinfo(audio_data.dtype).max) + 1.0
        audio_float = audio_data.astype(np.float32) / full_scale
    else:
        audio_float = audio_data.astype(np.float32)
    duration = len(audio_float) / input_rate
    # Resample to 8 kHz first (Apollo voice bandwidth)
    audio_rate = 8000
    if input_rate != audio_rate:
        g = gcd(audio_rate, input_rate)
        audio_float = resample_poly(
            audio_float, audio_rate // g, input_rate // g
        ).astype(np.float32)
    # Upsample to baseband
    g = gcd(sample_rate, audio_rate)
    upsampled = resample_poly(
        audio_float, sample_rate // g, audio_rate // g
    ).astype(np.float32)
    return upsampled, duration, audio_rate
 def build_tx_signal(audio_samples, n_samples, sample_rate, snr_db):
    """Assemble and run the transmit flowgraph; return its complex output.

    Two subcarrier branches are summed and fed to the PM modulator:
    PCM telemetry (frame source -> NRZ -> BPSK) and voice (the first
    ``n_samples`` of ``audio_samples`` -> FM subcarrier -> gain). The
    modulator output is limited to ``n_samples`` items. When ``snr_db`` is
    not None, complex Gaussian noise is added with total power
    ``1 / 10**(snr_db / 10)``, split evenly between the real and imaginary
    parts.

    The graph is assembled by hand, as in full_downlink_demo.py, so that
    external audio can be injected into the voice channel.

    Returns a numpy array built from the vector sink's data.
    """
    flowgraph = gr.top_block()

    # PCM telemetry branch
    pcm_source = pcm_frame_source(bit_rate=PCM_HIGH_BIT_RATE)
    pcm_nrz = nrz_encoder(bit_rate=PCM_HIGH_BIT_RATE, sample_rate=sample_rate)
    pcm_bpsk = bpsk_subcarrier_mod(
        subcarrier_freq=PCM_SUBCARRIER_HZ,
        sample_rate=sample_rate,
    )
    flowgraph.connect(pcm_source, pcm_nrz, pcm_bpsk)

    # Voice subcarrier branch, driven by the supplied audio
    clip_samples = audio_samples[:n_samples].tolist()
    audio_source = blocks.vector_source_f(clip_samples)
    voice_fm = fm_voice_subcarrier_mod(
        sample_rate=sample_rate,
        subcarrier_freq=VOICE_SUBCARRIER_HZ,
        fm_deviation=VOICE_FM_DEVIATION_HZ,
        audio_input=True,
    )
    # Voice level relative to PCM: 1.68/2.2 per IMPL_SPEC
    voice_scale = blocks.multiply_const_ff(1.68 / 2.2)
    flowgraph.connect(audio_source, voice_fm, voice_scale)

    # Sum the two subcarriers
    subcarrier_sum = blocks.add_ff(1)
    flowgraph.connect(pcm_bpsk, (subcarrier_sum, 0))
    flowgraph.connect(voice_scale, (subcarrier_sum, 1))

    # Phase-modulate and stop after n_samples items
    modulator = pm_mod(pm_deviation=PM_PEAK_DEVIATION_RAD, sample_rate=sample_rate)
    limiter = blocks.head(gr.sizeof_gr_complex, n_samples)
    flowgraph.connect(subcarrier_sum, modulator, limiter)

    if snr_db is None:
        # Clean channel: capture the modulator output directly
        sink = blocks.vector_sink_c()
        flowgraph.connect(limiter, sink)
    else:
        import math
        noise_power = 1.0 / (10.0 ** (snr_db / 10.0))
        noise_amp = math.sqrt(noise_power / 2.0)
        unit_noise = np.random.randn(n_samples) + 1j * np.random.randn(n_samples)
        noise_source = blocks.vector_source_c(
            unit_noise.astype(np.complex64) * noise_amp
        )
        channel_sum = blocks.add_cc(1)
        sink = blocks.vector_sink_c()
        flowgraph.connect(limiter, (channel_sum, 0))
        flowgraph.connect(noise_source, (channel_sum, 1))
        flowgraph.connect(channel_sum, sink)

    flowgraph.run()
    return np.array(sink.data())
 def receive_pcm(signal_data, sample_rate):
    """Feed the signal through the PCM receiver and collect its frame messages.

    Returns the ``message_debug`` block whose "store" port is connected to
    the receiver's "frames" message port, after the flowgraph has run.
    """
    flowgraph = gr.top_block()
    samples = signal_data.tolist()
    source = blocks.vector_source_c(samples)
    receiver = usb_downlink_receiver(
        sample_rate=sample_rate,
        bit_rate=PCM_HIGH_BIT_RATE,
        output_format="raw",
    )
    frame_store = blocks.message_debug()
    flowgraph.connect(source, receiver)
    flowgraph.msg_connect(receiver, "frames", frame_store, "store")
    flowgraph.run()
    return frame_store
 def receive_voice(signal_data, sample_rate, audio_rate=8000):
    """Demodulate the voice channel and return the recovered audio.

    The signal passes through the PM demodulator and then the voice
    subcarrier demodulator; the sink contents are returned as a float32
    numpy array.
    """
    flowgraph = gr.top_block()
    samples = signal_data.tolist()
    source = blocks.vector_source_c(samples)
    carrier_demod = pm_demod(sample_rate=sample_rate)
    voice_demod = voice_subcarrier_demod(sample_rate=sample_rate, audio_rate=audio_rate)
    audio_sink = blocks.vector_sink_f()
    flowgraph.connect(source, carrier_demod, voice_demod, audio_sink)
    flowgraph.run()
    recovered = audio_sink.data()
    return np.array(recovered, dtype=np.float32)
 def process_clip(clip_name, clip_path, sample_rate, audio_rate, snr_db):
    """Process a single audio clip through the full TX/RX chain.

    Args:
        clip_name: Short clip name; used in the output filename
            ``apollo11_<clip_name>_recovered.wav`` inside AUDIO_DIR.
        clip_path: Path of the input WAV file.
        sample_rate: Baseband sample rate in Hz.
        audio_rate: Sample rate of the recovered audio in Hz.
        snr_db: SNR for added noise in dB, or None for a clean channel.

    The recovered audio is peak-normalized to 0.9 of full scale and saved as
    16-bit WAV. If the voice receiver yields no samples, a warning is
    printed and an empty file is written instead of raising.

    Returns a dict with stats about the processing (durations, frame counts,
    wall-clock times and the input/output paths).
    """
    print(f"  Loading: {clip_path}")
    audio_upsampled, duration, _ = load_and_upsample_audio(clip_path, sample_rate)
    print(f"  Duration: {duration:.2f}s, {len(audio_upsampled):,} baseband samples")
    # Calculate frame timing
    bits_per_frame = PCM_HIGH_WORDS_PER_FRAME * PCM_WORD_LENGTH
    samples_per_frame = int(bits_per_frame * sample_rate / PCM_HIGH_BIT_RATE)
    n_frames = int(duration * 50) + 2  # 50 fps + margin
    # Never ask for more samples than the clip provides
    n_samples = min(len(audio_upsampled), n_frames * samples_per_frame)
    snr_desc = f"{snr_db} dB" if snr_db is not None else "clean"
    print(f"  TX: {n_samples:,} samples, ~{n_frames} PCM frames, SNR={snr_desc}")
    # === TRANSMIT ===
    t0 = time.time()
    signal = build_tx_signal(audio_upsampled, n_samples, sample_rate, snr_db)
    t_tx = time.time() - t0
    print(f"  TX complete: {len(signal):,} complex samples ({t_tx:.1f}s)")
    # === RECEIVE: PCM ===
    t0 = time.time()
    frame_sink = receive_pcm(signal, sample_rate)
    t_pcm = time.time() - t0
    n_recovered_frames = frame_sink.num_messages()
    print(f"  RX PCM: {n_recovered_frames} frames recovered ({t_pcm:.1f}s)")
    # === RECEIVE: Voice ===
    t0 = time.time()
    recovered_audio = receive_voice(signal, sample_rate, audio_rate)
    t_voice = time.time() - t0
    recovered_duration = len(recovered_audio) / audio_rate
    print(
        f"  RX voice: {len(recovered_audio):,} samples,"
        f" {recovered_duration:.2f}s ({t_voice:.1f}s)"
    )
    # Normalize and save recovered audio
    output_path = os.path.join(AUDIO_DIR, f"apollo11_{clip_name}_recovered.wav")
    if recovered_audio.size:
        peak = np.max(np.abs(recovered_audio))
        if peak > 0:
            recovered_audio = recovered_audio / peak * 0.9
    else:
        # np.max raises ValueError on a zero-size array, so skip
        # normalization when nothing was recovered.
        print("  WARNING: voice receiver produced no audio samples", file=sys.stderr)
    recovered_int16 = (recovered_audio * 32767).astype(np.int16)
    wavfile.write(output_path, audio_rate, recovered_int16)
    print(f"  Saved: {output_path}")
    return {
        "clip_name": clip_name,
        "input_path": clip_path,
        "output_path": output_path,
        "input_duration": duration,
        "recovered_duration": recovered_duration,
        "pcm_frames": n_recovered_frames,
        "expected_frames": n_frames,
        "snr": snr_desc,
        "time_tx": t_tx,
        "time_pcm": t_pcm,
        "time_voice": t_voice,
    }
 def main():
    """Run one audio clip through the full USB downlink chain and report stats.

    Options:
        --clip NAME  process a specific discovered clip (default: the first)
        --snr DB     add noise at this SNR in dB
        --play       play the recovered audio with ``aplay`` afterwards

    When no clips are discovered, the bundled demo clip is used if present;
    otherwise the script exits with status 1. An unknown ``--clip`` also
    exits with status 1. If ``aplay`` cannot be started, a message is
    printed instead of ending the run with a traceback.
    """
    parser = argparse.ArgumentParser(
        description="Process real Apollo audio through the full USB downlink chain."
    )
    parser.add_argument(
        "--clip",
        metavar="NAME",
        default=None,
        help="Process a specific clip (default: first discovered)",
    )
    parser.add_argument(
        "--snr",
        type=float,
        default=None,
        help="Add AWGN noise at this SNR in dB",
    )
    parser.add_argument(
        "--play",
        action="store_true",
        help="Play recovered audio with aplay after processing",
    )
    args = parser.parse_args()
    sample_rate = int(SAMPLE_RATE_BASEBAND)
    audio_rate = 8000
    print("=" * 60)
    print("Apollo Real Signal Demo")
    print("  Full USB downlink: PCM telemetry + crew voice")
    print("=" * 60)
    print()
    # Discover available clips
    clips = discover_clips()
    if not clips:
        # Fall back to the bundled demo clip
        if os.path.exists(FALLBACK_CLIP):
            print("  No downloaded clips found. Using bundled demo clip.")
            clips = {"crew": FALLBACK_CLIP}
        else:
            print("No audio files found in examples/audio/.", file=sys.stderr)
            print("Run fetch_apollo_audio.py first:", file=sys.stderr)
            print("  uv run python examples/fetch_apollo_audio.py --all", file=sys.stderr)
            sys.exit(1)
    print(f"  Found {len(clips)} clip(s): {', '.join(clips.keys())}")
    print()
    # Select which clip to process
    if args.clip:
        if args.clip not in clips:
            print(f"Clip not found: {args.clip}", file=sys.stderr)
            print(f"Available: {', '.join(clips.keys())}", file=sys.stderr)
            sys.exit(1)
        selected_name = args.clip
    else:
        selected_name = next(iter(clips))
    selected_path = clips[selected_name]
    print(f"Processing: {selected_name}")
    print("-" * 60)
    stats = process_clip(selected_name, selected_path, sample_rate, audio_rate, args.snr)
    # === SUMMARY ===
    print()
    print("=" * 60)
    print("Summary")
    print("=" * 60)
    print(f"  Clip:             {stats['clip_name']}")
    print(f"  Input duration:   {stats['input_duration']:.2f}s")
    print(f"  Recovered audio:  {stats['recovered_duration']:.2f}s")
    pcm_f = stats['pcm_frames']
    exp_f = stats['expected_frames']
    print(f"  PCM frames:       {pcm_f} recovered (expected ~{exp_f})")
    print(f"  SNR:              {stats['snr']}")
    t_tx = stats['time_tx']
    t_pcm = stats['time_pcm']
    t_voice = stats['time_voice']
    print(
        f"  Processing time:  TX={t_tx:.1f}s"
        f"  PCM-RX={t_pcm:.1f}s  Voice-RX={t_voice:.1f}s"
    )
    print(f"  Output:           {stats['output_path']}")
    print("=" * 60)
    if args.play:
        import subprocess
        print()
        print("Playing recovered audio...")
        try:
            subprocess.run(["aplay", stats["output_path"]], check=False)
        except OSError as exc:
            # aplay is not installed everywhere; the processing has already
            # succeeded, so report the problem instead of crashing.
            print(f"Could not run aplay: {exc}", file=sys.stderr)
            print(f"Recovered audio is at: {stats['output_path']}", file=sys.stderr)
    else:
        print()
        print(f"Play recovered: aplay {stats['output_path']}")
 if __name__ == "__main__":
    main()