//! Audio player — reads a WAV file and streams it as RTP packets.
//! Also provides prompt preparation for the leg interaction system.
|
|
|
|
use crate::rtp::{build_rtp_header, rtp_clock_increment};
|
|
use codec_lib::{codec_sample_rate, TranscodeState};
|
|
use std::net::SocketAddr;
|
|
use std::path::Path;
|
|
use std::sync::Arc;
|
|
use tokio::net::UdpSocket;
|
|
use tokio::time::{self, Duration};
|
|
|
|
/// Sample rate the mixer operates at, in Hz (must stay in sync with mixer::MIX_RATE).
const MIX_RATE: u32 = 48_000;

/// Number of samples in one 20 ms frame at the mixing rate (50 frames per second).
const MIX_FRAME_SIZE: usize = (MIX_RATE / 50) as usize;
|
|
|
|
/// Play a WAV file as RTP to a destination.
|
|
/// Returns when playback is complete.
|
|
pub async fn play_wav_file(
|
|
file_path: &str,
|
|
socket: Arc<UdpSocket>,
|
|
dest: SocketAddr,
|
|
codec_pt: u8,
|
|
ssrc: u32,
|
|
) -> Result<u32, String> {
|
|
let path = Path::new(file_path);
|
|
if !path.exists() {
|
|
return Err(format!("WAV file not found: {file_path}"));
|
|
}
|
|
|
|
// Read WAV file.
|
|
let mut reader =
|
|
hound::WavReader::open(path).map_err(|e| format!("open WAV {file_path}: {e}"))?;
|
|
let spec = reader.spec();
|
|
let wav_rate = spec.sample_rate;
|
|
|
|
// Read all samples as i16.
|
|
let samples: Vec<i16> = if spec.bits_per_sample == 16 {
|
|
reader.samples::<i16>().filter_map(|s| s.ok()).collect()
|
|
} else if spec.bits_per_sample == 32 && spec.sample_format == hound::SampleFormat::Float {
|
|
reader
|
|
.samples::<f32>()
|
|
.filter_map(|s| s.ok())
|
|
.map(|s| (s * 32767.0).round().clamp(-32768.0, 32767.0) as i16)
|
|
.collect()
|
|
} else {
|
|
return Err(format!(
|
|
"unsupported WAV format: {}bit {:?}",
|
|
spec.bits_per_sample, spec.sample_format
|
|
));
|
|
};
|
|
|
|
if samples.is_empty() {
|
|
return Ok(0);
|
|
}
|
|
|
|
// Create codec state for encoding.
|
|
let mut transcoder = TranscodeState::new().map_err(|e| format!("codec init: {e}"))?;
|
|
|
|
// Resample to target codec rate.
|
|
let target_rate = codec_sample_rate(codec_pt);
|
|
let resampled = if wav_rate != target_rate {
|
|
transcoder
|
|
.resample(&samples, wav_rate, target_rate)
|
|
.map_err(|e| format!("resample: {e}"))?
|
|
} else {
|
|
samples
|
|
};
|
|
|
|
// Calculate frame size (20ms of audio at target rate).
|
|
let frame_samples = (target_rate as usize) / 50; // 20ms = 1/50 second
|
|
|
|
// Stream as RTP at 20ms intervals.
|
|
let mut seq: u16 = 0;
|
|
let mut ts: u32 = 0;
|
|
let mut offset = 0;
|
|
let mut interval = time::interval(Duration::from_millis(20));
|
|
let mut frames_sent = 0u32;
|
|
|
|
while offset < resampled.len() {
|
|
interval.tick().await;
|
|
|
|
let end = (offset + frame_samples).min(resampled.len());
|
|
let frame = &resampled[offset..end];
|
|
|
|
// Pad short final frame with silence.
|
|
let frame_data = if frame.len() < frame_samples {
|
|
let mut padded = frame.to_vec();
|
|
padded.resize(frame_samples, 0);
|
|
padded
|
|
} else {
|
|
frame.to_vec()
|
|
};
|
|
|
|
// Encode to target codec.
|
|
let encoded = match transcoder.encode_from_pcm(&frame_data, codec_pt) {
|
|
Ok(e) if !e.is_empty() => e,
|
|
_ => {
|
|
offset += frame_samples;
|
|
continue;
|
|
}
|
|
};
|
|
|
|
// Build RTP packet.
|
|
let header = build_rtp_header(codec_pt, seq, ts, ssrc);
|
|
let mut packet = header.to_vec();
|
|
packet.extend_from_slice(&encoded);
|
|
|
|
let _ = socket.send_to(&packet, dest).await;
|
|
|
|
seq = seq.wrapping_add(1);
|
|
ts = ts.wrapping_add(rtp_clock_increment(codec_pt));
|
|
offset += frame_samples;
|
|
frames_sent += 1;
|
|
}
|
|
|
|
Ok(frames_sent)
|
|
}
|
|
|
|
/// Generate and play a beep tone (sine wave) as RTP.
|
|
pub async fn play_beep(
|
|
socket: Arc<UdpSocket>,
|
|
dest: SocketAddr,
|
|
codec_pt: u8,
|
|
ssrc: u32,
|
|
start_seq: u16,
|
|
start_ts: u32,
|
|
freq_hz: u32,
|
|
duration_ms: u32,
|
|
) -> Result<(u16, u32), String> {
|
|
let mut transcoder = TranscodeState::new().map_err(|e| format!("codec init: {e}"))?;
|
|
let target_rate = codec_sample_rate(codec_pt);
|
|
let frame_samples = (target_rate as usize) / 50;
|
|
let total_samples = (target_rate as usize * duration_ms as usize) / 1000;
|
|
|
|
// Generate sine wave.
|
|
let amplitude = 16000i16;
|
|
let sine: Vec<i16> = (0..total_samples)
|
|
.map(|i| {
|
|
let t = i as f64 / target_rate as f64;
|
|
(amplitude as f64 * (2.0 * std::f64::consts::PI * freq_hz as f64 * t).sin()) as i16
|
|
})
|
|
.collect();
|
|
|
|
let mut seq = start_seq;
|
|
let mut ts = start_ts;
|
|
let mut offset = 0;
|
|
let mut interval = time::interval(Duration::from_millis(20));
|
|
|
|
while offset < sine.len() {
|
|
interval.tick().await;
|
|
|
|
let end = (offset + frame_samples).min(sine.len());
|
|
let mut frame = sine[offset..end].to_vec();
|
|
frame.resize(frame_samples, 0);
|
|
|
|
let encoded = match transcoder.encode_from_pcm(&frame, codec_pt) {
|
|
Ok(e) if !e.is_empty() => e,
|
|
_ => {
|
|
offset += frame_samples;
|
|
continue;
|
|
}
|
|
};
|
|
|
|
let header = build_rtp_header(codec_pt, seq, ts, ssrc);
|
|
let mut packet = header.to_vec();
|
|
packet.extend_from_slice(&encoded);
|
|
let _ = socket.send_to(&packet, dest).await;
|
|
|
|
seq = seq.wrapping_add(1);
|
|
ts = ts.wrapping_add(rtp_clock_increment(codec_pt));
|
|
offset += frame_samples;
|
|
}
|
|
|
|
Ok((seq, ts))
|
|
}
|
|
|
|
/// Load a WAV file and split it into 20ms f32 PCM frames at 48kHz.
|
|
/// Used by the leg interaction system to prepare prompt audio for the mixer.
|
|
pub fn load_prompt_pcm_frames(wav_path: &str) -> Result<Vec<Vec<f32>>, String> {
|
|
let path = Path::new(wav_path);
|
|
if !path.exists() {
|
|
return Err(format!("WAV file not found: {wav_path}"));
|
|
}
|
|
|
|
let mut reader =
|
|
hound::WavReader::open(path).map_err(|e| format!("open WAV {wav_path}: {e}"))?;
|
|
let spec = reader.spec();
|
|
let wav_rate = spec.sample_rate;
|
|
|
|
// Read all samples as f32 in [-1.0, 1.0].
|
|
let samples: Vec<f32> = if spec.bits_per_sample == 16 {
|
|
reader
|
|
.samples::<i16>()
|
|
.filter_map(|s| s.ok())
|
|
.map(|s| s as f32 / 32768.0)
|
|
.collect()
|
|
} else if spec.bits_per_sample == 32 && spec.sample_format == hound::SampleFormat::Float {
|
|
reader.samples::<f32>().filter_map(|s| s.ok()).collect()
|
|
} else {
|
|
return Err(format!(
|
|
"unsupported WAV format: {}bit {:?}",
|
|
spec.bits_per_sample, spec.sample_format
|
|
));
|
|
};
|
|
|
|
if samples.is_empty() {
|
|
return Ok(vec![]);
|
|
}
|
|
|
|
pcm_to_mix_frames(&samples, wav_rate)
|
|
}
|
|
|
|
/// Convert PCM samples at an arbitrary rate into 48kHz 20ms mixer frames.
|
|
pub fn pcm_to_mix_frames(samples: &[f32], sample_rate: u32) -> Result<Vec<Vec<f32>>, String> {
|
|
if samples.is_empty() {
|
|
return Ok(vec![]);
|
|
}
|
|
|
|
// Resample to MIX_RATE (48kHz) if needed.
|
|
let resampled = if sample_rate != MIX_RATE {
|
|
let mut transcoder = TranscodeState::new().map_err(|e| format!("codec init: {e}"))?;
|
|
transcoder
|
|
.resample_f32(samples, sample_rate, MIX_RATE)
|
|
.map_err(|e| format!("resample: {e}"))?
|
|
} else {
|
|
samples.to_vec()
|
|
};
|
|
|
|
// Split into MIX_FRAME_SIZE (960) sample frames.
|
|
let mut frames = Vec::new();
|
|
let mut offset = 0;
|
|
while offset < resampled.len() {
|
|
let end = (offset + MIX_FRAME_SIZE).min(resampled.len());
|
|
let mut frame = resampled[offset..end].to_vec();
|
|
// Pad short final frame with silence.
|
|
frame.resize(MIX_FRAME_SIZE, 0.0);
|
|
frames.push(frame);
|
|
offset += MIX_FRAME_SIZE;
|
|
}
|
|
|
|
Ok(frames)
|
|
}
|