Files
siprouter/rust/crates/proxy-engine/src/audio_player.rs

244 lines
7.6 KiB
Rust

//! Audio player — reads a WAV file and streams it as RTP packets.
//! Also provides prompt preparation for the leg interaction system.
use crate::rtp::{build_rtp_header, rtp_clock_increment};
use codec_lib::{codec_sample_rate, TranscodeState};
use std::net::SocketAddr;
use std::path::Path;
use std::sync::Arc;
use tokio::net::UdpSocket;
use tokio::time::{self, Duration};
/// Sample rate, in Hz, of the audio handed to the mixer.
/// Must stay in sync with `mixer::MIX_RATE`.
const MIX_RATE: u32 = 48000;
/// Number of samples in one 20ms frame at [`MIX_RATE`] (50 frames per second).
const MIX_FRAME_SIZE: usize = (MIX_RATE / 50) as usize;
/// Play a WAV file as RTP to a destination, paced at one packet every 20ms.
///
/// The file is decoded to 16-bit PCM (16-bit WAVs and 32-bit float WAVs are
/// accepted), multi-channel audio is downmixed to mono, the result is resampled
/// to the sample rate of `codec_pt`, and every 20ms frame is encoded and sent
/// to `dest` through `socket`. Sequence number and timestamp both start at 0.
///
/// Returns, once playback is complete, the number of frames that were encoded
/// and handed to the socket (`Ok(0)` when the file holds no samples). Samples
/// that fail to decode are skipped and send errors are ignored (best effort).
///
/// # Errors
///
/// Returns a message when the file is missing or cannot be opened, the sample
/// format is unsupported, the codec state cannot be created, `codec_pt` maps to
/// a sample rate too low to form a 20ms frame, or resampling fails.
pub async fn play_wav_file(
    file_path: &str,
    socket: Arc<UdpSocket>,
    dest: SocketAddr,
    codec_pt: u8,
    ssrc: u32,
) -> Result<u32, String> {
    let path = Path::new(file_path);
    if !path.exists() {
        return Err(format!("WAV file not found: {file_path}"));
    }

    // Read WAV file.
    let mut reader =
        hound::WavReader::open(path).map_err(|e| format!("open WAV {file_path}: {e}"))?;
    let spec = reader.spec();
    let wav_rate = spec.sample_rate;

    // Read all (still channel-interleaved) samples as i16.
    let interleaved: Vec<i16> = if spec.bits_per_sample == 16 {
        reader.samples::<i16>().filter_map(|s| s.ok()).collect()
    } else if spec.bits_per_sample == 32 && spec.sample_format == hound::SampleFormat::Float {
        reader
            .samples::<f32>()
            .filter_map(|s| s.ok())
            .map(|s| (s * 32767.0).round().clamp(-32768.0, 32767.0) as i16)
            .collect()
    } else {
        return Err(format!(
            "unsupported WAV format: {}bit {:?}",
            spec.bits_per_sample, spec.sample_format
        ));
    };

    // Downmix to mono. The RTP stream carries a single channel, so feeding it
    // interleaved stereo would play at half speed with the channels alternating.
    let channels = usize::from(spec.channels);
    let samples: Vec<i16> = if channels > 1 {
        // `channels` comes from a u16, so it fits an i32 and the i32 sum of one
        // sample frame cannot overflow; the average is always within i16 range.
        let divisor = channels as i32;
        interleaved
            .chunks_exact(channels)
            .map(|frame| {
                let sum: i32 = frame.iter().map(|&s| i32::from(s)).sum();
                (sum / divisor) as i16
            })
            .collect()
    } else {
        interleaved
    };

    if samples.is_empty() {
        return Ok(0);
    }

    // Create codec state for encoding.
    let mut transcoder = TranscodeState::new().map_err(|e| format!("codec init: {e}"))?;

    // Frame size is 20ms of audio at the codec rate (20ms = 1/50 second).
    // A zero frame size would make the playback loop below never advance.
    let target_rate = codec_sample_rate(codec_pt);
    let frame_samples = (target_rate as usize) / 50;
    if frame_samples == 0 {
        return Err(format!(
            "unsupported codec payload type {codec_pt}: sample rate {target_rate} Hz"
        ));
    }

    // Resample to target codec rate.
    let resampled = if wav_rate != target_rate {
        transcoder
            .resample(&samples, wav_rate, target_rate)
            .map_err(|e| format!("resample: {e}"))?
    } else {
        samples
    };

    // Stream as RTP at 20ms intervals.
    let mut seq: u16 = 0;
    let mut ts: u32 = 0;
    let mut frames_sent = 0u32;
    // One buffer reused for every frame instead of a fresh allocation per packet.
    let mut frame_buf: Vec<i16> = Vec::with_capacity(frame_samples);
    let mut interval = time::interval(Duration::from_millis(20));

    for chunk in resampled.chunks(frame_samples) {
        interval.tick().await;

        // Copy the frame; a short final frame is padded with silence.
        frame_buf.clear();
        frame_buf.extend_from_slice(chunk);
        frame_buf.resize(frame_samples, 0);

        // Encode to target codec. A failed or empty encode drops this frame.
        let encoded = transcoder
            .encode_from_pcm(&frame_buf, codec_pt)
            .ok()
            .filter(|e| !e.is_empty());

        if let Some(encoded) = encoded {
            // Build RTP packet.
            let header = build_rtp_header(codec_pt, seq, ts, ssrc);
            let mut packet = header.to_vec();
            packet.extend_from_slice(&encoded);
            // Best effort: a failed send must not abort playback.
            let _ = socket.send_to(&packet, dest).await;
            seq = seq.wrapping_add(1);
            frames_sent += 1;
        }

        // Media time advances for every 20ms frame, including dropped ones, so
        // the timestamp keeps matching the pacing; only the sequence number is
        // tied to packets actually built.
        ts = ts.wrapping_add(rtp_clock_increment(codec_pt));
    }

    Ok(frames_sent)
}
/// Generate and play a beep tone (sine wave) as RTP, one packet every 20ms.
///
/// A `freq_hz` sine of `duration_ms` milliseconds is synthesised at the sample
/// rate of `codec_pt`, encoded in 20ms frames and sent to `dest` through
/// `socket`, continuing an existing stream from `start_seq` / `start_ts`.
///
/// Returns the sequence number and timestamp the next packet of the stream
/// should use. Send errors are ignored (best effort).
///
/// # Errors
///
/// Returns a message when the codec state cannot be created, the requested
/// duration does not fit in memory-sized arithmetic, or `codec_pt` maps to a
/// sample rate too low to form a 20ms frame.
pub async fn play_beep(
    socket: Arc<UdpSocket>,
    dest: SocketAddr,
    codec_pt: u8,
    ssrc: u32,
    start_seq: u16,
    start_ts: u32,
    freq_hz: u32,
    duration_ms: u32,
) -> Result<(u16, u32), String> {
    let mut transcoder = TranscodeState::new().map_err(|e| format!("codec init: {e}"))?;
    let target_rate = codec_sample_rate(codec_pt);
    let frame_samples = (target_rate as usize) / 50; // 20ms = 1/50 second

    // Multiply in u64 so long tones cannot overflow `usize` on 32-bit targets.
    let total_samples =
        usize::try_from(u64::from(target_rate) * u64::from(duration_ms) / 1000)
            .map_err(|_| format!("beep too long: {duration_ms} ms"))?;

    // Nothing to play: leave the stream position untouched.
    if total_samples == 0 {
        return Ok((start_seq, start_ts));
    }
    // A zero frame size would make the playback loop below never advance.
    if frame_samples == 0 {
        return Err(format!(
            "unsupported codec payload type {codec_pt}: sample rate {target_rate} Hz"
        ));
    }

    // Generate sine wave.
    let amplitude = 16000i16;
    let sine: Vec<i16> = (0..total_samples)
        .map(|i| {
            let t = i as f64 / target_rate as f64;
            (amplitude as f64 * (2.0 * std::f64::consts::PI * freq_hz as f64 * t).sin()) as i16
        })
        .collect();

    let mut seq = start_seq;
    let mut ts = start_ts;
    // One buffer reused for every frame instead of a fresh allocation per packet.
    let mut frame: Vec<i16> = Vec::with_capacity(frame_samples);
    let mut interval = time::interval(Duration::from_millis(20));

    for chunk in sine.chunks(frame_samples) {
        interval.tick().await;

        // Copy the frame; a short final frame is padded with silence.
        frame.clear();
        frame.extend_from_slice(chunk);
        frame.resize(frame_samples, 0);

        // A failed or empty encode drops this frame.
        let encoded = transcoder
            .encode_from_pcm(&frame, codec_pt)
            .ok()
            .filter(|e| !e.is_empty());

        if let Some(encoded) = encoded {
            let header = build_rtp_header(codec_pt, seq, ts, ssrc);
            let mut packet = header.to_vec();
            packet.extend_from_slice(&encoded);
            // Best effort: a failed send must not abort the beep.
            let _ = socket.send_to(&packet, dest).await;
            seq = seq.wrapping_add(1);
        }

        // Media time advances for every 20ms frame, including dropped ones, so
        // the timestamp handed back to the caller matches the elapsed pacing.
        ts = ts.wrapping_add(rtp_clock_increment(codec_pt));
    }

    Ok((seq, ts))
}
/// Load a WAV file and split it into 20ms f32 PCM frames at 48kHz.
/// Used by the leg interaction system to prepare prompt audio for the mixer.
///
/// 16-bit WAVs (scaled by 1/32768) and 32-bit float WAVs are accepted.
/// Multi-channel files are downmixed to mono by averaging the channels of each
/// sample frame before being handed to [`pcm_to_mix_frames`]. Samples that fail
/// to decode are skipped. A file without samples yields an empty list.
///
/// # Errors
///
/// Returns a message when the file is missing or cannot be opened, the sample
/// format is unsupported, or the conversion to mixer frames fails.
pub fn load_prompt_pcm_frames(wav_path: &str) -> Result<Vec<Vec<f32>>, String> {
    let path = Path::new(wav_path);
    if !path.exists() {
        return Err(format!("WAV file not found: {wav_path}"));
    }

    let mut reader =
        hound::WavReader::open(path).map_err(|e| format!("open WAV {wav_path}: {e}"))?;
    let spec = reader.spec();
    let wav_rate = spec.sample_rate;

    // Read all (still channel-interleaved) samples as f32 in [-1.0, 1.0].
    let interleaved: Vec<f32> = if spec.bits_per_sample == 16 {
        reader
            .samples::<i16>()
            .filter_map(|s| s.ok())
            .map(|s| s as f32 / 32768.0)
            .collect()
    } else if spec.bits_per_sample == 32 && spec.sample_format == hound::SampleFormat::Float {
        reader.samples::<f32>().filter_map(|s| s.ok()).collect()
    } else {
        return Err(format!(
            "unsupported WAV format: {}bit {:?}",
            spec.bits_per_sample, spec.sample_format
        ));
    };

    // Downmix to mono. Passing interleaved stereo on as a single channel would
    // make the prompt play at half speed with the channels alternating.
    let channels = usize::from(spec.channels);
    let samples: Vec<f32> = if channels > 1 {
        let divisor = channels as f32;
        interleaved
            .chunks_exact(channels)
            .map(|frame| frame.iter().sum::<f32>() / divisor)
            .collect()
    } else {
        interleaved
    };

    if samples.is_empty() {
        return Ok(vec![]);
    }

    pcm_to_mix_frames(&samples, wav_rate)
}
/// Convert PCM samples at an arbitrary rate into 48kHz 20ms mixer frames.
///
/// The input is resampled to `MIX_RATE` unless it is already at that rate, then
/// cut into frames of `MIX_FRAME_SIZE` samples; a short final frame is padded
/// with zeros. Empty input yields an empty list.
///
/// # Errors
///
/// Returns a message when the codec state cannot be created or resampling fails.
pub fn pcm_to_mix_frames(samples: &[f32], sample_rate: u32) -> Result<Vec<Vec<f32>>, String> {
    if samples.is_empty() {
        return Ok(Vec::new());
    }

    // Bring the audio to the mixer rate; a plain copy is enough when it already matches.
    let at_mix_rate = if sample_rate == MIX_RATE {
        samples.to_vec()
    } else {
        let mut transcoder = TranscodeState::new().map_err(|e| format!("codec init: {e}"))?;
        transcoder
            .resample_f32(samples, sample_rate, MIX_RATE)
            .map_err(|e| format!("resample: {e}"))?
    };

    // Cut into fixed-size frames; only the last chunk can be short and need padding.
    let frames = at_mix_rate
        .chunks(MIX_FRAME_SIZE)
        .map(|chunk| {
            let mut frame = chunk.to_vec();
            frame.resize(MIX_FRAME_SIZE, 0.0);
            frame
        })
        .collect();

    Ok(frames)
}