feat(mixer): enhance mixer functionality with interaction and tool legs

- Updated mixer to handle participant and isolated leg roles, allowing for IVR and consent interactions.
- Introduced commands for starting and canceling interactions, managing tool legs for recording and transcription.
- Implemented per-source audio handling for tool legs, enabling separate audio processing.
- Enhanced DTMF handling to forward events between participant legs only.
- Added support for PCM recording directly from tool legs, with WAV file generation.
- Updated TypeScript definitions and functions to support new interaction and tool leg features.
This commit is contained in:
2026-04-10 14:54:21 +00:00
parent 6a130db7c7
commit 7d59361352
13 changed files with 1448 additions and 94 deletions

View File

@@ -1,4 +1,5 @@
//! Audio player — reads a WAV file and streams it as RTP packets.
//! Also provides prompt preparation for the leg interaction system.
use crate::rtp::{build_rtp_header, rtp_clock_increment};
use codec_lib::{codec_sample_rate, TranscodeState};
@@ -8,6 +9,11 @@ use std::sync::Arc;
use tokio::net::UdpSocket;
use tokio::time::{self, Duration};
/// Mixing sample rate used by the mixer (must stay in sync with mixer::MIX_RATE).
const MIX_RATE: u32 = 16000;
/// Samples per 20ms frame at the mixing rate.
const MIX_FRAME_SIZE: usize = 320;
/// Play a WAV file as RTP to a destination.
/// Returns when playback is complete.
pub async fn play_wav_file(
@@ -171,3 +177,64 @@ pub async fn play_beep(
Ok((seq, ts))
}
/// Load a WAV file and split it into 20ms PCM frames at 16kHz.
/// Used by the leg interaction system to prepare prompt audio for the mixer.
pub fn load_prompt_pcm_frames(wav_path: &str) -> Result<Vec<Vec<i16>>, String> {
let path = Path::new(wav_path);
if !path.exists() {
return Err(format!("WAV file not found: {wav_path}"));
}
let mut reader =
hound::WavReader::open(path).map_err(|e| format!("open WAV {wav_path}: {e}"))?;
let spec = reader.spec();
let wav_rate = spec.sample_rate;
// Read all samples as i16.
let samples: Vec<i16> = if spec.bits_per_sample == 16 {
reader
.samples::<i16>()
.filter_map(|s| s.ok())
.collect()
} else if spec.bits_per_sample == 32 && spec.sample_format == hound::SampleFormat::Float {
reader
.samples::<f32>()
.filter_map(|s| s.ok())
.map(|s| (s * 32767.0).round().clamp(-32768.0, 32767.0) as i16)
.collect()
} else {
return Err(format!(
"unsupported WAV format: {}bit {:?}",
spec.bits_per_sample, spec.sample_format
));
};
if samples.is_empty() {
return Ok(vec![]);
}
// Resample to MIX_RATE (16kHz) if needed.
let resampled = if wav_rate != MIX_RATE {
let mut transcoder = TranscodeState::new().map_err(|e| format!("codec init: {e}"))?;
transcoder
.resample(&samples, wav_rate, MIX_RATE)
.map_err(|e| format!("resample: {e}"))?
} else {
samples
};
// Split into MIX_FRAME_SIZE (320) sample frames.
let mut frames = Vec::new();
let mut offset = 0;
while offset < resampled.len() {
let end = (offset + MIX_FRAME_SIZE).min(resampled.len());
let mut frame = resampled[offset..end].to_vec();
// Pad short final frame with silence.
frame.resize(MIX_FRAME_SIZE, 0);
frames.push(frame);
offset += MIX_FRAME_SIZE;
}
Ok(frames)
}