feat(proxy-engine): add multiparty call mixing with dynamic SIP and WebRTC leg management

This commit is contained in:
2026-04-10 12:52:48 +00:00
parent 36eab44e28
commit 93f671f1f9
13 changed files with 1572 additions and 809 deletions

View File

@@ -0,0 +1,232 @@
//! Audio mixer — mix-minus engine for multiparty calls.
//!
//! Each Call spawns one mixer task. Legs communicate with the mixer via
//! tokio mpsc channels — no shared mutable state, no lock contention.
//!
//! The mixer runs a 20ms tick loop:
//! 1. Drain inbound channels, decode to PCM, resample to 16kHz
//! 2. Compute total mix (sum of all legs' PCM as i32)
//! 3. For each leg: mix-minus = total - own, resample to leg codec rate, encode, send
use crate::ipc::{emit_event, OutTx};
use crate::rtp::{build_rtp_header, rtp_clock_increment};
use codec_lib::{codec_sample_rate, TranscodeState};
use std::collections::HashMap;
use tokio::sync::mpsc;
use tokio::task::JoinHandle;
use tokio::time::{self, Duration, MissedTickBehavior};
/// Mixing sample rate — 16kHz. G.722 is native, G.711 needs 2× upsample, Opus needs 3× downsample.
const MIX_RATE: u32 = 16000;
/// Samples per 20ms frame at the mixing rate.
const MIX_FRAME_SIZE: usize = 320; // 16000 * 0.020
/// A raw RTP payload received from a leg (no RTP header).
pub struct RtpPacket {
pub payload: Vec<u8>,
pub payload_type: u8,
}
/// Commands sent to the mixer task via a control channel.
pub enum MixerCommand {
/// Add a new leg to the mix.
AddLeg {
leg_id: String,
codec_pt: u8,
inbound_rx: mpsc::Receiver<RtpPacket>,
outbound_tx: mpsc::Sender<Vec<u8>>,
},
/// Remove a leg from the mix (channels are dropped, I/O tasks exit).
RemoveLeg { leg_id: String },
/// Shut down the mixer.
Shutdown,
}
/// Internal per-leg state inside the mixer.
struct MixerLegSlot {
codec_pt: u8,
transcoder: TranscodeState,
inbound_rx: mpsc::Receiver<RtpPacket>,
outbound_tx: mpsc::Sender<Vec<u8>>,
/// Last decoded PCM frame at MIX_RATE (320 samples). Used for mix-minus.
last_pcm_frame: Vec<i16>,
/// Number of consecutive ticks with no inbound packet.
silent_ticks: u32,
// RTP output state.
rtp_seq: u16,
rtp_ts: u32,
rtp_ssrc: u32,
}
/// Spawn the mixer task for a call. Returns the command sender and task handle.
pub fn spawn_mixer(
call_id: String,
out_tx: OutTx,
) -> (mpsc::Sender<MixerCommand>, JoinHandle<()>) {
let (cmd_tx, cmd_rx) = mpsc::channel::<MixerCommand>(32);
let handle = tokio::spawn(async move {
mixer_loop(call_id, cmd_rx, out_tx).await;
});
(cmd_tx, handle)
}
/// The 20ms mixing loop.
async fn mixer_loop(
call_id: String,
mut cmd_rx: mpsc::Receiver<MixerCommand>,
out_tx: OutTx,
) {
let mut legs: HashMap<String, MixerLegSlot> = HashMap::new();
let mut interval = time::interval(Duration::from_millis(20));
interval.set_missed_tick_behavior(MissedTickBehavior::Skip);
loop {
interval.tick().await;
// 1. Process control commands (non-blocking).
loop {
match cmd_rx.try_recv() {
Ok(MixerCommand::AddLeg {
leg_id,
codec_pt,
inbound_rx,
outbound_tx,
}) => {
let transcoder = match TranscodeState::new() {
Ok(t) => t,
Err(e) => {
emit_event(
&out_tx,
"mixer_error",
serde_json::json!({
"call_id": call_id,
"leg_id": leg_id,
"error": format!("codec init: {e}"),
}),
);
continue;
}
};
legs.insert(
leg_id,
MixerLegSlot {
codec_pt,
transcoder,
inbound_rx,
outbound_tx,
last_pcm_frame: vec![0i16; MIX_FRAME_SIZE],
silent_ticks: 0,
rtp_seq: 0,
rtp_ts: 0,
rtp_ssrc: rand::random(),
},
);
}
Ok(MixerCommand::RemoveLeg { leg_id }) => {
legs.remove(&leg_id);
// Channels drop → I/O tasks exit cleanly.
}
Ok(MixerCommand::Shutdown) => return,
Err(mpsc::error::TryRecvError::Empty) => break,
Err(mpsc::error::TryRecvError::Disconnected) => return,
}
}
if legs.is_empty() {
continue;
}
// 2. Drain inbound packets, decode to 16kHz PCM.
let leg_ids: Vec<String> = legs.keys().cloned().collect();
for lid in &leg_ids {
let slot = legs.get_mut(lid).unwrap();
// Drain channel, keep only the latest packet (simple jitter handling).
let mut latest: Option<RtpPacket> = None;
loop {
match slot.inbound_rx.try_recv() {
Ok(pkt) => latest = Some(pkt),
Err(_) => break,
}
}
if let Some(pkt) = latest {
slot.silent_ticks = 0;
match slot.transcoder.decode_to_pcm(&pkt.payload, pkt.payload_type) {
Ok((pcm, rate)) => {
// Resample to mixing rate if needed.
let pcm_mix = if rate == MIX_RATE {
pcm
} else {
slot.transcoder
.resample(&pcm, rate, MIX_RATE)
.unwrap_or_else(|_| vec![0i16; MIX_FRAME_SIZE])
};
// Pad or truncate to exactly MIX_FRAME_SIZE.
let mut frame = pcm_mix;
frame.resize(MIX_FRAME_SIZE, 0);
slot.last_pcm_frame = frame;
}
Err(_) => {
// Decode failed — use silence.
slot.last_pcm_frame = vec![0i16; MIX_FRAME_SIZE];
}
}
} else {
slot.silent_ticks += 1;
// After 150 ticks (3 seconds) of silence, zero out to avoid stale audio.
if slot.silent_ticks > 150 {
slot.last_pcm_frame = vec![0i16; MIX_FRAME_SIZE];
}
}
}
// 3. Compute total mix (sum of all legs as i32 to avoid overflow).
let mut total_mix = vec![0i32; MIX_FRAME_SIZE];
for slot in legs.values() {
for (i, &s) in slot.last_pcm_frame.iter().enumerate().take(MIX_FRAME_SIZE) {
total_mix[i] += s as i32;
}
}
// 4. For each leg: mix-minus, resample, encode, send.
for slot in legs.values_mut() {
// Mix-minus: total minus this leg's own contribution.
let mut mix_minus = Vec::with_capacity(MIX_FRAME_SIZE);
for i in 0..MIX_FRAME_SIZE {
let sample =
(total_mix[i] - slot.last_pcm_frame[i] as i32).clamp(-32768, 32767) as i16;
mix_minus.push(sample);
}
// Resample from 16kHz to the leg's codec native rate.
let target_rate = codec_sample_rate(slot.codec_pt);
let resampled = if target_rate == MIX_RATE {
mix_minus
} else {
slot.transcoder
.resample(&mix_minus, MIX_RATE, target_rate)
.unwrap_or_default()
};
// Encode to the leg's codec.
let encoded = match slot.transcoder.encode_from_pcm(&resampled, slot.codec_pt) {
Ok(e) if !e.is_empty() => e,
_ => continue,
};
// Build RTP packet with header.
let header = build_rtp_header(slot.codec_pt, slot.rtp_seq, slot.rtp_ts, slot.rtp_ssrc);
let mut rtp = header.to_vec();
rtp.extend_from_slice(&encoded);
slot.rtp_seq = slot.rtp_seq.wrapping_add(1);
slot.rtp_ts = slot.rtp_ts.wrapping_add(rtp_clock_increment(slot.codec_pt));
// Non-blocking send — drop frame if channel is full.
let _ = slot.outbound_tx.try_send(rtp);
}
}
}