fix(proxy-engine,codec-lib,sip-proto,ts): preserve negotiated media details and improve RTP audio handling across call legs
This commit is contained in:
@@ -35,6 +35,8 @@ pub struct RtpPacket {
|
||||
pub payload_type: u8,
|
||||
/// RTP marker bit (first packet of a DTMF event, etc.).
|
||||
pub marker: bool,
|
||||
/// RTP sequence number for reordering.
|
||||
pub seq: u16,
|
||||
/// RTP timestamp from the original packet header.
|
||||
pub timestamp: u32,
|
||||
}
|
||||
@@ -319,16 +321,18 @@ async fn mixer_loop(
|
||||
continue;
|
||||
}
|
||||
|
||||
// ── 2. Drain inbound packets, decode to 16kHz PCM. ─────────
|
||||
// ── 2. Drain inbound packets, decode to 48kHz f32 PCM. ────
|
||||
// DTMF (PT 101) packets are collected separately.
|
||||
// Audio packets are sorted by sequence number and decoded
|
||||
// in order to maintain codec state (critical for G.722 ADPCM).
|
||||
let leg_ids: Vec<String> = legs.keys().cloned().collect();
|
||||
let mut dtmf_forward: Vec<(String, RtpPacket)> = Vec::new();
|
||||
|
||||
for lid in &leg_ids {
|
||||
let slot = legs.get_mut(lid).unwrap();
|
||||
|
||||
// Drain channel — collect DTMF packets separately, keep latest audio.
|
||||
let mut latest_audio: Option<RtpPacket> = None;
|
||||
// Drain channel — collect DTMF separately, collect ALL audio packets.
|
||||
let mut audio_packets: Vec<RtpPacket> = Vec::new();
|
||||
loop {
|
||||
match slot.inbound_rx.try_recv() {
|
||||
Ok(pkt) => {
|
||||
@@ -336,35 +340,47 @@ async fn mixer_loop(
|
||||
// DTMF telephone-event: collect for processing.
|
||||
dtmf_forward.push((lid.clone(), pkt));
|
||||
} else {
|
||||
latest_audio = Some(pkt);
|
||||
audio_packets.push(pkt);
|
||||
}
|
||||
}
|
||||
Err(_) => break,
|
||||
}
|
||||
}
|
||||
|
||||
if let Some(pkt) = latest_audio {
|
||||
if !audio_packets.is_empty() {
|
||||
slot.silent_ticks = 0;
|
||||
match slot.transcoder.decode_to_f32(&pkt.payload, pkt.payload_type) {
|
||||
Ok((pcm, rate)) => {
|
||||
// Resample to 48kHz mixing rate if needed.
|
||||
let pcm_48k = if rate == MIX_RATE {
|
||||
pcm
|
||||
} else {
|
||||
slot.transcoder
|
||||
.resample_f32(&pcm, rate, MIX_RATE)
|
||||
.unwrap_or_else(|_| vec![0.0f32; MIX_FRAME_SIZE])
|
||||
};
|
||||
// Per-leg inbound denoising at 48kHz.
|
||||
let denoised = TranscodeState::denoise_f32(&mut slot.denoiser, &pcm_48k);
|
||||
// Pad or truncate to exactly MIX_FRAME_SIZE.
|
||||
let mut frame = denoised;
|
||||
frame.resize(MIX_FRAME_SIZE, 0.0);
|
||||
slot.last_pcm_frame = frame;
|
||||
}
|
||||
Err(_) => {
|
||||
// Decode failed — use silence.
|
||||
slot.last_pcm_frame = vec![0.0f32; MIX_FRAME_SIZE];
|
||||
|
||||
// Sort by sequence number for correct codec state progression.
|
||||
// This prevents G.722 ADPCM state corruption from out-of-order packets (NOTE: a plain u16 sort misorders a batch that spans the 65535 -> 0 sequence wraparound).
|
||||
audio_packets.sort_by_key(|p| p.seq);
|
||||
|
||||
// Decode ALL packets in order (maintains codec state),
|
||||
// but only keep the last decoded frame for mixing.
|
||||
for pkt in &audio_packets {
|
||||
match slot.transcoder.decode_to_f32(&pkt.payload, pkt.payload_type) {
|
||||
Ok((pcm, rate)) => {
|
||||
// Resample to 48kHz mixing rate if needed.
|
||||
let pcm_48k = if rate == MIX_RATE {
|
||||
pcm
|
||||
} else {
|
||||
slot.transcoder
|
||||
.resample_f32(&pcm, rate, MIX_RATE)
|
||||
.unwrap_or_else(|_| vec![0.0f32; MIX_FRAME_SIZE])
|
||||
};
|
||||
// Per-leg inbound denoising at 48kHz.
|
||||
// Skip for Opus/WebRTC legs — browsers already apply
|
||||
// their own noise suppression via getUserMedia.
|
||||
let processed = if slot.codec_pt != codec_lib::PT_OPUS {
|
||||
TranscodeState::denoise_f32(&mut slot.denoiser, &pcm_48k)
|
||||
} else {
|
||||
pcm_48k
|
||||
};
|
||||
// Pad or truncate to exactly MIX_FRAME_SIZE.
|
||||
let mut frame = processed;
|
||||
frame.resize(MIX_FRAME_SIZE, 0.0);
|
||||
slot.last_pcm_frame = frame;
|
||||
}
|
||||
Err(_) => {}
|
||||
}
|
||||
}
|
||||
} else if dtmf_forward.iter().any(|(src, _)| src == lid) {
|
||||
|
||||
Reference in New Issue
Block a user