feat(proxy-engine,codec-lib): add adaptive RTP jitter buffering with Opus packet loss concealment and stable 20ms resampling

This commit is contained in:
2026-04-10 21:15:34 +00:00
parent b6950e11d2
commit 7c4756402e
7 changed files with 350 additions and 54 deletions

View File

@@ -142,8 +142,10 @@ impl TranscodeState {
}
/// High-quality sample rate conversion using rubato FFT resampler.
/// Resamplers are cached by (from_rate, to_rate, chunk_size) and reused,
/// maintaining proper inter-frame state for continuous audio streams.
///
/// To maintain continuous filter state, the resampler always processes at a
/// canonical chunk size (20ms at the source rate). This prevents cache
/// thrashing from variable input sizes and preserves inter-frame filter state.
pub fn resample(
&mut self,
pcm: &[i16],
@@ -154,28 +156,61 @@ impl TranscodeState {
return Ok(pcm.to_vec());
}
let chunk = pcm.len();
let key = (from_rate, to_rate, chunk);
let canonical_chunk = (from_rate as usize) / 50; // 20ms
let key = (from_rate, to_rate, canonical_chunk);
if !self.resamplers.contains_key(&key) {
let r =
FftFixedIn::<f64>::new(from_rate as usize, to_rate as usize, chunk, 1, 1)
.map_err(|e| format!("resampler {from_rate}->{to_rate}: {e}"))?;
let r = FftFixedIn::<f64>::new(
from_rate as usize,
to_rate as usize,
canonical_chunk,
1,
1,
)
.map_err(|e| format!("resampler {from_rate}->{to_rate}: {e}"))?;
self.resamplers.insert(key, r);
}
let resampler = self.resamplers.get_mut(&key).unwrap();
let float_in: Vec<f64> = pcm.iter().map(|&s| s as f64 / 32768.0).collect();
let input = vec![float_in];
let mut output = Vec::with_capacity(
(pcm.len() as f64 * to_rate as f64 / from_rate as f64).ceil() as usize + 16,
);
let result = resampler
.process(&input, None)
.map_err(|e| format!("resample {from_rate}->{to_rate}: {e}"))?;
let mut offset = 0;
while offset < pcm.len() {
let remaining = pcm.len() - offset;
let copy_len = remaining.min(canonical_chunk);
let mut chunk = vec![0.0f64; canonical_chunk];
for i in 0..copy_len {
chunk[i] = pcm[offset + i] as f64 / 32768.0;
}
Ok(result[0]
.iter()
.map(|&s| (s * 32767.0).round().clamp(-32768.0, 32767.0) as i16)
.collect())
let input = vec![chunk];
let result = resampler
.process(&input, None)
.map_err(|e| format!("resample {from_rate}->{to_rate}: {e}"))?;
if remaining < canonical_chunk {
let expected =
(copy_len as f64 * to_rate as f64 / from_rate as f64).round() as usize;
let take = expected.min(result[0].len());
output.extend(
result[0][..take]
.iter()
.map(|&s| (s * 32767.0).round().clamp(-32768.0, 32767.0) as i16),
);
} else {
output.extend(
result[0]
.iter()
.map(|&s| (s * 32767.0).round().clamp(-32768.0, 32767.0) as i16),
);
}
offset += canonical_chunk;
}
Ok(output)
}
/// Apply RNNoise ML noise suppression to 48kHz PCM audio.
@@ -329,6 +364,21 @@ impl TranscodeState {
}
}
/// Opus packet loss concealment: have the decoder synthesize one frame to
/// cover a gap in the stream.
///
/// The decoder is invoked with no packet (`None`), which is how a lost frame
/// is signalled to it; it writes the concealment audio into a zeroed buffer
/// of `frame_size` samples.
///
/// Returns f32 PCM at 48kHz, shortened to the number of samples the decoder
/// reports having produced. `frame_size` should be 960 for 20ms.
///
/// # Errors
///
/// Returns a `String` prefixed with `opus plc signals:` if the buffer cannot
/// be wrapped as `MutSignals`, or with `opus plc:` if the decoder call fails.
pub fn opus_plc(&mut self, frame_size: usize) -> Result<Vec<f32>, String> {
    // Zero-filled scratch buffer the decoder writes the concealed frame into.
    let mut concealed = vec![0.0f32; frame_size];

    let signals = MutSignals::try_from(concealed.as_mut_slice())
        .map_err(|e| format!("opus plc signals: {e}"))?;

    // NOTE(review): the trailing `false` is presumably the FEC flag — confirm
    // against the decoder API.
    let decoded = self
        .opus_dec
        .decode_float(None::<OpusPacket<'_>>, signals, false)
        .map_err(|e| format!("opus plc: {e}"))?;

    // Keep only the samples the decoder says it produced.
    let produced: usize = decoded.into();
    concealed.truncate(produced);
    Ok(concealed)
}
/// Encode f32 PCM samples ([-1.0, 1.0]) to an audio codec.
///
/// For Opus, uses native float encode (no i16 quantization).
@@ -357,7 +407,10 @@ impl TranscodeState {
}
/// High-quality sample rate conversion for f32 PCM using rubato FFT resampler.
/// Uses a separate cache from the i16 resampler.
///
/// To maintain continuous filter state, the resampler always processes at a
/// canonical chunk size (20ms at the source rate). This prevents cache
/// thrashing from variable input sizes and preserves inter-frame filter state.
pub fn resample_f32(
&mut self,
pcm: &[f32],
@@ -368,23 +421,50 @@ impl TranscodeState {
return Ok(pcm.to_vec());
}
let chunk = pcm.len();
let key = (from_rate, to_rate, chunk);
let canonical_chunk = (from_rate as usize) / 50; // 20ms
let key = (from_rate, to_rate, canonical_chunk);
if !self.resamplers_f32.contains_key(&key) {
let r =
FftFixedIn::<f32>::new(from_rate as usize, to_rate as usize, chunk, 1, 1)
.map_err(|e| format!("resampler f32 {from_rate}->{to_rate}: {e}"))?;
let r = FftFixedIn::<f32>::new(
from_rate as usize,
to_rate as usize,
canonical_chunk,
1,
1,
)
.map_err(|e| format!("resampler f32 {from_rate}->{to_rate}: {e}"))?;
self.resamplers_f32.insert(key, r);
}
let resampler = self.resamplers_f32.get_mut(&key).unwrap();
let input = vec![pcm.to_vec()];
let result = resampler
.process(&input, None)
.map_err(|e| format!("resample f32 {from_rate}->{to_rate}: {e}"))?;
let mut output = Vec::with_capacity(
(pcm.len() as f64 * to_rate as f64 / from_rate as f64).ceil() as usize + 16,
);
Ok(result[0].clone())
let mut offset = 0;
while offset < pcm.len() {
let remaining = pcm.len() - offset;
let mut chunk = vec![0.0f32; canonical_chunk];
let copy_len = remaining.min(canonical_chunk);
chunk[..copy_len].copy_from_slice(&pcm[offset..offset + copy_len]);
let input = vec![chunk];
let result = resampler
.process(&input, None)
.map_err(|e| format!("resample f32 {from_rate}->{to_rate}: {e}"))?;
if remaining < canonical_chunk {
let expected =
(copy_len as f64 * to_rate as f64 / from_rate as f64).round() as usize;
output.extend_from_slice(&result[0][..expected.min(result[0].len())]);
} else {
output.extend_from_slice(&result[0]);
}
offset += canonical_chunk;
}
Ok(output)
}
/// Apply RNNoise ML noise suppression to 48kHz f32 PCM audio.