feat(proxy-engine,codec-lib): add adaptive RTP jitter buffering with Opus packet loss concealment and stable 20ms resampling
This commit is contained in:
@@ -142,8 +142,10 @@ impl TranscodeState {
|
||||
}
|
||||
|
||||
/// High-quality sample rate conversion using rubato FFT resampler.
|
||||
/// Resamplers are cached by (from_rate, to_rate, chunk_size) and reused,
|
||||
/// maintaining proper inter-frame state for continuous audio streams.
|
||||
///
|
||||
/// To maintain continuous filter state, the resampler always processes at a
|
||||
/// canonical chunk size (20ms at the source rate). This prevents cache
|
||||
/// thrashing from variable input sizes and preserves inter-frame filter state.
|
||||
pub fn resample(
|
||||
&mut self,
|
||||
pcm: &[i16],
|
||||
@@ -154,28 +156,61 @@ impl TranscodeState {
|
||||
return Ok(pcm.to_vec());
|
||||
}
|
||||
|
||||
let chunk = pcm.len();
|
||||
let key = (from_rate, to_rate, chunk);
|
||||
let canonical_chunk = (from_rate as usize) / 50; // 20ms
|
||||
let key = (from_rate, to_rate, canonical_chunk);
|
||||
|
||||
if !self.resamplers.contains_key(&key) {
|
||||
let r =
|
||||
FftFixedIn::<f64>::new(from_rate as usize, to_rate as usize, chunk, 1, 1)
|
||||
.map_err(|e| format!("resampler {from_rate}->{to_rate}: {e}"))?;
|
||||
let r = FftFixedIn::<f64>::new(
|
||||
from_rate as usize,
|
||||
to_rate as usize,
|
||||
canonical_chunk,
|
||||
1,
|
||||
1,
|
||||
)
|
||||
.map_err(|e| format!("resampler {from_rate}->{to_rate}: {e}"))?;
|
||||
self.resamplers.insert(key, r);
|
||||
}
|
||||
let resampler = self.resamplers.get_mut(&key).unwrap();
|
||||
|
||||
let float_in: Vec<f64> = pcm.iter().map(|&s| s as f64 / 32768.0).collect();
|
||||
let input = vec![float_in];
|
||||
let mut output = Vec::with_capacity(
|
||||
(pcm.len() as f64 * to_rate as f64 / from_rate as f64).ceil() as usize + 16,
|
||||
);
|
||||
|
||||
let result = resampler
|
||||
.process(&input, None)
|
||||
.map_err(|e| format!("resample {from_rate}->{to_rate}: {e}"))?;
|
||||
let mut offset = 0;
|
||||
while offset < pcm.len() {
|
||||
let remaining = pcm.len() - offset;
|
||||
let copy_len = remaining.min(canonical_chunk);
|
||||
let mut chunk = vec![0.0f64; canonical_chunk];
|
||||
for i in 0..copy_len {
|
||||
chunk[i] = pcm[offset + i] as f64 / 32768.0;
|
||||
}
|
||||
|
||||
Ok(result[0]
|
||||
.iter()
|
||||
.map(|&s| (s * 32767.0).round().clamp(-32768.0, 32767.0) as i16)
|
||||
.collect())
|
||||
let input = vec![chunk];
|
||||
let result = resampler
|
||||
.process(&input, None)
|
||||
.map_err(|e| format!("resample {from_rate}->{to_rate}: {e}"))?;
|
||||
|
||||
if remaining < canonical_chunk {
|
||||
let expected =
|
||||
(copy_len as f64 * to_rate as f64 / from_rate as f64).round() as usize;
|
||||
let take = expected.min(result[0].len());
|
||||
output.extend(
|
||||
result[0][..take]
|
||||
.iter()
|
||||
.map(|&s| (s * 32767.0).round().clamp(-32768.0, 32767.0) as i16),
|
||||
);
|
||||
} else {
|
||||
output.extend(
|
||||
result[0]
|
||||
.iter()
|
||||
.map(|&s| (s * 32767.0).round().clamp(-32768.0, 32767.0) as i16),
|
||||
);
|
||||
}
|
||||
|
||||
offset += canonical_chunk;
|
||||
}
|
||||
|
||||
Ok(output)
|
||||
}
|
||||
|
||||
/// Apply RNNoise ML noise suppression to 48kHz PCM audio.
|
||||
@@ -329,6 +364,21 @@ impl TranscodeState {
|
||||
}
|
||||
}
|
||||
|
||||
/// Opus packet loss concealment — synthesize one frame to fill a gap.
|
||||
/// Returns f32 PCM at 48kHz. `frame_size` should be 960 for 20ms.
|
||||
pub fn opus_plc(&mut self, frame_size: usize) -> Result<Vec<f32>, String> {
|
||||
let mut pcm = vec![0.0f32; frame_size];
|
||||
let out = MutSignals::try_from(&mut pcm[..])
|
||||
.map_err(|e| format!("opus plc signals: {e}"))?;
|
||||
let n: usize = self
|
||||
.opus_dec
|
||||
.decode_float(None::<OpusPacket<'_>>, out, false)
|
||||
.map_err(|e| format!("opus plc: {e}"))?
|
||||
.into();
|
||||
pcm.truncate(n);
|
||||
Ok(pcm)
|
||||
}
|
||||
|
||||
/// Encode f32 PCM samples ([-1.0, 1.0]) to an audio codec.
|
||||
///
|
||||
/// For Opus, uses native float encode (no i16 quantization).
|
||||
@@ -357,7 +407,10 @@ impl TranscodeState {
|
||||
}
|
||||
|
||||
/// High-quality sample rate conversion for f32 PCM using rubato FFT resampler.
|
||||
/// Uses a separate cache from the i16 resampler.
|
||||
///
|
||||
/// To maintain continuous filter state, the resampler always processes at a
|
||||
/// canonical chunk size (20ms at the source rate). This prevents cache
|
||||
/// thrashing from variable input sizes and preserves inter-frame filter state.
|
||||
pub fn resample_f32(
|
||||
&mut self,
|
||||
pcm: &[f32],
|
||||
@@ -368,23 +421,50 @@ impl TranscodeState {
|
||||
return Ok(pcm.to_vec());
|
||||
}
|
||||
|
||||
let chunk = pcm.len();
|
||||
let key = (from_rate, to_rate, chunk);
|
||||
let canonical_chunk = (from_rate as usize) / 50; // 20ms
|
||||
let key = (from_rate, to_rate, canonical_chunk);
|
||||
|
||||
if !self.resamplers_f32.contains_key(&key) {
|
||||
let r =
|
||||
FftFixedIn::<f32>::new(from_rate as usize, to_rate as usize, chunk, 1, 1)
|
||||
.map_err(|e| format!("resampler f32 {from_rate}->{to_rate}: {e}"))?;
|
||||
let r = FftFixedIn::<f32>::new(
|
||||
from_rate as usize,
|
||||
to_rate as usize,
|
||||
canonical_chunk,
|
||||
1,
|
||||
1,
|
||||
)
|
||||
.map_err(|e| format!("resampler f32 {from_rate}->{to_rate}: {e}"))?;
|
||||
self.resamplers_f32.insert(key, r);
|
||||
}
|
||||
let resampler = self.resamplers_f32.get_mut(&key).unwrap();
|
||||
|
||||
let input = vec![pcm.to_vec()];
|
||||
let result = resampler
|
||||
.process(&input, None)
|
||||
.map_err(|e| format!("resample f32 {from_rate}->{to_rate}: {e}"))?;
|
||||
let mut output = Vec::with_capacity(
|
||||
(pcm.len() as f64 * to_rate as f64 / from_rate as f64).ceil() as usize + 16,
|
||||
);
|
||||
|
||||
Ok(result[0].clone())
|
||||
let mut offset = 0;
|
||||
while offset < pcm.len() {
|
||||
let remaining = pcm.len() - offset;
|
||||
let mut chunk = vec![0.0f32; canonical_chunk];
|
||||
let copy_len = remaining.min(canonical_chunk);
|
||||
chunk[..copy_len].copy_from_slice(&pcm[offset..offset + copy_len]);
|
||||
|
||||
let input = vec![chunk];
|
||||
let result = resampler
|
||||
.process(&input, None)
|
||||
.map_err(|e| format!("resample f32 {from_rate}->{to_rate}: {e}"))?;
|
||||
|
||||
if remaining < canonical_chunk {
|
||||
let expected =
|
||||
(copy_len as f64 * to_rate as f64 / from_rate as f64).round() as usize;
|
||||
output.extend_from_slice(&result[0][..expected.min(result[0].len())]);
|
||||
} else {
|
||||
output.extend_from_slice(&result[0]);
|
||||
}
|
||||
|
||||
offset += canonical_chunk;
|
||||
}
|
||||
|
||||
Ok(output)
|
||||
}
|
||||
|
||||
/// Apply RNNoise ML noise suppression to 48kHz f32 PCM audio.
|
||||
|
||||
Reference in New Issue
Block a user