feat(proxy-engine,codec-lib): add adaptive RTP jitter buffering with Opus packet loss concealment and stable 20ms resampling

2026-04-10 21:15:34 +00:00
parent b6950e11d2
commit 7c4756402e
7 changed files with 350 additions and 54 deletions
--- a/rust/crates/codec-lib/src/lib.rs
+++ b/rust/crates/codec-lib/src/lib.rs
@@ -142,8 +142,10 @@ impl TranscodeState {
    }

    /// High-quality sample rate conversion using rubato FFT resampler.
-    /// Resamplers are cached by (from_rate, to_rate, chunk_size) and reused,
-    /// maintaining proper inter-frame state for continuous audio streams.
+    ///
+    /// To keep filter state continuous and avoid cache thrashing from variable input
+    /// sizes, the resampler always runs at a canonical chunk size (20ms at the source
+    /// rate); a short final chunk is zero-padded and its output trimmed to match.
    pub fn resample(
        &mut self,
        pcm: &[i16],
@@ -154,28 +156,61 @@ impl TranscodeState {
            return Ok(pcm.to_vec());
        }

-        let chunk = pcm.len();
-        let key = (from_rate, to_rate, chunk);
+        let canonical_chunk = (from_rate as usize) / 50; // 20ms
+        let key = (from_rate, to_rate, canonical_chunk);

        if !self.resamplers.contains_key(&key) {
-            let r =
-                FftFixedIn::<f64>::new(from_rate as usize, to_rate as usize, chunk, 1, 1)
-                    .map_err(|e| format!("resampler {from_rate}->{to_rate}: {e}"))?;
+            let r = FftFixedIn::<f64>::new(
+                from_rate as usize,
+                to_rate as usize,
+                canonical_chunk,
+                1,
+                1,
+            )
+            .map_err(|e| format!("resampler {from_rate}->{to_rate}: {e}"))?;
            self.resamplers.insert(key, r);
        }
        let resampler = self.resamplers.get_mut(&key).unwrap();

-        let float_in: Vec<f64> = pcm.iter().map(|&s| s as f64 / 32768.0).collect();
-        let input = vec![float_in];
+        let mut output = Vec::with_capacity(
+            (pcm.len() as f64 * to_rate as f64 / from_rate as f64).ceil() as usize + 16,
+        );

-        let result = resampler
-            .process(&input, None)
-            .map_err(|e| format!("resample {from_rate}->{to_rate}: {e}"))?;
+        let mut offset = 0;
+        while offset < pcm.len() {
+            let remaining = pcm.len() - offset;
+            let copy_len = remaining.min(canonical_chunk);
+            let mut chunk = vec![0.0f64; canonical_chunk];
+            for i in 0..copy_len {
+                chunk[i] = pcm[offset + i] as f64 / 32768.0;
+            }

-        Ok(result[0]
-            .iter()
-            .map(|&s| (s * 32767.0).round().clamp(-32768.0, 32767.0) as i16)
-            .collect())
+            let input = vec![chunk];
+            let result = resampler
+                .process(&input, None)
+                .map_err(|e| format!("resample {from_rate}->{to_rate}: {e}"))?;
+
+            if remaining < canonical_chunk {
+                let expected =
+                    (copy_len as f64 * to_rate as f64 / from_rate as f64).round() as usize;
+                let take = expected.min(result[0].len());
+                output.extend(
+                    result[0][..take]
+                        .iter()
+                        .map(|&s| (s * 32767.0).round().clamp(-32768.0, 32767.0) as i16),
+                );
+            } else {
+                output.extend(
+                    result[0]
+                        .iter()
+                        .map(|&s| (s * 32767.0).round().clamp(-32768.0, 32767.0) as i16),
+                );
+            }
+
+            offset += canonical_chunk;
+        }
+
+        Ok(output)
    }

    /// Apply RNNoise ML noise suppression to 48kHz PCM audio.
@@ -329,6 +364,21 @@ impl TranscodeState {
        }
    }

+    /// Opus packet loss concealment — synthesize one frame to fill a gap.
+    /// Returns up to `frame_size` f32 samples at 48kHz; `frame_size` should be 960 for 20ms.
+    pub fn opus_plc(&mut self, frame_size: usize) -> Result<Vec<f32>, String> {
+        let mut pcm = vec![0.0f32; frame_size];
+        let out = MutSignals::try_from(&mut pcm[..])
+            .map_err(|e| format!("opus plc signals: {e}"))?;
+        let n: usize = self
+            .opus_dec
+            .decode_float(None::<OpusPacket<'_>>, out, false)
+            .map_err(|e| format!("opus plc: {e}"))?
+            .into();
+        pcm.truncate(n);
+        Ok(pcm)
+    }
+
    /// Encode f32 PCM samples ([-1.0, 1.0]) to an audio codec.
    ///
    /// For Opus, uses native float encode (no i16 quantization).
@@ -357,7 +407,10 @@ impl TranscodeState {
    }

    /// High-quality sample rate conversion for f32 PCM using rubato FFT resampler.
-    /// Uses a separate cache from the i16 resampler.
+    ///
+    /// To keep filter state continuous and avoid cache thrashing from variable input
+    /// sizes, the resampler always runs at a canonical chunk size (20ms at the source
+    /// rate); a short final chunk is zero-padded and its output trimmed to match.
    pub fn resample_f32(
        &mut self,
        pcm: &[f32],
@@ -368,23 +421,50 @@ impl TranscodeState {
            return Ok(pcm.to_vec());
        }

-        let chunk = pcm.len();
-        let key = (from_rate, to_rate, chunk);
+        let canonical_chunk = (from_rate as usize) / 50; // 20ms
+        let key = (from_rate, to_rate, canonical_chunk);

        if !self.resamplers_f32.contains_key(&key) {
-            let r =
-                FftFixedIn::<f32>::new(from_rate as usize, to_rate as usize, chunk, 1, 1)
-                    .map_err(|e| format!("resampler f32 {from_rate}->{to_rate}: {e}"))?;
+            let r = FftFixedIn::<f32>::new(
+                from_rate as usize,
+                to_rate as usize,
+                canonical_chunk,
+                1,
+                1,
+            )
+            .map_err(|e| format!("resampler f32 {from_rate}->{to_rate}: {e}"))?;
            self.resamplers_f32.insert(key, r);
        }
        let resampler = self.resamplers_f32.get_mut(&key).unwrap();

-        let input = vec![pcm.to_vec()];
-        let result = resampler
-            .process(&input, None)
-            .map_err(|e| format!("resample f32 {from_rate}->{to_rate}: {e}"))?;
+        let mut output = Vec::with_capacity(
+            (pcm.len() as f64 * to_rate as f64 / from_rate as f64).ceil() as usize + 16,
+        );

-        Ok(result[0].clone())
+        let mut offset = 0;
+        while offset < pcm.len() {
+            let remaining = pcm.len() - offset;
+            let mut chunk = vec![0.0f32; canonical_chunk];
+            let copy_len = remaining.min(canonical_chunk);
+            chunk[..copy_len].copy_from_slice(&pcm[offset..offset + copy_len]);
+
+            let input = vec![chunk];
+            let result = resampler
+                .process(&input, None)
+                .map_err(|e| format!("resample f32 {from_rate}->{to_rate}: {e}"))?;
+
+            if remaining < canonical_chunk {
+                let expected =
+                    (copy_len as f64 * to_rate as f64 / from_rate as f64).round() as usize;
+                output.extend_from_slice(&result[0][..expected.min(result[0].len())]);
+            } else {
+                output.extend_from_slice(&result[0]);
+            }
+
+            offset += canonical_chunk;
+        }
+
+        Ok(output)
    }

    /// Apply RNNoise ML noise suppression to 48kHz f32 PCM audio.