Files
siprouter/rust/crates/codec-lib/src/lib.rs

553 lines
20 KiB
Rust
Raw Normal View History

//! Audio codec library for the SIP router.
//!
//! Handles Opus ↔ G.722 ↔ PCMU/PCMA transcoding with ML noise suppression.
//! Used by the `proxy-engine` binary for all audio transcoding.
use audiopus::coder::{Decoder as OpusDecoder, Encoder as OpusEncoder};
use audiopus::packet::Packet as OpusPacket;
use audiopus::{Application, Bitrate as OpusBitrate, Channels, MutSignals, SampleRate};
use ezk_g722::libg722::{self, Bitrate};
use nnnoiseless::DenoiseState;
use rubato::{FftFixedIn, Resampler};
use std::collections::HashMap;
// ---- Payload type constants ------------------------------------------------
/// Payload type number identifying G.711 µ-law (PCMU) audio.
pub const PT_PCMU: u8 = 0;
/// Payload type number identifying G.711 A-law (PCMA) audio.
pub const PT_PCMA: u8 = 8;
/// Payload type number identifying G.722 audio.
pub const PT_G722: u8 = 9;
/// Payload type number this crate treats as Opus.
// NOTE(review): 111 is presumably the dynamically negotiated value used by the
// browser leg — confirm against the SDP handling elsewhere in the project.
pub const PT_OPUS: u8 = 111;
/// Return the native sample rate for a given payload type.
pub fn codec_sample_rate(pt: u8) -> u32 {
match pt {
PT_OPUS => 48000,
PT_G722 => 16000,
_ => 8000, // PCMU, PCMA
}
}
// ---- G.711 µ-law (PCMU) ---------------------------------------------------
/// Compress one 16-bit linear PCM sample into an 8-bit G.711 µ-law byte.
///
/// The magnitude is clipped to 32635 and biased by 0x84, then split into a
/// 3-bit segment number and a 4-bit mantissa. The assembled byte (sign,
/// segment, mantissa) is bit-inverted before it is returned.
pub fn mulaw_encode(sample: i16) -> u8 {
    const BIAS: i32 = 0x84;
    const CLIP: i32 = 32635;

    let sign_bit: u8 = if sample.is_negative() { 0x80 } else { 0x00 };

    // Widen before taking the magnitude so `i16::MIN` is representable.
    // After clipping and biasing the value is at most 32767.
    let biased = i32::from(sample).abs().min(CLIP) + BIAS;

    // Segment `k` (1..=7) is selected by the highest set bit at position
    // `k + 7`; if none of bits 8..=14 is set the segment is 0.
    let segment = (1u8..=7)
        .rev()
        .find(|&seg| (biased & (1i32 << (seg + 7))) != 0)
        .unwrap_or(0);

    let mantissa = ((biased >> (segment + 3)) & 0x0f) as u8;
    !(sign_bit | (segment << 4) | mantissa)
}
/// Expand an 8-bit G.711 µ-law byte into a 16-bit linear PCM sample.
///
/// The byte is bit-inverted, then split into sign (bit 7), segment
/// (bits 4..=6) and mantissa (bits 0..=3). The magnitude is rebuilt as
/// `(((mantissa << 3) + 0x84) << segment) - 0x84`, which is the inverse of
/// the packing performed by `mulaw_encode`.
///
/// The result always lies in `-32124..=32124`.
pub fn mulaw_decode(mulaw: u8) -> i16 {
    const BIAS: i32 = 0x84;

    let v = !mulaw;
    let negative = (v & 0x80) != 0;
    let exp = (v >> 4) & 0x07;
    let mantissa = i32::from(v & 0x0f);

    // The encoder takes the mantissa from bits `exp + 3 ..= exp + 6` of the
    // biased magnitude, so it must be put back shifted left by 3. A shift of
    // 4 would double its weight and distort every sample with a non-zero
    // mantissa.
    let magnitude = (((mantissa << 3) + BIAS) << exp) - BIAS;

    let sample = if negative { -magnitude } else { magnitude };
    // |sample| is at most 32124, so narrowing to i16 cannot lose information.
    sample as i16
}
// ---- G.711 A-law (PCMA) ---------------------------------------------------
/// Compress one 16-bit linear PCM sample into an 8-bit G.711 A-law byte.
///
/// The sign bit is set for non-negative samples. The magnitude (capped at
/// 32767) is split into a 3-bit segment and 4-bit mantissa, and the assembled
/// byte is XOR-ed with 0x55 before being returned.
pub fn alaw_encode(sample: i16) -> u8 {
    let sign_bit: u8 = if sample < 0 { 0x00 } else { 0x80 };

    // Widen before taking the magnitude so `i16::MIN` is representable.
    let magnitude = i32::from(sample).abs().min(32767);

    // Segment `k` (1..=7) is selected by the highest set bit at position
    // `k + 7`; if none of bits 8..=14 is set the segment is 0.
    let segment = (1u8..=7)
        .rev()
        .find(|&seg| (magnitude & (1i32 << (seg + 7))) != 0)
        .unwrap_or(0);

    // Segment 0 uses the same shift as segment 1.
    let shift = if segment == 0 { 4 } else { segment + 3 };
    let mantissa = ((magnitude >> shift) & 0x0f) as u8;

    (sign_bit | (segment << 4) | mantissa) ^ 0x55
}
/// Expand an 8-bit G.711 A-law byte into a 16-bit linear PCM sample.
///
/// The byte is XOR-ed with 0x55, then split into sign (bit 7, set means
/// non-negative), segment (bits 4..=6) and mantissa (bits 0..=3).
pub fn alaw_decode(alaw: u8) -> i16 {
    let bits = alaw ^ 0x55;
    let segment = u32::from((bits >> 4) & 0x07);
    // Work in i32 so intermediate shifts cannot overflow.
    let step_bits = i32::from(bits & 0x0f) << 4;

    let magnitude = match segment {
        0 => step_bits + 8,
        seg => (step_bits + 0x108) << (seg - 1),
    };

    let signed = if (bits & 0x80) == 0 {
        -magnitude
    } else {
        magnitude
    };
    signed.clamp(-32768, 32767) as i16
}
// ---- TranscodeState --------------------------------------------------------
/// Per-session codec state holding Opus, G.722, resampler, and denoiser instances.
///
/// Each concurrent call should get its own `TranscodeState` to prevent stateful
/// codecs (Opus, G.722 ADPCM) from corrupting each other.
///
/// All fields are private; the state is driven through the methods on this
/// type (`transcode`, `decode_to_pcm`, `encode_from_pcm`, the `*_f32`
/// variants, `resample`/`resample_f32`, and `opus_plc`).
pub struct TranscodeState {
/// Opus encoder, configured in `new()` as 48 kHz mono VoIP.
opus_enc: OpusEncoder,
/// Opus decoder (48 kHz mono); also used for packet loss concealment.
opus_dec: OpusDecoder,
/// G.722 encoder created with `Bitrate::Mode1_64000`.
g722_enc: libg722::encoder::Encoder,
/// G.722 decoder created with `Bitrate::Mode1_64000`.
g722_dec: libg722::decoder::Decoder,
/// Cached FFT resamplers keyed by (from_rate, to_rate, chunk_size).
/// Used by `resample` (i16 input, f64 processing); entries are created on
/// first use and kept for the lifetime of the session.
resamplers: HashMap<(u32, u32, usize), FftFixedIn<f64>>,
/// Cached f32 FFT resamplers keyed by (from_rate, to_rate, chunk_size).
/// Used by `resample_f32`; kept separate from the f64 cache above.
resamplers_f32: HashMap<(u32, u32, usize), FftFixedIn<f32>>,
/// ML noise suppression for the SIP-bound direction.
/// Selected by `transcode` when `direction` is `Some("to_sip")`.
denoiser_to_sip: Box<DenoiseState<'static>>,
/// ML noise suppression for the browser-bound direction.
/// Selected by `transcode` for any other `Some(..)` direction value.
denoiser_to_browser: Box<DenoiseState<'static>>,
}
impl TranscodeState {
/// Build a transcoding session whose codecs, resampler caches and denoisers
/// all start from a clean state.
///
/// The Opus encoder is 48 kHz mono in VoIP mode, set to complexity 5 and
/// 24 kbit/s. Both G.722 codecs use `Bitrate::Mode1_64000`.
///
/// # Errors
///
/// Returns a descriptive string if the Opus encoder or decoder cannot be
/// created, or if an encoder setting is rejected.
pub fn new() -> Result<Self, String> {
    let mut encoder = OpusEncoder::new(SampleRate::Hz48000, Channels::Mono, Application::Voip)
        .map_err(|err| format!("opus encoder: {err}"))?;
    if let Err(err) = encoder.set_complexity(5) {
        return Err(format!("opus set_complexity: {err}"));
    }
    if let Err(err) = encoder.set_bitrate(OpusBitrate::BitsPerSecond(24000)) {
        return Err(format!("opus set_bitrate: {err}"));
    }

    let decoder = match OpusDecoder::new(SampleRate::Hz48000, Channels::Mono) {
        Ok(dec) => dec,
        Err(err) => return Err(format!("opus decoder: {err}")),
    };

    Ok(Self {
        opus_enc: encoder,
        opus_dec: decoder,
        g722_enc: libg722::encoder::Encoder::new(Bitrate::Mode1_64000, false, false),
        g722_dec: libg722::decoder::Decoder::new(Bitrate::Mode1_64000, false, false),
        resamplers: HashMap::new(),
        resamplers_f32: HashMap::new(),
        denoiser_to_sip: DenoiseState::new(),
        denoiser_to_browser: DenoiseState::new(),
    })
}
/// High-quality sample rate conversion using rubato FFT resampler.
///
/// To maintain continuous filter state, the resampler always processes at a
/// canonical chunk size (20ms at the source rate). The resampler for each
/// `(from_rate, to_rate, chunk)` combination is created on first use and
/// cached on `self`, so its state carries over between calls.
///
/// Input is consumed in canonical chunks. A final short chunk is zero-padded
/// to the canonical size before processing, and only the proportional number
/// of output samples (`len * to_rate / from_rate`, rounded) is kept from it.
///
/// When `from_rate == to_rate` or `pcm` is empty, a copy of `pcm` is returned
/// without touching any resampler.
///
/// # Errors
///
/// Returns an error string if either rate is unusable (`to_rate` is zero or
/// `from_rate` is below 50 Hz, which would give an empty chunk), or if the
/// resampler cannot be constructed or fails while processing.
pub fn resample(
    &mut self,
    pcm: &[i16],
    from_rate: u32,
    to_rate: u32,
) -> Result<Vec<i16>, String> {
    if from_rate == to_rate || pcm.is_empty() {
        return Ok(pcm.to_vec());
    }

    let canonical_chunk = (from_rate as usize) / 50; // 20ms
    // A zero-length chunk would never advance through `pcm`; reject it up
    // front instead of looping forever.
    if canonical_chunk == 0 || to_rate == 0 {
        return Err(format!(
            "resample {from_rate}->{to_rate}: unsupported sample rate"
        ));
    }

    let key = (from_rate, to_rate, canonical_chunk);
    // Single map lookup; the resampler is only built when the slot is vacant.
    let resampler = match self.resamplers.entry(key) {
        std::collections::hash_map::Entry::Occupied(slot) => slot.into_mut(),
        std::collections::hash_map::Entry::Vacant(slot) => {
            let built = FftFixedIn::<f64>::new(
                from_rate as usize,
                to_rate as usize,
                canonical_chunk,
                1,
                1,
            )
            .map_err(|e| format!("resampler {from_rate}->{to_rate}: {e}"))?;
            slot.insert(built)
        }
    };

    let mut output = Vec::with_capacity(
        (pcm.len() as f64 * to_rate as f64 / from_rate as f64).ceil() as usize + 16,
    );
    let to_i16 = |s: f64| (s * 32767.0).round().clamp(-32768.0, 32767.0) as i16;

    // One mono channel buffer, reused for every chunk.
    let mut input = vec![vec![0.0f64; canonical_chunk]];
    for frame in pcm.chunks(canonical_chunk) {
        let buf = &mut input[0];
        for (dst, &src) in buf.iter_mut().zip(frame) {
            *dst = f64::from(src) / 32768.0;
        }
        // Zero-pad the tail when the last frame is short.
        buf[frame.len()..].fill(0.0);

        let result = resampler
            .process(&input, None)
            .map_err(|e| format!("resample {from_rate}->{to_rate}: {e}"))?;
        let produced = &result[0];

        let take = if frame.len() < canonical_chunk {
            // Keep only the output that corresponds to real input samples.
            let expected =
                (frame.len() as f64 * to_rate as f64 / from_rate as f64).round() as usize;
            expected.min(produced.len())
        } else {
            produced.len()
        };
        output.extend(produced[..take].iter().map(|&s| to_i16(s)));
    }
    Ok(output)
}
/// Run RNNoise noise suppression over 48kHz 16-bit PCM.
///
/// Audio is fed to `denoiser` in whole `DenoiseState::FRAME_SIZE`-sample
/// frames (480 samples, 10ms); the denoiser is mutated, so its state carries
/// over to the next call. Any trailing samples that do not fill a whole frame
/// are appended to the output unchanged.
///
/// The returned vector has the same length as `pcm`.
pub fn denoise(denoiser: &mut DenoiseState, pcm: &[i16]) -> Vec<i16> {
    let mut cleaned = Vec::with_capacity(pcm.len());
    let mut frame_out = [0.0f32; 480];

    let mut frames = pcm.chunks_exact(DenoiseState::FRAME_SIZE);
    for frame in frames.by_ref() {
        // The denoiser takes f32 samples in the i16 value range.
        let frame_in: Vec<f32> = frame.iter().map(|&s| f32::from(s)).collect();
        denoiser.process_frame(&mut frame_out, &frame_in);
        cleaned.extend(
            frame_out
                .iter()
                .map(|&s| s.round().clamp(-32768.0, 32767.0) as i16),
        );
    }

    // Leftover partial frame passes through untouched.
    cleaned.extend_from_slice(frames.remainder());
    cleaned
}
/// Convert an encoded audio payload from payload type `from_pt` to `to_pt`.
///
/// If the two payload types are equal the input bytes are returned as-is.
/// Otherwise the payload is decoded to PCM, optionally denoised, resampled to
/// the target codec's rate, and re-encoded.
///
/// `direction` controls denoising:
/// * `Some("to_sip")` — resample to 48 kHz, denoise with the SIP-bound
///   denoiser, then resample to the target rate.
/// * `Some(_)` (any other string) — same, using the browser-bound denoiser.
/// * `None` — no denoising; resample only if the rates differ.
///
/// # Errors
///
/// Propagates the error string from decoding, resampling or encoding.
pub fn transcode(
    &mut self,
    data: &[u8],
    from_pt: u8,
    to_pt: u8,
    direction: Option<&str>,
) -> Result<Vec<u8>, String> {
    if from_pt == to_pt {
        return Ok(data.to_vec());
    }

    let (pcm, rate) = self.decode_to_pcm(data, from_pt)?;
    let target_rate = codec_sample_rate(to_pt);

    let processed = match direction {
        None if rate == target_rate => pcm,
        None => self.resample(&pcm, rate, target_rate)?,
        Some(dir) => {
            // The denoiser works at 48 kHz, so go via that rate.
            let pcm_48k = self.resample(&pcm, rate, 48000)?;
            let denoiser = if dir == "to_sip" {
                &mut self.denoiser_to_sip
            } else {
                &mut self.denoiser_to_browser
            };
            let denoised = Self::denoise(denoiser, &pcm_48k);
            self.resample(&denoised, 48000, target_rate)?
        }
    };

    self.encode_from_pcm(&processed, to_pt)
}
/// Decode an encoded audio payload to raw 16-bit PCM samples.
/// Returns (samples, sample_rate).
pub fn decode_to_pcm(&mut self, data: &[u8], pt: u8) -> Result<(Vec<i16>, u32), String> {
match pt {
PT_OPUS => {
let mut pcm = vec![0i16; 5760]; // up to 120ms at 48kHz
let packet =
OpusPacket::try_from(data).map_err(|e| format!("opus packet: {e}"))?;
let out =
MutSignals::try_from(&mut pcm[..]).map_err(|e| format!("opus signals: {e}"))?;
let n: usize = self
.opus_dec
.decode(Some(packet), out, false)
.map_err(|e| format!("opus decode: {e}"))?
.into();
pcm.truncate(n);
Ok((pcm, 48000))
}
PT_G722 => {
let pcm = self.g722_dec.decode(data);
Ok((pcm, 16000))
}
PT_PCMU => {
let pcm: Vec<i16> = data.iter().map(|&b| mulaw_decode(b)).collect();
Ok((pcm, 8000))
}
PT_PCMA => {
let pcm: Vec<i16> = data.iter().map(|&b| alaw_decode(b)).collect();
Ok((pcm, 8000))
}
_ => Err(format!("unsupported source PT {pt}")),
}
}
/// Encode raw PCM samples to an audio codec.
pub fn encode_from_pcm(&mut self, pcm: &[i16], pt: u8) -> Result<Vec<u8>, String> {
match pt {
PT_OPUS => {
let mut buf = vec![0u8; 4000];
let n: usize = self
.opus_enc
.encode(pcm, &mut buf)
.map_err(|e| format!("opus encode: {e}"))?
.into();
buf.truncate(n);
Ok(buf)
}
PT_G722 => Ok(self.g722_enc.encode(pcm)),
PT_PCMU => Ok(pcm.iter().map(|&s| mulaw_encode(s)).collect()),
PT_PCMA => Ok(pcm.iter().map(|&s| alaw_encode(s)).collect()),
_ => Err(format!("unsupported target PT {pt}")),
}
}
// ---- f32 API for high-quality internal bus ----------------------------
/// Decode an encoded audio payload to f32 PCM samples in [-1.0, 1.0].
/// Returns (samples, sample_rate).
///
/// For Opus, uses native float decode (no i16 quantization).
/// For G.722/G.711, decodes to i16 then converts (codec is natively i16).
pub fn decode_to_f32(&mut self, data: &[u8], pt: u8) -> Result<(Vec<f32>, u32), String> {
match pt {
PT_OPUS => {
let mut pcm = vec![0.0f32; 5760]; // up to 120ms at 48kHz
let packet =
OpusPacket::try_from(data).map_err(|e| format!("opus packet: {e}"))?;
let out =
MutSignals::try_from(&mut pcm[..]).map_err(|e| format!("opus signals: {e}"))?;
let n: usize = self
.opus_dec
.decode_float(Some(packet), out, false)
.map_err(|e| format!("opus decode_float: {e}"))?
.into();
pcm.truncate(n);
Ok((pcm, 48000))
}
_ => {
// G.722, PCMU, PCMA: natively i16 codecs — decode then convert.
let (pcm_i16, rate) = self.decode_to_pcm(data, pt)?;
let pcm_f32 = pcm_i16.iter().map(|&s| s as f32 / 32768.0).collect();
Ok((pcm_f32, rate))
}
}
}
/// Ask the Opus decoder to synthesise audio for a missing packet.
///
/// The decoder is invoked with no input packet and an output buffer of
/// `frame_size` samples; `frame_size` should be 960 for 20ms at 48kHz.
/// Returns the f32 samples the decoder produced.
///
/// # Errors
///
/// Returns an error string if the output buffer is rejected or the decoder
/// fails.
pub fn opus_plc(&mut self, frame_size: usize) -> Result<Vec<f32>, String> {
    let mut concealed = vec![0.0f32; frame_size];
    let produced: usize = {
        let signals = MutSignals::try_from(&mut concealed[..])
            .map_err(|e| format!("opus plc signals: {e}"))?;
        // `None` tells the decoder the packet was lost.
        let lost_packet: Option<OpusPacket<'_>> = None;
        self.opus_dec
            .decode_float(lost_packet, signals, false)
            .map_err(|e| format!("opus plc: {e}"))?
            .into()
    };
    concealed.truncate(produced);
    Ok(concealed)
}
/// Encode f32 PCM samples ([-1.0, 1.0]) to an audio codec.
///
/// For Opus, uses native float encode (no i16 quantization).
/// For G.722/G.711, converts to i16 then encodes (codec is natively i16).
pub fn encode_from_f32(&mut self, pcm: &[f32], pt: u8) -> Result<Vec<u8>, String> {
match pt {
PT_OPUS => {
let mut buf = vec![0u8; 4000];
let n: usize = self
.opus_enc
.encode_float(pcm, &mut buf)
.map_err(|e| format!("opus encode_float: {e}"))?
.into();
buf.truncate(n);
Ok(buf)
}
_ => {
// G.722, PCMU, PCMA: natively i16 codecs.
let pcm_i16: Vec<i16> = pcm
.iter()
.map(|&s| (s * 32767.0).round().clamp(-32768.0, 32767.0) as i16)
.collect();
self.encode_from_pcm(&pcm_i16, pt)
}
}
}
/// High-quality sample rate conversion for f32 PCM using rubato FFT resampler.
///
/// To maintain continuous filter state, the resampler always processes at a
/// canonical chunk size (20ms at the source rate). The resampler for each
/// `(from_rate, to_rate, chunk)` combination is created on first use and
/// cached on `self`, so its state carries over between calls.
///
/// Input is consumed in canonical chunks. A final short chunk is zero-padded
/// to the canonical size before processing, and only the proportional number
/// of output samples (`len * to_rate / from_rate`, rounded) is kept from it.
///
/// When `from_rate == to_rate` or `pcm` is empty, a copy of `pcm` is returned
/// without touching any resampler.
///
/// # Errors
///
/// Returns an error string if either rate is unusable (`to_rate` is zero or
/// `from_rate` is below 50 Hz, which would give an empty chunk), or if the
/// resampler cannot be constructed or fails while processing.
pub fn resample_f32(
    &mut self,
    pcm: &[f32],
    from_rate: u32,
    to_rate: u32,
) -> Result<Vec<f32>, String> {
    if from_rate == to_rate || pcm.is_empty() {
        return Ok(pcm.to_vec());
    }

    let canonical_chunk = (from_rate as usize) / 50; // 20ms
    // A zero-length chunk would never advance through `pcm`; reject it up
    // front instead of looping forever.
    if canonical_chunk == 0 || to_rate == 0 {
        return Err(format!(
            "resample f32 {from_rate}->{to_rate}: unsupported sample rate"
        ));
    }

    let key = (from_rate, to_rate, canonical_chunk);
    // Single map lookup; the resampler is only built when the slot is vacant.
    let resampler = match self.resamplers_f32.entry(key) {
        std::collections::hash_map::Entry::Occupied(slot) => slot.into_mut(),
        std::collections::hash_map::Entry::Vacant(slot) => {
            let built = FftFixedIn::<f32>::new(
                from_rate as usize,
                to_rate as usize,
                canonical_chunk,
                1,
                1,
            )
            .map_err(|e| format!("resampler f32 {from_rate}->{to_rate}: {e}"))?;
            slot.insert(built)
        }
    };

    let mut output = Vec::with_capacity(
        (pcm.len() as f64 * to_rate as f64 / from_rate as f64).ceil() as usize + 16,
    );

    // One mono channel buffer, reused for every chunk.
    let mut input = vec![vec![0.0f32; canonical_chunk]];
    for frame in pcm.chunks(canonical_chunk) {
        let buf = &mut input[0];
        buf[..frame.len()].copy_from_slice(frame);
        // Zero-pad the tail when the last frame is short.
        buf[frame.len()..].fill(0.0);

        let result = resampler
            .process(&input, None)
            .map_err(|e| format!("resample f32 {from_rate}->{to_rate}: {e}"))?;
        let produced = &result[0];

        let take = if frame.len() < canonical_chunk {
            // Keep only the output that corresponds to real input samples.
            let expected =
                (frame.len() as f64 * to_rate as f64 / from_rate as f64).round() as usize;
            expected.min(produced.len())
        } else {
            produced.len()
        };
        output.extend_from_slice(&produced[..take]);
    }
    Ok(output)
}
/// Run RNNoise noise suppression over 48kHz f32 PCM.
///
/// Audio is fed to `denoiser` in whole `DenoiseState::FRAME_SIZE`-sample
/// frames (480 samples, 10ms); the denoiser is mutated, so its state carries
/// over to the next call. Samples are scaled up by 32768 on the way in and
/// back down on the way out. Any trailing samples that do not fill a whole
/// frame are appended to the output unchanged.
///
/// The returned vector has the same length as `pcm`.
pub fn denoise_f32(denoiser: &mut DenoiseState, pcm: &[f32]) -> Vec<f32> {
    let mut cleaned = Vec::with_capacity(pcm.len());
    let mut frame_out = [0.0f32; 480];

    let mut frames = pcm.chunks_exact(DenoiseState::FRAME_SIZE);
    for frame in frames.by_ref() {
        // nnnoiseless expects f32 samples scaled as i16 range (-32768..32767).
        let scaled: Vec<f32> = frame.iter().map(|&s| s * 32768.0).collect();
        denoiser.process_frame(&mut frame_out, &scaled);
        cleaned.extend(frame_out.iter().map(|&s| s / 32768.0));
    }

    // Leftover partial frame passes through untouched.
    cleaned.extend_from_slice(frames.remainder());
    cleaned
}
}
/// Create a new standalone denoiser for per-leg inbound processing.
///
/// Returns a fresh boxed `DenoiseState`, independent of the two denoisers a
/// `TranscodeState` owns. It can be passed to `TranscodeState::denoise` or
/// `TranscodeState::denoise_f32`, both of which take the denoiser as an
/// explicit argument.
pub fn new_denoiser() -> Box<DenoiseState<'static>> {
DenoiseState::new()
}
#[cfg(test)]
mod tests {
    use super::*;

    /// Probe samples: both extremes, zero, and small/medium values of each sign.
    const PROBES: [i16; 7] = [-32768, -1000, -1, 0, 1, 1000, 32767];

    /// µ-law is lossy, so a round trip only has to land close to the input.
    #[test]
    fn mulaw_roundtrip() {
        for &sample in PROBES.iter() {
            let decoded = mulaw_decode(mulaw_encode(sample));
            let error = (i32::from(sample) - i32::from(decoded)).abs();
            assert!(
                error < 1000,
                "µ-law roundtrip failed for {sample}: got {decoded}"
            );
        }
    }

    /// A-law is lossy, so a round trip only has to land close to the input.
    #[test]
    fn alaw_roundtrip() {
        for &sample in PROBES.iter() {
            let decoded = alaw_decode(alaw_encode(sample));
            let error = (i32::from(sample) - i32::from(decoded)).abs();
            assert!(
                error < 1000,
                "A-law roundtrip failed for {sample}: got {decoded}"
            );
        }
    }

    /// Each known payload type reports its native sample rate.
    #[test]
    fn codec_sample_rates() {
        let expected = [
            (PT_OPUS, 48000),
            (PT_G722, 16000),
            (PT_PCMU, 8000),
            (PT_PCMA, 8000),
        ];
        for (pt, rate) in expected {
            assert_eq!(codec_sample_rate(pt), rate);
        }
    }

    /// Transcoding to the same payload type returns the bytes unchanged.
    #[test]
    fn transcode_same_pt_is_passthrough() {
        let mut state = TranscodeState::new().unwrap();
        let payload = vec![0u8; 160];
        let out = state
            .transcode(&payload, PT_PCMU, PT_PCMU, None)
            .unwrap();
        assert_eq!(out, payload);
    }

    /// PCMU → PCMA → PCMU keeps the 160-byte (20ms at 8kHz) frame size.
    #[test]
    fn pcmu_to_pcma_roundtrip() {
        let mut state = TranscodeState::new().unwrap();
        let pcmu_frame: Vec<u8> = (0i16..160)
            .map(|i| mulaw_encode(i * 200 - 16000))
            .collect();

        let pcma_frame = state
            .transcode(&pcmu_frame, PT_PCMU, PT_PCMA, None)
            .unwrap();
        assert_eq!(pcma_frame.len(), 160); // Same frame size

        let pcmu_again = state
            .transcode(&pcma_frame, PT_PCMA, PT_PCMU, None)
            .unwrap();
        assert_eq!(pcmu_again.len(), 160);
    }
}