//! Audio codec library for the SIP router. //! //! Handles Opus ↔ G.722 ↔ PCMU/PCMA transcoding with ML noise suppression. //! Used by the `proxy-engine` binary for all audio transcoding. use audiopus::coder::{Decoder as OpusDecoder, Encoder as OpusEncoder}; use audiopus::packet::Packet as OpusPacket; use audiopus::{Application, Bitrate as OpusBitrate, Channels, MutSignals, SampleRate}; use ezk_g722::libg722::{self, Bitrate}; use nnnoiseless::DenoiseState; use rubato::{FftFixedIn, Resampler}; use std::collections::HashMap; // ---- Payload type constants ------------------------------------------------ pub const PT_PCMU: u8 = 0; pub const PT_PCMA: u8 = 8; pub const PT_G722: u8 = 9; pub const PT_OPUS: u8 = 111; /// Return the native sample rate for a given payload type. pub fn codec_sample_rate(pt: u8) -> u32 { match pt { PT_OPUS => 48000, PT_G722 => 16000, _ => 8000, // PCMU, PCMA } } // ---- G.711 µ-law (PCMU) --------------------------------------------------- pub fn mulaw_encode(sample: i16) -> u8 { const BIAS: i16 = 0x84; const CLIP: i16 = 32635; let sign = if sample < 0 { 0x80u8 } else { 0 }; let mut s = (sample as i32).unsigned_abs().min(CLIP as u32) as i16; s += BIAS; let mut exp = 7u8; let mut mask = 0x4000i16; while exp > 0 && (s & mask) == 0 { exp -= 1; mask >>= 1; } let mantissa = ((s >> (exp + 3)) & 0x0f) as u8; !(sign | (exp << 4) | mantissa) } pub fn mulaw_decode(mulaw: u8) -> i16 { let v = !mulaw; let sign = v & 0x80; let exp = (v >> 4) & 0x07; let mantissa = v & 0x0f; // Use i32 to avoid overflow when exp=7, mantissa=15 (result > i16::MAX). let mut sample = (((mantissa as i32) << 4) + 0x84) << exp; sample -= 0x84; let sample = if sign != 0 { -sample } else { sample }; sample.clamp(-32768, 32767) as i16 } // ---- G.711 A-law (PCMA) --------------------------------------------------- pub fn alaw_encode(sample: i16) -> u8 { let sign = if sample >= 0 { 0x80u8 } else { 0 }; let s = (sample as i32).unsigned_abs().min(32767) as i16; let mut exp = 7u8; let mut mask = 0x4000i16; while exp > 0 && (s & mask) == 0 { exp -= 1; mask >>= 1; } let mantissa = if exp > 0 { ((s >> (exp + 3)) & 0x0f) as u8 } else { ((s >> 4) & 0x0f) as u8 }; (sign | (exp << 4) | mantissa) ^ 0x55 } pub fn alaw_decode(alaw: u8) -> i16 { let v = alaw ^ 0x55; let sign = v & 0x80; let exp = (v >> 4) & 0x07; let mantissa = v & 0x0f; // Use i32 to avoid overflow for extreme values. let sample = if exp == 0 { ((mantissa as i32) << 4) + 8 } else { (((mantissa as i32) << 4) + 0x108) << (exp - 1) }; let sample = if sign != 0 { sample } else { -sample }; sample.clamp(-32768, 32767) as i16 } // ---- TranscodeState -------------------------------------------------------- /// Per-session codec state holding Opus, G.722, resampler, and denoiser instances. /// /// Each concurrent call should get its own `TranscodeState` to prevent stateful /// codecs (Opus, G.722 ADPCM) from corrupting each other. pub struct TranscodeState { opus_enc: OpusEncoder, opus_dec: OpusDecoder, g722_enc: libg722::encoder::Encoder, g722_dec: libg722::decoder::Decoder, /// Cached FFT resamplers keyed by (from_rate, to_rate, chunk_size). resamplers: HashMap<(u32, u32, usize), FftFixedIn>, /// ML noise suppression for the SIP-bound direction. denoiser_to_sip: Box>, /// ML noise suppression for the browser-bound direction. denoiser_to_browser: Box>, } impl TranscodeState { /// Create a new transcoding session with fresh codec state. pub fn new() -> Result { let mut opus_enc = OpusEncoder::new(SampleRate::Hz48000, Channels::Mono, Application::Voip) .map_err(|e| format!("opus encoder: {e}"))?; opus_enc .set_complexity(5) .map_err(|e| format!("opus set_complexity: {e}"))?; opus_enc .set_bitrate(OpusBitrate::BitsPerSecond(24000)) .map_err(|e| format!("opus set_bitrate: {e}"))?; let opus_dec = OpusDecoder::new(SampleRate::Hz48000, Channels::Mono) .map_err(|e| format!("opus decoder: {e}"))?; let g722_enc = libg722::encoder::Encoder::new(Bitrate::Mode1_64000, false, false); let g722_dec = libg722::decoder::Decoder::new(Bitrate::Mode1_64000, false, false); Ok(Self { opus_enc, opus_dec, g722_enc, g722_dec, resamplers: HashMap::new(), denoiser_to_sip: DenoiseState::new(), denoiser_to_browser: DenoiseState::new(), }) } /// High-quality sample rate conversion using rubato FFT resampler. /// Resamplers are cached by (from_rate, to_rate, chunk_size) and reused, /// maintaining proper inter-frame state for continuous audio streams. pub fn resample( &mut self, pcm: &[i16], from_rate: u32, to_rate: u32, ) -> Result, String> { if from_rate == to_rate || pcm.is_empty() { return Ok(pcm.to_vec()); } let chunk = pcm.len(); let key = (from_rate, to_rate, chunk); if !self.resamplers.contains_key(&key) { let r = FftFixedIn::::new(from_rate as usize, to_rate as usize, chunk, 1, 1) .map_err(|e| format!("resampler {from_rate}->{to_rate}: {e}"))?; self.resamplers.insert(key, r); } let resampler = self.resamplers.get_mut(&key).unwrap(); let float_in: Vec = pcm.iter().map(|&s| s as f64 / 32768.0).collect(); let input = vec![float_in]; let result = resampler .process(&input, None) .map_err(|e| format!("resample {from_rate}->{to_rate}: {e}"))?; Ok(result[0] .iter() .map(|&s| (s * 32767.0).round().clamp(-32768.0, 32767.0) as i16) .collect()) } /// Apply RNNoise ML noise suppression to 48kHz PCM audio. /// Processes in 480-sample (10ms) frames. State persists across calls. pub fn denoise(denoiser: &mut DenoiseState, pcm: &[i16]) -> Vec { let frame_size = DenoiseState::FRAME_SIZE; // 480 let total = pcm.len(); let whole = (total / frame_size) * frame_size; let mut output = Vec::with_capacity(total); let mut out_buf = [0.0f32; 480]; for offset in (0..whole).step_by(frame_size) { let input: Vec = pcm[offset..offset + frame_size] .iter() .map(|&s| s as f32) .collect(); denoiser.process_frame(&mut out_buf, &input); output.extend( out_buf .iter() .map(|&s| s.round().clamp(-32768.0, 32767.0) as i16), ); } if whole < total { output.extend_from_slice(&pcm[whole..]); } output } /// Transcode audio payload from one codec to another. /// /// `direction`: `Some("to_sip")` or `Some("to_browser")` selects per-direction /// denoiser. `None` skips denoising (backward compat). pub fn transcode( &mut self, data: &[u8], from_pt: u8, to_pt: u8, direction: Option<&str>, ) -> Result, String> { if from_pt == to_pt { return Ok(data.to_vec()); } let (pcm, rate) = self.decode_to_pcm(data, from_pt)?; let processed = if let Some(dir) = direction { let pcm_48k = self.resample(&pcm, rate, 48000)?; let denoiser = match dir { "to_sip" => &mut self.denoiser_to_sip, _ => &mut self.denoiser_to_browser, }; let denoised = Self::denoise(denoiser, &pcm_48k); let target_rate = codec_sample_rate(to_pt); self.resample(&denoised, 48000, target_rate)? } else { let target_rate = codec_sample_rate(to_pt); if rate == target_rate { pcm } else { self.resample(&pcm, rate, target_rate)? } }; self.encode_from_pcm(&processed, to_pt) } /// Decode an encoded audio payload to raw 16-bit PCM samples. /// Returns (samples, sample_rate). pub fn decode_to_pcm(&mut self, data: &[u8], pt: u8) -> Result<(Vec, u32), String> { match pt { PT_OPUS => { let mut pcm = vec![0i16; 5760]; // up to 120ms at 48kHz let packet = OpusPacket::try_from(data).map_err(|e| format!("opus packet: {e}"))?; let out = MutSignals::try_from(&mut pcm[..]).map_err(|e| format!("opus signals: {e}"))?; let n: usize = self .opus_dec .decode(Some(packet), out, false) .map_err(|e| format!("opus decode: {e}"))? .into(); pcm.truncate(n); Ok((pcm, 48000)) } PT_G722 => { let pcm = self.g722_dec.decode(data); Ok((pcm, 16000)) } PT_PCMU => { let pcm: Vec = data.iter().map(|&b| mulaw_decode(b)).collect(); Ok((pcm, 8000)) } PT_PCMA => { let pcm: Vec = data.iter().map(|&b| alaw_decode(b)).collect(); Ok((pcm, 8000)) } _ => Err(format!("unsupported source PT {pt}")), } } /// Encode raw PCM samples to an audio codec. pub fn encode_from_pcm(&mut self, pcm: &[i16], pt: u8) -> Result, String> { match pt { PT_OPUS => { let mut buf = vec![0u8; 4000]; let n: usize = self .opus_enc .encode(pcm, &mut buf) .map_err(|e| format!("opus encode: {e}"))? .into(); buf.truncate(n); Ok(buf) } PT_G722 => Ok(self.g722_enc.encode(pcm)), PT_PCMU => Ok(pcm.iter().map(|&s| mulaw_encode(s)).collect()), PT_PCMA => Ok(pcm.iter().map(|&s| alaw_encode(s)).collect()), _ => Err(format!("unsupported target PT {pt}")), } } } #[cfg(test)] mod tests { use super::*; #[test] fn mulaw_roundtrip() { for sample in [-32768i16, -1000, -1, 0, 1, 1000, 32767] { let encoded = mulaw_encode(sample); let decoded = mulaw_decode(encoded); // µ-law is lossy; verify the decoded value is close. assert!((sample as i32 - decoded as i32).abs() < 1000, "µ-law roundtrip failed for {sample}: got {decoded}"); } } #[test] fn alaw_roundtrip() { for sample in [-32768i16, -1000, -1, 0, 1, 1000, 32767] { let encoded = alaw_encode(sample); let decoded = alaw_decode(encoded); assert!((sample as i32 - decoded as i32).abs() < 1000, "A-law roundtrip failed for {sample}: got {decoded}"); } } #[test] fn codec_sample_rates() { assert_eq!(codec_sample_rate(PT_OPUS), 48000); assert_eq!(codec_sample_rate(PT_G722), 16000); assert_eq!(codec_sample_rate(PT_PCMU), 8000); assert_eq!(codec_sample_rate(PT_PCMA), 8000); } #[test] fn transcode_same_pt_is_passthrough() { let mut st = TranscodeState::new().unwrap(); let data = vec![0u8; 160]; let result = st.transcode(&data, PT_PCMU, PT_PCMU, None).unwrap(); assert_eq!(result, data); } #[test] fn pcmu_to_pcma_roundtrip() { let mut st = TranscodeState::new().unwrap(); // 160 bytes = 20ms of PCMU at 8kHz let pcmu_data: Vec = (0..160).map(|i| mulaw_encode((i as i16 * 200) - 16000)).collect(); let pcma = st.transcode(&pcmu_data, PT_PCMU, PT_PCMA, None).unwrap(); assert_eq!(pcma.len(), 160); // Same frame size let back = st.transcode(&pcma, PT_PCMA, PT_PCMU, None).unwrap(); assert_eq!(back.len(), 160); } }