Files
siprouter/rust/crates/codec-lib/src/lib.rs

350 lines
12 KiB
Rust
Raw Normal View History

//! Audio codec library for the SIP router.
//!
//! Handles Opus ↔ G.722 ↔ PCMU/PCMA transcoding with ML noise suppression.
//! Used by both the standalone `opus-codec` CLI and the `proxy-engine` binary.
use audiopus::coder::{Decoder as OpusDecoder, Encoder as OpusEncoder};
use audiopus::packet::Packet as OpusPacket;
use audiopus::{Application, Bitrate as OpusBitrate, Channels, MutSignals, SampleRate};
use ezk_g722::libg722::{self, Bitrate};
use nnnoiseless::DenoiseState;
use rubato::{FftFixedIn, Resampler};
use std::collections::HashMap;
// ---- Payload type constants ------------------------------------------------
/// Payload type number for G.711 µ-law (PCMU).
pub const PT_PCMU: u8 = 0;
/// Payload type number for G.711 A-law (PCMA).
pub const PT_PCMA: u8 = 8;
/// Payload type number for G.722 wideband audio.
pub const PT_G722: u8 = 9;
/// Payload type number this library uses for Opus.
// NOTE(review): 111 lies in the dynamic payload-type range, so it presumably
// has to match what the signalling layer negotiates — confirm against callers.
pub const PT_OPUS: u8 = 111;
/// Return the native sample rate for a given payload type.
pub fn codec_sample_rate(pt: u8) -> u32 {
match pt {
PT_OPUS => 48000,
PT_G722 => 16000,
_ => 8000, // PCMU, PCMA
}
}
// ---- G.711 µ-law (PCMU) ---------------------------------------------------
/// Compress one 16-bit linear PCM sample into a G.711 µ-law (PCMU) byte.
///
/// The magnitude is clipped to 32635, biased by 0x84 and split into a 3-bit
/// segment number and a 4-bit step within that segment. The sign, segment and
/// step are packed into one byte which is then bit-inverted.
pub fn mulaw_encode(sample: i16) -> u8 {
    const BIAS: u32 = 0x84;
    const CLIP: u32 = 32635;

    let sign_bit: u8 = if sample < 0 { 0x80 } else { 0x00 };

    // Widen before taking the magnitude so that i16::MIN does not overflow.
    // After clipping and biasing the value always lies in 132..=32767, i.e.
    // its highest set bit is somewhere between bit 7 and bit 14.
    let biased = i32::from(sample).unsigned_abs().min(CLIP) + BIAS;

    // Segment = (index of the highest set bit) - 7. For a u32 whose top bit
    // is at index p, leading_zeros() is 31 - p, hence 24 - leading_zeros().
    let segment = (24 - biased.leading_zeros()) as u8;
    let step = ((biased >> (segment + 3)) & 0x0f) as u8;

    !(sign_bit | (segment << 4) | step)
}
/// Expand one G.711 µ-law (PCMU) byte into a 16-bit linear PCM sample.
///
/// The byte is stored bit-inverted. After undoing the inversion, bit 7 is the
/// sign (set means negative), bits 6..4 are the segment number and bits 3..0
/// are the step within the segment. The magnitude is rebuilt as
/// `(((step << 3) + 0x84) << segment) - 0x84`, which yields values in
/// `-32124..=32124`.
pub fn mulaw_decode(mulaw: u8) -> i16 {
    const BIAS: i16 = 0x84;

    let v = !mulaw;
    let negative = (v & 0x80) != 0;
    let exp = (v >> 4) & 0x07;
    let mantissa = i16::from(v & 0x0f);

    // The step sits three bits above the LSB, mirroring the `>> (exp + 3)`
    // used when encoding. Shifting by four instead doubles every step and
    // pushes the top segment past i16::MAX. With the correct shift the largest
    // intermediate value is (15 * 8 + 132) << 7 = 32256, so i16 arithmetic
    // cannot overflow and no clamping is required.
    let magnitude = (((mantissa << 3) + BIAS) << exp) - BIAS;

    if negative {
        -magnitude
    } else {
        magnitude
    }
}
// ---- G.711 A-law (PCMA) ---------------------------------------------------
/// Compress one 16-bit linear PCM sample into a G.711 A-law (PCMA) byte.
///
/// The magnitude (capped at 32767) is split into a 3-bit segment number and a
/// 4-bit step. Non-negative samples carry the 0x80 polarity bit. The packed
/// byte is finally XOR-ed with 0x55.
pub fn alaw_encode(sample: i16) -> u8 {
    let polarity: u8 = if sample < 0 { 0x00 } else { 0x80 };

    // Widen before taking the magnitude so that i16::MIN does not overflow.
    let magnitude = i32::from(sample).unsigned_abs().min(32767);

    // Magnitudes below 0x100 all belong to segment 0. Above that the segment
    // is (index of the highest set bit) - 7, giving 1..=7 for bits 8..=14.
    let segment: u8 = if magnitude < 0x100 {
        0
    } else {
        (24 - magnitude.leading_zeros()) as u8
    };

    // Segments 0 and 1 share the same step size, hence the same shift of 4.
    let shift: u8 = if segment == 0 { 4 } else { segment + 3 };
    let step = ((magnitude >> shift) & 0x0f) as u8;

    (polarity | (segment << 4) | step) ^ 0x55
}
/// Expand one G.711 A-law (PCMA) byte into a 16-bit linear PCM sample.
///
/// After undoing the 0x55 XOR mask, bit 7 is the polarity (set means
/// non-negative), bits 6..4 are the segment number and bits 3..0 are the step
/// within the segment.
pub fn alaw_decode(alaw: u8) -> i16 {
    let bits = alaw ^ 0x55;
    let segment = (bits >> 4) & 0x07;
    let step = i16::from(bits & 0x0f) << 4;

    // The largest magnitude is (240 + 0x108) << 6 = 32256, so the arithmetic
    // stays inside i16 and needs no clamping.
    let magnitude = match segment {
        0 => step + 8,
        n => (step + 0x108) << (n - 1),
    };

    if (bits & 0x80) == 0 {
        -magnitude
    } else {
        magnitude
    }
}
// ---- TranscodeState --------------------------------------------------------
/// Per-session codec state holding Opus, G.722, resampler, and denoiser instances.
///
/// Each concurrent call should get its own `TranscodeState` to prevent stateful
/// codecs (Opus, G.722 ADPCM) from corrupting each other.
///
/// All fields are private; the state is only reachable through the methods on
/// this type.
pub struct TranscodeState {
/// Opus encoder used when the target payload type is Opus.
opus_enc: OpusEncoder,
/// Opus decoder used when the source payload type is Opus.
opus_dec: OpusDecoder,
/// G.722 encoder used when the target payload type is G.722.
g722_enc: libg722::encoder::Encoder,
/// G.722 decoder used when the source payload type is G.722.
g722_dec: libg722::decoder::Decoder,
/// Cached FFT resamplers keyed by (from_rate, to_rate, chunk_size).
/// Entries are created on first use and are never removed.
resamplers: HashMap<(u32, u32, usize), FftFixedIn<f64>>,
/// ML noise suppression for the SIP-bound direction.
denoiser_to_sip: Box<DenoiseState<'static>>,
/// ML noise suppression for the browser-bound direction.
denoiser_to_browser: Box<DenoiseState<'static>>,
}
impl TranscodeState {
    /// Create a new transcoding session with fresh codec state.
    ///
    /// The Opus encoder is set up for 48 kHz mono VoIP at complexity 5 and
    /// 24 kbit/s; the Opus decoder for 48 kHz mono. Both G.722 instances use
    /// the 64 kbit/s mode. The resampler cache starts empty and each direction
    /// gets its own denoiser.
    ///
    /// # Errors
    ///
    /// Returns a descriptive message when the Opus encoder or decoder cannot
    /// be created or configured.
    pub fn new() -> Result<Self, String> {
        let mut opus_enc =
            OpusEncoder::new(SampleRate::Hz48000, Channels::Mono, Application::Voip)
                .map_err(|e| format!("opus encoder: {e}"))?;
        opus_enc
            .set_complexity(5)
            .map_err(|e| format!("opus set_complexity: {e}"))?;
        opus_enc
            .set_bitrate(OpusBitrate::BitsPerSecond(24000))
            .map_err(|e| format!("opus set_bitrate: {e}"))?;

        let opus_dec = OpusDecoder::new(SampleRate::Hz48000, Channels::Mono)
            .map_err(|e| format!("opus decoder: {e}"))?;

        Ok(Self {
            opus_enc,
            opus_dec,
            g722_enc: libg722::encoder::Encoder::new(Bitrate::Mode1_64000, false, false),
            g722_dec: libg722::decoder::Decoder::new(Bitrate::Mode1_64000, false, false),
            resamplers: HashMap::new(),
            denoiser_to_sip: DenoiseState::new(),
            denoiser_to_browser: DenoiseState::new(),
        })
    }

    /// High-quality sample rate conversion using the rubato FFT resampler.
    ///
    /// Resamplers are cached by (from_rate, to_rate, chunk_size) and reused, so
    /// a stream that keeps delivering frames of the same length keeps feeding
    /// the same resampler instance. A different frame length selects (or
    /// creates) a different instance.
    ///
    /// When `from_rate == to_rate`, or `pcm` is empty, the input is returned
    /// as an unmodified copy.
    ///
    /// # Errors
    ///
    /// Returns a descriptive message when the resampler cannot be constructed,
    /// when processing fails, or when the resampler yields no output channel.
    pub fn resample(
        &mut self,
        pcm: &[i16],
        from_rate: u32,
        to_rate: u32,
    ) -> Result<Vec<i16>, String> {
        if from_rate == to_rate || pcm.is_empty() {
            return Ok(pcm.to_vec());
        }

        let chunk = pcm.len();
        // A single map lookup; the resampler is only built when the slot is
        // vacant, and a construction failure leaves the cache untouched.
        let resampler = match self.resamplers.entry((from_rate, to_rate, chunk)) {
            std::collections::hash_map::Entry::Occupied(slot) => slot.into_mut(),
            std::collections::hash_map::Entry::Vacant(slot) => {
                let fresh =
                    FftFixedIn::<f64>::new(from_rate as usize, to_rate as usize, chunk, 1, 1)
                        .map_err(|e| format!("resampler {from_rate}->{to_rate}: {e}"))?;
                slot.insert(fresh)
            }
        };

        // One channel of samples scaled into the floating-point range.
        let float_in: Vec<f64> = pcm.iter().map(|&s| f64::from(s) / 32768.0).collect();
        let input = vec![float_in];
        let channels = resampler
            .process(&input, None)
            .map_err(|e| format!("resample {from_rate}->{to_rate}: {e}"))?;
        let mono = channels
            .first()
            .ok_or_else(|| format!("resample {from_rate}->{to_rate}: no output channel"))?;

        Ok(mono
            .iter()
            .map(|&s| (s * 32767.0).round().clamp(-32768.0, 32767.0) as i16)
            .collect())
    }

    /// Apply RNNoise ML noise suppression to 48kHz PCM audio.
    ///
    /// The input is processed in whole frames of `DenoiseState::FRAME_SIZE`
    /// samples; the denoiser's state persists across calls. Samples left over
    /// after the last whole frame are appended to the output unchanged, so the
    /// result always has the same length as `pcm`.
    pub fn denoise(denoiser: &mut DenoiseState, pcm: &[i16]) -> Vec<i16> {
        let frame_size = DenoiseState::FRAME_SIZE;
        let mut output = Vec::with_capacity(pcm.len());

        // Scratch buffers are allocated once per call and reused per frame.
        let mut frame_in = vec![0.0f32; frame_size];
        let mut frame_out = vec![0.0f32; frame_size];

        let mut frames = pcm.chunks_exact(frame_size);
        for frame in frames.by_ref() {
            for (dst, &src) in frame_in.iter_mut().zip(frame) {
                *dst = f32::from(src);
            }
            denoiser.process_frame(&mut frame_out, &frame_in);
            output.extend(
                frame_out
                    .iter()
                    .map(|&s| s.round().clamp(-32768.0, 32767.0) as i16),
            );
        }
        // Trailing partial frame: passed through without denoising.
        output.extend_from_slice(frames.remainder());
        output
    }

    /// Transcode audio payload from one codec to another.
    ///
    /// `direction`: `Some("to_sip")` or `Some("to_browser")` selects the
    /// per-direction denoiser; any other string is treated like
    /// `"to_browser"`. `None` skips denoising (backward compat).
    ///
    /// When `from_pt == to_pt` the payload is returned as an unmodified copy
    /// and no decoding, denoising or encoding takes place.
    ///
    /// # Errors
    ///
    /// Propagates the message from decoding, resampling or encoding, including
    /// the "unsupported source/target PT" errors for unknown payload types.
    pub fn transcode(
        &mut self,
        data: &[u8],
        from_pt: u8,
        to_pt: u8,
        direction: Option<&str>,
    ) -> Result<Vec<u8>, String> {
        if from_pt == to_pt {
            return Ok(data.to_vec());
        }

        let (pcm, rate) = self.decode_to_pcm(data, from_pt)?;
        let target_rate = codec_sample_rate(to_pt);

        let processed = match direction {
            Some(dir) => {
                // The denoiser works on 48 kHz audio, so go through 48 kHz
                // regardless of the source and target rates.
                let pcm_48k = self.resample(&pcm, rate, 48000)?;
                let denoiser = match dir {
                    "to_sip" => &mut self.denoiser_to_sip,
                    _ => &mut self.denoiser_to_browser,
                };
                let denoised = Self::denoise(denoiser, &pcm_48k);
                self.resample(&denoised, 48000, target_rate)?
            }
            // Same rate on both sides: hand the decoded samples straight on.
            None if rate == target_rate => pcm,
            None => self.resample(&pcm, rate, target_rate)?,
        };

        self.encode_from_pcm(&processed, to_pt)
    }

    /// Decode an encoded audio payload to raw 16-bit PCM samples.
    ///
    /// Returns `(samples, sample_rate)`: 48000 for Opus, 16000 for G.722 and
    /// 8000 for PCMU/PCMA.
    ///
    /// # Errors
    ///
    /// Returns a descriptive message when the Opus packet cannot be parsed or
    /// decoded, or when `pt` is not one of the supported payload types.
    pub fn decode_to_pcm(&mut self, data: &[u8], pt: u8) -> Result<(Vec<i16>, u32), String> {
        // Room for up to 120ms of audio at 48kHz.
        const MAX_OPUS_FRAME_SAMPLES: usize = 5760;

        match pt {
            PT_OPUS => {
                let mut pcm = vec![0i16; MAX_OPUS_FRAME_SAMPLES];
                let packet =
                    OpusPacket::try_from(data).map_err(|e| format!("opus packet: {e}"))?;
                let out =
                    MutSignals::try_from(&mut pcm[..]).map_err(|e| format!("opus signals: {e}"))?;
                let n: usize = self
                    .opus_dec
                    .decode(Some(packet), out, false)
                    .map_err(|e| format!("opus decode: {e}"))?
                    .into();
                // Keep only the samples the decoder actually produced.
                pcm.truncate(n);
                Ok((pcm, 48000))
            }
            PT_G722 => Ok((self.g722_dec.decode(data), 16000)),
            PT_PCMU => Ok((data.iter().map(|&b| mulaw_decode(b)).collect(), 8000)),
            PT_PCMA => Ok((data.iter().map(|&b| alaw_decode(b)).collect(), 8000)),
            _ => Err(format!("unsupported source PT {pt}")),
        }
    }

    /// Encode raw PCM samples to an audio codec.
    ///
    /// `pcm` must already be at the sample rate of the target codec; this
    /// method performs no resampling.
    ///
    /// # Errors
    ///
    /// Returns a descriptive message when Opus encoding fails or when `pt` is
    /// not one of the supported payload types.
    pub fn encode_from_pcm(&mut self, pcm: &[i16], pt: u8) -> Result<Vec<u8>, String> {
        // Scratch space handed to the Opus encoder for one packet.
        const OPUS_OUT_BUF_BYTES: usize = 4000;

        match pt {
            PT_OPUS => {
                let mut buf = vec![0u8; OPUS_OUT_BUF_BYTES];
                let n: usize = self
                    .opus_enc
                    .encode(pcm, &mut buf)
                    .map_err(|e| format!("opus encode: {e}"))?
                    .into();
                // Keep only the bytes the encoder actually wrote.
                buf.truncate(n);
                Ok(buf)
            }
            PT_G722 => Ok(self.g722_enc.encode(pcm)),
            PT_PCMU => Ok(pcm.iter().map(|&s| mulaw_encode(s)).collect()),
            PT_PCMA => Ok(pcm.iter().map(|&s| alaw_encode(s)).collect()),
            _ => Err(format!("unsupported target PT {pt}")),
        }
    }
}
#[cfg(test)]
mod tests {
    use super::*;

    /// Probe samples: both extremes, zero, and small/medium magnitudes of
    /// either sign.
    const PROBES: [i16; 7] = [-32768, -1000, -1, 0, 1, 1000, 32767];

    /// Absolute difference between two samples, widened so it cannot overflow.
    fn abs_error(original: i16, decoded: i16) -> i32 {
        (i32::from(original) - i32::from(decoded)).abs()
    }

    #[test]
    fn mulaw_roundtrip() {
        for &sample in &PROBES {
            let decoded = mulaw_decode(mulaw_encode(sample));
            // µ-law is lossy; verify the decoded value is close.
            assert!(
                abs_error(sample, decoded) < 1000,
                "µ-law roundtrip failed for {sample}: got {decoded}"
            );
        }
    }

    #[test]
    fn alaw_roundtrip() {
        for &sample in &PROBES {
            let decoded = alaw_decode(alaw_encode(sample));
            // A-law is lossy as well; only closeness is checked.
            assert!(
                abs_error(sample, decoded) < 1000,
                "A-law roundtrip failed for {sample}: got {decoded}"
            );
        }
    }

    #[test]
    fn codec_sample_rates() {
        let expected = [
            (PT_OPUS, 48000),
            (PT_G722, 16000),
            (PT_PCMU, 8000),
            (PT_PCMA, 8000),
        ];
        for (pt, rate) in expected {
            assert_eq!(codec_sample_rate(pt), rate);
        }
    }

    #[test]
    fn transcode_same_pt_is_passthrough() {
        let mut state = TranscodeState::new().unwrap();
        let payload = vec![0u8; 160];
        let out = state.transcode(&payload, PT_PCMU, PT_PCMU, None).unwrap();
        assert_eq!(out, payload);
    }

    #[test]
    fn pcmu_to_pcma_roundtrip() {
        let mut state = TranscodeState::new().unwrap();

        // 160 bytes = 20ms of PCMU at 8kHz, filled with a rising ramp.
        let pcmu: Vec<u8> = (0i16..160)
            .map(|i| mulaw_encode(i * 200 - 16000))
            .collect();

        // Both codecs use one byte per sample, so the frame size is unchanged.
        let pcma = state.transcode(&pcmu, PT_PCMU, PT_PCMA, None).unwrap();
        assert_eq!(pcma.len(), 160);

        let back = state.transcode(&pcma, PT_PCMA, PT_PCMU, None).unwrap();
        assert_eq!(back.len(), 160);
    }
}