initial commit — SIP B2BUA + WebRTC bridge with Rust codec engine

Full-featured SIP router with multi-provider trunking, browser softphone
via WebRTC, real-time Opus/G.722/PCM transcoding in Rust, RNNoise ML
noise suppression, Kokoro neural TTS announcements, and a Lit-based
web dashboard with live call monitoring and REST API.
This commit is contained in:
2026-04-09 23:03:55 +00:00
commit f3e1c96872
59 changed files with 18377 additions and 0 deletions

1971
rust/Cargo.lock generated Normal file

File diff suppressed because it is too large Load Diff

7
rust/Cargo.toml Normal file
View File

@@ -0,0 +1,7 @@
[workspace]
members = ["crates/opus-codec", "crates/tts-engine"]
resolver = "2"
[profile.release]
opt-level = 3
lto = true

View File

@@ -0,0 +1,17 @@
[package]
name = "opus-codec"
version = "0.2.0"
edition = "2021"
[[bin]]
name = "opus-codec"
path = "src/main.rs"
[dependencies]
audiopus = "0.3.0-rc.0"
ezk-g722 = "0.1"
rubato = "0.14"
serde = { version = "1", features = ["derive"] }
serde_json = "1"
base64 = "0.22"
nnnoiseless = { version = "0.5", default-features = false }

View File

@@ -0,0 +1,464 @@
/// Audio transcoding bridge for smartrust.
///
/// Handles Opus ↔ G.722 ↔ PCMU/PCMA transcoding for the SIP router.
/// Uses audiopus (libopus) for Opus and ezk-g722 (SpanDSP port) for G.722.
///
/// Supports per-session codec state so concurrent calls don't corrupt each
/// other's stateful codecs (Opus, G.722 ADPCM).
///
/// Protocol:
/// -> {"id":"1","method":"init","params":{}}
/// <- {"id":"1","success":true,"result":{}}
/// -> {"id":"2","method":"create_session","params":{"session_id":"call-abc"}}
/// <- {"id":"2","success":true,"result":{}}
/// -> {"id":"3","method":"transcode","params":{"session_id":"call-abc","data_b64":"...","from_pt":111,"to_pt":9}}
/// <- {"id":"3","success":true,"result":{"data_b64":"..."}}
/// -> {"id":"4","method":"destroy_session","params":{"session_id":"call-abc"}}
/// <- {"id":"4","success":true,"result":{}}
use audiopus::coder::{Decoder as OpusDecoder, Encoder as OpusEncoder};
use audiopus::packet::Packet as OpusPacket;
use audiopus::{Application, Bitrate as OpusBitrate, Channels, MutSignals, SampleRate};
use base64::engine::general_purpose::STANDARD as B64;
use base64::Engine as _;
use ezk_g722::libg722::{self, Bitrate};
use nnnoiseless::DenoiseState;
use rubato::{FftFixedIn, Resampler};
use serde::Deserialize;
use std::collections::HashMap;
use std::io::{self, BufRead, Write};
// RTP payload type (PT) numbers used to select a codec in `from_pt` / `to_pt`.
// G.711 µ-law; decoded/encoded at 8 kHz by this bridge.
const PT_PCMU: u8 = 0;
// G.711 A-law; decoded/encoded at 8 kHz by this bridge.
const PT_PCMA: u8 = 8;
// G.722; handled as 16 kHz PCM by this bridge.
const PT_G722: u8 = 9;
// Opus at 48 kHz. 111 is the value the protocol example in the file header
// uses for Opus — presumably the dynamic PT negotiated by the caller; confirm.
const PT_OPUS: u8 = 111;
/// One command read from stdin, deserialized from a single JSON line.
#[derive(Deserialize)]
struct Request {
// Caller-chosen correlation id; echoed back unchanged in the response.
id: String,
// Command name dispatched on in `main` (e.g. "init", "transcode").
method: String,
// Command-specific arguments; becomes JSON `null` when the field is absent.
#[serde(default)]
params: serde_json::Value,
}
fn respond(out: &mut impl Write, id: &str, success: bool, result: Option<serde_json::Value>, error: Option<&str>) {
let mut resp = serde_json::json!({ "id": id, "success": success });
if let Some(r) = result { resp["result"] = r; }
if let Some(e) = error { resp["error"] = serde_json::Value::String(e.to_string()); }
let _ = writeln!(out, "{}", resp);
let _ = out.flush();
}
// ---------------------------------------------------------------------------
// Codec state
// ---------------------------------------------------------------------------
/// All stateful audio-processing objects belonging to one call/session.
/// Keeping one instance per session stops concurrent calls from sharing
/// (and corrupting) codec history.
struct TranscodeState {
// Opus encoder/decoder pair, both created for 48 kHz mono in `new`.
opus_enc: OpusEncoder,
opus_dec: OpusDecoder,
// G.722 encoder/decoder pair, both created in 64 kbit/s mode in `new`.
g722_enc: libg722::encoder::Encoder,
g722_dec: libg722::decoder::Decoder,
// Cached FFT resamplers keyed by (from_rate, to_rate, chunk_size).
// Entries are created lazily by `resample` and never evicted.
resamplers: HashMap<(u32, u32, usize), FftFixedIn<f64>>,
// Per-direction ML noise suppression (RNNoise). Separate state per direction
// prevents the RNN hidden state from being corrupted by interleaved audio streams.
denoiser_to_sip: Box<DenoiseState<'static>>,
denoiser_to_browser: Box<DenoiseState<'static>>,
}
impl TranscodeState {
/// Build a fresh, independent set of codec, resampler-cache and denoiser state.
///
/// Returns `Err` with a short description if the Opus encoder or decoder
/// cannot be created or configured.
fn new() -> Result<Self, String> {
    let mut encoder = OpusEncoder::new(SampleRate::Hz48000, Channels::Mono, Application::Voip)
        .map_err(|e| format!("opus encoder: {e}"))?;
    // Telephony-grade tuning: complexity 5 is sufficient for voice bridged to G.722.
    encoder
        .set_complexity(5)
        .map_err(|e| format!("opus set_complexity: {e}"))?;
    encoder
        .set_bitrate(OpusBitrate::BitsPerSecond(24000))
        .map_err(|e| format!("opus set_bitrate: {e}"))?;

    let decoder = OpusDecoder::new(SampleRate::Hz48000, Channels::Mono)
        .map_err(|e| format!("opus decoder: {e}"))?;

    Ok(Self {
        opus_enc: encoder,
        opus_dec: decoder,
        g722_enc: libg722::encoder::Encoder::new(Bitrate::Mode1_64000, false, false),
        g722_dec: libg722::decoder::Decoder::new(Bitrate::Mode1_64000, false, false),
        resamplers: HashMap::new(),
        denoiser_to_sip: DenoiseState::new(),
        denoiser_to_browser: DenoiseState::new(),
    })
}
/// Convert mono 16-bit PCM from `from_rate` to `to_rate` with a rubato FFT resampler.
///
/// One resampler is kept per (from_rate, to_rate, input length) and reused on
/// later calls so its inter-frame state carries across a continuous stream.
/// Empty input or identical rates return a plain copy of `pcm`.
fn resample(&mut self, pcm: &[i16], from_rate: u32, to_rate: u32) -> Result<Vec<i16>, String> {
    // Nothing to convert: hand back a copy of the input.
    if pcm.is_empty() || from_rate == to_rate {
        return Ok(pcm.to_vec());
    }

    // Look up the cached resampler, building it on first use for this key.
    let cache_key = (from_rate, to_rate, pcm.len());
    let resampler = match self.resamplers.entry(cache_key) {
        std::collections::hash_map::Entry::Occupied(slot) => slot.into_mut(),
        std::collections::hash_map::Entry::Vacant(slot) => {
            let fresh = FftFixedIn::<f64>::new(from_rate as usize, to_rate as usize, pcm.len(), 1, 1)
                .map_err(|e| format!("resampler {from_rate}->{to_rate}: {e}"))?;
            slot.insert(fresh)
        }
    };

    // i16 → f64 normalized to [-1.0, 1.0); a single (mono) channel.
    let channels = vec![pcm
        .iter()
        .map(|&s| f64::from(s) / 32768.0)
        .collect::<Vec<f64>>()];
    let converted = resampler
        .process(&channels, None)
        .map_err(|e| format!("resample {from_rate}->{to_rate}: {e}"))?;

    // f64 → i16, rounding and saturating at the i16 limits.
    Ok(converted[0]
        .iter()
        .map(|&s| (s * 32767.0).round().clamp(-32768.0, 32767.0) as i16)
        .collect())
}
/// Run RNNoise noise suppression over 48 kHz PCM.
///
/// Audio is processed in whole `DenoiseState::FRAME_SIZE` frames; the
/// denoiser's state persists across calls. Any trailing partial frame is
/// copied to the output unmodified rather than padded, so no artificial
/// silence is fed into the RNN state. The output has the same length as `pcm`.
fn denoise(denoiser: &mut DenoiseState, pcm: &[i16]) -> Vec<i16> {
    let frame_len = DenoiseState::FRAME_SIZE; // 480
    let frames = pcm.chunks_exact(frame_len);
    let leftover = frames.remainder();

    let mut cleaned = Vec::with_capacity(pcm.len());
    let mut frame_in = vec![0.0f32; frame_len];
    let mut frame_out = vec![0.0f32; frame_len];

    for frame in frames {
        for (dst, &src) in frame_in.iter_mut().zip(frame) {
            *dst = src as f32;
        }
        denoiser.process_frame(&mut frame_out[..], &frame_in[..]);
        cleaned.extend(
            frame_out
                .iter()
                .map(|&s| s.round().clamp(-32768.0, 32767.0) as i16),
        );
    }

    // Pass through any trailing partial-frame samples unmodified.
    cleaned.extend_from_slice(leftover);
    cleaned
}
/// Transcode one audio payload from codec `from_pt` to codec `to_pt`.
///
/// `direction` selects the per-direction denoiser: `"to_sip"` uses the
/// SIP-bound one, any other string uses the browser-bound one. When it is
/// `None`, denoising is skipped (backward compat) and the audio is only
/// resampled if the two codecs run at different rates.
/// Identical payload types return a copy of `data` without decoding.
fn transcode(&mut self, data: &[u8], from_pt: u8, to_pt: u8, direction: Option<&str>) -> Result<Vec<u8>, String> {
    // Same codec on both sides: pass the payload through untouched.
    if from_pt == to_pt {
        return Ok(data.to_vec());
    }

    // Decode to PCM at the source codec's sample rate.
    let (decoded, source_rate) = self.decode_to_pcm(data, from_pt)?;
    let target_rate = codec_sample_rate(to_pt);

    let ready = match direction {
        Some(dir) => {
            // The denoiser works at 48 kHz; both resample calls are no-ops
            // when the rate already matches.
            let at_48k = self.resample(&decoded, source_rate, 48000)?;
            let denoiser = match dir {
                "to_sip" => &mut self.denoiser_to_sip,
                _ => &mut self.denoiser_to_browser,
            };
            let cleaned = Self::denoise(denoiser, &at_48k);
            self.resample(&cleaned, 48000, target_rate)?
        }
        // No denoising requested and rates already agree: use the PCM as is.
        None if source_rate == target_rate => decoded,
        None => self.resample(&decoded, source_rate, target_rate)?,
    };

    self.encode_from_pcm(&ready, to_pt)
}
fn decode_to_pcm(&mut self, data: &[u8], pt: u8) -> Result<(Vec<i16>, u32), String> {
match pt {
PT_OPUS => {
let mut pcm = vec![0i16; 5760]; // up to 120ms at 48kHz (RFC 6716 max)
let packet = OpusPacket::try_from(data)
.map_err(|e| format!("opus packet: {e}"))?;
let out = MutSignals::try_from(&mut pcm[..])
.map_err(|e| format!("opus signals: {e}"))?;
let n: usize = self.opus_dec.decode(Some(packet), out, false)
.map_err(|e| format!("opus decode: {e}"))?.into();
pcm.truncate(n);
Ok((pcm, 48000))
}
PT_G722 => {
let pcm = self.g722_dec.decode(data);
Ok((pcm, 16000))
}
PT_PCMU => {
let pcm: Vec<i16> = data.iter().map(|&b| mulaw_decode(b)).collect();
Ok((pcm, 8000))
}
PT_PCMA => {
let pcm: Vec<i16> = data.iter().map(|&b| alaw_decode(b)).collect();
Ok((pcm, 8000))
}
_ => Err(format!("unsupported source PT {pt}")),
}
}
fn encode_from_pcm(&mut self, pcm: &[i16], pt: u8) -> Result<Vec<u8>, String> {
match pt {
PT_OPUS => {
let mut buf = vec![0u8; 4000];
let n: usize = self.opus_enc.encode(pcm, &mut buf)
.map_err(|e| format!("opus encode: {e}"))?.into();
buf.truncate(n);
Ok(buf)
}
PT_G722 => {
Ok(self.g722_enc.encode(pcm))
}
PT_PCMU => {
Ok(pcm.iter().map(|&s| mulaw_encode(s)).collect())
}
PT_PCMA => {
Ok(pcm.iter().map(|&s| alaw_encode(s)).collect())
}
_ => Err(format!("unsupported target PT {pt}")),
}
}
}
/// PCM sample rate (Hz) this bridge uses for the codec with payload type `pt`.
/// Anything other than Opus or G.722 is treated as 8 kHz (PCMU, PCMA).
fn codec_sample_rate(pt: u8) -> u32 {
    let rate = match pt {
        PT_OPUS => 48_000,
        PT_G722 => 16_000,
        _ => 8_000, // PCMU, PCMA
    };
    rate
}
// ---------------------------------------------------------------------------
// G.711 µ-law (PCMU)
// ---------------------------------------------------------------------------
/// Compress one 16-bit linear PCM sample to an 8-bit G.711 µ-law byte.
///
/// The magnitude is clipped to 32635, biased by 0x84, and split into a 3-bit
/// segment and a 4-bit step; the assembled byte is bit-inverted.
fn mulaw_encode(sample: i16) -> u8 {
    const BIAS: u32 = 0x84;
    const CLIP: u32 = 32635;

    let sign_bit: u8 = if sample < 0 { 0x80 } else { 0x00 };
    // Widen before taking the magnitude so i16::MIN (-32768) cannot overflow.
    let biased = i32::from(sample).unsigned_abs().min(CLIP) + BIAS;
    // `biased` lies in 132..=32767, so its top set bit is bit 7..=14;
    // the segment number is that bit index minus 7.
    let segment = 24 - biased.leading_zeros();
    let step = (biased >> (segment + 3)) & 0x0f;
    !(sign_bit | ((segment as u8) << 4) | step as u8)
}
/// Expand one 8-bit G.711 µ-law byte to a 16-bit linear PCM sample.
///
/// Inverse of `mulaw_encode`: the byte is bit-inverted, then split into sign,
/// 3-bit segment and 4-bit step. Results span -32124..=32124.
fn mulaw_decode(mulaw: u8) -> i16 {
    const BIAS: i16 = 0x84;

    let bits = !mulaw;
    let segment = (bits >> 4) & 0x07;
    let step = i16::from(bits & 0x0f);
    // The step occupies bits 3..=6, directly below the bias's leading bit (bit 7),
    // hence `<< 3`. A shift of 4 would collide with the bias and overflow i16
    // in the top segment. Maximum here is (120 + 132) << 7 = 32256.
    let magnitude = (((step << 3) + BIAS) << segment) - BIAS;
    if bits & 0x80 != 0 { -magnitude } else { magnitude }
}
// ---------------------------------------------------------------------------
// G.711 A-law (PCMA)
// ---------------------------------------------------------------------------
/// Compress one 16-bit linear PCM sample to an 8-bit G.711 A-law byte.
///
/// Non-negative samples set the sign bit; the magnitude is split into a 3-bit
/// segment and a 4-bit step, and the assembled byte is XORed with 0x55.
fn alaw_encode(sample: i16) -> u8 {
    let sign_bit: u8 = if sample < 0 { 0x00 } else { 0x80 };
    // Widen before taking the magnitude so i16::MIN (-32768) cannot overflow.
    let magnitude = i32::from(sample).unsigned_abs().min(32767);
    // Segment = (index of top set bit) - 7, floored at 0 for magnitudes < 256.
    let segment = 24u32.saturating_sub(magnitude.leading_zeros());
    // Segments 0 and 1 share the same step size (shift by 4).
    let shift = segment.max(1) + 3;
    let step = (magnitude >> shift) & 0x0f;
    (sign_bit | ((segment as u8) << 4) | step as u8) ^ 0x55
}
/// Expand one 8-bit G.711 A-law byte to a 16-bit linear PCM sample.
///
/// Inverse of `alaw_encode`: the byte is XORed with 0x55, then split into
/// sign, 3-bit segment and 4-bit step. A set sign bit means non-negative.
fn alaw_decode(alaw: u8) -> i16 {
    let bits = alaw ^ 0x55;
    let segment = (bits & 0x70) >> 4;
    let step = i16::from(bits & 0x0f) << 4;
    let magnitude = match segment {
        // Segment 0 has no implicit leading bit; add only the half-step.
        0 => step + 8,
        n => (step + 0x108) << (n - 1),
    };
    if bits & 0x80 == 0 { -magnitude } else { magnitude }
}
// ---------------------------------------------------------------------------
// Main loop
// ---------------------------------------------------------------------------
/// Pick the codec state a request should run against.
///
/// When `params` carries a string `session_id`, only the `sessions` map is
/// consulted (an unknown id yields `None`); otherwise the `default` state
/// created by `init` is used, if any (backward compat).
fn get_session<'a>(
    sessions: &'a mut HashMap<String, TranscodeState>,
    default: &'a mut Option<TranscodeState>,
    params: &serde_json::Value,
) -> Option<&'a mut TranscodeState> {
    match params.get("session_id").and_then(serde_json::Value::as_str) {
        Some(sid) => sessions.get_mut(sid),
        None => default.as_mut(),
    }
}
/// Read an optional RTP payload-type parameter from a request.
///
/// A missing or non-integer value yields `default` (the behaviour existing
/// callers rely on). An integer that does not fit in a `u8` is rejected
/// instead of being silently wrapped onto an unrelated payload type.
fn payload_type_param(params: &serde_json::Value, key: &str, default: u8) -> Result<u8, String> {
    match params.get(key).and_then(|v| v.as_u64()) {
        None => Ok(default),
        Some(raw) if raw <= u64::from(u8::MAX) => Ok(raw as u8),
        Some(raw) => Err(format!("{key} out of range: {raw}")),
    }
}

/// Entry point: a line-oriented JSON request/response loop over stdin/stdout.
///
/// Emits a `ready` event, then handles one request per input line until stdin
/// closes or a read fails. Supported methods: `init`, `create_session`,
/// `destroy_session`, `transcode`, `encode_pcm`; `encode`/`decode` are
/// rejected with a pointer to `transcode`. Every request gets exactly one
/// response line carrying the request's `id`.
fn main() {
    let stdin = io::stdin();
    let stdout = io::stdout();
    let mut out = io::BufWriter::new(stdout.lock());
    let _ = writeln!(out, r#"{{"event":"ready","data":{{}}}}"#);
    let _ = out.flush();

    // Default state for backward-compat `init` (no session_id).
    let mut default_state: Option<TranscodeState> = None;
    // Per-session codec state for concurrent call isolation.
    let mut sessions: HashMap<String, TranscodeState> = HashMap::new();

    for line in stdin.lock().lines() {
        let line = match line {
            Ok(l) if !l.trim().is_empty() => l,
            Ok(_) => continue,
            Err(_) => break,
        };
        let req: Request = match serde_json::from_str(&line) {
            Ok(r) => r,
            Err(e) => {
                // The id is unknown when the line does not parse; reply with an empty one.
                respond(&mut out, "", false, None, Some(&format!("parse: {e}")));
                continue;
            }
        };

        match req.method.as_str() {
            // Backward-compat: init the default (shared) session.
            "init" => {
                match TranscodeState::new() {
                    Ok(s) => {
                        default_state = Some(s);
                        respond(&mut out, &req.id, true, Some(serde_json::json!({})), None);
                    }
                    Err(e) => respond(&mut out, &req.id, false, None, Some(&e)),
                }
            }

            // Create an isolated session with its own codec state.
            "create_session" => {
                let session_id = match req.params.get("session_id").and_then(|v| v.as_str()) {
                    Some(s) => s.to_string(),
                    None => { respond(&mut out, &req.id, false, None, Some("missing session_id")); continue; }
                };
                // Creating an existing session is a no-op success; its state is kept.
                if sessions.contains_key(&session_id) {
                    respond(&mut out, &req.id, true, Some(serde_json::json!({})), None);
                    continue;
                }
                match TranscodeState::new() {
                    Ok(s) => {
                        sessions.insert(session_id, s);
                        respond(&mut out, &req.id, true, Some(serde_json::json!({})), None);
                    }
                    Err(e) => respond(&mut out, &req.id, false, None, Some(&e)),
                }
            }

            // Destroy a session, freeing its codec state.
            "destroy_session" => {
                let session_id = match req.params.get("session_id").and_then(|v| v.as_str()) {
                    Some(s) => s,
                    None => { respond(&mut out, &req.id, false, None, Some("missing session_id")); continue; }
                };
                // Removing an unknown session still reports success.
                sessions.remove(session_id);
                respond(&mut out, &req.id, true, Some(serde_json::json!({})), None);
            }

            // Transcode: uses session_id if provided, else default state.
            "transcode" => {
                let st = match get_session(&mut sessions, &mut default_state, &req.params) {
                    Some(s) => s,
                    None => { respond(&mut out, &req.id, false, None, Some("not initialized (no session or default state)")); continue; }
                };
                let data_b64 = match req.params.get("data_b64").and_then(|v| v.as_str()) {
                    Some(s) => s,
                    None => { respond(&mut out, &req.id, false, None, Some("missing data_b64")); continue; }
                };
                let from_pt = match payload_type_param(&req.params, "from_pt", 0) {
                    Ok(pt) => pt,
                    Err(e) => { respond(&mut out, &req.id, false, None, Some(&e)); continue; }
                };
                let to_pt = match payload_type_param(&req.params, "to_pt", 0) {
                    Ok(pt) => pt,
                    Err(e) => { respond(&mut out, &req.id, false, None, Some(&e)); continue; }
                };
                let direction = req.params.get("direction").and_then(|v| v.as_str());
                let data = match B64.decode(data_b64) {
                    Ok(b) => b,
                    Err(e) => { respond(&mut out, &req.id, false, None, Some(&format!("b64: {e}"))); continue; }
                };
                match st.transcode(&data, from_pt, to_pt, direction) {
                    Ok(result) => {
                        respond(&mut out, &req.id, true, Some(serde_json::json!({ "data_b64": B64.encode(&result) })), None);
                    }
                    Err(e) => respond(&mut out, &req.id, false, None, Some(&e)),
                }
            }

            // Encode raw 16-bit PCM to a target codec.
            // Params: data_b64 (raw PCM bytes, 16-bit LE), sample_rate (input Hz), to_pt
            // Optional: session_id for isolated codec state.
            "encode_pcm" => {
                let st = match get_session(&mut sessions, &mut default_state, &req.params) {
                    Some(s) => s,
                    None => { respond(&mut out, &req.id, false, None, Some("not initialized (no session or default state)")); continue; }
                };
                let data_b64 = match req.params.get("data_b64").and_then(|v| v.as_str()) {
                    Some(s) => s,
                    None => { respond(&mut out, &req.id, false, None, Some("missing data_b64")); continue; }
                };
                // Missing/non-integer sample_rate keeps the 22050 Hz default; a zero or
                // oversized rate is rejected rather than truncated and handed to the resampler.
                let sample_rate: u32 = match req.params.get("sample_rate").and_then(|v| v.as_u64()) {
                    None => 22050,
                    Some(raw) if raw > 0 && raw <= u64::from(u32::MAX) => raw as u32,
                    Some(raw) => {
                        respond(&mut out, &req.id, false, None, Some(&format!("invalid sample_rate: {raw}")));
                        continue;
                    }
                };
                let to_pt = match payload_type_param(&req.params, "to_pt", 9) {
                    Ok(pt) => pt,
                    Err(e) => { respond(&mut out, &req.id, false, None, Some(&e)); continue; }
                };
                let data = match B64.decode(data_b64) {
                    Ok(b) => b,
                    Err(e) => { respond(&mut out, &req.id, false, None, Some(&format!("b64: {e}"))); continue; }
                };
                if data.len() % 2 != 0 {
                    respond(&mut out, &req.id, false, None, Some("PCM data has odd byte count (expected 16-bit LE samples)"));
                    continue;
                }
                // Convert raw bytes to i16 samples.
                let pcm: Vec<i16> = data.chunks_exact(2)
                    .map(|c| i16::from_le_bytes([c[0], c[1]]))
                    .collect();
                // Resample to target codec's sample rate.
                let target_rate = codec_sample_rate(to_pt);
                let resampled = match st.resample(&pcm, sample_rate, target_rate) {
                    Ok(r) => r,
                    Err(e) => { respond(&mut out, &req.id, false, None, Some(&e)); continue; }
                };
                // Encode to target codec (reuse encode_from_pcm).
                match st.encode_from_pcm(&resampled, to_pt) {
                    Ok(encoded) => {
                        respond(&mut out, &req.id, true, Some(serde_json::json!({ "data_b64": B64.encode(&encoded) })), None);
                    }
                    Err(e) => respond(&mut out, &req.id, false, None, Some(&e)),
                }
            }

            // Legacy commands (kept for backward compat).
            "encode" | "decode" => {
                respond(&mut out, &req.id, false, None, Some("use 'transcode' command instead"));
            }

            _ => respond(&mut out, &req.id, false, None, Some(&format!("unknown: {}", req.method))),
        }
    }
}

View File

@@ -0,0 +1,18 @@
[package]
name = "tts-engine"
version = "0.1.0"
edition = "2021"
[[bin]]
name = "tts-engine"
path = "src/main.rs"
[dependencies]
kokoro-tts = { version = "0.3", default-features = false }
# Pin to rc.11 matching kokoro-tts's expectation; enable vendored TLS to avoid system libssl-dev.
ort = { version = "=2.0.0-rc.11", default-features = false, features = [
"std", "download-binaries", "copy-dylibs", "ndarray",
"tls-native-vendored"
] }
tokio = { version = "1", features = ["rt-multi-thread", "macros"] }
hound = "3.5"

View File

@@ -0,0 +1,149 @@
/// TTS engine CLI — synthesizes text to a WAV file using Kokoro neural TTS.
///
/// Usage:
/// echo "Hello world" | tts-engine --model kokoro-v1.0.onnx --voices voices.bin --output out.wav
/// tts-engine --model kokoro-v1.0.onnx --voices voices.bin --output out.wav --text "Hello world"
///
/// Outputs 24kHz 16-bit mono WAV.
use kokoro_tts::{KokoroTts, Voice};
use std::io::{self, Read};
/// Parse the process command line.
///
/// Recognised flags, each followed by a value: `--model`, `--voices`,
/// `--output` (alias `--output_file`), `--text`, `--voice`. Unrecognised
/// arguments are ignored. Returns `(model, voices, output, voice, text)`;
/// `voice` defaults to `"af_bella"` and `text` is `None` when not given.
/// Fails if `--model`, `--voices` or `--output` is missing or empty.
fn parse_args() -> Result<(String, String, String, String, Option<String>), String> {
    let mut model = String::new();
    let mut voices = String::new();
    let mut output = String::new();
    let mut text = None;
    let mut voice_name = None;

    // Skip argv[0]; each recognised flag consumes the argument after it.
    let mut argv = std::env::args().skip(1);
    while let Some(flag) = argv.next() {
        match flag.as_str() {
            "--model" => model = argv.next().unwrap_or_default(),
            "--voices" => voices = argv.next().unwrap_or_default(),
            "--output" | "--output_file" => output = argv.next().unwrap_or_default(),
            "--text" => text = argv.next(),
            "--voice" => voice_name = argv.next(),
            _ => {}
        }
    }

    // Report the first missing required flag, in this fixed order.
    for (value, complaint) in [
        (&model, "--model required"),
        (&voices, "--voices required"),
        (&output, "--output required"),
    ] {
        if value.is_empty() {
            return Err(complaint.into());
        }
    }

    let voice = voice_name.unwrap_or_else(|| "af_bella".into());
    Ok((model, voices, output, voice, text))
}
/// Map a voice name such as `"af_bella"` to the matching `Voice` variant.
///
/// Every voice is built with the same parameter value of 1.0 (presumably the
/// speaking speed — confirm against the kokoro-tts docs). An unrecognised
/// name logs a warning to stderr and falls back to `af_bella`.
fn select_voice(name: &str) -> Voice {
    let speed = 1.0;
    match name {
        "af_bella" => Voice::AfBella(speed),
        "af_heart" => Voice::AfHeart(speed),
        "af_jessica" => Voice::AfJessica(speed),
        "af_nicole" => Voice::AfNicole(speed),
        "af_nova" => Voice::AfNova(speed),
        "af_sarah" => Voice::AfSarah(speed),
        "af_sky" => Voice::AfSky(speed),
        "af_river" => Voice::AfRiver(speed),
        "af_alloy" => Voice::AfAlloy(speed),
        "af_aoede" => Voice::AfAoede(speed),
        "af_kore" => Voice::AfKore(speed),
        "am_adam" => Voice::AmAdam(speed),
        "am_echo" => Voice::AmEcho(speed),
        "am_eric" => Voice::AmEric(speed),
        "am_fenrir" => Voice::AmFenrir(speed),
        "am_liam" => Voice::AmLiam(speed),
        "am_michael" => Voice::AmMichael(speed),
        "am_onyx" => Voice::AmOnyx(speed),
        "am_puck" => Voice::AmPuck(speed),
        "bf_alice" => Voice::BfAlice(speed),
        "bf_emma" => Voice::BfEmma(speed),
        "bf_isabella" => Voice::BfIsabella(speed),
        "bf_lily" => Voice::BfLily(speed),
        "bm_daniel" => Voice::BmDaniel(speed),
        "bm_fable" => Voice::BmFable(speed),
        "bm_george" => Voice::BmGeorge(speed),
        "bm_lewis" => Voice::BmLewis(speed),
        unknown => {
            eprintln!("[tts-engine] unknown voice '{}', falling back to af_bella", unknown);
            Voice::AfBella(speed)
        }
    }
}
/// Entry point: synthesize the requested text and write it as a WAV file.
///
/// Text comes from `--text` or, when that is absent, from stdin (trimmed).
/// Progress and errors go to stderr. Every failure — bad arguments, empty
/// text, unreadable stdin, model load, synthesis, or WAV I/O — exits with
/// status 1 instead of panicking.
#[tokio::main]
async fn main() {
    let (model_path, voices_path, output_path, voice_name, text_arg) = match parse_args() {
        Ok(v) => v,
        Err(e) => {
            eprintln!("Error: {}", e);
            eprintln!("Usage: tts-engine --model <model.onnx> --voices <voices.bin> --output <output.wav> [--text <text>] [--voice <voice_name>]");
            std::process::exit(1);
        }
    };

    // Get text from --text arg or stdin.
    let text = match text_arg {
        Some(t) => t,
        None => {
            let mut buf = String::new();
            if let Err(e) = io::stdin().read_to_string(&mut buf) {
                eprintln!("[tts-engine] failed to read stdin: {}", e);
                std::process::exit(1);
            }
            buf.trim().to_string()
        }
    };
    if text.is_empty() {
        eprintln!("[tts-engine] no text provided");
        std::process::exit(1);
    }

    eprintln!("[tts-engine] loading model: {}", model_path);
    let tts = match KokoroTts::new(&model_path, &voices_path).await {
        Ok(t) => t,
        Err(e) => {
            eprintln!("[tts-engine] failed to load model: {:?}", e);
            std::process::exit(1);
        }
    };

    let voice = select_voice(&voice_name);
    eprintln!("[tts-engine] synthesizing with voice '{}': \"{}\"", voice_name, text);
    let (samples, duration) = match tts.synth(&text, voice).await {
        Ok(r) => r,
        Err(e) => {
            eprintln!("[tts-engine] synthesis failed: {:?}", e);
            std::process::exit(1);
        }
    };
    eprintln!("[tts-engine] synthesized {} samples in {:?}", samples.len(), duration);

    // Write WAV: 24kHz, 16-bit, mono (same format announcement.ts expects).
    let spec = hound::WavSpec {
        channels: 1,
        sample_rate: 24000,
        bits_per_sample: 16,
        sample_format: hound::SampleFormat::Int,
    };
    let mut writer = match hound::WavWriter::create(&output_path, spec) {
        Ok(w) => w,
        Err(e) => {
            eprintln!("[tts-engine] failed to create WAV: {}", e);
            std::process::exit(1);
        }
    };
    for &sample in &samples {
        // Float sample → i16, rounding and saturating at the i16 limits.
        let s16 = (sample * 32767.0).round().clamp(-32768.0, 32767.0) as i16;
        if let Err(e) = writer.write_sample(s16) {
            eprintln!("[tts-engine] failed to write WAV sample: {}", e);
            std::process::exit(1);
        }
    }
    // finalize() patches the WAV header; a failure here means a corrupt file.
    if let Err(e) = writer.finalize() {
        eprintln!("[tts-engine] failed to finalize WAV: {}", e);
        std::process::exit(1);
    }
    eprintln!("[tts-engine] wrote {}", output_path);
}