diff --git a/changelog.md b/changelog.md index 6647a06..0e5f0cc 100644 --- a/changelog.md +++ b/changelog.md @@ -1,5 +1,13 @@ # Changelog +## 2026-04-12 - 1.22.0 - feat(proxy-engine) +add on-demand TTS caching for voicemail and IVR prompts + +- Route inbound calls directly to configured IVR menus and track them with a dedicated IVR call state +- Generate voicemail greetings and IVR menu prompts inside the Rust proxy engine on demand instead of precomputing prompts in TypeScript +- Add cacheable TTS output with sidecar metadata and enable Kokoro CMUdict support for improved prompt generation +- Extend proxy configuration to include voiceboxes and IVR menus, and update documentation to reflect Kokoro-only prompt generation + ## 2026-04-11 - 1.21.0 - feat(providers) replace provider creation modal with a guided multi-step setup flow diff --git a/package.json b/package.json index d6e9555..e7e199e 100644 --- a/package.json +++ b/package.json @@ -13,7 +13,7 @@ "restartBackground": "pnpm run buildRust && pnpm run bundle; test -f .server.pid && kill $(cat .server.pid) 2>/dev/null; sleep 1; rm -f sip_trace.log proxy.out && nohup tsx ts/sipproxy.ts > proxy.out 2>&1 & echo $! 
> .server.pid; sleep 2; cat proxy.out" }, "dependencies": { - "@design.estate/dees-catalog": "^3.71.1", + "@design.estate/dees-catalog": "^3.77.0", "@design.estate/dees-element": "^2.2.4", "@push.rocks/smartrust": "^1.3.2", "@push.rocks/smartstate": "^2.3.0", diff --git a/pnpm-lock.yaml b/pnpm-lock.yaml index 5c43a5a..f4319d6 100644 --- a/pnpm-lock.yaml +++ b/pnpm-lock.yaml @@ -9,8 +9,8 @@ importers: .: dependencies: '@design.estate/dees-catalog': - specifier: ^3.71.1 - version: 3.71.1(@tiptap/pm@2.27.2) + specifier: ^3.77.0 + version: 3.77.0(@tiptap/pm@2.27.2) '@design.estate/dees-element': specifier: ^2.2.4 version: 2.2.4 @@ -81,8 +81,8 @@ packages: '@configvault.io/interfaces@1.0.17': resolution: {integrity: sha512-bEcCUR2VBDJsTin8HQh8Uw/mlYl2v8A3jMIaQ+MTB9Hrqd6CZL2dL7iJdWyFl/3EIX+LDxWFR+Oq7liIq7w+1Q==} - '@design.estate/dees-catalog@3.71.1': - resolution: {integrity: sha512-aZzykaAtKqlBalwISF+u8mtJu37ZVLzt5IjxGA/FdL9dBurTA0O2Z6delvJsj6G/RvUUMO9sFdcFJ7NUe8BcVw==} + '@design.estate/dees-catalog@3.77.0': + resolution: {integrity: sha512-2IfvH390WXCF733XcmEcUP9skqogTz9xlqQw5PUJZy0u2Hf6+hJTyQOi4mcKmhpTE/kCpaD51uw21Lr4ncW6cg==} '@design.estate/dees-comms@1.0.30': resolution: {integrity: sha512-KchMlklJfKAjQiJiR0xmofXtQ27VgZtBIxcMwPE9d+h3jJRv+lPZxzBQVOM0eyM0uS44S5vJMZ11IeV4uDXSHg==} @@ -93,8 +93,8 @@ packages: '@design.estate/dees-element@2.2.4': resolution: {integrity: sha512-O9cA6flBMMd+pBwMQrZXwAWel9yVxgokolb+Em6gvkXxPJ0P/B5UDn4Vc2d4ts3ta55PTBm+l2dPeDVGx/bl7Q==} - '@design.estate/dees-wcctools@3.8.0': - resolution: {integrity: sha512-CC14iVKUrguzD9jIrdPBd9fZ4egVJEZMxl5y8iy0l7WLumeoYvGsoXj5INVkRPLRVLqziIdi4Je1hXqHt2NU+g==} + '@design.estate/dees-wcctools@3.8.4': + resolution: {integrity: sha512-KpFK/azK+a/Xpq33pXKcho+tdFKVHhKZM5ArvHqo9QMwTczgp5DZZgowTDUuqAofjZwnuVfCPHK/Pw9e64N46A==} '@emnapi/core@1.9.2': resolution: {integrity: sha512-UC+ZhH3XtczQYfOlu3lNEkdW/p4dsJ1r/bP7H8+rhao3TTTMO1ATq/4DdIi23XuGoFY+Cz0JmCbdVl0hz9jZcA==} @@ -1694,8 +1694,8 @@ packages: resolution: 
{integrity: sha512-JvNw9Y81y33E+BEYPr0U7omo+U9AySnsMsEiXgwT6yqd31VQWTLNQqmT4ou5eqPFUrTfIDFta2wKhB1hyohtAQ==} engines: {node: 20 || >=22} - lucide@0.577.0: - resolution: {integrity: sha512-PpC/m5eOItp/WU/GlQPFBXDOhq6HibL73KzYP37OX3LM7VmzWQF8voEj8QRWUFvy9FIKfeDQkWYoyS1D/MdWFA==} + lucide@1.8.0: + resolution: {integrity: sha512-JjV/QnadgFLj1Pyu9IKl0lknrolFEzo04B64QcYLLeRzZl/iEHpdbSrRRKbyXcv45SZNv+WGjIUCT33e7xHO6Q==} make-dir@3.1.0: resolution: {integrity: sha512-g3FeP20LNwhALb/6Cz6Dd4F2ngze0jz7tbzrD2wAV+o9FeNHe4rL+yK2md0J/fiSf1sa1ADhXqi5+oVwOM/eGw==} @@ -2462,7 +2462,7 @@ snapshots: '@api.global/typedrequest-interfaces': 3.0.19 '@api.global/typedsocket': 4.1.2(@push.rocks/smartserve@2.0.3) '@cloudflare/workers-types': 4.20260409.1 - '@design.estate/dees-catalog': 3.71.1(@tiptap/pm@2.27.2) + '@design.estate/dees-catalog': 3.77.0(@tiptap/pm@2.27.2) '@design.estate/dees-comms': 1.0.30 '@push.rocks/lik': 6.4.0 '@push.rocks/smartdelay': 3.0.5 @@ -2529,11 +2529,11 @@ snapshots: dependencies: '@api.global/typedrequest-interfaces': 3.0.19 - '@design.estate/dees-catalog@3.71.1(@tiptap/pm@2.27.2)': + '@design.estate/dees-catalog@3.77.0(@tiptap/pm@2.27.2)': dependencies: '@design.estate/dees-domtools': 2.5.4 '@design.estate/dees-element': 2.2.4 - '@design.estate/dees-wcctools': 3.8.0 + '@design.estate/dees-wcctools': 3.8.4 '@fortawesome/fontawesome-svg-core': 7.2.0 '@fortawesome/free-brands-svg-icons': 7.2.0 '@fortawesome/free-regular-svg-icons': 7.2.0 @@ -2553,7 +2553,7 @@ snapshots: highlight.js: 11.11.1 ibantools: 4.5.4 lightweight-charts: 5.1.0 - lucide: 0.577.0 + lucide: 1.8.0 monaco-editor: 0.55.1 pdfjs-dist: 4.10.38 xterm: 5.3.0 @@ -2610,7 +2610,7 @@ snapshots: - supports-color - vue - '@design.estate/dees-wcctools@3.8.0': + '@design.estate/dees-wcctools@3.8.4': dependencies: '@design.estate/dees-domtools': 2.5.4 '@design.estate/dees-element': 2.2.4 @@ -4487,7 +4487,7 @@ snapshots: lru-cache@11.3.3: {} - lucide@0.577.0: {} + lucide@1.8.0: {} make-dir@3.1.0: dependencies: 
diff --git a/readme.md b/readme.md index d343fe2..f8ef986 100644 --- a/readme.md +++ b/readme.md @@ -20,7 +20,7 @@ siprouter sits between your SIP trunk providers and your endpoints — hardware - 🎯 **Adaptive Jitter Buffer** — Per-leg jitter buffering with sequence-based reordering, adaptive depth (60–120ms), Opus PLC for lost packets, and hold/resume detection - 📧 **Voicemail** — Configurable voicemail boxes with TTS greetings, recording, and web playback - 🔢 **IVR Menus** — DTMF-navigable interactive voice response with nested menus, routing actions, and custom prompts -- 🗣️ **Neural TTS** — Kokoro-powered announcements and greetings with 25+ voice presets, backed by espeak-ng fallback +- 🗣️ **Neural TTS** — Kokoro-powered greetings and IVR prompts with 25+ voice presets - 🎙️ **Call Recording** — Per-source separated WAV recording at 48kHz via tool legs - 🖥️ **Web Dashboard** — Real-time SPA with 9 views: live calls, browser phone, routing, voicemail, IVR, contacts, providers, and streaming logs @@ -98,7 +98,6 @@ sequenceDiagram - **Node.js** ≥ 20 with `tsx` globally available - **pnpm** for package management - **Rust** toolchain (for building the proxy engine) -- **espeak-ng** (optional, for TTS fallback) ### Install & Build @@ -190,7 +189,7 @@ Create `.nogit/config.json`: ### TTS Setup (Optional) -For neural announcements and voicemail greetings, download the Kokoro TTS model: +For neural voicemail greetings and IVR prompts, download the Kokoro TTS model: ```bash mkdir -p .nogit/tts @@ -200,7 +199,7 @@ curl -L -o .nogit/tts/voices.bin \ https://github.com/mzdk100/kokoro/releases/download/V1.0/voices.bin ``` -Without the model files, TTS falls back to `espeak-ng`. Without either, announcements are skipped — everything else works fine. +Without the model files, TTS prompts (IVR menus, voicemail greetings) are skipped — everything else works fine. 
### Run @@ -227,7 +226,6 @@ siprouter/ │ ├── frontend.ts # Web dashboard HTTP/WS server + REST API │ ├── webrtcbridge.ts # WebRTC signaling layer │ ├── registrar.ts # Browser softphone registration -│ ├── announcement.ts # TTS announcement generator (espeak-ng / Kokoro) │ ├── voicebox.ts # Voicemail box management │ └── call/ │ └── prompt-cache.ts # Named audio prompt WAV management @@ -288,13 +286,12 @@ flowchart LR ## 🗣️ Neural TTS -Announcements and voicemail greetings are synthesized using [Kokoro TTS](https://github.com/mzdk100/kokoro) — an 82M parameter neural model running via ONNX Runtime directly in the Rust process: +Voicemail greetings and IVR prompts are synthesized using [Kokoro TTS](https://github.com/mzdk100/kokoro) — an 82M parameter neural model running via ONNX Runtime directly in the Rust process: - **24 kHz, 16-bit mono** output - **25+ voice presets** — American/British, male/female (e.g., `af_bella`, `am_adam`, `bf_emma`, `bm_george`) - **~800ms** synthesis time for a 3-second phrase - Lazy-loaded on first use — no startup cost if TTS is unused -- Falls back to `espeak-ng` if the ONNX model is not available --- diff --git a/rust/Cargo.lock b/rust/Cargo.lock index a7e2ca5..d547fc3 100644 --- a/rust/Cargo.lock +++ b/rust/Cargo.lock @@ -532,6 +532,15 @@ dependencies = [ "cc", ] +[[package]] +name = "cmudict-fast" +version = "0.8.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "2c9f73004e928ed46c3e7fd7406d2b12c8674153295f08af084b49860276dc02" +dependencies = [ + "thiserror", +] + [[package]] name = "codec-lib" version = "0.1.0" @@ -1730,6 +1739,7 @@ dependencies = [ "bincode 2.0.1", "cc", "chinese-number", + "cmudict-fast", "futures", "jieba-rs", "log", diff --git a/rust/crates/proxy-engine/Cargo.toml b/rust/crates/proxy-engine/Cargo.toml index 8c0c59e..fce00d0 100644 --- a/rust/crates/proxy-engine/Cargo.toml +++ b/rust/crates/proxy-engine/Cargo.toml @@ -19,7 +19,7 @@ regex-lite = "0.1" webrtc = "0.8" rand = "0.8" 
hound = "3.5" -kokoro-tts = { version = "0.3", default-features = false } +kokoro-tts = { version = "0.3", default-features = false, features = ["use-cmudict"] } ort = { version = "=2.0.0-rc.11", default-features = false, features = [ "std", "download-binaries", "copy-dylibs", "ndarray", "tls-native-vendored" diff --git a/rust/crates/proxy-engine/src/call.rs b/rust/crates/proxy-engine/src/call.rs index b035105..292c052 100644 --- a/rust/crates/proxy-engine/src/call.rs +++ b/rust/crates/proxy-engine/src/call.rs @@ -23,6 +23,7 @@ pub enum CallState { Ringing, Connected, Voicemail, + Ivr, Terminated, } @@ -37,6 +38,7 @@ impl CallState { Self::Ringing => "ringing", Self::Connected => "connected", Self::Voicemail => "voicemail", + Self::Ivr => "ivr", Self::Terminated => "terminated", } } diff --git a/rust/crates/proxy-engine/src/call_manager.rs b/rust/crates/proxy-engine/src/call_manager.rs index 5504fa1..806121c 100644 --- a/rust/crates/proxy-engine/src/call_manager.rs +++ b/rust/crates/proxy-engine/src/call_manager.rs @@ -12,13 +12,16 @@ use crate::mixer::spawn_mixer; use crate::registrar::Registrar; use crate::rtp::RtpPortPool; use crate::sip_leg::{SipLeg, SipLegAction, SipLegConfig}; +use crate::tts::TtsEngine; use sip_proto::helpers::{build_sdp, generate_call_id, generate_tag, parse_sdp_endpoint, SdpOptions}; use sip_proto::message::{ResponseOptions, SipMessage}; use sip_proto::rewrite::{rewrite_sdp, rewrite_sip_uri}; use std::collections::HashMap; use std::net::SocketAddr; +use std::path::Path; use std::sync::Arc; use tokio::net::UdpSocket; +use tokio::sync::Mutex; /// Result of creating an inbound call — carries both the call id and /// whether browsers should be notified (flows from the matched inbound @@ -681,6 +684,7 @@ impl CallManager { rtp_pool: &mut RtpPortPool, socket: &UdpSocket, public_ip: Option<&str>, + tts_engine: Arc>, ) -> Option { let call_id = self.next_call_id(); let lan_ip = &config.proxy.lan_ip; @@ -710,10 +714,27 @@ impl CallManager { // - 
`ring_browsers` is informational only — browsers see a toast but // do not race the SIP device. First-to-answer-wins requires a // multi-leg fork + per-leg CANCEL, which is not built yet. - // - `voicemail_box`, `ivr_menu_id`, `no_answer_timeout` are not honored. let route = config.resolve_inbound_route(provider_id, &called_number, &caller_number); let ring_browsers = route.ring_browsers; + // IVR routing: if the route targets an IVR menu, go there directly. + if let Some(ref ivr_menu_id) = route.ivr_menu_id { + if let Some(ivr) = &config.ivr { + if ivr.enabled { + if let Some(menu) = ivr.menus.iter().find(|m| m.id == *ivr_menu_id) { + let call_id = self + .route_to_ivr( + &call_id, invite, from_addr, &caller_number, + provider_id, provider_config, config, rtp_pool, socket, + public_ip, menu, &tts_engine, + ) + .await?; + return Some(InboundCallCreated { call_id, ring_browsers }); + } + } + } + } + // Pick the first registered device from the matched targets, or fall // back to any-registered-device if the route has no resolved targets. let device_addr = route @@ -726,10 +747,17 @@ impl CallManager { Some(addr) => addr, None => { // No device registered → voicemail. + // Resolve greeting WAV on-demand (may trigger TTS generation). 
+ let greeting_wav = resolve_greeting_wav( + config, + route.voicemail_box.as_deref(), + &tts_engine, + ).await; let call_id = self .route_to_voicemail( &call_id, invite, from_addr, &caller_number, provider_id, provider_config, config, rtp_pool, socket, public_ip, + greeting_wav, ) .await?; return Some(InboundCallCreated { call_id, ring_browsers }); @@ -1536,6 +1564,7 @@ impl CallManager { rtp_pool: &mut RtpPortPool, socket: &UdpSocket, public_ip: Option<&str>, + greeting_wav: Option, ) -> Option { let lan_ip = &config.proxy.lan_ip; let pub_ip = public_ip.unwrap_or(lan_ip.as_str()); @@ -1630,8 +1659,6 @@ impl CallManager { .as_millis(); let recording_dir = "nogit/voicemail/default".to_string(); let recording_path = format!("{recording_dir}/msg-{timestamp}.wav"); - let greeting_wav = find_greeting_wav(); - let out_tx = self.out_tx.clone(); let call_id_owned = call_id.to_string(); let caller_owned = caller_number.to_string(); @@ -1648,6 +1675,211 @@ impl CallManager { Some(call_id.to_string()) } + // ----------------------------------------------------------------------- + // IVR routing + // ----------------------------------------------------------------------- + + #[allow(clippy::too_many_arguments)] + async fn route_to_ivr( + &mut self, + call_id: &str, + invite: &SipMessage, + from_addr: SocketAddr, + caller_number: &str, + provider_id: &str, + provider_config: &ProviderConfig, + config: &AppConfig, + rtp_pool: &mut RtpPortPool, + socket: &UdpSocket, + public_ip: Option<&str>, + menu: &crate::config::IvrMenuConfig, + tts_engine: &Arc>, + ) -> Option { + let lan_ip = &config.proxy.lan_ip; + + let rtp_alloc = match rtp_pool.allocate().await { + Some(a) => a, + None => { + let resp = SipMessage::create_response(503, "Service Unavailable", invite, None); + let _ = socket.send_to(&resp.serialize(), from_addr).await; + return None; + } + }; + + let codec_pt = provider_config.codecs.first().copied().unwrap_or(9); + let pub_ip = public_ip.unwrap_or(lan_ip.as_str()); + + 
let sdp = sip_proto::helpers::build_sdp(&sip_proto::helpers::SdpOptions { + ip: pub_ip, + port: rtp_alloc.port, + payload_types: &provider_config.codecs, + ..Default::default() + }); + + let response = SipMessage::create_response( + 200, "OK", invite, + Some(sip_proto::message::ResponseOptions { + to_tag: Some(sip_proto::helpers::generate_tag()), + contact: Some(format!("", lan_ip, config.proxy.lan_port)), + body: Some(sdp), + content_type: Some("application/sdp".to_string()), + ..Default::default() + }), + ); + let _ = socket.send_to(&response.serialize(), from_addr).await; + + let provider_media = if invite.has_sdp_body() { + parse_sdp_endpoint(&invite.body) + .and_then(|ep| format!("{}:{}", ep.address, ep.port).parse().ok()) + } else { + Some(from_addr) + }; + let provider_media = provider_media.unwrap_or(from_addr); + + // Create call with IVR state. + let (mixer_cmd_tx, mixer_task) = spawn_mixer(call_id.to_string(), self.out_tx.clone()); + let mut call = Call::new( + call_id.to_string(), + CallDirection::Inbound, + provider_id.to_string(), + mixer_cmd_tx.clone(), + mixer_task, + ); + call.state = CallState::Ivr; + call.caller_number = Some(caller_number.to_string()); + + let provider_leg_id = format!("{call_id}-prov"); + call.legs.insert( + provider_leg_id.clone(), + LegInfo { + id: provider_leg_id.clone(), + kind: LegKind::SipProvider, + state: LegState::Connected, + codec_pt, + sip_leg: None, + sip_call_id: Some(invite.call_id().to_string()), + webrtc_session_id: None, + rtp_socket: Some(rtp_alloc.socket.clone()), + rtp_port: rtp_alloc.port, + public_ip: public_ip.map(|s| s.to_string()), + remote_media: Some(provider_media), + signaling_addr: Some(from_addr), + metadata: HashMap::new(), + }, + ); + + self.sip_index.insert( + invite.call_id().to_string(), + (call_id.to_string(), provider_leg_id.clone()), + ); + self.calls.insert(call_id.to_string(), call); + + // Emit leg_added for the provider leg. 
+ if let Some(call) = self.calls.get(call_id) { + for leg in call.legs.values() { + emit_leg_added_event(&self.out_tx, call_id, leg); + } + } + + // Generate IVR prompt on-demand via TTS (cached). + let voice = menu.prompt_voice.as_deref().unwrap_or("af_bella"); + let prompt_output = format!(".nogit/tts/ivr-menu-{}.wav", menu.id); + let prompt_params = serde_json::json!({ + "model": ".nogit/tts/kokoro-v1.0.onnx", + "voices": ".nogit/tts/voices.bin", + "voice": voice, + "text": &menu.prompt_text, + "output": &prompt_output, + "cacheable": true, + }); + + let prompt_wav = { + let mut tts = tts_engine.lock().await; + match tts.generate(&prompt_params).await { + Ok(_) => Some(prompt_output), + Err(e) => { + eprintln!("[ivr] TTS generation failed: {e}"); + None + } + } + }; + + // Load prompt and run interaction via the mixer. + let out_tx = self.out_tx.clone(); + let call_id_owned = call_id.to_string(); + let expected_digits: Vec = menu + .entries + .iter() + .filter_map(|e| e.digit.chars().next()) + .collect(); + let timeout_ms = menu.timeout_sec.unwrap_or(5) * 1000; + + tokio::spawn(async move { + // Load prompt PCM frames if available. + let prompt_frames = prompt_wav.as_ref().and_then(|wav| { + crate::audio_player::load_prompt_pcm_frames(wav).ok() + }); + + if let Some(frames) = prompt_frames { + let (result_tx, result_rx) = tokio::sync::oneshot::channel(); + let _ = mixer_cmd_tx + .send(crate::mixer::MixerCommand::StartInteraction { + leg_id: provider_leg_id.clone(), + prompt_pcm_frames: frames, + expected_digits: expected_digits.clone(), + timeout_ms, + result_tx, + }) + .await; + + // Wait for digit or timeout. 
+ let safety = tokio::time::Duration::from_millis(timeout_ms as u64 + 30000); + let result = match tokio::time::timeout(safety, result_rx).await { + Ok(Ok(r)) => r, + Ok(Err(_)) => crate::mixer::InteractionResult::Cancelled, + Err(_) => crate::mixer::InteractionResult::Timeout, + }; + + match &result { + crate::mixer::InteractionResult::Digit(d) => { + eprintln!("[ivr] caller pressed '{d}' on call {call_id_owned}"); + emit_event( + &out_tx, + "ivr_digit", + serde_json::json!({ + "call_id": call_id_owned, + "digit": d.to_string(), + }), + ); + } + crate::mixer::InteractionResult::Timeout => { + eprintln!("[ivr] timeout on call {call_id_owned}"); + emit_event( + &out_tx, + "ivr_timeout", + serde_json::json!({ "call_id": call_id_owned }), + ); + } + crate::mixer::InteractionResult::Cancelled => { + eprintln!("[ivr] cancelled on call {call_id_owned}"); + } + } + } else { + eprintln!("[ivr] no prompt available for call {call_id_owned}, ending"); + emit_event( + &out_tx, + "ivr_error", + serde_json::json!({ + "call_id": call_id_owned, + "error": "no prompt available", + }), + ); + } + }); + + Some(call_id.to_string()) + } + // ----------------------------------------------------------------------- // Internal helpers // ----------------------------------------------------------------------- @@ -1662,13 +1894,56 @@ impl CallManager { } } -fn find_greeting_wav() -> Option { - let candidates = [ +/// Resolve the greeting WAV for a voicemail box. +/// +/// Priority: +/// 1. Pre-recorded WAV from voicebox config (`greetingWavPath`) +/// 2. On-demand TTS generation from greeting text (cached via `cacheable: true`) +/// 3. Legacy hardcoded paths (`.nogit/voicemail/default/greeting.wav`, etc.) +/// 4. None — voicemail session plays beep only +async fn resolve_greeting_wav( + config: &AppConfig, + voicebox_id: Option<&str>, + tts_engine: &Arc>, +) -> Option { + // 1. Look up voicebox config. 
+ let vb = voicebox_id + .and_then(|id| config.voiceboxes.iter().find(|v| v.id == id && v.enabled)); + + if let Some(vb) = vb { + // 2. Pre-recorded WAV takes priority. + if let Some(ref wav) = vb.greeting_wav_path { + if Path::new(wav).exists() { + return Some(wav.clone()); + } + } + // 3. TTS on-demand with caching. + let text = vb.greeting_text.as_deref().unwrap_or( + "The person you are trying to reach is not available. Please leave a message after the tone.", + ); + let voice = vb.greeting_voice.as_deref().unwrap_or("af_bella"); + let output = format!(".nogit/tts/voicemail-greeting-{}.wav", vb.id); + + let params = serde_json::json!({ + "model": ".nogit/tts/kokoro-v1.0.onnx", + "voices": ".nogit/tts/voices.bin", + "voice": voice, + "text": text, + "output": &output, + "cacheable": true, + }); + let mut tts = tts_engine.lock().await; + if tts.generate(&params).await.is_ok() { + return Some(output); + } + } + + // 4. Fallback: legacy hardcoded paths. + for path in &[ ".nogit/voicemail/default/greeting.wav", ".nogit/voicemail/greeting.wav", - ]; - for path in &candidates { - if std::path::Path::new(path).exists() { + ] { + if Path::new(path).exists() { return Some(path.to_string()); } } diff --git a/rust/crates/proxy-engine/src/config.rs b/rust/crates/proxy-engine/src/config.rs index c341bc5..f9c1dd1 100644 --- a/rust/crates/proxy-engine/src/config.rs +++ b/rust/crates/proxy-engine/src/config.rs @@ -159,6 +159,10 @@ pub struct AppConfig { pub providers: Vec, pub devices: Vec, pub routing: RoutingConfig, + #[serde(default)] + pub voiceboxes: Vec, + #[serde(default)] + pub ivr: Option, } #[derive(Debug, Clone, Deserialize)] @@ -166,6 +170,59 @@ pub struct RoutingConfig { pub routes: Vec, } +// --------------------------------------------------------------------------- +// Voicebox config +// --------------------------------------------------------------------------- + +#[allow(dead_code)] +#[derive(Debug, Clone, Deserialize)] +pub struct VoiceboxConfig { + pub id: 
String, + #[serde(default)] + pub enabled: bool, + #[serde(rename = "greetingText")] + pub greeting_text: Option, + #[serde(rename = "greetingVoice")] + pub greeting_voice: Option, + #[serde(rename = "greetingWavPath")] + pub greeting_wav_path: Option, + #[serde(rename = "maxRecordingSec")] + pub max_recording_sec: Option, +} + +// --------------------------------------------------------------------------- +// IVR config +// --------------------------------------------------------------------------- + +#[allow(dead_code)] +#[derive(Debug, Clone, Deserialize)] +pub struct IvrConfig { + pub enabled: bool, + pub menus: Vec, + #[serde(rename = "entryMenuId")] + pub entry_menu_id: String, +} + +#[derive(Debug, Clone, Deserialize)] +pub struct IvrMenuConfig { + pub id: String, + #[serde(rename = "promptText")] + pub prompt_text: String, + #[serde(rename = "promptVoice")] + pub prompt_voice: Option, + pub entries: Vec, + #[serde(rename = "timeoutSec")] + pub timeout_sec: Option, +} + +#[allow(dead_code)] +#[derive(Debug, Clone, Deserialize)] +pub struct IvrMenuEntry { + pub digit: String, + pub action: String, + pub target: Option, +} + // --------------------------------------------------------------------------- // Pattern matching (ported from ts/config.ts) // --------------------------------------------------------------------------- diff --git a/rust/crates/proxy-engine/src/main.rs b/rust/crates/proxy-engine/src/main.rs index 6f34109..4fb4b3e 100644 --- a/rust/crates/proxy-engine/src/main.rs +++ b/rust/crates/proxy-engine/src/main.rs @@ -50,11 +50,12 @@ struct ProxyEngine { registrar: Registrar, call_mgr: CallManager, rtp_pool: Option, + tts_engine: Arc>, out_tx: OutTx, } impl ProxyEngine { - fn new(out_tx: OutTx) -> Self { + fn new(out_tx: OutTx, tts_engine: Arc>) -> Self { Self { config: None, transport: None, @@ -62,6 +63,7 @@ impl ProxyEngine { registrar: Registrar::new(out_tx.clone()), call_mgr: CallManager::new(out_tx.clone()), rtp_pool: None, + tts_engine, 
out_tx, } } @@ -88,15 +90,16 @@ async fn main() { // Emit ready event. emit_event(&out_tx, "ready", serde_json::json!({})); - // Shared engine state (SIP side). - let engine = Arc::new(Mutex::new(ProxyEngine::new(out_tx.clone()))); + // TTS engine — separate internal lock, lazy-loads model on first use. + let tts_engine = Arc::new(Mutex::new(tts::TtsEngine::new())); + + // Shared engine state (SIP side). TTS engine is stored inside so the + // SIP packet handler path can reach it for on-demand voicemail/IVR generation. + let engine = Arc::new(Mutex::new(ProxyEngine::new(out_tx.clone(), tts_engine))); // WebRTC engine — separate lock to avoid deadlock with SIP handlers. let webrtc = Arc::new(Mutex::new(WebRtcEngine::new(out_tx.clone()))); - // TTS engine — separate lock, lazy-loads model on first use. - let tts_engine = Arc::new(Mutex::new(tts::TtsEngine::new())); - // Read commands from stdin. let stdin = tokio::io::stdin(); let reader = BufReader::new(stdin); @@ -117,12 +120,11 @@ async fn main() { let engine = engine.clone(); let webrtc = webrtc.clone(); - let tts_engine = tts_engine.clone(); let out_tx = out_tx.clone(); // Handle commands — some are async, so we spawn. tokio::spawn(async move { - handle_command(engine, webrtc, tts_engine, &out_tx, cmd).await; + handle_command(engine, webrtc, &out_tx, cmd).await; }); } } @@ -130,7 +132,6 @@ async fn main() { async fn handle_command( engine: Arc>, webrtc: Arc>, - tts_engine: Arc>, out_tx: &OutTx, cmd: Command, ) { @@ -155,8 +156,8 @@ async fn handle_command( "add_tool_leg" => handle_add_tool_leg(engine, out_tx, &cmd).await, "remove_tool_leg" => handle_remove_tool_leg(engine, out_tx, &cmd).await, "set_leg_metadata" => handle_set_leg_metadata(engine, out_tx, &cmd).await, - // TTS command — lock tts_engine only (no SIP/WebRTC contention). - "generate_tts" => handle_generate_tts(tts_engine, out_tx, &cmd).await, + // TTS command — gets tts_engine from inside ProxyEngine. 
+ "generate_tts" => handle_generate_tts(engine, out_tx, &cmd).await, _ => respond_err(out_tx, &cmd.id, &format!("unknown command: {}", cmd.method)), } } @@ -325,8 +326,10 @@ async fn handle_sip_packet( ref registrar, ref mut call_mgr, ref mut rtp_pool, + ref tts_engine, .. } = *eng; + let tts_clone = tts_engine.clone(); let rtp_pool = rtp_pool.as_mut().unwrap(); let inbound = call_mgr .create_inbound_call( @@ -339,6 +342,7 @@ async fn handle_sip_packet( rtp_pool, socket, public_ip.as_deref(), + tts_clone, ) .await; @@ -1231,10 +1235,11 @@ async fn handle_set_leg_metadata( /// Handle `generate_tts` — synthesize text to a WAV file using Kokoro TTS. async fn handle_generate_tts( - tts_engine: Arc>, + engine: Arc>, out_tx: &OutTx, cmd: &Command, ) { + let tts_engine = engine.lock().await.tts_engine.clone(); let mut tts = tts_engine.lock().await; match tts.generate(&cmd.params).await { Ok(result) => respond_ok(out_tx, &cmd.id, result), diff --git a/rust/crates/proxy-engine/src/tts.rs b/rust/crates/proxy-engine/src/tts.rs index 02d8f17..93c0562 100644 --- a/rust/crates/proxy-engine/src/tts.rs +++ b/rust/crates/proxy-engine/src/tts.rs @@ -1,8 +1,13 @@ //! Text-to-speech engine — synthesizes text to WAV files using Kokoro neural TTS. //! //! The model is loaded lazily on first use. If the model/voices files are not -//! present, the generate command returns an error and the TS side falls back -//! to espeak-ng. +//! present, the generate command returns an error and the caller skips the prompt. +//! +//! Caching is handled internally via a `.meta` sidecar file next to each WAV. +//! When `cacheable` is true, the engine checks whether the existing WAV was +//! generated from the same text+voice; if so it returns immediately (cache hit). +//! Callers never need to check for cached files — that is entirely this module's +//! responsibility. use kokoro_tts::{KokoroTts, Voice}; use std::path::Path; @@ -32,6 +37,8 @@ impl TtsEngine { /// - `voice`: voice name (e.g. 
"af_bella") /// - `text`: text to synthesize /// - `output`: output WAV file path + /// - `cacheable`: if true, skip synthesis when the output WAV already + /// matches the same text+voice (checked via a `.meta` sidecar file) pub async fn generate(&mut self, params: &serde_json::Value) -> Result { let model_path = params.get("model").and_then(|v| v.as_str()) .ok_or("missing 'model' param")?; @@ -43,11 +50,19 @@ impl TtsEngine { .ok_or("missing 'text' param")?; let output_path = params.get("output").and_then(|v| v.as_str()) .ok_or("missing 'output' param")?; + let cacheable = params.get("cacheable").and_then(|v| v.as_bool()) + .unwrap_or(false); if text.is_empty() { return Err("empty text".into()); } + // Cache check: if cacheable and the sidecar matches, return immediately. + if cacheable && self.is_cache_hit(output_path, text, voice_name) { + eprintln!("[tts] cache hit: {output_path}"); + return Ok(serde_json::json!({ "output": output_path })); + } + // Check that model/voices files exist. if !Path::new(model_path).exists() { return Err(format!("model not found: {model_path}")); @@ -56,6 +71,11 @@ impl TtsEngine { return Err(format!("voices not found: {voices_path}")); } + // Ensure parent directory exists. + if let Some(parent) = Path::new(output_path).parent() { + let _ = std::fs::create_dir_all(parent); + } + // Lazy-load or reload if paths changed. if self.tts.is_none() || self.loaded_model_path != model_path @@ -95,9 +115,41 @@ impl TtsEngine { } writer.finalize().map_err(|e| format!("WAV finalize: {e}"))?; + // Write sidecar for future cache checks. + if cacheable { + self.write_cache_meta(output_path, text, voice_name); + } + eprintln!("[tts] wrote {output_path}"); Ok(serde_json::json!({ "output": output_path })) } + + // ----------------------------------------------------------------------- + // Cache helpers + // ----------------------------------------------------------------------- + + /// Check if the WAV + sidecar on disk match the given text+voice. 
+ fn is_cache_hit(&self, output_path: &str, text: &str, voice: &str) -> bool { + let meta_path = format!("{output_path}.meta"); + if !Path::new(output_path).exists() || !Path::new(&meta_path).exists() { + return false; + } + match std::fs::read_to_string(&meta_path) { + Ok(contents) => contents == Self::cache_key(text, voice), + Err(_) => false, + } + } + + /// Write the sidecar `.meta` file next to the WAV. + fn write_cache_meta(&self, output_path: &str, text: &str, voice: &str) { + let meta_path = format!("{output_path}.meta"); + let _ = std::fs::write(&meta_path, Self::cache_key(text, voice)); + } + + /// Build the cache key from text + voice. + fn cache_key(text: &str, voice: &str) -> String { + format!("{}\0{}", text, voice) + } } /// Map voice name string to Kokoro Voice enum variant. diff --git a/ts/00_commitinfo_data.ts b/ts/00_commitinfo_data.ts index 3a5e209..7c6514f 100644 --- a/ts/00_commitinfo_data.ts +++ b/ts/00_commitinfo_data.ts @@ -3,6 +3,6 @@ */ export const commitinfo = { name: 'siprouter', - version: '1.21.0', + version: '1.22.0', description: 'undefined' } diff --git a/ts/announcement.ts b/ts/announcement.ts deleted file mode 100644 index effca62..0000000 --- a/ts/announcement.ts +++ /dev/null @@ -1,137 +0,0 @@ -/** - * TTS announcement module — generates announcement WAV files at startup. - * - * Engine priority: espeak-ng (formant TTS, fast) → Kokoro neural TTS via - * proxy-engine → disabled. - * - * The generated WAV is left on disk for Rust's audio_player / start_interaction - * to play during calls. No encoding or RTP playback happens in TypeScript. 
- */ - -import { execSync } from 'node:child_process'; -import fs from 'node:fs'; -import path from 'node:path'; -import { sendProxyCommand, isProxyReady } from './proxybridge.ts'; - -// --------------------------------------------------------------------------- -// State -// --------------------------------------------------------------------------- - -const TTS_DIR = path.join(process.cwd(), '.nogit', 'tts'); -const ANNOUNCEMENT_TEXT = "Hello. I'm connecting your call now."; -const CACHE_WAV = path.join(TTS_DIR, 'announcement.wav'); - -// Kokoro fallback constants. -const KOKORO_MODEL = 'kokoro-v1.0.onnx'; -const KOKORO_VOICES = 'voices.bin'; -const KOKORO_VOICE = 'af_bella'; - -// --------------------------------------------------------------------------- -// TTS generators -// --------------------------------------------------------------------------- - -/** Check if espeak-ng is available on the system. */ -function isEspeakAvailable(): boolean { - try { - execSync('which espeak-ng', { stdio: 'pipe' }); - return true; - } catch { - return false; - } -} - -/** Generate announcement WAV via espeak-ng (primary engine). */ -function generateViaEspeak(wavPath: string, text: string, log: (msg: string) => void): boolean { - log('[tts] generating announcement audio via espeak-ng...'); - try { - execSync( - `espeak-ng -v en-us -s 150 -w "${wavPath}" "${text}"`, - { timeout: 10000, stdio: 'pipe' }, - ); - log('[tts] espeak-ng WAV generated'); - return true; - } catch (e: any) { - log(`[tts] espeak-ng failed: ${e.message}`); - return false; - } -} - -/** Generate announcement WAV via Kokoro TTS (fallback, runs inside proxy-engine). 
*/ -async function generateViaKokoro(wavPath: string, text: string, log: (msg: string) => void): Promise { - const modelPath = path.join(TTS_DIR, KOKORO_MODEL); - const voicesPath = path.join(TTS_DIR, KOKORO_VOICES); - - if (!fs.existsSync(modelPath) || !fs.existsSync(voicesPath)) { - log('[tts] Kokoro model/voices not found — Kokoro fallback unavailable'); - return false; - } - - if (!isProxyReady()) { - log('[tts] proxy-engine not ready — Kokoro fallback unavailable'); - return false; - } - - log('[tts] generating announcement audio via Kokoro TTS (fallback)...'); - try { - await sendProxyCommand('generate_tts', { - model: modelPath, - voices: voicesPath, - voice: KOKORO_VOICE, - text, - output: wavPath, - }); - log('[tts] Kokoro WAV generated (via proxy-engine)'); - return true; - } catch (e: any) { - log(`[tts] Kokoro failed: ${e.message}`); - return false; - } -} - -// --------------------------------------------------------------------------- -// Initialization -// --------------------------------------------------------------------------- - -/** - * Pre-generate the announcement WAV file. - * Must be called after the proxy engine is initialized. - * - * Engine priority: espeak-ng → Kokoro → disabled. - */ -export async function initAnnouncement(log: (msg: string) => void): Promise { - fs.mkdirSync(TTS_DIR, { recursive: true }); - - try { - if (!fs.existsSync(CACHE_WAV)) { - let generated = false; - - // Try espeak-ng first. - if (isEspeakAvailable()) { - generated = generateViaEspeak(CACHE_WAV, ANNOUNCEMENT_TEXT, log); - } else { - log('[tts] espeak-ng not installed — trying Kokoro fallback'); - } - - // Fall back to Kokoro (via proxy-engine). 
- if (!generated) { - generated = await generateViaKokoro(CACHE_WAV, ANNOUNCEMENT_TEXT, log); - } - - if (!generated) { - log('[tts] no TTS engine available — announcements disabled'); - return false; - } - } - - log('[tts] announcement WAV ready'); - return true; - } catch (e: any) { - log(`[tts] init error: ${e.message}`); - return false; - } -} - -/** Get the path to the cached announcement WAV, or null if not generated. */ -export function getAnnouncementWavPath(): string | null { - return fs.existsSync(CACHE_WAV) ? CACHE_WAV : null; -} diff --git a/ts/call/prompt-cache.ts b/ts/call/prompt-cache.ts deleted file mode 100644 index 4d9dfa2..0000000 --- a/ts/call/prompt-cache.ts +++ /dev/null @@ -1,275 +0,0 @@ -/** - * PromptCache — manages named audio prompt WAV files for IVR and voicemail. - * - * Generates WAV files via espeak-ng (primary) or Kokoro TTS through the - * proxy-engine (fallback). Also supports loading pre-existing WAV files - * and programmatic tone generation. - * - * All audio playback happens in Rust (audio_player / start_interaction). - * This module only manages WAV files on disk. - */ - -import { execSync } from 'node:child_process'; -import fs from 'node:fs'; -import path from 'node:path'; -import { Buffer } from 'node:buffer'; -import { sendProxyCommand, isProxyReady } from '../proxybridge.ts'; - -// --------------------------------------------------------------------------- -// Types -// --------------------------------------------------------------------------- - -/** A cached prompt — just a WAV file path and metadata. */ -export interface ICachedPrompt { - /** Unique prompt identifier. */ - id: string; - /** Path to the WAV file on disk. */ - wavPath: string; - /** Total duration in milliseconds (approximate, from WAV header). 
*/ - durationMs: number; -} - -// --------------------------------------------------------------------------- -// TTS helpers -// --------------------------------------------------------------------------- - -const TTS_DIR = path.join(process.cwd(), '.nogit', 'tts'); - -/** Check if espeak-ng is available. */ -function isEspeakAvailable(): boolean { - try { - execSync('which espeak-ng', { stdio: 'pipe' }); - return true; - } catch { - return false; - } -} - -/** Generate WAV via espeak-ng. */ -function generateViaEspeak(wavPath: string, text: string): boolean { - try { - execSync( - `espeak-ng -v en-us -s 150 -w "${wavPath}" "${text}"`, - { timeout: 10000, stdio: 'pipe' }, - ); - return true; - } catch { - return false; - } -} - -/** Generate WAV via Kokoro TTS (runs inside proxy-engine). */ -async function generateViaKokoro(wavPath: string, text: string, voice: string): Promise { - const modelPath = path.join(TTS_DIR, 'kokoro-v1.0.onnx'); - const voicesPath = path.join(TTS_DIR, 'voices.bin'); - if (!fs.existsSync(modelPath) || !fs.existsSync(voicesPath)) return false; - if (!isProxyReady()) return false; - - try { - await sendProxyCommand('generate_tts', { - model: modelPath, - voices: voicesPath, - voice, - text, - output: wavPath, - }); - return true; - } catch { - return false; - } -} - -/** Read a WAV file's duration from its header. 
*/ -function getWavDurationMs(wavPath: string): number { - try { - const wav = fs.readFileSync(wavPath); - if (wav.length < 44) return 0; - if (wav.toString('ascii', 0, 4) !== 'RIFF') return 0; - - let sampleRate = 16000; - let dataSize = 0; - let bitsPerSample = 16; - let channels = 1; - let offset = 12; - - while (offset < wav.length - 8) { - const chunkId = wav.toString('ascii', offset, offset + 4); - const chunkSize = wav.readUInt32LE(offset + 4); - if (chunkId === 'fmt ') { - channels = wav.readUInt16LE(offset + 10); - sampleRate = wav.readUInt32LE(offset + 12); - bitsPerSample = wav.readUInt16LE(offset + 22); - } - if (chunkId === 'data') { - dataSize = chunkSize; - } - offset += 8 + chunkSize; - if (offset % 2 !== 0) offset++; - } - - const bytesPerSample = (bitsPerSample / 8) * channels; - const totalSamples = bytesPerSample > 0 ? dataSize / bytesPerSample : 0; - return sampleRate > 0 ? Math.round((totalSamples / sampleRate) * 1000) : 0; - } catch { - return 0; - } -} - -// --------------------------------------------------------------------------- -// PromptCache -// --------------------------------------------------------------------------- - -export class PromptCache { - private prompts = new Map(); - private log: (msg: string) => void; - private espeakAvailable: boolean | null = null; - - constructor(log: (msg: string) => void) { - this.log = log; - } - - // ------------------------------------------------------------------------- - // Public API - // ------------------------------------------------------------------------- - - /** Get a cached prompt by ID. */ - get(id: string): ICachedPrompt | null { - return this.prompts.get(id) ?? null; - } - - /** Check if a prompt is cached. */ - has(id: string): boolean { - return this.prompts.has(id); - } - - /** List all cached prompt IDs. */ - listIds(): string[] { - return [...this.prompts.keys()]; - } - - /** - * Generate a TTS prompt WAV and cache its path. - * Uses espeak-ng (primary) or Kokoro (fallback). 
- */ - async generatePrompt(id: string, text: string, voice = 'af_bella'): Promise { - fs.mkdirSync(TTS_DIR, { recursive: true }); - const wavPath = path.join(TTS_DIR, `prompt-${id}.wav`); - - // Check espeak availability once. - if (this.espeakAvailable === null) { - this.espeakAvailable = isEspeakAvailable(); - } - - // Generate WAV if not already on disk. - if (!fs.existsSync(wavPath)) { - let generated = false; - if (this.espeakAvailable) { - generated = generateViaEspeak(wavPath, text); - } - if (!generated) { - generated = await generateViaKokoro(wavPath, text, voice); - } - if (!generated) { - this.log(`[prompt-cache] failed to generate TTS for "${id}"`); - return null; - } - this.log(`[prompt-cache] generated WAV for "${id}"`); - } - - return this.registerWav(id, wavPath); - } - - /** - * Load a pre-existing WAV file as a prompt. - */ - async loadWavPrompt(id: string, wavPath: string): Promise { - if (!fs.existsSync(wavPath)) { - this.log(`[prompt-cache] WAV not found: ${wavPath}`); - return null; - } - return this.registerWav(id, wavPath); - } - - /** - * Generate a beep tone WAV and cache it. - */ - async generateBeep( - id: string, - freqHz = 1000, - durationMs = 500, - amplitude = 8000, - ): Promise { - fs.mkdirSync(TTS_DIR, { recursive: true }); - const wavPath = path.join(TTS_DIR, `prompt-${id}.wav`); - - if (!fs.existsSync(wavPath)) { - // Generate 16kHz 16-bit mono sine wave WAV. 
- const sampleRate = 16000; - const totalSamples = Math.floor((sampleRate * durationMs) / 1000); - const pcm = Buffer.alloc(totalSamples * 2); - - for (let i = 0; i < totalSamples; i++) { - const t = i / sampleRate; - const fadeLen = Math.floor(sampleRate * 0.01); // 10ms fade - let envelope = 1.0; - if (i < fadeLen) envelope = i / fadeLen; - else if (i > totalSamples - fadeLen) envelope = (totalSamples - i) / fadeLen; - - const sample = Math.round(Math.sin(2 * Math.PI * freqHz * t) * amplitude * envelope); - pcm.writeInt16LE(Math.max(-32768, Math.min(32767, sample)), i * 2); - } - - // Write WAV file. - const headerSize = 44; - const dataSize = pcm.length; - const wav = Buffer.alloc(headerSize + dataSize); - - // RIFF header - wav.write('RIFF', 0); - wav.writeUInt32LE(36 + dataSize, 4); - wav.write('WAVE', 8); - - // fmt chunk - wav.write('fmt ', 12); - wav.writeUInt32LE(16, 16); // chunk size - wav.writeUInt16LE(1, 20); // PCM format - wav.writeUInt16LE(1, 22); // mono - wav.writeUInt32LE(sampleRate, 24); - wav.writeUInt32LE(sampleRate * 2, 28); // byte rate - wav.writeUInt16LE(2, 32); // block align - wav.writeUInt16LE(16, 34); // bits per sample - - // data chunk - wav.write('data', 36); - wav.writeUInt32LE(dataSize, 40); - pcm.copy(wav, 44); - - fs.writeFileSync(wavPath, wav); - this.log(`[prompt-cache] beep WAV generated for "${id}"`); - } - - return this.registerWav(id, wavPath); - } - - /** Remove a prompt from the cache. */ - remove(id: string): void { - this.prompts.delete(id); - } - - /** Clear all cached prompts. 
*/ - clear(): void { - this.prompts.clear(); - } - - // ------------------------------------------------------------------------- - // Internal - // ------------------------------------------------------------------------- - - private registerWav(id: string, wavPath: string): ICachedPrompt { - const durationMs = getWavDurationMs(wavPath); - const prompt: ICachedPrompt = { id, wavPath, durationMs }; - this.prompts.set(id, prompt); - this.log(`[prompt-cache] cached "${id}": ${wavPath} (${(durationMs / 1000).toFixed(1)}s)`); - return prompt; - } -} diff --git a/ts/proxybridge.ts b/ts/proxybridge.ts index 2c15863..82aa477 100644 --- a/ts/proxybridge.ts +++ b/ts/proxybridge.ts @@ -88,7 +88,7 @@ type TProxyCommands = { result: Record; }; generate_tts: { - params: { model: string; voices: string; voice: string; text: string; output: string }; + params: { model: string; voices: string; voice: string; text: string; output: string; cacheable?: boolean }; result: { output: string }; }; // WebRTC signaling — bridged from the browser via the TS control plane. diff --git a/ts/sipproxy.ts b/ts/sipproxy.ts index 7769004..1abbd36 100644 --- a/ts/sipproxy.ts +++ b/ts/sipproxy.ts @@ -24,8 +24,6 @@ import { getAllBrowserDeviceIds, getBrowserDeviceWs, } from './webrtcbridge.ts'; -import { initAnnouncement } from './announcement.ts'; -import { PromptCache } from './call/prompt-cache.ts'; import { VoiceboxManager } from './voicebox.ts'; import { initProxyEngine, @@ -170,7 +168,6 @@ for (const d of appConfig.devices) { // Initialize subsystems // --------------------------------------------------------------------------- -const promptCache = new PromptCache(log); const voiceboxManager = new VoiceboxManager(log); voiceboxManager.init(appConfig.voiceboxes ?? []); @@ -519,6 +516,8 @@ async function startProxyEngine(): Promise { providers: appConfig.providers, devices: appConfig.devices, routing: appConfig.routing, + voiceboxes: appConfig.voiceboxes ?? 
[], + ivr: appConfig.ivr, }); if (!configured) { @@ -530,31 +529,8 @@ async function startProxyEngine(): Promise { const deviceList = appConfig.devices.map((d) => d.displayName).join(', '); log(`proxy engine started | LAN ${appConfig.proxy.lanIp}:${appConfig.proxy.lanPort} | providers: ${providerList} | devices: ${deviceList}`); - // Generate TTS audio (WAV files on disk, played by Rust audio_player). - try { - await initAnnouncement(log); - - // Pre-generate prompts. - await promptCache.generateBeep('voicemail-beep', 1000, 500, 8000); - for (const vb of appConfig.voiceboxes ?? []) { - if (!vb.enabled) continue; - const promptId = `voicemail-greeting-${vb.id}`; - if (vb.greetingWavPath) { - await promptCache.loadWavPrompt(promptId, vb.greetingWavPath); - } else { - const text = vb.greetingText || 'The person you are trying to reach is not available. Please leave a message after the tone.'; - await promptCache.generatePrompt(promptId, text, vb.greetingVoice || 'af_bella'); - } - } - if (appConfig.ivr?.enabled) { - for (const menu of appConfig.ivr.menus) { - await promptCache.generatePrompt(`ivr-menu-${menu.id}`, menu.promptText, menu.promptVoice || 'af_bella'); - } - } - log(`[startup] prompts cached: ${promptCache.listIds().join(', ') || 'none'}`); - } catch (e) { - log(`[tts] init failed: ${e}`); - } + // TTS prompts (voicemail greetings, IVR menus) are generated on-demand + // by the Rust TTS engine when first needed. No startup pre-generation. } // --------------------------------------------------------------------------- @@ -620,6 +596,8 @@ initWebUi( providers: fresh.providers, devices: fresh.devices, routing: fresh.routing, + voiceboxes: fresh.voiceboxes ?? 
[], + ivr: fresh.ivr, }).then((ok) => { if (ok) log('[config] reloaded — proxy engine reconfigured'); else log('[config] reload failed — proxy engine rejected config'); diff --git a/ts_web/00_commitinfo_data.ts b/ts_web/00_commitinfo_data.ts index 3a5e209..7c6514f 100644 --- a/ts_web/00_commitinfo_data.ts +++ b/ts_web/00_commitinfo_data.ts @@ -3,6 +3,6 @@ */ export const commitinfo = { name: 'siprouter', - version: '1.21.0', + version: '1.22.0', description: 'undefined' }