feat(proxy-engine): add on-demand TTS caching for voicemail and IVR prompts
This commit is contained in:
@@ -1,5 +1,13 @@
|
|||||||
# Changelog
|
# Changelog
|
||||||
|
|
||||||
|
## 2026-04-12 - 1.22.0 - feat(proxy-engine)
|
||||||
|
add on-demand TTS caching for voicemail and IVR prompts
|
||||||
|
|
||||||
|
- Route inbound calls directly to configured IVR menus and track them with a dedicated IVR call state
|
||||||
|
- Generate voicemail greetings and IVR menu prompts inside the Rust proxy engine on demand instead of precomputing prompts in TypeScript
|
||||||
|
- Add cacheable TTS output with sidecar metadata and enable Kokoro CMUdict support for improved prompt generation
|
||||||
|
- Extend proxy configuration to include voiceboxes and IVR menus, and update documentation to reflect Kokoro-only prompt generation
|
||||||
|
|
||||||
## 2026-04-11 - 1.21.0 - feat(providers)
|
## 2026-04-11 - 1.21.0 - feat(providers)
|
||||||
replace provider creation modal with a guided multi-step setup flow
|
replace provider creation modal with a guided multi-step setup flow
|
||||||
|
|
||||||
|
|||||||
@@ -13,7 +13,7 @@
|
|||||||
"restartBackground": "pnpm run buildRust && pnpm run bundle; test -f .server.pid && kill $(cat .server.pid) 2>/dev/null; sleep 1; rm -f sip_trace.log proxy.out && nohup tsx ts/sipproxy.ts > proxy.out 2>&1 & echo $! > .server.pid; sleep 2; cat proxy.out"
|
"restartBackground": "pnpm run buildRust && pnpm run bundle; test -f .server.pid && kill $(cat .server.pid) 2>/dev/null; sleep 1; rm -f sip_trace.log proxy.out && nohup tsx ts/sipproxy.ts > proxy.out 2>&1 & echo $! > .server.pid; sleep 2; cat proxy.out"
|
||||||
},
|
},
|
||||||
"dependencies": {
|
"dependencies": {
|
||||||
"@design.estate/dees-catalog": "^3.71.1",
|
"@design.estate/dees-catalog": "^3.77.0",
|
||||||
"@design.estate/dees-element": "^2.2.4",
|
"@design.estate/dees-element": "^2.2.4",
|
||||||
"@push.rocks/smartrust": "^1.3.2",
|
"@push.rocks/smartrust": "^1.3.2",
|
||||||
"@push.rocks/smartstate": "^2.3.0",
|
"@push.rocks/smartstate": "^2.3.0",
|
||||||
|
|||||||
28
pnpm-lock.yaml
generated
28
pnpm-lock.yaml
generated
@@ -9,8 +9,8 @@ importers:
|
|||||||
.:
|
.:
|
||||||
dependencies:
|
dependencies:
|
||||||
'@design.estate/dees-catalog':
|
'@design.estate/dees-catalog':
|
||||||
specifier: ^3.71.1
|
specifier: ^3.77.0
|
||||||
version: 3.71.1(@tiptap/pm@2.27.2)
|
version: 3.77.0(@tiptap/pm@2.27.2)
|
||||||
'@design.estate/dees-element':
|
'@design.estate/dees-element':
|
||||||
specifier: ^2.2.4
|
specifier: ^2.2.4
|
||||||
version: 2.2.4
|
version: 2.2.4
|
||||||
@@ -81,8 +81,8 @@ packages:
|
|||||||
'@configvault.io/interfaces@1.0.17':
|
'@configvault.io/interfaces@1.0.17':
|
||||||
resolution: {integrity: sha512-bEcCUR2VBDJsTin8HQh8Uw/mlYl2v8A3jMIaQ+MTB9Hrqd6CZL2dL7iJdWyFl/3EIX+LDxWFR+Oq7liIq7w+1Q==}
|
resolution: {integrity: sha512-bEcCUR2VBDJsTin8HQh8Uw/mlYl2v8A3jMIaQ+MTB9Hrqd6CZL2dL7iJdWyFl/3EIX+LDxWFR+Oq7liIq7w+1Q==}
|
||||||
|
|
||||||
'@design.estate/dees-catalog@3.71.1':
|
'@design.estate/dees-catalog@3.77.0':
|
||||||
resolution: {integrity: sha512-aZzykaAtKqlBalwISF+u8mtJu37ZVLzt5IjxGA/FdL9dBurTA0O2Z6delvJsj6G/RvUUMO9sFdcFJ7NUe8BcVw==}
|
resolution: {integrity: sha512-2IfvH390WXCF733XcmEcUP9skqogTz9xlqQw5PUJZy0u2Hf6+hJTyQOi4mcKmhpTE/kCpaD51uw21Lr4ncW6cg==}
|
||||||
|
|
||||||
'@design.estate/dees-comms@1.0.30':
|
'@design.estate/dees-comms@1.0.30':
|
||||||
resolution: {integrity: sha512-KchMlklJfKAjQiJiR0xmofXtQ27VgZtBIxcMwPE9d+h3jJRv+lPZxzBQVOM0eyM0uS44S5vJMZ11IeV4uDXSHg==}
|
resolution: {integrity: sha512-KchMlklJfKAjQiJiR0xmofXtQ27VgZtBIxcMwPE9d+h3jJRv+lPZxzBQVOM0eyM0uS44S5vJMZ11IeV4uDXSHg==}
|
||||||
@@ -93,8 +93,8 @@ packages:
|
|||||||
'@design.estate/dees-element@2.2.4':
|
'@design.estate/dees-element@2.2.4':
|
||||||
resolution: {integrity: sha512-O9cA6flBMMd+pBwMQrZXwAWel9yVxgokolb+Em6gvkXxPJ0P/B5UDn4Vc2d4ts3ta55PTBm+l2dPeDVGx/bl7Q==}
|
resolution: {integrity: sha512-O9cA6flBMMd+pBwMQrZXwAWel9yVxgokolb+Em6gvkXxPJ0P/B5UDn4Vc2d4ts3ta55PTBm+l2dPeDVGx/bl7Q==}
|
||||||
|
|
||||||
'@design.estate/dees-wcctools@3.8.0':
|
'@design.estate/dees-wcctools@3.8.4':
|
||||||
resolution: {integrity: sha512-CC14iVKUrguzD9jIrdPBd9fZ4egVJEZMxl5y8iy0l7WLumeoYvGsoXj5INVkRPLRVLqziIdi4Je1hXqHt2NU+g==}
|
resolution: {integrity: sha512-KpFK/azK+a/Xpq33pXKcho+tdFKVHhKZM5ArvHqo9QMwTczgp5DZZgowTDUuqAofjZwnuVfCPHK/Pw9e64N46A==}
|
||||||
|
|
||||||
'@emnapi/core@1.9.2':
|
'@emnapi/core@1.9.2':
|
||||||
resolution: {integrity: sha512-UC+ZhH3XtczQYfOlu3lNEkdW/p4dsJ1r/bP7H8+rhao3TTTMO1ATq/4DdIi23XuGoFY+Cz0JmCbdVl0hz9jZcA==}
|
resolution: {integrity: sha512-UC+ZhH3XtczQYfOlu3lNEkdW/p4dsJ1r/bP7H8+rhao3TTTMO1ATq/4DdIi23XuGoFY+Cz0JmCbdVl0hz9jZcA==}
|
||||||
@@ -1694,8 +1694,8 @@ packages:
|
|||||||
resolution: {integrity: sha512-JvNw9Y81y33E+BEYPr0U7omo+U9AySnsMsEiXgwT6yqd31VQWTLNQqmT4ou5eqPFUrTfIDFta2wKhB1hyohtAQ==}
|
resolution: {integrity: sha512-JvNw9Y81y33E+BEYPr0U7omo+U9AySnsMsEiXgwT6yqd31VQWTLNQqmT4ou5eqPFUrTfIDFta2wKhB1hyohtAQ==}
|
||||||
engines: {node: 20 || >=22}
|
engines: {node: 20 || >=22}
|
||||||
|
|
||||||
lucide@0.577.0:
|
lucide@1.8.0:
|
||||||
resolution: {integrity: sha512-PpC/m5eOItp/WU/GlQPFBXDOhq6HibL73KzYP37OX3LM7VmzWQF8voEj8QRWUFvy9FIKfeDQkWYoyS1D/MdWFA==}
|
resolution: {integrity: sha512-JjV/QnadgFLj1Pyu9IKl0lknrolFEzo04B64QcYLLeRzZl/iEHpdbSrRRKbyXcv45SZNv+WGjIUCT33e7xHO6Q==}
|
||||||
|
|
||||||
make-dir@3.1.0:
|
make-dir@3.1.0:
|
||||||
resolution: {integrity: sha512-g3FeP20LNwhALb/6Cz6Dd4F2ngze0jz7tbzrD2wAV+o9FeNHe4rL+yK2md0J/fiSf1sa1ADhXqi5+oVwOM/eGw==}
|
resolution: {integrity: sha512-g3FeP20LNwhALb/6Cz6Dd4F2ngze0jz7tbzrD2wAV+o9FeNHe4rL+yK2md0J/fiSf1sa1ADhXqi5+oVwOM/eGw==}
|
||||||
@@ -2462,7 +2462,7 @@ snapshots:
|
|||||||
'@api.global/typedrequest-interfaces': 3.0.19
|
'@api.global/typedrequest-interfaces': 3.0.19
|
||||||
'@api.global/typedsocket': 4.1.2(@push.rocks/smartserve@2.0.3)
|
'@api.global/typedsocket': 4.1.2(@push.rocks/smartserve@2.0.3)
|
||||||
'@cloudflare/workers-types': 4.20260409.1
|
'@cloudflare/workers-types': 4.20260409.1
|
||||||
'@design.estate/dees-catalog': 3.71.1(@tiptap/pm@2.27.2)
|
'@design.estate/dees-catalog': 3.77.0(@tiptap/pm@2.27.2)
|
||||||
'@design.estate/dees-comms': 1.0.30
|
'@design.estate/dees-comms': 1.0.30
|
||||||
'@push.rocks/lik': 6.4.0
|
'@push.rocks/lik': 6.4.0
|
||||||
'@push.rocks/smartdelay': 3.0.5
|
'@push.rocks/smartdelay': 3.0.5
|
||||||
@@ -2529,11 +2529,11 @@ snapshots:
|
|||||||
dependencies:
|
dependencies:
|
||||||
'@api.global/typedrequest-interfaces': 3.0.19
|
'@api.global/typedrequest-interfaces': 3.0.19
|
||||||
|
|
||||||
'@design.estate/dees-catalog@3.71.1(@tiptap/pm@2.27.2)':
|
'@design.estate/dees-catalog@3.77.0(@tiptap/pm@2.27.2)':
|
||||||
dependencies:
|
dependencies:
|
||||||
'@design.estate/dees-domtools': 2.5.4
|
'@design.estate/dees-domtools': 2.5.4
|
||||||
'@design.estate/dees-element': 2.2.4
|
'@design.estate/dees-element': 2.2.4
|
||||||
'@design.estate/dees-wcctools': 3.8.0
|
'@design.estate/dees-wcctools': 3.8.4
|
||||||
'@fortawesome/fontawesome-svg-core': 7.2.0
|
'@fortawesome/fontawesome-svg-core': 7.2.0
|
||||||
'@fortawesome/free-brands-svg-icons': 7.2.0
|
'@fortawesome/free-brands-svg-icons': 7.2.0
|
||||||
'@fortawesome/free-regular-svg-icons': 7.2.0
|
'@fortawesome/free-regular-svg-icons': 7.2.0
|
||||||
@@ -2553,7 +2553,7 @@ snapshots:
|
|||||||
highlight.js: 11.11.1
|
highlight.js: 11.11.1
|
||||||
ibantools: 4.5.4
|
ibantools: 4.5.4
|
||||||
lightweight-charts: 5.1.0
|
lightweight-charts: 5.1.0
|
||||||
lucide: 0.577.0
|
lucide: 1.8.0
|
||||||
monaco-editor: 0.55.1
|
monaco-editor: 0.55.1
|
||||||
pdfjs-dist: 4.10.38
|
pdfjs-dist: 4.10.38
|
||||||
xterm: 5.3.0
|
xterm: 5.3.0
|
||||||
@@ -2610,7 +2610,7 @@ snapshots:
|
|||||||
- supports-color
|
- supports-color
|
||||||
- vue
|
- vue
|
||||||
|
|
||||||
'@design.estate/dees-wcctools@3.8.0':
|
'@design.estate/dees-wcctools@3.8.4':
|
||||||
dependencies:
|
dependencies:
|
||||||
'@design.estate/dees-domtools': 2.5.4
|
'@design.estate/dees-domtools': 2.5.4
|
||||||
'@design.estate/dees-element': 2.2.4
|
'@design.estate/dees-element': 2.2.4
|
||||||
@@ -4487,7 +4487,7 @@ snapshots:
|
|||||||
|
|
||||||
lru-cache@11.3.3: {}
|
lru-cache@11.3.3: {}
|
||||||
|
|
||||||
lucide@0.577.0: {}
|
lucide@1.8.0: {}
|
||||||
|
|
||||||
make-dir@3.1.0:
|
make-dir@3.1.0:
|
||||||
dependencies:
|
dependencies:
|
||||||
|
|||||||
11
readme.md
11
readme.md
@@ -20,7 +20,7 @@ siprouter sits between your SIP trunk providers and your endpoints — hardware
|
|||||||
- 🎯 **Adaptive Jitter Buffer** — Per-leg jitter buffering with sequence-based reordering, adaptive depth (60–120ms), Opus PLC for lost packets, and hold/resume detection
|
- 🎯 **Adaptive Jitter Buffer** — Per-leg jitter buffering with sequence-based reordering, adaptive depth (60–120ms), Opus PLC for lost packets, and hold/resume detection
|
||||||
- 📧 **Voicemail** — Configurable voicemail boxes with TTS greetings, recording, and web playback
|
- 📧 **Voicemail** — Configurable voicemail boxes with TTS greetings, recording, and web playback
|
||||||
- 🔢 **IVR Menus** — DTMF-navigable interactive voice response with nested menus, routing actions, and custom prompts
|
- 🔢 **IVR Menus** — DTMF-navigable interactive voice response with nested menus, routing actions, and custom prompts
|
||||||
- 🗣️ **Neural TTS** — Kokoro-powered announcements and greetings with 25+ voice presets, backed by espeak-ng fallback
|
- 🗣️ **Neural TTS** — Kokoro-powered greetings and IVR prompts with 25+ voice presets
|
||||||
- 🎙️ **Call Recording** — Per-source separated WAV recording at 48kHz via tool legs
|
- 🎙️ **Call Recording** — Per-source separated WAV recording at 48kHz via tool legs
|
||||||
- 🖥️ **Web Dashboard** — Real-time SPA with 9 views: live calls, browser phone, routing, voicemail, IVR, contacts, providers, and streaming logs
|
- 🖥️ **Web Dashboard** — Real-time SPA with 9 views: live calls, browser phone, routing, voicemail, IVR, contacts, providers, and streaming logs
|
||||||
|
|
||||||
@@ -98,7 +98,6 @@ sequenceDiagram
|
|||||||
- **Node.js** ≥ 20 with `tsx` globally available
|
- **Node.js** ≥ 20 with `tsx` globally available
|
||||||
- **pnpm** for package management
|
- **pnpm** for package management
|
||||||
- **Rust** toolchain (for building the proxy engine)
|
- **Rust** toolchain (for building the proxy engine)
|
||||||
- **espeak-ng** (optional, for TTS fallback)
|
|
||||||
|
|
||||||
### Install & Build
|
### Install & Build
|
||||||
|
|
||||||
@@ -190,7 +189,7 @@ Create `.nogit/config.json`:
|
|||||||
|
|
||||||
### TTS Setup (Optional)
|
### TTS Setup (Optional)
|
||||||
|
|
||||||
For neural announcements and voicemail greetings, download the Kokoro TTS model:
|
For neural voicemail greetings and IVR prompts, download the Kokoro TTS model:
|
||||||
|
|
||||||
```bash
|
```bash
|
||||||
mkdir -p .nogit/tts
|
mkdir -p .nogit/tts
|
||||||
@@ -200,7 +199,7 @@ curl -L -o .nogit/tts/voices.bin \
|
|||||||
https://github.com/mzdk100/kokoro/releases/download/V1.0/voices.bin
|
https://github.com/mzdk100/kokoro/releases/download/V1.0/voices.bin
|
||||||
```
|
```
|
||||||
|
|
||||||
Without the model files, TTS falls back to `espeak-ng`. Without either, announcements are skipped — everything else works fine.
|
Without the model files, TTS prompts (IVR menus, voicemail greetings) are skipped — everything else works fine.
|
||||||
|
|
||||||
### Run
|
### Run
|
||||||
|
|
||||||
@@ -227,7 +226,6 @@ siprouter/
|
|||||||
│ ├── frontend.ts # Web dashboard HTTP/WS server + REST API
|
│ ├── frontend.ts # Web dashboard HTTP/WS server + REST API
|
||||||
│ ├── webrtcbridge.ts # WebRTC signaling layer
|
│ ├── webrtcbridge.ts # WebRTC signaling layer
|
||||||
│ ├── registrar.ts # Browser softphone registration
|
│ ├── registrar.ts # Browser softphone registration
|
||||||
│ ├── announcement.ts # TTS announcement generator (espeak-ng / Kokoro)
|
|
||||||
│ ├── voicebox.ts # Voicemail box management
|
│ ├── voicebox.ts # Voicemail box management
|
||||||
│ └── call/
|
│ └── call/
|
||||||
│ └── prompt-cache.ts # Named audio prompt WAV management
|
│ └── prompt-cache.ts # Named audio prompt WAV management
|
||||||
@@ -288,13 +286,12 @@ flowchart LR
|
|||||||
|
|
||||||
## 🗣️ Neural TTS
|
## 🗣️ Neural TTS
|
||||||
|
|
||||||
Announcements and voicemail greetings are synthesized using [Kokoro TTS](https://github.com/mzdk100/kokoro) — an 82M parameter neural model running via ONNX Runtime directly in the Rust process:
|
Voicemail greetings and IVR prompts are synthesized using [Kokoro TTS](https://github.com/mzdk100/kokoro) — an 82M parameter neural model running via ONNX Runtime directly in the Rust process:
|
||||||
|
|
||||||
- **24 kHz, 16-bit mono** output
|
- **24 kHz, 16-bit mono** output
|
||||||
- **25+ voice presets** — American/British, male/female (e.g., `af_bella`, `am_adam`, `bf_emma`, `bm_george`)
|
- **25+ voice presets** — American/British, male/female (e.g., `af_bella`, `am_adam`, `bf_emma`, `bm_george`)
|
||||||
- **~800ms** synthesis time for a 3-second phrase
|
- **~800ms** synthesis time for a 3-second phrase
|
||||||
- Lazy-loaded on first use — no startup cost if TTS is unused
|
- Lazy-loaded on first use — no startup cost if TTS is unused
|
||||||
- Falls back to `espeak-ng` if the ONNX model is not available
|
|
||||||
|
|
||||||
---
|
---
|
||||||
|
|
||||||
|
|||||||
10
rust/Cargo.lock
generated
10
rust/Cargo.lock
generated
@@ -532,6 +532,15 @@ dependencies = [
|
|||||||
"cc",
|
"cc",
|
||||||
]
|
]
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "cmudict-fast"
|
||||||
|
version = "0.8.0"
|
||||||
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
|
checksum = "2c9f73004e928ed46c3e7fd7406d2b12c8674153295f08af084b49860276dc02"
|
||||||
|
dependencies = [
|
||||||
|
"thiserror",
|
||||||
|
]
|
||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
name = "codec-lib"
|
name = "codec-lib"
|
||||||
version = "0.1.0"
|
version = "0.1.0"
|
||||||
@@ -1730,6 +1739,7 @@ dependencies = [
|
|||||||
"bincode 2.0.1",
|
"bincode 2.0.1",
|
||||||
"cc",
|
"cc",
|
||||||
"chinese-number",
|
"chinese-number",
|
||||||
|
"cmudict-fast",
|
||||||
"futures",
|
"futures",
|
||||||
"jieba-rs",
|
"jieba-rs",
|
||||||
"log",
|
"log",
|
||||||
|
|||||||
@@ -19,7 +19,7 @@ regex-lite = "0.1"
|
|||||||
webrtc = "0.8"
|
webrtc = "0.8"
|
||||||
rand = "0.8"
|
rand = "0.8"
|
||||||
hound = "3.5"
|
hound = "3.5"
|
||||||
kokoro-tts = { version = "0.3", default-features = false }
|
kokoro-tts = { version = "0.3", default-features = false, features = ["use-cmudict"] }
|
||||||
ort = { version = "=2.0.0-rc.11", default-features = false, features = [
|
ort = { version = "=2.0.0-rc.11", default-features = false, features = [
|
||||||
"std", "download-binaries", "copy-dylibs", "ndarray",
|
"std", "download-binaries", "copy-dylibs", "ndarray",
|
||||||
"tls-native-vendored"
|
"tls-native-vendored"
|
||||||
|
|||||||
@@ -23,6 +23,7 @@ pub enum CallState {
|
|||||||
Ringing,
|
Ringing,
|
||||||
Connected,
|
Connected,
|
||||||
Voicemail,
|
Voicemail,
|
||||||
|
Ivr,
|
||||||
Terminated,
|
Terminated,
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -37,6 +38,7 @@ impl CallState {
|
|||||||
Self::Ringing => "ringing",
|
Self::Ringing => "ringing",
|
||||||
Self::Connected => "connected",
|
Self::Connected => "connected",
|
||||||
Self::Voicemail => "voicemail",
|
Self::Voicemail => "voicemail",
|
||||||
|
Self::Ivr => "ivr",
|
||||||
Self::Terminated => "terminated",
|
Self::Terminated => "terminated",
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -12,13 +12,16 @@ use crate::mixer::spawn_mixer;
|
|||||||
use crate::registrar::Registrar;
|
use crate::registrar::Registrar;
|
||||||
use crate::rtp::RtpPortPool;
|
use crate::rtp::RtpPortPool;
|
||||||
use crate::sip_leg::{SipLeg, SipLegAction, SipLegConfig};
|
use crate::sip_leg::{SipLeg, SipLegAction, SipLegConfig};
|
||||||
|
use crate::tts::TtsEngine;
|
||||||
use sip_proto::helpers::{build_sdp, generate_call_id, generate_tag, parse_sdp_endpoint, SdpOptions};
|
use sip_proto::helpers::{build_sdp, generate_call_id, generate_tag, parse_sdp_endpoint, SdpOptions};
|
||||||
use sip_proto::message::{ResponseOptions, SipMessage};
|
use sip_proto::message::{ResponseOptions, SipMessage};
|
||||||
use sip_proto::rewrite::{rewrite_sdp, rewrite_sip_uri};
|
use sip_proto::rewrite::{rewrite_sdp, rewrite_sip_uri};
|
||||||
use std::collections::HashMap;
|
use std::collections::HashMap;
|
||||||
use std::net::SocketAddr;
|
use std::net::SocketAddr;
|
||||||
|
use std::path::Path;
|
||||||
use std::sync::Arc;
|
use std::sync::Arc;
|
||||||
use tokio::net::UdpSocket;
|
use tokio::net::UdpSocket;
|
||||||
|
use tokio::sync::Mutex;
|
||||||
|
|
||||||
/// Result of creating an inbound call — carries both the call id and
|
/// Result of creating an inbound call — carries both the call id and
|
||||||
/// whether browsers should be notified (flows from the matched inbound
|
/// whether browsers should be notified (flows from the matched inbound
|
||||||
@@ -681,6 +684,7 @@ impl CallManager {
|
|||||||
rtp_pool: &mut RtpPortPool,
|
rtp_pool: &mut RtpPortPool,
|
||||||
socket: &UdpSocket,
|
socket: &UdpSocket,
|
||||||
public_ip: Option<&str>,
|
public_ip: Option<&str>,
|
||||||
|
tts_engine: Arc<Mutex<TtsEngine>>,
|
||||||
) -> Option<InboundCallCreated> {
|
) -> Option<InboundCallCreated> {
|
||||||
let call_id = self.next_call_id();
|
let call_id = self.next_call_id();
|
||||||
let lan_ip = &config.proxy.lan_ip;
|
let lan_ip = &config.proxy.lan_ip;
|
||||||
@@ -710,10 +714,27 @@ impl CallManager {
|
|||||||
// - `ring_browsers` is informational only — browsers see a toast but
|
// - `ring_browsers` is informational only — browsers see a toast but
|
||||||
// do not race the SIP device. First-to-answer-wins requires a
|
// do not race the SIP device. First-to-answer-wins requires a
|
||||||
// multi-leg fork + per-leg CANCEL, which is not built yet.
|
// multi-leg fork + per-leg CANCEL, which is not built yet.
|
||||||
// - `voicemail_box`, `ivr_menu_id`, `no_answer_timeout` are not honored.
|
|
||||||
let route = config.resolve_inbound_route(provider_id, &called_number, &caller_number);
|
let route = config.resolve_inbound_route(provider_id, &called_number, &caller_number);
|
||||||
let ring_browsers = route.ring_browsers;
|
let ring_browsers = route.ring_browsers;
|
||||||
|
|
||||||
|
// IVR routing: if the route targets an IVR menu, go there directly.
|
||||||
|
if let Some(ref ivr_menu_id) = route.ivr_menu_id {
|
||||||
|
if let Some(ivr) = &config.ivr {
|
||||||
|
if ivr.enabled {
|
||||||
|
if let Some(menu) = ivr.menus.iter().find(|m| m.id == *ivr_menu_id) {
|
||||||
|
let call_id = self
|
||||||
|
.route_to_ivr(
|
||||||
|
&call_id, invite, from_addr, &caller_number,
|
||||||
|
provider_id, provider_config, config, rtp_pool, socket,
|
||||||
|
public_ip, menu, &tts_engine,
|
||||||
|
)
|
||||||
|
.await?;
|
||||||
|
return Some(InboundCallCreated { call_id, ring_browsers });
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
// Pick the first registered device from the matched targets, or fall
|
// Pick the first registered device from the matched targets, or fall
|
||||||
// back to any-registered-device if the route has no resolved targets.
|
// back to any-registered-device if the route has no resolved targets.
|
||||||
let device_addr = route
|
let device_addr = route
|
||||||
@@ -726,10 +747,17 @@ impl CallManager {
|
|||||||
Some(addr) => addr,
|
Some(addr) => addr,
|
||||||
None => {
|
None => {
|
||||||
// No device registered → voicemail.
|
// No device registered → voicemail.
|
||||||
|
// Resolve greeting WAV on-demand (may trigger TTS generation).
|
||||||
|
let greeting_wav = resolve_greeting_wav(
|
||||||
|
config,
|
||||||
|
route.voicemail_box.as_deref(),
|
||||||
|
&tts_engine,
|
||||||
|
).await;
|
||||||
let call_id = self
|
let call_id = self
|
||||||
.route_to_voicemail(
|
.route_to_voicemail(
|
||||||
&call_id, invite, from_addr, &caller_number,
|
&call_id, invite, from_addr, &caller_number,
|
||||||
provider_id, provider_config, config, rtp_pool, socket, public_ip,
|
provider_id, provider_config, config, rtp_pool, socket, public_ip,
|
||||||
|
greeting_wav,
|
||||||
)
|
)
|
||||||
.await?;
|
.await?;
|
||||||
return Some(InboundCallCreated { call_id, ring_browsers });
|
return Some(InboundCallCreated { call_id, ring_browsers });
|
||||||
@@ -1536,6 +1564,7 @@ impl CallManager {
|
|||||||
rtp_pool: &mut RtpPortPool,
|
rtp_pool: &mut RtpPortPool,
|
||||||
socket: &UdpSocket,
|
socket: &UdpSocket,
|
||||||
public_ip: Option<&str>,
|
public_ip: Option<&str>,
|
||||||
|
greeting_wav: Option<String>,
|
||||||
) -> Option<String> {
|
) -> Option<String> {
|
||||||
let lan_ip = &config.proxy.lan_ip;
|
let lan_ip = &config.proxy.lan_ip;
|
||||||
let pub_ip = public_ip.unwrap_or(lan_ip.as_str());
|
let pub_ip = public_ip.unwrap_or(lan_ip.as_str());
|
||||||
@@ -1630,8 +1659,6 @@ impl CallManager {
|
|||||||
.as_millis();
|
.as_millis();
|
||||||
let recording_dir = "nogit/voicemail/default".to_string();
|
let recording_dir = "nogit/voicemail/default".to_string();
|
||||||
let recording_path = format!("{recording_dir}/msg-{timestamp}.wav");
|
let recording_path = format!("{recording_dir}/msg-{timestamp}.wav");
|
||||||
let greeting_wav = find_greeting_wav();
|
|
||||||
|
|
||||||
let out_tx = self.out_tx.clone();
|
let out_tx = self.out_tx.clone();
|
||||||
let call_id_owned = call_id.to_string();
|
let call_id_owned = call_id.to_string();
|
||||||
let caller_owned = caller_number.to_string();
|
let caller_owned = caller_number.to_string();
|
||||||
@@ -1648,6 +1675,211 @@ impl CallManager {
|
|||||||
Some(call_id.to_string())
|
Some(call_id.to_string())
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// -----------------------------------------------------------------------
|
||||||
|
// IVR routing
|
||||||
|
// -----------------------------------------------------------------------
|
||||||
|
|
||||||
|
#[allow(clippy::too_many_arguments)]
|
||||||
|
async fn route_to_ivr(
|
||||||
|
&mut self,
|
||||||
|
call_id: &str,
|
||||||
|
invite: &SipMessage,
|
||||||
|
from_addr: SocketAddr,
|
||||||
|
caller_number: &str,
|
||||||
|
provider_id: &str,
|
||||||
|
provider_config: &ProviderConfig,
|
||||||
|
config: &AppConfig,
|
||||||
|
rtp_pool: &mut RtpPortPool,
|
||||||
|
socket: &UdpSocket,
|
||||||
|
public_ip: Option<&str>,
|
||||||
|
menu: &crate::config::IvrMenuConfig,
|
||||||
|
tts_engine: &Arc<Mutex<TtsEngine>>,
|
||||||
|
) -> Option<String> {
|
||||||
|
let lan_ip = &config.proxy.lan_ip;
|
||||||
|
|
||||||
|
let rtp_alloc = match rtp_pool.allocate().await {
|
||||||
|
Some(a) => a,
|
||||||
|
None => {
|
||||||
|
let resp = SipMessage::create_response(503, "Service Unavailable", invite, None);
|
||||||
|
let _ = socket.send_to(&resp.serialize(), from_addr).await;
|
||||||
|
return None;
|
||||||
|
}
|
||||||
|
};
|
||||||
|
|
||||||
|
let codec_pt = provider_config.codecs.first().copied().unwrap_or(9);
|
||||||
|
let pub_ip = public_ip.unwrap_or(lan_ip.as_str());
|
||||||
|
|
||||||
|
let sdp = sip_proto::helpers::build_sdp(&sip_proto::helpers::SdpOptions {
|
||||||
|
ip: pub_ip,
|
||||||
|
port: rtp_alloc.port,
|
||||||
|
payload_types: &provider_config.codecs,
|
||||||
|
..Default::default()
|
||||||
|
});
|
||||||
|
|
||||||
|
let response = SipMessage::create_response(
|
||||||
|
200, "OK", invite,
|
||||||
|
Some(sip_proto::message::ResponseOptions {
|
||||||
|
to_tag: Some(sip_proto::helpers::generate_tag()),
|
||||||
|
contact: Some(format!("<sip:{}:{}>", lan_ip, config.proxy.lan_port)),
|
||||||
|
body: Some(sdp),
|
||||||
|
content_type: Some("application/sdp".to_string()),
|
||||||
|
..Default::default()
|
||||||
|
}),
|
||||||
|
);
|
||||||
|
let _ = socket.send_to(&response.serialize(), from_addr).await;
|
||||||
|
|
||||||
|
let provider_media = if invite.has_sdp_body() {
|
||||||
|
parse_sdp_endpoint(&invite.body)
|
||||||
|
.and_then(|ep| format!("{}:{}", ep.address, ep.port).parse().ok())
|
||||||
|
} else {
|
||||||
|
Some(from_addr)
|
||||||
|
};
|
||||||
|
let provider_media = provider_media.unwrap_or(from_addr);
|
||||||
|
|
||||||
|
// Create call with IVR state.
|
||||||
|
let (mixer_cmd_tx, mixer_task) = spawn_mixer(call_id.to_string(), self.out_tx.clone());
|
||||||
|
let mut call = Call::new(
|
||||||
|
call_id.to_string(),
|
||||||
|
CallDirection::Inbound,
|
||||||
|
provider_id.to_string(),
|
||||||
|
mixer_cmd_tx.clone(),
|
||||||
|
mixer_task,
|
||||||
|
);
|
||||||
|
call.state = CallState::Ivr;
|
||||||
|
call.caller_number = Some(caller_number.to_string());
|
||||||
|
|
||||||
|
let provider_leg_id = format!("{call_id}-prov");
|
||||||
|
call.legs.insert(
|
||||||
|
provider_leg_id.clone(),
|
||||||
|
LegInfo {
|
||||||
|
id: provider_leg_id.clone(),
|
||||||
|
kind: LegKind::SipProvider,
|
||||||
|
state: LegState::Connected,
|
||||||
|
codec_pt,
|
||||||
|
sip_leg: None,
|
||||||
|
sip_call_id: Some(invite.call_id().to_string()),
|
||||||
|
webrtc_session_id: None,
|
||||||
|
rtp_socket: Some(rtp_alloc.socket.clone()),
|
||||||
|
rtp_port: rtp_alloc.port,
|
||||||
|
public_ip: public_ip.map(|s| s.to_string()),
|
||||||
|
remote_media: Some(provider_media),
|
||||||
|
signaling_addr: Some(from_addr),
|
||||||
|
metadata: HashMap::new(),
|
||||||
|
},
|
||||||
|
);
|
||||||
|
|
||||||
|
self.sip_index.insert(
|
||||||
|
invite.call_id().to_string(),
|
||||||
|
(call_id.to_string(), provider_leg_id.clone()),
|
||||||
|
);
|
||||||
|
self.calls.insert(call_id.to_string(), call);
|
||||||
|
|
||||||
|
// Emit leg_added for the provider leg.
|
||||||
|
if let Some(call) = self.calls.get(call_id) {
|
||||||
|
for leg in call.legs.values() {
|
||||||
|
emit_leg_added_event(&self.out_tx, call_id, leg);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// Generate IVR prompt on-demand via TTS (cached).
|
||||||
|
let voice = menu.prompt_voice.as_deref().unwrap_or("af_bella");
|
||||||
|
let prompt_output = format!(".nogit/tts/ivr-menu-{}.wav", menu.id);
|
||||||
|
let prompt_params = serde_json::json!({
|
||||||
|
"model": ".nogit/tts/kokoro-v1.0.onnx",
|
||||||
|
"voices": ".nogit/tts/voices.bin",
|
||||||
|
"voice": voice,
|
||||||
|
"text": &menu.prompt_text,
|
||||||
|
"output": &prompt_output,
|
||||||
|
"cacheable": true,
|
||||||
|
});
|
||||||
|
|
||||||
|
let prompt_wav = {
|
||||||
|
let mut tts = tts_engine.lock().await;
|
||||||
|
match tts.generate(&prompt_params).await {
|
||||||
|
Ok(_) => Some(prompt_output),
|
||||||
|
Err(e) => {
|
||||||
|
eprintln!("[ivr] TTS generation failed: {e}");
|
||||||
|
None
|
||||||
|
}
|
||||||
|
}
|
||||||
|
};
|
||||||
|
|
||||||
|
// Load prompt and run interaction via the mixer.
|
||||||
|
let out_tx = self.out_tx.clone();
|
||||||
|
let call_id_owned = call_id.to_string();
|
||||||
|
let expected_digits: Vec<char> = menu
|
||||||
|
.entries
|
||||||
|
.iter()
|
||||||
|
.filter_map(|e| e.digit.chars().next())
|
||||||
|
.collect();
|
||||||
|
let timeout_ms = menu.timeout_sec.unwrap_or(5) * 1000;
|
||||||
|
|
||||||
|
tokio::spawn(async move {
|
||||||
|
// Load prompt PCM frames if available.
|
||||||
|
let prompt_frames = prompt_wav.as_ref().and_then(|wav| {
|
||||||
|
crate::audio_player::load_prompt_pcm_frames(wav).ok()
|
||||||
|
});
|
||||||
|
|
||||||
|
if let Some(frames) = prompt_frames {
|
||||||
|
let (result_tx, result_rx) = tokio::sync::oneshot::channel();
|
||||||
|
let _ = mixer_cmd_tx
|
||||||
|
.send(crate::mixer::MixerCommand::StartInteraction {
|
||||||
|
leg_id: provider_leg_id.clone(),
|
||||||
|
prompt_pcm_frames: frames,
|
||||||
|
expected_digits: expected_digits.clone(),
|
||||||
|
timeout_ms,
|
||||||
|
result_tx,
|
||||||
|
})
|
||||||
|
.await;
|
||||||
|
|
||||||
|
// Wait for digit or timeout.
|
||||||
|
let safety = tokio::time::Duration::from_millis(timeout_ms as u64 + 30000);
|
||||||
|
let result = match tokio::time::timeout(safety, result_rx).await {
|
||||||
|
Ok(Ok(r)) => r,
|
||||||
|
Ok(Err(_)) => crate::mixer::InteractionResult::Cancelled,
|
||||||
|
Err(_) => crate::mixer::InteractionResult::Timeout,
|
||||||
|
};
|
||||||
|
|
||||||
|
match &result {
|
||||||
|
crate::mixer::InteractionResult::Digit(d) => {
|
||||||
|
eprintln!("[ivr] caller pressed '{d}' on call {call_id_owned}");
|
||||||
|
emit_event(
|
||||||
|
&out_tx,
|
||||||
|
"ivr_digit",
|
||||||
|
serde_json::json!({
|
||||||
|
"call_id": call_id_owned,
|
||||||
|
"digit": d.to_string(),
|
||||||
|
}),
|
||||||
|
);
|
||||||
|
}
|
||||||
|
crate::mixer::InteractionResult::Timeout => {
|
||||||
|
eprintln!("[ivr] timeout on call {call_id_owned}");
|
||||||
|
emit_event(
|
||||||
|
&out_tx,
|
||||||
|
"ivr_timeout",
|
||||||
|
serde_json::json!({ "call_id": call_id_owned }),
|
||||||
|
);
|
||||||
|
}
|
||||||
|
crate::mixer::InteractionResult::Cancelled => {
|
||||||
|
eprintln!("[ivr] cancelled on call {call_id_owned}");
|
||||||
|
}
|
||||||
|
}
|
||||||
|
} else {
|
||||||
|
eprintln!("[ivr] no prompt available for call {call_id_owned}, ending");
|
||||||
|
emit_event(
|
||||||
|
&out_tx,
|
||||||
|
"ivr_error",
|
||||||
|
serde_json::json!({
|
||||||
|
"call_id": call_id_owned,
|
||||||
|
"error": "no prompt available",
|
||||||
|
}),
|
||||||
|
);
|
||||||
|
}
|
||||||
|
});
|
||||||
|
|
||||||
|
Some(call_id.to_string())
|
||||||
|
}
|
||||||
|
|
||||||
// -----------------------------------------------------------------------
|
// -----------------------------------------------------------------------
|
||||||
// Internal helpers
|
// Internal helpers
|
||||||
// -----------------------------------------------------------------------
|
// -----------------------------------------------------------------------
|
||||||
@@ -1662,13 +1894,56 @@ impl CallManager {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
fn find_greeting_wav() -> Option<String> {
|
/// Resolve the greeting WAV for a voicemail box.
|
||||||
let candidates = [
|
///
|
||||||
|
/// Priority:
|
||||||
|
/// 1. Pre-recorded WAV from voicebox config (`greetingWavPath`)
|
||||||
|
/// 2. On-demand TTS generation from greeting text (cached via `cacheable: true`)
|
||||||
|
/// 3. Legacy hardcoded paths (`.nogit/voicemail/default/greeting.wav`, etc.)
|
||||||
|
/// 4. None — voicemail session plays beep only
|
||||||
|
async fn resolve_greeting_wav(
|
||||||
|
config: &AppConfig,
|
||||||
|
voicebox_id: Option<&str>,
|
||||||
|
tts_engine: &Arc<Mutex<TtsEngine>>,
|
||||||
|
) -> Option<String> {
|
||||||
|
// 1. Look up voicebox config.
|
||||||
|
let vb = voicebox_id
|
||||||
|
.and_then(|id| config.voiceboxes.iter().find(|v| v.id == id && v.enabled));
|
||||||
|
|
||||||
|
if let Some(vb) = vb {
|
||||||
|
// 2. Pre-recorded WAV takes priority.
|
||||||
|
if let Some(ref wav) = vb.greeting_wav_path {
|
||||||
|
if Path::new(wav).exists() {
|
||||||
|
return Some(wav.clone());
|
||||||
|
}
|
||||||
|
}
|
||||||
|
// 3. TTS on-demand with caching.
|
||||||
|
let text = vb.greeting_text.as_deref().unwrap_or(
|
||||||
|
"The person you are trying to reach is not available. Please leave a message after the tone.",
|
||||||
|
);
|
||||||
|
let voice = vb.greeting_voice.as_deref().unwrap_or("af_bella");
|
||||||
|
let output = format!(".nogit/tts/voicemail-greeting-{}.wav", vb.id);
|
||||||
|
|
||||||
|
let params = serde_json::json!({
|
||||||
|
"model": ".nogit/tts/kokoro-v1.0.onnx",
|
||||||
|
"voices": ".nogit/tts/voices.bin",
|
||||||
|
"voice": voice,
|
||||||
|
"text": text,
|
||||||
|
"output": &output,
|
||||||
|
"cacheable": true,
|
||||||
|
});
|
||||||
|
let mut tts = tts_engine.lock().await;
|
||||||
|
if tts.generate(&params).await.is_ok() {
|
||||||
|
return Some(output);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// 4. Fallback: legacy hardcoded paths.
|
||||||
|
for path in &[
|
||||||
".nogit/voicemail/default/greeting.wav",
|
".nogit/voicemail/default/greeting.wav",
|
||||||
".nogit/voicemail/greeting.wav",
|
".nogit/voicemail/greeting.wav",
|
||||||
];
|
] {
|
||||||
for path in &candidates {
|
if Path::new(path).exists() {
|
||||||
if std::path::Path::new(path).exists() {
|
|
||||||
return Some(path.to_string());
|
return Some(path.to_string());
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -159,6 +159,10 @@ pub struct AppConfig {
|
|||||||
pub providers: Vec<ProviderConfig>,
|
pub providers: Vec<ProviderConfig>,
|
||||||
pub devices: Vec<DeviceConfig>,
|
pub devices: Vec<DeviceConfig>,
|
||||||
pub routing: RoutingConfig,
|
pub routing: RoutingConfig,
|
||||||
|
#[serde(default)]
|
||||||
|
pub voiceboxes: Vec<VoiceboxConfig>,
|
||||||
|
#[serde(default)]
|
||||||
|
pub ivr: Option<IvrConfig>,
|
||||||
}
|
}
|
||||||
|
|
||||||
#[derive(Debug, Clone, Deserialize)]
|
#[derive(Debug, Clone, Deserialize)]
|
||||||
@@ -166,6 +170,59 @@ pub struct RoutingConfig {
|
|||||||
pub routes: Vec<Route>,
|
pub routes: Vec<Route>,
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// ---------------------------------------------------------------------------
|
||||||
|
// Voicebox config
|
||||||
|
// ---------------------------------------------------------------------------
|
||||||
|
|
||||||
|
#[allow(dead_code)]
|
||||||
|
#[derive(Debug, Clone, Deserialize)]
|
||||||
|
pub struct VoiceboxConfig {
|
||||||
|
pub id: String,
|
||||||
|
#[serde(default)]
|
||||||
|
pub enabled: bool,
|
||||||
|
#[serde(rename = "greetingText")]
|
||||||
|
pub greeting_text: Option<String>,
|
||||||
|
#[serde(rename = "greetingVoice")]
|
||||||
|
pub greeting_voice: Option<String>,
|
||||||
|
#[serde(rename = "greetingWavPath")]
|
||||||
|
pub greeting_wav_path: Option<String>,
|
||||||
|
#[serde(rename = "maxRecordingSec")]
|
||||||
|
pub max_recording_sec: Option<u32>,
|
||||||
|
}
|
||||||
|
|
||||||
|
// ---------------------------------------------------------------------------
|
||||||
|
// IVR config
|
||||||
|
// ---------------------------------------------------------------------------
|
||||||
|
|
||||||
|
#[allow(dead_code)]
|
||||||
|
#[derive(Debug, Clone, Deserialize)]
|
||||||
|
pub struct IvrConfig {
|
||||||
|
pub enabled: bool,
|
||||||
|
pub menus: Vec<IvrMenuConfig>,
|
||||||
|
#[serde(rename = "entryMenuId")]
|
||||||
|
pub entry_menu_id: String,
|
||||||
|
}
|
||||||
|
|
||||||
|
#[derive(Debug, Clone, Deserialize)]
|
||||||
|
pub struct IvrMenuConfig {
|
||||||
|
pub id: String,
|
||||||
|
#[serde(rename = "promptText")]
|
||||||
|
pub prompt_text: String,
|
||||||
|
#[serde(rename = "promptVoice")]
|
||||||
|
pub prompt_voice: Option<String>,
|
||||||
|
pub entries: Vec<IvrMenuEntry>,
|
||||||
|
#[serde(rename = "timeoutSec")]
|
||||||
|
pub timeout_sec: Option<u32>,
|
||||||
|
}
|
||||||
|
|
||||||
|
#[allow(dead_code)]
|
||||||
|
#[derive(Debug, Clone, Deserialize)]
|
||||||
|
pub struct IvrMenuEntry {
|
||||||
|
pub digit: String,
|
||||||
|
pub action: String,
|
||||||
|
pub target: Option<String>,
|
||||||
|
}
|
||||||
|
|
||||||
// ---------------------------------------------------------------------------
|
// ---------------------------------------------------------------------------
|
||||||
// Pattern matching (ported from ts/config.ts)
|
// Pattern matching (ported from ts/config.ts)
|
||||||
// ---------------------------------------------------------------------------
|
// ---------------------------------------------------------------------------
|
||||||
|
|||||||
@@ -50,11 +50,12 @@ struct ProxyEngine {
|
|||||||
registrar: Registrar,
|
registrar: Registrar,
|
||||||
call_mgr: CallManager,
|
call_mgr: CallManager,
|
||||||
rtp_pool: Option<RtpPortPool>,
|
rtp_pool: Option<RtpPortPool>,
|
||||||
|
tts_engine: Arc<Mutex<tts::TtsEngine>>,
|
||||||
out_tx: OutTx,
|
out_tx: OutTx,
|
||||||
}
|
}
|
||||||
|
|
||||||
impl ProxyEngine {
|
impl ProxyEngine {
|
||||||
fn new(out_tx: OutTx) -> Self {
|
fn new(out_tx: OutTx, tts_engine: Arc<Mutex<tts::TtsEngine>>) -> Self {
|
||||||
Self {
|
Self {
|
||||||
config: None,
|
config: None,
|
||||||
transport: None,
|
transport: None,
|
||||||
@@ -62,6 +63,7 @@ impl ProxyEngine {
|
|||||||
registrar: Registrar::new(out_tx.clone()),
|
registrar: Registrar::new(out_tx.clone()),
|
||||||
call_mgr: CallManager::new(out_tx.clone()),
|
call_mgr: CallManager::new(out_tx.clone()),
|
||||||
rtp_pool: None,
|
rtp_pool: None,
|
||||||
|
tts_engine,
|
||||||
out_tx,
|
out_tx,
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@@ -88,15 +90,16 @@ async fn main() {
|
|||||||
// Emit ready event.
|
// Emit ready event.
|
||||||
emit_event(&out_tx, "ready", serde_json::json!({}));
|
emit_event(&out_tx, "ready", serde_json::json!({}));
|
||||||
|
|
||||||
// Shared engine state (SIP side).
|
// TTS engine — separate internal lock, lazy-loads model on first use.
|
||||||
let engine = Arc::new(Mutex::new(ProxyEngine::new(out_tx.clone())));
|
let tts_engine = Arc::new(Mutex::new(tts::TtsEngine::new()));
|
||||||
|
|
||||||
|
// Shared engine state (SIP side). TTS engine is stored inside so the
|
||||||
|
// SIP packet handler path can reach it for on-demand voicemail/IVR generation.
|
||||||
|
let engine = Arc::new(Mutex::new(ProxyEngine::new(out_tx.clone(), tts_engine)));
|
||||||
|
|
||||||
// WebRTC engine — separate lock to avoid deadlock with SIP handlers.
|
// WebRTC engine — separate lock to avoid deadlock with SIP handlers.
|
||||||
let webrtc = Arc::new(Mutex::new(WebRtcEngine::new(out_tx.clone())));
|
let webrtc = Arc::new(Mutex::new(WebRtcEngine::new(out_tx.clone())));
|
||||||
|
|
||||||
// TTS engine — separate lock, lazy-loads model on first use.
|
|
||||||
let tts_engine = Arc::new(Mutex::new(tts::TtsEngine::new()));
|
|
||||||
|
|
||||||
// Read commands from stdin.
|
// Read commands from stdin.
|
||||||
let stdin = tokio::io::stdin();
|
let stdin = tokio::io::stdin();
|
||||||
let reader = BufReader::new(stdin);
|
let reader = BufReader::new(stdin);
|
||||||
@@ -117,12 +120,11 @@ async fn main() {
|
|||||||
|
|
||||||
let engine = engine.clone();
|
let engine = engine.clone();
|
||||||
let webrtc = webrtc.clone();
|
let webrtc = webrtc.clone();
|
||||||
let tts_engine = tts_engine.clone();
|
|
||||||
let out_tx = out_tx.clone();
|
let out_tx = out_tx.clone();
|
||||||
|
|
||||||
// Handle commands — some are async, so we spawn.
|
// Handle commands — some are async, so we spawn.
|
||||||
tokio::spawn(async move {
|
tokio::spawn(async move {
|
||||||
handle_command(engine, webrtc, tts_engine, &out_tx, cmd).await;
|
handle_command(engine, webrtc, &out_tx, cmd).await;
|
||||||
});
|
});
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@@ -130,7 +132,6 @@ async fn main() {
|
|||||||
async fn handle_command(
|
async fn handle_command(
|
||||||
engine: Arc<Mutex<ProxyEngine>>,
|
engine: Arc<Mutex<ProxyEngine>>,
|
||||||
webrtc: Arc<Mutex<WebRtcEngine>>,
|
webrtc: Arc<Mutex<WebRtcEngine>>,
|
||||||
tts_engine: Arc<Mutex<tts::TtsEngine>>,
|
|
||||||
out_tx: &OutTx,
|
out_tx: &OutTx,
|
||||||
cmd: Command,
|
cmd: Command,
|
||||||
) {
|
) {
|
||||||
@@ -155,8 +156,8 @@ async fn handle_command(
|
|||||||
"add_tool_leg" => handle_add_tool_leg(engine, out_tx, &cmd).await,
|
"add_tool_leg" => handle_add_tool_leg(engine, out_tx, &cmd).await,
|
||||||
"remove_tool_leg" => handle_remove_tool_leg(engine, out_tx, &cmd).await,
|
"remove_tool_leg" => handle_remove_tool_leg(engine, out_tx, &cmd).await,
|
||||||
"set_leg_metadata" => handle_set_leg_metadata(engine, out_tx, &cmd).await,
|
"set_leg_metadata" => handle_set_leg_metadata(engine, out_tx, &cmd).await,
|
||||||
// TTS command — lock tts_engine only (no SIP/WebRTC contention).
|
// TTS command — gets tts_engine from inside ProxyEngine.
|
||||||
"generate_tts" => handle_generate_tts(tts_engine, out_tx, &cmd).await,
|
"generate_tts" => handle_generate_tts(engine, out_tx, &cmd).await,
|
||||||
_ => respond_err(out_tx, &cmd.id, &format!("unknown command: {}", cmd.method)),
|
_ => respond_err(out_tx, &cmd.id, &format!("unknown command: {}", cmd.method)),
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@@ -325,8 +326,10 @@ async fn handle_sip_packet(
|
|||||||
ref registrar,
|
ref registrar,
|
||||||
ref mut call_mgr,
|
ref mut call_mgr,
|
||||||
ref mut rtp_pool,
|
ref mut rtp_pool,
|
||||||
|
ref tts_engine,
|
||||||
..
|
..
|
||||||
} = *eng;
|
} = *eng;
|
||||||
|
let tts_clone = tts_engine.clone();
|
||||||
let rtp_pool = rtp_pool.as_mut().unwrap();
|
let rtp_pool = rtp_pool.as_mut().unwrap();
|
||||||
let inbound = call_mgr
|
let inbound = call_mgr
|
||||||
.create_inbound_call(
|
.create_inbound_call(
|
||||||
@@ -339,6 +342,7 @@ async fn handle_sip_packet(
|
|||||||
rtp_pool,
|
rtp_pool,
|
||||||
socket,
|
socket,
|
||||||
public_ip.as_deref(),
|
public_ip.as_deref(),
|
||||||
|
tts_clone,
|
||||||
)
|
)
|
||||||
.await;
|
.await;
|
||||||
|
|
||||||
@@ -1231,10 +1235,11 @@ async fn handle_set_leg_metadata(
|
|||||||
|
|
||||||
/// Handle `generate_tts` — synthesize text to a WAV file using Kokoro TTS.
|
/// Handle `generate_tts` — synthesize text to a WAV file using Kokoro TTS.
|
||||||
async fn handle_generate_tts(
|
async fn handle_generate_tts(
|
||||||
tts_engine: Arc<Mutex<tts::TtsEngine>>,
|
engine: Arc<Mutex<ProxyEngine>>,
|
||||||
out_tx: &OutTx,
|
out_tx: &OutTx,
|
||||||
cmd: &Command,
|
cmd: &Command,
|
||||||
) {
|
) {
|
||||||
|
let tts_engine = engine.lock().await.tts_engine.clone();
|
||||||
let mut tts = tts_engine.lock().await;
|
let mut tts = tts_engine.lock().await;
|
||||||
match tts.generate(&cmd.params).await {
|
match tts.generate(&cmd.params).await {
|
||||||
Ok(result) => respond_ok(out_tx, &cmd.id, result),
|
Ok(result) => respond_ok(out_tx, &cmd.id, result),
|
||||||
|
|||||||
@@ -1,8 +1,13 @@
|
|||||||
//! Text-to-speech engine — synthesizes text to WAV files using Kokoro neural TTS.
|
//! Text-to-speech engine — synthesizes text to WAV files using Kokoro neural TTS.
|
||||||
//!
|
//!
|
||||||
//! The model is loaded lazily on first use. If the model/voices files are not
|
//! The model is loaded lazily on first use. If the model/voices files are not
|
||||||
//! present, the generate command returns an error and the TS side falls back
|
//! present, the generate command returns an error and the caller skips the prompt.
|
||||||
//! to espeak-ng.
|
//!
|
||||||
|
//! Caching is handled internally via a `.meta` sidecar file next to each WAV.
|
||||||
|
//! When `cacheable` is true, the engine checks whether the existing WAV was
|
||||||
|
//! generated from the same text+voice; if so it returns immediately (cache hit).
|
||||||
|
//! Callers never need to check for cached files — that is entirely this module's
|
||||||
|
//! responsibility.
|
||||||
|
|
||||||
use kokoro_tts::{KokoroTts, Voice};
|
use kokoro_tts::{KokoroTts, Voice};
|
||||||
use std::path::Path;
|
use std::path::Path;
|
||||||
@@ -32,6 +37,8 @@ impl TtsEngine {
|
|||||||
/// - `voice`: voice name (e.g. "af_bella")
|
/// - `voice`: voice name (e.g. "af_bella")
|
||||||
/// - `text`: text to synthesize
|
/// - `text`: text to synthesize
|
||||||
/// - `output`: output WAV file path
|
/// - `output`: output WAV file path
|
||||||
|
/// - `cacheable`: if true, skip synthesis when the output WAV already
|
||||||
|
/// matches the same text+voice (checked via a `.meta` sidecar file)
|
||||||
pub async fn generate(&mut self, params: &serde_json::Value) -> Result<serde_json::Value, String> {
|
pub async fn generate(&mut self, params: &serde_json::Value) -> Result<serde_json::Value, String> {
|
||||||
let model_path = params.get("model").and_then(|v| v.as_str())
|
let model_path = params.get("model").and_then(|v| v.as_str())
|
||||||
.ok_or("missing 'model' param")?;
|
.ok_or("missing 'model' param")?;
|
||||||
@@ -43,11 +50,19 @@ impl TtsEngine {
|
|||||||
.ok_or("missing 'text' param")?;
|
.ok_or("missing 'text' param")?;
|
||||||
let output_path = params.get("output").and_then(|v| v.as_str())
|
let output_path = params.get("output").and_then(|v| v.as_str())
|
||||||
.ok_or("missing 'output' param")?;
|
.ok_or("missing 'output' param")?;
|
||||||
|
let cacheable = params.get("cacheable").and_then(|v| v.as_bool())
|
||||||
|
.unwrap_or(false);
|
||||||
|
|
||||||
if text.is_empty() {
|
if text.is_empty() {
|
||||||
return Err("empty text".into());
|
return Err("empty text".into());
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Cache check: if cacheable and the sidecar matches, return immediately.
|
||||||
|
if cacheable && self.is_cache_hit(output_path, text, voice_name) {
|
||||||
|
eprintln!("[tts] cache hit: {output_path}");
|
||||||
|
return Ok(serde_json::json!({ "output": output_path }));
|
||||||
|
}
|
||||||
|
|
||||||
// Check that model/voices files exist.
|
// Check that model/voices files exist.
|
||||||
if !Path::new(model_path).exists() {
|
if !Path::new(model_path).exists() {
|
||||||
return Err(format!("model not found: {model_path}"));
|
return Err(format!("model not found: {model_path}"));
|
||||||
@@ -56,6 +71,11 @@ impl TtsEngine {
|
|||||||
return Err(format!("voices not found: {voices_path}"));
|
return Err(format!("voices not found: {voices_path}"));
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Ensure parent directory exists.
|
||||||
|
if let Some(parent) = Path::new(output_path).parent() {
|
||||||
|
let _ = std::fs::create_dir_all(parent);
|
||||||
|
}
|
||||||
|
|
||||||
// Lazy-load or reload if paths changed.
|
// Lazy-load or reload if paths changed.
|
||||||
if self.tts.is_none()
|
if self.tts.is_none()
|
||||||
|| self.loaded_model_path != model_path
|
|| self.loaded_model_path != model_path
|
||||||
@@ -95,9 +115,41 @@ impl TtsEngine {
|
|||||||
}
|
}
|
||||||
writer.finalize().map_err(|e| format!("WAV finalize: {e}"))?;
|
writer.finalize().map_err(|e| format!("WAV finalize: {e}"))?;
|
||||||
|
|
||||||
|
// Write sidecar for future cache checks.
|
||||||
|
if cacheable {
|
||||||
|
self.write_cache_meta(output_path, text, voice_name);
|
||||||
|
}
|
||||||
|
|
||||||
eprintln!("[tts] wrote {output_path}");
|
eprintln!("[tts] wrote {output_path}");
|
||||||
Ok(serde_json::json!({ "output": output_path }))
|
Ok(serde_json::json!({ "output": output_path }))
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// -----------------------------------------------------------------------
|
||||||
|
// Cache helpers
|
||||||
|
// -----------------------------------------------------------------------
|
||||||
|
|
||||||
|
/// Check if the WAV + sidecar on disk match the given text+voice.
|
||||||
|
fn is_cache_hit(&self, output_path: &str, text: &str, voice: &str) -> bool {
|
||||||
|
let meta_path = format!("{output_path}.meta");
|
||||||
|
if !Path::new(output_path).exists() || !Path::new(&meta_path).exists() {
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
match std::fs::read_to_string(&meta_path) {
|
||||||
|
Ok(contents) => contents == Self::cache_key(text, voice),
|
||||||
|
Err(_) => false,
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Write the sidecar `.meta` file next to the WAV.
|
||||||
|
fn write_cache_meta(&self, output_path: &str, text: &str, voice: &str) {
|
||||||
|
let meta_path = format!("{output_path}.meta");
|
||||||
|
let _ = std::fs::write(&meta_path, Self::cache_key(text, voice));
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Build the cache key from text + voice.
|
||||||
|
fn cache_key(text: &str, voice: &str) -> String {
|
||||||
|
format!("{}\0{}", text, voice)
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
/// Map voice name string to Kokoro Voice enum variant.
|
/// Map voice name string to Kokoro Voice enum variant.
|
||||||
|
|||||||
@@ -3,6 +3,6 @@
|
|||||||
*/
|
*/
|
||||||
export const commitinfo = {
|
export const commitinfo = {
|
||||||
name: 'siprouter',
|
name: 'siprouter',
|
||||||
version: '1.21.0',
|
version: '1.22.0',
|
||||||
description: 'undefined'
|
description: 'undefined'
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -1,137 +0,0 @@
|
|||||||
/**
|
|
||||||
* TTS announcement module — generates announcement WAV files at startup.
|
|
||||||
*
|
|
||||||
* Engine priority: espeak-ng (formant TTS, fast) → Kokoro neural TTS via
|
|
||||||
* proxy-engine → disabled.
|
|
||||||
*
|
|
||||||
* The generated WAV is left on disk for Rust's audio_player / start_interaction
|
|
||||||
* to play during calls. No encoding or RTP playback happens in TypeScript.
|
|
||||||
*/
|
|
||||||
|
|
||||||
import { execSync } from 'node:child_process';
|
|
||||||
import fs from 'node:fs';
|
|
||||||
import path from 'node:path';
|
|
||||||
import { sendProxyCommand, isProxyReady } from './proxybridge.ts';
|
|
||||||
|
|
||||||
// ---------------------------------------------------------------------------
|
|
||||||
// State
|
|
||||||
// ---------------------------------------------------------------------------
|
|
||||||
|
|
||||||
const TTS_DIR = path.join(process.cwd(), '.nogit', 'tts');
|
|
||||||
const ANNOUNCEMENT_TEXT = "Hello. I'm connecting your call now.";
|
|
||||||
const CACHE_WAV = path.join(TTS_DIR, 'announcement.wav');
|
|
||||||
|
|
||||||
// Kokoro fallback constants.
|
|
||||||
const KOKORO_MODEL = 'kokoro-v1.0.onnx';
|
|
||||||
const KOKORO_VOICES = 'voices.bin';
|
|
||||||
const KOKORO_VOICE = 'af_bella';
|
|
||||||
|
|
||||||
// ---------------------------------------------------------------------------
|
|
||||||
// TTS generators
|
|
||||||
// ---------------------------------------------------------------------------
|
|
||||||
|
|
||||||
/** Check if espeak-ng is available on the system. */
|
|
||||||
function isEspeakAvailable(): boolean {
|
|
||||||
try {
|
|
||||||
execSync('which espeak-ng', { stdio: 'pipe' });
|
|
||||||
return true;
|
|
||||||
} catch {
|
|
||||||
return false;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
/** Generate announcement WAV via espeak-ng (primary engine). */
|
|
||||||
function generateViaEspeak(wavPath: string, text: string, log: (msg: string) => void): boolean {
|
|
||||||
log('[tts] generating announcement audio via espeak-ng...');
|
|
||||||
try {
|
|
||||||
execSync(
|
|
||||||
`espeak-ng -v en-us -s 150 -w "${wavPath}" "${text}"`,
|
|
||||||
{ timeout: 10000, stdio: 'pipe' },
|
|
||||||
);
|
|
||||||
log('[tts] espeak-ng WAV generated');
|
|
||||||
return true;
|
|
||||||
} catch (e: any) {
|
|
||||||
log(`[tts] espeak-ng failed: ${e.message}`);
|
|
||||||
return false;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
/** Generate announcement WAV via Kokoro TTS (fallback, runs inside proxy-engine). */
|
|
||||||
async function generateViaKokoro(wavPath: string, text: string, log: (msg: string) => void): Promise<boolean> {
|
|
||||||
const modelPath = path.join(TTS_DIR, KOKORO_MODEL);
|
|
||||||
const voicesPath = path.join(TTS_DIR, KOKORO_VOICES);
|
|
||||||
|
|
||||||
if (!fs.existsSync(modelPath) || !fs.existsSync(voicesPath)) {
|
|
||||||
log('[tts] Kokoro model/voices not found — Kokoro fallback unavailable');
|
|
||||||
return false;
|
|
||||||
}
|
|
||||||
|
|
||||||
if (!isProxyReady()) {
|
|
||||||
log('[tts] proxy-engine not ready — Kokoro fallback unavailable');
|
|
||||||
return false;
|
|
||||||
}
|
|
||||||
|
|
||||||
log('[tts] generating announcement audio via Kokoro TTS (fallback)...');
|
|
||||||
try {
|
|
||||||
await sendProxyCommand('generate_tts', {
|
|
||||||
model: modelPath,
|
|
||||||
voices: voicesPath,
|
|
||||||
voice: KOKORO_VOICE,
|
|
||||||
text,
|
|
||||||
output: wavPath,
|
|
||||||
});
|
|
||||||
log('[tts] Kokoro WAV generated (via proxy-engine)');
|
|
||||||
return true;
|
|
||||||
} catch (e: any) {
|
|
||||||
log(`[tts] Kokoro failed: ${e.message}`);
|
|
||||||
return false;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
// ---------------------------------------------------------------------------
|
|
||||||
// Initialization
|
|
||||||
// ---------------------------------------------------------------------------
|
|
||||||
|
|
||||||
/**
|
|
||||||
* Pre-generate the announcement WAV file.
|
|
||||||
* Must be called after the proxy engine is initialized.
|
|
||||||
*
|
|
||||||
* Engine priority: espeak-ng → Kokoro → disabled.
|
|
||||||
*/
|
|
||||||
export async function initAnnouncement(log: (msg: string) => void): Promise<boolean> {
|
|
||||||
fs.mkdirSync(TTS_DIR, { recursive: true });
|
|
||||||
|
|
||||||
try {
|
|
||||||
if (!fs.existsSync(CACHE_WAV)) {
|
|
||||||
let generated = false;
|
|
||||||
|
|
||||||
// Try espeak-ng first.
|
|
||||||
if (isEspeakAvailable()) {
|
|
||||||
generated = generateViaEspeak(CACHE_WAV, ANNOUNCEMENT_TEXT, log);
|
|
||||||
} else {
|
|
||||||
log('[tts] espeak-ng not installed — trying Kokoro fallback');
|
|
||||||
}
|
|
||||||
|
|
||||||
// Fall back to Kokoro (via proxy-engine).
|
|
||||||
if (!generated) {
|
|
||||||
generated = await generateViaKokoro(CACHE_WAV, ANNOUNCEMENT_TEXT, log);
|
|
||||||
}
|
|
||||||
|
|
||||||
if (!generated) {
|
|
||||||
log('[tts] no TTS engine available — announcements disabled');
|
|
||||||
return false;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
log('[tts] announcement WAV ready');
|
|
||||||
return true;
|
|
||||||
} catch (e: any) {
|
|
||||||
log(`[tts] init error: ${e.message}`);
|
|
||||||
return false;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
/** Get the path to the cached announcement WAV, or null if not generated. */
|
|
||||||
export function getAnnouncementWavPath(): string | null {
|
|
||||||
return fs.existsSync(CACHE_WAV) ? CACHE_WAV : null;
|
|
||||||
}
|
|
||||||
@@ -1,275 +0,0 @@
|
|||||||
/**
|
|
||||||
* PromptCache — manages named audio prompt WAV files for IVR and voicemail.
|
|
||||||
*
|
|
||||||
* Generates WAV files via espeak-ng (primary) or Kokoro TTS through the
|
|
||||||
* proxy-engine (fallback). Also supports loading pre-existing WAV files
|
|
||||||
* and programmatic tone generation.
|
|
||||||
*
|
|
||||||
* All audio playback happens in Rust (audio_player / start_interaction).
|
|
||||||
* This module only manages WAV files on disk.
|
|
||||||
*/
|
|
||||||
|
|
||||||
import { execSync } from 'node:child_process';
|
|
||||||
import fs from 'node:fs';
|
|
||||||
import path from 'node:path';
|
|
||||||
import { Buffer } from 'node:buffer';
|
|
||||||
import { sendProxyCommand, isProxyReady } from '../proxybridge.ts';
|
|
||||||
|
|
||||||
// ---------------------------------------------------------------------------
|
|
||||||
// Types
|
|
||||||
// ---------------------------------------------------------------------------
|
|
||||||
|
|
||||||
/** A cached prompt — just a WAV file path and metadata. */
|
|
||||||
export interface ICachedPrompt {
|
|
||||||
/** Unique prompt identifier. */
|
|
||||||
id: string;
|
|
||||||
/** Path to the WAV file on disk. */
|
|
||||||
wavPath: string;
|
|
||||||
/** Total duration in milliseconds (approximate, from WAV header). */
|
|
||||||
durationMs: number;
|
|
||||||
}
|
|
||||||
|
|
||||||
// ---------------------------------------------------------------------------
|
|
||||||
// TTS helpers
|
|
||||||
// ---------------------------------------------------------------------------
|
|
||||||
|
|
||||||
const TTS_DIR = path.join(process.cwd(), '.nogit', 'tts');
|
|
||||||
|
|
||||||
/** Check if espeak-ng is available. */
|
|
||||||
function isEspeakAvailable(): boolean {
|
|
||||||
try {
|
|
||||||
execSync('which espeak-ng', { stdio: 'pipe' });
|
|
||||||
return true;
|
|
||||||
} catch {
|
|
||||||
return false;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
/** Generate WAV via espeak-ng. */
|
|
||||||
function generateViaEspeak(wavPath: string, text: string): boolean {
|
|
||||||
try {
|
|
||||||
execSync(
|
|
||||||
`espeak-ng -v en-us -s 150 -w "${wavPath}" "${text}"`,
|
|
||||||
{ timeout: 10000, stdio: 'pipe' },
|
|
||||||
);
|
|
||||||
return true;
|
|
||||||
} catch {
|
|
||||||
return false;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
/** Generate WAV via Kokoro TTS (runs inside proxy-engine). */
|
|
||||||
async function generateViaKokoro(wavPath: string, text: string, voice: string): Promise<boolean> {
|
|
||||||
const modelPath = path.join(TTS_DIR, 'kokoro-v1.0.onnx');
|
|
||||||
const voicesPath = path.join(TTS_DIR, 'voices.bin');
|
|
||||||
if (!fs.existsSync(modelPath) || !fs.existsSync(voicesPath)) return false;
|
|
||||||
if (!isProxyReady()) return false;
|
|
||||||
|
|
||||||
try {
|
|
||||||
await sendProxyCommand('generate_tts', {
|
|
||||||
model: modelPath,
|
|
||||||
voices: voicesPath,
|
|
||||||
voice,
|
|
||||||
text,
|
|
||||||
output: wavPath,
|
|
||||||
});
|
|
||||||
return true;
|
|
||||||
} catch {
|
|
||||||
return false;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
/** Read a WAV file's duration from its header. */
|
|
||||||
function getWavDurationMs(wavPath: string): number {
|
|
||||||
try {
|
|
||||||
const wav = fs.readFileSync(wavPath);
|
|
||||||
if (wav.length < 44) return 0;
|
|
||||||
if (wav.toString('ascii', 0, 4) !== 'RIFF') return 0;
|
|
||||||
|
|
||||||
let sampleRate = 16000;
|
|
||||||
let dataSize = 0;
|
|
||||||
let bitsPerSample = 16;
|
|
||||||
let channels = 1;
|
|
||||||
let offset = 12;
|
|
||||||
|
|
||||||
while (offset < wav.length - 8) {
|
|
||||||
const chunkId = wav.toString('ascii', offset, offset + 4);
|
|
||||||
const chunkSize = wav.readUInt32LE(offset + 4);
|
|
||||||
if (chunkId === 'fmt ') {
|
|
||||||
channels = wav.readUInt16LE(offset + 10);
|
|
||||||
sampleRate = wav.readUInt32LE(offset + 12);
|
|
||||||
bitsPerSample = wav.readUInt16LE(offset + 22);
|
|
||||||
}
|
|
||||||
if (chunkId === 'data') {
|
|
||||||
dataSize = chunkSize;
|
|
||||||
}
|
|
||||||
offset += 8 + chunkSize;
|
|
||||||
if (offset % 2 !== 0) offset++;
|
|
||||||
}
|
|
||||||
|
|
||||||
const bytesPerSample = (bitsPerSample / 8) * channels;
|
|
||||||
const totalSamples = bytesPerSample > 0 ? dataSize / bytesPerSample : 0;
|
|
||||||
return sampleRate > 0 ? Math.round((totalSamples / sampleRate) * 1000) : 0;
|
|
||||||
} catch {
|
|
||||||
return 0;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
// ---------------------------------------------------------------------------
|
|
||||||
// PromptCache
|
|
||||||
// ---------------------------------------------------------------------------
|
|
||||||
|
|
||||||
export class PromptCache {
|
|
||||||
private prompts = new Map<string, ICachedPrompt>();
|
|
||||||
private log: (msg: string) => void;
|
|
||||||
private espeakAvailable: boolean | null = null;
|
|
||||||
|
|
||||||
constructor(log: (msg: string) => void) {
|
|
||||||
this.log = log;
|
|
||||||
}
|
|
||||||
|
|
||||||
// -------------------------------------------------------------------------
|
|
||||||
// Public API
|
|
||||||
// -------------------------------------------------------------------------
|
|
||||||
|
|
||||||
/** Get a cached prompt by ID. */
|
|
||||||
get(id: string): ICachedPrompt | null {
|
|
||||||
return this.prompts.get(id) ?? null;
|
|
||||||
}
|
|
||||||
|
|
||||||
/** Check if a prompt is cached. */
|
|
||||||
has(id: string): boolean {
|
|
||||||
return this.prompts.has(id);
|
|
||||||
}
|
|
||||||
|
|
||||||
/** List all cached prompt IDs. */
|
|
||||||
listIds(): string[] {
|
|
||||||
return [...this.prompts.keys()];
|
|
||||||
}
|
|
||||||
|
|
||||||
/**
|
|
||||||
* Generate a TTS prompt WAV and cache its path.
|
|
||||||
* Uses espeak-ng (primary) or Kokoro (fallback).
|
|
||||||
*/
|
|
||||||
async generatePrompt(id: string, text: string, voice = 'af_bella'): Promise<ICachedPrompt | null> {
|
|
||||||
fs.mkdirSync(TTS_DIR, { recursive: true });
|
|
||||||
const wavPath = path.join(TTS_DIR, `prompt-${id}.wav`);
|
|
||||||
|
|
||||||
// Check espeak availability once.
|
|
||||||
if (this.espeakAvailable === null) {
|
|
||||||
this.espeakAvailable = isEspeakAvailable();
|
|
||||||
}
|
|
||||||
|
|
||||||
// Generate WAV if not already on disk.
|
|
||||||
if (!fs.existsSync(wavPath)) {
|
|
||||||
let generated = false;
|
|
||||||
if (this.espeakAvailable) {
|
|
||||||
generated = generateViaEspeak(wavPath, text);
|
|
||||||
}
|
|
||||||
if (!generated) {
|
|
||||||
generated = await generateViaKokoro(wavPath, text, voice);
|
|
||||||
}
|
|
||||||
if (!generated) {
|
|
||||||
this.log(`[prompt-cache] failed to generate TTS for "${id}"`);
|
|
||||||
return null;
|
|
||||||
}
|
|
||||||
this.log(`[prompt-cache] generated WAV for "${id}"`);
|
|
||||||
}
|
|
||||||
|
|
||||||
return this.registerWav(id, wavPath);
|
|
||||||
}
|
|
||||||
|
|
||||||
/**
|
|
||||||
* Load a pre-existing WAV file as a prompt.
|
|
||||||
*/
|
|
||||||
async loadWavPrompt(id: string, wavPath: string): Promise<ICachedPrompt | null> {
|
|
||||||
if (!fs.existsSync(wavPath)) {
|
|
||||||
this.log(`[prompt-cache] WAV not found: ${wavPath}`);
|
|
||||||
return null;
|
|
||||||
}
|
|
||||||
return this.registerWav(id, wavPath);
|
|
||||||
}
|
|
||||||
|
|
||||||
/**
|
|
||||||
* Generate a beep tone WAV and cache it.
|
|
||||||
*/
|
|
||||||
async generateBeep(
|
|
||||||
id: string,
|
|
||||||
freqHz = 1000,
|
|
||||||
durationMs = 500,
|
|
||||||
amplitude = 8000,
|
|
||||||
): Promise<ICachedPrompt | null> {
|
|
||||||
fs.mkdirSync(TTS_DIR, { recursive: true });
|
|
||||||
const wavPath = path.join(TTS_DIR, `prompt-${id}.wav`);
|
|
||||||
|
|
||||||
if (!fs.existsSync(wavPath)) {
|
|
||||||
// Generate 16kHz 16-bit mono sine wave WAV.
|
|
||||||
const sampleRate = 16000;
|
|
||||||
const totalSamples = Math.floor((sampleRate * durationMs) / 1000);
|
|
||||||
const pcm = Buffer.alloc(totalSamples * 2);
|
|
||||||
|
|
||||||
for (let i = 0; i < totalSamples; i++) {
|
|
||||||
const t = i / sampleRate;
|
|
||||||
const fadeLen = Math.floor(sampleRate * 0.01); // 10ms fade
|
|
||||||
let envelope = 1.0;
|
|
||||||
if (i < fadeLen) envelope = i / fadeLen;
|
|
||||||
else if (i > totalSamples - fadeLen) envelope = (totalSamples - i) / fadeLen;
|
|
||||||
|
|
||||||
const sample = Math.round(Math.sin(2 * Math.PI * freqHz * t) * amplitude * envelope);
|
|
||||||
pcm.writeInt16LE(Math.max(-32768, Math.min(32767, sample)), i * 2);
|
|
||||||
}
|
|
||||||
|
|
||||||
// Write WAV file.
|
|
||||||
const headerSize = 44;
|
|
||||||
const dataSize = pcm.length;
|
|
||||||
const wav = Buffer.alloc(headerSize + dataSize);
|
|
||||||
|
|
||||||
// RIFF header
|
|
||||||
wav.write('RIFF', 0);
|
|
||||||
wav.writeUInt32LE(36 + dataSize, 4);
|
|
||||||
wav.write('WAVE', 8);
|
|
||||||
|
|
||||||
// fmt chunk
|
|
||||||
wav.write('fmt ', 12);
|
|
||||||
wav.writeUInt32LE(16, 16); // chunk size
|
|
||||||
wav.writeUInt16LE(1, 20); // PCM format
|
|
||||||
wav.writeUInt16LE(1, 22); // mono
|
|
||||||
wav.writeUInt32LE(sampleRate, 24);
|
|
||||||
wav.writeUInt32LE(sampleRate * 2, 28); // byte rate
|
|
||||||
wav.writeUInt16LE(2, 32); // block align
|
|
||||||
wav.writeUInt16LE(16, 34); // bits per sample
|
|
||||||
|
|
||||||
// data chunk
|
|
||||||
wav.write('data', 36);
|
|
||||||
wav.writeUInt32LE(dataSize, 40);
|
|
||||||
pcm.copy(wav, 44);
|
|
||||||
|
|
||||||
fs.writeFileSync(wavPath, wav);
|
|
||||||
this.log(`[prompt-cache] beep WAV generated for "${id}"`);
|
|
||||||
}
|
|
||||||
|
|
||||||
return this.registerWav(id, wavPath);
|
|
||||||
}
|
|
||||||
|
|
||||||
/** Remove a prompt from the cache. */
|
|
||||||
remove(id: string): void {
|
|
||||||
this.prompts.delete(id);
|
|
||||||
}
|
|
||||||
|
|
||||||
/** Clear all cached prompts. */
|
|
||||||
clear(): void {
|
|
||||||
this.prompts.clear();
|
|
||||||
}
|
|
||||||
|
|
||||||
// -------------------------------------------------------------------------
|
|
||||||
// Internal
|
|
||||||
// -------------------------------------------------------------------------
|
|
||||||
|
|
||||||
/**
 * Record a WAV file as a cached prompt and return the resulting entry.
 *
 * Looks up the file's duration via `getWavDurationMs`, builds an
 * `ICachedPrompt` from `id`, `wavPath` and that duration, stores it in
 * `this.prompts` under `id`, and logs the registration.
 *
 * NOTE(review): storing under an `id` that is already present presumably
 * replaces the earlier entry (Map-style `set`) — confirm against the
 * declaration of `this.prompts`.
 *
 * @param id - Key under which the prompt is stored.
 * @param wavPath - Path of the WAV file backing the prompt.
 * @returns The prompt entry that was just stored.
 */
private registerWav(id: string, wavPath: string): ICachedPrompt {
|
|
||||||
const durationMs = getWavDurationMs(wavPath);
|
|
||||||
const prompt: ICachedPrompt = { id, wavPath, durationMs };
|
|
||||||
this.prompts.set(id, prompt);
|
|
||||||
// The log line reports the duration in seconds with one decimal place.
this.log(`[prompt-cache] cached "${id}": ${wavPath} (${(durationMs / 1000).toFixed(1)}s)`);
|
|
||||||
return prompt;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
@@ -88,7 +88,7 @@ type TProxyCommands = {
|
|||||||
result: Record<string, never>;
|
result: Record<string, never>;
|
||||||
};
|
};
|
||||||
generate_tts: {
|
generate_tts: {
|
||||||
params: { model: string; voices: string; voice: string; text: string; output: string };
|
params: { model: string; voices: string; voice: string; text: string; output: string; cacheable?: boolean };
|
||||||
result: { output: string };
|
result: { output: string };
|
||||||
};
|
};
|
||||||
// WebRTC signaling — bridged from the browser via the TS control plane.
|
// WebRTC signaling — bridged from the browser via the TS control plane.
|
||||||
|
|||||||
@@ -24,8 +24,6 @@ import {
|
|||||||
getAllBrowserDeviceIds,
|
getAllBrowserDeviceIds,
|
||||||
getBrowserDeviceWs,
|
getBrowserDeviceWs,
|
||||||
} from './webrtcbridge.ts';
|
} from './webrtcbridge.ts';
|
||||||
import { initAnnouncement } from './announcement.ts';
|
|
||||||
import { PromptCache } from './call/prompt-cache.ts';
|
|
||||||
import { VoiceboxManager } from './voicebox.ts';
|
import { VoiceboxManager } from './voicebox.ts';
|
||||||
import {
|
import {
|
||||||
initProxyEngine,
|
initProxyEngine,
|
||||||
@@ -170,7 +168,6 @@ for (const d of appConfig.devices) {
|
|||||||
// Initialize subsystems
|
// Initialize subsystems
|
||||||
// ---------------------------------------------------------------------------
|
// ---------------------------------------------------------------------------
|
||||||
|
|
||||||
const promptCache = new PromptCache(log);
|
|
||||||
const voiceboxManager = new VoiceboxManager(log);
|
const voiceboxManager = new VoiceboxManager(log);
|
||||||
voiceboxManager.init(appConfig.voiceboxes ?? []);
|
voiceboxManager.init(appConfig.voiceboxes ?? []);
|
||||||
|
|
||||||
@@ -519,6 +516,8 @@ async function startProxyEngine(): Promise<void> {
|
|||||||
providers: appConfig.providers,
|
providers: appConfig.providers,
|
||||||
devices: appConfig.devices,
|
devices: appConfig.devices,
|
||||||
routing: appConfig.routing,
|
routing: appConfig.routing,
|
||||||
|
voiceboxes: appConfig.voiceboxes ?? [],
|
||||||
|
ivr: appConfig.ivr,
|
||||||
});
|
});
|
||||||
|
|
||||||
if (!configured) {
|
if (!configured) {
|
||||||
@@ -530,31 +529,8 @@ async function startProxyEngine(): Promise<void> {
|
|||||||
const deviceList = appConfig.devices.map((d) => d.displayName).join(', ');
|
const deviceList = appConfig.devices.map((d) => d.displayName).join(', ');
|
||||||
log(`proxy engine started | LAN ${appConfig.proxy.lanIp}:${appConfig.proxy.lanPort} | providers: ${providerList} | devices: ${deviceList}`);
|
log(`proxy engine started | LAN ${appConfig.proxy.lanIp}:${appConfig.proxy.lanPort} | providers: ${providerList} | devices: ${deviceList}`);
|
||||||
|
|
||||||
// Generate TTS audio (WAV files on disk, played by Rust audio_player).
|
// TTS prompts (voicemail greetings, IVR menus) are generated on-demand
|
||||||
try {
|
// by the Rust TTS engine when first needed. No startup pre-generation.
|
||||||
await initAnnouncement(log);
|
|
||||||
|
|
||||||
// Pre-generate prompts.
|
|
||||||
await promptCache.generateBeep('voicemail-beep', 1000, 500, 8000);
|
|
||||||
for (const vb of appConfig.voiceboxes ?? []) {
|
|
||||||
if (!vb.enabled) continue;
|
|
||||||
const promptId = `voicemail-greeting-${vb.id}`;
|
|
||||||
if (vb.greetingWavPath) {
|
|
||||||
await promptCache.loadWavPrompt(promptId, vb.greetingWavPath);
|
|
||||||
} else {
|
|
||||||
const text = vb.greetingText || 'The person you are trying to reach is not available. Please leave a message after the tone.';
|
|
||||||
await promptCache.generatePrompt(promptId, text, vb.greetingVoice || 'af_bella');
|
|
||||||
}
|
|
||||||
}
|
|
||||||
if (appConfig.ivr?.enabled) {
|
|
||||||
for (const menu of appConfig.ivr.menus) {
|
|
||||||
await promptCache.generatePrompt(`ivr-menu-${menu.id}`, menu.promptText, menu.promptVoice || 'af_bella');
|
|
||||||
}
|
|
||||||
}
|
|
||||||
log(`[startup] prompts cached: ${promptCache.listIds().join(', ') || 'none'}`);
|
|
||||||
} catch (e) {
|
|
||||||
log(`[tts] init failed: ${e}`);
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
|
|
||||||
// ---------------------------------------------------------------------------
|
// ---------------------------------------------------------------------------
|
||||||
@@ -620,6 +596,8 @@ initWebUi(
|
|||||||
providers: fresh.providers,
|
providers: fresh.providers,
|
||||||
devices: fresh.devices,
|
devices: fresh.devices,
|
||||||
routing: fresh.routing,
|
routing: fresh.routing,
|
||||||
|
voiceboxes: fresh.voiceboxes ?? [],
|
||||||
|
ivr: fresh.ivr,
|
||||||
}).then((ok) => {
|
}).then((ok) => {
|
||||||
if (ok) log('[config] reloaded — proxy engine reconfigured');
|
if (ok) log('[config] reloaded — proxy engine reconfigured');
|
||||||
else log('[config] reload failed — proxy engine rejected config');
|
else log('[config] reload failed — proxy engine rejected config');
|
||||||
|
|||||||
@@ -3,6 +3,6 @@
|
|||||||
*/
|
*/
|
||||||
export const commitinfo = {
|
export const commitinfo = {
|
||||||
name: 'siprouter',
|
name: 'siprouter',
|
||||||
version: '1.21.0',
|
version: '1.22.0',
|
||||||
description: 'undefined'
|
description: 'undefined'
|
||||||
}
|
}
|
||||||
|
|||||||
Reference in New Issue
Block a user