feat(proxy-engine): add on-demand TTS caching for voicemail and IVR prompts
This commit is contained in:
@@ -12,13 +12,16 @@ use crate::mixer::spawn_mixer;
|
||||
use crate::registrar::Registrar;
|
||||
use crate::rtp::RtpPortPool;
|
||||
use crate::sip_leg::{SipLeg, SipLegAction, SipLegConfig};
|
||||
use crate::tts::TtsEngine;
|
||||
use sip_proto::helpers::{build_sdp, generate_call_id, generate_tag, parse_sdp_endpoint, SdpOptions};
|
||||
use sip_proto::message::{ResponseOptions, SipMessage};
|
||||
use sip_proto::rewrite::{rewrite_sdp, rewrite_sip_uri};
|
||||
use std::collections::HashMap;
|
||||
use std::net::SocketAddr;
|
||||
use std::path::Path;
|
||||
use std::sync::Arc;
|
||||
use tokio::net::UdpSocket;
|
||||
use tokio::sync::Mutex;
|
||||
|
||||
/// Result of creating an inbound call — carries both the call id and
|
||||
/// whether browsers should be notified (flows from the matched inbound
|
||||
@@ -681,6 +684,7 @@ impl CallManager {
|
||||
rtp_pool: &mut RtpPortPool,
|
||||
socket: &UdpSocket,
|
||||
public_ip: Option<&str>,
|
||||
tts_engine: Arc<Mutex<TtsEngine>>,
|
||||
) -> Option<InboundCallCreated> {
|
||||
let call_id = self.next_call_id();
|
||||
let lan_ip = &config.proxy.lan_ip;
|
||||
@@ -710,10 +714,27 @@ impl CallManager {
|
||||
// - `ring_browsers` is informational only — browsers see a toast but
|
||||
// do not race the SIP device. First-to-answer-wins requires a
|
||||
// multi-leg fork + per-leg CANCEL, which is not built yet.
|
||||
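// - `no_answer_timeout` is not honored. `ivr_menu_id` is honored only when IVR
//   is enabled and the menu exists; `voicemail_box` is only used to pick the
//   voicemail greeting.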
|
||||
let route = config.resolve_inbound_route(provider_id, &called_number, &caller_number);
|
||||
let ring_browsers = route.ring_browsers;
|
||||
|
||||
// IVR routing: if the route targets an IVR menu, go there directly.
|
||||
if let Some(ref ivr_menu_id) = route.ivr_menu_id {
|
||||
if let Some(ivr) = &config.ivr {
|
||||
if ivr.enabled {
|
||||
if let Some(menu) = ivr.menus.iter().find(|m| m.id == *ivr_menu_id) {
|
||||
let call_id = self
|
||||
.route_to_ivr(
|
||||
&call_id, invite, from_addr, &caller_number,
|
||||
provider_id, provider_config, config, rtp_pool, socket,
|
||||
public_ip, menu, &tts_engine,
|
||||
)
|
||||
.await?;
|
||||
return Some(InboundCallCreated { call_id, ring_browsers });
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Pick the first registered device from the matched targets, or fall
|
||||
// back to any-registered-device if the route has no resolved targets.
|
||||
let device_addr = route
|
||||
@@ -726,10 +747,17 @@ impl CallManager {
|
||||
Some(addr) => addr,
|
||||
None => {
|
||||
// No device registered → voicemail.
|
||||
// Resolve greeting WAV on-demand (may trigger TTS generation).
|
||||
let greeting_wav = resolve_greeting_wav(
|
||||
config,
|
||||
route.voicemail_box.as_deref(),
|
||||
&tts_engine,
|
||||
).await;
|
||||
let call_id = self
|
||||
.route_to_voicemail(
|
||||
&call_id, invite, from_addr, &caller_number,
|
||||
provider_id, provider_config, config, rtp_pool, socket, public_ip,
|
||||
greeting_wav,
|
||||
)
|
||||
.await?;
|
||||
return Some(InboundCallCreated { call_id, ring_browsers });
|
||||
@@ -1536,6 +1564,7 @@ impl CallManager {
|
||||
rtp_pool: &mut RtpPortPool,
|
||||
socket: &UdpSocket,
|
||||
public_ip: Option<&str>,
|
||||
greeting_wav: Option<String>,
|
||||
) -> Option<String> {
|
||||
let lan_ip = &config.proxy.lan_ip;
|
||||
let pub_ip = public_ip.unwrap_or(lan_ip.as_str());
|
||||
@@ -1630,8 +1659,6 @@ impl CallManager {
|
||||
.as_millis();
|
||||
let recording_dir = "nogit/voicemail/default".to_string();
|
||||
let recording_path = format!("{recording_dir}/msg-{timestamp}.wav");
|
||||
let greeting_wav = find_greeting_wav();
|
||||
|
||||
let out_tx = self.out_tx.clone();
|
||||
let call_id_owned = call_id.to_string();
|
||||
let caller_owned = caller_number.to_string();
|
||||
@@ -1648,6 +1675,211 @@ impl CallManager {
|
||||
Some(call_id.to_string())
|
||||
}
|
||||
|
||||
// -----------------------------------------------------------------------
|
||||
// IVR routing
|
||||
// -----------------------------------------------------------------------
|
||||
|
||||
#[allow(clippy::too_many_arguments)]
|
||||
async fn route_to_ivr(
|
||||
&mut self,
|
||||
call_id: &str,
|
||||
invite: &SipMessage,
|
||||
from_addr: SocketAddr,
|
||||
caller_number: &str,
|
||||
provider_id: &str,
|
||||
provider_config: &ProviderConfig,
|
||||
config: &AppConfig,
|
||||
rtp_pool: &mut RtpPortPool,
|
||||
socket: &UdpSocket,
|
||||
public_ip: Option<&str>,
|
||||
menu: &crate::config::IvrMenuConfig,
|
||||
tts_engine: &Arc<Mutex<TtsEngine>>,
|
||||
) -> Option<String> {
|
||||
let lan_ip = &config.proxy.lan_ip;
|
||||
|
||||
let rtp_alloc = match rtp_pool.allocate().await {
|
||||
Some(a) => a,
|
||||
None => {
|
||||
let resp = SipMessage::create_response(503, "Service Unavailable", invite, None);
|
||||
let _ = socket.send_to(&resp.serialize(), from_addr).await;
|
||||
return None;
|
||||
}
|
||||
};
|
||||
|
||||
let codec_pt = provider_config.codecs.first().copied().unwrap_or(9);
|
||||
let pub_ip = public_ip.unwrap_or(lan_ip.as_str());
|
||||
|
||||
let sdp = sip_proto::helpers::build_sdp(&sip_proto::helpers::SdpOptions {
|
||||
ip: pub_ip,
|
||||
port: rtp_alloc.port,
|
||||
payload_types: &provider_config.codecs,
|
||||
..Default::default()
|
||||
});
|
||||
|
||||
let response = SipMessage::create_response(
|
||||
200, "OK", invite,
|
||||
Some(sip_proto::message::ResponseOptions {
|
||||
to_tag: Some(sip_proto::helpers::generate_tag()),
|
||||
contact: Some(format!("<sip:{}:{}>", lan_ip, config.proxy.lan_port)),
|
||||
body: Some(sdp),
|
||||
content_type: Some("application/sdp".to_string()),
|
||||
..Default::default()
|
||||
}),
|
||||
);
|
||||
let _ = socket.send_to(&response.serialize(), from_addr).await;
|
||||
|
||||
let provider_media = if invite.has_sdp_body() {
|
||||
parse_sdp_endpoint(&invite.body)
|
||||
.and_then(|ep| format!("{}:{}", ep.address, ep.port).parse().ok())
|
||||
} else {
|
||||
Some(from_addr)
|
||||
};
|
||||
let provider_media = provider_media.unwrap_or(from_addr);
|
||||
|
||||
// Create call with IVR state.
|
||||
let (mixer_cmd_tx, mixer_task) = spawn_mixer(call_id.to_string(), self.out_tx.clone());
|
||||
let mut call = Call::new(
|
||||
call_id.to_string(),
|
||||
CallDirection::Inbound,
|
||||
provider_id.to_string(),
|
||||
mixer_cmd_tx.clone(),
|
||||
mixer_task,
|
||||
);
|
||||
call.state = CallState::Ivr;
|
||||
call.caller_number = Some(caller_number.to_string());
|
||||
|
||||
let provider_leg_id = format!("{call_id}-prov");
|
||||
call.legs.insert(
|
||||
provider_leg_id.clone(),
|
||||
LegInfo {
|
||||
id: provider_leg_id.clone(),
|
||||
kind: LegKind::SipProvider,
|
||||
state: LegState::Connected,
|
||||
codec_pt,
|
||||
sip_leg: None,
|
||||
sip_call_id: Some(invite.call_id().to_string()),
|
||||
webrtc_session_id: None,
|
||||
rtp_socket: Some(rtp_alloc.socket.clone()),
|
||||
rtp_port: rtp_alloc.port,
|
||||
public_ip: public_ip.map(|s| s.to_string()),
|
||||
remote_media: Some(provider_media),
|
||||
signaling_addr: Some(from_addr),
|
||||
metadata: HashMap::new(),
|
||||
},
|
||||
);
|
||||
|
||||
self.sip_index.insert(
|
||||
invite.call_id().to_string(),
|
||||
(call_id.to_string(), provider_leg_id.clone()),
|
||||
);
|
||||
self.calls.insert(call_id.to_string(), call);
|
||||
|
||||
// Emit leg_added for the provider leg.
|
||||
if let Some(call) = self.calls.get(call_id) {
|
||||
for leg in call.legs.values() {
|
||||
emit_leg_added_event(&self.out_tx, call_id, leg);
|
||||
}
|
||||
}
|
||||
|
||||
// Generate IVR prompt on-demand via TTS (cached).
|
||||
let voice = menu.prompt_voice.as_deref().unwrap_or("af_bella");
|
||||
let prompt_output = format!(".nogit/tts/ivr-menu-{}.wav", menu.id);
|
||||
let prompt_params = serde_json::json!({
|
||||
"model": ".nogit/tts/kokoro-v1.0.onnx",
|
||||
"voices": ".nogit/tts/voices.bin",
|
||||
"voice": voice,
|
||||
"text": &menu.prompt_text,
|
||||
"output": &prompt_output,
|
||||
"cacheable": true,
|
||||
});
|
||||
|
||||
let prompt_wav = {
|
||||
let mut tts = tts_engine.lock().await;
|
||||
match tts.generate(&prompt_params).await {
|
||||
Ok(_) => Some(prompt_output),
|
||||
Err(e) => {
|
||||
eprintln!("[ivr] TTS generation failed: {e}");
|
||||
None
|
||||
}
|
||||
}
|
||||
};
|
||||
|
||||
// Load prompt and run interaction via the mixer.
|
||||
let out_tx = self.out_tx.clone();
|
||||
let call_id_owned = call_id.to_string();
|
||||
let expected_digits: Vec<char> = menu
|
||||
.entries
|
||||
.iter()
|
||||
.filter_map(|e| e.digit.chars().next())
|
||||
.collect();
|
||||
let timeout_ms = menu.timeout_sec.unwrap_or(5) * 1000;
|
||||
|
||||
tokio::spawn(async move {
|
||||
// Load prompt PCM frames if available.
|
||||
let prompt_frames = prompt_wav.as_ref().and_then(|wav| {
|
||||
crate::audio_player::load_prompt_pcm_frames(wav).ok()
|
||||
});
|
||||
|
||||
if let Some(frames) = prompt_frames {
|
||||
let (result_tx, result_rx) = tokio::sync::oneshot::channel();
|
||||
let _ = mixer_cmd_tx
|
||||
.send(crate::mixer::MixerCommand::StartInteraction {
|
||||
leg_id: provider_leg_id.clone(),
|
||||
prompt_pcm_frames: frames,
|
||||
expected_digits: expected_digits.clone(),
|
||||
timeout_ms,
|
||||
result_tx,
|
||||
})
|
||||
.await;
|
||||
|
||||
// Wait for digit or timeout.
|
||||
let safety = tokio::time::Duration::from_millis(timeout_ms as u64 + 30000);
|
||||
let result = match tokio::time::timeout(safety, result_rx).await {
|
||||
Ok(Ok(r)) => r,
|
||||
Ok(Err(_)) => crate::mixer::InteractionResult::Cancelled,
|
||||
Err(_) => crate::mixer::InteractionResult::Timeout,
|
||||
};
|
||||
|
||||
match &result {
|
||||
crate::mixer::InteractionResult::Digit(d) => {
|
||||
eprintln!("[ivr] caller pressed '{d}' on call {call_id_owned}");
|
||||
emit_event(
|
||||
&out_tx,
|
||||
"ivr_digit",
|
||||
serde_json::json!({
|
||||
"call_id": call_id_owned,
|
||||
"digit": d.to_string(),
|
||||
}),
|
||||
);
|
||||
}
|
||||
crate::mixer::InteractionResult::Timeout => {
|
||||
eprintln!("[ivr] timeout on call {call_id_owned}");
|
||||
emit_event(
|
||||
&out_tx,
|
||||
"ivr_timeout",
|
||||
serde_json::json!({ "call_id": call_id_owned }),
|
||||
);
|
||||
}
|
||||
crate::mixer::InteractionResult::Cancelled => {
|
||||
eprintln!("[ivr] cancelled on call {call_id_owned}");
|
||||
}
|
||||
}
|
||||
} else {
|
||||
eprintln!("[ivr] no prompt available for call {call_id_owned}, ending");
|
||||
emit_event(
|
||||
&out_tx,
|
||||
"ivr_error",
|
||||
serde_json::json!({
|
||||
"call_id": call_id_owned,
|
||||
"error": "no prompt available",
|
||||
}),
|
||||
);
|
||||
}
|
||||
});
|
||||
|
||||
Some(call_id.to_string())
|
||||
}
|
||||
|
||||
// -----------------------------------------------------------------------
|
||||
// Internal helpers
|
||||
// -----------------------------------------------------------------------
|
||||
@@ -1662,13 +1894,56 @@ impl CallManager {
|
||||
}
|
||||
}
|
||||
|
||||
fn find_greeting_wav() -> Option<String> {
|
||||
let candidates = [
|
||||
/// Resolve the greeting WAV for a voicemail box.
|
||||
///
|
||||
/// Priority:
|
||||
/// 1. Pre-recorded WAV from voicebox config (`greetingWavPath`)
|
||||
/// 2. On-demand TTS generation from greeting text (cached via `cacheable: true`)
|
||||
/// 3. Legacy hardcoded paths (`.nogit/voicemail/default/greeting.wav`, etc.)
|
||||
/// 4. None — voicemail session plays beep only
|
||||
async fn resolve_greeting_wav(
|
||||
config: &AppConfig,
|
||||
voicebox_id: Option<&str>,
|
||||
tts_engine: &Arc<Mutex<TtsEngine>>,
|
||||
) -> Option<String> {
|
||||
// 1. Look up voicebox config.
|
||||
let vb = voicebox_id
|
||||
.and_then(|id| config.voiceboxes.iter().find(|v| v.id == id && v.enabled));
|
||||
|
||||
if let Some(vb) = vb {
|
||||
// 2. Pre-recorded WAV takes priority.
|
||||
if let Some(ref wav) = vb.greeting_wav_path {
|
||||
if Path::new(wav).exists() {
|
||||
return Some(wav.clone());
|
||||
}
|
||||
}
|
||||
// 3. TTS on-demand with caching.
|
||||
let text = vb.greeting_text.as_deref().unwrap_or(
|
||||
"The person you are trying to reach is not available. Please leave a message after the tone.",
|
||||
);
|
||||
let voice = vb.greeting_voice.as_deref().unwrap_or("af_bella");
|
||||
let output = format!(".nogit/tts/voicemail-greeting-{}.wav", vb.id);
|
||||
|
||||
let params = serde_json::json!({
|
||||
"model": ".nogit/tts/kokoro-v1.0.onnx",
|
||||
"voices": ".nogit/tts/voices.bin",
|
||||
"voice": voice,
|
||||
"text": text,
|
||||
"output": &output,
|
||||
"cacheable": true,
|
||||
});
|
||||
let mut tts = tts_engine.lock().await;
|
||||
if tts.generate(&params).await.is_ok() {
|
||||
return Some(output);
|
||||
}
|
||||
}
|
||||
|
||||
// 4. Fallback: legacy hardcoded paths.
|
||||
for path in &[
|
||||
".nogit/voicemail/default/greeting.wav",
|
||||
".nogit/voicemail/greeting.wav",
|
||||
];
|
||||
for path in &candidates {
|
||||
if std::path::Path::new(path).exists() {
|
||||
] {
|
||||
if Path::new(path).exists() {
|
||||
return Some(path.to_string());
|
||||
}
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user