feat(proxy-engine): add on-demand TTS caching for voicemail and IVR prompts
This commit is contained in:
@@ -50,11 +50,12 @@ struct ProxyEngine {
|
||||
registrar: Registrar,
|
||||
call_mgr: CallManager,
|
||||
rtp_pool: Option<RtpPortPool>,
|
||||
tts_engine: Arc<Mutex<tts::TtsEngine>>,
|
||||
out_tx: OutTx,
|
||||
}
|
||||
|
||||
impl ProxyEngine {
|
||||
fn new(out_tx: OutTx) -> Self {
|
||||
fn new(out_tx: OutTx, tts_engine: Arc<Mutex<tts::TtsEngine>>) -> Self {
|
||||
Self {
|
||||
config: None,
|
||||
transport: None,
|
||||
@@ -62,6 +63,7 @@ impl ProxyEngine {
|
||||
registrar: Registrar::new(out_tx.clone()),
|
||||
call_mgr: CallManager::new(out_tx.clone()),
|
||||
rtp_pool: None,
|
||||
tts_engine,
|
||||
out_tx,
|
||||
}
|
||||
}
|
||||
@@ -88,15 +90,16 @@ async fn main() {
|
||||
// Emit ready event.
|
||||
emit_event(&out_tx, "ready", serde_json::json!({}));
|
||||
|
||||
// Shared engine state (SIP side).
|
||||
let engine = Arc::new(Mutex::new(ProxyEngine::new(out_tx.clone())));
|
||||
// TTS engine — separate internal lock, lazy-loads model on first use.
|
||||
let tts_engine = Arc::new(Mutex::new(tts::TtsEngine::new()));
|
||||
|
||||
// Shared engine state (SIP side). TTS engine is stored inside so the
|
||||
// SIP packet handler path can reach it for on-demand voicemail/IVR generation.
|
||||
let engine = Arc::new(Mutex::new(ProxyEngine::new(out_tx.clone(), tts_engine)));
|
||||
|
||||
// WebRTC engine — separate lock to avoid deadlock with SIP handlers.
|
||||
let webrtc = Arc::new(Mutex::new(WebRtcEngine::new(out_tx.clone())));
|
||||
|
||||
// TTS engine — separate lock, lazy-loads model on first use.
|
||||
let tts_engine = Arc::new(Mutex::new(tts::TtsEngine::new()));
|
||||
|
||||
// Read commands from stdin.
|
||||
let stdin = tokio::io::stdin();
|
||||
let reader = BufReader::new(stdin);
|
||||
@@ -117,12 +120,11 @@ async fn main() {
|
||||
|
||||
let engine = engine.clone();
|
||||
let webrtc = webrtc.clone();
|
||||
let tts_engine = tts_engine.clone();
|
||||
let out_tx = out_tx.clone();
|
||||
|
||||
// Handle commands — some are async, so we spawn.
|
||||
tokio::spawn(async move {
|
||||
handle_command(engine, webrtc, tts_engine, &out_tx, cmd).await;
|
||||
handle_command(engine, webrtc, &out_tx, cmd).await;
|
||||
});
|
||||
}
|
||||
}
|
||||
@@ -130,7 +132,6 @@ async fn main() {
|
||||
async fn handle_command(
|
||||
engine: Arc<Mutex<ProxyEngine>>,
|
||||
webrtc: Arc<Mutex<WebRtcEngine>>,
|
||||
tts_engine: Arc<Mutex<tts::TtsEngine>>,
|
||||
out_tx: &OutTx,
|
||||
cmd: Command,
|
||||
) {
|
||||
@@ -155,8 +156,8 @@ async fn handle_command(
|
||||
"add_tool_leg" => handle_add_tool_leg(engine, out_tx, &cmd).await,
|
||||
"remove_tool_leg" => handle_remove_tool_leg(engine, out_tx, &cmd).await,
|
||||
"set_leg_metadata" => handle_set_leg_metadata(engine, out_tx, &cmd).await,
|
||||
// TTS command — lock tts_engine only (no SIP/WebRTC contention).
|
||||
"generate_tts" => handle_generate_tts(tts_engine, out_tx, &cmd).await,
|
||||
// TTS command — gets tts_engine from inside ProxyEngine.
|
||||
"generate_tts" => handle_generate_tts(engine, out_tx, &cmd).await,
|
||||
_ => respond_err(out_tx, &cmd.id, &format!("unknown command: {}", cmd.method)),
|
||||
}
|
||||
}
|
||||
@@ -325,8 +326,10 @@ async fn handle_sip_packet(
|
||||
ref registrar,
|
||||
ref mut call_mgr,
|
||||
ref mut rtp_pool,
|
||||
ref tts_engine,
|
||||
..
|
||||
} = *eng;
|
||||
let tts_clone = tts_engine.clone();
|
||||
let rtp_pool = rtp_pool.as_mut().unwrap();
|
||||
let inbound = call_mgr
|
||||
.create_inbound_call(
|
||||
@@ -339,6 +342,7 @@ async fn handle_sip_packet(
|
||||
rtp_pool,
|
||||
socket,
|
||||
public_ip.as_deref(),
|
||||
tts_clone,
|
||||
)
|
||||
.await;
|
||||
|
||||
@@ -1231,10 +1235,11 @@ async fn handle_set_leg_metadata(
|
||||
|
||||
/// Handle `generate_tts` — synthesize text to a WAV file using Kokoro TTS.
|
||||
async fn handle_generate_tts(
|
||||
tts_engine: Arc<Mutex<tts::TtsEngine>>,
|
||||
engine: Arc<Mutex<ProxyEngine>>,
|
||||
out_tx: &OutTx,
|
||||
cmd: &Command,
|
||||
) {
|
||||
let tts_engine = engine.lock().await.tts_engine.clone();
|
||||
let mut tts = tts_engine.lock().await;
|
||||
match tts.generate(&cmd.params).await {
|
||||
Ok(result) => respond_ok(out_tx, &cmd.id, result),
|
||||
|
||||
Reference in New Issue
Block a user