feat(proxy-engine): integrate Kokoro TTS generation into proxy-engine and simplify TypeScript prompt handling to use cached WAV files

This commit is contained in:
2026-04-10 15:21:44 +00:00
parent c9ae747c95
commit 66112091a2
18 changed files with 340 additions and 1202 deletions

View File

@@ -21,6 +21,7 @@ mod rtp;
mod sip_leg;
mod sip_transport;
mod tool_leg;
mod tts;
mod voicemail;
mod webrtc_engine;
@@ -93,6 +94,9 @@ async fn main() {
// WebRTC engine — separate lock to avoid deadlock with SIP handlers.
let webrtc = Arc::new(Mutex::new(WebRtcEngine::new(out_tx.clone())));
// TTS engine — separate lock, lazy-loads model on first use.
let tts_engine = Arc::new(Mutex::new(tts::TtsEngine::new()));
// Read commands from stdin.
let stdin = tokio::io::stdin();
let reader = BufReader::new(stdin);
@@ -113,11 +117,12 @@ async fn main() {
let engine = engine.clone();
let webrtc = webrtc.clone();
let tts_engine = tts_engine.clone();
let out_tx = out_tx.clone();
// Handle commands — some are async, so we spawn.
tokio::spawn(async move {
handle_command(engine, webrtc, &out_tx, cmd).await;
handle_command(engine, webrtc, tts_engine, &out_tx, cmd).await;
});
}
}
@@ -125,6 +130,7 @@ async fn main() {
async fn handle_command(
engine: Arc<Mutex<ProxyEngine>>,
webrtc: Arc<Mutex<WebRtcEngine>>,
tts_engine: Arc<Mutex<tts::TtsEngine>>,
out_tx: &OutTx,
cmd: Command,
) {
@@ -150,6 +156,8 @@ async fn handle_command(
"add_tool_leg" => handle_add_tool_leg(engine, out_tx, &cmd).await,
"remove_tool_leg" => handle_remove_tool_leg(engine, out_tx, &cmd).await,
"set_leg_metadata" => handle_set_leg_metadata(engine, out_tx, &cmd).await,
// TTS command — lock tts_engine only (no SIP/WebRTC contention).
"generate_tts" => handle_generate_tts(tts_engine, out_tx, &cmd).await,
_ => respond_err(out_tx, &cmd.id, &format!("unknown command: {}", cmd.method)),
}
}
@@ -1218,3 +1226,16 @@ async fn handle_set_leg_metadata(
leg.metadata.insert(key, value);
respond_ok(out_tx, &cmd.id, serde_json::json!({}));
}
/// Handle `generate_tts` — synthesize text to a WAV file using Kokoro TTS.
async fn handle_generate_tts(
tts_engine: Arc<Mutex<tts::TtsEngine>>,
out_tx: &OutTx,
cmd: &Command,
) {
let mut tts = tts_engine.lock().await;
match tts.generate(&cmd.params).await {
Ok(result) => respond_ok(out_tx, &cmd.id, result),
Err(e) => respond_err(out_tx, &cmd.id, &e),
}
}