feat(proxy-engine): integrate Kokoro TTS generation into proxy-engine and simplify TypeScript prompt handling to use cached WAV files
This commit is contained in:
@@ -21,6 +21,7 @@ mod rtp;
|
||||
mod sip_leg;
|
||||
mod sip_transport;
|
||||
mod tool_leg;
|
||||
mod tts;
|
||||
mod voicemail;
|
||||
mod webrtc_engine;
|
||||
|
||||
@@ -93,6 +94,9 @@ async fn main() {
|
||||
// WebRTC engine — separate lock to avoid deadlock with SIP handlers.
|
||||
let webrtc = Arc::new(Mutex::new(WebRtcEngine::new(out_tx.clone())));
|
||||
|
||||
// TTS engine — separate lock, lazy-loads model on first use.
|
||||
let tts_engine = Arc::new(Mutex::new(tts::TtsEngine::new()));
|
||||
|
||||
// Read commands from stdin.
|
||||
let stdin = tokio::io::stdin();
|
||||
let reader = BufReader::new(stdin);
|
||||
@@ -113,11 +117,12 @@ async fn main() {
|
||||
|
||||
let engine = engine.clone();
|
||||
let webrtc = webrtc.clone();
|
||||
let tts_engine = tts_engine.clone();
|
||||
let out_tx = out_tx.clone();
|
||||
|
||||
// Handle commands — some are async, so we spawn.
|
||||
tokio::spawn(async move {
|
||||
handle_command(engine, webrtc, &out_tx, cmd).await;
|
||||
handle_command(engine, webrtc, tts_engine, &out_tx, cmd).await;
|
||||
});
|
||||
}
|
||||
}
|
||||
@@ -125,6 +130,7 @@ async fn main() {
|
||||
async fn handle_command(
|
||||
engine: Arc<Mutex<ProxyEngine>>,
|
||||
webrtc: Arc<Mutex<WebRtcEngine>>,
|
||||
tts_engine: Arc<Mutex<tts::TtsEngine>>,
|
||||
out_tx: &OutTx,
|
||||
cmd: Command,
|
||||
) {
|
||||
@@ -150,6 +156,8 @@ async fn handle_command(
|
||||
"add_tool_leg" => handle_add_tool_leg(engine, out_tx, &cmd).await,
|
||||
"remove_tool_leg" => handle_remove_tool_leg(engine, out_tx, &cmd).await,
|
||||
"set_leg_metadata" => handle_set_leg_metadata(engine, out_tx, &cmd).await,
|
||||
// TTS command — lock tts_engine only (no SIP/WebRTC contention).
|
||||
"generate_tts" => handle_generate_tts(tts_engine, out_tx, &cmd).await,
|
||||
_ => respond_err(out_tx, &cmd.id, &format!("unknown command: {}", cmd.method)),
|
||||
}
|
||||
}
|
||||
@@ -1218,3 +1226,16 @@ async fn handle_set_leg_metadata(
|
||||
leg.metadata.insert(key, value);
|
||||
respond_ok(out_tx, &cmd.id, serde_json::json!({}));
|
||||
}
|
||||
|
||||
/// Handle `generate_tts` — synthesize text to a WAV file using Kokoro TTS.
|
||||
async fn handle_generate_tts(
|
||||
tts_engine: Arc<Mutex<tts::TtsEngine>>,
|
||||
out_tx: &OutTx,
|
||||
cmd: &Command,
|
||||
) {
|
||||
let mut tts = tts_engine.lock().await;
|
||||
match tts.generate(&cmd.params).await {
|
||||
Ok(result) => respond_ok(out_tx, &cmd.id, result),
|
||||
Err(e) => respond_err(out_tx, &cmd.id, &e),
|
||||
}
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user