feat(mixer): enhance mixer functionality with interaction and tool legs

- Updated mixer to handle participant and isolated leg roles, allowing for IVR and consent interactions.
- Introduced commands for starting and canceling interactions, managing tool legs for recording and transcription.
- Implemented per-source audio handling for tool legs, enabling separate audio processing.
- Enhanced DTMF handling to forward events between participant legs only.
- Added support for PCM recording directly from tool legs, with WAV file generation.
- Updated TypeScript definitions and functions to support new interaction and tool leg features.
This commit is contained in:
2026-04-10 14:54:21 +00:00
parent 6a130db7c7
commit 7d59361352
13 changed files with 1448 additions and 94 deletions

View File

@@ -20,6 +20,7 @@ mod registrar;
mod rtp;
mod sip_leg;
mod sip_transport;
mod tool_leg;
mod voicemail;
mod webrtc_engine;
@@ -141,6 +142,11 @@ async fn handle_command(
"webrtc_close" => handle_webrtc_close(webrtc, out_tx, &cmd).await,
// webrtc_link needs both: engine (for mixer channels) and webrtc (for session).
"webrtc_link" => handle_webrtc_link(engine, webrtc, out_tx, &cmd).await,
// Leg interaction and tool leg commands.
"start_interaction" => handle_start_interaction(engine, out_tx, &cmd).await,
"add_tool_leg" => handle_add_tool_leg(engine, out_tx, &cmd).await,
"remove_tool_leg" => handle_remove_tool_leg(engine, out_tx, &cmd).await,
"set_leg_metadata" => handle_set_leg_metadata(engine, out_tx, &cmd).await,
_ => respond_err(out_tx, &cmd.id, &format!("unknown command: {}", cmd.method)),
}
}
@@ -373,11 +379,14 @@ async fn handle_sip_packet(
);
if let Some(route) = route_result {
let public_ip = if let Some(ps_arc) = eng.provider_mgr.find_by_address(&from_addr).await {
// Look up provider state by config ID (not by device address).
let (public_ip, registered_aor) = if let Some(ps_arc) =
eng.provider_mgr.find_by_provider_id(&route.provider.id).await
{
let ps = ps_arc.lock().await;
ps.public_ip.clone()
(ps.public_ip.clone(), ps.registered_aor.clone())
} else {
None
(None, format!("sip:{}@{}", route.provider.username, route.provider.domain))
};
let ProxyEngine {
@@ -387,7 +396,7 @@ async fn handle_sip_packet(
} = *eng;
let rtp_pool = rtp_pool.as_mut().unwrap();
let call_id = call_mgr
.create_outbound_passthrough(
.create_device_outbound_call(
&msg,
from_addr,
&route.provider,
@@ -395,6 +404,7 @@ async fn handle_sip_packet(
rtp_pool,
socket,
public_ip.as_deref(),
&registered_aor,
)
.await;
@@ -645,6 +655,7 @@ async fn handle_webrtc_link(
rtp_port: 0,
remote_media: None,
signaling_addr: None,
metadata: std::collections::HashMap::new(),
},
);
}
@@ -773,3 +784,319 @@ async fn handle_webrtc_close(webrtc: Arc<Mutex<WebRtcEngine>>, out_tx: &OutTx, c
Err(e) => respond_err(out_tx, &cmd.id, &e),
}
}
// ---------------------------------------------------------------------------
// Leg interaction & tool leg commands
// ---------------------------------------------------------------------------
/// Handle `start_interaction` — isolate a leg, play a prompt, collect DTMF.
/// This command blocks until the interaction completes (digit, timeout, or cancel).
async fn handle_start_interaction(
engine: Arc<Mutex<ProxyEngine>>,
out_tx: &OutTx,
cmd: &Command,
) {
let call_id = match cmd.params.get("call_id").and_then(|v| v.as_str()) {
Some(s) => s.to_string(),
None => { respond_err(out_tx, &cmd.id, "missing call_id"); return; }
};
let leg_id = match cmd.params.get("leg_id").and_then(|v| v.as_str()) {
Some(s) => s.to_string(),
None => { respond_err(out_tx, &cmd.id, "missing leg_id"); return; }
};
let prompt_wav = match cmd.params.get("prompt_wav").and_then(|v| v.as_str()) {
Some(s) => s.to_string(),
None => { respond_err(out_tx, &cmd.id, "missing prompt_wav"); return; }
};
let expected_digits: Vec<char> = cmd
.params
.get("expected_digits")
.and_then(|v| v.as_str())
.unwrap_or("12")
.chars()
.collect();
let timeout_ms = cmd
.params
.get("timeout_ms")
.and_then(|v| v.as_u64())
.unwrap_or(15000) as u32;
// Load prompt audio from WAV file.
let prompt_frames = match crate::audio_player::load_prompt_pcm_frames(&prompt_wav) {
Ok(f) => f,
Err(e) => {
respond_err(out_tx, &cmd.id, &format!("prompt load failed: {e}"));
return;
}
};
// Create oneshot channel for the result.
let (result_tx, result_rx) = tokio::sync::oneshot::channel();
// Send StartInteraction to the mixer.
{
let eng = engine.lock().await;
let call = match eng.call_mgr.calls.get(&call_id) {
Some(c) => c,
None => {
respond_err(out_tx, &cmd.id, &format!("call {call_id} not found"));
return;
}
};
let _ = call
.mixer_cmd_tx
.send(crate::mixer::MixerCommand::StartInteraction {
leg_id: leg_id.clone(),
prompt_pcm_frames: prompt_frames,
expected_digits: expected_digits.clone(),
timeout_ms,
result_tx,
})
.await;
} // engine lock released — we block on the oneshot, not the lock.
// Await the interaction result (blocks this task until complete).
let safety_timeout = tokio::time::Duration::from_millis(timeout_ms as u64 + 30000);
let result = match tokio::time::timeout(safety_timeout, result_rx).await {
Ok(Ok(r)) => r,
Ok(Err(_)) => crate::mixer::InteractionResult::Cancelled, // oneshot dropped
Err(_) => crate::mixer::InteractionResult::Timeout, // safety timeout
};
// Store consent result in leg metadata.
let (result_str, digit_str) = match &result {
crate::mixer::InteractionResult::Digit(d) => ("digit", Some(d.to_string())),
crate::mixer::InteractionResult::Timeout => ("timeout", None),
crate::mixer::InteractionResult::Cancelled => ("cancelled", None),
};
{
let mut eng = engine.lock().await;
if let Some(call) = eng.call_mgr.calls.get_mut(&call_id) {
if let Some(leg) = call.legs.get_mut(&leg_id) {
leg.metadata.insert(
"last_interaction_result".to_string(),
serde_json::json!(result_str),
);
if let Some(ref d) = digit_str {
leg.metadata.insert(
"last_interaction_digit".to_string(),
serde_json::json!(d),
);
}
}
}
}
let mut resp = serde_json::json!({ "result": result_str });
if let Some(d) = digit_str {
resp["digit"] = serde_json::json!(d);
}
respond_ok(out_tx, &cmd.id, resp);
}
/// Handle `add_tool_leg` — add a recording or transcription tool leg to a call.
async fn handle_add_tool_leg(
engine: Arc<Mutex<ProxyEngine>>,
out_tx: &OutTx,
cmd: &Command,
) {
let call_id = match cmd.params.get("call_id").and_then(|v| v.as_str()) {
Some(s) => s.to_string(),
None => { respond_err(out_tx, &cmd.id, "missing call_id"); return; }
};
let tool_type_str = match cmd.params.get("tool_type").and_then(|v| v.as_str()) {
Some(s) => s.to_string(),
None => { respond_err(out_tx, &cmd.id, "missing tool_type"); return; }
};
let tool_type = match tool_type_str.as_str() {
"recording" => crate::mixer::ToolType::Recording,
"transcription" => crate::mixer::ToolType::Transcription,
other => {
respond_err(out_tx, &cmd.id, &format!("unknown tool_type: {other}"));
return;
}
};
let tool_leg_id = format!("{call_id}-tool-{}", rand::random::<u32>());
// Spawn the appropriate background task.
let (audio_tx, _task_handle) = match tool_type {
crate::mixer::ToolType::Recording => {
let base_dir = cmd
.params
.get("config")
.and_then(|c| c.get("base_dir"))
.and_then(|v| v.as_str())
.unwrap_or(".nogit/recordings")
.to_string();
crate::tool_leg::spawn_recording_tool(
tool_leg_id.clone(),
call_id.clone(),
base_dir,
out_tx.clone(),
)
}
crate::mixer::ToolType::Transcription => {
crate::tool_leg::spawn_transcription_tool(
tool_leg_id.clone(),
call_id.clone(),
out_tx.clone(),
)
}
};
// Send AddToolLeg to the mixer and register in call.
{
let mut eng = engine.lock().await;
let call = match eng.call_mgr.calls.get_mut(&call_id) {
Some(c) => c,
None => {
respond_err(out_tx, &cmd.id, &format!("call {call_id} not found"));
return;
}
};
let _ = call
.mixer_cmd_tx
.send(crate::mixer::MixerCommand::AddToolLeg {
leg_id: tool_leg_id.clone(),
tool_type,
audio_tx,
})
.await;
// Register tool leg in the call's leg map.
let mut metadata = std::collections::HashMap::new();
metadata.insert(
"tool_type".to_string(),
serde_json::json!(tool_type_str),
);
call.legs.insert(
tool_leg_id.clone(),
crate::call::LegInfo {
id: tool_leg_id.clone(),
kind: crate::call::LegKind::Tool,
state: crate::call::LegState::Connected,
codec_pt: 0,
sip_leg: None,
sip_call_id: None,
webrtc_session_id: None,
rtp_socket: None,
rtp_port: 0,
remote_media: None,
signaling_addr: None,
metadata,
},
);
}
emit_event(
out_tx,
"leg_added",
serde_json::json!({
"call_id": call_id,
"leg_id": tool_leg_id,
"kind": "tool",
"tool_type": tool_type_str,
"state": "connected",
}),
);
respond_ok(
out_tx,
&cmd.id,
serde_json::json!({ "tool_leg_id": tool_leg_id }),
);
}
/// Handle `remove_tool_leg` — remove a tool leg from a call.
async fn handle_remove_tool_leg(
engine: Arc<Mutex<ProxyEngine>>,
out_tx: &OutTx,
cmd: &Command,
) {
let call_id = match cmd.params.get("call_id").and_then(|v| v.as_str()) {
Some(s) => s.to_string(),
None => { respond_err(out_tx, &cmd.id, "missing call_id"); return; }
};
let tool_leg_id = match cmd.params.get("tool_leg_id").and_then(|v| v.as_str()) {
Some(s) => s.to_string(),
None => { respond_err(out_tx, &cmd.id, "missing tool_leg_id"); return; }
};
let mut eng = engine.lock().await;
let call = match eng.call_mgr.calls.get_mut(&call_id) {
Some(c) => c,
None => {
respond_err(out_tx, &cmd.id, &format!("call {call_id} not found"));
return;
}
};
// Remove from mixer (drops audio_tx → background task finalizes).
let _ = call
.mixer_cmd_tx
.send(crate::mixer::MixerCommand::RemoveToolLeg {
leg_id: tool_leg_id.clone(),
})
.await;
// Remove from call's leg map.
call.legs.remove(&tool_leg_id);
emit_event(
out_tx,
"leg_removed",
serde_json::json!({
"call_id": call_id,
"leg_id": tool_leg_id,
}),
);
respond_ok(out_tx, &cmd.id, serde_json::json!({}));
}
/// Handle `set_leg_metadata` — set a metadata key on a leg.
async fn handle_set_leg_metadata(
engine: Arc<Mutex<ProxyEngine>>,
out_tx: &OutTx,
cmd: &Command,
) {
let call_id = match cmd.params.get("call_id").and_then(|v| v.as_str()) {
Some(s) => s.to_string(),
None => { respond_err(out_tx, &cmd.id, "missing call_id"); return; }
};
let leg_id = match cmd.params.get("leg_id").and_then(|v| v.as_str()) {
Some(s) => s.to_string(),
None => { respond_err(out_tx, &cmd.id, "missing leg_id"); return; }
};
let key = match cmd.params.get("key").and_then(|v| v.as_str()) {
Some(s) => s.to_string(),
None => { respond_err(out_tx, &cmd.id, "missing key"); return; }
};
let value = match cmd.params.get("value") {
Some(v) => v.clone(),
None => { respond_err(out_tx, &cmd.id, "missing value"); return; }
};
let mut eng = engine.lock().await;
let call = match eng.call_mgr.calls.get_mut(&call_id) {
Some(c) => c,
None => {
respond_err(out_tx, &cmd.id, &format!("call {call_id} not found"));
return;
}
};
let leg = match call.legs.get_mut(&leg_id) {
Some(l) => l,
None => {
respond_err(out_tx, &cmd.id, &format!("leg {leg_id} not found"));
return;
}
};
leg.metadata.insert(key, value);
respond_ok(out_tx, &cmd.id, serde_json::json!({}));
}