v1.17.0

feat(proxy-engine): upgrade the internal audio bus to 48kHz f32 with per-leg denoising and improve SIP leg routing
v1.16.0
2026-04-10 15:58:41 +00:00 · 2026-04-10 15:58:41 +00:00 · 2026-04-10 15:21:44 +00:00 · 2026-04-10 15:21:44 +00:00 · 2026-04-10 15:12:30 +00:00 · 2026-04-10 15:12:30 +00:00
32 changed files with 4443 additions and 1923 deletions
--- a/changelog.md
+++ b/changelog.md
@@ -1,5 +1,46 @@
 # Changelog

+## 2026-04-10 - 1.17.0 - feat(proxy-engine)
+upgrade the internal audio bus to 48kHz f32 with per-leg denoising and improve SIP leg routing
+
+- switch mixer, prompt playback, and tool leg audio handling from 16kHz i16 to 48kHz f32 for higher-quality internal processing
+- add f32 decode/encode and resampling support plus standalone RNNoise denoiser creation in codec-lib
+- apply per-leg inbound noise suppression in the mixer before mix-minus generation
+- fix passthrough call routing by matching the actual leg from the signaling source address when Call-IDs are shared
+- correct dialed number extraction from bare SIP request URIs by parsing the user part directly
+
+## 2026-04-10 - 1.16.0 - feat(proxy-engine)
+integrate Kokoro TTS generation into proxy-engine and simplify TypeScript prompt handling to use cached WAV files
+
+- adds a generate_tts command to proxy-engine with lazy-loaded Kokoro model support and WAV output generation
+- removes standalone opus-codec and tts-engine workspace binaries by consolidating TTS generation into proxy-engine
+- updates announcement and prompt cache flows to generate and cache WAV files on disk instead of pre-encoding RTP frames in TypeScript
+
+## 2026-04-10 - 1.15.0 - feat(proxy-engine)
+add device leg, leg transfer, and leg replacement call controls
+
+- adds proxy-engine commands and call manager support for inviting a registered SIP device into an active call
+- supports transferring an existing leg between calls while preserving the active connection and updating mixer routing
+- supports replacing a call leg by removing the current leg and dialing a new outbound destination
+- wires the frontend add-leg API and TypeScript bridge to the new device leg and leg control commands
+
+## 2026-04-10 - 1.14.0 - feat(proxy-engine)
+add multiparty call mixing with dynamic SIP and WebRTC leg management
+
+- replace passthrough call handling with a mixer-backed call model that tracks multiple legs and exposes leg status in call state output
+- add mixer and leg I/O infrastructure to bridge SIP RTP and WebRTC audio through channel-based mix-minus processing
+- introduce add_leg and remove_leg proxy commands and wire frontend bridge APIs to manage external call legs
+- emit leg lifecycle events for observability and mark unimplemented device-leg and transfer HTTP endpoints with 501 responses
+
+## 2026-04-10 - 1.13.0 - feat(proxy-engine,webrtc)
+add B2BUA SIP leg handling and WebRTC call bridging for outbound calls
+
+- introduce a new SipLeg module to manage outbound provider dialogs, including INVITE lifecycle, digest auth retries, ACK handling, media endpoint tracking, and termination
+- store outbound dashboard calls as B2BUA calls in the call manager and emit provider media details on call_answered for bridge setup
+- separate SIP and WebRTC engine locking to avoid contention and deadlocks while linking sessions to call RTP sockets
+- add bidirectional RTP bridging between provider SIP media and browser WebRTC audio using the allocated RTP socket
+- wire browser webrtc-accept events in the frontend and sipproxy so session-to-call linking can occur when media and acceptance arrive in either order
+
 ## 2026-04-10 - 1.12.0 - feat(proxy-engine)
 add Rust-based outbound calling, WebRTC bridging, and voicemail handling

--- a/nogit/voicemail/default/msg-1775825168199.wav
+++ b/nogit/voicemail/default/msg-1775825168199.wav
--- a/package.json
+++ b/package.json
@@ -1,6 +1,6 @@
 {
  "name": "siprouter",
-  "version": "1.12.0",
+  "version": "1.17.0",
  "private": true,
  "type": "module",
  "scripts": {
--- a/rust/Cargo.lock
+++ b/rust/Cargo.lock
@@ -1881,16 +1881,6 @@ dependencies = [
 "vcpkg",
 ]

-[[package]]
-name = "opus-codec"
-version = "0.2.0"
-dependencies = [
- "base64 0.22.1",
- "codec-lib",
- "serde",
- "serde_json",
-]
-
 [[package]]
 name = "ort"
 version = "2.0.0-rc.11"
@@ -2188,6 +2178,9 @@ dependencies = [
 "base64 0.22.1",
 "codec-lib",
 "hound",
+ "kokoro-tts",
+ "nnnoiseless",
+ "ort",
 "rand 0.8.5",
 "regex-lite",
 "serde",
@@ -3008,16 +3001,6 @@ dependencies = [
 "strength_reduce",
 ]

-[[package]]
-name = "tts-engine"
-version = "0.1.0"
-dependencies = [
- "hound",
- "kokoro-tts",
- "ort",
- "tokio",
-]
-
 [[package]]
 name = "turn"
 version = "0.6.1"
--- a/rust/Cargo.toml
+++ b/rust/Cargo.toml
@@ -1,8 +1,6 @@
 [workspace]
 members = [
    "crates/codec-lib",
-    "crates/opus-codec",
-    "crates/tts-engine",
    "crates/sip-proto",
    "crates/proxy-engine",
 ]
--- a/rust/crates/codec-lib/src/lib.rs
+++ b/rust/crates/codec-lib/src/lib.rs
@@ -1,7 +1,7 @@
 //! Audio codec library for the SIP router.
 //!
 //! Handles Opus ↔ G.722 ↔ PCMU/PCMA transcoding with ML noise suppression.
-//! Used by both the standalone `opus-codec` CLI and the `proxy-engine` binary.
+//! Used by the `proxy-engine` binary for all audio transcoding.

 use audiopus::coder::{Decoder as OpusDecoder, Encoder as OpusEncoder};
 use audiopus::packet::Packet as OpusPacket;
@@ -104,6 +104,8 @@ pub struct TranscodeState {
    g722_dec: libg722::decoder::Decoder,
    /// Cached FFT resamplers keyed by (from_rate, to_rate, chunk_size).
    resamplers: HashMap<(u32, u32, usize), FftFixedIn<f64>>,
+    /// Cached f32 FFT resamplers keyed by (from_rate, to_rate, chunk_size).
+    resamplers_f32: HashMap<(u32, u32, usize), FftFixedIn<f32>>,
    /// ML noise suppression for the SIP-bound direction.
    denoiser_to_sip: Box<DenoiseState<'static>>,
    /// ML noise suppression for the browser-bound direction.
@@ -133,6 +135,7 @@ impl TranscodeState {
            g722_enc,
            g722_dec,
            resamplers: HashMap::new(),
+            resamplers_f32: HashMap::new(),
            denoiser_to_sip: DenoiseState::new(),
            denoiser_to_browser: DenoiseState::new(),
        })
@@ -293,6 +296,86 @@ impl TranscodeState {
            _ => Err(format!("unsupported target PT {pt}")),
        }
    }
+
+    // ---- f32 API for high-quality internal bus ----------------------------
+
+    /// Decode a codec payload (selected by payload type `pt`) into f32 PCM.
+    ///
+    /// Delegates to `decode_to_pcm` and divides every decoded sample by 32768,
+    /// yielding samples in [-1.0, 1.0]. Returns `(samples, sample_rate)`;
+    /// decoder errors are propagated to the caller.
+    pub fn decode_to_f32(&mut self, data: &[u8], pt: u8) -> Result<(Vec<f32>, u32), String> {
+        let (decoded, sample_rate) = self.decode_to_pcm(data, pt)?;
+        let mut samples: Vec<f32> = Vec::with_capacity(decoded.len());
+        for &sample in decoded.iter() {
+            samples.push(sample as f32 / 32768.0);
+        }
+        Ok((samples, sample_rate))
+    }
+
+    /// Encode f32 PCM samples (nominally [-1.0, 1.0]) with the codec selected by `pt`.
+    ///
+    /// Each sample is scaled by 32767, rounded, clamped to the i16 range and
+    /// then passed on to `encode_from_pcm`.
+    pub fn encode_from_f32(&mut self, pcm: &[f32], pt: u8) -> Result<Vec<u8>, String> {
+        let mut quantized: Vec<i16> = Vec::with_capacity(pcm.len());
+        for &sample in pcm {
+            let scaled = (sample * 32767.0).round();
+            quantized.push(scaled.clamp(-32768.0, 32767.0) as i16);
+        }
+        self.encode_from_pcm(&quantized, pt)
+    }
+
+    /// High-quality sample rate conversion for f32 PCM using rubato's FFT resampler.
+    ///
+    /// Resamplers are cached per `(from_rate, to_rate, chunk_size)` in a map that
+    /// is separate from the i16 resampler cache, so repeated calls with the same
+    /// input length reuse one resampler instance.
+    ///
+    /// Returns a copy of the input when the rates match or `pcm` is empty.
+    ///
+    /// # Errors
+    /// Returns a descriptive message if the resampler cannot be constructed,
+    /// if processing fails, or if no output channel is produced.
+    pub fn resample_f32(
+        &mut self,
+        pcm: &[f32],
+        from_rate: u32,
+        to_rate: u32,
+    ) -> Result<Vec<f32>, String> {
+        if from_rate == to_rate || pcm.is_empty() {
+            return Ok(pcm.to_vec());
+        }
+
+        let chunk = pcm.len();
+
+        // Single map lookup via the entry API. Construction is fallible, so the
+        // vacant arm is spelled out instead of using `or_insert_with`.
+        let resampler = match self.resamplers_f32.entry((from_rate, to_rate, chunk)) {
+            std::collections::hash_map::Entry::Occupied(slot) => slot.into_mut(),
+            std::collections::hash_map::Entry::Vacant(slot) => {
+                let created =
+                    FftFixedIn::<f32>::new(from_rate as usize, to_rate as usize, chunk, 1, 1)
+                        .map_err(|e| format!("resampler f32 {from_rate}->{to_rate}: {e}"))?;
+                slot.insert(created)
+            }
+        };
+
+        let input = vec![pcm.to_vec()];
+        let result = resampler
+            .process(&input, None)
+            .map_err(|e| format!("resample f32 {from_rate}->{to_rate}: {e}"))?;
+
+        // Take ownership of the single output channel instead of cloning it;
+        // an unexpectedly empty result becomes an error rather than a panic.
+        result
+            .into_iter()
+            .next()
+            .ok_or_else(|| format!("resample f32 {from_rate}->{to_rate}: no output channel"))
+    }
+
+    /// Apply RNNoise ML noise suppression to 48kHz f32 PCM audio.
+    ///
+    /// Audio is processed in `DenoiseState::FRAME_SIZE`-sample frames (480
+    /// samples, 10ms at 48kHz). Denoiser state persists across calls. Samples
+    /// stay f32 throughout; they are only rescaled to the i16 amplitude range
+    /// that nnnoiseless expects and back again.
+    ///
+    /// Trailing samples that do not fill a whole frame are appended
+    /// unprocessed, so the output always has the same length as `pcm`.
+    pub fn denoise_f32(denoiser: &mut DenoiseState, pcm: &[f32]) -> Vec<f32> {
+        let mut output = Vec::with_capacity(pcm.len());
+        // Scratch buffers are sized from the library constant and reused for
+        // every frame, avoiding one heap allocation per 10ms of audio.
+        let mut in_buf = [0.0f32; DenoiseState::FRAME_SIZE];
+        let mut out_buf = [0.0f32; DenoiseState::FRAME_SIZE];
+
+        let mut frames = pcm.chunks_exact(DenoiseState::FRAME_SIZE);
+        for frame in &mut frames {
+            // nnnoiseless expects f32 samples scaled as i16 range (-32768..32767).
+            for (dst, &src) in in_buf.iter_mut().zip(frame) {
+                *dst = src * 32768.0;
+            }
+            denoiser.process_frame(&mut out_buf, &in_buf);
+            output.extend(out_buf.iter().map(|&s| s / 32768.0));
+        }
+        // Pass the incomplete tail through untouched.
+        output.extend_from_slice(frames.remainder());
+        output
+    }
+}
+
+/// Create a new standalone denoiser for per-leg inbound processing.
+///
+/// Thin wrapper around `DenoiseState::new()`; returns the boxed state it
+/// produces.
+pub fn new_denoiser() -> Box<DenoiseState<'static>> {
+    DenoiseState::new()
 }

 #[cfg(test)]
--- a/rust/crates/opus-codec/Cargo.toml
+++ b/rust/crates/opus-codec/Cargo.toml
@@ -1,14 +0,0 @@
-[package]
-name = "opus-codec"
-version = "0.2.0"
-edition = "2021"
-
-[[bin]]
-name = "opus-codec"
-path = "src/main.rs"
-
-[dependencies]
-codec-lib = { path = "../codec-lib" }
-serde = { version = "1", features = ["derive"] }
-serde_json = "1"
-base64 = "0.22"
--- a/rust/crates/opus-codec/src/main.rs
+++ b/rust/crates/opus-codec/src/main.rs
@@ -1,286 +0,0 @@
-/// Audio transcoding bridge for smartrust.
-///
-/// Thin CLI wrapper around `codec-lib`. Handles Opus ↔ G.722 ↔ PCMU transcoding.
-///
-/// Protocol:
-///   -> {"id":"1","method":"init","params":{}}
-///   <- {"id":"1","success":true,"result":{}}
-///   -> {"id":"2","method":"create_session","params":{"session_id":"call-abc"}}
-///   <- {"id":"2","success":true,"result":{}}
-///   -> {"id":"3","method":"transcode","params":{"session_id":"call-abc","data_b64":"...","from_pt":111,"to_pt":9}}
-///   <- {"id":"3","success":true,"result":{"data_b64":"..."}}
-///   -> {"id":"4","method":"destroy_session","params":{"session_id":"call-abc"}}
-///   <- {"id":"4","success":true,"result":{}}
-
-use base64::engine::general_purpose::STANDARD as B64;
-use base64::Engine as _;
-use codec_lib::{codec_sample_rate, TranscodeState};
-use serde::Deserialize;
-use std::collections::HashMap;
-use std::io::{self, BufRead, Write};
-
-#[derive(Deserialize)]
-struct Request {
-    id: String,
-    method: String,
-    #[serde(default)]
-    params: serde_json::Value,
-}
-
-fn respond(
-    out: &mut impl Write,
-    id: &str,
-    success: bool,
-    result: Option<serde_json::Value>,
-    error: Option<&str>,
-) {
-    let mut resp = serde_json::json!({ "id": id, "success": success });
-    if let Some(r) = result {
-        resp["result"] = r;
-    }
-    if let Some(e) = error {
-        resp["error"] = serde_json::Value::String(e.to_string());
-    }
-    let _ = writeln!(out, "{}", resp);
-    let _ = out.flush();
-}
-
-/// Resolve a session: if session_id is provided, look it up in the sessions map;
-/// otherwise fall back to the default state (backward compat with `init`).
-fn get_session<'a>(
-    sessions: &'a mut HashMap<String, TranscodeState>,
-    default: &'a mut Option<TranscodeState>,
-    params: &serde_json::Value,
-) -> Option<&'a mut TranscodeState> {
-    if let Some(sid) = params.get("session_id").and_then(|v| v.as_str()) {
-        sessions.get_mut(sid)
-    } else {
-        default.as_mut()
-    }
-}
-
-fn main() {
-    let stdin = io::stdin();
-    let stdout = io::stdout();
-    let mut out = io::BufWriter::new(stdout.lock());
-
-    let _ = writeln!(out, r#"{{"event":"ready","data":{{}}}}"#);
-    let _ = out.flush();
-
-    let mut default_state: Option<TranscodeState> = None;
-    let mut sessions: HashMap<String, TranscodeState> = HashMap::new();
-
-    for line in stdin.lock().lines() {
-        let line = match line {
-            Ok(l) if !l.trim().is_empty() => l,
-            Ok(_) => continue,
-            Err(_) => break,
-        };
-
-        let req: Request = match serde_json::from_str(&line) {
-            Ok(r) => r,
-            Err(e) => {
-                respond(&mut out, "", false, None, Some(&format!("parse: {e}")));
-                continue;
-            }
-        };
-
-        match req.method.as_str() {
-            "init" => match TranscodeState::new() {
-                Ok(s) => {
-                    default_state = Some(s);
-                    respond(&mut out, &req.id, true, Some(serde_json::json!({})), None);
-                }
-                Err(e) => respond(&mut out, &req.id, false, None, Some(&e)),
-            },
-
-            "create_session" => {
-                let session_id = match req.params.get("session_id").and_then(|v| v.as_str()) {
-                    Some(s) => s.to_string(),
-                    None => {
-                        respond(&mut out, &req.id, false, None, Some("missing session_id"));
-                        continue;
-                    }
-                };
-                if sessions.contains_key(&session_id) {
-                    respond(&mut out, &req.id, true, Some(serde_json::json!({})), None);
-                    continue;
-                }
-                match TranscodeState::new() {
-                    Ok(s) => {
-                        sessions.insert(session_id, s);
-                        respond(&mut out, &req.id, true, Some(serde_json::json!({})), None);
-                    }
-                    Err(e) => respond(&mut out, &req.id, false, None, Some(&e)),
-                }
-            }
-
-            "destroy_session" => {
-                let session_id = match req.params.get("session_id").and_then(|v| v.as_str()) {
-                    Some(s) => s,
-                    None => {
-                        respond(&mut out, &req.id, false, None, Some("missing session_id"));
-                        continue;
-                    }
-                };
-                sessions.remove(session_id);
-                respond(&mut out, &req.id, true, Some(serde_json::json!({})), None);
-            }
-
-            "transcode" => {
-                let st = match get_session(&mut sessions, &mut default_state, &req.params) {
-                    Some(s) => s,
-                    None => {
-                        respond(
-                            &mut out,
-                            &req.id,
-                            false,
-                            None,
-                            Some("not initialized (no session or default state)"),
-                        );
-                        continue;
-                    }
-                };
-                let data_b64 = match req.params.get("data_b64").and_then(|v| v.as_str()) {
-                    Some(s) => s,
-                    None => {
-                        respond(&mut out, &req.id, false, None, Some("missing data_b64"));
-                        continue;
-                    }
-                };
-                let from_pt =
-                    req.params.get("from_pt").and_then(|v| v.as_u64()).unwrap_or(0) as u8;
-                let to_pt = req.params.get("to_pt").and_then(|v| v.as_u64()).unwrap_or(0) as u8;
-                let direction = req.params.get("direction").and_then(|v| v.as_str());
-
-                let data = match B64.decode(data_b64) {
-                    Ok(b) => b,
-                    Err(e) => {
-                        respond(
-                            &mut out,
-                            &req.id,
-                            false,
-                            None,
-                            Some(&format!("b64: {e}")),
-                        );
-                        continue;
-                    }
-                };
-
-                match st.transcode(&data, from_pt, to_pt, direction) {
-                    Ok(result) => {
-                        respond(
-                            &mut out,
-                            &req.id,
-                            true,
-                            Some(serde_json::json!({ "data_b64": B64.encode(&result) })),
-                            None,
-                        );
-                    }
-                    Err(e) => respond(&mut out, &req.id, false, None, Some(&e)),
-                }
-            }
-
-            "encode_pcm" => {
-                let st = match get_session(&mut sessions, &mut default_state, &req.params) {
-                    Some(s) => s,
-                    None => {
-                        respond(
-                            &mut out,
-                            &req.id,
-                            false,
-                            None,
-                            Some("not initialized (no session or default state)"),
-                        );
-                        continue;
-                    }
-                };
-                let data_b64 = match req.params.get("data_b64").and_then(|v| v.as_str()) {
-                    Some(s) => s,
-                    None => {
-                        respond(&mut out, &req.id, false, None, Some("missing data_b64"));
-                        continue;
-                    }
-                };
-                let sample_rate = req
-                    .params
-                    .get("sample_rate")
-                    .and_then(|v| v.as_u64())
-                    .unwrap_or(22050) as u32;
-                let to_pt = req.params.get("to_pt").and_then(|v| v.as_u64()).unwrap_or(9) as u8;
-
-                let data = match B64.decode(data_b64) {
-                    Ok(b) => b,
-                    Err(e) => {
-                        respond(
-                            &mut out,
-                            &req.id,
-                            false,
-                            None,
-                            Some(&format!("b64: {e}")),
-                        );
-                        continue;
-                    }
-                };
-
-                if data.len() % 2 != 0 {
-                    respond(
-                        &mut out,
-                        &req.id,
-                        false,
-                        None,
-                        Some("PCM data has odd byte count (expected 16-bit LE samples)"),
-                    );
-                    continue;
-                }
-
-                let pcm: Vec<i16> = data
-                    .chunks_exact(2)
-                    .map(|c| i16::from_le_bytes([c[0], c[1]]))
-                    .collect();
-
-                let target_rate = codec_sample_rate(to_pt);
-                let resampled = match st.resample(&pcm, sample_rate, target_rate) {
-                    Ok(r) => r,
-                    Err(e) => {
-                        respond(&mut out, &req.id, false, None, Some(&e));
-                        continue;
-                    }
-                };
-
-                match st.encode_from_pcm(&resampled, to_pt) {
-                    Ok(encoded) => {
-                        respond(
-                            &mut out,
-                            &req.id,
-                            true,
-                            Some(serde_json::json!({ "data_b64": B64.encode(&encoded) })),
-                            None,
-                        );
-                    }
-                    Err(e) => {
-                        respond(&mut out, &req.id, false, None, Some(&e));
-                    }
-                }
-            }
-
-            "encode" | "decode" => {
-                respond(
-                    &mut out,
-                    &req.id,
-                    false,
-                    None,
-                    Some("use 'transcode' command instead"),
-                );
-            }
-
-            _ => respond(
-                &mut out,
-                &req.id,
-                false,
-                None,
-                Some(&format!("unknown: {}", req.method)),
-            ),
-        }
-    }
-}
--- a/rust/crates/proxy-engine/Cargo.toml
+++ b/rust/crates/proxy-engine/Cargo.toml
@@ -10,6 +10,7 @@ path = "src/main.rs"
 [dependencies]
 codec-lib = { path = "../codec-lib" }
 sip-proto = { path = "../sip-proto" }
+nnnoiseless = { version = "0.5", default-features = false }
 tokio = { version = "1", features = ["full"] }
 serde = { version = "1", features = ["derive"] }
 serde_json = "1"
@@ -18,3 +19,8 @@ regex-lite = "0.1"
 webrtc = "0.8"
 rand = "0.8"
 hound = "3.5"
+kokoro-tts = { version = "0.3", default-features = false }
+ort = { version = "=2.0.0-rc.11", default-features = false, features = [
+    "std", "download-binaries", "copy-dylibs", "ndarray",
+    "tls-native-vendored"
+] }
--- a/rust/crates/proxy-engine/src/audio_player.rs
+++ b/rust/crates/proxy-engine/src/audio_player.rs
@@ -1,4 +1,5 @@
 //! Audio player — reads a WAV file and streams it as RTP packets.
+//! Also provides prompt preparation for the leg interaction system.

 use crate::rtp::{build_rtp_header, rtp_clock_increment};
 use codec_lib::{codec_sample_rate, TranscodeState};
@@ -8,6 +9,11 @@ use std::sync::Arc;
 use tokio::net::UdpSocket;
 use tokio::time::{self, Duration};

+/// Mixing sample rate used by the mixer (must stay in sync with mixer::MIX_RATE).
+const MIX_RATE: u32 = 48000;
+/// Samples per 20ms frame at the mixing rate (50 frames per second, 960 at 48kHz).
+/// Derived from `MIX_RATE` so the two values cannot drift apart.
+const MIX_FRAME_SIZE: usize = (MIX_RATE / 50) as usize;
+
 /// Play a WAV file as RTP to a destination.
 /// Returns when playback is complete.
 pub async fn play_wav_file(
@@ -171,3 +177,64 @@ pub async fn play_beep(

    Ok((seq, ts))
 }
+
+/// Load a WAV file and split it into 20ms f32 PCM frames at 48kHz.
+/// Used by the leg interaction system to prepare prompt audio for the mixer.
+///
+/// Accepts 16-bit integer and 32-bit float WAV data. Multi-channel files are
+/// downmixed to mono by averaging the channels of each sample frame, since
+/// the returned frames carry a single channel. The audio is resampled to
+/// `MIX_RATE` when necessary and the final frame is zero-padded to
+/// `MIX_FRAME_SIZE` samples. A file without samples yields an empty list.
+///
+/// # Errors
+/// Returns a message if the file is missing or cannot be opened, uses an
+/// unsupported sample format, or cannot be resampled.
+pub fn load_prompt_pcm_frames(wav_path: &str) -> Result<Vec<Vec<f32>>, String> {
+    let path = Path::new(wav_path);
+    if !path.exists() {
+        return Err(format!("WAV file not found: {wav_path}"));
+    }
+
+    let mut reader =
+        hound::WavReader::open(path).map_err(|e| format!("open WAV {wav_path}: {e}"))?;
+    let spec = reader.spec();
+    let wav_rate = spec.sample_rate;
+    let channels = usize::from(spec.channels).max(1);
+
+    // Read all interleaved samples as f32 in [-1.0, 1.0].
+    // Samples that fail to decode are skipped (best effort).
+    let interleaved: Vec<f32> = if spec.bits_per_sample == 16 {
+        reader
+            .samples::<i16>()
+            .filter_map(Result::ok)
+            .map(|s| s as f32 / 32768.0)
+            .collect()
+    } else if spec.bits_per_sample == 32 && spec.sample_format == hound::SampleFormat::Float {
+        reader.samples::<f32>().filter_map(Result::ok).collect()
+    } else {
+        return Err(format!(
+            "unsupported WAV format: {}bit {:?}",
+            spec.bits_per_sample, spec.sample_format
+        ));
+    };
+
+    // Downmix to mono. Without this, interleaved stereo data would be
+    // treated as consecutive mono samples and play back garbled.
+    let samples: Vec<f32> = if channels > 1 {
+        interleaved
+            .chunks_exact(channels)
+            .map(|frame| frame.iter().sum::<f32>() / channels as f32)
+            .collect()
+    } else {
+        interleaved
+    };
+
+    if samples.is_empty() {
+        return Ok(vec![]);
+    }
+
+    // Resample to MIX_RATE (48kHz) if needed.
+    let resampled = if wav_rate != MIX_RATE {
+        let mut transcoder = TranscodeState::new().map_err(|e| format!("codec init: {e}"))?;
+        transcoder
+            .resample_f32(&samples, wav_rate, MIX_RATE)
+            .map_err(|e| format!("resample: {e}"))?
+    } else {
+        samples
+    };
+
+    // Split into MIX_FRAME_SIZE (960) sample frames, padding a short final
+    // frame with silence.
+    let frames = resampled
+        .chunks(MIX_FRAME_SIZE)
+        .map(|chunk| {
+            let mut frame = chunk.to_vec();
+            frame.resize(MIX_FRAME_SIZE, 0.0);
+            frame
+        })
+        .collect();
+
+    Ok(frames)
+}
--- a/rust/crates/proxy-engine/src/call.rs
+++ b/rust/crates/proxy-engine/src/call.rs
@@ -1,12 +1,20 @@
-//! Call hub — owns legs and bridges media.
+//! Call hub — owns N legs and a mixer task.
 //!
-//! Each Call has a unique ID and tracks its state, direction, and associated
-//! SIP Call-IDs for message routing.
+//! Every call has a central mixer that provides mix-minus audio to all
+//! participants. Legs can be added and removed dynamically mid-call.

+use crate::mixer::{MixerCommand, RtpPacket};
+use crate::sip_leg::SipLeg;
+use sip_proto::message::SipMessage;
+use std::collections::HashMap;
 use std::net::SocketAddr;
 use std::sync::Arc;
 use std::time::Instant;
 use tokio::net::UdpSocket;
+use tokio::sync::mpsc;
+use tokio::task::JoinHandle;
+
+pub type LegId = String;

 /// Call state machine.
 #[derive(Debug, Clone, Copy, PartialEq, Eq)]
@@ -15,8 +23,6 @@ pub enum CallState {
    Ringing,
    Connected,
    Voicemail,
-    Ivr,
-    Terminating,
    Terminated,
 }

@@ -27,8 +33,6 @@ impl CallState {
            Self::Ringing => "ringing",
            Self::Connected => "connected",
            Self::Voicemail => "voicemail",
-            Self::Ivr => "ivr",
-            Self::Terminating => "terminating",
            Self::Terminated => "terminated",
        }
    }
@@ -49,43 +53,191 @@ impl CallDirection {
    }
 }

-/// A passthrough call — both sides share the same SIP Call-ID.
-/// The proxy rewrites SDP/Contact/Request-URI and relays RTP.
-pub struct PassthroughCall {
+/// The type of a call leg.
+#[derive(Debug, Clone, Copy, PartialEq, Eq)]
+pub enum LegKind {
+    SipProvider,
+    SipDevice,
+    WebRtc,
+    /// Voicemail playback, IVR, recording.
+    Media,
+    /// Observer leg for recording, transcription, etc.
+    Tool,
+}
+
+impl LegKind {
+    /// Kebab-case label for this leg kind (reported as `type` in the status JSON).
+    pub fn as_str(&self) -> &'static str {
+        match *self {
+            LegKind::SipProvider => "sip-provider",
+            LegKind::SipDevice => "sip-device",
+            LegKind::WebRtc => "webrtc",
+            LegKind::Media => "media",
+            LegKind::Tool => "tool",
+        }
+    }
+}
+
+/// Per-leg state.
+#[derive(Debug, Clone, Copy, PartialEq, Eq)]
+pub enum LegState {
+    Inviting,
+    Ringing,
+    Connected,
+    Terminated,
+}
+
+impl LegState {
+    /// Lowercase label for this state (reported as `state` in the status JSON).
+    pub fn as_str(&self) -> &'static str {
+        match *self {
+            LegState::Inviting => "inviting",
+            LegState::Ringing => "ringing",
+            LegState::Connected => "connected",
+            LegState::Terminated => "terminated",
+        }
+    }
+}
+
+/// Information about a single leg in a call.
+///
+/// Stored in `Call::legs` keyed by leg ID. The optional fields are only
+/// populated for the leg kinds named in their comments.
+pub struct LegInfo {
+    /// Leg identifier (reported as `id` in the status JSON).
+    pub id: LegId,
+    /// What kind of endpoint this leg is.
+    pub kind: LegKind,
+    /// Current leg state; `Terminated` legs are omitted from the status JSON.
+    pub state: LegState,
+    /// Codec payload type for this leg (translated via `codec_name` for status output).
+    pub codec_pt: u8,
+
+    /// For SIP legs: the SIP dialog manager (handles 407 auth, BYE, etc).
+    pub sip_leg: Option<SipLeg>,
+    /// For SIP legs: the SIP Call-ID for message routing.
+    pub sip_call_id: Option<String>,
+    /// For WebRTC legs: the session ID in WebRtcEngine.
+    pub webrtc_session_id: Option<String>,
+    /// The RTP socket allocated for this leg.
+    pub rtp_socket: Option<Arc<UdpSocket>>,
+    /// The RTP port number.
+    pub rtp_port: u16,
+    /// The remote media endpoint (learned from SDP or address learning).
+    pub remote_media: Option<SocketAddr>,
+    /// SIP signaling address (provider or device).
+    pub signaling_addr: Option<SocketAddr>,
+
+    /// Flexible key-value metadata (consent state, tool config, etc.).
+    /// Persisted into call history on call end.
+    pub metadata: HashMap<String, serde_json::Value>,
+}
+
+/// A multiparty call with N legs and a central mixer.
+///
+/// Owns the per-leg bookkeeping (`legs`) plus the command channel and task
+/// handle of the mixer that serves this call.
+pub struct Call {
+    /// Call identifier (reported as `id` in the status JSON).
    pub id: String,
-    pub sip_call_id: String,
+    /// Current call state; `Call::new` starts it at `CallState::SettingUp`.
    pub state: CallState,
    pub direction: CallDirection,
+    /// Set to `Instant::now()` in `Call::new`; `duration_secs` measures from here.
    pub created_at: Instant,

-    // Call metadata.
+    // Metadata.
    pub caller_number: Option<String>,
    pub callee_number: Option<String>,
    pub provider_id: String,

-    // Provider side.
-    pub provider_addr: SocketAddr,
-    pub provider_media: Option<SocketAddr>,
+    /// Original INVITE from the device (for device-originated outbound calls).
+    /// Used to construct proper 180/200/error responses back to the device.
+    pub device_invite: Option<SipMessage>,

-    // Device side.
-    pub device_addr: SocketAddr,
-    pub device_media: Option<SocketAddr>,
+    /// All legs in this call, keyed by leg ID.
+    pub legs: HashMap<LegId, LegInfo>,

-    // RTP relay.
-    pub rtp_port: u16,
-    pub rtp_socket: Arc<UdpSocket>,
+    /// Channel to send commands to the mixer task.
+    pub mixer_cmd_tx: mpsc::Sender<MixerCommand>,

-    // Packet counters.
-    pub pkt_from_device: u64,
-    pub pkt_from_provider: u64,
+    /// Handle to the mixer task (aborted on call teardown).
+    mixer_task: Option<JoinHandle<()>>,
 }

-impl PassthroughCall {
+impl Call {
+    /// Create a call in `CallState::SettingUp` with no legs.
+    ///
+    /// Caller/callee numbers and the stored device INVITE start as `None`,
+    /// and `created_at` is set to the current instant. The call takes
+    /// ownership of the mixer command sender and the mixer task handle.
+    pub fn new(
+        id: String,
+        direction: CallDirection,
+        provider_id: String,
+        mixer_cmd_tx: mpsc::Sender<MixerCommand>,
+        mixer_task: JoinHandle<()>,
+    ) -> Self {
+        Self {
+            id,
+            state: CallState::SettingUp,
+            direction,
+            created_at: Instant::now(),
+            caller_number: None,
+            callee_number: None,
+            provider_id,
+            device_invite: None,
+            legs: HashMap::new(),
+            mixer_cmd_tx,
+            // Wrapped in `Option` so `shutdown_mixer` can take the handle out.
+            mixer_task: Some(mixer_task),
+        }
+    }
+
+    /// Register a leg with the mixer by sending `MixerCommand::AddLeg`.
+    ///
+    /// The command carries the leg ID, its codec payload type and the two
+    /// channel endpoints handed over to the mixer (`inbound_rx`, `outbound_tx`).
+    /// A failed send is ignored.
+    pub async fn add_leg_to_mixer(
+        &self,
+        leg_id: &str,
+        codec_pt: u8,
+        inbound_rx: mpsc::Receiver<RtpPacket>,
+        outbound_tx: mpsc::Sender<Vec<u8>>,
+    ) {
+        let command = MixerCommand::AddLeg {
+            leg_id: leg_id.to_string(),
+            codec_pt,
+            inbound_rx,
+            outbound_tx,
+        };
+        let _ = self.mixer_cmd_tx.send(command).await;
+    }
+
+    /// Ask the mixer to drop a leg by sending `MixerCommand::RemoveLeg`.
+    /// A failed send is ignored.
+    pub async fn remove_leg_from_mixer(&self, leg_id: &str) {
+        let command = MixerCommand::RemoveLeg {
+            leg_id: leg_id.to_string(),
+        };
+        let _ = self.mixer_cmd_tx.send(command).await;
+    }
+
+    /// Whole seconds elapsed since `created_at`.
    pub fn duration_secs(&self) -> u64 {
        self.created_at.elapsed().as_secs()
    }

+    /// Shut down the mixer: send `MixerCommand::Shutdown`, then abort its task.
+    ///
+    /// The send result is ignored. The task handle is taken out of `self`, so
+    /// a repeated call re-sends the command but has no task left to abort.
+    /// NOTE(review): the abort follows the send immediately, so the mixer may
+    /// be cancelled before it handles `Shutdown` — confirm this is intended.
+    pub async fn shutdown_mixer(&mut self) {
+        let shutdown = MixerCommand::Shutdown;
+        let _ = self.mixer_cmd_tx.send(shutdown).await;
+        match self.mixer_task.take() {
+            Some(task) => task.abort(),
+            None => {}
+        }
+    }
+
+    /// Produce a JSON status snapshot for the dashboard.
    pub fn to_status_json(&self) -> serde_json::Value {
+        let legs: Vec<serde_json::Value> = self
+            .legs
+            .values()
+            .filter(|l| l.state != LegState::Terminated)
+            .map(|l| {
+                let metadata: serde_json::Value = if l.metadata.is_empty() {
+                    serde_json::json!({})
+                } else {
+                    serde_json::Value::Object(
+                        l.metadata.iter().map(|(k, v)| (k.clone(), v.clone())).collect(),
+                    )
+                };
+                serde_json::json!({
+                    "id": l.id,
+                    "type": l.kind.as_str(),
+                    "state": l.state.as_str(),
+                    "codec": sip_proto::helpers::codec_name(l.codec_pt),
+                    "rtpPort": l.rtp_port,
+                    "remoteMedia": l.remote_media.map(|a| format!("{}:{}", a.ip(), a.port())),
+                    "metadata": metadata,
+                })
+            })
+            .collect();
+
        serde_json::json!({
            "id": self.id,
            "state": self.state.as_str(),
@@ -93,11 +245,8 @@ impl PassthroughCall {
            "callerNumber": self.caller_number,
            "calleeNumber": self.callee_number,
            "providerUsed": self.provider_id,
-            "createdAt": self.created_at.elapsed().as_millis(),
            "duration": self.duration_secs(),
-            "rtpPort": self.rtp_port,
-            "pktFromDevice": self.pkt_from_device,
-            "pktFromProvider": self.pkt_from_provider,
+            "legs": legs,
        })
    }
 }
--- a/rust/crates/proxy-engine/src/call_manager.rs
+++ b/rust/crates/proxy-engine/src/call_manager.rs
--- a/rust/crates/proxy-engine/src/leg_io.rs
+++ b/rust/crates/proxy-engine/src/leg_io.rs
@@ -0,0 +1,116 @@
+//! Leg I/O task spawners.
+//!
+//! Each SIP leg gets two tasks:
+//! - Inbound: recv_from on RTP socket → strip header → send RtpPacket to mixer channel
+//! - Outbound: recv encoded RTP from mixer channel → send_to remote media endpoint
+//!
+//! WebRTC leg I/O is handled inside webrtc_engine.rs (on_track + track.write).
+
+use crate::mixer::RtpPacket;
+use std::net::SocketAddr;
+use std::sync::Arc;
+use tokio::net::UdpSocket;
+use tokio::sync::mpsc;
+
+/// Channel pair for connecting a leg to the mixer.
+pub struct LegChannels {
+    /// Leg I/O side: pushes header-stripped inbound RTP packets towards the mixer.
+    pub inbound_tx: mpsc::Sender<RtpPacket>,
+    /// Mixer side: receives this leg's inbound packets.
+    pub inbound_rx: mpsc::Receiver<RtpPacket>,
+    /// Mixer side: sends encoded RTP destined for this leg.
+    pub outbound_tx: mpsc::Sender<Vec<u8>>,
+    /// Leg I/O side: receives the mixer's encoded RTP for transmission.
+    pub outbound_rx: mpsc::Receiver<Vec<u8>>,
+}
+
+/// Create a channel pair for a leg (inbound queue of 64 packets, outbound queue of 8).
+pub fn create_leg_channels() -> LegChannels {
+    let (inbound_tx, inbound_rx) = mpsc::channel::<RtpPacket>(64);
+    let (outbound_tx, outbound_rx) = mpsc::channel::<Vec<u8>>(8);
+    LegChannels {
+        inbound_tx,
+        inbound_rx,
+        outbound_tx,
+        outbound_rx,
+    }
+}
+
+/// Size of the fixed part of an RTP header (RFC 3550 §5.1).
+const RTP_FIXED_HEADER_LEN: usize = 12;
+
+/// Locate the payload inside a received RTP packet.
+///
+/// Skips the fixed header, the CSRC list (CC × 4 bytes) and, when the X bit is set,
+/// the header extension; when the P bit is set the trailing padding is excluded too.
+/// Returns `None` for packets that are truncated, malformed, or carry no payload.
+fn rtp_payload_range(packet: &[u8]) -> Option<std::ops::Range<usize>> {
+    let first = *packet.first()?;
+    let mut start = RTP_FIXED_HEADER_LEN + 4 * usize::from(first & 0x0F);
+    if first & 0x10 != 0 {
+        // Extension preamble: 16-bit profile id, then 16-bit length in 32-bit words.
+        let words = packet.get(start + 2..start + 4)?;
+        start += 4 + 4 * usize::from(u16::from_be_bytes([words[0], words[1]]));
+    }
+    let mut end = packet.len();
+    if first & 0x20 != 0 {
+        // The last byte counts the padding bytes, itself included.
+        end = end.checked_sub(usize::from(*packet.last()?))?;
+    }
+    if start < end {
+        Some(start..end)
+    } else {
+        None
+    }
+}
+
+/// Spawn the inbound I/O task for a SIP leg.
+/// Reads RTP from the socket, strips the RTP header (fixed part, CSRC list, header
+/// extension) and any padding, and sends the payload to the mixer.
+/// Returns the JoinHandle (the task exits on a socket error or once the receiving end
+/// of `inbound_tx` is gone).
+pub fn spawn_sip_inbound(
+    rtp_socket: Arc<UdpSocket>,
+    inbound_tx: mpsc::Sender<RtpPacket>,
+) -> tokio::task::JoinHandle<()> {
+    tokio::spawn(async move {
+        let mut buf = vec![0u8; 1500];
+        loop {
+            match rtp_socket.recv_from(&mut buf).await {
+                Ok((n, _from)) => {
+                    let packet = &buf[..n];
+                    // `None` covers short, malformed and payload-less packets alike.
+                    let range = match rtp_payload_range(packet) {
+                        Some(r) => r,
+                        None => continue,
+                    };
+                    let pt = packet[1] & 0x7F;
+                    let marker = (packet[1] & 0x80) != 0;
+                    let timestamp =
+                        u32::from_be_bytes([packet[4], packet[5], packet[6], packet[7]]);
+                    let payload = packet[range].to_vec();
+                    if inbound_tx.send(RtpPacket { payload, payload_type: pt, marker, timestamp }).await.is_err() {
+                        break; // Channel closed — leg removed.
+                    }
+                }
+                Err(_) => break, // Socket error.
+            }
+        }
+    })
+}
+
+/// Spawn the outbound I/O task for a SIP leg.
+/// Reads encoded RTP packets from the mixer and sends them to the remote media endpoint;
+/// individual send errors are ignored.
+/// Returns the JoinHandle (exits when the outbound_rx channel is closed).
+pub fn spawn_sip_outbound(
+    rtp_socket: Arc<UdpSocket>,
+    remote_media: SocketAddr,
+    mut outbound_rx: mpsc::Receiver<Vec<u8>>,
+) -> tokio::task::JoinHandle<()> {
+    tokio::spawn(async move {
+        while let Some(rtp_data) = outbound_rx.recv().await {
+            let _ = rtp_socket.send_to(&rtp_data, remote_media).await;
+        }
+    })
+}
--- a/rust/crates/proxy-engine/src/main.rs
+++ b/rust/crates/proxy-engine/src/main.rs
@@ -12,11 +12,16 @@ mod call_manager;
 mod config;
 mod dtmf;
 mod ipc;
+mod leg_io;
+mod mixer;
 mod provider;
 mod recorder;
 mod registrar;
 mod rtp;
+mod sip_leg;
 mod sip_transport;
+mod tool_leg;
+mod tts;
 mod voicemail;
 mod webrtc_engine;

@@ -35,14 +40,15 @@ use tokio::io::{AsyncBufReadExt, AsyncWriteExt, BufReader};
 use tokio::net::UdpSocket;
 use tokio::sync::{mpsc, Mutex};

-/// Shared mutable state for the proxy engine.
+/// Shared mutable state for the proxy engine (SIP side).
+/// WebRTC is intentionally kept in a separate lock to avoid contention
+/// between SIP packet handlers and WebRTC command handlers.
 struct ProxyEngine {
    config: Option<AppConfig>,
    transport: Option<SipTransport>,
    provider_mgr: ProviderManager,
    registrar: Registrar,
    call_mgr: CallManager,
-    webrtc: WebRtcEngine,
    rtp_pool: Option<RtpPortPool>,
    out_tx: OutTx,
 }
@@ -55,7 +61,6 @@ impl ProxyEngine {
            provider_mgr: ProviderManager::new(out_tx.clone()),
            registrar: Registrar::new(out_tx.clone()),
            call_mgr: CallManager::new(out_tx.clone()),
-            webrtc: WebRtcEngine::new(out_tx.clone()),
            rtp_pool: None,
            out_tx,
        }
@@ -83,9 +88,15 @@ async fn main() {
    // Emit ready event.
    emit_event(&out_tx, "ready", serde_json::json!({}));

-    // Shared engine state.
+    // Shared engine state (SIP side).
    let engine = Arc::new(Mutex::new(ProxyEngine::new(out_tx.clone())));

+    // WebRTC engine — separate lock to avoid deadlock with SIP handlers.
+    let webrtc = Arc::new(Mutex::new(WebRtcEngine::new(out_tx.clone())));
+
+    // TTS engine — separate lock, lazy-loads model on first use.
+    let tts_engine = Arc::new(Mutex::new(tts::TtsEngine::new()));
+
    // Read commands from stdin.
    let stdin = tokio::io::stdin();
    let reader = BufReader::new(stdin);
@@ -105,25 +116,48 @@ async fn main() {
        };

        let engine = engine.clone();
+        let webrtc = webrtc.clone();
+        let tts_engine = tts_engine.clone();
        let out_tx = out_tx.clone();

        // Handle commands — some are async, so we spawn.
        tokio::spawn(async move {
-            handle_command(engine, &out_tx, cmd).await;
+            handle_command(engine, webrtc, tts_engine, &out_tx, cmd).await;
        });
    }
 }

-async fn handle_command(engine: Arc<Mutex<ProxyEngine>>, out_tx: &OutTx, cmd: Command) {
+async fn handle_command(
+    engine: Arc<Mutex<ProxyEngine>>,
+    webrtc: Arc<Mutex<WebRtcEngine>>,
+    tts_engine: Arc<Mutex<tts::TtsEngine>>,
+    out_tx: &OutTx,
+    cmd: Command,
+) {
    match cmd.method.as_str() {
+        // SIP commands — lock engine only.
        "configure" => handle_configure(engine, out_tx, &cmd).await,
        "hangup" => handle_hangup(engine, out_tx, &cmd).await,
        "make_call" => handle_make_call(engine, out_tx, &cmd).await,
        "get_status" => handle_get_status(engine, out_tx, &cmd).await,
-        "webrtc_offer" => handle_webrtc_offer(engine, out_tx, &cmd).await,
-        "webrtc_ice" => handle_webrtc_ice(engine, out_tx, &cmd).await,
-        "webrtc_link" => handle_webrtc_link(engine, out_tx, &cmd).await,
-        "webrtc_close" => handle_webrtc_close(engine, out_tx, &cmd).await,
+        "add_leg" => handle_add_leg(engine, out_tx, &cmd).await,
+        "remove_leg" => handle_remove_leg(engine, out_tx, &cmd).await,
+        // WebRTC commands — lock webrtc only (no engine contention).
+        "webrtc_offer" => handle_webrtc_offer(webrtc, out_tx, &cmd).await,
+        "webrtc_ice" => handle_webrtc_ice(webrtc, out_tx, &cmd).await,
+        "webrtc_close" => handle_webrtc_close(webrtc, out_tx, &cmd).await,
+        // webrtc_link needs both: engine (for mixer channels) and webrtc (for session).
+        "webrtc_link" => handle_webrtc_link(engine, webrtc, out_tx, &cmd).await,
+        "add_device_leg" => handle_add_device_leg(engine, out_tx, &cmd).await, // Call-control commands from here on are handed the engine only.
+        "transfer_leg" => handle_transfer_leg(engine, out_tx, &cmd).await,
+        "replace_leg" => handle_replace_leg(engine, out_tx, &cmd).await,
+        // Leg interaction and tool leg commands.
+        "start_interaction" => handle_start_interaction(engine, out_tx, &cmd).await,
+        "add_tool_leg" => handle_add_tool_leg(engine, out_tx, &cmd).await,
+        "remove_tool_leg" => handle_remove_tool_leg(engine, out_tx, &cmd).await,
+        "set_leg_metadata" => handle_set_leg_metadata(engine, out_tx, &cmd).await,
+        // TTS command — lock tts_engine only (no SIP/WebRTC contention).
+        "generate_tts" => handle_generate_tts(tts_engine, out_tx, &cmd).await,
        _ => respond_err(out_tx, &cmd.id, &format!("unknown command: {}", cmd.method)),
    }
 }
@@ -246,14 +280,11 @@ async fn handle_sip_packet(
    }

    // 3. Route to existing call by SIP Call-ID.
-    // Check if this Call-ID belongs to an active call (avoids borrow conflict).
    if eng.call_mgr.has_call(msg.call_id()) {
        let config_ref = eng.config.as_ref().unwrap().clone();
-        // Temporarily take registrar to avoid overlapping borrows.
-        let registrar_dummy = Registrar::new(eng.out_tx.clone());
        if eng
            .call_mgr
-            .route_sip_message(&msg, from_addr, socket, &config_ref, &registrar_dummy)
+            .route_sip_message(&msg, from_addr, socket, &config_ref)
            .await
        {
            return;
@@ -359,11 +390,14 @@ async fn handle_sip_packet(
        );

        if let Some(route) = route_result {
-            let public_ip = if let Some(ps_arc) = eng.provider_mgr.find_by_address(&from_addr).await {
+            // Look up provider state by config ID (not by device address).
+            let (public_ip, registered_aor) = if let Some(ps_arc) =
+                eng.provider_mgr.find_by_provider_id(&route.provider.id).await
+            {
                let ps = ps_arc.lock().await;
-                ps.public_ip.clone()
+                (ps.public_ip.clone(), ps.registered_aor.clone())
            } else {
-                None
+                (None, format!("sip:{}@{}", route.provider.username, route.provider.domain))
            };

            let ProxyEngine {
@@ -373,7 +407,7 @@ async fn handle_sip_packet(
            } = *eng;
            let rtp_pool = rtp_pool.as_mut().unwrap();
            let call_id = call_mgr
-                .create_outbound_passthrough(
+                .create_device_outbound_call(
                    &msg,
                    from_addr,
                    &route.provider,
@@ -381,6 +415,7 @@ async fn handle_sip_packet(
                    rtp_pool,
                    socket,
                    public_ip.as_deref(),
+                    &registered_aor,
                )
                .await;

@@ -524,7 +559,8 @@ async fn handle_hangup(engine: Arc<Mutex<ProxyEngine>>, out_tx: &OutTx, cmd: &Co
 }

 /// Handle `webrtc_offer` — browser sends SDP offer, we create PeerConnection and return answer.
-async fn handle_webrtc_offer(engine: Arc<Mutex<ProxyEngine>>, out_tx: &OutTx, cmd: &Command) {
+/// Uses only the WebRTC lock — no contention with SIP handlers.
+async fn handle_webrtc_offer(webrtc: Arc<Mutex<WebRtcEngine>>, out_tx: &OutTx, cmd: &Command) {
    let session_id = match cmd.params.get("session_id").and_then(|v| v.as_str()) {
        Some(s) => s.to_string(),
        None => { respond_err(out_tx, &cmd.id, "missing session_id"); return; }
@@ -534,8 +570,8 @@ async fn handle_webrtc_offer(engine: Arc<Mutex<ProxyEngine>>, out_tx: &OutTx, cm
        None => { respond_err(out_tx, &cmd.id, "missing sdp"); return; }
    };

-    let mut eng = engine.lock().await;
-    match eng.webrtc.handle_offer(&session_id, &offer_sdp).await {
+    let mut wrtc = webrtc.lock().await;
+    match wrtc.handle_offer(&session_id, &offer_sdp).await {
        Ok(answer_sdp) => {
            respond_ok(out_tx, &cmd.id, serde_json::json!({
                "session_id": session_id,
@@ -547,7 +583,8 @@ async fn handle_webrtc_offer(engine: Arc<Mutex<ProxyEngine>>, out_tx: &OutTx, cm
 }

 /// Handle `webrtc_ice` — forward ICE candidate from browser to Rust PeerConnection.
-async fn handle_webrtc_ice(engine: Arc<Mutex<ProxyEngine>>, out_tx: &OutTx, cmd: &Command) {
+/// Uses only the WebRTC lock.
+async fn handle_webrtc_ice(webrtc: Arc<Mutex<WebRtcEngine>>, out_tx: &OutTx, cmd: &Command) {
    let session_id = match cmd.params.get("session_id").and_then(|v| v.as_str()) {
        Some(s) => s.to_string(),
        None => { respond_err(out_tx, &cmd.id, "missing session_id"); return; }
@@ -556,15 +593,22 @@ async fn handle_webrtc_ice(engine: Arc<Mutex<ProxyEngine>>, out_tx: &OutTx, cmd:
    let sdp_mid = cmd.params.get("sdp_mid").and_then(|v| v.as_str());
    let sdp_mline_index = cmd.params.get("sdp_mline_index").and_then(|v| v.as_u64()).map(|v| v as u16);

-    let eng = engine.lock().await;
-    match eng.webrtc.add_ice_candidate(&session_id, candidate, sdp_mid, sdp_mline_index).await {
+    let wrtc = webrtc.lock().await;
+    match wrtc.add_ice_candidate(&session_id, candidate, sdp_mid, sdp_mline_index).await {
        Ok(()) => respond_ok(out_tx, &cmd.id, serde_json::json!({})),
        Err(e) => respond_err(out_tx, &cmd.id, &e),
    }
 }

-/// Handle `webrtc_link` — link a WebRTC session to a SIP call for audio bridging.
-async fn handle_webrtc_link(engine: Arc<Mutex<ProxyEngine>>, out_tx: &OutTx, cmd: &Command) {
+/// Handle `webrtc_link` — link a WebRTC session to a call's mixer for audio bridging.
+/// Creates channels, adds WebRTC leg to the call, wires the WebRTC engine.
+/// Locks are never held simultaneously — no deadlock possible.
+async fn handle_webrtc_link(
+    engine: Arc<Mutex<ProxyEngine>>,
+    webrtc: Arc<Mutex<WebRtcEngine>>,
+    out_tx: &OutTx,
+    cmd: &Command,
+) {
    let session_id = match cmd.params.get("session_id").and_then(|v| v.as_str()) {
        Some(s) => s.to_string(),
        None => { respond_err(out_tx, &cmd.id, "missing session_id"); return; }
@@ -573,34 +617,68 @@ async fn handle_webrtc_link(engine: Arc<Mutex<ProxyEngine>>, out_tx: &OutTx, cmd
        Some(s) => s.to_string(),
        None => { respond_err(out_tx, &cmd.id, "missing call_id"); return; }
    };
-    let provider_addr = match cmd.params.get("provider_media_addr").and_then(|v| v.as_str()) {
-        Some(s) => s.to_string(),
-        None => { respond_err(out_tx, &cmd.id, "missing provider_media_addr"); return; }
-    };
-    let provider_port = match cmd.params.get("provider_media_port").and_then(|v| v.as_u64()) {
-        Some(p) => p as u16,
-        None => { respond_err(out_tx, &cmd.id, "missing provider_media_port"); return; }
-    };
-    let sip_pt = cmd.params.get("sip_pt").and_then(|v| v.as_u64()).unwrap_or(9) as u8;

-    let provider_media: SocketAddr = match format!("{provider_addr}:{provider_port}").parse() {
-        Ok(a) => a,
-        Err(e) => { respond_err(out_tx, &cmd.id, &format!("bad address: {e}")); return; }
-    };
+    // Create channels for the WebRTC leg.
+    let channels = crate::leg_io::create_leg_channels();

-    let mut eng = engine.lock().await;
-    let sip_socket = match &eng.transport {
-        Some(t) => t.socket(),
-        None => { respond_err(out_tx, &cmd.id, "not initialized"); return; }
-    };
+    // Briefly lock engine to add the WebRTC leg to the call's mixer.
+    {
+        let eng = engine.lock().await;
+        let call = match eng.call_mgr.calls.get(&call_id) {
+            Some(c) => c,
+            None => {
+                respond_err(out_tx, &cmd.id, &format!("call {call_id} not found"));
+                return;
+            }
+        };
+        // Add to mixer via channel.
+        call.add_leg_to_mixer(
+            &session_id,
+            codec_lib::PT_OPUS,
+            channels.inbound_rx,
+            channels.outbound_tx,
+        )
+        .await;
+    } // engine lock released

-    let bridge_info = crate::webrtc_engine::SipBridgeInfo {
-        provider_media,
-        sip_pt,
-        sip_socket,
-    };
+    // Lock webrtc to wire the channels.
+    let mut wrtc = webrtc.lock().await;
+    if wrtc
+        .link_to_mixer(&session_id, &call_id, channels.inbound_tx, channels.outbound_rx)
+        .await
+    {
+        // Also store the WebRTC leg info in the call.
+        drop(wrtc); // Release webrtc lock before re-acquiring engine.
+        {
+            let mut eng = engine.lock().await;
+            if let Some(call) = eng.call_mgr.calls.get_mut(&call_id) {
+                call.legs.insert(
+                    session_id.clone(),
+                    crate::call::LegInfo {
+                        id: session_id.clone(),
+                        kind: crate::call::LegKind::WebRtc,
+                        state: crate::call::LegState::Connected,
+                        codec_pt: codec_lib::PT_OPUS,
+                        sip_leg: None,
+                        sip_call_id: None,
+                        webrtc_session_id: Some(session_id.clone()),
+                        rtp_socket: None,
+                        rtp_port: 0,
+                        remote_media: None,
+                        signaling_addr: None,
+                        metadata: std::collections::HashMap::new(),
+                    },
+                );
+            }
+        }
+
+        emit_event(out_tx, "leg_added", serde_json::json!({
+            "call_id": call_id,
+            "leg_id": session_id,
+            "kind": "webrtc",
+            "state": "connected",
+        }));

-    if eng.webrtc.link_to_sip(&session_id, &call_id, bridge_info).await {
        respond_ok(out_tx, &cmd.id, serde_json::json!({
            "session_id": session_id,
            "call_id": call_id,
@@ -611,16 +689,553 @@ async fn handle_webrtc_link(engine: Arc<Mutex<ProxyEngine>>, out_tx: &OutTx, cmd
    }
 }

+/// Handle `add_leg` — dial `number` (optionally via `provider_id`) and add it as a new SIP leg to call `call_id`.
+async fn handle_add_leg(engine: Arc<Mutex<ProxyEngine>>, out_tx: &OutTx, cmd: &Command) {
+    let call_id = match cmd.params.get("call_id").and_then(|v| v.as_str()) {
+        Some(s) => s.to_string(),
+        None => { respond_err(out_tx, &cmd.id, "missing call_id"); return; }
+    };
+    let number = match cmd.params.get("number").and_then(|v| v.as_str()) {
+        Some(n) => n.to_string(),
+        None => { respond_err(out_tx, &cmd.id, "missing number"); return; }
+    };
+    let provider_id = cmd.params.get("provider_id").and_then(|v| v.as_str());
+
+    let mut eng = engine.lock().await;
+    let config_ref = match &eng.config {
+        Some(c) => c.clone(),
+        None => { respond_err(out_tx, &cmd.id, "not configured"); return; }
+    };
+
+    // Resolve provider.
+    let provider_config = if let Some(pid) = provider_id {
+        config_ref.providers.iter().find(|p| p.id == pid).cloned()
+    } else {
+        config_ref.resolve_outbound_route(&number, None, &|_| true).map(|r| r.provider)
+    };
+
+    let provider_config = match provider_config {
+        Some(p) => p,
+        None => { respond_err(out_tx, &cmd.id, "no provider available"); return; }
+    };
+
+    // Get the provider's public IP and registered AOR in a single lookup keyed by the
+    // provider's config ID — the same lookup `handle_replace_leg` performs. Matching on
+    // the outbound proxy's socket address instead would fall back to 0.0.0.0:0 whenever
+    // that address cannot be resolved.
+    let (public_ip, registered_aor) = if let Some(ps_arc) = eng
+        .provider_mgr
+        .find_by_provider_id(&provider_config.id)
+        .await
+    {
+        let ps = ps_arc.lock().await;
+        (ps.public_ip.clone(), ps.registered_aor.clone())
+    } else {
+        // No provider state found: no public IP, AOR built from the provider config.
+        (
+            None,
+            format!("sip:{}@{}", provider_config.username, provider_config.domain),
+        )
+    };
+
+    let socket = match &eng.transport {
+        Some(t) => t.socket(),
+        None => { respond_err(out_tx, &cmd.id, "not initialized"); return; }
+    };
+
+    let ProxyEngine { ref mut call_mgr, ref mut rtp_pool, .. } = *eng;
+    let rtp_pool = match rtp_pool.as_mut() { Some(p) => p, None => { respond_err(out_tx, &cmd.id, "not initialized"); return; } };
+
+    let leg_id = call_mgr.add_external_leg(
+        &call_id, &number, &provider_config, &config_ref,
+        rtp_pool, &socket, public_ip.as_deref(), &registered_aor,
+    ).await;
+
+    match leg_id {
+        Some(lid) => respond_ok(out_tx, &cmd.id, serde_json::json!({ "leg_id": lid })),
+        None => respond_err(out_tx, &cmd.id, "failed to add leg"),
+    }
+}
+
+/// Handle `add_device_leg` — add the local SIP device `device_id` to the existing call `call_id`.
+async fn handle_add_device_leg(engine: Arc<Mutex<ProxyEngine>>, out_tx: &OutTx, cmd: &Command) {
+    let call_id = match cmd.params.get("call_id").and_then(|v| v.as_str()) {
+        Some(s) => s.to_string(),
+        None => { respond_err(out_tx, &cmd.id, "missing call_id"); return; }
+    };
+    let device_id = match cmd.params.get("device_id").and_then(|v| v.as_str()) {
+        Some(s) => s.to_string(),
+        None => { respond_err(out_tx, &cmd.id, "missing device_id"); return; }
+    };
+
+    let mut eng = engine.lock().await;
+    let config_ref = match &eng.config {
+        Some(c) => c.clone(),
+        None => { respond_err(out_tx, &cmd.id, "not configured"); return; }
+    };
+    let socket = match &eng.transport {
+        Some(t) => t.socket(),
+        None => { respond_err(out_tx, &cmd.id, "not initialized"); return; }
+    };
+
+    let ProxyEngine { ref registrar, ref mut call_mgr, ref mut rtp_pool, .. } = *eng; // Split borrows: registrar is read while call_mgr and rtp_pool are mutated.
+    let rtp_pool = match rtp_pool.as_mut() { Some(p) => p, None => { respond_err(out_tx, &cmd.id, "not initialized"); return; } };
+
+    let leg_id = call_mgr.add_device_leg(
+        &call_id, &device_id, registrar, &config_ref, rtp_pool, &socket,
+    ).await;
+
+    match leg_id {
+        Some(lid) => respond_ok(out_tx, &cmd.id, serde_json::json!({ "leg_id": lid })),
+        None => respond_err(out_tx, &cmd.id, "failed to add device leg — device not registered or call not found"),
+    }
+}
+
+/// Handle `transfer_leg` — move leg `leg_id` from call `source_call_id` to call `target_call_id` (all required string params).
+async fn handle_transfer_leg(engine: Arc<Mutex<ProxyEngine>>, out_tx: &OutTx, cmd: &Command) {
+    let source_call_id = match cmd.params.get("source_call_id").and_then(|v| v.as_str()) {
+        Some(s) => s.to_string(),
+        None => { respond_err(out_tx, &cmd.id, "missing source_call_id"); return; }
+    };
+    let leg_id = match cmd.params.get("leg_id").and_then(|v| v.as_str()) {
+        Some(s) => s.to_string(),
+        None => { respond_err(out_tx, &cmd.id, "missing leg_id"); return; }
+    };
+    let target_call_id = match cmd.params.get("target_call_id").and_then(|v| v.as_str()) {
+        Some(s) => s.to_string(),
+        None => { respond_err(out_tx, &cmd.id, "missing target_call_id"); return; }
+    };
+
+    let mut eng = engine.lock().await; // Engine lock is held for the whole transfer.
+    if eng.call_mgr.transfer_leg(&source_call_id, &leg_id, &target_call_id).await {
+        respond_ok(out_tx, &cmd.id, serde_json::json!({}));
+    } else {
+        respond_err(out_tx, &cmd.id, "transfer failed — call or leg not found");
+    }
+}
+
+/// Handle `replace_leg` — terminate leg `old_leg_id` and dial `number` as its replacement in call `call_id`.
+async fn handle_replace_leg(engine: Arc<Mutex<ProxyEngine>>, out_tx: &OutTx, cmd: &Command) {
+    let call_id = match cmd.params.get("call_id").and_then(|v| v.as_str()) {
+        Some(s) => s.to_string(),
+        None => { respond_err(out_tx, &cmd.id, "missing call_id"); return; }
+    };
+    let old_leg_id = match cmd.params.get("old_leg_id").and_then(|v| v.as_str()) {
+        Some(s) => s.to_string(),
+        None => { respond_err(out_tx, &cmd.id, "missing old_leg_id"); return; }
+    };
+    let number = match cmd.params.get("number").and_then(|v| v.as_str()) {
+        Some(n) => n.to_string(),
+        None => { respond_err(out_tx, &cmd.id, "missing number"); return; }
+    };
+    let provider_id = cmd.params.get("provider_id").and_then(|v| v.as_str());
+
+    let mut eng = engine.lock().await;
+    let config_ref = match &eng.config {
+        Some(c) => c.clone(),
+        None => { respond_err(out_tx, &cmd.id, "not configured"); return; }
+    };
+    let socket = match &eng.transport {
+        Some(t) => t.socket(),
+        None => { respond_err(out_tx, &cmd.id, "not initialized"); return; }
+    };
+
+    // Resolve provider: an explicit `provider_id` wins, otherwise use outbound routing for `number`.
+    let provider_config = if let Some(pid) = provider_id {
+        config_ref.providers.iter().find(|p| p.id == pid).cloned()
+    } else {
+        config_ref.resolve_outbound_route(&number, None, &|_| true).map(|r| r.provider)
+    };
+    let provider_config = match provider_config {
+        Some(p) => p,
+        None => { respond_err(out_tx, &cmd.id, "no provider available"); return; }
+    };
+
+    let (public_ip, registered_aor) = if let Some(ps_arc) = eng.provider_mgr.find_by_provider_id(&provider_config.id).await {
+        let ps = ps_arc.lock().await;
+        (ps.public_ip.clone(), ps.registered_aor.clone())
+    } else {
+        (None, format!("sip:{}@{}", provider_config.username, provider_config.domain))
+    };
+
+    let ProxyEngine { ref mut call_mgr, ref mut rtp_pool, .. } = *eng;
+    let rtp_pool = match rtp_pool.as_mut() { Some(p) => p, None => { respond_err(out_tx, &cmd.id, "not initialized"); return; } };
+
+    let new_leg_id = call_mgr.replace_leg(
+        &call_id, &old_leg_id, &number, &provider_config, &config_ref,
+        rtp_pool, &socket, public_ip.as_deref(), &registered_aor,
+    ).await;
+
+    match new_leg_id {
+        Some(lid) => respond_ok(out_tx, &cmd.id, serde_json::json!({ "new_leg_id": lid })),
+        None => respond_err(out_tx, &cmd.id, "replace failed — call ended or dial failed"),
+    }
+}
+
+/// Handle `remove_leg` — remove leg `leg_id` from call `call_id` (both required string params).
+async fn handle_remove_leg(engine: Arc<Mutex<ProxyEngine>>, out_tx: &OutTx, cmd: &Command) {
+    let call_id = match cmd.params.get("call_id").and_then(|v| v.as_str()) {
+        Some(s) => s.to_string(),
+        None => { respond_err(out_tx, &cmd.id, "missing call_id"); return; }
+    };
+    let leg_id = match cmd.params.get("leg_id").and_then(|v| v.as_str()) {
+        Some(s) => s.to_string(),
+        None => { respond_err(out_tx, &cmd.id, "missing leg_id"); return; }
+    };
+
+    let mut eng = engine.lock().await;
+    let socket = match &eng.transport {
+        Some(t) => t.socket(),
+        None => { respond_err(out_tx, &cmd.id, "not initialized"); return; }
+    };
+
+    if eng.call_mgr.remove_leg(&call_id, &leg_id, &socket).await {
+        respond_ok(out_tx, &cmd.id, serde_json::json!({}));
+    } else {
+        respond_err(out_tx, &cmd.id, "call/leg not found");
+    }
+}
+
 /// Handle `webrtc_close` — close a WebRTC session.
-async fn handle_webrtc_close(engine: Arc<Mutex<ProxyEngine>>, out_tx: &OutTx, cmd: &Command) {
+/// Uses only the WebRTC lock.
+async fn handle_webrtc_close(webrtc: Arc<Mutex<WebRtcEngine>>, out_tx: &OutTx, cmd: &Command) {
    let session_id = match cmd.params.get("session_id").and_then(|v| v.as_str()) {
        Some(s) => s.to_string(),
        None => { respond_err(out_tx, &cmd.id, "missing session_id"); return; }
    };

-    let mut eng = engine.lock().await;
-    match eng.webrtc.close_session(&session_id).await {
+    let mut wrtc = webrtc.lock().await;
+    match wrtc.close_session(&session_id).await {
        Ok(()) => respond_ok(out_tx, &cmd.id, serde_json::json!({})),
        Err(e) => respond_err(out_tx, &cmd.id, &e),
    }
 }
+
+// ---------------------------------------------------------------------------
+// Leg interaction & tool leg commands
+// ---------------------------------------------------------------------------
+
+/// Handle `start_interaction` — isolate a leg, play a prompt, collect DTMF.
+/// This command blocks until the interaction completes (digit, timeout, or cancel).
+async fn handle_start_interaction(
+    engine: Arc<Mutex<ProxyEngine>>,
+    out_tx: &OutTx,
+    cmd: &Command,
+) {
+    let call_id = match cmd.params.get("call_id").and_then(|v| v.as_str()) {
+        Some(s) => s.to_string(),
+        None => { respond_err(out_tx, &cmd.id, "missing call_id"); return; }
+    };
+    let leg_id = match cmd.params.get("leg_id").and_then(|v| v.as_str()) {
+        Some(s) => s.to_string(),
+        None => { respond_err(out_tx, &cmd.id, "missing leg_id"); return; }
+    };
+    let prompt_wav = match cmd.params.get("prompt_wav").and_then(|v| v.as_str()) {
+        Some(s) => s.to_string(),
+        None => { respond_err(out_tx, &cmd.id, "missing prompt_wav"); return; }
+    };
+    let expected_digits: Vec<char> = cmd
+        .params
+        .get("expected_digits")
+        .and_then(|v| v.as_str())
+        .unwrap_or("12")
+        .chars()
+        .collect();
+    let timeout_ms = cmd
+        .params
+        .get("timeout_ms")
+        .and_then(|v| v.as_u64())
+        .map_or(15000, |ms| ms.min(u64::from(u32::MAX)) as u32); // Saturate: a plain `as u32` would wrap oversized values.
+
+    // Load prompt audio from WAV file.
+    let prompt_frames = match crate::audio_player::load_prompt_pcm_frames(&prompt_wav) {
+        Ok(f) => f,
+        Err(e) => {
+            respond_err(out_tx, &cmd.id, &format!("prompt load failed: {e}"));
+            return;
+        }
+    };
+
+    // Create oneshot channel for the result.
+    let (result_tx, result_rx) = tokio::sync::oneshot::channel();
+
+    // Send StartInteraction to the mixer.
+    {
+        let eng = engine.lock().await;
+        let call = match eng.call_mgr.calls.get(&call_id) {
+            Some(c) => c,
+            None => {
+                respond_err(out_tx, &cmd.id, &format!("call {call_id} not found"));
+                return;
+            }
+        };
+        let _ = call // If this send fails, `result_tx` is dropped and the wait below yields Cancelled.
+            .mixer_cmd_tx
+            .send(crate::mixer::MixerCommand::StartInteraction {
+                leg_id: leg_id.clone(),
+                prompt_pcm_frames: prompt_frames,
+                expected_digits,
+                timeout_ms,
+                result_tx,
+            })
+            .await;
+    } // engine lock released — we block on the oneshot, not the lock.
+
+    // Await the interaction result (blocks this task until complete).
+    let safety_timeout = tokio::time::Duration::from_millis(timeout_ms as u64 + 30000);
+    let result = match tokio::time::timeout(safety_timeout, result_rx).await {
+        Ok(Ok(r)) => r,
+        Ok(Err(_)) => crate::mixer::InteractionResult::Cancelled, // oneshot dropped
+        Err(_) => crate::mixer::InteractionResult::Timeout,       // safety timeout
+    };
+
+    // Record the interaction outcome (and digit, if any) in the leg's metadata.
+    let (result_str, digit_str) = match &result {
+        crate::mixer::InteractionResult::Digit(d) => ("digit", Some(d.to_string())),
+        crate::mixer::InteractionResult::Timeout => ("timeout", None),
+        crate::mixer::InteractionResult::Cancelled => ("cancelled", None),
+    };
+
+    {
+        let mut eng = engine.lock().await;
+        if let Some(call) = eng.call_mgr.calls.get_mut(&call_id) {
+            if let Some(leg) = call.legs.get_mut(&leg_id) {
+                leg.metadata.insert(
+                    "last_interaction_result".to_string(),
+                    serde_json::json!(result_str),
+                );
+                if let Some(ref d) = digit_str {
+                    leg.metadata.insert(
+                        "last_interaction_digit".to_string(),
+                        serde_json::json!(d),
+                    );
+                }
+            }
+        }
+    }
+
+    let mut resp = serde_json::json!({ "result": result_str });
+    if let Some(d) = digit_str {
+        resp["digit"] = serde_json::json!(d);
+    }
+    respond_ok(out_tx, &cmd.id, resp);
+}
+
+/// Handle `add_tool_leg` — add a recording or transcription tool leg to a call.
+///
+/// Required params: `call_id` (string) and `tool_type` (`"recording"` or
+/// `"transcription"`). For recordings, the optional `config.base_dir` overrides the
+/// default output directory `.nogit/recordings`.
+///
+/// On success the tool leg is registered with the call's mixer and leg map, a
+/// `leg_added` event is emitted, and the response carries the generated `tool_leg_id`.
+/// An error response is sent when a parameter is missing or invalid, when the call
+/// does not exist, or when the call's mixer no longer accepts commands.
+async fn handle_add_tool_leg(
+    engine: Arc<Mutex<ProxyEngine>>,
+    out_tx: &OutTx,
+    cmd: &Command,
+) {
+    let call_id = match cmd.params.get("call_id").and_then(|v| v.as_str()) {
+        Some(s) => s.to_string(),
+        None => { respond_err(out_tx, &cmd.id, "missing call_id"); return; }
+    };
+    let tool_type_str = match cmd.params.get("tool_type").and_then(|v| v.as_str()) {
+        Some(s) => s.to_string(),
+        None => { respond_err(out_tx, &cmd.id, "missing tool_type"); return; }
+    };
+
+    let tool_type = match tool_type_str.as_str() {
+        "recording" => crate::mixer::ToolType::Recording,
+        "transcription" => crate::mixer::ToolType::Transcription,
+        other => {
+            respond_err(out_tx, &cmd.id, &format!("unknown tool_type: {other}"));
+            return;
+        }
+    };
+
+    let tool_leg_id = format!("{call_id}-tool-{}", rand::random::<u32>());
+
+    {
+        let mut eng = engine.lock().await;
+
+        // Resolve the call BEFORE spawning anything: an unknown call_id must not
+        // start a background recording/transcription task that nobody will feed.
+        let call = match eng.call_mgr.calls.get_mut(&call_id) {
+            Some(c) => c,
+            None => {
+                respond_err(out_tx, &cmd.id, &format!("call {call_id} not found"));
+                return;
+            }
+        };
+
+        // Spawn the appropriate background task. The join handle is intentionally
+        // dropped; the task is driven solely by its audio channel.
+        let (audio_tx, _task_handle) = match tool_type {
+            crate::mixer::ToolType::Recording => {
+                let base_dir = cmd
+                    .params
+                    .get("config")
+                    .and_then(|c| c.get("base_dir"))
+                    .and_then(|v| v.as_str())
+                    .unwrap_or(".nogit/recordings")
+                    .to_string();
+                crate::tool_leg::spawn_recording_tool(
+                    tool_leg_id.clone(),
+                    call_id.clone(),
+                    base_dir,
+                    out_tx.clone(),
+                )
+            }
+            crate::mixer::ToolType::Transcription => {
+                crate::tool_leg::spawn_transcription_tool(
+                    tool_leg_id.clone(),
+                    call_id.clone(),
+                    out_tx.clone(),
+                )
+            }
+        };
+
+        // Hand the audio sender to the mixer. If the mixer task is gone the command
+        // (and with it `audio_tx`) is dropped, so report the failure instead of
+        // registering a tool leg that would never receive audio.
+        let sent = call
+            .mixer_cmd_tx
+            .send(crate::mixer::MixerCommand::AddToolLeg {
+                leg_id: tool_leg_id.clone(),
+                tool_type,
+                audio_tx,
+            })
+            .await;
+        if sent.is_err() {
+            respond_err(
+                out_tx,
+                &cmd.id,
+                &format!("mixer for call {call_id} is not running"),
+            );
+            return;
+        }
+
+        // Register tool leg in the call's leg map.
+        let mut metadata = std::collections::HashMap::new();
+        metadata.insert(
+            "tool_type".to_string(),
+            serde_json::json!(tool_type_str),
+        );
+        call.legs.insert(
+            tool_leg_id.clone(),
+            crate::call::LegInfo {
+                id: tool_leg_id.clone(),
+                kind: crate::call::LegKind::Tool,
+                state: crate::call::LegState::Connected,
+                codec_pt: 0,
+                sip_leg: None,
+                sip_call_id: None,
+                webrtc_session_id: None,
+                rtp_socket: None,
+                rtp_port: 0,
+                remote_media: None,
+                signaling_addr: None,
+                metadata,
+            },
+        );
+    }
+
+    emit_event(
+        out_tx,
+        "leg_added",
+        serde_json::json!({
+            "call_id": call_id,
+            "leg_id": tool_leg_id,
+            "kind": "tool",
+            "tool_type": tool_type_str,
+            "state": "connected",
+        }),
+    );
+
+    respond_ok(
+        out_tx,
+        &cmd.id,
+        serde_json::json!({ "tool_leg_id": tool_leg_id }),
+    );
+}
+
+/// Handle `remove_tool_leg` — remove a tool leg from a call.
+///
+/// Required params: `call_id` and `tool_leg_id` (both strings).
+///
+/// Responds with an error when a parameter is missing, the call is unknown, the leg
+/// is unknown, or the leg exists but is not a tool leg. On success the leg is removed
+/// from the mixer and from the call's leg map, and a `leg_removed` event is emitted.
+async fn handle_remove_tool_leg(
+    engine: Arc<Mutex<ProxyEngine>>,
+    out_tx: &OutTx,
+    cmd: &Command,
+) {
+    let call_id = match cmd.params.get("call_id").and_then(|v| v.as_str()) {
+        Some(s) => s.to_string(),
+        None => { respond_err(out_tx, &cmd.id, "missing call_id"); return; }
+    };
+    let tool_leg_id = match cmd.params.get("tool_leg_id").and_then(|v| v.as_str()) {
+        Some(s) => s.to_string(),
+        None => { respond_err(out_tx, &cmd.id, "missing tool_leg_id"); return; }
+    };
+
+    let mut eng = engine.lock().await;
+    let call = match eng.call_mgr.calls.get_mut(&call_id) {
+        Some(c) => c,
+        None => {
+            respond_err(out_tx, &cmd.id, &format!("call {call_id} not found"));
+            return;
+        }
+    };
+
+    // Only tool legs may be removed through this command. Without this guard a
+    // SIP/WebRTC leg ID would be dropped from the leg map with no teardown, and a
+    // `leg_removed` event would be emitted for IDs that never existed.
+    match call.legs.get(&tool_leg_id) {
+        Some(leg) if matches!(leg.kind, crate::call::LegKind::Tool) => {}
+        Some(_) => {
+            respond_err(
+                out_tx,
+                &cmd.id,
+                &format!("leg {tool_leg_id} is not a tool leg"),
+            );
+            return;
+        }
+        None => {
+            respond_err(out_tx, &cmd.id, &format!("leg {tool_leg_id} not found"));
+            return;
+        }
+    }
+
+    // Remove from mixer (drops audio_tx → background task finalizes).
+    // A send failure means the mixer task is already gone, in which case its tool
+    // slots were dropped with it, so the result is deliberately ignored.
+    let _ = call
+        .mixer_cmd_tx
+        .send(crate::mixer::MixerCommand::RemoveToolLeg {
+            leg_id: tool_leg_id.clone(),
+        })
+        .await;
+
+    // Remove from call's leg map.
+    call.legs.remove(&tool_leg_id);
+
+    emit_event(
+        out_tx,
+        "leg_removed",
+        serde_json::json!({
+            "call_id": call_id,
+            "leg_id": tool_leg_id,
+        }),
+    );
+
+    respond_ok(out_tx, &cmd.id, serde_json::json!({}));
+}
+
+/// Handle `set_leg_metadata` — store one key/value pair in a leg's metadata map.
+///
+/// Required params: `call_id`, `leg_id`, `key` (strings) and `value` (any JSON).
+/// Parameters are validated in that order; the first missing one produces an error
+/// response. An unknown call or leg also produces an error response. An existing
+/// entry under `key` is overwritten.
+async fn handle_set_leg_metadata(
+    engine: Arc<Mutex<ProxyEngine>>,
+    out_tx: &OutTx,
+    cmd: &Command,
+) {
+    // Fetch a parameter as an owned string; `None` if absent or not a JSON string.
+    let string_param = |name: &str| {
+        cmd.params
+            .get(name)
+            .and_then(|v| v.as_str())
+            .map(|s| s.to_string())
+    };
+
+    let Some(call_id) = string_param("call_id") else {
+        respond_err(out_tx, &cmd.id, "missing call_id");
+        return;
+    };
+    let Some(leg_id) = string_param("leg_id") else {
+        respond_err(out_tx, &cmd.id, "missing leg_id");
+        return;
+    };
+    let Some(key) = string_param("key") else {
+        respond_err(out_tx, &cmd.id, "missing key");
+        return;
+    };
+    let Some(value) = cmd.params.get("value").cloned() else {
+        respond_err(out_tx, &cmd.id, "missing value");
+        return;
+    };
+
+    let mut guard = engine.lock().await;
+    let Some(call) = guard.call_mgr.calls.get_mut(&call_id) else {
+        respond_err(out_tx, &cmd.id, &format!("call {call_id} not found"));
+        return;
+    };
+    let Some(leg) = call.legs.get_mut(&leg_id) else {
+        respond_err(out_tx, &cmd.id, &format!("leg {leg_id} not found"));
+        return;
+    };
+
+    leg.metadata.insert(key, value);
+    respond_ok(out_tx, &cmd.id, serde_json::json!({}));
+}
+
+/// Handle `generate_tts` — synthesize text to a WAV file using Kokoro TTS.
+///
+/// The command's params are passed unchanged to the TTS engine. The engine lock is
+/// held for the whole generation, so concurrent requests run one after another.
+/// The engine's result is returned as the success payload; its error is returned
+/// as the error message.
+async fn handle_generate_tts(
+    tts_engine: Arc<Mutex<tts::TtsEngine>>,
+    out_tx: &OutTx,
+    cmd: &Command,
+) {
+    let mut engine_guard = tts_engine.lock().await;
+    let outcome = engine_guard.generate(&cmd.params).await;
+    match outcome {
+        Err(reason) => respond_err(out_tx, &cmd.id, &reason),
+        Ok(payload) => respond_ok(out_tx, &cmd.id, payload),
+    }
+}
--- a/rust/crates/proxy-engine/src/mixer.rs
+++ b/rust/crates/proxy-engine/src/mixer.rs
@@ -0,0 +1,589 @@
+//! Audio mixer — mix-minus engine for multiparty calls.
+//!
+//! Each Call spawns one mixer task. Legs communicate with the mixer via
+//! tokio mpsc channels — no shared mutable state, no lock contention.
+//!
+//! Internal bus format: 48kHz f32 PCM (960 samples per 20ms frame).
+//! All encoding/decoding happens at leg boundaries. Per-leg inbound denoising at 48kHz.
+//!
+//! The mixer runs a 20ms tick loop:
+//! 1. Drain inbound channels, decode to f32, resample to 48kHz, denoise per-leg
+//! 2. Compute total mix (sum of all **participant** legs' f32 PCM as f64)
+//! 3. For each participant leg: mix-minus = total - own, resample to leg codec rate, encode, send
+//! 4. For each isolated leg: play prompt frame or silence, check DTMF
+//! 5. For each tool leg: send per-source unmerged audio batch
+//! 6. Forward DTMF between participant legs only
+
+use crate::ipc::{emit_event, OutTx};
+use crate::rtp::{build_rtp_header, rtp_clock_increment};
+use codec_lib::{codec_sample_rate, new_denoiser, TranscodeState};
+use nnnoiseless::DenoiseState;
+use std::collections::{HashMap, VecDeque};
+use tokio::sync::{mpsc, oneshot};
+use tokio::task::JoinHandle;
+use tokio::time::{self, Duration, MissedTickBehavior};
+
+/// Mixing sample rate — 48kHz. Opus is native, G.722 needs 3× upsample, G.711 needs 6× upsample.
+/// All processing (denoising, mixing) happens at this rate in f32 for maximum quality.
+const MIX_RATE: u32 = 48000;
+/// Samples per 20ms frame at the mixing rate.
+///
+/// Must stay equal to `MIX_RATE / 50`: the mixer ticks every 20 ms and every
+/// per-leg frame buffer is padded or truncated to exactly this length.
+const MIX_FRAME_SIZE: usize = 960; // 48000 * 0.020
+
+/// A raw RTP payload received from a leg (no RTP header).
+///
+/// Legs push these into the mixer through the `inbound_rx` channel given in
+/// [`MixerCommand::AddLeg`].
+pub struct RtpPacket {
+    /// Payload bytes only; the RTP header has already been stripped.
+    pub payload: Vec<u8>,
+    /// RTP payload type. The mixer treats 101 as a DTMF telephone-event and hands
+    /// every other value to the leg's decoder.
+    pub payload_type: u8,
+    /// RTP marker bit (first packet of a DTMF event, etc.).
+    pub marker: bool,
+    /// RTP timestamp from the original packet header.
+    /// Not consulted by the mixer loop in this module.
+    pub timestamp: u32,
+}
+
+// ---------------------------------------------------------------------------
+// Leg roles
+// ---------------------------------------------------------------------------
+
+/// What role a leg currently plays in the mixer.
+///
+/// A leg starts as `Participant` when added. `StartInteraction` moves it to
+/// `Isolated`; it returns to `Participant` when the interaction completes, times
+/// out, or is cancelled via `CancelInteraction`.
+enum LegRole {
+    /// Normal participant: contributes to mix, receives mix-minus.
+    Participant,
+    /// Temporarily isolated for IVR/consent interaction.
+    /// The leg is excluded from the mix and from tool-leg audio, and hears the
+    /// prompt (then silence) instead of the mix-minus.
+    Isolated(IsolationState),
+}
+
+/// Bookkeeping for one in-progress interaction on an isolated leg.
+struct IsolationState {
+    /// PCM frames at MIX_RATE (960 samples each, 48kHz f32) queued for playback.
+    /// One frame is popped per mixer tick; silence is sent once the queue is empty.
+    prompt_frames: VecDeque<Vec<f32>>,
+    /// Digits that complete the interaction (e.g., ['1', '2']).
+    /// Other digits are ignored and the interaction keeps running.
+    expected_digits: Vec<char>,
+    /// Ticks remaining before timeout (decremented each tick after prompt ends).
+    /// Initialised to `timeout_ms / 20`.
+    timeout_ticks_remaining: u32,
+    /// Whether we've finished playing the prompt.
+    prompt_done: bool,
+    /// Channel to send the result back to the command handler.
+    /// `None` once a result has been delivered, so it is sent at most once.
+    result_tx: Option<oneshot::Sender<InteractionResult>>,
+}
+
+/// Result of a leg interaction (consent prompt, IVR, etc.).
+///
+/// Delivered exactly once through the `result_tx` passed in
+/// [`MixerCommand::StartInteraction`].
+pub enum InteractionResult {
+    /// The participant pressed one of the expected digits.
+    /// Reported when a DTMF packet with the end bit set carries an expected digit.
+    Digit(char),
+    /// No digit was received within the timeout.
+    /// The timeout only starts counting after the prompt has finished playing.
+    Timeout,
+    /// The leg was removed or the call tore down before completion.
+    /// Also sent when the interaction is replaced by a newer one, cancelled
+    /// explicitly, or requested for a leg the mixer does not know.
+    Cancelled,
+}
+
+// ---------------------------------------------------------------------------
+// Tool legs
+// ---------------------------------------------------------------------------
+
+/// Type of tool leg.
+///
+/// The mixer only stores this value; it delivers the same per-source audio
+/// batches to every tool leg regardless of type.
+#[derive(Debug, Clone, Copy)]
+pub enum ToolType {
+    /// Tool leg created for the `"recording"` tool type.
+    Recording,
+    /// Tool leg created for the `"transcription"` tool type.
+    Transcription,
+}
+
+/// Per-source audio delivered to a tool leg each mixer tick.
+///
+/// Batches are sent with a non-blocking `try_send`; a tool that falls behind
+/// loses batches rather than stalling the mixer.
+pub struct ToolAudioBatch {
+    /// One entry per leg that is currently a participant (isolated legs are excluded).
+    pub sources: Vec<ToolAudioSource>,
+}
+
+/// One participant's 20ms audio frame.
+pub struct ToolAudioSource {
+    /// ID of the leg this frame came from.
+    pub leg_id: String,
+    /// PCM at 48kHz f32, MIX_FRAME_SIZE (960) samples.
+    /// This is the leg's last decoded and denoised inbound frame, not the mix.
+    pub pcm_48k: Vec<f32>,
+}
+
+/// Internal storage for a tool leg inside the mixer.
+struct ToolLegSlot {
+    // Stored for completeness but never read by the mixer loop, hence the allow.
+    #[allow(dead_code)]
+    tool_type: ToolType,
+    /// Sender for per-tick audio batches; dropping the slot closes the channel.
+    audio_tx: mpsc::Sender<ToolAudioBatch>,
+}
+
+// ---------------------------------------------------------------------------
+// Commands
+// ---------------------------------------------------------------------------
+
+/// Commands sent to the mixer task via a control channel.
+///
+/// Commands are drained at the start of every 20 ms tick, so they take effect
+/// on the tick after they are sent.
+pub enum MixerCommand {
+    /// Add a new participant leg to the mix.
+    AddLeg {
+        /// Key under which the leg is stored; an existing leg with this ID is replaced.
+        leg_id: String,
+        /// RTP payload type used to encode audio sent to this leg.
+        codec_pt: u8,
+        /// Inbound RTP payloads (audio and DTMF) from the leg.
+        inbound_rx: mpsc::Receiver<RtpPacket>,
+        /// Outbound complete RTP packets (header + payload) to the leg.
+        outbound_tx: mpsc::Sender<Vec<u8>>,
+    },
+    /// Remove a leg from the mix (channels are dropped, I/O tasks exit).
+    /// A pending interaction on the leg is resolved as `Cancelled`.
+    RemoveLeg { leg_id: String },
+    /// Shut down the mixer.
+    /// All pending interactions are resolved as `Cancelled` first.
+    Shutdown,
+
+    /// Isolate a leg and start an interaction (consent prompt, IVR).
+    /// The leg is removed from the mix and hears the prompt instead.
+    /// DTMF from the leg is checked against expected_digits.
+    StartInteraction {
+        leg_id: String,
+        /// PCM frames at MIX_RATE (48kHz f32), each 960 samples.
+        prompt_pcm_frames: Vec<Vec<f32>>,
+        /// Digits that end the interaction with `InteractionResult::Digit`.
+        expected_digits: Vec<char>,
+        /// Time to wait for a digit after the prompt ends; rounded down to 20 ms ticks.
+        timeout_ms: u32,
+        /// Receives the outcome; gets `Cancelled` immediately if the leg is unknown.
+        result_tx: oneshot::Sender<InteractionResult>,
+    },
+    /// Cancel an in-progress interaction (e.g., leg being removed).
+    /// The leg returns to the participant role.
+    CancelInteraction { leg_id: String },
+
+    /// Add a tool leg that receives per-source unmerged audio.
+    AddToolLeg {
+        leg_id: String,
+        tool_type: ToolType,
+        /// Receives one `ToolAudioBatch` per tick while participants exist.
+        audio_tx: mpsc::Sender<ToolAudioBatch>,
+    },
+    /// Remove a tool leg (drops the channel, background task finalizes).
+    RemoveToolLeg { leg_id: String },
+}
+
+// ---------------------------------------------------------------------------
+// Mixer internals
+// ---------------------------------------------------------------------------
+
+/// Internal per-leg state inside the mixer.
+struct MixerLegSlot {
+    /// RTP payload type used for audio sent to this leg.
+    codec_pt: u8,
+    /// Per-leg codec state, used for decoding, resampling and encoding.
+    transcoder: TranscodeState,
+    /// Per-leg inbound denoiser (48kHz, 480-sample frames).
+    denoiser: Box<DenoiseState<'static>>,
+    /// Inbound RTP payloads from the leg; drained completely every tick.
+    inbound_rx: mpsc::Receiver<RtpPacket>,
+    /// Outbound RTP packets to the leg; frames are dropped when the channel is full.
+    outbound_tx: mpsc::Sender<Vec<u8>>,
+    /// Last decoded+denoised PCM frame at MIX_RATE (960 samples, 48kHz f32).
+    /// Reused on ticks without new audio until `silent_ticks` exceeds 150.
+    last_pcm_frame: Vec<f32>,
+    /// Number of consecutive ticks with no inbound packet.
+    /// Reset by either an audio or a DTMF packet.
+    silent_ticks: u32,
+    // RTP output state.
+    /// Next outbound sequence number; starts at 0 and is shared by audio and DTMF.
+    rtp_seq: u16,
+    /// Next outbound timestamp; starts at 0 and advances only for audio packets.
+    rtp_ts: u32,
+    /// Outbound SSRC, chosen randomly when the leg is added.
+    rtp_ssrc: u32,
+    /// Current role of this leg in the mixer.
+    role: LegRole,
+}
+
+/// Start the per-call mixer task.
+///
+/// Returns the sender for [`MixerCommand`]s (bounded to 32 queued commands) and
+/// the join handle of the spawned task.
+pub fn spawn_mixer(
+    call_id: String,
+    out_tx: OutTx,
+) -> (mpsc::Sender<MixerCommand>, JoinHandle<()>) {
+    let (control_tx, control_rx) = mpsc::channel::<MixerCommand>(32);
+    let task = tokio::spawn(mixer_loop(call_id, control_rx, out_tx));
+    (control_tx, task)
+}
+
+/// The 20ms mixing loop.
+///
+/// Runs until a `Shutdown` command arrives or every command sender has been
+/// dropped. Each tick:
+/// 1. applies all queued control commands;
+/// 2. drains every leg's inbound channel, keeping the latest audio packet
+///    (decoded, resampled to 48kHz, denoised) and collecting DTMF separately;
+/// 3. sums the frames of all participant legs;
+/// 4. sends mix-minus audio to participants and prompt/silence to isolated legs,
+///    resolving interactions on a matching digit or timeout;
+/// 5. hands the per-participant frames to every tool leg;
+/// 6. forwards DTMF between participant legs.
+///
+/// Missed ticks are skipped rather than replayed. All outbound sends are
+/// non-blocking; frames are dropped when a receiver is full.
+async fn mixer_loop(
+    call_id: String,
+    mut cmd_rx: mpsc::Receiver<MixerCommand>,
+    out_tx: OutTx,
+) {
+    let mut legs: HashMap<String, MixerLegSlot> = HashMap::new();
+    let mut tool_legs: HashMap<String, ToolLegSlot> = HashMap::new();
+    let mut interval = time::interval(Duration::from_millis(20));
+    interval.set_missed_tick_behavior(MissedTickBehavior::Skip);
+
+    loop {
+        interval.tick().await;
+
+        // ── 1. Process control commands (non-blocking). ─────────────
+        loop {
+            match cmd_rx.try_recv() {
+                Ok(MixerCommand::AddLeg {
+                    leg_id,
+                    codec_pt,
+                    inbound_rx,
+                    outbound_tx,
+                }) => {
+                    let transcoder = match TranscodeState::new() {
+                        Ok(t) => t,
+                        Err(e) => {
+                            emit_event(
+                                &out_tx,
+                                "mixer_error",
+                                serde_json::json!({
+                                    "call_id": call_id,
+                                    "leg_id": leg_id,
+                                    "error": format!("codec init: {e}"),
+                                }),
+                            );
+                            continue;
+                        }
+                    };
+                    legs.insert(
+                        leg_id,
+                        MixerLegSlot {
+                            codec_pt,
+                            transcoder,
+                            denoiser: new_denoiser(),
+                            inbound_rx,
+                            outbound_tx,
+                            last_pcm_frame: vec![0.0f32; MIX_FRAME_SIZE],
+                            silent_ticks: 0,
+                            rtp_seq: 0,
+                            rtp_ts: 0,
+                            rtp_ssrc: rand::random(),
+                            role: LegRole::Participant,
+                        },
+                    );
+                }
+                Ok(MixerCommand::RemoveLeg { leg_id }) => {
+                    // If the leg is isolated, send Cancelled before dropping.
+                    if let Some(slot) = legs.get_mut(&leg_id) {
+                        if let LegRole::Isolated(ref mut state) = slot.role {
+                            if let Some(tx) = state.result_tx.take() {
+                                let _ = tx.send(InteractionResult::Cancelled);
+                            }
+                        }
+                    }
+                    legs.remove(&leg_id);
+                    // Channels drop → I/O tasks exit cleanly.
+                }
+                Ok(MixerCommand::Shutdown) => {
+                    // Cancel all outstanding interactions before shutting down.
+                    for slot in legs.values_mut() {
+                        if let LegRole::Isolated(ref mut state) = slot.role {
+                            if let Some(tx) = state.result_tx.take() {
+                                let _ = tx.send(InteractionResult::Cancelled);
+                            }
+                        }
+                    }
+                    return;
+                }
+                Ok(MixerCommand::StartInteraction {
+                    leg_id,
+                    prompt_pcm_frames,
+                    expected_digits,
+                    timeout_ms,
+                    result_tx,
+                }) => {
+                    if let Some(slot) = legs.get_mut(&leg_id) {
+                        // Cancel any existing interaction first.
+                        if let LegRole::Isolated(ref mut old_state) = slot.role {
+                            if let Some(tx) = old_state.result_tx.take() {
+                                let _ = tx.send(InteractionResult::Cancelled);
+                            }
+                        }
+                        let timeout_ticks = timeout_ms / 20;
+                        slot.role = LegRole::Isolated(IsolationState {
+                            prompt_frames: VecDeque::from(prompt_pcm_frames),
+                            expected_digits,
+                            timeout_ticks_remaining: timeout_ticks,
+                            prompt_done: false,
+                            result_tx: Some(result_tx),
+                        });
+                    } else {
+                        // Leg not found — immediately cancel.
+                        let _ = result_tx.send(InteractionResult::Cancelled);
+                    }
+                }
+                Ok(MixerCommand::CancelInteraction { leg_id }) => {
+                    if let Some(slot) = legs.get_mut(&leg_id) {
+                        if let LegRole::Isolated(ref mut state) = slot.role {
+                            if let Some(tx) = state.result_tx.take() {
+                                let _ = tx.send(InteractionResult::Cancelled);
+                            }
+                        }
+                        slot.role = LegRole::Participant;
+                    }
+                }
+                Ok(MixerCommand::AddToolLeg {
+                    leg_id,
+                    tool_type,
+                    audio_tx,
+                }) => {
+                    tool_legs.insert(leg_id, ToolLegSlot { tool_type, audio_tx });
+                }
+                Ok(MixerCommand::RemoveToolLeg { leg_id }) => {
+                    tool_legs.remove(&leg_id);
+                    // Dropping the ToolLegSlot drops audio_tx → background task sees channel close.
+                }
+                Err(mpsc::error::TryRecvError::Empty) => break,
+                Err(mpsc::error::TryRecvError::Disconnected) => return,
+            }
+        }
+
+        if legs.is_empty() && tool_legs.is_empty() {
+            continue;
+        }
+
+        // ── 2. Drain inbound packets, decode to 48kHz f32 PCM. ─────
+        //    DTMF (PT 101) packets are collected separately.
+        let mut dtmf_forward: Vec<(String, RtpPacket)> = Vec::new();
+
+        for (lid, slot) in legs.iter_mut() {
+            // Drain channel — collect DTMF packets separately, keep latest audio.
+            let mut latest_audio: Option<RtpPacket> = None;
+            loop {
+                match slot.inbound_rx.try_recv() {
+                    Ok(pkt) => {
+                        if pkt.payload_type == 101 {
+                            // DTMF telephone-event: collect for processing.
+                            dtmf_forward.push((lid.clone(), pkt));
+                        } else {
+                            latest_audio = Some(pkt);
+                        }
+                    }
+                    Err(_) => break,
+                }
+            }
+
+            if let Some(pkt) = latest_audio {
+                slot.silent_ticks = 0;
+                match slot.transcoder.decode_to_f32(&pkt.payload, pkt.payload_type) {
+                    Ok((pcm, rate)) => {
+                        // Resample to 48kHz mixing rate if needed.
+                        let pcm_48k = if rate == MIX_RATE {
+                            pcm
+                        } else {
+                            slot.transcoder
+                                .resample_f32(&pcm, rate, MIX_RATE)
+                                .unwrap_or_else(|_| vec![0.0f32; MIX_FRAME_SIZE])
+                        };
+                        // Per-leg inbound denoising at 48kHz.
+                        let denoised = TranscodeState::denoise_f32(&mut slot.denoiser, &pcm_48k);
+                        // Pad or truncate to exactly MIX_FRAME_SIZE.
+                        let mut frame = denoised;
+                        frame.resize(MIX_FRAME_SIZE, 0.0);
+                        slot.last_pcm_frame = frame;
+                    }
+                    Err(_) => {
+                        // Decode failed — use silence.
+                        slot.last_pcm_frame = vec![0.0f32; MIX_FRAME_SIZE];
+                    }
+                }
+            } else if dtmf_forward.iter().any(|(src, _)| src == lid) {
+                // Got DTMF but no audio — don't bump silent_ticks (DTMF counts as activity).
+                slot.silent_ticks = 0;
+            } else {
+                slot.silent_ticks += 1;
+                // After 150 ticks (3 seconds) of silence, zero out to avoid stale audio.
+                if slot.silent_ticks > 150 {
+                    slot.last_pcm_frame = vec![0.0f32; MIX_FRAME_SIZE];
+                }
+            }
+        }
+
+        // ── 3. Compute total mix from PARTICIPANT legs only. ────────
+        //    Accumulate as f64 to prevent precision loss when summing f32.
+        let mut total_mix = vec![0.0f64; MIX_FRAME_SIZE];
+        for slot in legs.values() {
+            if matches!(slot.role, LegRole::Participant) {
+                for (i, &s) in slot.last_pcm_frame.iter().enumerate().take(MIX_FRAME_SIZE) {
+                    total_mix[i] += s as f64;
+                }
+            }
+        }
+
+        // ── 4. Per-leg output. ──────────────────────────────────────
+        // Collect interaction completions to apply after the loop
+        // (can't mutate role while iterating mutably for encode).
+        let mut completed_interactions: Vec<(String, InteractionResult)> = Vec::new();
+
+        for (lid, slot) in legs.iter_mut() {
+            match &mut slot.role {
+                LegRole::Participant => {
+                    // Mix-minus: total minus this leg's own contribution, clamped to [-1.0, 1.0].
+                    let mut mix_minus = Vec::with_capacity(MIX_FRAME_SIZE);
+                    for i in 0..MIX_FRAME_SIZE {
+                        let sample =
+                            (total_mix[i] - slot.last_pcm_frame[i] as f64) as f32;
+                        mix_minus.push(sample.clamp(-1.0, 1.0));
+                    }
+
+                    // Resample from 48kHz to the leg's codec native rate.
+                    let target_rate = codec_sample_rate(slot.codec_pt);
+                    let resampled = if target_rate == MIX_RATE {
+                        mix_minus
+                    } else {
+                        slot.transcoder
+                            .resample_f32(&mix_minus, MIX_RATE, target_rate)
+                            .unwrap_or_default()
+                    };
+
+                    // Encode to the leg's codec (f32 → i16 → codec inside encode_from_f32).
+                    let encoded =
+                        match slot.transcoder.encode_from_f32(&resampled, slot.codec_pt) {
+                            Ok(e) if !e.is_empty() => e,
+                            _ => continue,
+                        };
+
+                    // Build RTP packet with header.
+                    let header =
+                        build_rtp_header(slot.codec_pt, slot.rtp_seq, slot.rtp_ts, slot.rtp_ssrc);
+                    let mut rtp = header.to_vec();
+                    rtp.extend_from_slice(&encoded);
+
+                    slot.rtp_seq = slot.rtp_seq.wrapping_add(1);
+                    slot.rtp_ts = slot.rtp_ts.wrapping_add(rtp_clock_increment(slot.codec_pt));
+
+                    // Non-blocking send — drop frame if channel is full.
+                    let _ = slot.outbound_tx.try_send(rtp);
+                }
+                LegRole::Isolated(state) => {
+                    // Check for DTMF digit from this leg.
+                    let mut matched_digit: Option<char> = None;
+                    for (src_lid, dtmf_pkt) in &dtmf_forward {
+                        if src_lid == lid && dtmf_pkt.payload.len() >= 4 {
+                            let event_id = dtmf_pkt.payload[0];
+                            let end_bit = (dtmf_pkt.payload[1] & 0x80) != 0;
+                            if end_bit {
+                                const EVENT_CHARS: &[char] = &[
+                                    '0', '1', '2', '3', '4', '5', '6', '7', '8', '9', '*', '#',
+                                    'A', 'B', 'C', 'D',
+                                ];
+                                if let Some(&ch) = EVENT_CHARS.get(event_id as usize) {
+                                    if state.expected_digits.contains(&ch) {
+                                        matched_digit = Some(ch);
+                                        break;
+                                    }
+                                }
+                            }
+                        }
+                    }
+
+                    if let Some(digit) = matched_digit {
+                        // Interaction complete — digit matched.
+                        completed_interactions
+                            .push((lid.clone(), InteractionResult::Digit(digit)));
+                    } else {
+                        // Play prompt frame or silence.
+                        let pcm_frame = if let Some(frame) = state.prompt_frames.pop_front() {
+                            frame
+                        } else {
+                            state.prompt_done = true;
+                            vec![0.0f32; MIX_FRAME_SIZE]
+                        };
+
+                        // Encode prompt frame to the leg's codec.
+                        let target_rate = codec_sample_rate(slot.codec_pt);
+                        let resampled = if target_rate == MIX_RATE {
+                            pcm_frame
+                        } else {
+                            slot.transcoder
+                                .resample_f32(&pcm_frame, MIX_RATE, target_rate)
+                                .unwrap_or_default()
+                        };
+
+                        if let Ok(encoded) =
+                            slot.transcoder.encode_from_f32(&resampled, slot.codec_pt)
+                        {
+                            if !encoded.is_empty() {
+                                let header = build_rtp_header(
+                                    slot.codec_pt,
+                                    slot.rtp_seq,
+                                    slot.rtp_ts,
+                                    slot.rtp_ssrc,
+                                );
+                                let mut rtp = header.to_vec();
+                                rtp.extend_from_slice(&encoded);
+                                slot.rtp_seq = slot.rtp_seq.wrapping_add(1);
+                                slot.rtp_ts = slot
+                                    .rtp_ts
+                                    .wrapping_add(rtp_clock_increment(slot.codec_pt));
+                                let _ = slot.outbound_tx.try_send(rtp);
+                            }
+                        }
+
+                        // Check timeout (only after prompt finishes).
+                        if state.prompt_done {
+                            if state.timeout_ticks_remaining == 0 {
+                                completed_interactions
+                                    .push((lid.clone(), InteractionResult::Timeout));
+                            } else {
+                                state.timeout_ticks_remaining -= 1;
+                            }
+                        }
+                    }
+                }
+            }
+        }
+
+        // DTMF from a leg that was isolated during this tick belongs to its
+        // interaction and must never reach other legs. Drop it now, while the role
+        // still says Isolated: once the completions below flip the leg back to
+        // Participant, step 6 could no longer tell it apart from ordinary DTMF and
+        // would forward the consent/IVR digit to everyone else.
+        dtmf_forward.retain(|(src_lid, _)| {
+            legs.get(src_lid)
+                .map_or(true, |slot| !matches!(slot.role, LegRole::Isolated(_)))
+        });
+
+        // Apply completed interactions — revert legs to Participant.
+        for (lid, result) in completed_interactions {
+            if let Some(slot) = legs.get_mut(&lid) {
+                if let LegRole::Isolated(ref mut state) = slot.role {
+                    if let Some(tx) = state.result_tx.take() {
+                        let _ = tx.send(result);
+                    }
+                }
+                slot.role = LegRole::Participant;
+            }
+        }
+
+        // ── 5. Distribute per-source audio to tool legs. ────────────
+        if !tool_legs.is_empty() {
+            // Collect participant PCM frames (computed in step 2).
+            let sources: Vec<ToolAudioSource> = legs
+                .iter()
+                .filter(|(_, s)| matches!(s.role, LegRole::Participant))
+                .map(|(lid, s)| ToolAudioSource {
+                    leg_id: lid.clone(),
+                    pcm_48k: s.last_pcm_frame.clone(),
+                })
+                .collect();
+
+            for tool in tool_legs.values() {
+                let batch = ToolAudioBatch {
+                    sources: sources
+                        .iter()
+                        .map(|s| ToolAudioSource {
+                            leg_id: s.leg_id.clone(),
+                            pcm_48k: s.pcm_48k.clone(),
+                        })
+                        .collect(),
+                };
+                // Non-blocking send — drop batch if tool can't keep up.
+                let _ = tool.audio_tx.try_send(batch);
+            }
+        }
+
+        // ── 6. Forward DTMF packets between participant legs only. ──
+        //    Packets from legs that were isolated this tick were already removed
+        //    from `dtmf_forward` after step 4.
+        for (source_lid, dtmf_pkt) in &dtmf_forward {
+            for (target_lid, target_slot) in legs.iter_mut() {
+                if target_lid == source_lid {
+                    continue; // Don't echo DTMF back to sender.
+                }
+                // Don't forward to isolated legs.
+                if matches!(target_slot.role, LegRole::Isolated(_)) {
+                    continue;
+                }
+                let mut header = build_rtp_header(
+                    101,
+                    target_slot.rtp_seq,
+                    target_slot.rtp_ts,
+                    target_slot.rtp_ssrc,
+                );
+                if dtmf_pkt.marker {
+                    header[1] |= 0x80; // Set marker bit.
+                }
+                let mut rtp_out = header.to_vec();
+                rtp_out.extend_from_slice(&dtmf_pkt.payload);
+                target_slot.rtp_seq = target_slot.rtp_seq.wrapping_add(1);
+                // Don't increment rtp_ts for DTMF — it shares timestamp context with audio.
+                let _ = target_slot.outbound_tx.try_send(rtp_out);
+            }
+        }
+    }
+}
--- a/rust/crates/proxy-engine/src/provider.rs
+++ b/rust/crates/proxy-engine/src/provider.rs
@@ -321,6 +321,17 @@ impl ProviderManager {
        None
    }

+    /// Look up the provider whose configured ID equals `provider_id` (e.g. "easybell").
+    ///
+    /// Providers are checked in list order; each one is locked only long enough to
+    /// compare its config ID. Returns a new handle to the first match, or `None`.
+    pub async fn find_by_provider_id(&self, provider_id: &str) -> Option<Arc<Mutex<ProviderState>>> {
+        for candidate in &self.providers {
+            let is_match = candidate.lock().await.config.id == provider_id;
+            if is_match {
+                return Some(Arc::clone(candidate));
+            }
+        }
+        None
+    }
+
    /// Check if a provider is currently registered.
    pub async fn is_registered(&self, provider_id: &str) -> bool {
        for ps_arc in &self.providers {
--- a/rust/crates/proxy-engine/src/recorder.rs
+++ b/rust/crates/proxy-engine/src/recorder.rs
@@ -55,6 +55,56 @@ impl Recorder {
        })
    }

+    /// Create a recorder that writes raw PCM at a given sample rate.
+    /// Used by tool legs that already have decoded PCM (no RTP processing needed).
+    ///
+    /// - `file_path`: destination WAV file; missing parent directories are created.
+    /// - `sample_rate`: rate written to the WAV header (mono, 16-bit integer samples).
+    /// - `max_duration_ms`: optional cap on the recording length; `None` means no cap.
+    ///
+    /// Returns an error string if the directory or the WAV file cannot be created,
+    /// or if the codec state fails to initialize.
+    pub fn new_pcm(file_path: &str, sample_rate: u32, max_duration_ms: Option<u64>) -> Result<Self, String> {
+        if let Some(parent) = Path::new(file_path).parent() {
+            std::fs::create_dir_all(parent)
+                .map_err(|e| format!("create dir: {e}"))?;
+        }
+
+        let spec = hound::WavSpec {
+            channels: 1,
+            sample_rate,
+            bits_per_sample: 16,
+            sample_format: hound::SampleFormat::Int,
+        };
+
+        let writer = hound::WavWriter::create(file_path, spec)
+            .map_err(|e| format!("create WAV {file_path}: {e}"))?;
+
+        // The struct always carries a transcoder; `write_pcm` does not touch it.
+        let transcoder = TranscodeState::new().map_err(|e| format!("codec init: {e}"))?;
+        // Saturate rather than overflow when the caller passes a huge duration.
+        let max_samples =
+            max_duration_ms.map(|ms| u64::from(sample_rate).saturating_mul(ms) / 1000);
+
+        Ok(Self {
+            writer,
+            transcoder,
+            // source_pt is unused for PCM recording; set to 0.
+            source_pt: 0,
+            total_samples: 0,
+            sample_rate,
+            max_samples,
+            file_path: file_path.to_string(),
+        })
+    }
+
+    /// Write raw PCM samples directly (no RTP decoding).
+    /// Returns true if recording should continue, false if max duration reached
+    /// or a sample could not be written.
+    ///
+    /// Once the limit has been reached, further calls write nothing and return false,
+    /// so the file never grows past `max_samples`.
+    pub fn write_pcm(&mut self, samples: &[i16]) -> bool {
+        for &sample in samples {
+            // Check the cap before writing: a call made after the limit was hit
+            // (or a cap of zero) must not append another sample.
+            if matches!(self.max_samples, Some(max) if self.total_samples >= max) {
+                return false;
+            }
+            if self.writer.write_sample(sample).is_err() {
+                return false;
+            }
+            self.total_samples += 1;
+        }
+        // Report "stop" as soon as the cap is reached, even when this call filled it exactly.
+        !matches!(self.max_samples, Some(max) if self.total_samples >= max)
+    }
+
    /// Process an incoming RTP packet (full packet with header).
    /// Returns true if recording should continue, false if max duration reached.
    pub fn process_rtp(&mut self, data: &[u8]) -> bool {
--- a/rust/crates/proxy-engine/src/sip_leg.rs
+++ b/rust/crates/proxy-engine/src/sip_leg.rs
@@ -0,0 +1,475 @@
+//! SipLeg — manages one side of a B2BUA call.
+//!
+//! Handles the full INVITE lifecycle:
+//! - Send INVITE with SDP
+//! - Handle 407 Proxy Authentication (digest auth retry)
+//! - Handle 200 OK (ACK, learn media endpoint)
+//! - Handle BYE/CANCEL (teardown)
+//! - Track SIP dialog state (early → confirmed → terminated)
+//!
+//! Ported from ts/call/sip-leg.ts.
+
+use sip_proto::dialog::{DialogState, SipDialog};
+use sip_proto::helpers::{
+    build_sdp, compute_digest_auth, generate_branch, generate_tag, parse_digest_challenge,
+    parse_sdp_endpoint, SdpOptions,
+};
+use sip_proto::message::{RequestOptions, SipMessage};
+use std::net::SocketAddr;
+use std::sync::Arc;
+use tokio::net::UdpSocket;
+
+/// State of a SIP leg.
+#[derive(Debug, Clone, Copy, PartialEq, Eq)]
+pub enum LegState {
+    /// Initial state; also set when the INVITE is sent.
+    Inviting,
+    /// A 180 or 183 response was received.
+    Ringing,
+    /// A 2xx response to the INVITE was received.
+    Connected,
+    /// A BYE or CANCEL was built locally via `build_hangup`.
+    Terminating,
+    /// The leg ended: rejected, authentication failed, or the peer sent BYE.
+    Terminated,
+}
+
+/// Configuration for creating a SIP leg.
+pub struct SipLegConfig {
+    /// Proxy LAN IP (for Via, Contact, SDP).
+    pub lan_ip: String,
+    /// Proxy LAN port.
+    pub lan_port: u16,
+    /// Public IP (for provider-facing legs).
+    /// When set, it replaces `lan_ip` in Via, Contact and SDP.
+    pub public_ip: Option<String>,
+    /// SIP target endpoint (provider outbound proxy or device address).
+    pub sip_target: SocketAddr,
+    /// Provider credentials (for 407 auth).
+    pub username: Option<String>,
+    /// Digest password; without it a 407 challenge terminates the leg.
+    pub password: Option<String>,
+    /// Registered address-of-record. Used as the From URI of the authenticated
+    /// INVITE; without it a 407 challenge terminates the leg.
+    pub registered_aor: Option<String>,
+    /// Codec payload types to offer.
+    pub codecs: Vec<u8>,
+    /// Our RTP port for SDP.
+    pub rtp_port: u16,
+}
+
+/// A SIP leg with full dialog management.
+pub struct SipLeg {
+    /// Identifier of this leg, supplied by the creator.
+    pub id: String,
+    /// Current lifecycle state; starts as `Inviting`.
+    pub state: LegState,
+    /// Addresses, credentials and codec settings used to build requests.
+    pub config: SipLegConfig,
+    /// Dialog created from the outgoing INVITE; `None` until `send_invite` runs.
+    pub dialog: Option<SipDialog>,
+
+    /// The INVITE we sent (needed for CANCEL and 407 ACK).
+    /// Replaced by the authenticated INVITE after a 407 retry.
+    invite: Option<SipMessage>,
+    /// Original unauthenticated INVITE (for re-ACKing retransmitted 407s).
+    orig_invite: Option<SipMessage>,
+    /// Whether we've attempted digest auth.
+    auth_attempted: bool,
+
+    /// Remote media endpoint (learned from SDP in 200 OK).
+    pub remote_media: Option<SocketAddr>,
+}
+
+impl SipLeg {
+    /// Create a leg in the `Inviting` state with no dialog, no stored INVITE and
+    /// no known remote media endpoint. Nothing is sent until `send_invite` is called.
+    pub fn new(id: String, config: SipLegConfig) -> Self {
+        Self {
+            id,
+            config,
+            state: LegState::Inviting,
+            // Populated by `send_invite`.
+            dialog: None,
+            invite: None,
+            // Populated while answering a 407 challenge.
+            orig_invite: None,
+            auth_attempted: false,
+            // Learned from the SDP of the 2xx response.
+            remote_media: None,
+        }
+    }
+
+    /// Build the initial INVITE (with an SDP offer) and send it to the configured SIP target.
+    ///
+    /// The advertised address is the public IP when configured, otherwise the LAN IP.
+    /// The dialog is created from the INVITE, the INVITE is stored for later
+    /// CANCEL/ACK handling, and the leg is put into `Inviting`.
+    /// Socket send errors are ignored (best effort).
+    pub async fn send_invite(
+        &mut self,
+        from_uri: &str,
+        to_uri: &str,
+        sip_call_id: &str,
+        socket: &UdpSocket,
+    ) {
+        let advertised_ip = self
+            .config
+            .public_ip
+            .as_deref()
+            .unwrap_or(&self.config.lan_ip);
+        let local_port = self.config.lan_port;
+
+        let offer = build_sdp(&SdpOptions {
+            ip: advertised_ip,
+            port: self.config.rtp_port,
+            payload_types: &self.config.codecs,
+            ..Default::default()
+        });
+
+        let options = RequestOptions {
+            via_host: advertised_ip.to_string(),
+            via_port: local_port,
+            via_transport: None,
+            via_branch: Some(generate_branch()),
+            from_uri: from_uri.to_string(),
+            from_display_name: None,
+            from_tag: Some(generate_tag()),
+            to_uri: to_uri.to_string(),
+            to_display_name: None,
+            to_tag: None,
+            call_id: Some(sip_call_id.to_string()),
+            cseq: Some(1),
+            contact: Some(format!("<sip:{advertised_ip}:{}>", local_port)),
+            max_forwards: Some(70),
+            body: Some(offer),
+            content_type: Some("application/sdp".to_string()),
+            extra_headers: Some(vec![
+                ("User-Agent".to_string(), "SipRouter/1.0".to_string()),
+            ]),
+        };
+        let invite = SipMessage::create_request("INVITE", to_uri, options);
+
+        self.dialog = Some(SipDialog::from_uac_invite(&invite, advertised_ip, local_port));
+        self.invite = Some(invite.clone());
+        self.state = LegState::Inviting;
+
+        let wire = invite.serialize();
+        let _ = socket.send_to(&wire, self.config.sip_target).await;
+    }
+
+    /// Dispatch a SIP message routed to this leg.
+    ///
+    /// Responses go to the response handler, everything else to the request handler.
+    /// The returned action tells the caller what to send (e.g. ACK, auth retry INVITE)
+    /// and whether the leg changed state.
+    pub fn handle_message(&mut self, msg: &SipMessage) -> SipLegAction {
+        if !msg.is_response() {
+            return self.handle_request(msg);
+        }
+        self.handle_response(msg)
+    }
+
+    /// Process a response routed to this leg and decide what the caller must do next.
+    ///
+    /// Only responses whose CSeq method is INVITE are considered:
+    /// - error responses (>= 400) to the superseded, unauthenticated INVITE are re-ACKed,
+    /// - 407 starts (or fails) the digest-auth retry,
+    /// - 180/183 move the leg to `Ringing` (ignored once the leg has progressed further),
+    /// - 2xx is ACKed; the first one also records the remote media endpoint,
+    /// - any other final response (>= 300) terminates the leg.
+    fn handle_response(&mut self, msg: &SipMessage) -> SipLegAction {
+        let code = msg.status_code().unwrap_or(0);
+        let cseq_method = msg.cseq_method().unwrap_or("").to_uppercase();
+
+        if cseq_method != "INVITE" {
+            return SipLegAction::None;
+        }
+
+        // Handle retransmitted 407 for the original unauthenticated INVITE.
+        if self.auth_attempted {
+            if let Some(dialog) = &self.dialog {
+                let response_cseq: u32 = msg
+                    .get_header("CSeq")
+                    .and_then(|s| s.split_whitespace().next())
+                    .and_then(|s| s.parse().ok())
+                    .unwrap_or(0);
+                if response_cseq < dialog.local_cseq && code >= 400 {
+                    // ACK the retransmitted error response.
+                    if let Some(orig) = &self.orig_invite {
+                        let ack = build_non_2xx_ack(orig, msg);
+                        return SipLegAction::Send(ack.serialize());
+                    }
+                    return SipLegAction::None;
+                }
+            }
+        }
+
+        // Handle 407 Proxy Authentication Required.
+        if code == 407 {
+            return self.handle_auth_challenge(msg);
+        }
+
+        // Update dialog state.
+        if let Some(dialog) = &mut self.dialog {
+            dialog.process_response(msg);
+        }
+
+        if code == 180 || code == 183 {
+            // A provisional response can arrive late (reordered or retransmitted over
+            // UDP). It must not drag a connected or ending leg back to Ringing.
+            if !matches!(self.state, LegState::Inviting | LegState::Ringing) {
+                return SipLegAction::None;
+            }
+            self.state = LegState::Ringing;
+            SipLegAction::StateChange(LegState::Ringing)
+        } else if code >= 200 && code < 300 {
+            // ACK the 200 OK.
+            let ack_buf = if let Some(dialog) = &self.dialog {
+                let ack = dialog.create_ack();
+                Some(ack.serialize())
+            } else {
+                None
+            };
+
+            // If already connected (200 retransmit), just re-ACK.
+            if self.state == LegState::Connected {
+                return match ack_buf {
+                    Some(buf) => SipLegAction::Send(buf),
+                    None => SipLegAction::None,
+                };
+            }
+
+            // Learn media endpoint from SDP.
+            if msg.has_sdp_body() {
+                if let Some(ep) = parse_sdp_endpoint(&msg.body) {
+                    if let Ok(addr) = format!("{}:{}", ep.address, ep.port).parse() {
+                        self.remote_media = Some(addr);
+                    }
+                }
+            }
+
+            self.state = LegState::Connected;
+
+            match ack_buf {
+                Some(buf) => SipLegAction::ConnectedWithAck(buf),
+                None => SipLegAction::StateChange(LegState::Connected),
+            }
+        } else if code >= 300 {
+            // NOTE(review): no ACK is built here for the final non-2xx response;
+            // confirm the caller sends one (RFC 3261 section 17.1.1.3).
+            self.state = LegState::Terminated;
+            if let Some(dialog) = &mut self.dialog {
+                dialog.terminate();
+            }
+            SipLegAction::Terminated(format!("rejected_{code}"))
+        } else {
+            SipLegAction::None // 1xx provisional
+        }
+    }
+
+    /// Answer a 407 Proxy Authentication Required with a digest-authenticated INVITE.
+    ///
+    /// Only one retry is made: a second 407 terminates the leg with `auth_rejected`.
+    /// A missing or unparsable challenge, or a missing password or AOR, terminates
+    /// the leg (and its dialog) with a `407_*` reason.
+    ///
+    /// The digest user name is the configured `username` when it is set and
+    /// non-empty; otherwise it is the user part of the registered AOR.
+    ///
+    /// On success returns `AuthRetry` carrying the ACK for the 407 and the new
+    /// INVITE (same Call-ID and From tag, CSeq 2, `Proxy-Authorization` header).
+    fn handle_auth_challenge(&mut self, msg: &SipMessage) -> SipLegAction {
+        // A second 407 means the credentials were rejected; do not loop.
+        if self.auth_attempted {
+            return self.fail_auth("auth_rejected");
+        }
+        self.auth_attempted = true;
+
+        let challenge_header = match msg.get_header("Proxy-Authenticate") {
+            Some(h) => h,
+            None => return self.fail_auth("407_no_challenge"),
+        };
+
+        let challenge = match parse_digest_challenge(challenge_header) {
+            Some(c) => c,
+            None => return self.fail_auth("407_bad_challenge"),
+        };
+
+        let password = match self.config.password.clone() {
+            Some(p) => p,
+            None => return self.fail_auth("407_no_password"),
+        };
+
+        let aor = match self.config.registered_aor.clone() {
+            Some(a) => a,
+            None => return self.fail_auth("407_no_aor"),
+        };
+
+        // Fallback user name: the user part of the AOR.
+        let aor_user = aor
+            .trim_start_matches("sip:")
+            .trim_start_matches("sips:")
+            .split('@')
+            .next()
+            .unwrap_or("");
+        // Prefer the explicitly configured credential; it may differ from the AOR user.
+        let username = self
+            .config
+            .username
+            .as_deref()
+            .filter(|u| !u.is_empty())
+            .unwrap_or(aor_user)
+            .to_string();
+
+        let dest_uri = self
+            .invite
+            .as_ref()
+            .and_then(|i| i.request_uri())
+            .unwrap_or("")
+            .to_string();
+
+        let auth_value = compute_digest_auth(
+            &username,
+            &password,
+            &challenge.realm,
+            &challenge.nonce,
+            "INVITE",
+            &dest_uri,
+            challenge.algorithm.as_deref(),
+            challenge.opaque.as_deref(),
+        );
+
+        // ACK the 407.
+        let mut ack_buf = None;
+        if let Some(invite) = &self.invite {
+            let ack = build_non_2xx_ack(invite, msg);
+            ack_buf = Some(ack.serialize());
+        }
+
+        // Save original INVITE for retransmission handling.
+        self.orig_invite = self.invite.clone();
+
+        // Build authenticated INVITE with same From tag, CSeq=2.
+        let ip = self
+            .config
+            .public_ip
+            .as_deref()
+            .unwrap_or(&self.config.lan_ip);
+        let from_tag = self
+            .dialog
+            .as_ref()
+            .map(|d| d.local_tag.clone())
+            .unwrap_or_else(generate_tag);
+
+        let sdp = build_sdp(&SdpOptions {
+            ip,
+            port: self.config.rtp_port,
+            payload_types: &self.config.codecs,
+            ..Default::default()
+        });
+
+        let call_id = self
+            .dialog
+            .as_ref()
+            .map(|d| d.call_id.clone())
+            .unwrap_or_default();
+
+        let invite_auth = SipMessage::create_request(
+            "INVITE",
+            &dest_uri,
+            RequestOptions {
+                via_host: ip.to_string(),
+                via_port: self.config.lan_port,
+                via_transport: None,
+                // New branch: the retry is a new transaction.
+                via_branch: Some(generate_branch()),
+                from_uri: aor,
+                from_display_name: None,
+                from_tag: Some(from_tag),
+                to_uri: dest_uri.clone(),
+                to_display_name: None,
+                to_tag: None,
+                call_id: Some(call_id),
+                cseq: Some(2),
+                contact: Some(format!("<sip:{ip}:{}>", self.config.lan_port)),
+                max_forwards: Some(70),
+                body: Some(sdp),
+                content_type: Some("application/sdp".to_string()),
+                extra_headers: Some(vec![
+                    ("Proxy-Authorization".to_string(), auth_value),
+                    ("User-Agent".to_string(), "SipRouter/1.0".to_string()),
+                ]),
+            },
+        );
+
+        // From now on CANCEL and ACK must refer to the authenticated INVITE.
+        self.invite = Some(invite_auth.clone());
+        if let Some(dialog) = &mut self.dialog {
+            dialog.local_cseq = 2;
+        }
+
+        // Return both the ACK for the 407 and the new authenticated INVITE.
+        let invite_buf = invite_auth.serialize();
+        SipLegAction::AuthRetry {
+            ack_407: ack_buf,
+            invite_with_auth: invite_buf,
+        }
+    }
+
+    /// Mark the leg and its dialog as terminated and report `reason` to the caller.
+    fn fail_auth(&mut self, reason: &str) -> SipLegAction {
+        self.state = LegState::Terminated;
+        if let Some(dialog) = &mut self.dialog {
+            dialog.terminate();
+        }
+        SipLegAction::Terminated(reason.to_string())
+    }
+
+    /// Process an in-dialog request from the peer.
+    ///
+    /// - BYE: answered with 200 OK; the leg and its dialog are terminated.
+    /// - INFO: answered with 200 OK; no state change.
+    /// - Any other method: no action.
+    fn handle_request(&mut self, msg: &SipMessage) -> SipLegAction {
+        match msg.method().unwrap_or("") {
+            "BYE" => {
+                let reply = SipMessage::create_response(200, "OK", msg, None);
+                self.state = LegState::Terminated;
+                if let Some(dialog) = self.dialog.as_mut() {
+                    dialog.terminate();
+                }
+                SipLegAction::SendAndTerminate(reply.serialize(), "bye".to_string())
+            }
+            "INFO" => {
+                let reply = SipMessage::create_response(200, "OK", msg, None);
+                SipLegAction::Send(reply.serialize())
+            }
+            _ => SipLegAction::None,
+        }
+    }
+
+    /// Build the request that tears this leg down.
+    ///
+    /// A confirmed dialog gets a BYE; an early dialog gets a CANCEL for the stored
+    /// INVITE. Returns `None` when there is no dialog, the dialog is in any other
+    /// state, or an early dialog has no stored INVITE. On success the leg becomes
+    /// `Terminating` and the dialog is terminated.
+    pub fn build_hangup(&mut self) -> Option<Vec<u8>> {
+        let dialog = self.dialog.as_mut()?;
+
+        let request = match dialog.state {
+            DialogState::Confirmed => dialog.create_request("BYE", None, None, None),
+            DialogState::Early => dialog.create_cancel(self.invite.as_ref()?),
+            _ => return None,
+        };
+
+        self.state = LegState::Terminating;
+        dialog.terminate();
+        Some(request.serialize())
+    }
+
+    /// SIP Call-ID of this leg's dialog (used for routing), or `None` before a dialog exists.
+    pub fn sip_call_id(&self) -> Option<&str> {
+        let dialog = self.dialog.as_ref()?;
+        Some(dialog.call_id.as_str())
+    }
+}
+
+/// Actions produced by the SipLeg message handler.
+///
+/// Byte payloads are serialized SIP messages ready to be sent by the caller.
+pub enum SipLegAction {
+    /// No action needed.
+    None,
+    /// Send a SIP message (ACK, 200 OK to INFO, etc.).
+    Send(Vec<u8>),
+    /// Leg state changed.
+    StateChange(LegState),
+    /// Connected — send this ACK.
+    ConnectedWithAck(Vec<u8>),
+    /// Terminated with a reason.
+    Terminated(String),
+    /// Send 200 OK and terminate.
+    /// Carries the serialized response and the termination reason.
+    SendAndTerminate(Vec<u8>, String),
+    /// 407 auth retry — send ACK for 407, then send new INVITE with auth.
+    AuthRetry {
+        /// ACK for the 407; `None` when no INVITE was stored to build it from.
+        ack_407: Option<Vec<u8>>,
+        /// The new INVITE carrying the `Proxy-Authorization` header.
+        invite_with_auth: Vec<u8>,
+    },
+}
+
+/// Build the ACK for a non-2xx final response; it belongs to the INVITE's transaction.
+///
+/// Via, From, Call-ID, the CSeq number and the request URI are copied from
+/// `original_invite`; To is copied from `response`. Missing headers become empty
+/// strings, a missing or unparsable CSeq number becomes 1, and a missing request
+/// URI becomes `sip:unknown`. The ACK has no body.
+fn build_non_2xx_ack(original_invite: &SipMessage, response: &SipMessage) -> SipMessage {
+    // Header value as an owned string, empty when the header is absent.
+    let header_of =
+        |message: &SipMessage, name: &str| message.get_header(name).unwrap_or("").to_string();
+
+    let cseq_num = original_invite
+        .get_header("CSeq")
+        .and_then(|value| value.split_whitespace().next())
+        .and_then(|number| number.parse::<u32>().ok())
+        .unwrap_or(1);
+
+    let request_line = format!(
+        "ACK {} SIP/2.0",
+        original_invite.request_uri().unwrap_or("sip:unknown")
+    );
+
+    let headers = vec![
+        ("Via".to_string(), header_of(original_invite, "Via")),
+        ("From".to_string(), header_of(original_invite, "From")),
+        ("To".to_string(), header_of(response, "To")),
+        ("Call-ID".to_string(), original_invite.call_id().to_string()),
+        ("CSeq".to_string(), format!("{cseq_num} ACK")),
+        ("Max-Forwards".to_string(), "70".to_string()),
+        ("Content-Length".to_string(), "0".to_string()),
+    ];
+
+    SipMessage::new(request_line, headers, String::new())
+}
--- a/rust/crates/proxy-engine/src/tool_leg.rs
+++ b/rust/crates/proxy-engine/src/tool_leg.rs
@@ -0,0 +1,143 @@
+//! Tool leg consumers — background tasks that process per-source unmerged audio.
+//!
+//! Tool legs are observer legs that receive individual audio streams from each
+//! participant in a call. The mixer pipes `ToolAudioBatch` every 20ms containing
+//! each participant's decoded PCM@48kHz f32 tagged with source leg ID.
+//!
+//! Consumers:
+//! - **Recording**: writes per-source WAV files for speaker-separated recording.
+//! - **Transcription**: stub for future Whisper integration (currently only counts received samples per source).
+
+use crate::ipc::{emit_event, OutTx};
+use crate::mixer::ToolAudioBatch;
+use crate::recorder::Recorder;
+use std::collections::HashMap;
+use tokio::sync::mpsc;
+use tokio::task::JoinHandle;
+
+// ---------------------------------------------------------------------------
+// Recording consumer
+// ---------------------------------------------------------------------------
+
+/// Spawn a recording tool leg that writes per-source WAV files.
+///
+/// Returns the channel sender (for the mixer to send batches) and the task handle.
+/// When the channel is closed (tool leg removed), all WAV files are finalized
+/// and a `tool_recording_done` event is emitted.
+///
+/// A file is created for a source on its first non-silent frame, at
+/// `{base_dir}/{call_id}-{leg_id}.wav` (48 kHz). If a file cannot be created, or a
+/// write reports that recording must stop, that source is skipped from then on;
+/// the other sources keep recording and the final event is still emitted.
+pub fn spawn_recording_tool(
+    tool_leg_id: String,
+    call_id: String,
+    base_dir: String,
+    out_tx: OutTx,
+) -> (mpsc::Sender<ToolAudioBatch>, JoinHandle<()>) {
+    let (tx, mut rx) = mpsc::channel::<ToolAudioBatch>(64);
+
+    let handle = tokio::spawn(async move {
+        let mut recorders: HashMap<String, Recorder> = HashMap::new();
+        // Sources that must not be written to any more: the file could not be
+        // created, or a previous write reported "stop".
+        let mut stopped: std::collections::HashSet<String> = std::collections::HashSet::new();
+
+        while let Some(batch) = rx.recv().await {
+            for source in &batch.sources {
+                if stopped.contains(&source.leg_id) {
+                    continue;
+                }
+
+                if !recorders.contains_key(&source.leg_id) {
+                    // Skip silence-only frames (near-zero = no audio activity):
+                    // don't create a file for silence-only sources.
+                    let has_audio = source.pcm_48k.iter().any(|&s| s.abs() > 1e-6);
+                    if !has_audio {
+                        continue;
+                    }
+
+                    let path = format!("{}/{}-{}.wav", base_dir, call_id, source.leg_id);
+                    match Recorder::new_pcm(&path, 48000, None) {
+                        Ok(rec) => {
+                            recorders.insert(source.leg_id.clone(), rec);
+                        }
+                        Err(e) => {
+                            // An I/O failure for one source must not kill the whole task.
+                            eprintln!(
+                                "[tool_leg] failed to create recorder for {}: {e}",
+                                source.leg_id
+                            );
+                            stopped.insert(source.leg_id.clone());
+                            continue;
+                        }
+                    }
+                }
+
+                if let Some(recorder) = recorders.get_mut(&source.leg_id) {
+                    // Convert f32 [-1.0, 1.0] to i16 for WAV writing.
+                    let pcm_i16: Vec<i16> = source
+                        .pcm_48k
+                        .iter()
+                        .map(|&s| (s * 32767.0).round().clamp(-32768.0, 32767.0) as i16)
+                        .collect();
+                    if !recorder.write_pcm(&pcm_i16) {
+                        // Stop recording this source only; it stays in `recorders`
+                        // so it is still finalized and reported below.
+                        stopped.insert(source.leg_id.clone());
+                    }
+                }
+            }
+        }
+
+        // Channel closed — finalize all recordings.
+        let mut files = Vec::new();
+        for (leg_id, rec) in recorders {
+            let result = rec.stop();
+            files.push(serde_json::json!({
+                "source_leg_id": leg_id,
+                "file_path": result.file_path,
+                "duration_ms": result.duration_ms,
+            }));
+        }
+
+        emit_event(
+            &out_tx,
+            "tool_recording_done",
+            serde_json::json!({
+                "call_id": call_id,
+                "tool_leg_id": tool_leg_id,
+                "files": files,
+            }),
+        );
+    });
+
+    (tx, handle)
+}
+
+// ---------------------------------------------------------------------------
+// Transcription consumer (stub — real plumbing, stub consumer)
+// ---------------------------------------------------------------------------
+
+/// Spawn a transcription tool leg (stub consumer).
+///
+/// The task receives the mixer's per-source batches on the returned channel and
+/// keeps a running sample count for every source leg. When the channel closes it
+/// emits a `tool_transcription_done` event listing each source with its duration,
+/// computed at 48 kHz. No audio is transcribed yet.
+pub fn spawn_transcription_tool(
+    tool_leg_id: String,
+    call_id: String,
+    out_tx: OutTx,
+) -> (mpsc::Sender<ToolAudioBatch>, JoinHandle<()>) {
+    let (tx, mut rx) = mpsc::channel::<ToolAudioBatch>(64);
+
+    let handle = tokio::spawn(async move {
+        // Total samples received so far, keyed by source leg ID.
+        let mut samples_by_source: HashMap<String, u64> = HashMap::new();
+
+        while let Some(batch) = rx.recv().await {
+            for source in &batch.sources {
+                let frame_len = source.pcm_48k.len() as u64;
+                let counter = samples_by_source
+                    .entry(source.leg_id.clone())
+                    .or_insert(0);
+                *counter += frame_len;
+
+                // TODO: Future — accumulate chunks and stream to Whisper endpoint.
+                // For now, the audio is received and counted but not processed.
+            }
+        }
+
+        // Channel closed — build the per-source report.
+        let mut sources: Vec<serde_json::Value> = Vec::with_capacity(samples_by_source.len());
+        for (leg_id, &total) in &samples_by_source {
+            sources.push(serde_json::json!({
+                "source_leg_id": leg_id,
+                "duration_ms": (total * 1000) / 48000,
+            }));
+        }
+
+        let payload = serde_json::json!({
+            "call_id": call_id,
+            "tool_leg_id": tool_leg_id,
+            "sources": sources,
+        });
+        emit_event(&out_tx, "tool_transcription_done", payload);
+    });
+
+    (tx, handle)
+}
--- a/rust/crates/proxy-engine/src/tts.rs
+++ b/rust/crates/proxy-engine/src/tts.rs
@@ -0,0 +1,138 @@
+//! Text-to-speech engine — synthesizes text to WAV files using Kokoro neural TTS.
+//!
+//! The model is loaded lazily on first use. If the model/voices files are not
+//! present, the generate command returns an error and the TS side falls back
+//! to espeak-ng.
+
+use kokoro_tts::{KokoroTts, Voice};
+use std::path::Path;
+
+/// Wraps the Kokoro TTS engine with lazy model loading.
+pub struct TtsEngine {
+    /// Loaded engine; `None` until the first successful `generate` call loads it.
+    tts: Option<KokoroTts>,
+    /// Path that was used to load the current model (for cache invalidation).
+    loaded_model_path: String,
+    /// Path of the voices file used for the current model; a change triggers a reload.
+    loaded_voices_path: String,
+}
+
+impl TtsEngine {
+    /// Create an engine with no model loaded; loading happens on the first `generate` call.
+    pub fn new() -> Self {
+        // Empty paths never equal a real path, so the first request always loads.
+        let loaded_model_path = String::new();
+        let loaded_voices_path = String::new();
+        Self {
+            tts: None,
+            loaded_model_path,
+            loaded_voices_path,
+        }
+    }
+
+    /// Generate a WAV file from text.
+    ///
+    /// Params (from IPC JSON):
+    ///   - `model`: path to the ONNX model file
+    ///   - `voices`: path to the voices.bin file
+    ///   - `voice`: voice name (e.g. "af_bella"); defaults to "af_bella"
+    ///   - `text`: text to synthesize
+    ///   - `output`: output WAV file path
+    ///
+    /// The model is loaded on first use and reloaded when either path changes.
+    /// Output is 24 kHz, 16-bit, mono.
+    ///
+    /// Returns `{ "output": <path> }` on success. Returns an error string when a
+    /// required param is missing, the text is empty, a model file is absent, or
+    /// loading, synthesis or writing fails. If writing fails after the output file
+    /// was created, the partial file is removed so it cannot be mistaken for a
+    /// finished one.
+    pub async fn generate(&mut self, params: &serde_json::Value) -> Result<serde_json::Value, String> {
+        let model_path = params.get("model").and_then(|v| v.as_str())
+            .ok_or("missing 'model' param")?;
+        let voices_path = params.get("voices").and_then(|v| v.as_str())
+            .ok_or("missing 'voices' param")?;
+        let voice_name = params.get("voice").and_then(|v| v.as_str())
+            .unwrap_or("af_bella");
+        let text = params.get("text").and_then(|v| v.as_str())
+            .ok_or("missing 'text' param")?;
+        let output_path = params.get("output").and_then(|v| v.as_str())
+            .ok_or("missing 'output' param")?;
+
+        if text.is_empty() {
+            return Err("empty text".into());
+        }
+
+        // Check that model/voices files exist.
+        if !Path::new(model_path).exists() {
+            return Err(format!("model not found: {model_path}"));
+        }
+        if !Path::new(voices_path).exists() {
+            return Err(format!("voices not found: {voices_path}"));
+        }
+
+        // Lazy-load or reload if paths changed.
+        let needs_load = self.tts.is_none()
+            || self.loaded_model_path != model_path
+            || self.loaded_voices_path != voices_path;
+        if needs_load {
+            eprintln!("[tts] loading model: {model_path}");
+            let tts = KokoroTts::new(model_path, voices_path)
+                .await
+                .map_err(|e| format!("model load failed: {e:?}"))?;
+            self.tts = Some(tts);
+            self.loaded_model_path = model_path.to_string();
+            self.loaded_voices_path = voices_path.to_string();
+        }
+
+        // Always `Some` after the block above; reported as an error rather than a panic.
+        let tts = self.tts.as_ref().ok_or("tts engine not loaded")?;
+        let voice = select_voice(voice_name);
+
+        eprintln!("[tts] synthesizing voice '{voice_name}': \"{text}\"");
+        let (samples, duration) = tts.synth(text, voice)
+            .await
+            .map_err(|e| format!("synthesis failed: {e:?}"))?;
+        eprintln!("[tts] synthesized {} samples in {duration:?}", samples.len());
+
+        // Write 24kHz 16-bit mono WAV.
+        let spec = hound::WavSpec {
+            channels: 1,
+            sample_rate: 24000,
+            bits_per_sample: 16,
+            sample_format: hound::SampleFormat::Int,
+        };
+
+        let mut writer = hound::WavWriter::create(output_path, spec)
+            .map_err(|e| format!("WAV create failed: {e}"))?;
+
+        let mut write_err: Option<String> = None;
+        for &sample in &samples {
+            let s16 = (sample * 32767.0).round().clamp(-32768.0, 32767.0) as i16;
+            if let Err(e) = writer.write_sample(s16) {
+                write_err = Some(format!("WAV write: {e}"));
+                break;
+            }
+        }
+
+        let written = match write_err {
+            Some(e) => {
+                // Close the file before removing it.
+                drop(writer);
+                Err(e)
+            }
+            None => writer.finalize().map_err(|e| format!("WAV finalize: {e}")),
+        };
+        if let Err(e) = written {
+            // Only reached after the file was created by this call.
+            let _ = std::fs::remove_file(output_path);
+            return Err(e);
+        }
+
+        eprintln!("[tts] wrote {output_path}");
+        Ok(serde_json::json!({ "output": output_path }))
+    }
+}
+
+/// Translate a voice name such as "af_bella" into the matching Kokoro `Voice` variant.
+///
+/// Every voice is built with the same parameter value, 1.0. An unrecognised name
+/// is logged to stderr and mapped to `af_bella`.
+fn select_voice(name: &str) -> Voice {
+    // Shared parameter passed to every variant.
+    let speed = 1.0;
+    match name {
+        "af_bella" => Voice::AfBella(speed),
+        "af_heart" => Voice::AfHeart(speed),
+        "af_jessica" => Voice::AfJessica(speed),
+        "af_nicole" => Voice::AfNicole(speed),
+        "af_nova" => Voice::AfNova(speed),
+        "af_sarah" => Voice::AfSarah(speed),
+        "af_sky" => Voice::AfSky(speed),
+        "af_river" => Voice::AfRiver(speed),
+        "af_alloy" => Voice::AfAlloy(speed),
+        "af_aoede" => Voice::AfAoede(speed),
+        "af_kore" => Voice::AfKore(speed),
+        "am_adam" => Voice::AmAdam(speed),
+        "am_echo" => Voice::AmEcho(speed),
+        "am_eric" => Voice::AmEric(speed),
+        "am_fenrir" => Voice::AmFenrir(speed),
+        "am_liam" => Voice::AmLiam(speed),
+        "am_michael" => Voice::AmMichael(speed),
+        "am_onyx" => Voice::AmOnyx(speed),
+        "am_puck" => Voice::AmPuck(speed),
+        "bf_alice" => Voice::BfAlice(speed),
+        "bf_emma" => Voice::BfEmma(speed),
+        "bf_isabella" => Voice::BfIsabella(speed),
+        "bf_lily" => Voice::BfLily(speed),
+        "bm_daniel" => Voice::BmDaniel(speed),
+        "bm_fable" => Voice::BmFable(speed),
+        "bm_george" => Voice::BmGeorge(speed),
+        "bm_lewis" => Voice::BmLewis(speed),
+        unknown => {
+            eprintln!("[tts] unknown voice '{unknown}', falling back to af_bella");
+            Voice::AfBella(speed)
+        }
+    }
+}
--- a/rust/crates/proxy-engine/src/webrtc_engine.rs
+++ b/rust/crates/proxy-engine/src/webrtc_engine.rs
@@ -1,16 +1,17 @@
-//! WebRTC engine — manages browser PeerConnections with SIP audio bridging.
+//! WebRTC engine — manages browser PeerConnections.
 //!
-//! Browser Opus audio → Rust PeerConnection → transcode via codec-lib → SIP RTP
-//! SIP RTP → transcode via codec-lib → Rust PeerConnection → Browser Opus
+//! Audio bridging is now channel-based:
+//! - Browser Opus audio → on_track → mixer inbound channel
+//! - Mixer outbound channel → Opus RTP → TrackLocalStaticRTP → browser
+//!
+//! The mixer handles all transcoding. The WebRTC engine just shuttles raw Opus.

 use crate::ipc::{emit_event, OutTx};
-use crate::rtp::{build_rtp_header, rtp_clock_increment};
-use codec_lib::{TranscodeState, PT_G722, PT_OPUS};
+use crate::mixer::RtpPacket;
+use codec_lib::PT_OPUS;
 use std::collections::HashMap;
-use std::net::SocketAddr;
 use std::sync::Arc;
-use tokio::net::UdpSocket;
-use tokio::sync::Mutex;
+use tokio::sync::{mpsc, Mutex};
 use webrtc::api::media_engine::MediaEngine;
 use webrtc::api::APIBuilder;
 use webrtc::ice_transport::ice_candidate::RTCIceCandidateInit;
@@ -22,24 +23,14 @@ use webrtc::rtp_transceiver::rtp_codec::RTCRtpCodecCapability;
 use webrtc::track::track_local::track_local_static_rtp::TrackLocalStaticRTP;
 use webrtc::track::track_local::{TrackLocal, TrackLocalWriter};

-/// SIP-side bridge info for a WebRTC session.
-#[derive(Clone)]
-pub struct SipBridgeInfo {
-    /// Provider's media endpoint (RTP destination).
-    pub provider_media: SocketAddr,
-    /// Provider's codec payload type (e.g. 9 for G.722).
-    pub sip_pt: u8,
-    /// The SIP UDP socket for sending RTP to the provider.
-    pub sip_socket: Arc<UdpSocket>,
-}
-
 /// A managed WebRTC session.
 struct WebRtcSession {
    pc: Arc<RTCPeerConnection>,
    local_track: Arc<TrackLocalStaticRTP>,
    call_id: Option<String>,
-    /// SIP bridge — set when the session is linked to a call.
-    sip_bridge: Arc<Mutex<Option<SipBridgeInfo>>>,
+    /// Channel sender for forwarding browser Opus audio to the mixer.
+    /// Set when the session is linked to a call via link_to_mixer().
+    mixer_tx: Arc<Mutex<Option<mpsc::Sender<RtpPacket>>>>,
 }

 /// Manages all WebRTC sessions.
@@ -56,7 +47,7 @@ impl WebRtcEngine {
        }
    }

-    /// Handle a WebRTC offer from a browser.
+    /// Handle a WebRTC offer from a browser — create PeerConnection, return SDP answer.
    pub async fn handle_offer(
        &mut self,
        session_id: &str,
@@ -99,8 +90,9 @@ impl WebRtcEngine {
            .await
            .map_err(|e| format!("add track: {e}"))?;

-        // Shared SIP bridge info (populated when linked to a call).
-        let sip_bridge: Arc<Mutex<Option<SipBridgeInfo>>> = Arc::new(Mutex::new(None));
+        // Shared mixer channel sender (populated when linked to a call).
+        let mixer_tx: Arc<Mutex<Option<mpsc::Sender<RtpPacket>>>> =
+            Arc::new(Mutex::new(None));

        // ICE candidate handler.
        let out_tx_ice = self.out_tx.clone();
@@ -151,14 +143,14 @@ impl WebRtcEngine {
        }));

        // Track handler — receives Opus audio from the browser.
-        // When SIP bridge is set, transcodes and forwards to provider.
+        // Forwards raw Opus payload to the mixer channel (when linked).
        let out_tx_track = self.out_tx.clone();
        let sid_track = session_id.to_string();
-        let sip_bridge_for_track = sip_bridge.clone();
+        let mixer_tx_for_track = mixer_tx.clone();
        pc.on_track(Box::new(move |track, _receiver, _transceiver| {
            let out_tx = out_tx_track.clone();
            let sid = sid_track.clone();
-            let bridge = sip_bridge_for_track.clone();
+            let mixer_tx = mixer_tx_for_track.clone();
            Box::pin(async move {
                let codec_info = track.codec();
                emit_event(
@@ -171,8 +163,8 @@ impl WebRtcEngine {
                    }),
                );

-                // Spawn the browser→SIP audio forwarding task.
-                tokio::spawn(browser_to_sip_loop(track, bridge, out_tx, sid));
+                // Spawn browser→mixer forwarding task.
+                tokio::spawn(browser_to_mixer_loop(track, mixer_tx, out_tx, sid));
            })
        }));

@@ -199,67 +191,41 @@ impl WebRtcEngine {
                pc,
                local_track,
                call_id: None,
-                sip_bridge,
+                mixer_tx,
            },
        );

        Ok(answer_sdp)
    }

-    /// Link a WebRTC session to a SIP call — sets up the audio bridge.
-    pub async fn link_to_sip(
+    /// Link a WebRTC session to a call's mixer via channels; returns `false` if the session is unknown.
+    /// - `inbound_tx`: browser audio goes TO the mixer through this channel
+    /// - `outbound_rx`: mixed audio comes FROM the mixer through this channel
+    pub async fn link_to_mixer(
        &mut self,
        session_id: &str,
        call_id: &str,
-        bridge_info: SipBridgeInfo,
+        inbound_tx: mpsc::Sender<RtpPacket>,
+        outbound_rx: mpsc::Receiver<Vec<u8>>,
    ) -> bool {
-        if let Some(session) = self.sessions.get_mut(session_id) {
-            session.call_id = Some(call_id.to_string());
-            let mut bridge = session.sip_bridge.lock().await;
-            *bridge = Some(bridge_info);
-            true
-        } else {
-            false
-        }
-    }
+        let session = match self.sessions.get_mut(session_id) {
+            Some(s) => s,
+            None => return false,
+        };

-    /// Send transcoded audio from the SIP side to the browser.
-    /// Called by the RTP relay when it receives a packet from the provider.
-    pub async fn forward_sip_to_browser(
-        &self,
-        session_id: &str,
-        sip_rtp_payload: &[u8],
-        sip_pt: u8,
-    ) -> Result<(), String> {
-        let session = self
-            .sessions
-            .get(session_id)
-            .ok_or_else(|| format!("session {session_id} not found"))?;
+        session.call_id = Some(call_id.to_string());

-        // Transcode SIP codec → Opus.
-        // We create a temporary TranscodeState per packet for simplicity.
-        // TODO: Use a per-session persistent state for proper codec continuity.
-        let mut transcoder = TranscodeState::new().map_err(|e| format!("codec: {e}"))?;
-        let opus_payload = transcoder
-            .transcode(sip_rtp_payload, sip_pt, PT_OPUS, Some("to_browser"))
-            .map_err(|e| format!("transcode: {e}"))?;
-
-        if opus_payload.is_empty() {
-            return Ok(());
+        // Set the mixer sender so the on_track loop starts forwarding.
+        {
+            let mut tx = session.mixer_tx.lock().await;
+            *tx = Some(inbound_tx);
        }

-        // Build RTP header for Opus.
-        // TODO: Track seq/ts/ssrc per session for proper continuity.
-        let header = build_rtp_header(PT_OPUS, 0, 0, 0);
-        let mut packet = header.to_vec();
-        packet.extend_from_slice(&opus_payload);
+        // Spawn mixer→browser outbound task.
+        let local_track = session.local_track.clone();
+        tokio::spawn(mixer_to_browser_loop(outbound_rx, local_track));

-        session
-            .local_track
-            .write(&packet)
-            .await
-            .map(|_| ())
-            .map_err(|e| format!("write: {e}"))
+        true
    }

    pub async fn add_ice_candidate(
@@ -294,90 +260,50 @@ impl WebRtcEngine {
        }
        Ok(())
    }
-
-    pub fn has_session(&self, session_id: &str) -> bool {
-        self.sessions.contains_key(session_id)
-    }
 }

-/// Browser → SIP audio forwarding loop.
-/// Reads Opus RTP from the browser, transcodes to the SIP codec, sends to provider.
-async fn browser_to_sip_loop(
+/// Browser → Mixer forwarding loop: reads Opus RTP from the browser track and sends each
+/// non-empty payload to the mixer channel. Packets read before the session is linked are dropped.
+async fn browser_to_mixer_loop(
    track: Arc<webrtc::track::track_remote::TrackRemote>,
-    sip_bridge: Arc<Mutex<Option<SipBridgeInfo>>>,
+    mixer_tx: Arc<Mutex<Option<mpsc::Sender<RtpPacket>>>>,
    out_tx: OutTx,
    session_id: String,
 ) {
-    // Create a persistent codec state for this direction.
-    let mut transcoder = match TranscodeState::new() {
-        Ok(t) => t,
-        Err(e) => {
-            emit_event(
-                &out_tx,
-                "webrtc_error",
-                serde_json::json!({ "session_id": session_id, "error": format!("codec init: {e}") }),
-            );
-            return;
-        }
-    };
-
    let mut buf = vec![0u8; 1500];
    let mut count = 0u64;
-    let mut to_sip_seq: u16 = 0;
-    let mut to_sip_ts: u32 = 0;
-    let to_sip_ssrc: u32 = rand::random();

    loop {
        match track.read(&mut buf).await {
            Ok((rtp_packet, _attributes)) => {
                count += 1;

-                // Get the SIP bridge info (may not be set yet if call isn't linked).
-                let bridge = sip_bridge.lock().await;
-                let bridge_info = match bridge.as_ref() {
-                    Some(b) => b.clone(),
-                    None => continue, // Not linked to a SIP call yet — drop the packet.
-                };
-                drop(bridge); // Release lock before doing I/O.
-
-                // Extract Opus payload from the RTP packet (skip 12-byte header).
                let payload = &rtp_packet.payload;
                if payload.is_empty() {
                    continue;
                }

-                // Transcode Opus → SIP codec (e.g. G.722).
-                let sip_payload = match transcoder.transcode(
-                    payload,
-                    PT_OPUS,
-                    bridge_info.sip_pt,
-                    Some("to_sip"),
-                ) {
-                    Ok(p) if !p.is_empty() => p,
-                    _ => continue,
-                };
-
-                // Build SIP RTP packet.
-                let header = build_rtp_header(bridge_info.sip_pt, to_sip_seq, to_sip_ts, to_sip_ssrc);
-                let mut sip_rtp = header.to_vec();
-                sip_rtp.extend_from_slice(&sip_payload);
-
-                to_sip_seq = to_sip_seq.wrapping_add(1);
-                to_sip_ts = to_sip_ts.wrapping_add(rtp_clock_increment(bridge_info.sip_pt));
-
-                // Send to provider.
-                let _ = bridge_info
-                    .sip_socket
-                    .send_to(&sip_rtp, bridge_info.provider_media)
-                    .await;
+                // Send raw Opus payload to mixer (if linked).
+                let tx = mixer_tx.lock().await;
+                if let Some(ref tx) = *tx {
+                    let _ = tx
+                        .send(RtpPacket {
+                            payload: payload.to_vec(),
+                            payload_type: PT_OPUS,
+                            marker: false,
+                            timestamp: 0,
+                        })
+                        .await;
+                }
+                drop(tx);

                if count == 1 || count == 50 || count % 500 == 0 {
                    emit_event(
                        &out_tx,
-                        "webrtc_audio_tx",
+                        "webrtc_audio_rx",
                        serde_json::json!({
                            "session_id": session_id,
-                            "direction": "browser_to_sip",
+                            "direction": "browser_to_mixer",
                            "packet_count": count,
                        }),
                    );
@@ -387,3 +313,14 @@ async fn browser_to_sip_loop(
        }
    }
 }
+
+/// Mixer → Browser forwarding loop: writes each Opus RTP packet received from the mixer
+/// to the WebRTC track, ignoring write errors, until the outbound channel closes.
+async fn mixer_to_browser_loop(
+    mut outbound_rx: mpsc::Receiver<Vec<u8>>,
+    local_track: Arc<TrackLocalStaticRTP>,
+) {
+    while let Some(rtp_data) = outbound_rx.recv().await {
+        let _ = local_track.write(&rtp_data).await;
+    }
+}
--- a/rust/crates/tts-engine/Cargo.toml
+++ b/rust/crates/tts-engine/Cargo.toml
@@ -1,18 +0,0 @@
-[package]
-name = "tts-engine"
-version = "0.1.0"
-edition = "2021"
-
-[[bin]]
-name = "tts-engine"
-path = "src/main.rs"
-
-[dependencies]
-kokoro-tts = { version = "0.3", default-features = false }
-# Pin to rc.11 matching kokoro-tts's expectation; enable vendored TLS to avoid system libssl-dev.
-ort = { version = "=2.0.0-rc.11", default-features = false, features = [
-    "std", "download-binaries", "copy-dylibs", "ndarray",
-    "tls-native-vendored"
-] }
-tokio = { version = "1", features = ["rt-multi-thread", "macros"] }
-hound = "3.5"
--- a/rust/crates/tts-engine/src/main.rs
+++ b/rust/crates/tts-engine/src/main.rs
@@ -1,149 +0,0 @@
-/// TTS engine CLI — synthesizes text to a WAV file using Kokoro neural TTS.
-///
-/// Usage:
-///   echo "Hello world" | tts-engine --model kokoro-v1.0.onnx --voices voices.bin --output out.wav
-///   tts-engine --model kokoro-v1.0.onnx --voices voices.bin --output out.wav --text "Hello world"
-///
-/// Outputs 24kHz 16-bit mono WAV.
-
-use kokoro_tts::{KokoroTts, Voice};
-use std::io::{self, Read};
-
-fn parse_args() -> Result<(String, String, String, String, Option<String>), String> {
-    let args: Vec<String> = std::env::args().collect();
-    let mut model = String::new();
-    let mut voices = String::new();
-    let mut output = String::new();
-    let mut text: Option<String> = None;
-    let mut voice_name: Option<String> = None;
-
-    let mut i = 1;
-    while i < args.len() {
-        match args[i].as_str() {
-            "--model" => { i += 1; model = args.get(i).cloned().unwrap_or_default(); }
-            "--voices" => { i += 1; voices = args.get(i).cloned().unwrap_or_default(); }
-            "--output" | "--output_file" => { i += 1; output = args.get(i).cloned().unwrap_or_default(); }
-            "--text" => { i += 1; text = args.get(i).cloned(); }
-            "--voice" => { i += 1; voice_name = args.get(i).cloned(); }
-            _ => {}
-        }
-        i += 1;
-    }
-
-    if model.is_empty() { return Err("--model required".into()); }
-    if voices.is_empty() { return Err("--voices required".into()); }
-    if output.is_empty() { return Err("--output required".into()); }
-
-    let voice_str = voice_name.unwrap_or_else(|| "af_bella".into());
-
-    Ok((model, voices, output, voice_str, text))
-}
-
-fn select_voice(name: &str) -> Voice {
-    match name {
-        "af_bella" => Voice::AfBella(1.0),
-        "af_heart" => Voice::AfHeart(1.0),
-        "af_jessica" => Voice::AfJessica(1.0),
-        "af_nicole" => Voice::AfNicole(1.0),
-        "af_nova" => Voice::AfNova(1.0),
-        "af_sarah" => Voice::AfSarah(1.0),
-        "af_sky" => Voice::AfSky(1.0),
-        "af_river" => Voice::AfRiver(1.0),
-        "af_alloy" => Voice::AfAlloy(1.0),
-        "af_aoede" => Voice::AfAoede(1.0),
-        "af_kore" => Voice::AfKore(1.0),
-        "am_adam" => Voice::AmAdam(1.0),
-        "am_echo" => Voice::AmEcho(1.0),
-        "am_eric" => Voice::AmEric(1.0),
-        "am_fenrir" => Voice::AmFenrir(1.0),
-        "am_liam" => Voice::AmLiam(1.0),
-        "am_michael" => Voice::AmMichael(1.0),
-        "am_onyx" => Voice::AmOnyx(1.0),
-        "am_puck" => Voice::AmPuck(1.0),
-        "bf_alice" => Voice::BfAlice(1.0),
-        "bf_emma" => Voice::BfEmma(1.0),
-        "bf_isabella" => Voice::BfIsabella(1.0),
-        "bf_lily" => Voice::BfLily(1.0),
-        "bm_daniel" => Voice::BmDaniel(1.0),
-        "bm_fable" => Voice::BmFable(1.0),
-        "bm_george" => Voice::BmGeorge(1.0),
-        "bm_lewis" => Voice::BmLewis(1.0),
-        _ => {
-            eprintln!("[tts-engine] unknown voice '{}', falling back to af_bella", name);
-            Voice::AfBella(1.0)
-        }
-    }
-}
-
-#[tokio::main]
-async fn main() {
-    let (model_path, voices_path, output_path, voice_name, text_arg) = match parse_args() {
-        Ok(v) => v,
-        Err(e) => {
-            eprintln!("Error: {}", e);
-            eprintln!("Usage: tts-engine --model <model.onnx> --voices <voices.bin> --output <output.wav> [--text <text>] [--voice <voice_name>]");
-            std::process::exit(1);
-        }
-    };
-
-    // Get text from --text arg or stdin.
-    let text = match text_arg {
-        Some(t) => t,
-        None => {
-            let mut buf = String::new();
-            io::stdin().read_to_string(&mut buf).expect("failed to read stdin");
-            buf.trim().to_string()
-        }
-    };
-
-    if text.is_empty() {
-        eprintln!("[tts-engine] no text provided");
-        std::process::exit(1);
-    }
-
-    eprintln!("[tts-engine] loading model: {}", model_path);
-    let tts = match KokoroTts::new(&model_path, &voices_path).await {
-        Ok(t) => t,
-        Err(e) => {
-            eprintln!("[tts-engine] failed to load model: {:?}", e);
-            std::process::exit(1);
-        }
-    };
-
-    let voice = select_voice(&voice_name);
-    eprintln!("[tts-engine] synthesizing with voice '{}': \"{}\"", voice_name, text);
-
-    let (samples, duration) = match tts.synth(&text, voice).await {
-        Ok(r) => r,
-        Err(e) => {
-            eprintln!("[tts-engine] synthesis failed: {:?}", e);
-            std::process::exit(1);
-        }
-    };
-
-    eprintln!("[tts-engine] synthesized {} samples in {:?}", samples.len(), duration);
-
-    // Write WAV: 24kHz, 16-bit, mono (same format announcement.ts expects).
-    let spec = hound::WavSpec {
-        channels: 1,
-        sample_rate: 24000,
-        bits_per_sample: 16,
-        sample_format: hound::SampleFormat::Int,
-    };
-
-    let mut writer = match hound::WavWriter::create(&output_path, spec) {
-        Ok(w) => w,
-        Err(e) => {
-            eprintln!("[tts-engine] failed to create WAV: {}", e);
-            std::process::exit(1);
-        }
-    };
-
-    for &sample in &samples {
-        let s16 = (sample * 32767.0).round().clamp(-32768.0, 32767.0) as i16;
-        writer.write_sample(s16).unwrap();
-    }
-    writer.finalize().unwrap();
-
-    eprintln!("[tts-engine] wrote {}", output_path);
-}
--- a/ts/00_commitinfo_data.ts
+++ b/ts/00_commitinfo_data.ts
@@ -3,6 +3,6 @@
 */
 export const commitinfo = {
  name: 'siprouter',
-  version: '1.12.0',
+  version: '1.17.0',
  description: 'undefined'
 }
--- a/ts/announcement.ts
+++ b/ts/announcement.ts
@@ -1,59 +1,22 @@
 /**
- * TTS announcement module — pre-generates audio announcements using espeak-ng
- * and caches them as encoded RTP packets for playback during call setup.
+ * TTS announcement module — generates announcement WAV files at startup.
 *
- * On startup, generates the announcement WAV via espeak-ng (formant-based TTS
- * with highly accurate pronunciation), encodes each 20ms frame to G.722 (for
- * SIP) and Opus (for WebRTC) via the Rust transcoder, and caches the packets.
+ * Engine priority: espeak-ng (formant TTS, fast) → Kokoro neural TTS via
+ * proxy-engine → disabled.
 *
- * Falls back to the Rust tts-engine (Kokoro neural TTS) if espeak-ng is not
- * installed, and disables announcements if neither is available.
+ * The generated WAV is left on disk for Rust's audio_player / start_interaction
+ * to play during calls. No encoding or RTP playback happens in TypeScript.
 */

 import { execSync } from 'node:child_process';
 import fs from 'node:fs';
 import path from 'node:path';
-import { Buffer } from 'node:buffer';
-import { encodePcm, isCodecReady } from './opusbridge.ts';
-
-/** RTP clock increment per 20ms frame for each codec. */
-function rtpClockIncrement(pt: number): number {
-  if (pt === 111) return 960;
-  if (pt === 9) return 160;
-  return 160;
-}
-
-/** Build a fresh RTP header. */
-function buildRtpHeader(pt: number, seq: number, ts: number, ssrc: number, marker: boolean): Buffer {
-  const hdr = Buffer.alloc(12);
-  hdr[0] = 0x80;
-  hdr[1] = (marker ? 0x80 : 0) | (pt & 0x7f);
-  hdr.writeUInt16BE(seq & 0xffff, 2);
-  hdr.writeUInt32BE(ts >>> 0, 4);
-  hdr.writeUInt32BE(ssrc >>> 0, 8);
-  return hdr;
-}
-
-// ---------------------------------------------------------------------------
-// Types
-// ---------------------------------------------------------------------------
-
-/** A pre-encoded announcement ready for RTP playback. */
-export interface IAnnouncementCache {
-  /** G.722 encoded frames (each is a 20ms frame payload, no RTP header). */
-  g722Frames: Buffer[];
-  /** Opus encoded frames for WebRTC playback. */
-  opusFrames: Buffer[];
-  /** Total duration in milliseconds. */
-  durationMs: number;
-}
+import { sendProxyCommand, isProxyReady } from './proxybridge.ts';

 // ---------------------------------------------------------------------------
 // State
 // ---------------------------------------------------------------------------

-let cachedAnnouncement: IAnnouncementCache | null = null;
-
 const TTS_DIR = path.join(process.cwd(), '.nogit', 'tts');
 const ANNOUNCEMENT_TEXT = "Hello. I'm connecting your call now.";
 const CACHE_WAV = path.join(TTS_DIR, 'announcement.wav');
@@ -64,12 +27,10 @@ const KOKORO_VOICES = 'voices.bin';
 const KOKORO_VOICE = 'af_bella';

 // ---------------------------------------------------------------------------
-// Initialization
+// TTS generators
 // ---------------------------------------------------------------------------

-/**
- * Check if espeak-ng is available on the system.
- */
+/** Check if espeak-ng is available on the system. */
 function isEspeakAvailable(): boolean {
  try {
    execSync('which espeak-ng', { stdio: 'pipe' });
@@ -79,10 +40,7 @@ function isEspeakAvailable(): boolean {
  }
 }

-/**
- * Generate announcement WAV via espeak-ng (primary engine).
- * Returns true on success.
- */
+/** Generate announcement WAV via espeak-ng (primary engine). */
 function generateViaEspeak(wavPath: string, text: string, log: (msg: string) => void): boolean {
  log('[tts] generating announcement audio via espeak-ng...');
  try {
@@ -98,11 +56,8 @@ function generateViaEspeak(wavPath: string, text: string, log: (msg: string) =>
  }
 }

-/**
- * Generate announcement WAV via Kokoro TTS (fallback engine).
- * Returns true on success.
- */
-function generateViaKokoro(wavPath: string, text: string, log: (msg: string) => void): boolean {
+/** Generate announcement WAV via Kokoro TTS (fallback, runs inside proxy-engine). */
+async function generateViaKokoro(wavPath: string, text: string, log: (msg: string) => void): Promise<boolean> {
  const modelPath = path.join(TTS_DIR, KOKORO_MODEL);
  const voicesPath = path.join(TTS_DIR, KOKORO_VOICES);

@@ -111,25 +66,21 @@ function generateViaKokoro(wavPath: string, text: string, log: (msg: string) =>
    return false;
  }

-  const root = process.cwd();
-  const ttsBinPaths = [
-    path.join(root, 'dist_rust', 'tts-engine'),
-    path.join(root, 'rust', 'target', 'release', 'tts-engine'),
-    path.join(root, 'rust', 'target', 'debug', 'tts-engine'),
-  ];
-  const ttsBin = ttsBinPaths.find((p) => fs.existsSync(p));
-  if (!ttsBin) {
-    log('[tts] tts-engine binary not found — Kokoro fallback unavailable');
+  if (!isProxyReady()) {
+    log('[tts] proxy-engine not ready — Kokoro fallback unavailable');
    return false;
  }

  log('[tts] generating announcement audio via Kokoro TTS (fallback)...');
  try {
-    execSync(
-      `"${ttsBin}" --model "${modelPath}" --voices "${voicesPath}" --voice "${KOKORO_VOICE}" --output "${wavPath}" --text "${text}"`,
-      { timeout: 120000, stdio: 'pipe' },
-    );
-    log('[tts] Kokoro WAV generated');
+    await sendProxyCommand('generate_tts', {
+      model: modelPath,
+      voices: voicesPath,
+      voice: KOKORO_VOICE,
+      text,
+      output: wavPath,
+    });
+    log('[tts] Kokoro WAV generated (via proxy-engine)');
    return true;
  } catch (e: any) {
    log(`[tts] Kokoro failed: ${e.message}`);
@@ -137,40 +88,13 @@ function generateViaKokoro(wavPath: string, text: string, log: (msg: string) =>
  }
 }

-/**
- * Read a WAV file and detect its sample rate from the fmt chunk.
- * Returns { pcm, sampleRate } or null on failure.
- */
-function readWavWithRate(wavPath: string): { pcm: Buffer; sampleRate: number } | null {
-  const wav = fs.readFileSync(wavPath);
-  if (wav.length < 44) return null;
-  if (wav.toString('ascii', 0, 4) !== 'RIFF') return null;
-  if (wav.toString('ascii', 8, 12) !== 'WAVE') return null;
-
-  let sampleRate = 22050; // default
-  let offset = 12;
-  let pcm: Buffer | null = null;
-
-  while (offset < wav.length - 8) {
-    const chunkId = wav.toString('ascii', offset, offset + 4);
-    const chunkSize = wav.readUInt32LE(offset + 4);
-    if (chunkId === 'fmt ') {
-      sampleRate = wav.readUInt32LE(offset + 12);
-    }
-    if (chunkId === 'data') {
-      pcm = wav.subarray(offset + 8, offset + 8 + chunkSize);
-    }
-    offset += 8 + chunkSize;
-    if (offset % 2 !== 0) offset++;
-  }
-
-  if (!pcm) return null;
-  return { pcm, sampleRate };
-}
+// ---------------------------------------------------------------------------
+// Initialization
+// ---------------------------------------------------------------------------

 /**
- * Pre-generate the announcement audio and encode to G.722 + Opus frames.
- * Must be called after the codec bridge is initialized.
+ * Pre-generate the announcement WAV file.
+ * Must be called after the proxy engine is initialized.
 *
 * Engine priority: espeak-ng → Kokoro → disabled.
 */
@@ -178,7 +102,6 @@ export async function initAnnouncement(log: (msg: string) => void): Promise<bool
  fs.mkdirSync(TTS_DIR, { recursive: true });

  try {
-    // Generate WAV if not cached.
    if (!fs.existsSync(CACHE_WAV)) {
      let generated = false;

@@ -189,9 +112,9 @@ export async function initAnnouncement(log: (msg: string) => void): Promise<bool
        log('[tts] espeak-ng not installed — trying Kokoro fallback');
      }

-      // Fall back to Kokoro.
+      // Fall back to Kokoro (via proxy-engine).
      if (!generated) {
-        generated = generateViaKokoro(CACHE_WAV, ANNOUNCEMENT_TEXT, log);
+        generated = await generateViaKokoro(CACHE_WAV, ANNOUNCEMENT_TEXT, log);
      }

      if (!generated) {
@@ -200,49 +123,7 @@ export async function initAnnouncement(log: (msg: string) => void): Promise<bool
      }
    }

-    // Read WAV and extract raw PCM + sample rate.
-    const result = readWavWithRate(CACHE_WAV);
-    if (!result) {
-      log('[tts] failed to parse WAV file');
-      return false;
-    }
-
-    const { pcm, sampleRate } = result;
-
-    // Wait for codec bridge to be ready.
-    if (!isCodecReady()) {
-      log('[tts] codec bridge not ready — will retry');
-      return false;
-    }
-
-    // Encode in 20ms chunks. The Rust encoder resamples to each codec's native rate.
-    const FRAME_SAMPLES = Math.floor(sampleRate * 0.02);
-    const FRAME_BYTES = FRAME_SAMPLES * 2; // 16-bit = 2 bytes per sample
-    const totalFrames = Math.floor(pcm.length / FRAME_BYTES);
-
-    const g722Frames: Buffer[] = [];
-    const opusFrames: Buffer[] = [];
-
-    log(`[tts] encoding ${totalFrames} frames (${FRAME_SAMPLES} samples/frame @ ${sampleRate}Hz)...`);
-    for (let i = 0; i < totalFrames; i++) {
-      const framePcm = pcm.subarray(i * FRAME_BYTES, (i + 1) * FRAME_BYTES);
-      const pcmBuf = Buffer.from(framePcm);
-      const [g722, opus] = await Promise.all([
-        encodePcm(pcmBuf, sampleRate, 9),   // G.722 for SIP devices
-        encodePcm(pcmBuf, sampleRate, 111),  // Opus for WebRTC browsers
-      ]);
-      if (g722) g722Frames.push(g722);
-      if (opus) opusFrames.push(opus);
-      if (!g722 && !opus && i < 3) log(`[tts] frame ${i} encode failed`);
-    }
-
-    cachedAnnouncement = {
-      g722Frames,
-      opusFrames,
-      durationMs: totalFrames * 20,
-    };
-
-    log(`[tts] announcement cached: ${g722Frames.length} frames (${(totalFrames * 20 / 1000).toFixed(1)}s)`);
+    log('[tts] announcement WAV ready');
    return true;
  } catch (e: any) {
    log(`[tts] init error: ${e.message}`);
@@ -250,100 +131,7 @@ export async function initAnnouncement(log: (msg: string) => void): Promise<bool
  }
 }

-// ---------------------------------------------------------------------------
-// Playback
-// ---------------------------------------------------------------------------
-
-/**
- * Play the pre-cached announcement to an RTP endpoint.
- *
- * @param sendPacket - function to send a raw RTP packet
- * @param ssrc - SSRC to use in RTP headers
- * @param onDone - called when the announcement finishes
- * @returns a cancel function, or null if no announcement is cached
- */
-export function playAnnouncement(
-  sendPacket: (pkt: Buffer) => void,
-  ssrc: number,
-  onDone?: () => void,
-): (() => void) | null {
-  if (!cachedAnnouncement || cachedAnnouncement.g722Frames.length === 0) {
-    onDone?.();
-    return null;
-  }
-
-  const frames = cachedAnnouncement.g722Frames;
-  const PT = 9; // G.722
-  let frameIdx = 0;
-  let seq = Math.floor(Math.random() * 0xffff);
-  let rtpTs = Math.floor(Math.random() * 0xffffffff);
-
-  const timer = setInterval(() => {
-    if (frameIdx >= frames.length) {
-      clearInterval(timer);
-      onDone?.();
-      return;
-    }
-
-    const payload = frames[frameIdx];
-    const hdr = buildRtpHeader(PT, seq & 0xffff, rtpTs >>> 0, ssrc >>> 0, frameIdx === 0);
-    const pkt = Buffer.concat([hdr, payload]);
-    sendPacket(pkt);
-
-    seq++;
-    rtpTs += rtpClockIncrement(PT);
-    frameIdx++;
-  }, 20);
-
-  // Return cancel function.
-  return () => clearInterval(timer);
+/** Get the path to the cached announcement WAV, or null if the file does not exist on disk. */
+export function getAnnouncementWavPath(): string | null {
+  return fs.existsSync(CACHE_WAV) ? CACHE_WAV : null;
 }
-
-/**
- * Play pre-cached Opus announcement to a WebRTC PeerConnection sender.
- *
- * @param sendRtpPacket - function to send a raw RTP packet via sender.sendRtp()
- * @param ssrc - SSRC to use in RTP headers
- * @param onDone - called when announcement finishes
- * @returns cancel function, or null if no announcement cached
- */
-export function playAnnouncementToWebRtc(
-  sendRtpPacket: (pkt: Buffer) => void,
-  ssrc: number,
-  counters: { seq: number; ts: number },
-  onDone?: () => void,
-): (() => void) | null {
-  if (!cachedAnnouncement || cachedAnnouncement.opusFrames.length === 0) {
-    onDone?.();
-    return null;
-  }
-
-  const frames = cachedAnnouncement.opusFrames;
-  const PT = 111; // Opus
-  let frameIdx = 0;
-
-  const timer = setInterval(() => {
-    if (frameIdx >= frames.length) {
-      clearInterval(timer);
-      onDone?.();
-      return;
-    }
-
-    const payload = frames[frameIdx];
-    const hdr = buildRtpHeader(PT, counters.seq & 0xffff, counters.ts >>> 0, ssrc >>> 0, frameIdx === 0);
-    const pkt = Buffer.concat([hdr, payload]);
-    sendRtpPacket(pkt);
-
-    counters.seq++;
-    counters.ts += 960; // Opus at 48kHz: 960 samples per 20ms
-    frameIdx++;
-  }, 20);
-
-  return () => clearInterval(timer);
-}
-
-/** Check if an announcement is cached and ready. */
-export function isAnnouncementReady(): boolean {
-  return cachedAnnouncement !== null && cachedAnnouncement.g722Frames.length > 0;
-}
-
--- a/ts/call/prompt-cache.ts
+++ b/ts/call/prompt-cache.ts
@@ -1,55 +1,31 @@
 /**
- * PromptCache — manages multiple named audio prompts for IVR and voicemail.
+ * PromptCache — manages named audio prompt WAV files for IVR and voicemail.
 *
- * Each prompt is pre-encoded as both G.722 frames (for SIP legs) and Opus
- * frames (for WebRTC legs), ready for 20ms RTP playback.
+ * Generates WAV files via espeak-ng (primary) or Kokoro TTS through the
+ * proxy-engine (fallback). Also supports loading pre-existing WAV files
+ * and programmatic tone generation.
 *
- * Supports three sources:
- * 1. TTS generation via espeak-ng (primary) or Kokoro (fallback)
- * 2. Loading from a pre-existing WAV file
- * 3. Programmatic tone generation (beep, etc.)
- *
- * The existing announcement.ts system continues to work independently;
- * this module provides generalized prompt management for IVR/voicemail.
+ * All audio playback happens in Rust (audio_player / start_interaction).
+ * This module only manages WAV files on disk.
 */

 import { execSync } from 'node:child_process';
 import fs from 'node:fs';
 import path from 'node:path';
 import { Buffer } from 'node:buffer';
-import { encodePcm, isCodecReady } from '../opusbridge.ts';
-
-/** RTP clock increment per 20ms frame for each codec. */
-function rtpClockIncrement(pt: number): number {
-  if (pt === 111) return 960;
-  if (pt === 9) return 160;
-  return 160;
-}
-
-/** Build a fresh RTP header. */
-function buildRtpHeader(pt: number, seq: number, ts: number, ssrc: number, marker: boolean): Buffer {
-  const hdr = Buffer.alloc(12);
-  hdr[0] = 0x80;
-  hdr[1] = (marker ? 0x80 : 0) | (pt & 0x7f);
-  hdr.writeUInt16BE(seq & 0xffff, 2);
-  hdr.writeUInt32BE(ts >>> 0, 4);
-  hdr.writeUInt32BE(ssrc >>> 0, 8);
-  return hdr;
-}
+import { sendProxyCommand, isProxyReady } from '../proxybridge.ts';

 // ---------------------------------------------------------------------------
 // Types
 // ---------------------------------------------------------------------------

-/** A pre-encoded prompt ready for RTP playback. */
+/** A cached prompt — just a WAV file path and metadata. */
 export interface ICachedPrompt {
  /** Unique prompt identifier. */
  id: string;
-  /** G.722 encoded frames (20ms each, no RTP header). */
-  g722Frames: Buffer[];
-  /** Opus encoded frames (20ms each, no RTP header). */
-  opusFrames: Buffer[];
-  /** Total duration in milliseconds. */
+  /** Path to the WAV file on disk. */
+  wavPath: string;
+  /** Total duration in milliseconds (approximate, from WAV header). */
  durationMs: number;
 }

@@ -82,84 +58,61 @@ function generateViaEspeak(wavPath: string, text: string): boolean {
  }
 }

-/** Generate WAV via Kokoro TTS. */
-function generateViaKokoro(wavPath: string, text: string, voice: string): boolean {
+/** Generate WAV via Kokoro TTS (runs inside proxy-engine). */
+async function generateViaKokoro(wavPath: string, text: string, voice: string): Promise<boolean> {
  const modelPath = path.join(TTS_DIR, 'kokoro-v1.0.onnx');
  const voicesPath = path.join(TTS_DIR, 'voices.bin');
  if (!fs.existsSync(modelPath) || !fs.existsSync(voicesPath)) return false;
-
-  const root = process.cwd();
-  const ttsBin = [
-    path.join(root, 'dist_rust', 'tts-engine'),
-    path.join(root, 'rust', 'target', 'release', 'tts-engine'),
-    path.join(root, 'rust', 'target', 'debug', 'tts-engine'),
-  ].find((p) => fs.existsSync(p));
-  if (!ttsBin) return false;
+  if (!isProxyReady()) return false;

  try {
-    execSync(
-      `"${ttsBin}" --model "${modelPath}" --voices "${voicesPath}" --voice "${voice}" --output "${wavPath}" --text "${text}"`,
-      { timeout: 120000, stdio: 'pipe' },
-    );
+    await sendProxyCommand('generate_tts', {
+      model: modelPath,
+      voices: voicesPath,
+      voice,
+      text,
+      output: wavPath,
+    });
    return true;
  } catch {
    return false;
  }
 }

-/** Read a WAV file and return raw PCM + sample rate. */
-function readWavWithRate(wavPath: string): { pcm: Buffer; sampleRate: number } | null {
-  const wav = fs.readFileSync(wavPath);
-  if (wav.length < 44) return null;
-  if (wav.toString('ascii', 0, 4) !== 'RIFF') return null;
-  if (wav.toString('ascii', 8, 12) !== 'WAVE') return null;
+/** Read a WAV file's duration from its header. */
+function getWavDurationMs(wavPath: string): number {
+  try {
+    const wav = fs.readFileSync(wavPath);
+    if (wav.length < 44) return 0;
+    if (wav.toString('ascii', 0, 4) !== 'RIFF') return 0;

-  let sampleRate = 22050;
-  let pcm: Buffer | null = null;
-  let offset = 12;
+    let sampleRate = 16000;
+    let dataSize = 0;
+    let bitsPerSample = 16;
+    let channels = 1;
+    let offset = 12;

-  while (offset < wav.length - 8) {
-    const chunkId = wav.toString('ascii', offset, offset + 4);
-    const chunkSize = wav.readUInt32LE(offset + 4);
-    if (chunkId === 'fmt ') {
-      sampleRate = wav.readUInt32LE(offset + 12);
+    while (offset < wav.length - 8) {
+      const chunkId = wav.toString('ascii', offset, offset + 4);
+      const chunkSize = wav.readUInt32LE(offset + 4);
+      if (chunkId === 'fmt ') {
+        channels = wav.readUInt16LE(offset + 10);
+        sampleRate = wav.readUInt32LE(offset + 12);
+        bitsPerSample = wav.readUInt16LE(offset + 22);
+      }
+      if (chunkId === 'data') {
+        dataSize = chunkSize;
+      }
+      offset += 8 + chunkSize;
+      if (offset % 2 !== 0) offset++;
    }
-    if (chunkId === 'data') {
-      pcm = wav.subarray(offset + 8, offset + 8 + chunkSize);
-    }
-    offset += 8 + chunkSize;
-    if (offset % 2 !== 0) offset++;
+
+    const bytesPerSample = (bitsPerSample / 8) * channels;
+    const totalSamples = bytesPerSample > 0 ? dataSize / bytesPerSample : 0;
+    return sampleRate > 0 ? Math.round((totalSamples / sampleRate) * 1000) : 0;
+  } catch {
+    return 0;
  }
-
-  return pcm ? { pcm, sampleRate } : null;
-}
-
-/** Encode raw PCM frames to G.722 + Opus. */
-async function encodePcmFrames(
-  pcm: Buffer,
-  sampleRate: number,
-  log: (msg: string) => void,
-): Promise<{ g722Frames: Buffer[]; opusFrames: Buffer[] } | null> {
-  if (!isCodecReady()) return null;
-
-  const frameSamples = Math.floor(sampleRate * 0.02); // 20ms
-  const frameBytes = frameSamples * 2; // 16-bit
-  const totalFrames = Math.floor(pcm.length / frameBytes);
-
-  const g722Frames: Buffer[] = [];
-  const opusFrames: Buffer[] = [];
-
-  for (let i = 0; i < totalFrames; i++) {
-    const framePcm = Buffer.from(pcm.subarray(i * frameBytes, (i + 1) * frameBytes));
-    const [g722, opus] = await Promise.all([
-      encodePcm(framePcm, sampleRate, 9),   // G.722
-      encodePcm(framePcm, sampleRate, 111),  // Opus
-    ]);
-    if (g722) g722Frames.push(g722);
-    if (opus) opusFrames.push(opus);
-  }
-
-  return { g722Frames, opusFrames };
 }

 // ---------------------------------------------------------------------------
@@ -195,7 +148,7 @@ export class PromptCache {
  }

  /**
-   * Generate a TTS prompt and cache it.
+   * Generate a TTS prompt WAV and cache its path.
   * Uses espeak-ng (primary) or Kokoro (fallback).
   */
  async generatePrompt(id: string, text: string, voice = 'af_bella'): Promise<ICachedPrompt | null> {
@@ -207,14 +160,14 @@ export class PromptCache {
      this.espeakAvailable = isEspeakAvailable();
    }

-    // Generate WAV.
-    let generated = false;
+    // Generate WAV if not already on disk.
    if (!fs.existsSync(wavPath)) {
+      let generated = false;
      if (this.espeakAvailable) {
        generated = generateViaEspeak(wavPath, text);
      }
      if (!generated) {
-        generated = generateViaKokoro(wavPath, text, voice);
+        generated = await generateViaKokoro(wavPath, text, voice);
      }
      if (!generated) {
        this.log(`[prompt-cache] failed to generate TTS for "${id}"`);
@@ -223,49 +176,22 @@ export class PromptCache {
      this.log(`[prompt-cache] generated WAV for "${id}"`);
    }

-    return this.loadWavPrompt(id, wavPath);
+    return this.registerWav(id, wavPath);
  }

  /**
-   * Load a WAV file as a prompt and cache it.
+   * Load a pre-existing WAV file as a prompt.
   */
  async loadWavPrompt(id: string, wavPath: string): Promise<ICachedPrompt | null> {
    if (!fs.existsSync(wavPath)) {
      this.log(`[prompt-cache] WAV not found: ${wavPath}`);
      return null;
    }
-
-    const result = readWavWithRate(wavPath);
-    if (!result) {
-      this.log(`[prompt-cache] failed to parse WAV: ${wavPath}`);
-      return null;
-    }
-
-    const encoded = await encodePcmFrames(result.pcm, result.sampleRate, this.log);
-    if (!encoded) {
-      this.log(`[prompt-cache] encoding failed for "${id}" (codec bridge not ready?)`);
-      return null;
-    }
-
-    const durationMs = encoded.g722Frames.length * 20;
-    const prompt: ICachedPrompt = {
-      id,
-      g722Frames: encoded.g722Frames,
-      opusFrames: encoded.opusFrames,
-      durationMs,
-    };
-
-    this.prompts.set(id, prompt);
-    this.log(`[prompt-cache] cached "${id}": ${encoded.g722Frames.length} frames (${(durationMs / 1000).toFixed(1)}s)`);
-    return prompt;
+    return this.registerWav(id, wavPath);
  }

  /**
-   * Generate a beep tone prompt (sine wave).
-   * @param id - prompt ID
-   * @param freqHz - tone frequency (default 1000 Hz)
-   * @param durationMs - tone duration (default 500ms)
-   * @param amplitude - 16-bit amplitude (default 8000)
+   * Generate a beep tone WAV and cache it.
   */
  async generateBeep(
    id: string,
@@ -273,149 +199,77 @@ export class PromptCache {
    durationMs = 500,
    amplitude = 8000,
  ): Promise<ICachedPrompt | null> {
-    // Generate at 16kHz for decent quality.
-    const sampleRate = 16000;
-    const totalSamples = Math.floor((sampleRate * durationMs) / 1000);
-    const pcm = Buffer.alloc(totalSamples * 2);
+    fs.mkdirSync(TTS_DIR, { recursive: true });
+    const wavPath = path.join(TTS_DIR, `prompt-${id}.wav`);

-    for (let i = 0; i < totalSamples; i++) {
-      const t = i / sampleRate;
-      // Apply a short fade-in/fade-out to avoid click artifacts.
-      const fadeLen = Math.floor(sampleRate * 0.01); // 10ms fade
-      let envelope = 1.0;
-      if (i < fadeLen) envelope = i / fadeLen;
-      else if (i > totalSamples - fadeLen) envelope = (totalSamples - i) / fadeLen;
+    if (!fs.existsSync(wavPath)) {
+      // Generate 16kHz 16-bit mono sine wave WAV.
+      const sampleRate = 16000;
+      const totalSamples = Math.floor((sampleRate * durationMs) / 1000);
+      const pcm = Buffer.alloc(totalSamples * 2);

-      const sample = Math.round(Math.sin(2 * Math.PI * freqHz * t) * amplitude * envelope);
-      pcm.writeInt16LE(Math.max(-32768, Math.min(32767, sample)), i * 2);
+      for (let i = 0; i < totalSamples; i++) {
+        const t = i / sampleRate;
+        const fadeLen = Math.floor(sampleRate * 0.01); // 10ms fade
+        let envelope = 1.0;
+        if (i < fadeLen) envelope = i / fadeLen;
+        else if (i > totalSamples - fadeLen) envelope = (totalSamples - i) / fadeLen;
+
+        const sample = Math.round(Math.sin(2 * Math.PI * freqHz * t) * amplitude * envelope);
+        pcm.writeInt16LE(Math.max(-32768, Math.min(32767, sample)), i * 2);
+      }
+
+      // Write WAV file.
+      const headerSize = 44;
+      const dataSize = pcm.length;
+      const wav = Buffer.alloc(headerSize + dataSize);
+
+      // RIFF header
+      wav.write('RIFF', 0);
+      wav.writeUInt32LE(36 + dataSize, 4);
+      wav.write('WAVE', 8);
+
+      // fmt chunk
+      wav.write('fmt ', 12);
+      wav.writeUInt32LE(16, 16);        // chunk size
+      wav.writeUInt16LE(1, 20);         // PCM format
+      wav.writeUInt16LE(1, 22);         // mono
+      wav.writeUInt32LE(sampleRate, 24);
+      wav.writeUInt32LE(sampleRate * 2, 28); // byte rate
+      wav.writeUInt16LE(2, 32);         // block align
+      wav.writeUInt16LE(16, 34);        // bits per sample
+
+      // data chunk
+      wav.write('data', 36);
+      wav.writeUInt32LE(dataSize, 40);
+      pcm.copy(wav, 44);
+
+      fs.writeFileSync(wavPath, wav);
+      this.log(`[prompt-cache] beep WAV generated for "${id}"`);
    }

-    const encoded = await encodePcmFrames(pcm, sampleRate, this.log);
-    if (!encoded) {
-      this.log(`[prompt-cache] beep encoding failed for "${id}"`);
-      return null;
-    }
-
-    const actualDuration = encoded.g722Frames.length * 20;
-    const prompt: ICachedPrompt = {
-      id,
-      g722Frames: encoded.g722Frames,
-      opusFrames: encoded.opusFrames,
-      durationMs: actualDuration,
-    };
-
-    this.prompts.set(id, prompt);
-    this.log(`[prompt-cache] beep "${id}" cached: ${actualDuration}ms @ ${freqHz}Hz`);
-    return prompt;
+    return this.registerWav(id, wavPath);
  }

-  /**
-   * Remove a prompt from the cache.
-   */
+  /** Remove a prompt from the cache. */
  remove(id: string): void {
    this.prompts.delete(id);
  }

-  /**
-   * Clear all cached prompts.
-   */
+  /** Clear all cached prompts. */
  clear(): void {
    this.prompts.clear();
  }
-}

-// ---------------------------------------------------------------------------
-// Standalone playback helpers (for use by SystemLeg)
-// ---------------------------------------------------------------------------
+  // -------------------------------------------------------------------------
+  // Internal
+  // -------------------------------------------------------------------------

-/**
- * Play a cached prompt's G.722 frames as RTP packets at 20ms intervals.
- *
- * @param prompt - the cached prompt to play
- * @param sendPacket - function to send a raw RTP packet (12-byte header + payload)
- * @param ssrc - SSRC for RTP headers
- * @param onDone - called when playback finishes
- * @returns cancel function, or null if prompt has no G.722 frames
- */
-export function playPromptG722(
-  prompt: ICachedPrompt,
-  sendPacket: (pkt: Buffer) => void,
-  ssrc: number,
-  onDone?: () => void,
-): (() => void) | null {
-  if (prompt.g722Frames.length === 0) {
-    onDone?.();
-    return null;
+  private registerWav(id: string, wavPath: string): ICachedPrompt {
+    const durationMs = getWavDurationMs(wavPath);
+    const prompt: ICachedPrompt = { id, wavPath, durationMs };
+    this.prompts.set(id, prompt);
+    this.log(`[prompt-cache] cached "${id}": ${wavPath} (${(durationMs / 1000).toFixed(1)}s)`);
+    return prompt;
  }
-
-  const frames = prompt.g722Frames;
-  const PT = 9;
-  let frameIdx = 0;
-  let seq = Math.floor(Math.random() * 0xffff);
-  let rtpTs = Math.floor(Math.random() * 0xffffffff);
-
-  const timer = setInterval(() => {
-    if (frameIdx >= frames.length) {
-      clearInterval(timer);
-      onDone?.();
-      return;
-    }
-
-    const payload = frames[frameIdx];
-    const hdr = buildRtpHeader(PT, seq & 0xffff, rtpTs >>> 0, ssrc >>> 0, frameIdx === 0);
-    const pkt = Buffer.concat([hdr, payload]);
-    sendPacket(pkt);
-
-    seq++;
-    rtpTs += rtpClockIncrement(PT);
-    frameIdx++;
-  }, 20);
-
-  return () => clearInterval(timer);
-}
-
-/**
- * Play a cached prompt's Opus frames as RTP packets at 20ms intervals.
- *
- * @param prompt - the cached prompt to play
- * @param sendPacket - function to send a raw RTP packet
- * @param ssrc - SSRC for RTP headers
- * @param counters - shared seq/ts counters (mutated in place for seamless transitions)
- * @param onDone - called when playback finishes
- * @returns cancel function, or null if prompt has no Opus frames
- */
-export function playPromptOpus(
-  prompt: ICachedPrompt,
-  sendPacket: (pkt: Buffer) => void,
-  ssrc: number,
-  counters: { seq: number; ts: number },
-  onDone?: () => void,
-): (() => void) | null {
-  if (prompt.opusFrames.length === 0) {
-    onDone?.();
-    return null;
-  }
-
-  const frames = prompt.opusFrames;
-  const PT = 111;
-  let frameIdx = 0;
-
-  const timer = setInterval(() => {
-    if (frameIdx >= frames.length) {
-      clearInterval(timer);
-      onDone?.();
-      return;
-    }
-
-    const payload = frames[frameIdx];
-    const hdr = buildRtpHeader(PT, counters.seq & 0xffff, counters.ts >>> 0, ssrc >>> 0, frameIdx === 0);
-    const pkt = Buffer.concat([hdr, payload]);
-    sendPacket(pkt);
-
-    counters.seq++;
-    counters.ts += 960; // Opus 48kHz: 960 samples per 20ms
-    frameIdx++;
-  }, 20);
-
-  return () => clearInterval(timer);
 }
--- a/ts/frontend.ts
+++ b/ts/frontend.ts
@@ -128,14 +128,19 @@ async function handleRequest(
    }
  }

-  // API: add leg to call.
+  // API: add a SIP device to a call (mid-call INVITE to desk phone).
  if (url.pathname.startsWith('/api/call/') && url.pathname.endsWith('/addleg') && method === 'POST') {
    try {
      const callId = url.pathname.split('/')[3];
      const body = await readJsonBody(req);
      if (!body?.deviceId) return sendJson(res, { ok: false, error: 'missing deviceId' }, 400);
-      const ok = callManager?.addDeviceToCall(callId, body.deviceId) ?? false;
-      return sendJson(res, { ok });
+      const { addDeviceLeg } = await import('./proxybridge.ts');
+      const legId = await addDeviceLeg(callId, body.deviceId);
+      if (legId) {
+        return sendJson(res, { ok: true, legId });
+      } else {
+        return sendJson(res, { ok: false, error: 'device not registered or call not found' }, 404);
+      }
    } catch (e: any) {
      return sendJson(res, { ok: false, error: e.message }, 400);
    }
@@ -147,8 +152,9 @@ async function handleRequest(
      const callId = url.pathname.split('/')[3];
      const body = await readJsonBody(req);
      if (!body?.number) return sendJson(res, { ok: false, error: 'missing number' }, 400);
-      const ok = callManager?.addExternalToCall(callId, body.number, body.providerId) ?? false;
-      return sendJson(res, { ok });
+      const { addLeg: addLegFn } = await import('./proxybridge.ts');
+      const legId = await addLegFn(callId, body.number, body.providerId);
+      return sendJson(res, { ok: !!legId, legId });
    } catch (e: any) {
      return sendJson(res, { ok: false, error: e.message }, 400);
    }
@@ -160,22 +166,22 @@ async function handleRequest(
      const callId = url.pathname.split('/')[3];
      const body = await readJsonBody(req);
      if (!body?.legId) return sendJson(res, { ok: false, error: 'missing legId' }, 400);
-      const ok = callManager?.removeLegFromCall(callId, body.legId) ?? false;
+      const { removeLeg: removeLegFn } = await import('./proxybridge.ts');
+      const ok = await removeLegFn(callId, body.legId);
      return sendJson(res, { ok });
    } catch (e: any) {
      return sendJson(res, { ok: false, error: e.message }, 400);
    }
  }

-  // API: transfer leg.
+  // API: transfer leg (not yet implemented).
  if (url.pathname === '/api/transfer' && method === 'POST') {
    try {
      const body = await readJsonBody(req);
      if (!body?.sourceCallId || !body?.legId || !body?.targetCallId) {
        return sendJson(res, { ok: false, error: 'missing sourceCallId, legId, or targetCallId' }, 400);
      }
-      const ok = callManager?.transferLeg(body.sourceCallId, body.legId, body.targetCallId) ?? false;
-      return sendJson(res, { ok });
+      return sendJson(res, { ok: false, error: 'not yet implemented' }, 501);
    } catch (e: any) {
      return sendJson(res, { ok: false, error: e.message }, 400);
    }
@@ -339,11 +345,13 @@ export function initWebUi(
  onHangupCall: (callId: string) => boolean,
  onConfigSaved?: () => void,
  callManager?: CallManager,
+  voiceboxManager?: VoiceboxManager,
  /** WebRTC signaling handlers — forwarded to Rust proxy-engine. */
  onWebRtcOffer?: (sessionId: string, sdp: string, ws: WebSocket) => Promise<void>,
  onWebRtcIce?: (sessionId: string, candidate: any) => Promise<void>,
  onWebRtcClose?: (sessionId: string) => Promise<void>,
-  voiceboxManager?: VoiceboxManager,
+  /** Called when browser sends webrtc-accept (callId + sessionId linking). */
+  onWebRtcAccept?: (callId: string, sessionId: string) => void,
 ): void {
  const WEB_PORT = 3060;

@@ -382,6 +390,7 @@ export function initWebUi(
        if (msg.type === 'webrtc-offer' && msg.sessionId) {
          // Forward to Rust proxy-engine for WebRTC handling.
          if (onWebRtcOffer) {
+            log(`[webrtc-ws] offer msg keys: ${Object.keys(msg).join(',')}, sdp type: ${typeof msg.sdp}, sdp len: ${msg.sdp?.length || 0}`);
            onWebRtcOffer(msg.sessionId, msg.sdp, socket as any).catch((e: any) =>
              log(`[webrtc] offer error: ${e.message}`));
          }
@@ -394,8 +403,10 @@ export function initWebUi(
            onWebRtcClose(msg.sessionId).catch(() => {});
          }
        } else if (msg.type === 'webrtc-accept' && msg.callId) {
-          // TODO: Wire to Rust call linking.
          log(`[webrtc] accept: call=${msg.callId} session=${msg.sessionId || 'none'}`);
+          if (onWebRtcAccept && msg.sessionId) {
+            onWebRtcAccept(msg.callId, msg.sessionId);
+          }
        } else if (msg.type?.startsWith('webrtc-')) {
          msg._remoteIp = remoteIp;
          handleWebRtcSignaling(socket as any, msg);
--- a/ts/opusbridge.ts
+++ b/ts/opusbridge.ts
@@ -1,199 +0,0 @@
-/**
- * Audio transcoding bridge — uses smartrust to communicate with the Rust
- * opus-codec binary, which handles Opus ↔ G.722 ↔ PCMU/PCMA transcoding.
- *
- * All codec conversion happens in Rust (libopus + SpanDSP G.722 port).
- * The TypeScript side just passes raw payloads back and forth.
- */
-
-import path from 'node:path';
-import { RustBridge } from '@push.rocks/smartrust';
-
-// ---------------------------------------------------------------------------
-// Command type map for smartrust
-// ---------------------------------------------------------------------------
-
-type TCodecCommands = {
-  init: {
-    params: Record<string, never>;
-    result: Record<string, never>;
-  };
-  create_session: {
-    params: { session_id: string };
-    result: Record<string, never>;
-  };
-  destroy_session: {
-    params: { session_id: string };
-    result: Record<string, never>;
-  };
-  transcode: {
-    params: { data_b64: string; from_pt: number; to_pt: number; session_id?: string; direction?: string };
-    result: { data_b64: string };
-  };
-  encode_pcm: {
-    params: { data_b64: string; sample_rate: number; to_pt: number; session_id?: string };
-    result: { data_b64: string };
-  };
-};
-
-// ---------------------------------------------------------------------------
-// Bridge singleton
-// ---------------------------------------------------------------------------
-
-let bridge: RustBridge<TCodecCommands> | null = null;
-let initialized = false;
-
-function buildLocalPaths(): string[] {
-  const root = process.cwd();
-  return [
-    path.join(root, 'dist_rust', 'opus-codec'),
-    path.join(root, 'rust', 'target', 'release', 'opus-codec'),
-    path.join(root, 'rust', 'target', 'debug', 'opus-codec'),
-  ];
-}
-
-let logFn: ((msg: string) => void) | undefined;
-
-/**
- * Initialize the audio transcoding bridge. Spawns the Rust binary.
- */
-export async function initCodecBridge(log?: (msg: string) => void): Promise<boolean> {
-  if (initialized && bridge) return true;
-  logFn = log;
-
-  try {
-    bridge = new RustBridge<TCodecCommands>({
-      binaryName: 'opus-codec',
-      localPaths: buildLocalPaths(),
-    });
-
-    const spawned = await bridge.spawn();
-    if (!spawned) {
-      log?.('[codec] failed to spawn opus-codec binary');
-      bridge = null;
-      return false;
-    }
-
-    // Auto-restart: reset state when the Rust process exits so the next
-    // transcode attempt triggers re-initialization instead of silent failure.
-    bridge.on('exit', () => {
-      logFn?.('[codec] Rust audio transcoder process exited — will re-init on next use');
-      bridge = null;
-      initialized = false;
-    });
-
-    await bridge.sendCommand('init', {} as any);
-    initialized = true;
-    log?.('[codec] Rust audio transcoder initialized (Opus + G.722 + PCMU/PCMA)');
-    return true;
-  } catch (e: any) {
-    log?.(`[codec] init error: ${e.message}`);
-    bridge = null;
-    return false;
-  }
-}
-
-// ---------------------------------------------------------------------------
-// Session management — per-call codec isolation
-// ---------------------------------------------------------------------------
-
-/**
- * Create an isolated codec session. Each session gets its own Opus/G.722
- * encoder/decoder state, preventing concurrent calls from corrupting each
- * other's stateful codec predictions.
- */
-export async function createSession(sessionId: string): Promise<boolean> {
-  if (!bridge || !initialized) {
-    // Attempt auto-reinit if bridge died.
-    const ok = await initCodecBridge(logFn);
-    if (!ok) return false;
-  }
-  try {
-    await bridge!.sendCommand('create_session', { session_id: sessionId });
-    return true;
-  } catch (e: any) {
-    logFn?.(`[codec] create_session error: ${e?.message || e}`);
-    return false;
-  }
-}
-
-/**
- * Destroy a codec session, freeing its encoder/decoder state.
- */
-export async function destroySession(sessionId: string): Promise<void> {
-  if (!bridge || !initialized) return;
-  try {
-    await bridge.sendCommand('destroy_session', { session_id: sessionId });
-  } catch {
-    // Best-effort cleanup.
-  }
-}
-
-// ---------------------------------------------------------------------------
-// Transcoding
-// ---------------------------------------------------------------------------
-
-/**
- * Transcode an RTP payload between two codecs.
- * All codec work (Opus, G.722, PCMU, PCMA) + resampling happens in Rust.
- *
- * @param data - raw RTP payload (no header)
- * @param fromPT - source payload type (0=PCMU, 8=PCMA, 9=G.722, 111=Opus)
- * @param toPT - target payload type
- * @param sessionId - optional session for isolated codec state
- * @returns transcoded payload, or null on failure
- */
-export async function transcode(data: Buffer, fromPT: number, toPT: number, sessionId?: string, direction?: string): Promise<Buffer | null> {
-  if (!bridge || !initialized) return null;
-  try {
-    const params: any = {
-      data_b64: data.toString('base64'),
-      from_pt: fromPT,
-      to_pt: toPT,
-    };
-    if (sessionId) params.session_id = sessionId;
-    if (direction) params.direction = direction;
-    const result = await bridge.sendCommand('transcode', params);
-    return Buffer.from(result.data_b64, 'base64');
-  } catch {
-    return null;
-  }
-}
-
-/**
- * Encode raw 16-bit PCM to a target codec.
- * @param pcmData - raw 16-bit LE PCM bytes
- * @param sampleRate - input sample rate (e.g. 22050 for Piper TTS)
- * @param toPT - target payload type (9=G.722, 111=Opus, 0=PCMU, 8=PCMA)
- * @param sessionId - optional session for isolated codec state
- */
-export async function encodePcm(pcmData: Buffer, sampleRate: number, toPT: number, sessionId?: string): Promise<Buffer | null> {
-  if (!bridge || !initialized) return null;
-  try {
-    const params: any = {
-      data_b64: pcmData.toString('base64'),
-      sample_rate: sampleRate,
-      to_pt: toPT,
-    };
-    if (sessionId) params.session_id = sessionId;
-    const result = await bridge.sendCommand('encode_pcm', params);
-    return Buffer.from(result.data_b64, 'base64');
-  } catch (e: any) {
-    console.error('[encodePcm] error:', e?.message || e);
-    return null;
-  }
-}
-
-/** Check if the codec bridge is ready. */
-export function isCodecReady(): boolean {
-  return initialized && bridge !== null;
-}
-
-/** Shut down the codec bridge. */
-export function shutdownCodecBridge(): void {
-  if (bridge) {
-    try { bridge.kill(); } catch { /* ignore */ }
-    bridge = null;
-    initialized = false;
-  }
-}
--- a/ts/proxybridge.ts
+++ b/ts/proxybridge.ts
@@ -41,6 +41,48 @@ type TProxyCommands = {
    params: { call_id: string };
    result: { file_path: string; duration_ms: number };
  };
+  add_device_leg: {
+    params: { call_id: string; device_id: string };
+    result: { leg_id: string };
+  };
+  transfer_leg: {
+    params: { source_call_id: string; leg_id: string; target_call_id: string };
+    result: Record<string, never>;
+  };
+  replace_leg: {
+    params: { call_id: string; old_leg_id: string; number: string; provider_id?: string };
+    result: { new_leg_id: string };
+  };
+  start_interaction: {
+    params: {
+      call_id: string;
+      leg_id: string;
+      prompt_wav: string;
+      expected_digits: string;
+      timeout_ms: number;
+    };
+    result: { result: 'digit' | 'timeout' | 'cancelled'; digit?: string };
+  };
+  add_tool_leg: {
+    params: {
+      call_id: string;
+      tool_type: 'recording' | 'transcription';
+      config?: Record<string, unknown>;
+    };
+    result: { tool_leg_id: string };
+  };
+  remove_tool_leg: {
+    params: { call_id: string; tool_leg_id: string };
+    result: Record<string, never>;
+  };
+  set_leg_metadata: {
+    params: { call_id: string; leg_id: string; key: string; value: unknown };
+    result: Record<string, never>;
+  };
+  generate_tts: {
+    params: { model: string; voices: string; voice: string; text: string; output: string };
+    result: { output: string };
+  };
 };

 // ---------------------------------------------------------------------------
@@ -238,6 +280,38 @@ export async function webrtcLink(sessionId: string, callId: string, providerMedi
  }
 }

+/**
+ * Add an external SIP leg to an existing call (multiparty).
+ */
+export async function addLeg(callId: string, number: string, providerId?: string): Promise<string | null> {
+  if (!bridge || !initialized) return null;
+  try {
+    const result = await bridge.sendCommand('add_leg', {
+      call_id: callId,
+      number,
+      provider_id: providerId,
+    } as any);
+    return (result as any)?.leg_id || null;
+  } catch (e: any) {
+    logFn?.(`[proxy-engine] add_leg error: ${e?.message || e}`);
+    return null;
+  }
+}
+
+/**
+ * Remove a leg from a call.
+ */
+export async function removeLeg(callId: string, legId: string): Promise<boolean> {
+  if (!bridge || !initialized) return false;
+  try {
+    await bridge.sendCommand('remove_leg', { call_id: callId, leg_id: legId } as any);
+    return true;
+  } catch (e: any) {
+    logFn?.(`[proxy-engine] remove_leg error: ${e?.message || e}`);
+    return false;
+  }
+}
+
 /**
 * Close a WebRTC session.
 */
@@ -248,11 +322,170 @@ export async function webrtcClose(sessionId: string): Promise<void> {
  } catch { /* ignore */ }
 }

+// ---------------------------------------------------------------------------
+// Device leg & interaction commands
+// ---------------------------------------------------------------------------
+
+/**
+ * Add a local SIP device to an existing call (mid-call INVITE to desk phone).
+ */
+export async function addDeviceLeg(callId: string, deviceId: string): Promise<string | null> {
+  if (!bridge || !initialized) return null;
+  try {
+    const result = await bridge.sendCommand('add_device_leg', {
+      call_id: callId,
+      device_id: deviceId,
+    } as any);
+    return (result as any)?.leg_id || null;
+  } catch (e: any) {
+    logFn?.(`[proxy-engine] add_device_leg error: ${e?.message || e}`);
+    return null;
+  }
+}
+
+/**
+ * Transfer a leg from one call to another (leg stays connected, switches mixer).
+ */
+export async function transferLeg(
+  sourceCallId: string,
+  legId: string,
+  targetCallId: string,
+): Promise<boolean> {
+  if (!bridge || !initialized) return false;
+  try {
+    await bridge.sendCommand('transfer_leg', {
+      source_call_id: sourceCallId,
+      leg_id: legId,
+      target_call_id: targetCallId,
+    } as any);
+    return true;
+  } catch (e: any) {
+    logFn?.(`[proxy-engine] transfer_leg error: ${e?.message || e}`);
+    return false;
+  }
+}
+
+/**
+ * Replace a leg: terminate the old leg and dial a new number into the same call.
+ */
+export async function replaceLeg(
+  callId: string,
+  oldLegId: string,
+  number: string,
+  providerId?: string,
+): Promise<string | null> {
+  if (!bridge || !initialized) return null;
+  try {
+    const result = await bridge.sendCommand('replace_leg', {
+      call_id: callId,
+      old_leg_id: oldLegId,
+      number,
+      provider_id: providerId,
+    } as any);
+    return (result as any)?.new_leg_id || null;
+  } catch (e: any) {
+    logFn?.(`[proxy-engine] replace_leg error: ${e?.message || e}`);
+    return null;
+  }
+}
+
+/**
+ * Start an interaction on a specific leg — isolate it, play a prompt, collect DTMF.
+ * Blocks until the interaction completes (digit pressed, timeout, or cancelled).
+ */
+export async function startInteraction(
+  callId: string,
+  legId: string,
+  promptWav: string,
+  expectedDigits: string,
+  timeoutMs: number,
+): Promise<{ result: 'digit' | 'timeout' | 'cancelled'; digit?: string } | null> {
+  if (!bridge || !initialized) return null;
+  try {
+    const result = await bridge.sendCommand('start_interaction', {
+      call_id: callId,
+      leg_id: legId,
+      prompt_wav: promptWav,
+      expected_digits: expectedDigits,
+      timeout_ms: timeoutMs,
+    } as any);
+    return result as any;
+  } catch (e: any) {
+    logFn?.(`[proxy-engine] start_interaction error: ${e?.message || e}`);
+    return null;
+  }
+}
+
+/**
+ * Add a tool leg (recording or transcription) to a call.
+ * Tool legs receive per-source unmerged audio from all participants.
+ */
+export async function addToolLeg(
+  callId: string,
+  toolType: 'recording' | 'transcription',
+  config?: Record<string, unknown>,
+): Promise<string | null> {
+  if (!bridge || !initialized) return null;
+  try {
+    const result = await bridge.sendCommand('add_tool_leg', {
+      call_id: callId,
+      tool_type: toolType,
+      config,
+    } as any);
+    return (result as any)?.tool_leg_id || null;
+  } catch (e: any) {
+    logFn?.(`[proxy-engine] add_tool_leg error: ${e?.message || e}`);
+    return null;
+  }
+}
+
+/**
+ * Remove a tool leg from a call. Triggers finalization (WAV files, metadata).
+ */
+export async function removeToolLeg(callId: string, toolLegId: string): Promise<boolean> {
+  if (!bridge || !initialized) return false;
+  try {
+    await bridge.sendCommand('remove_tool_leg', {
+      call_id: callId,
+      tool_leg_id: toolLegId,
+    } as any);
+    return true;
+  } catch (e: any) {
+    logFn?.(`[proxy-engine] remove_tool_leg error: ${e?.message || e}`);
+    return false;
+  }
+}
+
+/**
+ * Set a metadata key-value pair on a leg.
+ */
+export async function setLegMetadata(
+  callId: string,
+  legId: string,
+  key: string,
+  value: unknown,
+): Promise<boolean> {
+  if (!bridge || !initialized) return false;
+  try {
+    await bridge.sendCommand('set_leg_metadata', {
+      call_id: callId,
+      leg_id: legId,
+      key,
+      value,
+    } as any);
+    return true;
+  } catch (e: any) {
+    logFn?.(`[proxy-engine] set_leg_metadata error: ${e?.message || e}`);
+    return false;
+  }
+}
+
 /**
 * Subscribe to an event from the proxy engine.
 * Event names: incoming_call, outbound_device_call, call_ringing,
 * call_answered, call_ended, provider_registered, device_registered,
- * dtmf_digit, recording_done, sip_unhandled
+ * dtmf_digit, recording_done, tool_recording_done, tool_transcription_done,
+ * leg_added, leg_removed, sip_unhandled
 */
 export function onProxyEvent(event: string, handler: (data: any) => void): void {
  if (!bridge) throw new Error('proxy engine not initialized');
@@ -264,6 +497,15 @@ export function isProxyReady(): boolean {
  return initialized && bridge !== null;
 }

+/** Forward a typed command to the proxy engine bridge; rejects when the engine is not initialized. */
+export async function sendProxyCommand<K extends keyof TProxyCommands>(
+  method: K,
+  params: TProxyCommands[K]['params'],
+): Promise<TProxyCommands[K]['result']> {
+  if (bridge && initialized) return bridge.sendCommand(method as string, params as any) as any;
+  throw new Error('proxy engine not initialized');
+}
+
 /** Shut down the proxy engine. */
 export function shutdownProxyEngine(): void {
  if (bridge) {
--- a/ts/sipproxy.ts
+++ b/ts/sipproxy.ts
@@ -24,7 +24,6 @@ import {
  getAllBrowserDeviceIds,
  getBrowserDeviceWs,
 } from './webrtcbridge.ts';
-import { initCodecBridge } from './opusbridge.ts';
 import { initAnnouncement } from './announcement.ts';
 import { PromptCache } from './call/prompt-cache.ts';
 import { VoiceboxManager } from './voicebox.ts';
@@ -37,7 +36,10 @@ import {
  shutdownProxyEngine,
  webrtcOffer,
  webrtcIce,
+  webrtcLink,
  webrtcClose,
+  addLeg,
+  removeLeg,
 } from './proxybridge.ts';
 import type {
  IIncomingCallEvent,
@@ -93,6 +95,16 @@ interface IDeviceStatus {
  isBrowser: boolean;
 }

+interface IActiveLeg { // shadow copy of one engine leg, kept so the dashboard status can list legs
+  id: string;
+  type: 'sip-device' | 'sip-provider' | 'webrtc' | 'tool';
+  state: string; // last state reported by a leg_added / leg_state_changed event
+  codec: string | null; // e.g. 'PCMU' or 'PT<n>'; set for the provider leg from call_answered's sip_pt
+  rtpPort: number | null;
+  remoteMedia: string | null; // '<addr>:<port>'; set for the provider leg from call_answered
+  metadata: Record<string, unknown>;
+}
+
 interface IActiveCall {
  id: string;
  direction: string;
@@ -101,6 +113,13 @@ interface IActiveCall {
  providerUsed: string | null;
  state: string;
  startedAt: number;
+  legs: Map<string, IActiveLeg>;
+}
+
+interface IHistoryLeg { // leg snapshot stored with a call-history entry (id, type and metadata only)
+  id: string;
+  type: string;
+  metadata: Record<string, unknown>;
 }

 interface ICallHistoryEntry {
@@ -110,6 +129,7 @@ interface ICallHistoryEntry {
  calleeNumber: string | null;
  startedAt: number;
  duration: number;
+  legs: IHistoryLeg[];
 }

 const providerStatuses = new Map<string, IProviderStatus>();
@@ -118,6 +138,12 @@ const activeCalls = new Map<string, IActiveCall>();
 const callHistory: ICallHistoryEntry[] = [];
 const MAX_HISTORY = 100;

+// WebRTC session ↔ call linking state.
+// The browser's session accept and the call_answered media info can arrive in either order.
+const webrtcSessionToCall = new Map<string, string>(); // sessionId → callId
+const webrtcCallToSession = new Map<string, string>(); // callId → sessionId
+const pendingCallMedia = new Map<string, { addr: string; port: number; sipPt: number }>(); // callId → provider media info
+
 // Initialize provider statuses from config (all start as unregistered).
 for (const p of appConfig.providers) {
  providerStatuses.set(p.id, {
@@ -178,7 +204,18 @@ function getStatus() {
    calls: [...activeCalls.values()].map((c) => ({
      ...c,
      duration: Math.floor((Date.now() - c.startedAt) / 1000),
-      legs: [],
+      legs: [...c.legs.values()].map((l) => ({
+        id: l.id,
+        type: l.type,
+        state: l.state,
+        codec: l.codec,
+        rtpPort: l.rtpPort,
+        remoteMedia: l.remoteMedia,
+        metadata: l.metadata || {},
+        pktSent: 0,
+        pktReceived: 0,
+        transcoding: false,
+      })),
    })),
    callHistory,
    contacts: appConfig.contacts || [],
@@ -233,6 +270,7 @@ async function startProxyEngine(): Promise<void> {
      providerUsed: data.provider_id,
      state: 'ringing',
      startedAt: Date.now(),
+      legs: new Map(),
    });

    // Notify browsers of incoming call.
@@ -257,6 +295,7 @@ async function startProxyEngine(): Promise<void> {
      providerUsed: null,
      state: 'setting-up',
      startedAt: Date.now(),
+      legs: new Map(),
    });
  });

@@ -270,7 +309,19 @@ async function startProxyEngine(): Promise<void> {
      providerUsed: data.provider_id,
      state: 'setting-up',
      startedAt: Date.now(),
+      legs: new Map(),
    });
+
+    // Notify all browser devices — they can connect via WebRTC to listen/talk.
+    const browserIds = getAllBrowserDeviceIds();
+    for (const bid of browserIds) {
+      sendToBrowserDevice(bid, {
+        type: 'webrtc-incoming',
+        callId: data.call_id,
+        from: data.number,
+        deviceId: bid,
+      });
+    }
  });

  onProxyEvent('call_ringing', (data: { call_id: string }) => {
@@ -278,11 +329,46 @@ async function startProxyEngine(): Promise<void> {
    if (call) call.state = 'ringing';
  });

-  onProxyEvent('call_answered', (data: { call_id: string }) => {
+  onProxyEvent('call_answered', (data: { call_id: string; provider_media_addr?: string; provider_media_port?: number; sip_pt?: number }) => {
    const call = activeCalls.get(data.call_id);
    if (call) {
      call.state = 'connected';
      log(`[call] ${data.call_id} connected`);
+
+      // Enrich the provider leg with the media address/codec carried by the answered event.
+      if (data.provider_media_addr && data.provider_media_port) {
+        for (const leg of call.legs.values()) {
+          if (leg.type === 'sip-provider') {
+            leg.remoteMedia = `${data.provider_media_addr}:${data.provider_media_port}`;
+            if (data.sip_pt !== undefined) {
+              const codecNames: Record<number, string> = { 0: 'PCMU', 8: 'PCMA', 9: 'G.722', 111: 'Opus' };
+              leg.codec = codecNames[data.sip_pt] || `PT${data.sip_pt}`; // unknown payload types show as PT<n>
+            }
+            break; // only the first provider leg is enriched
+          }
+        }
+      }
+    }
+
+    // Try to link WebRTC session to this call for audio bridging (runs even if the call is untracked).
+    if (data.provider_media_addr && data.provider_media_port) {
+      const sessionId = webrtcCallToSession.get(data.call_id);
+      if (sessionId) {
+        // Both session and media info available — link now.
+        const sipPt = data.sip_pt ?? 9; // payload type 9 (G.722) when the event omits sip_pt
+        log(`[webrtc] linking session=${sessionId.slice(0, 8)} to call=${data.call_id} media=${data.provider_media_addr}:${data.provider_media_port} pt=${sipPt}`);
+        webrtcLink(sessionId, data.call_id, data.provider_media_addr, data.provider_media_port, sipPt).then((ok) => {
+          log(`[webrtc] link result: ${ok}`);
+        }).catch((e: any) => log(`[webrtc] link failed: ${e?.message || e}`)); // don't leave a rejection unhandled
+      } else {
+        // Session not yet accepted — store media info for when it arrives.
+        pendingCallMedia.set(data.call_id, {
+          addr: data.provider_media_addr,
+          port: data.provider_media_port,
+          sipPt: data.sip_pt ?? 9,
+        });
+        log(`[webrtc] media info cached for call=${data.call_id}, waiting for session accept`);
+      }
    }
  });

@@ -290,6 +376,15 @@ async function startProxyEngine(): Promise<void> {
    const call = activeCalls.get(data.call_id);
    if (call) {
      log(`[call] ${data.call_id} ended: ${data.reason} (${data.duration}s)`);
+      // Snapshot legs with metadata for history.
+      const historyLegs: IHistoryLeg[] = [];
+      for (const [, leg] of call.legs) {
+        historyLegs.push({
+          id: leg.id,
+          type: leg.type,
+          metadata: leg.metadata || {},
+        });
+      }
      // Move to history.
      callHistory.unshift({
        id: call.id,
@@ -298,9 +393,22 @@ async function startProxyEngine(): Promise<void> {
        calleeNumber: call.calleeNumber,
        startedAt: call.startedAt,
        duration: data.duration,
+        legs: historyLegs,
      });
      if (callHistory.length > MAX_HISTORY) callHistory.pop();
      activeCalls.delete(data.call_id);
+
+      // Notify browser(s) that the call ended.
+      broadcastWs('webrtc-call-ended', { callId: data.call_id });
+
+      // Clean up WebRTC session mappings.
+      const sessionId = webrtcCallToSession.get(data.call_id);
+      if (sessionId) {
+        webrtcCallToSession.delete(data.call_id);
+        webrtcSessionToCall.delete(sessionId);
+        webrtcClose(sessionId).catch(() => {});
+      }
+      pendingCallMedia.delete(data.call_id);
    }
  });

@@ -308,6 +416,52 @@ async function startProxyEngine(): Promise<void> {
    log(`[sip] unhandled ${data.method_or_status} Call-ID=${data.call_id?.slice(0, 20)} from=${data.from_addr}:${data.from_port}`);
  });

+  // Leg events (multiparty) — mirror engine legs into shadow state so the dashboard can show them.
+  onProxyEvent('leg_added', (data: any) => {
+    log(`[leg] added: call=${data.call_id} leg=${data.leg_id} kind=${data.kind} state=${data.state}`);
+    const call = activeCalls.get(data.call_id);
+    if (!call) return; // event for a call that is not tracked here
+    const entry = {
+      id: data.leg_id,
+      type: data.kind,
+      state: data.state,
+      codec: null,
+      rtpPort: null,
+      remoteMedia: null,
+      metadata: data.metadata || {},
+    };
+    call.legs.set(data.leg_id, entry);
+  });
+
+  onProxyEvent('leg_removed', (data: any) => { // drop the leg from the shadow state
+    log(`[leg] removed: call=${data.call_id} leg=${data.leg_id}`);
+    activeCalls.get(data.call_id)?.legs.delete(data.leg_id); // no-op when the call is not tracked
+  });
+
+  onProxyEvent('leg_state_changed', (data: any) => {
+    log(`[leg] state: call=${data.call_id} leg=${data.leg_id} → ${data.state}`);
+    const call = activeCalls.get(data.call_id);
+    if (!call) return;
+    const known = call.legs.get(data.leg_id);
+    if (known) {
+      known.state = data.state;
+      if (data.metadata) known.metadata = data.metadata;
+      return;
+    }
+    // Initial legs (provider/device) don't emit leg_added — register on first state change, type inferred from the ID.
+    const legId: string = data.leg_id;
+    let type: 'sip-provider' | 'sip-device' | 'webrtc' = 'webrtc';
+    if (legId.includes('-prov')) type = 'sip-provider';
+    else if (legId.includes('-dev')) type = 'sip-device';
+    call.legs.set(data.leg_id, {
+      id: data.leg_id,
+      type,
+      state: data.state,
+      codec: null, rtpPort: null, remoteMedia: null,
+      metadata: data.metadata || {},
+    });
+  });
+
  // WebRTC events from Rust — forward ICE candidates to browser via WebSocket.
  onProxyEvent('webrtc_ice_candidate', (data: any) => {
    // Find the browser's WebSocket by session ID and send the ICE candidate.
@@ -368,9 +522,8 @@ async function startProxyEngine(): Promise<void> {
  const deviceList = appConfig.devices.map((d) => d.displayName).join(', ');
  log(`proxy engine started | LAN ${appConfig.proxy.lanIp}:${appConfig.proxy.lanPort} | providers: ${providerList} | devices: ${deviceList}`);

-  // Initialize audio codec bridge (still needed for WebRTC transcoding).
+  // Generate TTS audio (WAV files on disk, played by Rust audio_player).
  try {
-    await initCodecBridge(log);
    await initAnnouncement(log);

    // Pre-generate prompts.
@@ -392,7 +545,7 @@ async function startProxyEngine(): Promise<void> {
    }
    log(`[startup] prompts cached: ${promptCache.listIds().join(', ') || 'none'}`);
  } catch (e) {
-    log(`[codec] init failed: ${e}`);
+    log(`[tts] init failed: ${e}`);
  }
 }

@@ -418,6 +571,7 @@ initWebUi(
          providerUsed: providerId || null,
          state: 'setting-up',
          startedAt: Date.now(),
+          legs: new Map(),
        });
      } else {
        log(`[dashboard] call failed for ${number}`);
@@ -467,14 +621,22 @@ initWebUi(
    }
  },
  undefined, // callManager — legacy, replaced by Rust proxy-engine
-  voiceboxManager,
+  voiceboxManager, // voiceboxManager
  // WebRTC signaling → forwarded to Rust proxy-engine.
  async (sessionId, sdp, ws) => {
-    log(`[webrtc] offer from browser session=${sessionId.slice(0, 8)}`);
+    log(`[webrtc] offer from browser session=${sessionId.slice(0, 8)} sdp_type=${typeof sdp} sdp_len=${sdp?.length || 0}`);
+    if (!sdp || typeof sdp !== 'string' || sdp.length < 10) { // reject missing, non-string or very short SDP
+      log(`[webrtc] WARNING: invalid SDP (type=${typeof sdp}), skipping offer`);
+      return; // nothing is sent back to the browser in this case
+    }
+    log(`[webrtc] sending offer to Rust (${sdp.length}b)...`);
    const result = await webrtcOffer(sessionId, sdp);
+    log(`[webrtc] Rust result: ${JSON.stringify(result)?.slice(0, 200)}`); // capped at 200 chars per log line
    if (result?.sdp) {
      ws.send(JSON.stringify({ type: 'webrtc-answer', sessionId, sdp: result.sdp }));
      log(`[webrtc] answer sent to browser session=${sessionId.slice(0, 8)}`);
+    } else { // no answer SDP came back: log only, the browser gets no reply
+      log(`[webrtc] ERROR: no answer SDP from Rust`);
    }
  },
  async (sessionId, candidate) => {
@@ -483,6 +645,26 @@ initWebUi(
  async (sessionId) => {
    await webrtcClose(sessionId);
  },
+  // onWebRtcAccept — browser has accepted a call; record the session ↔ call link and bridge audio if possible.
+  (callId: string, sessionId: string) => {
+    log(`[webrtc] accept: callId=${callId} sessionId=${sessionId.slice(0, 8)}`);
+
+    // Store bidirectional mapping.
+    webrtcSessionToCall.set(sessionId, callId);
+    webrtcCallToSession.set(callId, sessionId);
+
+    // Check if we already have media info for this call (provider answered first).
+    const media = pendingCallMedia.get(callId);
+    if (media) {
+      pendingCallMedia.delete(callId); // consumed: the link is attempted exactly once from here
+      log(`[webrtc] linking session=${sessionId.slice(0, 8)} to call=${callId} media=${media.addr}:${media.port} pt=${media.sipPt}`);
+      webrtcLink(sessionId, callId, media.addr, media.port, media.sipPt).then((ok) => {
+        log(`[webrtc] link result: ${ok}`);
+      }).catch((e: any) => log(`[webrtc] link failed: ${e?.message || e}`)); // don't leave a rejection unhandled
+    } else {
+      log(`[webrtc] session ${sessionId.slice(0, 8)} accepted, waiting for call_answered media info`);
+    }
+  },
 );

 // ---------------------------------------------------------------------------
--- a/ts_web/00_commitinfo_data.ts
+++ b/ts_web/00_commitinfo_data.ts
@@ -3,6 +3,6 @@
 */
 export const commitinfo = {
  name: 'siprouter',
-  version: '1.12.0',
+  version: '1.17.0',
  description: 'undefined'
 }
--- a/ts_web/state/appstate.ts
+++ b/ts_web/state/appstate.ts
@@ -20,7 +20,7 @@ export interface IDeviceStatus {

 export interface ILegStatus {
  id: string;
-  type: 'sip-device' | 'sip-provider' | 'webrtc';
+  type: 'sip-device' | 'sip-provider' | 'webrtc' | 'tool';
  state: string;
  remoteMedia: { address: string; port: number } | null;
  rtpPort: number | null;
@@ -28,6 +28,7 @@ export interface ILegStatus {
  pktReceived: number;
  codec: string | null;
  transcoding: boolean;
+  metadata?: Record<string, unknown>;
 }

 export interface ICallStatus {
@@ -42,6 +43,12 @@ export interface ICallStatus {
  legs: ILegStatus[];
 }

+export interface IHistoryLeg { // element type of ICallHistoryEntry.legs
+  id: string;
+  type: string;
+  metadata: Record<string, unknown>;
+}
+
 export interface ICallHistoryEntry {
  id: string;
  direction: 'inbound' | 'outbound' | 'internal';
@@ -50,6 +57,7 @@ export interface ICallHistoryEntry {
  providerUsed: string | null;
  startedAt: number;
  duration: number;
+  legs?: IHistoryLeg[];
 }

 export interface IContact {
Author	SHA1	Message	Date
Juergen Kunz	2aca5f1510	v1.17.0	2026-04-10 15:58:41 +00:00
Juergen Kunz	73b28f5f57	feat(proxy-engine): upgrade the internal audio bus to 48kHz f32 with per-leg denoising and improve SIP leg routing	2026-04-10 15:58:41 +00:00
Juergen Kunz	10ad432a4c	v1.16.0	2026-04-10 15:21:44 +00:00
Juergen Kunz	66112091a2	feat(proxy-engine): integrate Kokoro TTS generation into proxy-engine and simplify TypeScript prompt handling to use cached WAV files	2026-04-10 15:21:44 +00:00
Juergen Kunz	c9ae747c95	v1.15.0	2026-04-10 15:12:30 +00:00
Juergen Kunz	45f9b9c15c	feat(proxy-engine): add device leg, leg transfer, and leg replacement call controls	2026-04-10 15:12:30 +00:00
Juergen Kunz	7d59361352	feat(mixer): enhance mixer functionality with interaction and tool legs - Updated mixer to handle participant and isolated leg roles, allowing for IVR and consent interactions. - Introduced commands for starting and canceling interactions, managing tool legs for recording and transcription. - Implemented per-source audio handling for tool legs, enabling separate audio processing. - Enhanced DTMF handling to forward events between participant legs only. - Added support for PCM recording directly from tool legs, with WAV file generation. - Updated TypeScript definitions and functions to support new interaction and tool leg features.	2026-04-10 14:54:21 +00:00
Juergen Kunz	6a130db7c7	v1.14.0	2026-04-10 12:52:48 +00:00
Juergen Kunz	93f671f1f9	feat(proxy-engine): add multiparty call mixing with dynamic SIP and WebRTC leg management	2026-04-10 12:52:48 +00:00
Juergen Kunz	36eab44e28	v1.13.0	2026-04-10 12:19:20 +00:00
Juergen Kunz	9e5aa35fee	feat(proxy-engine,webrtc): add B2BUA SIP leg handling and WebRTC call bridging for outbound calls	2026-04-10 12:19:20 +00:00