siprouter/rust/crates/proxy-engine/src/mixer.rs

//! Audio mixer — mix-minus engine for multiparty calls.
//!
//! Each Call spawns one mixer task. Legs communicate with the mixer via
//! tokio mpsc channels — no shared mutable state, no lock contention.
//!
//! Internal bus format: 48kHz f32 PCM (960 samples per 20ms frame).
//! All encoding/decoding happens at leg boundaries. Per-leg inbound denoising runs at 48kHz
//! (skipped for Opus legs).
//!
//! The mixer runs a 20ms tick loop:
//! 1. Drain inbound channels, reorder RTP, decode variable-duration packets to 48kHz,
//!    and queue them in per-leg PCM buffers
//! 2. Compute total mix (sum of all **participant** legs' f32 PCM as f64)
//! 3. For each participant leg: mix-minus = total - own, resample to leg codec rate, encode, send
//! 4. For each isolated leg: play prompt frame or silence, check DTMF
//! 5. For each tool leg: send per-source unmerged audio batch
//! 6. Forward DTMF between participant legs only

use crate::ipc::{emit_event, OutTx};
use crate::jitter_buffer::{JitterBuffer, JitterResult};
use crate::rtp::{build_rtp_header, rtp_clock_increment, rtp_clock_rate};
use crate::tts::TtsStreamMessage;
use codec_lib::{codec_sample_rate, new_denoiser, TranscodeState};
use nnnoiseless::DenoiseState;
use std::collections::{HashMap, VecDeque};
use tokio::sync::{mpsc, oneshot, watch};
use tokio::task::JoinHandle;
use tokio::time::{self, Duration, MissedTickBehavior};

/// Mixing sample rate — 48kHz. Opus is native, G.722 needs 3× upsample, G.711 needs 6× upsample.
/// All processing (denoising, mixing) happens at this rate in f32 for maximum quality.
const MIX_RATE: u32 = 48000;
/// Samples per 20ms frame at the mixing rate. Every frame handed to the mix,
/// to tool legs, or played as a prompt is expected to be exactly this long.
const MIX_FRAME_SIZE: usize = 960; // 48000 * 0.020
/// Safety cap for how much timestamp-derived gap fill we synthesize at once.
/// A forward timestamp gap larger than this makes `queue_inbound_packet` drop
/// the queued PCM instead of synthesizing concealment for the whole gap.
const MAX_GAP_FILL_SAMPLES: usize = MIX_FRAME_SIZE * 6; // 120ms
/// Bound how many decode / concealment steps a leg can consume in one tick
/// (the iteration limit of `fill_leg_playout_buffer`).
const MAX_PACKET_STEPS_PER_TICK: usize = 24;
/// Report the first output drop immediately, then every N drops.
const DROP_REPORT_INTERVAL: u64 = 50;

/// A raw RTP payload received from a leg (no RTP header).
///
/// The header fields the mixer still needs (payload type, marker, sequence
/// number, timestamp) are carried alongside the payload bytes.
pub struct RtpPacket {
    /// Payload bytes exactly as received, without the RTP header.
    pub payload: Vec<u8>,
    /// RTP payload type. The mixer treats PT 101 as DTMF (telephone-event)
    /// and routes every other value into the leg's jitter buffer as audio.
    pub payload_type: u8,
    /// RTP marker bit (first packet of a DTMF event, etc.).
    pub marker: bool,
    /// RTP sequence number for reordering.
    pub seq: u16,
    /// RTP timestamp from the original packet header.
    pub timestamp: u32,
}

// ---------------------------------------------------------------------------
// Leg roles
// ---------------------------------------------------------------------------

/// What role a leg currently plays in the mixer.
///
/// New legs start as `Participant`; `StartInteraction` switches a leg to
/// `Isolated`, and the mixer switches it back once the interaction completes.
enum LegRole {
    /// Normal participant: contributes to mix, receives mix-minus.
    Participant,
    /// Temporarily isolated for IVR/consent interaction. The leg is excluded
    /// from the mix and hears its prompt (or silence) instead.
    Isolated(IsolationState),
}

/// Per-leg state for an in-progress interaction (prompt playback + DTMF wait).
struct IsolationState {
    /// PCM frames at MIX_RATE (960 samples each, 48kHz f32) queued for playback.
    /// One frame is popped per mixer tick.
    prompt_frames: VecDeque<Vec<f32>>,
    /// Live TTS frames arrive here while playback is already in progress.
    /// Set to `None` once the stream has finished, failed, or disconnected.
    prompt_stream_rx: Option<mpsc::Receiver<TtsStreamMessage>>,
    /// Cancels the background TTS producer when the interaction ends early.
    /// Taken (and signalled with `true`) at most once.
    prompt_cancel_tx: Option<watch::Sender<bool>>,
    /// Whether the live prompt stream has ended.
    prompt_stream_finished: bool,
    /// Digits that complete the interaction (e.g., ['1', '2']).
    expected_digits: Vec<char>,
    /// Ticks remaining before timeout (decremented each tick after prompt ends).
    timeout_ticks_remaining: u32,
    /// Whether we've finished playing the prompt. Set once the frame queue is
    /// empty and the live stream has been marked finished.
    prompt_done: bool,
    /// Channel to send the result back to the command handler.
    /// `Option` so the sender can be consumed exactly once.
    result_tx: Option<oneshot::Sender<InteractionResult>>,
}

/// Result of a leg interaction (consent prompt, IVR, etc.).
///
/// Delivered through the `result_tx` oneshot supplied with `StartInteraction`.
pub enum InteractionResult {
    /// The participant pressed one of the expected digits.
    Digit(char),
    /// No digit was received within the timeout.
    Timeout,
    /// The leg was removed or the call tore down before completion.
    /// Also sent when the target leg is unknown or a newer interaction
    /// replaces this one on the same leg.
    Cancelled,
}

// ---------------------------------------------------------------------------
// Tool legs
// ---------------------------------------------------------------------------

/// Type of tool leg.
///
/// Informational only in this file: the mixer stores the value but does not
/// branch on it, so both kinds receive the same per-source audio batches.
#[derive(Debug, Clone, Copy)]
pub enum ToolType {
    /// Tool leg registered for recording.
    Recording,
    /// Tool leg registered for transcription.
    Transcription,
}

/// Per-source audio delivered to a tool leg each mixer tick.
///
/// Audio is deliberately left unmerged: one entry per participant leg.
pub struct ToolAudioBatch {
    /// One frame per leg that is currently a participant; isolated legs are excluded.
    pub sources: Vec<ToolAudioSource>,
}

/// One participant's 20ms audio frame.
pub struct ToolAudioSource {
    /// Id of the leg this frame came from (the key used in `AddLeg`).
    pub leg_id: String,
    /// PCM at 48kHz f32, MIX_FRAME_SIZE (960) samples.
    pub pcm_48k: Vec<f32>,
}

/// Internal storage for a tool leg inside the mixer.
struct ToolLegSlot {
    // Stored for completeness but never read by the mixer, hence the lint allowance.
    #[allow(dead_code)]
    tool_type: ToolType,
    /// Destination for this tool's per-tick audio batches (sent with `try_send`).
    audio_tx: mpsc::Sender<ToolAudioBatch>,
    /// Number of batches that could not be delivered (queue full or closed).
    dropped_batches: u64,
}

// ---------------------------------------------------------------------------
// Commands
// ---------------------------------------------------------------------------

/// Commands sent to the mixer task via a control channel.
///
/// Commands are drained without blocking at the start of every 20ms tick, so
/// a command takes effect on the tick after it is sent.
pub enum MixerCommand {
    /// Add a new participant leg to the mix.
    /// Adding a leg whose id is already present replaces the existing slot.
    AddLeg {
        /// Unique id of the leg within this call.
        leg_id: String,
        /// RTP payload type of the leg's codec; used for decode/encode and RTP clocking.
        codec_pt: u8,
        /// Header-less RTP payloads received from the leg.
        inbound_rx: mpsc::Receiver<RtpPacket>,
        /// Complete outbound RTP packets (header + encoded payload) for the leg.
        outbound_tx: mpsc::Sender<Vec<u8>>,
    },
    /// Remove a leg from the mix (channels are dropped, I/O tasks exit).
    /// A pending interaction on the leg is resolved as `Cancelled`.
    RemoveLeg { leg_id: String },
    /// Shut down the mixer.
    Shutdown,

    /// Isolate a leg and start an interaction (consent prompt, IVR).
    /// The leg is removed from the mix and hears the prompt instead.
    /// DTMF from the leg is checked against expected_digits.
    StartInteraction {
        leg_id: String,
        /// PCM frames at MIX_RATE (48kHz f32), each 960 samples.
        prompt_pcm_frames: Vec<Vec<f32>>,
        /// Optional live prompt stream. Frames are appended as they are synthesized.
        prompt_stream_rx: Option<mpsc::Receiver<TtsStreamMessage>>,
        /// Optional cancellation handle for the live prompt stream.
        prompt_cancel_tx: Option<watch::Sender<bool>>,
        /// Digits that complete the interaction when received as DTMF.
        expected_digits: Vec<char>,
        /// Time to wait for a digit after the prompt has finished. Converted to
        /// 20ms ticks with integer division, so the remainder is discarded.
        timeout_ms: u32,
        /// Receives the interaction outcome exactly once.
        result_tx: oneshot::Sender<InteractionResult>,
    },

    /// Add a tool leg that receives per-source unmerged audio.
    AddToolLeg {
        leg_id: String,
        tool_type: ToolType,
        audio_tx: mpsc::Sender<ToolAudioBatch>,
    },
    /// Remove a tool leg (drops the channel, background task finalizes).
    RemoveToolLeg { leg_id: String },
}

// ---------------------------------------------------------------------------
// Mixer internals
// ---------------------------------------------------------------------------

/// Internal per-leg state inside the mixer.
struct MixerLegSlot {
    /// RTP payload type of the leg's codec. Selects the decode/encode path,
    /// the outbound sample rate and the RTP clock used for timestamps.
    codec_pt: u8,
    /// Codec and resampler state shared by this leg's decode and encode paths.
    transcoder: TranscodeState,
    /// Per-leg inbound denoiser (48kHz, 480-sample frames).
    /// Only applied to non-Opus legs.
    denoiser: Box<DenoiseState<'static>>,
    /// Header-less RTP payloads arriving from the leg's I/O task.
    inbound_rx: mpsc::Receiver<RtpPacket>,
    /// Complete outbound RTP packets for the leg's I/O task.
    outbound_tx: mpsc::Sender<Vec<u8>>,
    /// Decoded PCM waiting for playout. Variable-duration RTP packets are
    /// decoded into this FIFO; the mixer consumes exactly one 20ms frame per tick.
    pcm_buffer: VecDeque<f32>,
    /// The frame most recently taken from `pcm_buffer` for playout at MIX_RATE
    /// (960 samples, 48kHz f32). May contain concealment or zero padding, and
    /// doubles as the template for fade-out concealment.
    last_pcm_frame: Vec<f32>,
    /// Next RTP timestamp expected from the inbound stream.
    /// `None` until the first packet is decoded and after a long silence reset.
    expected_rtp_timestamp: Option<u32>,
    /// Best-effort estimate of packet duration in RTP clock units.
    estimated_packet_ts: u32,
    /// Number of consecutive ticks with no inbound packet.
    /// Audio and DTMF packets both reset the counter.
    silent_ticks: u32,
    /// Per-leg jitter buffer for packet reordering and timing.
    jitter: JitterBuffer,
    // RTP output state.
    /// Next outbound sequence number; shared by audio and forwarded DTMF packets.
    rtp_seq: u16,
    /// Next outbound timestamp; advanced per audio frame, not for DTMF.
    rtp_ts: u32,
    /// Outbound SSRC, chosen randomly when the leg is added.
    rtp_ssrc: u32,
    /// Dropped outbound frames for this leg (queue full / closed).
    outbound_drops: u64,
    /// Current role of this leg in the mixer.
    role: LegRole,
}

/// Convert a sample count at the 48kHz mix rate into RTP timestamp units for
/// the codec identified by `codec_pt`, rounding to the nearest unit.
///
/// A reported clock rate of zero is treated as 1 to avoid a degenerate scale.
fn mix_samples_to_rtp_ts(codec_pt: u8, mix_samples: usize) -> u32 {
    let codec_rate = rtp_clock_rate(codec_pt).max(1) as u64;
    let mix_rate = MIX_RATE as u64;
    // Widen to u64 so the multiplication cannot overflow before the division.
    let scaled = mix_samples as u64 * codec_rate;
    let rounded = (scaled + mix_rate / 2) / mix_rate;
    rounded as u32
}

/// Convert a duration in RTP timestamp units of the codec identified by
/// `codec_pt` into a sample count at the 48kHz mix rate, rounding to nearest.
///
/// A reported clock rate of zero is treated as 1 so the division is always defined.
fn rtp_ts_to_mix_samples(codec_pt: u8, rtp_ts: u32) -> usize {
    let codec_rate = rtp_clock_rate(codec_pt).max(1) as u64;
    let mix_rate = MIX_RATE as u64;
    // Widen to u64 so the multiplication cannot overflow before the division.
    let scaled = rtp_ts as u64 * mix_rate;
    let rounded = (scaled + codec_rate / 2) / codec_rate;
    rounded as usize
}

/// Whether a wrapping RTP timestamp difference represents a step forward.
///
/// Zero (no movement) and values in the upper half of the u32 range (which
/// correspond to a backwards step under wrapping arithmetic) are rejected.
fn is_forward_rtp_delta(delta: u32) -> bool {
    (1..0x8000_0000u32).contains(&delta)
}

/// Rate limiter for drop events: report the very first drop, then every
/// `DROP_REPORT_INTERVAL`-th drop after that.
fn should_emit_drop_event(total_drops: u64) -> bool {
    let is_first_drop = total_drops == 1;
    let on_report_interval = total_drops % DROP_REPORT_INTERVAL == 0;
    is_first_drop || on_report_interval
}

/// Emit a rate-limited `mixer_output_drop` event for a leg or tool leg.
///
/// Exactly one of `leg_id` / `tool_leg_id` is expected to be set by callers;
/// the other is serialized as JSON `null`. Nothing is emitted unless
/// `should_emit_drop_event(total_drops)` allows it.
fn emit_output_drop_event(
    out_tx: &OutTx,
    call_id: &str,
    leg_id: Option<&str>,
    tool_leg_id: Option<&str>,
    stream: &str,
    reason: &str,
    total_drops: u64,
) {
    if should_emit_drop_event(total_drops) {
        let payload = serde_json::json!({
            "call_id": call_id,
            "leg_id": leg_id,
            "tool_leg_id": tool_leg_id,
            "stream": stream,
            "reason": reason,
            "total_drops": total_drops,
        });
        emit_event(out_tx, "mixer_output_drop", payload);
    }
}

/// Append `samples` samples of fading concealment audio to the leg's playout FIFO.
///
/// The leg's last playout frame (or a silent frame if none is stored) is
/// repeated as often as needed. The working copy is multiplied by `decay`
/// before every repetition, so each pass is quieter than the one before.
/// `slot.last_pcm_frame` itself is left untouched.
fn fade_concealment_from_last_frame(slot: &mut MixerLegSlot, samples: usize, decay: f32) {
    let mut pattern = slot.last_pcm_frame.clone();
    if pattern.is_empty() {
        pattern = vec![0.0f32; MIX_FRAME_SIZE];
    }

    let mut still_needed = samples;
    while still_needed > 0 {
        // Attenuate first: even the first repetition is already faded.
        pattern.iter_mut().for_each(|value| *value *= decay);

        let count = still_needed.min(pattern.len());
        slot.pcm_buffer.extend(pattern[..count].iter().copied());
        still_needed -= count;
    }
}

/// Append at least one sample of packet-loss concealment to the leg's playout FIFO.
///
/// Work is done in chunks of at most one mix frame. Opus legs ask the decoder
/// for PLC audio (padded or truncated to the chunk length) and fall back to a
/// 0.8 fade if that fails; every other codec uses a 0.85 fade of the last frame.
fn append_packet_loss_concealment(slot: &mut MixerLegSlot, samples: usize) {
    let total = samples.max(1);
    let mut produced = 0usize;

    while produced < total {
        let chunk = (total - produced).min(MIX_FRAME_SIZE);
        let is_opus = slot.codec_pt == codec_lib::PT_OPUS;

        // Only Opus has decoder-side PLC; `None` means "use the fade fallback".
        let decoder_plc = if is_opus {
            slot.transcoder.opus_plc(chunk).ok()
        } else {
            None
        };

        match decoder_plc {
            Some(mut pcm) => {
                pcm.resize(chunk, 0.0);
                slot.pcm_buffer.extend(pcm);
            }
            None => {
                let decay = if is_opus { 0.8 } else { 0.85 };
                fade_concealment_from_last_frame(slot, chunk, decay);
            }
        }

        produced += chunk;
    }
}

/// Decode one inbound packet to f32 PCM at the 48kHz mix rate.
///
/// Returns `None` when the codec fails to decode the payload. If resampling
/// to the mix rate fails, one frame of silence is substituted. Non-Opus legs
/// are additionally run through the per-leg denoiser.
fn decode_packet_to_mix_pcm(slot: &mut MixerLegSlot, pkt: &RtpPacket) -> Option<Vec<f32>> {
    let (decoded, source_rate) = match slot
        .transcoder
        .decode_to_f32(&pkt.payload, pkt.payload_type)
    {
        Ok(output) => output,
        Err(_) => return None,
    };

    let at_mix_rate = if source_rate != MIX_RATE {
        match slot.transcoder.resample_f32(&decoded, source_rate, MIX_RATE) {
            Ok(resampled) => resampled,
            Err(_) => vec![0.0f32; MIX_FRAME_SIZE],
        }
    } else {
        decoded
    };

    if slot.codec_pt == codec_lib::PT_OPUS {
        // Opus audio bypasses the denoiser.
        Some(at_mix_rate)
    } else {
        Some(TranscodeState::denoise_f32(&mut slot.denoiser, &at_mix_rate))
    }
}

/// Decode a packet and append its PCM to the leg's playout FIFO.
///
/// Packets that fail to decode, or decode to nothing, are ignored. When the
/// packet's timestamp is ahead of the expected one, the gap is filled with
/// concealment audio; if the gap exceeds `MAX_GAP_FILL_SAMPLES`, the queued
/// PCM is discarded instead. Afterwards the expected timestamp and the
/// packet-duration estimate are updated from the decoded length.
fn queue_inbound_packet(slot: &mut MixerLegSlot, pkt: RtpPacket) {
    let Some(pcm_48k) = decode_packet_to_mix_pcm(slot, &pkt) else {
        return;
    };
    if pcm_48k.is_empty() {
        return;
    }

    if let Some(expected_ts) = slot.expected_rtp_timestamp {
        let gap_ts = pkt.timestamp.wrapping_sub(expected_ts);
        if is_forward_rtp_delta(gap_ts) {
            let gap_samples = rtp_ts_to_mix_samples(slot.codec_pt, gap_ts);
            if gap_samples > MAX_GAP_FILL_SAMPLES {
                // Too large to conceal: drop what is queued and start fresh.
                slot.pcm_buffer.clear();
            } else {
                append_packet_loss_concealment(slot, gap_samples);
            }
        }
    }

    let packet_ts = mix_samples_to_rtp_ts(slot.codec_pt, pcm_48k.len());
    if packet_ts > 0 {
        slot.expected_rtp_timestamp = Some(pkt.timestamp.wrapping_add(packet_ts));
        slot.estimated_packet_ts = packet_ts;
    }
    slot.pcm_buffer.extend(pcm_48k);
}

/// Pull packets from the jitter buffer until one full mix frame is queued.
///
/// Stops early when the jitter buffer reports it is still filling, and never
/// performs more than `MAX_PACKET_STEPS_PER_TICK` decode/concealment steps.
/// A missing packet is replaced by concealment lasting the estimated packet
/// duration (at least one nominal RTP increment), and the expected timestamp
/// is advanced accordingly.
fn fill_leg_playout_buffer(slot: &mut MixerLegSlot) {
    for _ in 0..MAX_PACKET_STEPS_PER_TICK {
        if slot.pcm_buffer.len() >= MIX_FRAME_SIZE {
            return;
        }

        match slot.jitter.consume() {
            JitterResult::Filling => return,
            JitterResult::Packet(pkt) => queue_inbound_packet(slot, pkt),
            JitterResult::Missing => {
                let nominal_ts = rtp_clock_increment(slot.codec_pt);
                let conceal_ts = slot.estimated_packet_ts.max(nominal_ts);
                let conceal_samples =
                    rtp_ts_to_mix_samples(slot.codec_pt, conceal_ts).clamp(1, MAX_GAP_FILL_SAMPLES);
                append_packet_loss_concealment(slot, conceal_samples);
                // Keep the timestamp tracker in step with the audio we synthesized.
                slot.expected_rtp_timestamp = slot
                    .expected_rtp_timestamp
                    .map(|expected_ts| expected_ts.wrapping_add(conceal_ts));
            }
        }
    }
}

/// Remove exactly one mix frame (`MIX_FRAME_SIZE` samples) from the front of
/// the leg's playout FIFO, padding with silence if fewer samples are queued.
fn take_mix_frame(slot: &mut MixerLegSlot) -> Vec<f32> {
    let available = slot.pcm_buffer.len().min(MIX_FRAME_SIZE);
    let mut frame: Vec<f32> = slot.pcm_buffer.drain(..available).collect();
    // Underrun: fill the remainder of the frame with zeros.
    frame.resize(MIX_FRAME_SIZE, 0.0);
    frame
}

/// Soft limiter for a single mixed sample.
///
/// Samples with magnitude up to the knee (0.85) pass through unchanged.
/// Above the knee the excess is compressed so the output approaches, and is
/// clamped at, a magnitude of 1.0 while keeping the sign of the input.
fn soft_limit_sample(sample: f32) -> f32 {
    const KNEE: f32 = 0.85;
    const HEADROOM: f32 = 1.0 - KNEE;

    let magnitude = sample.abs();
    if magnitude <= KNEE {
        return sample;
    }

    let over = magnitude - KNEE;
    let limited = (KNEE + (over / (1.0 + (over / HEADROOM)))).min(1.0);
    sample.signum() * limited
}

fn try_send_leg_output(
    out_tx: &OutTx,
    call_id: &str,
    leg_id: &str,
    slot: &mut MixerLegSlot,
    rtp: Vec<u8>,
    stream: &str,
) {
    let reason = match slot.outbound_tx.try_send(rtp) {
        Ok(()) => return,
        Err(mpsc::error::TrySendError::Full(_)) => "full",
        Err(mpsc::error::TrySendError::Closed(_)) => "closed",
    };

    slot.outbound_drops += 1;
    emit_output_drop_event(
        out_tx,
        call_id,
        Some(leg_id),
        None,
        stream,
        reason,
        slot.outbound_drops,
    );
}

fn try_send_tool_output(
    out_tx: &OutTx,
    call_id: &str,
    tool_leg_id: &str,
    tool: &mut ToolLegSlot,
    batch: ToolAudioBatch,
) {
    let reason = match tool.audio_tx.try_send(batch) {
        Ok(()) => return,
        Err(mpsc::error::TrySendError::Full(_)) => "full",
        Err(mpsc::error::TrySendError::Closed(_)) => "closed",
    };

    tool.dropped_batches += 1;
    emit_output_drop_event(
        out_tx,
        call_id,
        None,
        Some(tool_leg_id),
        "tool-batch",
        reason,
        tool.dropped_batches,
    );
}

/// Signal the background prompt producer (if any) to stop.
///
/// The cancel handle is consumed, so calling this again is a no-op. A send
/// failure is ignored on purpose.
fn cancel_prompt_producer(state: &mut IsolationState) {
    let Some(cancel_handle) = state.prompt_cancel_tx.take() else {
        return;
    };
    let _ = cancel_handle.send(true);
}

/// Abort an in-progress interaction: stop the prompt producer and report
/// `InteractionResult::Cancelled` to whoever is waiting for the outcome.
///
/// The result sender is consumed, so the outcome is delivered at most once;
/// a receiver that has already gone away is ignored.
fn cancel_isolated_interaction(state: &mut IsolationState) {
    cancel_prompt_producer(state);

    let Some(result_sender) = state.result_tx.take() else {
        return;
    };
    let _ = result_sender.send(InteractionResult::Cancelled);
}

fn drain_prompt_stream(
    out_tx: &OutTx,
    call_id: &str,
    leg_id: &str,
    state: &mut IsolationState,
) {
    loop {
        let Some(mut stream_rx) = state.prompt_stream_rx.take() else {
            return;
        };

        match stream_rx.try_recv() {
            Ok(TtsStreamMessage::Frames(frames)) => {
                state.prompt_frames.extend(frames);
                state.prompt_stream_rx = Some(stream_rx);
            }
            Ok(TtsStreamMessage::Finished) => {
                state.prompt_stream_finished = true;
                return;
            }
            Ok(TtsStreamMessage::Failed(error)) => {
                emit_event(
                    out_tx,
                    "mixer_error",
                    serde_json::json!({
                        "call_id": call_id,
                        "leg_id": leg_id,
                        "error": format!("tts stream failed: {error}"),
                    }),
                );
                state.prompt_stream_finished = true;
                return;
            }
            Err(mpsc::error::TryRecvError::Empty) => {
                state.prompt_stream_rx = Some(stream_rx);
                return;
            }
            Err(mpsc::error::TryRecvError::Disconnected) => {
                state.prompt_stream_finished = true;
                return;
            }
        }
    }
}

/// Spawn the mixer task for a call. Returns the command sender and task handle.
///
/// The command channel holds up to 32 pending commands. The task ends when it
/// receives `MixerCommand::Shutdown` or when every sender has been dropped.
pub fn spawn_mixer(call_id: String, out_tx: OutTx) -> (mpsc::Sender<MixerCommand>, JoinHandle<()>) {
    let (command_tx, command_rx) = mpsc::channel::<MixerCommand>(32);
    let task = tokio::spawn(mixer_loop(call_id, command_rx, out_tx));
    (command_tx, task)
}

/// The 20ms mixing loop.
async fn mixer_loop(call_id: String, mut cmd_rx: mpsc::Receiver<MixerCommand>, out_tx: OutTx) {
    let mut legs: HashMap<String, MixerLegSlot> = HashMap::new();
    let mut tool_legs: HashMap<String, ToolLegSlot> = HashMap::new();
    let mut interval = time::interval(Duration::from_millis(20));
    interval.set_missed_tick_behavior(MissedTickBehavior::Skip);

    loop {
        interval.tick().await;

        // ── 1. Process control commands (non-blocking). ─────────────
        loop {
            match cmd_rx.try_recv() {
                Ok(MixerCommand::AddLeg {
                    leg_id,
                    codec_pt,
                    inbound_rx,
                    outbound_tx,
                }) => {
                    let transcoder = match TranscodeState::new() {
                        Ok(t) => t,
                        Err(e) => {
                            emit_event(
                                &out_tx,
                                "mixer_error",
                                serde_json::json!({
                                    "call_id": call_id,
                                    "leg_id": leg_id,
                                    "error": format!("codec init: {e}"),
                                }),
                            );
                            continue;
                        }
                    };
                    legs.insert(
                        leg_id,
                        MixerLegSlot {
                            codec_pt,
                            transcoder,
                            denoiser: new_denoiser(),
                            inbound_rx,
                            outbound_tx,
                            pcm_buffer: VecDeque::new(),
                            last_pcm_frame: vec![0.0f32; MIX_FRAME_SIZE],
                            expected_rtp_timestamp: None,
                            estimated_packet_ts: rtp_clock_increment(codec_pt),
                            silent_ticks: 0,
                            rtp_seq: 0,
                            rtp_ts: 0,
                            rtp_ssrc: rand::random(),
                            outbound_drops: 0,
                            role: LegRole::Participant,
                            jitter: JitterBuffer::new(),
                        },
                    );
                }
                Ok(MixerCommand::RemoveLeg { leg_id }) => {
                    // If the leg is isolated, send Cancelled before dropping.
                    if let Some(slot) = legs.get_mut(&leg_id) {
                        if let LegRole::Isolated(ref mut state) = slot.role {
                            cancel_isolated_interaction(state);
                        }
                    }
                    legs.remove(&leg_id);
                    // Channels drop → I/O tasks exit cleanly.
                }
                Ok(MixerCommand::Shutdown) => {
                    // Cancel all outstanding interactions before shutting down.
                    for slot in legs.values_mut() {
                        if let LegRole::Isolated(ref mut state) = slot.role {
                            cancel_isolated_interaction(state);
                        }
                    }
                    return;
                }
                Ok(MixerCommand::StartInteraction {
                    leg_id,
                    prompt_pcm_frames,
                    prompt_stream_rx,
                    prompt_cancel_tx,
                    expected_digits,
                    timeout_ms,
                    result_tx,
                }) => {
                    if let Some(slot) = legs.get_mut(&leg_id) {
                        // Cancel any existing interaction first.
                        if let LegRole::Isolated(ref mut old_state) = slot.role {
                            cancel_isolated_interaction(old_state);
                        }
                        let timeout_ticks = timeout_ms / 20;
                        slot.role = LegRole::Isolated(IsolationState {
                            prompt_frames: VecDeque::from(prompt_pcm_frames),
                            prompt_stream_rx,
                            prompt_cancel_tx,
                            prompt_stream_finished: false,
                            expected_digits,
                            timeout_ticks_remaining: timeout_ticks,
                            prompt_done: false,
                            result_tx: Some(result_tx),
                        });
                    } else {
                        // Leg not found — immediately cancel.
                        if let Some(cancel_tx) = prompt_cancel_tx {
                            let _ = cancel_tx.send(true);
                        }
                        let _ = result_tx.send(InteractionResult::Cancelled);
                    }
                }
                Ok(MixerCommand::AddToolLeg {
                    leg_id,
                    tool_type,
                    audio_tx,
                }) => {
                    tool_legs.insert(
                        leg_id,
                        ToolLegSlot {
                            tool_type,
                            audio_tx,
                            dropped_batches: 0,
                        },
                    );
                }
                Ok(MixerCommand::RemoveToolLeg { leg_id }) => {
                    tool_legs.remove(&leg_id);
                    // Dropping the ToolLegSlot drops audio_tx → background task sees channel close.
                }
                Err(mpsc::error::TryRecvError::Empty) => break,
                Err(mpsc::error::TryRecvError::Disconnected) => return,
            }
        }

        if legs.is_empty() && tool_legs.is_empty() {
            continue;
        }

        // ── 2. Drain inbound packets, decode to 48kHz f32 PCM. ────
        //    DTMF (PT 101) packets are collected separately.
        //    Audio packets are sorted by sequence number and decoded
        //    in order to maintain codec state (critical for G.722 ADPCM).
        let leg_ids: Vec<String> = legs.keys().cloned().collect();
        let mut dtmf_forward: Vec<(String, RtpPacket)> = Vec::new();

        for lid in &leg_ids {
            let slot = legs.get_mut(lid).unwrap();

            // Step 2a: Drain all pending packets into the jitter buffer.
            let mut got_audio = false;
            loop {
                match slot.inbound_rx.try_recv() {
                    Ok(pkt) => {
                        if pkt.payload_type == 101 {
                            dtmf_forward.push((lid.clone(), pkt));
                        } else {
                            got_audio = true;
                            slot.jitter.push(pkt);
                        }
                    }
                    Err(_) => break,
                }
            }

            // Step 2b: Decode enough RTP to cover one 20ms playout frame.
            // Variable-duration packets (10ms, 20ms, 60ms, ...) accumulate in
            // the per-leg PCM FIFO; we pop exactly one 20ms frame below.
            fill_leg_playout_buffer(slot);
            slot.last_pcm_frame = take_mix_frame(slot);

            // Run jitter adaptation + prune stale packets.
            slot.jitter.adapt();
            slot.jitter.prune_stale();

            // Silent ticks: based on actual network reception, not jitter buffer state.
            if got_audio || dtmf_forward.iter().any(|(src, _)| src == lid) {
                slot.silent_ticks = 0;
            } else {
                slot.silent_ticks += 1;
            }
            if slot.silent_ticks > 150 {
                slot.last_pcm_frame = vec![0.0f32; MIX_FRAME_SIZE];
                slot.pcm_buffer.clear();
                slot.expected_rtp_timestamp = None;
                slot.estimated_packet_ts = rtp_clock_increment(slot.codec_pt);
            }
        }

        // ── 3. Compute total mix from PARTICIPANT legs only. ────────
        //    Accumulate as f64 to prevent precision loss when summing f32.
        let mut total_mix = vec![0.0f64; MIX_FRAME_SIZE];
        for slot in legs.values() {
            if matches!(slot.role, LegRole::Participant) {
                for (i, &s) in slot.last_pcm_frame.iter().enumerate().take(MIX_FRAME_SIZE) {
                    total_mix[i] += s as f64;
                }
            }
        }

        // ── 4. Per-leg output. ──────────────────────────────────────
        // Collect interaction completions to apply after the loop
        // (can't mutate role while iterating mutably for encode).
        let mut completed_interactions: Vec<(String, InteractionResult)> = Vec::new();

        for (lid, slot) in legs.iter_mut() {
            match &mut slot.role {
                LegRole::Participant => {
                    // Mix-minus: total minus this leg's own contribution.
                    // Apply a light soft limiter instead of hard clipping the sum.
                    let mut mix_minus = Vec::with_capacity(MIX_FRAME_SIZE);
                    for i in 0..MIX_FRAME_SIZE {
                        let sample = (total_mix[i] - slot.last_pcm_frame[i] as f64) as f32;
                        mix_minus.push(soft_limit_sample(sample));
                    }

                    // Resample from 48kHz to the leg's codec native rate.
                    let target_rate = codec_sample_rate(slot.codec_pt);
                    let resampled = if target_rate == MIX_RATE {
                        mix_minus
                    } else {
                        slot.transcoder
                            .resample_f32(&mix_minus, MIX_RATE, target_rate)
                            .unwrap_or_default()
                    };

                    // Encode to the leg's codec (f32 → i16 → codec inside encode_from_f32).
                    let encoded = match slot.transcoder.encode_from_f32(&resampled, slot.codec_pt) {
                        Ok(e) if !e.is_empty() => e,
                        _ => continue,
                    };

                    // Build RTP packet with header.
                    let header =
                        build_rtp_header(slot.codec_pt, slot.rtp_seq, slot.rtp_ts, slot.rtp_ssrc);
                    let mut rtp = header.to_vec();
                    rtp.extend_from_slice(&encoded);

                    slot.rtp_seq = slot.rtp_seq.wrapping_add(1);
                    slot.rtp_ts = slot.rtp_ts.wrapping_add(rtp_clock_increment(slot.codec_pt));

                    try_send_leg_output(&out_tx, &call_id, lid, slot, rtp, "participant-audio");
                }
                LegRole::Isolated(state) => {
                    drain_prompt_stream(&out_tx, &call_id, lid, state);

                    // Check for DTMF digit from this leg.
                    let mut matched_digit: Option<char> = None;
                    for (src_lid, dtmf_pkt) in &dtmf_forward {
                        if src_lid == lid && dtmf_pkt.payload.len() >= 4 {
                            let event_id = dtmf_pkt.payload[0];
                            let end_bit = (dtmf_pkt.payload[1] & 0x80) != 0;
                            if end_bit {
                                const EVENT_CHARS: &[char] = &[
                                    '0', '1', '2', '3', '4', '5', '6', '7', '8', '9', '*', '#',
                                    'A', 'B', 'C', 'D',
                                ];
                                if let Some(&ch) = EVENT_CHARS.get(event_id as usize) {
                                    if state.expected_digits.contains(&ch) {
                                        matched_digit = Some(ch);
                                        break;
                                    }
                                }
                            }
                        }
                    }

                    if let Some(digit) = matched_digit {
                        // Interaction complete — digit matched.
                        completed_interactions.push((lid.clone(), InteractionResult::Digit(digit)));
                    } else {
                        // Play prompt frame, wait for live TTS, or move to timeout once the
                        // prompt stream has fully drained.
                        let pcm_frame = if let Some(frame) = state.prompt_frames.pop_front() {
                            frame
                        } else if !state.prompt_stream_finished {
                            vec![0.0f32; MIX_FRAME_SIZE]
                        } else {
                            state.prompt_done = true;
                            vec![0.0f32; MIX_FRAME_SIZE]
                        };

                        // Encode prompt frame to the leg's codec.
                        let target_rate = codec_sample_rate(slot.codec_pt);
                        let resampled = if target_rate == MIX_RATE {
                            pcm_frame
                        } else {
                            slot.transcoder
                                .resample_f32(&pcm_frame, MIX_RATE, target_rate)
                                .unwrap_or_default()
                        };

                        let mut prompt_rtp: Option<Vec<u8>> = None;
                        if let Ok(encoded) =
                            slot.transcoder.encode_from_f32(&resampled, slot.codec_pt)
                        {
                            if !encoded.is_empty() {
                                let header = build_rtp_header(
                                    slot.codec_pt,
                                    slot.rtp_seq,
                                    slot.rtp_ts,
                                    slot.rtp_ssrc,
                                );
                                let mut rtp = header.to_vec();
                                rtp.extend_from_slice(&encoded);
                                slot.rtp_seq = slot.rtp_seq.wrapping_add(1);
                                slot.rtp_ts =
                                    slot.rtp_ts.wrapping_add(rtp_clock_increment(slot.codec_pt));
                                prompt_rtp = Some(rtp);
                            }
                        }

                        // Check timeout (only after prompt finishes).
                        if state.prompt_done {
                            if state.timeout_ticks_remaining == 0 {
                                completed_interactions
                                    .push((lid.clone(), InteractionResult::Timeout));
                            } else {
                                state.timeout_ticks_remaining -= 1;
                            }
                        }

                        if let Some(rtp) = prompt_rtp {
                            try_send_leg_output(
                                &out_tx,
                                &call_id,
                                lid,
                                slot,
                                rtp,
                                "isolated-prompt",
                            );
                        }
                    }
                }
            }
        }

        // Apply completed interactions — revert legs to Participant.
        for (lid, result) in completed_interactions {
            if let Some(slot) = legs.get_mut(&lid) {
                if let LegRole::Isolated(ref mut state) = slot.role {
                    cancel_prompt_producer(state);
                    if let Some(tx) = state.result_tx.take() {
                        let _ = tx.send(result);
                    }
                }
                slot.role = LegRole::Participant;
            }
        }

        // ── 5. Distribute per-source audio to tool legs. ────────────
        if !tool_legs.is_empty() {
            // Collect participant PCM frames (computed in step 2).
            let sources: Vec<ToolAudioSource> = legs
                .iter()
                .filter(|(_, s)| matches!(s.role, LegRole::Participant))
                .map(|(lid, s)| ToolAudioSource {
                    leg_id: lid.clone(),
                    pcm_48k: s.last_pcm_frame.clone(),
                })
                .collect();

            for (tool_leg_id, tool) in tool_legs.iter_mut() {
                let batch = ToolAudioBatch {
                    sources: sources
                        .iter()
                        .map(|s| ToolAudioSource {
                            leg_id: s.leg_id.clone(),
                            pcm_48k: s.pcm_48k.clone(),
                        })
                        .collect(),
                };
                try_send_tool_output(&out_tx, &call_id, tool_leg_id, tool, batch);
            }
        }

        // ── 6. Forward DTMF packets between participant legs only. ──
        for (source_lid, dtmf_pkt) in &dtmf_forward {
            // Skip if the source is an isolated leg (its DTMF was handled in step 4).
            if let Some(src_slot) = legs.get(source_lid) {
                if matches!(src_slot.role, LegRole::Isolated(_)) {
                    continue;
                }
            }
            for (target_lid, target_slot) in legs.iter_mut() {
                if target_lid == source_lid {
                    continue; // Don't echo DTMF back to sender.
                }
                // Don't forward to isolated legs.
                if matches!(target_slot.role, LegRole::Isolated(_)) {
                    continue;
                }
                let mut header = build_rtp_header(
                    101,
                    target_slot.rtp_seq,
                    target_slot.rtp_ts,
                    target_slot.rtp_ssrc,
                );
                if dtmf_pkt.marker {
                    header[1] |= 0x80; // Set marker bit.
                }
                let mut rtp_out = header.to_vec();
                rtp_out.extend_from_slice(&dtmf_pkt.payload);
                target_slot.rtp_seq = target_slot.rtp_seq.wrapping_add(1);
                // Don't increment rtp_ts for DTMF — it shares timestamp context with audio.
                try_send_leg_output(&out_tx, &call_id, target_lid, target_slot, rtp_out, "dtmf");
            }
        }
    }
}