Compare commits
29 Commits
| Author | SHA1 | Date | |
|---|---|---|---|
| 6fcdf4291a | |||
| 81441e7853 | |||
| 21ffc1d017 | |||
| 2f16c5efae | |||
| 254d7f3633 | |||
| 67537664df | |||
| 54129dcdae | |||
| 8c6556dae3 | |||
| 291beb1da4 | |||
| 79147f1e40 | |||
| c3a63a4092 | |||
| 7c4756402e | |||
| b6950e11d2 | |||
| e4935fbf21 | |||
| f543ff1568 | |||
| c63a759689 | |||
| a02146633b | |||
| f78639dd19 | |||
| 2aca5f1510 | |||
| 73b28f5f57 | |||
| 10ad432a4c | |||
| 66112091a2 | |||
| c9ae747c95 | |||
| 45f9b9c15c | |||
| 7d59361352 | |||
| 6a130db7c7 | |||
| 93f671f1f9 | |||
| 36eab44e28 | |||
| 9e5aa35fee |
16
.dockerignore
Normal file
16
.dockerignore
Normal file
@@ -0,0 +1,16 @@
|
||||
node_modules/
|
||||
.nogit/
|
||||
nogit/
|
||||
.git/
|
||||
.playwright-mcp/
|
||||
.vscode/
|
||||
test/
|
||||
dist_rust/
|
||||
dist_ts_web/
|
||||
rust/target/
|
||||
sip_trace.log
|
||||
sip_trace_*.log
|
||||
proxy.out
|
||||
proxy_v2.out
|
||||
*.pid
|
||||
.server.pid
|
||||
32
.gitea/workflows/docker_tags.yaml
Normal file
32
.gitea/workflows/docker_tags.yaml
Normal file
@@ -0,0 +1,32 @@
|
||||
name: Docker (tags)
|
||||
|
||||
on:
|
||||
push:
|
||||
tags:
|
||||
- '*'
|
||||
|
||||
env:
|
||||
IMAGE: code.foss.global/host.today/ht-docker-node:dbase_dind
|
||||
NPMCI_LOGIN_DOCKER_GITEA: ${{ github.server_url }}|${{ gitea.repository_owner }}|${{ secrets.GITEA_TOKEN }}
|
||||
NPMCI_LOGIN_DOCKER_DOCKERREGISTRY: ${{ secrets.NPMCI_LOGIN_DOCKER_DOCKERREGISTRY }}
|
||||
|
||||
jobs:
|
||||
release:
|
||||
if: github.event_name == 'push' && startsWith(github.ref, 'refs/tags/')
|
||||
runs-on: ubuntu-latest
|
||||
container:
|
||||
image: ${{ env.IMAGE }}
|
||||
|
||||
steps:
|
||||
- uses: actions/checkout@v3
|
||||
|
||||
- name: Prepare
|
||||
run: |
|
||||
pnpm install -g pnpm
|
||||
pnpm install -g @git.zone/tsdocker
|
||||
|
||||
- name: Release
|
||||
run: |
|
||||
tsdocker login
|
||||
tsdocker build
|
||||
tsdocker push
|
||||
@@ -8,5 +8,16 @@
|
||||
"production": true
|
||||
}
|
||||
]
|
||||
},
|
||||
"@git.zone/tsrust": {
|
||||
"targets": ["linux_amd64", "linux_arm64"]
|
||||
},
|
||||
"@git.zone/tsdocker": {
|
||||
"registries": ["code.foss.global"],
|
||||
"registryRepoMap": {
|
||||
"code.foss.global": "serve.zone/siprouter",
|
||||
"dockerregistry.lossless.digital": "serve.zone/siprouter"
|
||||
},
|
||||
"platforms": ["linux/amd64", "linux/arm64"]
|
||||
}
|
||||
}
|
||||
|
||||
74
Dockerfile
Normal file
74
Dockerfile
Normal file
@@ -0,0 +1,74 @@
|
||||
# gitzone dockerfile_service
|
||||
## STAGE 1 // BUILD
|
||||
FROM code.foss.global/host.today/ht-docker-node:lts AS build
|
||||
|
||||
# System build tools that the Rust dep tree needs beyond the base image:
|
||||
# - cmake : used by the `cmake` crate (transitive via ort_sys / a webrtc
|
||||
# sub-crate) to build a C/C++ library from source when a
|
||||
# prebuilt-binary download path doesn't apply.
|
||||
# - pkg-config : used by audiopus_sys and other *-sys crates to locate libs
|
||||
# on the native target (safe no-op if they vendor their own).
|
||||
# These are normally pre-installed on dev machines but not in ht-docker-node:lts.
|
||||
RUN apt-get update && apt-get install -y --no-install-recommends \
|
||||
cmake \
|
||||
pkg-config \
|
||||
&& rm -rf /var/lib/apt/lists/*
|
||||
|
||||
# buildx sets TARGETARCH automatically for each platform it's building:
|
||||
# linux/amd64 -> TARGETARCH=amd64
|
||||
# linux/arm64 -> TARGETARCH=arm64
|
||||
# We use it to tell tsrust to build ONLY the current container's arch. This
|
||||
# overrides the `@git.zone/tsrust.targets` list in .smartconfig.json, which is
|
||||
# right for local dev / CI (where you want both binaries) but wrong for per-
|
||||
# platform Docker stages (each stage would then also try to cross-compile to
|
||||
# the OTHER arch — which fails in the arm64 stage because no reverse cross-
|
||||
# toolchain is installed).
|
||||
#
|
||||
# With --target set, tsrust builds a single target natively within whichever
|
||||
# platform this stage is running under (native on amd64, QEMU-emulated on arm64).
|
||||
ARG TARGETARCH
|
||||
|
||||
COPY ./ /app
|
||||
WORKDIR /app
|
||||
RUN pnpm config set store-dir .pnpm-store
|
||||
RUN rm -rf node_modules && pnpm install
|
||||
|
||||
# tsrust --target takes precedence over .smartconfig.json's targets array.
|
||||
# Writes dist_rust/proxy-engine_linux_amd64 or dist_rust/proxy-engine_linux_arm64.
|
||||
# The TS layer (ts/proxybridge.ts buildLocalPaths) picks the right one at runtime
|
||||
# via process.arch.
|
||||
RUN pnpm exec tsrust --target linux_${TARGETARCH}
|
||||
|
||||
# Web bundle (esbuild — pure JS, uses the platform's native esbuild binary
|
||||
# installed by pnpm above, so no cross-bundling concerns).
|
||||
RUN pnpm run bundle
|
||||
|
||||
# Drop pnpm store to keep the image smaller. node_modules stays because the
|
||||
# runtime entrypoint is tsx and siprouter has no separate dist_ts/ to run from.
|
||||
RUN rm -rf .pnpm-store
|
||||
|
||||
## STAGE 2 // PRODUCTION
|
||||
FROM code.foss.global/host.today/ht-docker-node:alpine-node AS production
|
||||
|
||||
# gcompat + libstdc++ let the glibc-linked proxy-engine binary run on Alpine.
|
||||
RUN apk add --no-cache gcompat libstdc++
|
||||
|
||||
WORKDIR /app
|
||||
COPY --from=build /app /app
|
||||
|
||||
ENV SIPROUTER_MODE=OCI_CONTAINER
|
||||
ENV NODE_ENV=production
|
||||
|
||||
LABEL org.opencontainers.image.title="siprouter" \
|
||||
org.opencontainers.image.description="SIP proxy with Rust data plane and WebRTC bridge" \
|
||||
org.opencontainers.image.source="https://code.foss.global/serve.zone/siprouter"
|
||||
|
||||
# 5070 SIP signaling (UDP+TCP)
|
||||
# 5061 SIP-TLS (optional, UDP+TCP)
|
||||
# 3060 Web UI / WebSocket (HTTP or HTTPS, auto-detected from .nogit/cert.pem)
|
||||
# 20000-20200/udp RTP media range (must match config.proxy.rtpPortRange)
|
||||
EXPOSE 5070/udp 5070/tcp 5061/udp 5061/tcp 3060/tcp 20000-20200/udp
|
||||
|
||||
# exec replaces sh as PID 1 with tsx, so SIGINT/SIGTERM reach Node and
|
||||
# ts/sipproxy.ts' shutdown handler (which calls shutdownProxyEngine) runs cleanly.
|
||||
CMD ["sh", "-c", "exec ./node_modules/.bin/tsx ts/sipproxy.ts"]
|
||||
107
changelog.md
107
changelog.md
@@ -1,5 +1,112 @@
|
||||
# Changelog
|
||||
|
||||
## 2026-04-11 - 1.20.2 - fix(proxy-engine)
|
||||
fix inbound route browser ringing and provider-facing SDP advertisement while preventing RTP port exhaustion
|
||||
|
||||
- Honor inbound routing `ringBrowsers` when emitting incoming call events so browser toast notifications can be suppressed per route.
|
||||
- Rewrite SDP and Record-Route using the destination leg's routable address, using `public_ip` for provider legs and LAN IP for device and internal legs.
|
||||
- Store provider leg public IP metadata on legs to support correct per-destination SIP message rewriting.
|
||||
- Change the RTP port pool to track sockets with `Weak<UdpSocket>` so ports are reclaimed automatically after calls end, avoiding leaked allocations and eventual 503 failures on new calls.
|
||||
- Remove unused dashboard/status, DTMF, relay, and transport helper code paths as part of engine cleanup.
|
||||
|
||||
## 2026-04-11 - 1.20.1 - fix(docker)
|
||||
install required native build tools for Rust dependencies in the build image
|
||||
|
||||
- Add cmake and pkg-config to the Docker build stage so Rust native dependencies can compile successfully in the container
|
||||
- Document why these tools are needed for transitive Rust crates that build or detect native libraries
|
||||
|
||||
## 2026-04-11 - 1.20.0 - feat(docker)
|
||||
add multi-arch Docker build and tagged release pipeline
|
||||
|
||||
- Add a production Dockerfile for building and running the SIP router with the Rust proxy engine and web bundle
|
||||
- Configure tsdocker and tsrust for linux/amd64 and linux/arm64 image builds and registry mapping
|
||||
- Add a tag-triggered Gitea workflow to build and push Docker images
|
||||
- Update runtime binary resolution to load architecture-specific Rust artifacts in Docker and CI environments
|
||||
- Add Docker-related package scripts, dependency updates, and ignore rules for container builds
|
||||
|
||||
## 2026-04-11 - 1.19.2 - fix(web-ui)
|
||||
normalize lucide icon names across SIP proxy views
|
||||
|
||||
- Updates icon identifiers to the expected PascalCase lucide format in app navigation, calls, IVR, overview, providers, and voicemail views.
|
||||
- Fixes UI icon rendering for stats cards and action menus such as transfer, delete, status, and call direction indicators.
|
||||
|
||||
## 2026-04-10 - 1.19.1 - fix(readme)
|
||||
refresh documentation for jitter buffering, voicemail, and WebSocket signaling details
|
||||
|
||||
- Add adaptive jitter buffer and packet loss concealment details to the audio pipeline documentation
|
||||
- Document voicemail unheard count and heard-state API endpoints
|
||||
- Update WebSocket event and browser signaling examples to reflect current message types
|
||||
|
||||
## 2026-04-10 - 1.19.0 - feat(proxy-engine,codec-lib)
|
||||
add adaptive RTP jitter buffering with Opus packet loss concealment and stable 20ms resampling
|
||||
|
||||
- introduces a per-leg adaptive jitter buffer in the mixer to reorder RTP packets, gate initial playout, and deliver one frame per 20ms tick
|
||||
- adds Opus PLC support to synthesize missing audio frames when packets are lost, with fade-based fallback handling for non-Opus codecs
|
||||
- updates i16 and f32 resamplers to use canonical 20ms chunks so cached resamplers preserve filter state and avoid variable-size cache thrashing
|
||||
|
||||
## 2026-04-10 - 1.18.0 - feat(readme)
|
||||
expand documentation for voicemail, IVR, audio engine, and API capabilities
|
||||
|
||||
- Updates the feature overview to document voicemail, IVR menus, call recording, enhanced TTS, and the 48kHz float audio engine
|
||||
- Refreshes the architecture section to describe the TypeScript control plane, Rust proxy-engine data plane, and JSON-over-stdio IPC
|
||||
- Clarifies REST API and WebSocket coverage with voicemail endpoints, incoming call events, and refined endpoint descriptions
|
||||
|
||||
## 2026-04-10 - 1.17.2 - fix(proxy-engine)
|
||||
use negotiated SDP payload types when wiring SIP legs and enable default nnnoiseless features for telephony denoising
|
||||
|
||||
- Select the negotiated codec payload type from SDP answers instead of always using the first offered codec
|
||||
- Preserve the device leg's preferred payload type from its own INVITE SDP when attaching it to the mixer
|
||||
- Enable default nnnoiseless features in codec-lib and proxy-engine dependencies
|
||||
|
||||
## 2026-04-10 - 1.17.1 - fix(proxy-engine,codec-lib,sip-proto,ts)
|
||||
preserve negotiated media details and improve RTP audio handling across call legs
|
||||
|
||||
- Use native Opus float encode/decode to avoid unnecessary i16 quantization in the f32 audio path.
|
||||
- Parse full RTP headers including extensions and sequence numbers, then sort inbound packets before decoding to keep codec state stable for out-of-order audio.
|
||||
- Capture negotiated codec payload types from SDP offers and answers and include codec, RTP port, remote media, and metadata in leg_added events.
|
||||
- Emit leg_state_changed and leg_removed events more consistently so the dashboard reflects leg lifecycle updates accurately.
|
||||
|
||||
## 2026-04-10 - 1.17.0 - feat(proxy-engine)
|
||||
upgrade the internal audio bus to 48kHz f32 with per-leg denoising and improve SIP leg routing
|
||||
|
||||
- switch mixer, prompt playback, and tool leg audio handling from 16kHz i16 to 48kHz f32 for higher-quality internal processing
|
||||
- add f32 decode/encode and resampling support plus standalone RNNoise denoiser creation in codec-lib
|
||||
- apply per-leg inbound noise suppression in the mixer before mix-minus generation
|
||||
- fix passthrough call routing by matching the actual leg from the signaling source address when Call-IDs are shared
|
||||
- correct dialed number extraction from bare SIP request URIs by parsing the user part directly
|
||||
|
||||
## 2026-04-10 - 1.16.0 - feat(proxy-engine)
|
||||
integrate Kokoro TTS generation into proxy-engine and simplify TypeScript prompt handling to use cached WAV files
|
||||
|
||||
- adds a generate_tts command to proxy-engine with lazy-loaded Kokoro model support and WAV output generation
|
||||
- removes standalone opus-codec and tts-engine workspace binaries by consolidating TTS generation into proxy-engine
|
||||
- updates announcement and prompt cache flows to generate and cache WAV files on disk instead of pre-encoding RTP frames in TypeScript
|
||||
|
||||
## 2026-04-10 - 1.15.0 - feat(proxy-engine)
|
||||
add device leg, leg transfer, and leg replacement call controls
|
||||
|
||||
- adds proxy-engine commands and call manager support for inviting a registered SIP device into an active call
|
||||
- supports transferring an existing leg between calls while preserving the active connection and updating mixer routing
|
||||
- supports replacing a call leg by removing the current leg and dialing a new outbound destination
|
||||
- wires the frontend add-leg API and TypeScript bridge to the new device leg and leg control commands
|
||||
|
||||
## 2026-04-10 - 1.14.0 - feat(proxy-engine)
|
||||
add multiparty call mixing with dynamic SIP and WebRTC leg management
|
||||
|
||||
- replace passthrough call handling with a mixer-backed call model that tracks multiple legs and exposes leg status in call state output
|
||||
- add mixer and leg I/O infrastructure to bridge SIP RTP and WebRTC audio through channel-based mix-minus processing
|
||||
- introduce add_leg and remove_leg proxy commands and wire frontend bridge APIs to manage external call legs
|
||||
- emit leg lifecycle events for observability and mark unimplemented device-leg and transfer HTTP endpoints with 501 responses
|
||||
|
||||
## 2026-04-10 - 1.13.0 - feat(proxy-engine,webrtc)
|
||||
add B2BUA SIP leg handling and WebRTC call bridging for outbound calls
|
||||
|
||||
- introduce a new SipLeg module to manage outbound provider dialogs, including INVITE lifecycle, digest auth retries, ACK handling, media endpoint tracking, and termination
|
||||
- store outbound dashboard calls as B2BUA calls in the call manager and emit provider media details on call_answered for bridge setup
|
||||
- separate SIP and WebRTC engine locking to avoid contention and deadlocks while linking sessions to call RTP sockets
|
||||
- add bidirectional RTP bridging between provider SIP media and browser WebRTC audio using the allocated RTP socket
|
||||
- wire browser webrtc-accept events in the frontend and sipproxy so session-to-call linking can occur when media and acceptance arrive in either order
|
||||
|
||||
## 2026-04-10 - 1.12.0 - feat(proxy-engine)
|
||||
add Rust-based outbound calling, WebRTC bridging, and voicemail handling
|
||||
|
||||
|
||||
BIN
nogit/voicemail/default/msg-1775825168199.wav
Normal file
BIN
nogit/voicemail/default/msg-1775825168199.wav
Normal file
Binary file not shown.
BIN
nogit/voicemail/default/msg-1775840000387.wav
Normal file
BIN
nogit/voicemail/default/msg-1775840000387.wav
Normal file
Binary file not shown.
BIN
nogit/voicemail/default/msg-1775840014276.wav
Normal file
BIN
nogit/voicemail/default/msg-1775840014276.wav
Normal file
Binary file not shown.
BIN
nogit/voicemail/default/msg-1775840439400.wav
Normal file
BIN
nogit/voicemail/default/msg-1775840439400.wav
Normal file
Binary file not shown.
BIN
nogit/voicemail/default/msg-1775840447441.wav
Normal file
BIN
nogit/voicemail/default/msg-1775840447441.wav
Normal file
Binary file not shown.
BIN
nogit/voicemail/default/msg-1775840454835.wav
Normal file
BIN
nogit/voicemail/default/msg-1775840454835.wav
Normal file
Binary file not shown.
@@ -1,11 +1,14 @@
|
||||
{
|
||||
"name": "siprouter",
|
||||
"version": "1.12.0",
|
||||
"version": "1.20.2",
|
||||
"private": true,
|
||||
"type": "module",
|
||||
"scripts": {
|
||||
"bundle": "node node_modules/.pnpm/esbuild@0.27.7/node_modules/esbuild/bin/esbuild ts_web/index.ts --bundle --format=esm --outfile=dist_ts_web/bundle.js --platform=browser --target=es2022 --minify",
|
||||
"buildRust": "tsrust",
|
||||
"build": "pnpm run buildRust && pnpm run bundle",
|
||||
"build:docker": "tsdocker build --verbose",
|
||||
"release:docker": "tsdocker push --verbose",
|
||||
"start": "tsx ts/sipproxy.ts",
|
||||
"restartBackground": "pnpm run buildRust && pnpm run bundle; test -f .server.pid && kill $(cat .server.pid) 2>/dev/null; sleep 1; rm -f sip_trace.log proxy.out && nohup tsx ts/sipproxy.ts > proxy.out 2>&1 & echo $! > .server.pid; sleep 2; cat proxy.out"
|
||||
},
|
||||
@@ -14,10 +17,12 @@
|
||||
"@design.estate/dees-element": "^2.2.4",
|
||||
"@push.rocks/smartrust": "^1.3.2",
|
||||
"@push.rocks/smartstate": "^2.3.0",
|
||||
"tsx": "^4.21.0",
|
||||
"ws": "^8.20.0"
|
||||
},
|
||||
"devDependencies": {
|
||||
"@git.zone/tsbundle": "^2.10.0",
|
||||
"@git.zone/tsdocker": "^2.2.4",
|
||||
"@git.zone/tsrust": "^1.3.2",
|
||||
"@git.zone/tswatch": "^3.3.2",
|
||||
"@types/ws": "^8.18.1"
|
||||
|
||||
650
pnpm-lock.yaml
generated
650
pnpm-lock.yaml
generated
File diff suppressed because it is too large
Load Diff
326
readme.md
326
readme.md
@@ -1,6 +1,6 @@
|
||||
# @serve.zone/siprouter
|
||||
|
||||
A production-grade **SIP B2BUA + WebRTC bridge** built with TypeScript and Rust. Routes calls between SIP providers, SIP hardware devices, and browser softphones — with real-time codec transcoding, ML noise suppression, neural TTS announcements, and a slick web dashboard.
|
||||
A production-grade **SIP B2BUA + WebRTC bridge** built with TypeScript and Rust. Routes calls between SIP providers, SIP hardware devices, and browser softphones — with real-time codec transcoding, adaptive jitter buffering, ML noise suppression, neural TTS, voicemail, IVR menus, and a slick web dashboard.
|
||||
|
||||
## Issue Reporting and Security
|
||||
|
||||
@@ -12,14 +12,17 @@ For reporting bugs, issues, or security vulnerabilities, please visit [community
|
||||
|
||||
siprouter sits between your SIP trunk providers and your endpoints — hardware phones, ATAs, browser softphones — and handles **everything** in between:
|
||||
|
||||
- 📞 **SIP B2BUA** — Terminates and re-originates calls with full RFC 3261 dialog state management
|
||||
- 🌐 **WebRTC Bridge** — Browser-based softphone with bidirectional audio to the SIP network
|
||||
- 🎛️ **Multi-Provider Trunking** — Register with multiple SIP providers simultaneously (sipgate, easybell, o2, etc.)
|
||||
- 🔊 **Rust Codec Engine** — Real-time Opus ↔ G.722 ↔ PCMU ↔ PCMA transcoding in native Rust
|
||||
- 🤖 **ML Noise Suppression** — RNNoise denoiser with per-direction state (to SIP / to browser)
|
||||
- 🗣️ **Neural TTS** — Kokoro-powered "connecting your call" announcements, pre-encoded for instant playback
|
||||
- 🔀 **Hub Model Calls** — N-leg calls with dynamic add/remove, transfer, and RTP fan-out
|
||||
- 🖥️ **Web Dashboard** — Real-time SPA with live call monitoring, browser phone, contact management, provider config
|
||||
- 📞 **SIP B2BUA** — Terminates and re-originates calls with full RFC 3261 dialog state management, digest auth, and SDP negotiation
|
||||
- 🌐 **WebRTC Bridge** — Browser-based softphone with bidirectional Opus audio to the SIP network
|
||||
- 🎛️ **Multi-Provider Trunking** — Register with multiple SIP providers simultaneously (sipgate, easybell, etc.) with automatic failover
|
||||
- 🎧 **48kHz f32 Audio Engine** — High-fidelity internal audio bus at 48kHz/32-bit float with native Opus float encode/decode, FFT-based resampling, and per-leg ML noise suppression
|
||||
- 🔀 **N-Leg Mix-Minus Mixer** — Conference-grade mixing with dynamic leg add/remove, transfer, and per-source audio separation
|
||||
- 🎯 **Adaptive Jitter Buffer** — Per-leg jitter buffering with sequence-based reordering, adaptive depth (60–120ms), Opus PLC for lost packets, and hold/resume detection
|
||||
- 📧 **Voicemail** — Configurable voicemail boxes with TTS greetings, recording, and web playback
|
||||
- 🔢 **IVR Menus** — DTMF-navigable interactive voice response with nested menus, routing actions, and custom prompts
|
||||
- 🗣️ **Neural TTS** — Kokoro-powered announcements and greetings with 25+ voice presets, backed by espeak-ng fallback
|
||||
- 🎙️ **Call Recording** — Per-source separated WAV recording at 48kHz via tool legs
|
||||
- 🖥️ **Web Dashboard** — Real-time SPA with 9 views: live calls, browser phone, routing, voicemail, IVR, contacts, providers, and streaming logs
|
||||
|
||||
---
|
||||
|
||||
@@ -35,32 +38,38 @@ siprouter sits between your SIP trunk providers and your endpoints — hardware
|
||||
┌──────────────────────────────────────┐
|
||||
│ siprouter │
|
||||
│ │
|
||||
│ ┌──────────┐ ┌──────────────────┐ │
|
||||
│ │ Call Hub │ │ Rust Transcoder │ │
|
||||
│ │ N legs │──│ Opus/G.722/PCM │ │
|
||||
│ │ fan-out │ │ + RNNoise │ │
|
||||
│ └────┬─────┘ └──────────────────┘ │
|
||||
│ │ │
|
||||
│ ┌────┴─────┐ ┌──────────────────┐ │
|
||||
│ │ SIP Stack│ │ Kokoro TTS │ │
|
||||
│ │ Dialog SM│ │ (ONNX Runtime) │ │
|
||||
│ └────┬─────┘ └──────────────────┘ │
|
||||
│ │ │
|
||||
│ ┌────┴──────────────────────────┐ │
|
||||
│ │ Local Registrar + Provider │ │
|
||||
│ │ Registration Engine │ │
|
||||
│ └───────────────────────────────┘ │
|
||||
└──────────┬──────────────┬────────────┘
|
||||
│ │
|
||||
┌──────┴──────┐ ┌─────┴──────┐
|
||||
│ SIP Devices │ │ SIP Trunk │
|
||||
│ (HT801, etc)│ │ Providers │
|
||||
└─────────────┘ └────────────┘
|
||||
│ TypeScript Control Plane │
|
||||
│ ┌────────────────────────────────┐ │
|
||||
│ │ Config · WebRTC Signaling │ │
|
||||
│ │ REST API · Web Dashboard │ │
|
||||
│ │ Voicebox Manager · TTS Cache │ │
|
||||
│ └────────────┬───────────────────┘ │
|
||||
│ JSON-over-stdio IPC │
|
||||
│ ┌────────────┴───────────────────┐ │
|
||||
│ │ Rust proxy-engine (data plane) │ │
|
||||
│ │ │ │
|
||||
│ │ SIP Stack · Dialog SM · Auth │ │
|
||||
│ │ Call Manager · N-Leg Mixer │ │
|
||||
│ │ 48kHz f32 Bus · Jitter Buffer │ │
|
||||
│ │ Codec Engine · RTP Port Pool │ │
|
||||
│ │ WebRTC Engine · Kokoro TTS │ │
|
||||
│ │ Voicemail · IVR · Recording │ │
|
||||
│ └────┬──────────────────┬────────┘ │
|
||||
└───────┤──────────────────┤───────────┘
|
||||
│ │
|
||||
┌──────┴──────┐ ┌──────┴──────┐
|
||||
│ SIP Devices │ │ SIP Trunk │
|
||||
│ (HT801 etc) │ │ Providers │
|
||||
└─────────────┘ └─────────────┘
|
||||
```
|
||||
|
||||
### The Hub Model
|
||||
### 🧠 Key Design Decisions
|
||||
|
||||
Every call is a **hub** with N legs. Each leg is either a `SipLeg` (hardware device or provider) or a `WebRtcLeg` (browser). RTP flows through the hub — each leg's received audio is forwarded to all other legs, with codec transcoding handled transparently by the Rust engine.
|
||||
- **Hub Model** — Every call is a hub with N legs. Each leg is a `SipLeg` (device/provider) or `WebRtcLeg` (browser). Legs can be dynamically added, removed, or transferred without tearing down the call.
|
||||
- **Rust Data Plane** — All SIP protocol handling, codec transcoding, mixing, and RTP I/O runs in native Rust for real-time performance. TypeScript handles config, signaling, REST API, and dashboard.
|
||||
- **48kHz f32 Internal Bus** — Audio is processed at maximum quality internally. Encoding/decoding to wire format (G.722, PCMU, Opus) happens solely at the leg boundary.
|
||||
- **Per-Session Codec Isolation** — Each call leg gets its own encoder/decoder/resampler/denoiser state — no cross-call corruption.
|
||||
- **SDP Codec Negotiation** — Outbound encoding uses the codec actually negotiated in SDP answers, not just the first offered codec.
|
||||
|
||||
---
|
||||
|
||||
@@ -70,15 +79,16 @@ Every call is a **hub** with N legs. Each leg is either a `SipLeg` (hardware dev
|
||||
|
||||
- **Node.js** ≥ 20 with `tsx` globally available
|
||||
- **pnpm** for package management
|
||||
- **Rust** toolchain (for building the codec engine and TTS)
|
||||
- **Rust** toolchain (for building the proxy engine)
|
||||
- **espeak-ng** (optional, for TTS fallback)
|
||||
|
||||
### Install & Build
|
||||
|
||||
```bash
|
||||
# Clone and install
|
||||
# Clone and install dependencies
|
||||
pnpm install
|
||||
|
||||
# Build the Rust binaries (opus-codec + tts-engine)
|
||||
# Build the Rust proxy-engine binary
|
||||
pnpm run buildRust
|
||||
|
||||
# Bundle the web frontend
|
||||
@@ -87,57 +97,92 @@ pnpm run bundle
|
||||
|
||||
### Configuration
|
||||
|
||||
Create `.nogit/config.json` with your setup:
|
||||
Create `.nogit/config.json`:
|
||||
|
||||
```jsonc
|
||||
{
|
||||
"proxy": {
|
||||
"lanIp": "192.168.1.100", // Your server's LAN IP
|
||||
"lanPort": 5070, // SIP signaling port
|
||||
"rtpPortRange": [20000, 20200],// RTP relay port pool (even ports)
|
||||
"webUiPort": 3060 // Dashboard port
|
||||
"lanIp": "192.168.1.100", // Your server's LAN IP
|
||||
"lanPort": 5070, // SIP signaling port
|
||||
"publicIpSeed": "stun.example.com", // STUN server for public IP discovery
|
||||
"rtpPortRange": { "min": 20000, "max": 20200 }, // RTP port pool (even ports)
|
||||
"webUiPort": 3060 // Dashboard + REST API port
|
||||
},
|
||||
"providers": [
|
||||
{
|
||||
"id": "my-trunk",
|
||||
"name": "My SIP Provider",
|
||||
"host": "sip.provider.com",
|
||||
"port": 5060,
|
||||
"displayName": "My SIP Provider",
|
||||
"domain": "sip.provider.com",
|
||||
"outboundProxy": { "address": "sip.provider.com", "port": 5060 },
|
||||
"username": "user",
|
||||
"password": "pass",
|
||||
"codecs": ["G.722", "PCMA", "PCMU"],
|
||||
"registerExpiry": 3600
|
||||
"codecs": [9, 0, 8, 101], // G.722, PCMU, PCMA, telephone-event
|
||||
"registerIntervalSec": 300
|
||||
}
|
||||
],
|
||||
"devices": [
|
||||
{
|
||||
"id": "desk-phone",
|
||||
"name": "Desk Phone",
|
||||
"type": "sip"
|
||||
"displayName": "Desk Phone",
|
||||
"expectedAddress": "192.168.1.50",
|
||||
"extension": "100"
|
||||
}
|
||||
],
|
||||
"routing": {
|
||||
"inbound": {
|
||||
"default": { "target": "all-devices", "ringBrowser": true }
|
||||
"routes": [
|
||||
{
|
||||
"id": "inbound-default",
|
||||
"name": "Ring all devices",
|
||||
"priority": 100,
|
||||
"direction": "inbound",
|
||||
"match": {},
|
||||
"action": {
|
||||
"targets": ["desk-phone"],
|
||||
"ringBrowsers": true,
|
||||
"voicemailBox": "main",
|
||||
"noAnswerTimeout": 25
|
||||
}
|
||||
},
|
||||
{
|
||||
"id": "outbound-default",
|
||||
"name": "Route via trunk",
|
||||
"priority": 100,
|
||||
"direction": "outbound",
|
||||
"match": {},
|
||||
"action": { "provider": "my-trunk" }
|
||||
}
|
||||
]
|
||||
},
|
||||
"voiceboxes": [
|
||||
{
|
||||
"id": "main",
|
||||
"enabled": true,
|
||||
"greetingText": "Please leave a message after the beep.",
|
||||
"greetingVoice": "af_bella",
|
||||
"noAnswerTimeoutSec": 25,
|
||||
"maxRecordingSec": 120,
|
||||
"maxMessages": 50
|
||||
}
|
||||
}
|
||||
],
|
||||
"contacts": [
|
||||
{ "id": "1", "name": "Alice", "number": "+491234567890", "starred": true }
|
||||
]
|
||||
}
|
||||
```
|
||||
|
||||
### TTS Setup (Optional)
|
||||
|
||||
For neural "connecting your call" announcements, download the Kokoro TTS model:
|
||||
For neural announcements and voicemail greetings, download the Kokoro TTS model:
|
||||
|
||||
```bash
|
||||
mkdir -p .nogit/tts
|
||||
# Download the full-quality model (310MB) + voices (27MB)
|
||||
curl -L -o .nogit/tts/kokoro-v1.0.onnx \
|
||||
https://github.com/mzdk100/kokoro/releases/download/V1.0/kokoro-v1.0.onnx
|
||||
curl -L -o .nogit/tts/voices.bin \
|
||||
https://github.com/mzdk100/kokoro/releases/download/V1.0/voices.bin
|
||||
```
|
||||
|
||||
If the model files aren't present, the announcement feature is simply disabled — everything else works fine.
|
||||
Without the model files, TTS falls back to `espeak-ng`. Without either, announcements are skipped — everything else works fine.
|
||||
|
||||
### Run
|
||||
|
||||
@@ -145,7 +190,7 @@ If the model files aren't present, the announcement feature is simply disabled
|
||||
pnpm start
|
||||
```
|
||||
|
||||
The SIP proxy starts on the configured port and the web dashboard is available at `http://<your-ip>:3060`.
|
||||
The SIP proxy starts on the configured port and the web dashboard is available at `https://<your-ip>:3060`.
|
||||
|
||||
### HTTPS (Optional)
|
||||
|
||||
@@ -157,68 +202,93 @@ Place `cert.pem` and `key.pem` in `.nogit/` for TLS on the dashboard.
|
||||
|
||||
```
|
||||
siprouter/
|
||||
├── ts/ # TypeScript source
|
||||
│ ├── sipproxy.ts # Main entry — bootstraps everything
|
||||
│ ├── config.ts # Config loader & validation
|
||||
│ ├── registrar.ts # Local SIP registrar for devices
|
||||
│ ├── providerstate.ts # Per-provider upstream registration engine
|
||||
│ ├── frontend.ts # Web dashboard HTTP/WS server + REST API
|
||||
│ ├── webrtcbridge.ts # WebRTC signaling layer
|
||||
│ ├── opusbridge.ts # Rust IPC bridge (smartrust)
|
||||
│ ├── codec.ts # High-level RTP transcoding interface
|
||||
│ ├── announcement.ts # Neural TTS announcement generator
|
||||
│ ├── sip/ # Zero-dependency SIP protocol library
|
||||
│ │ ├── message.ts # SIP message parser/builder/mutator
|
||||
│ │ ├── dialog.ts # RFC 3261 dialog state machine
|
||||
│ │ ├── helpers.ts # SDP builder, digest auth, codec registry
|
||||
│ │ └── rewrite.ts # SIP URI + SDP body rewriting
|
||||
│ └── call/ # Hub-model call management
|
||||
│ ├── call-manager.ts # Central registry, factory, routing
|
||||
│ ├── call.ts # Call hub — owns N legs, media fan-out
|
||||
│ ├── sip-leg.ts # SIP device/provider connection
|
||||
│ ├── webrtc-leg.ts # Browser WebRTC connection
|
||||
│ └── rtp-port-pool.ts # UDP port allocation
|
||||
├── ts_web/ # Web frontend (Lit-based SPA)
|
||||
│ ├── elements/ # Web components (dashboard, phone, etc.)
|
||||
│ └── state/ # App state, WebRTC client, notifications
|
||||
├── rust/ # Rust workspace
|
||||
├── ts/ # TypeScript control plane
|
||||
│ ├── sipproxy.ts # Main entry — bootstraps everything
|
||||
│ ├── config.ts # Config loader & validation
|
||||
│ ├── proxybridge.ts # Rust proxy-engine IPC bridge (smartrust)
|
||||
│ ├── frontend.ts # Web dashboard HTTP/WS server + REST API
|
||||
│ ├── webrtcbridge.ts # WebRTC signaling layer
|
||||
│ ├── registrar.ts # Browser softphone registration
|
||||
│ ├── announcement.ts # TTS announcement generator (espeak-ng / Kokoro)
|
||||
│ ├── voicebox.ts # Voicemail box management
|
||||
│ └── call/
|
||||
│ └── prompt-cache.ts # Named audio prompt WAV management
|
||||
│
|
||||
├── ts_web/ # Web frontend (Lit-based SPA)
|
||||
│ ├── elements/ # Web components (9 dashboard views)
|
||||
│ └── state/ # App state, WebRTC client, notifications
|
||||
│
|
||||
├── rust/ # Rust workspace (the data plane)
|
||||
│ └── crates/
|
||||
│ ├── opus-codec/ # Real-time audio transcoder (Opus/G.722/PCM)
|
||||
│ └── tts-engine/ # Kokoro neural TTS CLI
|
||||
├── html/ # Static HTML shell
|
||||
├── .nogit/ # Secrets, config, models (gitignored)
|
||||
└── dist_rust/ # Compiled Rust binaries (gitignored)
|
||||
│ ├── codec-lib/ # Audio codec library (Opus/G.722/PCMU/PCMA)
|
||||
│ ├── sip-proto/ # Zero-dependency SIP protocol library
|
||||
│ └── proxy-engine/ # Main binary — SIP engine + mixer + RTP
|
||||
│
|
||||
├── html/ # Static HTML shell
|
||||
├── .nogit/ # Secrets, config, TTS models (gitignored)
|
||||
└── dist_rust/ # Compiled Rust binary (gitignored)
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
## 🎧 Codec Engine (Rust)
|
||||
## 🎧 Audio Engine (Rust)
|
||||
|
||||
The `opus-codec` binary handles all real-time audio processing via a JSON-over-stdio IPC protocol:
|
||||
The `proxy-engine` binary handles all real-time audio processing with a **48kHz f32 internal bus** — encoding and decoding happens only at leg boundaries.
|
||||
|
||||
| Codec | Payload Type | Sample Rate | Use Case |
|
||||
|-------|-------------|-------------|----------|
|
||||
| **Opus** | 111 | 48 kHz | WebRTC browsers |
|
||||
| **G.722** | 9 | 16 kHz | HD SIP devices |
|
||||
### Supported Codecs
|
||||
|
||||
| Codec | PT | Native Rate | Use Case |
|
||||
|-------|:--:|:-----------:|----------|
|
||||
| **Opus** | 111 | 48 kHz | WebRTC browsers (native float encode/decode — zero i16 quantization) |
|
||||
| **G.722** | 9 | 16 kHz | HD SIP devices & providers |
|
||||
| **PCMU** (G.711 µ-law) | 0 | 8 kHz | Legacy SIP |
|
||||
| **PCMA** (G.711 A-law) | 8 | 8 kHz | Legacy SIP |
|
||||
|
||||
**Features:**
|
||||
- Per-call isolated codec sessions (no cross-call state corruption)
|
||||
- FFT-based sample rate conversion via `rubato`
|
||||
- **RNNoise ML noise suppression** with per-direction state — denoises audio flowing to SIP separately from audio flowing to the browser
|
||||
- Raw PCM encoding for TTS frame processing
|
||||
### Audio Pipeline
|
||||
|
||||
```
|
||||
Inbound: Wire RTP → Jitter Buffer → Decode → Resample to 48kHz → Denoise (RNNoise) → Mix Bus
|
||||
Outbound: Mix Bus → Mix-Minus → Resample to codec rate → Encode → Wire RTP
|
||||
```
|
||||
|
||||
- **Adaptive jitter buffer** — per-leg `BTreeMap`-based buffer keyed by RTP sequence number. Delivers exactly one frame per 20ms mixer tick in sequence order. Adaptive target depth starts at 3 frames (60ms) and adjusts between 2–6 frames based on observed network jitter. Handles hold/resume by detecting large forward sequence jumps and resetting cleanly.
|
||||
- **Packet loss concealment (PLC)** — on missing packets, Opus legs invoke the decoder's built-in PLC (`decode(None)`) to synthesize a smooth fill frame. Non-Opus legs (G.722, PCMU) apply exponential fade (0.85×) toward silence to avoid hard discontinuities.
|
||||
- **FFT-based resampling** via `rubato` — high-quality sinc interpolation with canonical 20ms chunk sizes to ensure consistent resampler state across frames, preventing filter discontinuities
|
||||
- **ML noise suppression** via `nnnoiseless` (RNNoise) — per-leg inbound denoising with SIMD acceleration (AVX/SSE). Skipped for WebRTC legs (browsers already denoise via getUserMedia)
|
||||
- **Mix-minus mixing** — each participant hears everyone except themselves, accumulated in f64 precision
|
||||
- **RFC 3550 compliant header parsing** — properly handles CSRC lists and header extensions
|
||||
|
||||
---
|
||||
|
||||
## 🗣️ Neural TTS (Rust)
|
||||
## 🗣️ Neural TTS
|
||||
|
||||
The `tts-engine` binary uses [Kokoro TTS](https://github.com/mzdk100/kokoro) (82M parameter neural model) to synthesize announcements at startup:
|
||||
Announcements and voicemail greetings are synthesized using [Kokoro TTS](https://github.com/mzdk100/kokoro) — an 82M parameter neural model running via ONNX Runtime directly in the Rust process:
|
||||
|
||||
- **24 kHz, 16-bit mono** output
|
||||
- **25+ voice presets** — American/British, male/female (e.g., `af_bella`, `am_adam`, `bf_emma`, `bm_george`)
|
||||
- **~800ms** synthesis time for a 3-second announcement
|
||||
- Pre-encoded to G.722 + Opus for zero-latency RTP playback during call setup
|
||||
- **~800ms** synthesis time for a 3-second phrase
|
||||
- Lazy-loaded on first use — no startup cost if TTS is unused
|
||||
- Falls back to `espeak-ng` if the ONNX model is not available
|
||||
|
||||
---
|
||||
|
||||
## 📧 Voicemail
|
||||
|
||||
- Configurable voicemail boxes with custom TTS greetings (text + voice) or uploaded WAV
|
||||
- Automatic routing on no-answer timeout (configurable, default 25s)
|
||||
- Recording with configurable max duration (default 120s) and message count limit (default 50)
|
||||
- Unheard message tracking for MWI (message waiting indication)
|
||||
- Web dashboard playback and management
|
||||
- WAV storage in `.nogit/voicemail/`
|
||||
|
||||
---
|
||||
|
||||
## 🔢 IVR (Interactive Voice Response)
|
||||
|
||||
- DTMF-navigable menus with configurable entries
|
||||
- Actions: route to extension, route to voicemail, transfer, submenu, hangup, repeat prompt
|
||||
- Custom TTS prompts per menu
|
||||
- Nested menu support
|
||||
|
||||
---
|
||||
|
||||
@@ -228,33 +298,54 @@ The `tts-engine` binary uses [Kokoro TTS](https://github.com/mzdk100/kokoro) (82
|
||||
|
||||
| View | Description |
|
||||
|------|-------------|
|
||||
| **Overview** | Stats tiles — uptime, providers, devices, active calls |
|
||||
| **Calls** | Active calls with leg details, codec info, packet counters. Add/remove legs, transfer, hangup |
|
||||
| **Phone** | Browser softphone — mic/speaker selection, audio meters, dial pad, incoming call popup |
|
||||
| **Contacts** | Contact management with click-to-call |
|
||||
| **Providers** | SIP trunk config with registration status |
|
||||
| **Log** | Live streaming log viewer |
|
||||
| 📊 **Overview** | Stats tiles — uptime, providers, devices, active calls |
|
||||
| 📞 **Calls** | Active calls with leg details, codec info, add/remove legs, transfer, hangup |
|
||||
| ☎️ **Phone** | Browser softphone — mic/speaker selection, audio meters, dial pad, incoming call popup |
|
||||
| 🔀 **Routes** | Routing rule management — match/action model with priority |
|
||||
| 📧 **Voicemail** | Voicemail box management + message playback |
|
||||
| 🔢 **IVR** | IVR menu builder — DTMF entries, TTS prompts, nested menus |
|
||||
| 👤 **Contacts** | Contact management with click-to-call |
|
||||
| 🔌 **Providers** | SIP trunk configuration and registration status |
|
||||
| 📋 **Log** | Live streaming log viewer |
|
||||
|
||||
### REST API
|
||||
|
||||
| Endpoint | Method | Description |
|
||||
|----------|--------|-------------|
|
||||
| `/api/status` | GET | Full system status (providers, devices, calls) |
|
||||
| `/api/status` | GET | Full system status (providers, devices, calls, history) |
|
||||
| `/api/call` | POST | Originate a call |
|
||||
| `/api/hangup` | POST | Hang up a call |
|
||||
| `/api/call/:id/addleg` | POST | Add a leg to an active call |
|
||||
| `/api/call/:id/addexternal` | POST | Add an external participant |
|
||||
| `/api/call/:id/addleg` | POST | Add a device leg to an active call |
|
||||
| `/api/call/:id/addexternal` | POST | Add an external participant via provider |
|
||||
| `/api/call/:id/removeleg` | POST | Remove a leg from a call |
|
||||
| `/api/transfer` | POST | Transfer a call |
|
||||
| `/api/config` | GET/POST | Read or update configuration (hot-reload) |
|
||||
| `/api/config` | GET | Read current configuration |
|
||||
| `/api/config` | POST | Update configuration (hot-reload) |
|
||||
| `/api/voicemail/:box` | GET | List voicemail messages |
|
||||
| `/api/voicemail/:box/unheard` | GET | Get unheard message count |
|
||||
| `/api/voicemail/:box/:id/audio` | GET | Stream voicemail audio |
|
||||
| `/api/voicemail/:box/:id/heard` | POST | Mark a voicemail message as heard |
|
||||
| `/api/voicemail/:box/:id` | DELETE | Delete a voicemail message |
|
||||
|
||||
### WebSocket Events
|
||||
|
||||
Connect to `/ws` for real-time push:
|
||||
|
||||
```jsonc
|
||||
{ "type": "status", "data": { ... } } // Full status snapshot (1s interval)
|
||||
{ "type": "status", "data": { ... } } // Full status snapshot (1s interval)
|
||||
{ "type": "log", "data": { "message": "..." } } // Log lines in real-time
|
||||
{ "type": "call-update", "data": { ... } } // Call state change notification
|
||||
{ "type": "webrtc-answer", "data": { ... } } // WebRTC SDP answer for browser calls
|
||||
{ "type": "webrtc-error", "data": { ... } } // WebRTC signaling error
|
||||
```
|
||||
|
||||
Browser → server signaling:
|
||||
|
||||
```jsonc
|
||||
{ "type": "webrtc-offer", "data": { ... } } // Browser sends SDP offer
|
||||
{ "type": "webrtc-accept", "data": { ... } } // Browser accepts incoming call
|
||||
{ "type": "webrtc-ice", "data": { ... } } // ICE candidate exchange
|
||||
{ "type": "webrtc-hangup", "data": { ... } } // Browser hangs up
|
||||
```
|
||||
|
||||
---
|
||||
@@ -264,7 +355,7 @@ Connect to `/ws` for real-time push:
|
||||
| Port | Protocol | Purpose |
|
||||
|------|----------|---------|
|
||||
| 5070 (configurable) | UDP | SIP signaling |
|
||||
| 20000–20200 (configurable) | UDP | RTP relay (even ports, per-call allocation) |
|
||||
| 20000–20200 (configurable) | UDP | RTP media (even ports, per-call allocation) |
|
||||
| 3060 (configurable) | TCP | Web dashboard + WebSocket + REST API |
|
||||
|
||||
---
|
||||
@@ -275,28 +366,21 @@ Connect to `/ws` for real-time push:
|
||||
# Start in dev mode
|
||||
pnpm start
|
||||
|
||||
# Build Rust crates
|
||||
# Build Rust proxy-engine
|
||||
pnpm run buildRust
|
||||
|
||||
# Bundle web frontend
|
||||
pnpm run bundle
|
||||
|
||||
# Restart background server (build + bundle + restart)
|
||||
# Build + bundle + restart background server
|
||||
pnpm run restartBackground
|
||||
```
|
||||
|
||||
### Key Design Decisions
|
||||
|
||||
- **Hub Model** — Calls are N-leg hubs, not point-to-point. This enables multi-party, dynamic leg manipulation, and transfer without tearing down the call.
|
||||
- **Zero-dependency SIP library** — `ts/sip/` is a pure data-level SIP stack (parse/build/mutate/serialize). No transport or timer logic — those live in the application layer.
|
||||
- **Rust for the hot path** — Codec transcoding and noise suppression run in native Rust for real-time performance. TypeScript handles signaling and orchestration.
|
||||
- **Per-session codec isolation** — Each call gets its own Opus/G.722 encoder/decoder state in the Rust process, preventing stateful codec prediction from leaking between concurrent calls.
|
||||
|
||||
---
|
||||
|
||||
## License and Legal Information
|
||||
|
||||
This repository contains open-source code licensed under the MIT License. A copy of the license can be found in the [license](./license) file.
|
||||
This repository contains open-source code licensed under the MIT License. A copy of the license can be found in the [LICENSE](./LICENSE) file.
|
||||
|
||||
**Please note:** The MIT License does not grant permission to use the trade names, trademarks, service marks, or product names of the project, except as required for reasonable and customary use in describing the origin of the work and reproducing the content of the NOTICE file.
|
||||
|
||||
|
||||
249
rust/Cargo.lock
generated
249
rust/Cargo.lock
generated
@@ -237,6 +237,17 @@ version = "1.1.2"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "1505bd5d3d116872e7271a6d4e16d81d0c8570876c8de68093a09ac269d8aac0"
|
||||
|
||||
[[package]]
|
||||
name = "atty"
|
||||
version = "0.2.14"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "d9b39be18770d11421cdb1b9947a45dd3f37e93092cbf377614828a319d5fee8"
|
||||
dependencies = [
|
||||
"hermit-abi",
|
||||
"libc",
|
||||
"winapi",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "audiopus"
|
||||
version = "0.3.0-rc.0"
|
||||
@@ -487,6 +498,31 @@ dependencies = [
|
||||
"inout",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "clap"
|
||||
version = "3.2.25"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "4ea181bf566f71cb9a5d17a59e1871af638180a18fb0035c92ae62b705207123"
|
||||
dependencies = [
|
||||
"atty",
|
||||
"bitflags 1.3.2",
|
||||
"clap_lex",
|
||||
"indexmap 1.9.3",
|
||||
"once_cell",
|
||||
"strsim",
|
||||
"termcolor",
|
||||
"textwrap",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "clap_lex"
|
||||
version = "0.2.4"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "2850f2f5a82cbf437dd5af4d49848fbdfc27c157c3d010345776f952765261c5"
|
||||
dependencies = [
|
||||
"os_str_bytes",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "cmake"
|
||||
version = "0.1.58"
|
||||
@@ -700,6 +736,125 @@ version = "0.3.8"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "06d2e3287df1c007e74221c49ca10a95d557349e54b3a75dc2fb14712c751f04"
|
||||
|
||||
[[package]]
|
||||
name = "dasp"
|
||||
version = "0.11.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "7381b67da416b639690ac77c73b86a7b5e64a29e31d1f75fb3b1102301ef355a"
|
||||
dependencies = [
|
||||
"dasp_envelope",
|
||||
"dasp_frame",
|
||||
"dasp_interpolate",
|
||||
"dasp_peak",
|
||||
"dasp_ring_buffer",
|
||||
"dasp_rms",
|
||||
"dasp_sample",
|
||||
"dasp_signal",
|
||||
"dasp_slice",
|
||||
"dasp_window",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "dasp_envelope"
|
||||
version = "0.11.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "8ec617ce7016f101a87fe85ed44180839744265fae73bb4aa43e7ece1b7668b6"
|
||||
dependencies = [
|
||||
"dasp_frame",
|
||||
"dasp_peak",
|
||||
"dasp_ring_buffer",
|
||||
"dasp_rms",
|
||||
"dasp_sample",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "dasp_frame"
|
||||
version = "0.11.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "b2a3937f5fe2135702897535c8d4a5553f8b116f76c1529088797f2eee7c5cd6"
|
||||
dependencies = [
|
||||
"dasp_sample",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "dasp_interpolate"
|
||||
version = "0.11.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "7fc975a6563bb7ca7ec0a6c784ead49983a21c24835b0bc96eea11ee407c7486"
|
||||
dependencies = [
|
||||
"dasp_frame",
|
||||
"dasp_ring_buffer",
|
||||
"dasp_sample",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "dasp_peak"
|
||||
version = "0.11.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "5cf88559d79c21f3d8523d91250c397f9a15b5fc72fbb3f87fdb0a37b79915bf"
|
||||
dependencies = [
|
||||
"dasp_frame",
|
||||
"dasp_sample",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "dasp_ring_buffer"
|
||||
version = "0.11.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "07d79e19b89618a543c4adec9c5a347fe378a19041699b3278e616e387511ea1"
|
||||
|
||||
[[package]]
|
||||
name = "dasp_rms"
|
||||
version = "0.11.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "a6c5dcb30b7e5014486e2822537ea2beae50b19722ffe2ed7549ab03774575aa"
|
||||
dependencies = [
|
||||
"dasp_frame",
|
||||
"dasp_ring_buffer",
|
||||
"dasp_sample",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "dasp_sample"
|
||||
version = "0.11.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "0c87e182de0887fd5361989c677c4e8f5000cd9491d6d563161a8f3a5519fc7f"
|
||||
|
||||
[[package]]
|
||||
name = "dasp_signal"
|
||||
version = "0.11.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "aa1ab7d01689c6ed4eae3d38fe1cea08cba761573fbd2d592528d55b421077e7"
|
||||
dependencies = [
|
||||
"dasp_envelope",
|
||||
"dasp_frame",
|
||||
"dasp_interpolate",
|
||||
"dasp_peak",
|
||||
"dasp_ring_buffer",
|
||||
"dasp_rms",
|
||||
"dasp_sample",
|
||||
"dasp_window",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "dasp_slice"
|
||||
version = "0.11.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "4e1c7335d58e7baedafa516cb361360ff38d6f4d3f9d9d5ee2a2fc8e27178fa1"
|
||||
dependencies = [
|
||||
"dasp_frame",
|
||||
"dasp_sample",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "dasp_window"
|
||||
version = "0.11.1"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "99ded7b88821d2ce4e8b842c9f1c86ac911891ab89443cc1de750cae764c5076"
|
||||
dependencies = [
|
||||
"dasp_sample",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "data-encoding"
|
||||
version = "2.10.0"
|
||||
@@ -1214,6 +1369,12 @@ dependencies = [
|
||||
"subtle",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "hashbrown"
|
||||
version = "0.12.3"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "8a9ee70c43aaf417c914396645a0fa852624801b24ebb7ae78fe8272889ac888"
|
||||
|
||||
[[package]]
|
||||
name = "hashbrown"
|
||||
version = "0.15.5"
|
||||
@@ -1246,6 +1407,15 @@ version = "0.5.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "2304e00983f87ffb38b55b444b5e3b60a884b5d30c0fca7d82fe33449bbe55ea"
|
||||
|
||||
[[package]]
|
||||
name = "hermit-abi"
|
||||
version = "0.1.19"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "62b467343b94ba476dcb2500d242dadbb39557df889310ac77c5d99100aaac33"
|
||||
dependencies = [
|
||||
"libc",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "hex"
|
||||
version = "0.4.3"
|
||||
@@ -1446,6 +1616,16 @@ dependencies = [
|
||||
"zstd",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "indexmap"
|
||||
version = "1.9.3"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "bd070e393353796e801d209ad339e89596eb4c8d430d18ede6a1cced8fafbd99"
|
||||
dependencies = [
|
||||
"autocfg",
|
||||
"hashbrown 0.12.3",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "indexmap"
|
||||
version = "2.14.0"
|
||||
@@ -1739,7 +1919,13 @@ version = "0.5.2"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "805d5964d1e7a0006a7fdced7dae75084d66d18b35f1dfe81bd76929b1f8da0c"
|
||||
dependencies = [
|
||||
"anyhow",
|
||||
"clap",
|
||||
"dasp",
|
||||
"dasp_interpolate",
|
||||
"dasp_ring_buffer",
|
||||
"easyfft",
|
||||
"hound",
|
||||
"once_cell",
|
||||
]
|
||||
|
||||
@@ -1881,16 +2067,6 @@ dependencies = [
|
||||
"vcpkg",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "opus-codec"
|
||||
version = "0.2.0"
|
||||
dependencies = [
|
||||
"base64 0.22.1",
|
||||
"codec-lib",
|
||||
"serde",
|
||||
"serde_json",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "ort"
|
||||
version = "2.0.0-rc.11"
|
||||
@@ -1915,6 +2091,12 @@ dependencies = [
|
||||
"ureq",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "os_str_bytes"
|
||||
version = "6.6.1"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "e2355d85b9a3786f481747ced0e0ff2ba35213a1f9bd406ed906554d7af805a1"
|
||||
|
||||
[[package]]
|
||||
name = "p256"
|
||||
version = "0.11.1"
|
||||
@@ -2188,6 +2370,9 @@ dependencies = [
|
||||
"base64 0.22.1",
|
||||
"codec-lib",
|
||||
"hound",
|
||||
"kokoro-tts",
|
||||
"nnnoiseless",
|
||||
"ort",
|
||||
"rand 0.8.5",
|
||||
"regex-lite",
|
||||
"serde",
|
||||
@@ -2890,6 +3075,21 @@ dependencies = [
|
||||
"windows-sys",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "termcolor"
|
||||
version = "1.4.1"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "06794f8f6c5c898b3275aebefa6b8a1cb24cd2c6c79397ab15774837a0bc5755"
|
||||
dependencies = [
|
||||
"winapi-util",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "textwrap"
|
||||
version = "0.16.2"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "c13547615a44dc9c452a8a534638acdf07120d4b6847c8178705da06306a3057"
|
||||
|
||||
[[package]]
|
||||
name = "thiserror"
|
||||
version = "1.0.69"
|
||||
@@ -3008,16 +3208,6 @@ dependencies = [
|
||||
"strength_reduce",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "tts-engine"
|
||||
version = "0.1.0"
|
||||
dependencies = [
|
||||
"hound",
|
||||
"kokoro-tts",
|
||||
"ort",
|
||||
"tokio",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "turn"
|
||||
version = "0.6.1"
|
||||
@@ -3261,7 +3451,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "bb0e353e6a2fbdc176932bbaab493762eb1255a7900fe0fea1a2f96c296cc909"
|
||||
dependencies = [
|
||||
"anyhow",
|
||||
"indexmap",
|
||||
"indexmap 2.14.0",
|
||||
"wasm-encoder",
|
||||
"wasmparser",
|
||||
]
|
||||
@@ -3274,7 +3464,7 @@ checksum = "47b807c72e1bac69382b3a6fb3dbe8ea4c0ed87ff5629b8685ae6b9a611028fe"
|
||||
dependencies = [
|
||||
"bitflags 2.11.0",
|
||||
"hashbrown 0.15.5",
|
||||
"indexmap",
|
||||
"indexmap 2.14.0",
|
||||
"semver",
|
||||
]
|
||||
|
||||
@@ -3532,6 +3722,15 @@ version = "0.4.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "ac3b87c63620426dd9b991e5ce0329eff545bccbbb34f3be09ff6fb6ab51b7b6"
|
||||
|
||||
[[package]]
|
||||
name = "winapi-util"
|
||||
version = "0.1.11"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "c2a7b1c03c876122aa43f3020e6c3c3ee5c05081c9a00739faf7503aeba10d22"
|
||||
dependencies = [
|
||||
"windows-sys",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "winapi-x86_64-pc-windows-gnu"
|
||||
version = "0.4.0"
|
||||
@@ -3581,7 +3780,7 @@ checksum = "b7c566e0f4b284dd6561c786d9cb0142da491f46a9fbed79ea69cdad5db17f21"
|
||||
dependencies = [
|
||||
"anyhow",
|
||||
"heck",
|
||||
"indexmap",
|
||||
"indexmap 2.14.0",
|
||||
"prettyplease",
|
||||
"syn 2.0.117",
|
||||
"wasm-metadata",
|
||||
@@ -3612,7 +3811,7 @@ checksum = "9d66ea20e9553b30172b5e831994e35fbde2d165325bec84fc43dbf6f4eb9cb2"
|
||||
dependencies = [
|
||||
"anyhow",
|
||||
"bitflags 2.11.0",
|
||||
"indexmap",
|
||||
"indexmap 2.14.0",
|
||||
"log",
|
||||
"serde",
|
||||
"serde_derive",
|
||||
@@ -3631,7 +3830,7 @@ checksum = "ecc8ac4bc1dc3381b7f59c34f00b67e18f910c2c0f50015669dde7def656a736"
|
||||
dependencies = [
|
||||
"anyhow",
|
||||
"id-arena",
|
||||
"indexmap",
|
||||
"indexmap 2.14.0",
|
||||
"log",
|
||||
"semver",
|
||||
"serde",
|
||||
|
||||
@@ -1,8 +1,6 @@
|
||||
[workspace]
|
||||
members = [
|
||||
"crates/codec-lib",
|
||||
"crates/opus-codec",
|
||||
"crates/tts-engine",
|
||||
"crates/sip-proto",
|
||||
"crates/proxy-engine",
|
||||
]
|
||||
|
||||
@@ -7,4 +7,4 @@ edition = "2021"
|
||||
audiopus = "0.3.0-rc.0"
|
||||
ezk-g722 = "0.1"
|
||||
rubato = "0.14"
|
||||
nnnoiseless = { version = "0.5", default-features = false }
|
||||
nnnoiseless = "0.5"
|
||||
|
||||
@@ -1,7 +1,7 @@
|
||||
//! Audio codec library for the SIP router.
|
||||
//!
|
||||
//! Handles Opus ↔ G.722 ↔ PCMU/PCMA transcoding with ML noise suppression.
|
||||
//! Used by both the standalone `opus-codec` CLI and the `proxy-engine` binary.
|
||||
//! Used by the `proxy-engine` binary for all audio transcoding.
|
||||
|
||||
use audiopus::coder::{Decoder as OpusDecoder, Encoder as OpusEncoder};
|
||||
use audiopus::packet::Packet as OpusPacket;
|
||||
@@ -104,6 +104,8 @@ pub struct TranscodeState {
|
||||
g722_dec: libg722::decoder::Decoder,
|
||||
/// Cached FFT resamplers keyed by (from_rate, to_rate, chunk_size).
|
||||
resamplers: HashMap<(u32, u32, usize), FftFixedIn<f64>>,
|
||||
/// Cached f32 FFT resamplers keyed by (from_rate, to_rate, chunk_size).
|
||||
resamplers_f32: HashMap<(u32, u32, usize), FftFixedIn<f32>>,
|
||||
/// ML noise suppression for the SIP-bound direction.
|
||||
denoiser_to_sip: Box<DenoiseState<'static>>,
|
||||
/// ML noise suppression for the browser-bound direction.
|
||||
@@ -133,14 +135,17 @@ impl TranscodeState {
|
||||
g722_enc,
|
||||
g722_dec,
|
||||
resamplers: HashMap::new(),
|
||||
resamplers_f32: HashMap::new(),
|
||||
denoiser_to_sip: DenoiseState::new(),
|
||||
denoiser_to_browser: DenoiseState::new(),
|
||||
})
|
||||
}
|
||||
|
||||
/// High-quality sample rate conversion using rubato FFT resampler.
|
||||
/// Resamplers are cached by (from_rate, to_rate, chunk_size) and reused,
|
||||
/// maintaining proper inter-frame state for continuous audio streams.
|
||||
///
|
||||
/// To maintain continuous filter state, the resampler always processes at a
|
||||
/// canonical chunk size (20ms at the source rate). This prevents cache
|
||||
/// thrashing from variable input sizes and preserves inter-frame filter state.
|
||||
pub fn resample(
|
||||
&mut self,
|
||||
pcm: &[i16],
|
||||
@@ -151,28 +156,61 @@ impl TranscodeState {
|
||||
return Ok(pcm.to_vec());
|
||||
}
|
||||
|
||||
let chunk = pcm.len();
|
||||
let key = (from_rate, to_rate, chunk);
|
||||
let canonical_chunk = (from_rate as usize) / 50; // 20ms
|
||||
let key = (from_rate, to_rate, canonical_chunk);
|
||||
|
||||
if !self.resamplers.contains_key(&key) {
|
||||
let r =
|
||||
FftFixedIn::<f64>::new(from_rate as usize, to_rate as usize, chunk, 1, 1)
|
||||
.map_err(|e| format!("resampler {from_rate}->{to_rate}: {e}"))?;
|
||||
let r = FftFixedIn::<f64>::new(
|
||||
from_rate as usize,
|
||||
to_rate as usize,
|
||||
canonical_chunk,
|
||||
1,
|
||||
1,
|
||||
)
|
||||
.map_err(|e| format!("resampler {from_rate}->{to_rate}: {e}"))?;
|
||||
self.resamplers.insert(key, r);
|
||||
}
|
||||
let resampler = self.resamplers.get_mut(&key).unwrap();
|
||||
|
||||
let float_in: Vec<f64> = pcm.iter().map(|&s| s as f64 / 32768.0).collect();
|
||||
let input = vec![float_in];
|
||||
let mut output = Vec::with_capacity(
|
||||
(pcm.len() as f64 * to_rate as f64 / from_rate as f64).ceil() as usize + 16,
|
||||
);
|
||||
|
||||
let result = resampler
|
||||
.process(&input, None)
|
||||
.map_err(|e| format!("resample {from_rate}->{to_rate}: {e}"))?;
|
||||
let mut offset = 0;
|
||||
while offset < pcm.len() {
|
||||
let remaining = pcm.len() - offset;
|
||||
let copy_len = remaining.min(canonical_chunk);
|
||||
let mut chunk = vec![0.0f64; canonical_chunk];
|
||||
for i in 0..copy_len {
|
||||
chunk[i] = pcm[offset + i] as f64 / 32768.0;
|
||||
}
|
||||
|
||||
Ok(result[0]
|
||||
.iter()
|
||||
.map(|&s| (s * 32767.0).round().clamp(-32768.0, 32767.0) as i16)
|
||||
.collect())
|
||||
let input = vec![chunk];
|
||||
let result = resampler
|
||||
.process(&input, None)
|
||||
.map_err(|e| format!("resample {from_rate}->{to_rate}: {e}"))?;
|
||||
|
||||
if remaining < canonical_chunk {
|
||||
let expected =
|
||||
(copy_len as f64 * to_rate as f64 / from_rate as f64).round() as usize;
|
||||
let take = expected.min(result[0].len());
|
||||
output.extend(
|
||||
result[0][..take]
|
||||
.iter()
|
||||
.map(|&s| (s * 32767.0).round().clamp(-32768.0, 32767.0) as i16),
|
||||
);
|
||||
} else {
|
||||
output.extend(
|
||||
result[0]
|
||||
.iter()
|
||||
.map(|&s| (s * 32767.0).round().clamp(-32768.0, 32767.0) as i16),
|
||||
);
|
||||
}
|
||||
|
||||
offset += canonical_chunk;
|
||||
}
|
||||
|
||||
Ok(output)
|
||||
}
|
||||
|
||||
/// Apply RNNoise ML noise suppression to 48kHz PCM audio.
|
||||
@@ -293,6 +331,171 @@ impl TranscodeState {
|
||||
_ => Err(format!("unsupported target PT {pt}")),
|
||||
}
|
||||
}
|
||||
|
||||
// ---- f32 API for high-quality internal bus ----------------------------
|
||||
|
||||
/// Decode an encoded audio payload to f32 PCM samples in [-1.0, 1.0].
|
||||
/// Returns (samples, sample_rate).
|
||||
///
|
||||
/// For Opus, uses native float decode (no i16 quantization).
|
||||
/// For G.722/G.711, decodes to i16 then converts (codec is natively i16).
|
||||
pub fn decode_to_f32(&mut self, data: &[u8], pt: u8) -> Result<(Vec<f32>, u32), String> {
|
||||
match pt {
|
||||
PT_OPUS => {
|
||||
let mut pcm = vec![0.0f32; 5760]; // up to 120ms at 48kHz
|
||||
let packet =
|
||||
OpusPacket::try_from(data).map_err(|e| format!("opus packet: {e}"))?;
|
||||
let out =
|
||||
MutSignals::try_from(&mut pcm[..]).map_err(|e| format!("opus signals: {e}"))?;
|
||||
let n: usize = self
|
||||
.opus_dec
|
||||
.decode_float(Some(packet), out, false)
|
||||
.map_err(|e| format!("opus decode_float: {e}"))?
|
||||
.into();
|
||||
pcm.truncate(n);
|
||||
Ok((pcm, 48000))
|
||||
}
|
||||
_ => {
|
||||
// G.722, PCMU, PCMA: natively i16 codecs — decode then convert.
|
||||
let (pcm_i16, rate) = self.decode_to_pcm(data, pt)?;
|
||||
let pcm_f32 = pcm_i16.iter().map(|&s| s as f32 / 32768.0).collect();
|
||||
Ok((pcm_f32, rate))
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/// Opus packet loss concealment — synthesize one frame to fill a gap.
|
||||
/// Returns f32 PCM at 48kHz. `frame_size` should be 960 for 20ms.
|
||||
pub fn opus_plc(&mut self, frame_size: usize) -> Result<Vec<f32>, String> {
|
||||
let mut pcm = vec![0.0f32; frame_size];
|
||||
let out = MutSignals::try_from(&mut pcm[..])
|
||||
.map_err(|e| format!("opus plc signals: {e}"))?;
|
||||
let n: usize = self
|
||||
.opus_dec
|
||||
.decode_float(None::<OpusPacket<'_>>, out, false)
|
||||
.map_err(|e| format!("opus plc: {e}"))?
|
||||
.into();
|
||||
pcm.truncate(n);
|
||||
Ok(pcm)
|
||||
}
|
||||
|
||||
/// Encode f32 PCM samples ([-1.0, 1.0]) to an audio codec.
|
||||
///
|
||||
/// For Opus, uses native float encode (no i16 quantization).
|
||||
/// For G.722/G.711, converts to i16 then encodes (codec is natively i16).
|
||||
pub fn encode_from_f32(&mut self, pcm: &[f32], pt: u8) -> Result<Vec<u8>, String> {
|
||||
match pt {
|
||||
PT_OPUS => {
|
||||
let mut buf = vec![0u8; 4000];
|
||||
let n: usize = self
|
||||
.opus_enc
|
||||
.encode_float(pcm, &mut buf)
|
||||
.map_err(|e| format!("opus encode_float: {e}"))?
|
||||
.into();
|
||||
buf.truncate(n);
|
||||
Ok(buf)
|
||||
}
|
||||
_ => {
|
||||
// G.722, PCMU, PCMA: natively i16 codecs.
|
||||
let pcm_i16: Vec<i16> = pcm
|
||||
.iter()
|
||||
.map(|&s| (s * 32767.0).round().clamp(-32768.0, 32767.0) as i16)
|
||||
.collect();
|
||||
self.encode_from_pcm(&pcm_i16, pt)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/// High-quality sample rate conversion for f32 PCM using rubato FFT resampler.
|
||||
///
|
||||
/// To maintain continuous filter state, the resampler always processes at a
|
||||
/// canonical chunk size (20ms at the source rate). This prevents cache
|
||||
/// thrashing from variable input sizes and preserves inter-frame filter state.
|
||||
pub fn resample_f32(
|
||||
&mut self,
|
||||
pcm: &[f32],
|
||||
from_rate: u32,
|
||||
to_rate: u32,
|
||||
) -> Result<Vec<f32>, String> {
|
||||
if from_rate == to_rate || pcm.is_empty() {
|
||||
return Ok(pcm.to_vec());
|
||||
}
|
||||
|
||||
let canonical_chunk = (from_rate as usize) / 50; // 20ms
|
||||
let key = (from_rate, to_rate, canonical_chunk);
|
||||
|
||||
if !self.resamplers_f32.contains_key(&key) {
|
||||
let r = FftFixedIn::<f32>::new(
|
||||
from_rate as usize,
|
||||
to_rate as usize,
|
||||
canonical_chunk,
|
||||
1,
|
||||
1,
|
||||
)
|
||||
.map_err(|e| format!("resampler f32 {from_rate}->{to_rate}: {e}"))?;
|
||||
self.resamplers_f32.insert(key, r);
|
||||
}
|
||||
let resampler = self.resamplers_f32.get_mut(&key).unwrap();
|
||||
|
||||
let mut output = Vec::with_capacity(
|
||||
(pcm.len() as f64 * to_rate as f64 / from_rate as f64).ceil() as usize + 16,
|
||||
);
|
||||
|
||||
let mut offset = 0;
|
||||
while offset < pcm.len() {
|
||||
let remaining = pcm.len() - offset;
|
||||
let mut chunk = vec![0.0f32; canonical_chunk];
|
||||
let copy_len = remaining.min(canonical_chunk);
|
||||
chunk[..copy_len].copy_from_slice(&pcm[offset..offset + copy_len]);
|
||||
|
||||
let input = vec![chunk];
|
||||
let result = resampler
|
||||
.process(&input, None)
|
||||
.map_err(|e| format!("resample f32 {from_rate}->{to_rate}: {e}"))?;
|
||||
|
||||
if remaining < canonical_chunk {
|
||||
let expected =
|
||||
(copy_len as f64 * to_rate as f64 / from_rate as f64).round() as usize;
|
||||
output.extend_from_slice(&result[0][..expected.min(result[0].len())]);
|
||||
} else {
|
||||
output.extend_from_slice(&result[0]);
|
||||
}
|
||||
|
||||
offset += canonical_chunk;
|
||||
}
|
||||
|
||||
Ok(output)
|
||||
}
|
||||
|
||||
/// Apply RNNoise ML noise suppression to 48kHz f32 PCM audio.
|
||||
/// Processes in 480-sample (10ms) frames. State persists across calls.
|
||||
/// Operates natively in f32 — no i16 conversion overhead.
|
||||
pub fn denoise_f32(denoiser: &mut DenoiseState, pcm: &[f32]) -> Vec<f32> {
|
||||
let frame_size = DenoiseState::FRAME_SIZE; // 480
|
||||
let total = pcm.len();
|
||||
let whole = (total / frame_size) * frame_size;
|
||||
let mut output = Vec::with_capacity(total);
|
||||
let mut out_buf = [0.0f32; 480];
|
||||
|
||||
// nnnoiseless expects f32 samples scaled as i16 range (-32768..32767).
|
||||
for offset in (0..whole).step_by(frame_size) {
|
||||
let input: Vec<f32> = pcm[offset..offset + frame_size]
|
||||
.iter()
|
||||
.map(|&s| s * 32768.0)
|
||||
.collect();
|
||||
denoiser.process_frame(&mut out_buf, &input);
|
||||
output.extend(out_buf.iter().map(|&s| s / 32768.0));
|
||||
}
|
||||
if whole < total {
|
||||
output.extend_from_slice(&pcm[whole..]);
|
||||
}
|
||||
output
|
||||
}
|
||||
}
|
||||
|
||||
/// Create a new standalone denoiser for per-leg inbound processing.
|
||||
pub fn new_denoiser() -> Box<DenoiseState<'static>> {
|
||||
DenoiseState::new()
|
||||
}
|
||||
|
||||
#[cfg(test)]
|
||||
|
||||
@@ -1,14 +0,0 @@
|
||||
[package]
|
||||
name = "opus-codec"
|
||||
version = "0.2.0"
|
||||
edition = "2021"
|
||||
|
||||
[[bin]]
|
||||
name = "opus-codec"
|
||||
path = "src/main.rs"
|
||||
|
||||
[dependencies]
|
||||
codec-lib = { path = "../codec-lib" }
|
||||
serde = { version = "1", features = ["derive"] }
|
||||
serde_json = "1"
|
||||
base64 = "0.22"
|
||||
@@ -1,286 +0,0 @@
|
||||
/// Audio transcoding bridge for smartrust.
|
||||
///
|
||||
/// Thin CLI wrapper around `codec-lib`. Handles Opus ↔ G.722 ↔ PCMU transcoding.
|
||||
///
|
||||
/// Protocol:
|
||||
/// -> {"id":"1","method":"init","params":{}}
|
||||
/// <- {"id":"1","success":true,"result":{}}
|
||||
/// -> {"id":"2","method":"create_session","params":{"session_id":"call-abc"}}
|
||||
/// <- {"id":"2","success":true,"result":{}}
|
||||
/// -> {"id":"3","method":"transcode","params":{"session_id":"call-abc","data_b64":"...","from_pt":111,"to_pt":9}}
|
||||
/// <- {"id":"3","success":true,"result":{"data_b64":"..."}}
|
||||
/// -> {"id":"4","method":"destroy_session","params":{"session_id":"call-abc"}}
|
||||
/// <- {"id":"4","success":true,"result":{}}
|
||||
|
||||
use base64::engine::general_purpose::STANDARD as B64;
|
||||
use base64::Engine as _;
|
||||
use codec_lib::{codec_sample_rate, TranscodeState};
|
||||
use serde::Deserialize;
|
||||
use std::collections::HashMap;
|
||||
use std::io::{self, BufRead, Write};
|
||||
|
||||
#[derive(Deserialize)]
|
||||
struct Request {
|
||||
id: String,
|
||||
method: String,
|
||||
#[serde(default)]
|
||||
params: serde_json::Value,
|
||||
}
|
||||
|
||||
fn respond(
|
||||
out: &mut impl Write,
|
||||
id: &str,
|
||||
success: bool,
|
||||
result: Option<serde_json::Value>,
|
||||
error: Option<&str>,
|
||||
) {
|
||||
let mut resp = serde_json::json!({ "id": id, "success": success });
|
||||
if let Some(r) = result {
|
||||
resp["result"] = r;
|
||||
}
|
||||
if let Some(e) = error {
|
||||
resp["error"] = serde_json::Value::String(e.to_string());
|
||||
}
|
||||
let _ = writeln!(out, "{}", resp);
|
||||
let _ = out.flush();
|
||||
}
|
||||
|
||||
/// Resolve a session: if session_id is provided, look it up in the sessions map;
|
||||
/// otherwise fall back to the default state (backward compat with `init`).
|
||||
fn get_session<'a>(
|
||||
sessions: &'a mut HashMap<String, TranscodeState>,
|
||||
default: &'a mut Option<TranscodeState>,
|
||||
params: &serde_json::Value,
|
||||
) -> Option<&'a mut TranscodeState> {
|
||||
if let Some(sid) = params.get("session_id").and_then(|v| v.as_str()) {
|
||||
sessions.get_mut(sid)
|
||||
} else {
|
||||
default.as_mut()
|
||||
}
|
||||
}
|
||||
|
||||
fn main() {
|
||||
let stdin = io::stdin();
|
||||
let stdout = io::stdout();
|
||||
let mut out = io::BufWriter::new(stdout.lock());
|
||||
|
||||
let _ = writeln!(out, r#"{{"event":"ready","data":{{}}}}"#);
|
||||
let _ = out.flush();
|
||||
|
||||
let mut default_state: Option<TranscodeState> = None;
|
||||
let mut sessions: HashMap<String, TranscodeState> = HashMap::new();
|
||||
|
||||
for line in stdin.lock().lines() {
|
||||
let line = match line {
|
||||
Ok(l) if !l.trim().is_empty() => l,
|
||||
Ok(_) => continue,
|
||||
Err(_) => break,
|
||||
};
|
||||
|
||||
let req: Request = match serde_json::from_str(&line) {
|
||||
Ok(r) => r,
|
||||
Err(e) => {
|
||||
respond(&mut out, "", false, None, Some(&format!("parse: {e}")));
|
||||
continue;
|
||||
}
|
||||
};
|
||||
|
||||
match req.method.as_str() {
|
||||
"init" => match TranscodeState::new() {
|
||||
Ok(s) => {
|
||||
default_state = Some(s);
|
||||
respond(&mut out, &req.id, true, Some(serde_json::json!({})), None);
|
||||
}
|
||||
Err(e) => respond(&mut out, &req.id, false, None, Some(&e)),
|
||||
},
|
||||
|
||||
"create_session" => {
|
||||
let session_id = match req.params.get("session_id").and_then(|v| v.as_str()) {
|
||||
Some(s) => s.to_string(),
|
||||
None => {
|
||||
respond(&mut out, &req.id, false, None, Some("missing session_id"));
|
||||
continue;
|
||||
}
|
||||
};
|
||||
if sessions.contains_key(&session_id) {
|
||||
respond(&mut out, &req.id, true, Some(serde_json::json!({})), None);
|
||||
continue;
|
||||
}
|
||||
match TranscodeState::new() {
|
||||
Ok(s) => {
|
||||
sessions.insert(session_id, s);
|
||||
respond(&mut out, &req.id, true, Some(serde_json::json!({})), None);
|
||||
}
|
||||
Err(e) => respond(&mut out, &req.id, false, None, Some(&e)),
|
||||
}
|
||||
}
|
||||
|
||||
"destroy_session" => {
|
||||
let session_id = match req.params.get("session_id").and_then(|v| v.as_str()) {
|
||||
Some(s) => s,
|
||||
None => {
|
||||
respond(&mut out, &req.id, false, None, Some("missing session_id"));
|
||||
continue;
|
||||
}
|
||||
};
|
||||
sessions.remove(session_id);
|
||||
respond(&mut out, &req.id, true, Some(serde_json::json!({})), None);
|
||||
}
|
||||
|
||||
"transcode" => {
|
||||
let st = match get_session(&mut sessions, &mut default_state, &req.params) {
|
||||
Some(s) => s,
|
||||
None => {
|
||||
respond(
|
||||
&mut out,
|
||||
&req.id,
|
||||
false,
|
||||
None,
|
||||
Some("not initialized (no session or default state)"),
|
||||
);
|
||||
continue;
|
||||
}
|
||||
};
|
||||
let data_b64 = match req.params.get("data_b64").and_then(|v| v.as_str()) {
|
||||
Some(s) => s,
|
||||
None => {
|
||||
respond(&mut out, &req.id, false, None, Some("missing data_b64"));
|
||||
continue;
|
||||
}
|
||||
};
|
||||
let from_pt =
|
||||
req.params.get("from_pt").and_then(|v| v.as_u64()).unwrap_or(0) as u8;
|
||||
let to_pt = req.params.get("to_pt").and_then(|v| v.as_u64()).unwrap_or(0) as u8;
|
||||
let direction = req.params.get("direction").and_then(|v| v.as_str());
|
||||
|
||||
let data = match B64.decode(data_b64) {
|
||||
Ok(b) => b,
|
||||
Err(e) => {
|
||||
respond(
|
||||
&mut out,
|
||||
&req.id,
|
||||
false,
|
||||
None,
|
||||
Some(&format!("b64: {e}")),
|
||||
);
|
||||
continue;
|
||||
}
|
||||
};
|
||||
|
||||
match st.transcode(&data, from_pt, to_pt, direction) {
|
||||
Ok(result) => {
|
||||
respond(
|
||||
&mut out,
|
||||
&req.id,
|
||||
true,
|
||||
Some(serde_json::json!({ "data_b64": B64.encode(&result) })),
|
||||
None,
|
||||
);
|
||||
}
|
||||
Err(e) => respond(&mut out, &req.id, false, None, Some(&e)),
|
||||
}
|
||||
}
|
||||
|
||||
"encode_pcm" => {
|
||||
let st = match get_session(&mut sessions, &mut default_state, &req.params) {
|
||||
Some(s) => s,
|
||||
None => {
|
||||
respond(
|
||||
&mut out,
|
||||
&req.id,
|
||||
false,
|
||||
None,
|
||||
Some("not initialized (no session or default state)"),
|
||||
);
|
||||
continue;
|
||||
}
|
||||
};
|
||||
let data_b64 = match req.params.get("data_b64").and_then(|v| v.as_str()) {
|
||||
Some(s) => s,
|
||||
None => {
|
||||
respond(&mut out, &req.id, false, None, Some("missing data_b64"));
|
||||
continue;
|
||||
}
|
||||
};
|
||||
let sample_rate = req
|
||||
.params
|
||||
.get("sample_rate")
|
||||
.and_then(|v| v.as_u64())
|
||||
.unwrap_or(22050) as u32;
|
||||
let to_pt = req.params.get("to_pt").and_then(|v| v.as_u64()).unwrap_or(9) as u8;
|
||||
|
||||
let data = match B64.decode(data_b64) {
|
||||
Ok(b) => b,
|
||||
Err(e) => {
|
||||
respond(
|
||||
&mut out,
|
||||
&req.id,
|
||||
false,
|
||||
None,
|
||||
Some(&format!("b64: {e}")),
|
||||
);
|
||||
continue;
|
||||
}
|
||||
};
|
||||
|
||||
if data.len() % 2 != 0 {
|
||||
respond(
|
||||
&mut out,
|
||||
&req.id,
|
||||
false,
|
||||
None,
|
||||
Some("PCM data has odd byte count (expected 16-bit LE samples)"),
|
||||
);
|
||||
continue;
|
||||
}
|
||||
|
||||
let pcm: Vec<i16> = data
|
||||
.chunks_exact(2)
|
||||
.map(|c| i16::from_le_bytes([c[0], c[1]]))
|
||||
.collect();
|
||||
|
||||
let target_rate = codec_sample_rate(to_pt);
|
||||
let resampled = match st.resample(&pcm, sample_rate, target_rate) {
|
||||
Ok(r) => r,
|
||||
Err(e) => {
|
||||
respond(&mut out, &req.id, false, None, Some(&e));
|
||||
continue;
|
||||
}
|
||||
};
|
||||
|
||||
match st.encode_from_pcm(&resampled, to_pt) {
|
||||
Ok(encoded) => {
|
||||
respond(
|
||||
&mut out,
|
||||
&req.id,
|
||||
true,
|
||||
Some(serde_json::json!({ "data_b64": B64.encode(&encoded) })),
|
||||
None,
|
||||
);
|
||||
}
|
||||
Err(e) => {
|
||||
respond(&mut out, &req.id, false, None, Some(&e));
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
"encode" | "decode" => {
|
||||
respond(
|
||||
&mut out,
|
||||
&req.id,
|
||||
false,
|
||||
None,
|
||||
Some("use 'transcode' command instead"),
|
||||
);
|
||||
}
|
||||
|
||||
_ => respond(
|
||||
&mut out,
|
||||
&req.id,
|
||||
false,
|
||||
None,
|
||||
Some(&format!("unknown: {}", req.method)),
|
||||
),
|
||||
}
|
||||
}
|
||||
}
|
||||
@@ -10,6 +10,7 @@ path = "src/main.rs"
|
||||
[dependencies]
|
||||
codec-lib = { path = "../codec-lib" }
|
||||
sip-proto = { path = "../sip-proto" }
|
||||
nnnoiseless = "0.5"
|
||||
tokio = { version = "1", features = ["full"] }
|
||||
serde = { version = "1", features = ["derive"] }
|
||||
serde_json = "1"
|
||||
@@ -18,3 +19,8 @@ regex-lite = "0.1"
|
||||
webrtc = "0.8"
|
||||
rand = "0.8"
|
||||
hound = "3.5"
|
||||
kokoro-tts = { version = "0.3", default-features = false }
|
||||
ort = { version = "=2.0.0-rc.11", default-features = false, features = [
|
||||
"std", "download-binaries", "copy-dylibs", "ndarray",
|
||||
"tls-native-vendored"
|
||||
] }
|
||||
|
||||
@@ -1,4 +1,5 @@
|
||||
//! Audio player — reads a WAV file and streams it as RTP packets.
|
||||
//! Also provides prompt preparation for the leg interaction system.
|
||||
|
||||
use crate::rtp::{build_rtp_header, rtp_clock_increment};
|
||||
use codec_lib::{codec_sample_rate, TranscodeState};
|
||||
@@ -8,6 +9,11 @@ use std::sync::Arc;
|
||||
use tokio::net::UdpSocket;
|
||||
use tokio::time::{self, Duration};
|
||||
|
||||
/// Mixing sample rate used by the mixer (must stay in sync with mixer::MIX_RATE).
|
||||
const MIX_RATE: u32 = 48000;
|
||||
/// Samples per 20ms frame at the mixing rate.
|
||||
const MIX_FRAME_SIZE: usize = 960;
|
||||
|
||||
/// Play a WAV file as RTP to a destination.
|
||||
/// Returns when playback is complete.
|
||||
pub async fn play_wav_file(
|
||||
@@ -171,3 +177,64 @@ pub async fn play_beep(
|
||||
|
||||
Ok((seq, ts))
|
||||
}
|
||||
|
||||
/// Load a WAV file and split it into 20ms f32 PCM frames at 48kHz.
|
||||
/// Used by the leg interaction system to prepare prompt audio for the mixer.
|
||||
pub fn load_prompt_pcm_frames(wav_path: &str) -> Result<Vec<Vec<f32>>, String> {
|
||||
let path = Path::new(wav_path);
|
||||
if !path.exists() {
|
||||
return Err(format!("WAV file not found: {wav_path}"));
|
||||
}
|
||||
|
||||
let mut reader =
|
||||
hound::WavReader::open(path).map_err(|e| format!("open WAV {wav_path}: {e}"))?;
|
||||
let spec = reader.spec();
|
||||
let wav_rate = spec.sample_rate;
|
||||
|
||||
// Read all samples as f32 in [-1.0, 1.0].
|
||||
let samples: Vec<f32> = if spec.bits_per_sample == 16 {
|
||||
reader
|
||||
.samples::<i16>()
|
||||
.filter_map(|s| s.ok())
|
||||
.map(|s| s as f32 / 32768.0)
|
||||
.collect()
|
||||
} else if spec.bits_per_sample == 32 && spec.sample_format == hound::SampleFormat::Float {
|
||||
reader
|
||||
.samples::<f32>()
|
||||
.filter_map(|s| s.ok())
|
||||
.collect()
|
||||
} else {
|
||||
return Err(format!(
|
||||
"unsupported WAV format: {}bit {:?}",
|
||||
spec.bits_per_sample, spec.sample_format
|
||||
));
|
||||
};
|
||||
|
||||
if samples.is_empty() {
|
||||
return Ok(vec![]);
|
||||
}
|
||||
|
||||
// Resample to MIX_RATE (48kHz) if needed.
|
||||
let resampled = if wav_rate != MIX_RATE {
|
||||
let mut transcoder = TranscodeState::new().map_err(|e| format!("codec init: {e}"))?;
|
||||
transcoder
|
||||
.resample_f32(&samples, wav_rate, MIX_RATE)
|
||||
.map_err(|e| format!("resample: {e}"))?
|
||||
} else {
|
||||
samples
|
||||
};
|
||||
|
||||
// Split into MIX_FRAME_SIZE (960) sample frames.
|
||||
let mut frames = Vec::new();
|
||||
let mut offset = 0;
|
||||
while offset < resampled.len() {
|
||||
let end = (offset + MIX_FRAME_SIZE).min(resampled.len());
|
||||
let mut frame = resampled[offset..end].to_vec();
|
||||
// Pad short final frame with silence.
|
||||
frame.resize(MIX_FRAME_SIZE, 0.0);
|
||||
frames.push(frame);
|
||||
offset += MIX_FRAME_SIZE;
|
||||
}
|
||||
|
||||
Ok(frames)
|
||||
}
|
||||
|
||||
@@ -1,12 +1,20 @@
|
||||
//! Call hub — owns legs and bridges media.
|
||||
//! Call hub — owns N legs and a mixer task.
|
||||
//!
|
||||
//! Each Call has a unique ID and tracks its state, direction, and associated
|
||||
//! SIP Call-IDs for message routing.
|
||||
//! Every call has a central mixer that provides mix-minus audio to all
|
||||
//! participants. Legs can be added and removed dynamically mid-call.
|
||||
|
||||
use crate::mixer::{MixerCommand, RtpPacket};
|
||||
use crate::sip_leg::SipLeg;
|
||||
use sip_proto::message::SipMessage;
|
||||
use std::collections::HashMap;
|
||||
use std::net::SocketAddr;
|
||||
use std::sync::Arc;
|
||||
use std::time::Instant;
|
||||
use tokio::net::UdpSocket;
|
||||
use tokio::sync::mpsc;
|
||||
use tokio::task::JoinHandle;
|
||||
|
||||
pub type LegId = String;
|
||||
|
||||
/// Call state machine.
|
||||
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
|
||||
@@ -15,20 +23,20 @@ pub enum CallState {
|
||||
Ringing,
|
||||
Connected,
|
||||
Voicemail,
|
||||
Ivr,
|
||||
Terminating,
|
||||
Terminated,
|
||||
}
|
||||
|
||||
impl CallState {
|
||||
/// Wire-format string for events/dashboards. Not currently emitted —
|
||||
/// call state changes flow as typed events (`call_answered`, etc.) —
|
||||
/// but kept for future status-snapshot work.
|
||||
#[allow(dead_code)]
|
||||
pub fn as_str(&self) -> &'static str {
|
||||
match self {
|
||||
Self::SettingUp => "setting-up",
|
||||
Self::Ringing => "ringing",
|
||||
Self::Connected => "connected",
|
||||
Self::Voicemail => "voicemail",
|
||||
Self::Ivr => "ivr",
|
||||
Self::Terminating => "terminating",
|
||||
Self::Terminated => "terminated",
|
||||
}
|
||||
}
|
||||
@@ -41,6 +49,8 @@ pub enum CallDirection {
|
||||
}
|
||||
|
||||
impl CallDirection {
|
||||
/// Wire-format string. See CallState::as_str.
|
||||
#[allow(dead_code)]
|
||||
pub fn as_str(&self) -> &'static str {
|
||||
match self {
|
||||
Self::Inbound => "inbound",
|
||||
@@ -49,55 +59,185 @@ impl CallDirection {
|
||||
}
|
||||
}
|
||||
|
||||
/// A passthrough call — both sides share the same SIP Call-ID.
|
||||
/// The proxy rewrites SDP/Contact/Request-URI and relays RTP.
|
||||
pub struct PassthroughCall {
|
||||
/// The type of a call leg.
|
||||
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
|
||||
pub enum LegKind {
|
||||
SipProvider,
|
||||
SipDevice,
|
||||
WebRtc,
|
||||
/// Voicemail playback, IVR prompt playback, recording — not yet wired up
|
||||
/// as a distinct leg kind (those paths currently use the mixer's role
|
||||
/// system instead). Kept behind allow so adding a real media leg later
|
||||
/// doesn't require re-introducing the variant.
|
||||
#[allow(dead_code)]
|
||||
Media,
|
||||
Tool, // observer leg for recording, transcription, etc.
|
||||
}
|
||||
|
||||
impl LegKind {
|
||||
pub fn as_str(&self) -> &'static str {
|
||||
match self {
|
||||
Self::SipProvider => "sip-provider",
|
||||
Self::SipDevice => "sip-device",
|
||||
Self::WebRtc => "webrtc",
|
||||
Self::Media => "media",
|
||||
Self::Tool => "tool",
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/// Per-leg state.
|
||||
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
|
||||
pub enum LegState {
|
||||
Inviting,
|
||||
Ringing,
|
||||
Connected,
|
||||
Terminated,
|
||||
}
|
||||
|
||||
impl LegState {
|
||||
pub fn as_str(&self) -> &'static str {
|
||||
match self {
|
||||
Self::Inviting => "inviting",
|
||||
Self::Ringing => "ringing",
|
||||
Self::Connected => "connected",
|
||||
Self::Terminated => "terminated",
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/// Information about a single leg in a call.
|
||||
pub struct LegInfo {
|
||||
pub id: LegId,
|
||||
pub kind: LegKind,
|
||||
pub state: LegState,
|
||||
pub codec_pt: u8,
|
||||
|
||||
/// For SIP legs: the SIP dialog manager (handles 407 auth, BYE, etc).
|
||||
pub sip_leg: Option<SipLeg>,
|
||||
/// For SIP legs: the SIP Call-ID for message routing.
|
||||
pub sip_call_id: Option<String>,
|
||||
/// For WebRTC legs: the session ID in WebRtcEngine.
|
||||
///
|
||||
/// Populated at leg creation but not yet consumed by the hub —
|
||||
/// WebRTC session lookup currently goes through the session registry
|
||||
/// directly. Kept for introspection/debugging.
|
||||
#[allow(dead_code)]
|
||||
pub webrtc_session_id: Option<String>,
|
||||
/// The RTP socket allocated for this leg.
|
||||
pub rtp_socket: Option<Arc<UdpSocket>>,
|
||||
/// The RTP port number.
|
||||
pub rtp_port: u16,
|
||||
/// Public IP to advertise in SDP/Record-Route when THIS leg is the
|
||||
/// destination of a rewrite. Populated only for provider legs; `None`
|
||||
/// for LAN SIP devices, WebRTC browsers, media, and tool legs (which
|
||||
/// are reachable via `lan_ip`). See `route_passthrough_message` for
|
||||
/// the per-destination advertise-IP logic.
|
||||
pub public_ip: Option<String>,
|
||||
/// The remote media endpoint (learned from SDP or address learning).
|
||||
pub remote_media: Option<SocketAddr>,
|
||||
/// SIP signaling address (provider or device).
|
||||
pub signaling_addr: Option<SocketAddr>,
|
||||
|
||||
/// Flexible key-value metadata (consent state, tool config, etc.).
|
||||
/// Persisted into call history on call end.
|
||||
pub metadata: HashMap<String, serde_json::Value>,
|
||||
}
|
||||
|
||||
/// A multiparty call with N legs and a central mixer.
|
||||
pub struct Call {
|
||||
// Duplicated from the HashMap key in CallManager. Kept for future
|
||||
// status-snapshot work.
|
||||
#[allow(dead_code)]
|
||||
pub id: String,
|
||||
pub sip_call_id: String,
|
||||
pub state: CallState,
|
||||
// Populated at call creation but not currently consumed — dashboard
|
||||
// pull snapshots are gone (push events only).
|
||||
#[allow(dead_code)]
|
||||
pub direction: CallDirection,
|
||||
pub created_at: Instant,
|
||||
|
||||
// Call metadata.
|
||||
// Metadata.
|
||||
pub caller_number: Option<String>,
|
||||
pub callee_number: Option<String>,
|
||||
#[allow(dead_code)]
|
||||
pub provider_id: String,
|
||||
|
||||
// Provider side.
|
||||
pub provider_addr: SocketAddr,
|
||||
pub provider_media: Option<SocketAddr>,
|
||||
/// Original INVITE from the device (for device-originated outbound calls).
|
||||
/// Used to construct proper 180/200/error responses back to the device.
|
||||
pub device_invite: Option<SipMessage>,
|
||||
|
||||
// Device side.
|
||||
pub device_addr: SocketAddr,
|
||||
pub device_media: Option<SocketAddr>,
|
||||
/// All legs in this call, keyed by leg ID.
|
||||
pub legs: HashMap<LegId, LegInfo>,
|
||||
|
||||
// RTP relay.
|
||||
pub rtp_port: u16,
|
||||
pub rtp_socket: Arc<UdpSocket>,
|
||||
/// Channel to send commands to the mixer task.
|
||||
pub mixer_cmd_tx: mpsc::Sender<MixerCommand>,
|
||||
|
||||
// Packet counters.
|
||||
pub pkt_from_device: u64,
|
||||
pub pkt_from_provider: u64,
|
||||
/// Handle to the mixer task (aborted on call teardown).
|
||||
mixer_task: Option<JoinHandle<()>>,
|
||||
}
|
||||
|
||||
impl PassthroughCall {
|
||||
impl Call {
|
||||
pub fn new(
|
||||
id: String,
|
||||
direction: CallDirection,
|
||||
provider_id: String,
|
||||
mixer_cmd_tx: mpsc::Sender<MixerCommand>,
|
||||
mixer_task: JoinHandle<()>,
|
||||
) -> Self {
|
||||
Self {
|
||||
id,
|
||||
state: CallState::SettingUp,
|
||||
direction,
|
||||
created_at: Instant::now(),
|
||||
caller_number: None,
|
||||
callee_number: None,
|
||||
provider_id,
|
||||
device_invite: None,
|
||||
legs: HashMap::new(),
|
||||
mixer_cmd_tx,
|
||||
mixer_task: Some(mixer_task),
|
||||
}
|
||||
}
|
||||
|
||||
/// Add a leg to the mixer. Sends the AddLeg command with channel endpoints.
|
||||
pub async fn add_leg_to_mixer(
|
||||
&self,
|
||||
leg_id: &str,
|
||||
codec_pt: u8,
|
||||
inbound_rx: mpsc::Receiver<RtpPacket>,
|
||||
outbound_tx: mpsc::Sender<Vec<u8>>,
|
||||
) {
|
||||
let _ = self
|
||||
.mixer_cmd_tx
|
||||
.send(MixerCommand::AddLeg {
|
||||
leg_id: leg_id.to_string(),
|
||||
codec_pt,
|
||||
inbound_rx,
|
||||
outbound_tx,
|
||||
})
|
||||
.await;
|
||||
}
|
||||
|
||||
/// Remove a leg from the mixer.
|
||||
pub async fn remove_leg_from_mixer(&self, leg_id: &str) {
|
||||
let _ = self
|
||||
.mixer_cmd_tx
|
||||
.send(MixerCommand::RemoveLeg {
|
||||
leg_id: leg_id.to_string(),
|
||||
})
|
||||
.await;
|
||||
}
|
||||
|
||||
pub fn duration_secs(&self) -> u64 {
|
||||
self.created_at.elapsed().as_secs()
|
||||
}
|
||||
|
||||
pub fn to_status_json(&self) -> serde_json::Value {
|
||||
serde_json::json!({
|
||||
"id": self.id,
|
||||
"state": self.state.as_str(),
|
||||
"direction": self.direction.as_str(),
|
||||
"callerNumber": self.caller_number,
|
||||
"calleeNumber": self.callee_number,
|
||||
"providerUsed": self.provider_id,
|
||||
"createdAt": self.created_at.elapsed().as_millis(),
|
||||
"duration": self.duration_secs(),
|
||||
"rtpPort": self.rtp_port,
|
||||
"pktFromDevice": self.pkt_from_device,
|
||||
"pktFromProvider": self.pkt_from_provider,
|
||||
})
|
||||
/// Shut down the mixer and abort its task.
|
||||
pub async fn shutdown_mixer(&mut self) {
|
||||
let _ = self.mixer_cmd_tx.send(MixerCommand::Shutdown).await;
|
||||
if let Some(handle) = self.mixer_task.take() {
|
||||
handle.abort();
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
File diff suppressed because it is too large
Load Diff
@@ -30,6 +30,11 @@ impl Endpoint {
|
||||
}
|
||||
|
||||
/// Provider quirks for codec/protocol workarounds.
|
||||
//
|
||||
// Deserialized from provider config for TS parity. Early-media silence
|
||||
// injection and related workarounds are not yet ported to the Rust engine,
|
||||
// so every field is populated by serde but not yet consumed.
|
||||
#[allow(dead_code)]
|
||||
#[derive(Debug, Clone, Deserialize)]
|
||||
pub struct Quirks {
|
||||
#[serde(rename = "earlyMediaSilence")]
|
||||
@@ -44,6 +49,9 @@ pub struct Quirks {
|
||||
#[derive(Debug, Clone, Deserialize)]
|
||||
pub struct ProviderConfig {
|
||||
pub id: String,
|
||||
// UI label — populated by serde for parity with the TS config, not
|
||||
// consumed at runtime.
|
||||
#[allow(dead_code)]
|
||||
#[serde(rename = "displayName")]
|
||||
pub display_name: String,
|
||||
pub domain: String,
|
||||
@@ -54,6 +62,8 @@ pub struct ProviderConfig {
|
||||
#[serde(rename = "registerIntervalSec")]
|
||||
pub register_interval_sec: u32,
|
||||
pub codecs: Vec<u8>,
|
||||
// Workaround knobs populated by serde but not yet acted upon — see Quirks.
|
||||
#[allow(dead_code)]
|
||||
pub quirks: Quirks,
|
||||
}
|
||||
|
||||
@@ -84,6 +94,10 @@ pub struct RouteMatch {
|
||||
|
||||
/// Route action.
|
||||
#[derive(Debug, Clone, Deserialize)]
|
||||
// Several fields (voicemail_box, ivr_menu_id, no_answer_timeout) are read
|
||||
// by resolve_inbound_route but not yet honored downstream — see the
|
||||
// multi-target TODO in CallManager::create_inbound_call.
|
||||
#[allow(dead_code)]
|
||||
pub struct RouteAction {
|
||||
pub targets: Option<Vec<String>>,
|
||||
#[serde(rename = "ringBrowsers")]
|
||||
@@ -106,7 +120,11 @@ pub struct RouteAction {
|
||||
/// A routing rule.
|
||||
#[derive(Debug, Clone, Deserialize)]
|
||||
pub struct Route {
|
||||
// `id` and `name` are UI identifiers, populated by serde but not
|
||||
// consumed by the resolvers.
|
||||
#[allow(dead_code)]
|
||||
pub id: String,
|
||||
#[allow(dead_code)]
|
||||
pub name: String,
|
||||
pub priority: i32,
|
||||
pub enabled: bool,
|
||||
@@ -192,10 +210,18 @@ pub fn matches_pattern(pattern: Option<&str>, value: &str) -> bool {
|
||||
/// Result of resolving an outbound route.
|
||||
pub struct OutboundRouteResult {
|
||||
pub provider: ProviderConfig,
|
||||
// TODO: prefix rewriting is unfinished — this is computed but the
|
||||
// caller ignores it and uses the raw dialed number.
|
||||
#[allow(dead_code)]
|
||||
pub transformed_number: String,
|
||||
}
|
||||
|
||||
/// Result of resolving an inbound route.
|
||||
//
|
||||
// `device_ids` and `ring_browsers` are consumed by create_inbound_call.
|
||||
// The remaining fields (voicemail_box, ivr_menu_id, no_answer_timeout)
|
||||
// are resolved but not yet acted upon — see the multi-target TODO.
|
||||
#[allow(dead_code)]
|
||||
pub struct InboundRouteResult {
|
||||
pub device_ids: Vec<String>,
|
||||
pub ring_browsers: bool,
|
||||
|
||||
@@ -1,200 +0,0 @@
|
||||
//! DTMF detection — parses RFC 2833 telephone-event RTP packets.
|
||||
//!
|
||||
//! Deduplicates repeated packets (same digit sent multiple times with
|
||||
//! increasing duration) and fires once per detected digit.
|
||||
//!
|
||||
//! Ported from ts/call/dtmf-detector.ts.
|
||||
|
||||
use crate::ipc::{emit_event, OutTx};
|
||||
|
||||
/// RFC 2833 event ID → character mapping.
|
||||
const EVENT_CHARS: &[char] = &[
|
||||
'0', '1', '2', '3', '4', '5', '6', '7', '8', '9', '*', '#', 'A', 'B', 'C', 'D',
|
||||
];
|
||||
|
||||
/// Safety timeout: report digit if no End packet arrives within this many ms.
|
||||
const SAFETY_TIMEOUT_MS: u64 = 200;
|
||||
|
||||
/// DTMF detector for a single RTP stream.
|
||||
pub struct DtmfDetector {
|
||||
/// Negotiated telephone-event payload type (default 101).
|
||||
telephone_event_pt: u8,
|
||||
/// Clock rate for duration calculation (default 8000 Hz).
|
||||
clock_rate: u32,
|
||||
/// Call ID for event emission.
|
||||
call_id: String,
|
||||
|
||||
// Deduplication state.
|
||||
current_event_id: Option<u8>,
|
||||
current_event_ts: Option<u32>,
|
||||
current_event_reported: bool,
|
||||
current_event_duration: u16,
|
||||
|
||||
out_tx: OutTx,
|
||||
}
|
||||
|
||||
impl DtmfDetector {
|
||||
pub fn new(call_id: String, out_tx: OutTx) -> Self {
|
||||
Self {
|
||||
telephone_event_pt: 101,
|
||||
clock_rate: 8000,
|
||||
call_id,
|
||||
current_event_id: None,
|
||||
current_event_ts: None,
|
||||
current_event_reported: false,
|
||||
current_event_duration: 0,
|
||||
out_tx,
|
||||
}
|
||||
}
|
||||
|
||||
/// Feed an RTP packet. Checks PT; ignores non-DTMF packets.
|
||||
/// Returns Some(digit_char) if a digit was detected.
|
||||
pub fn process_rtp(&mut self, data: &[u8]) -> Option<char> {
|
||||
if data.len() < 16 {
|
||||
return None; // 12-byte header + 4-byte telephone-event minimum
|
||||
}
|
||||
|
||||
let pt = data[1] & 0x7F;
|
||||
if pt != self.telephone_event_pt {
|
||||
return None;
|
||||
}
|
||||
|
||||
let marker = (data[1] & 0x80) != 0;
|
||||
let rtp_timestamp = u32::from_be_bytes([data[4], data[5], data[6], data[7]]);
|
||||
|
||||
// Parse telephone-event payload.
|
||||
let event_id = data[12];
|
||||
let end_bit = (data[13] & 0x80) != 0;
|
||||
let duration = u16::from_be_bytes([data[14], data[15]]);
|
||||
|
||||
if event_id as usize >= EVENT_CHARS.len() {
|
||||
return None;
|
||||
}
|
||||
|
||||
// Detect new event.
|
||||
let is_new = marker
|
||||
|| self.current_event_id != Some(event_id)
|
||||
|| self.current_event_ts != Some(rtp_timestamp);
|
||||
|
||||
if is_new {
|
||||
// Report pending unreported event.
|
||||
let pending = self.report_pending();
|
||||
|
||||
self.current_event_id = Some(event_id);
|
||||
self.current_event_ts = Some(rtp_timestamp);
|
||||
self.current_event_reported = false;
|
||||
self.current_event_duration = duration;
|
||||
|
||||
if pending.is_some() {
|
||||
return pending;
|
||||
}
|
||||
}
|
||||
|
||||
if duration > self.current_event_duration {
|
||||
self.current_event_duration = duration;
|
||||
}
|
||||
|
||||
// Report on End bit (first time only).
|
||||
if end_bit && !self.current_event_reported {
|
||||
self.current_event_reported = true;
|
||||
let digit = EVENT_CHARS[event_id as usize];
|
||||
let duration_ms = (self.current_event_duration as f64 / self.clock_rate as f64) * 1000.0;
|
||||
|
||||
emit_event(
|
||||
&self.out_tx,
|
||||
"dtmf_digit",
|
||||
serde_json::json!({
|
||||
"call_id": self.call_id,
|
||||
"digit": digit.to_string(),
|
||||
"duration_ms": duration_ms.round() as u32,
|
||||
"source": "rfc2833",
|
||||
}),
|
||||
);
|
||||
|
||||
return Some(digit);
|
||||
}
|
||||
|
||||
None
|
||||
}
|
||||
|
||||
/// Report a pending unreported event.
|
||||
fn report_pending(&mut self) -> Option<char> {
|
||||
if let Some(event_id) = self.current_event_id {
|
||||
if !self.current_event_reported && (event_id as usize) < EVENT_CHARS.len() {
|
||||
self.current_event_reported = true;
|
||||
let digit = EVENT_CHARS[event_id as usize];
|
||||
let duration_ms =
|
||||
(self.current_event_duration as f64 / self.clock_rate as f64) * 1000.0;
|
||||
|
||||
emit_event(
|
||||
&self.out_tx,
|
||||
"dtmf_digit",
|
||||
serde_json::json!({
|
||||
"call_id": self.call_id,
|
||||
"digit": digit.to_string(),
|
||||
"duration_ms": duration_ms.round() as u32,
|
||||
"source": "rfc2833",
|
||||
}),
|
||||
);
|
||||
|
||||
return Some(digit);
|
||||
}
|
||||
}
|
||||
None
|
||||
}
|
||||
|
||||
/// Process a SIP INFO message body for DTMF.
|
||||
pub fn process_sip_info(&mut self, content_type: &str, body: &str) -> Option<char> {
|
||||
let ct = content_type.to_ascii_lowercase();
|
||||
|
||||
if ct.contains("application/dtmf-relay") {
|
||||
// Format: "Signal= 5\r\nDuration= 160\r\n"
|
||||
let signal = body
|
||||
.lines()
|
||||
.find(|l| l.to_ascii_lowercase().starts_with("signal"))
|
||||
.and_then(|l| l.split('=').nth(1))
|
||||
.map(|s| s.trim().to_string())?;
|
||||
|
||||
if signal.len() != 1 {
|
||||
return None;
|
||||
}
|
||||
let digit = signal.chars().next()?.to_ascii_uppercase();
|
||||
if !"0123456789*#ABCD".contains(digit) {
|
||||
return None;
|
||||
}
|
||||
|
||||
emit_event(
|
||||
&self.out_tx,
|
||||
"dtmf_digit",
|
||||
serde_json::json!({
|
||||
"call_id": self.call_id,
|
||||
"digit": digit.to_string(),
|
||||
"source": "sip-info",
|
||||
}),
|
||||
);
|
||||
|
||||
return Some(digit);
|
||||
}
|
||||
|
||||
if ct.contains("application/dtmf") {
|
||||
let digit = body.trim().chars().next()?.to_ascii_uppercase();
|
||||
if !"0123456789*#ABCD".contains(digit) {
|
||||
return None;
|
||||
}
|
||||
|
||||
emit_event(
|
||||
&self.out_tx,
|
||||
"dtmf_digit",
|
||||
serde_json::json!({
|
||||
"call_id": self.call_id,
|
||||
"digit": digit.to_string(),
|
||||
"source": "sip-info",
|
||||
}),
|
||||
);
|
||||
|
||||
return Some(digit);
|
||||
}
|
||||
|
||||
None
|
||||
}
|
||||
}
|
||||
188
rust/crates/proxy-engine/src/jitter_buffer.rs
Normal file
188
rust/crates/proxy-engine/src/jitter_buffer.rs
Normal file
@@ -0,0 +1,188 @@
|
||||
//! Per-leg adaptive jitter buffer for the audio mixer.
|
||||
//!
|
||||
//! Sits between inbound RTP packet reception and the mixer's decode step.
|
||||
//! Reorders packets by sequence number and delivers exactly one frame per
|
||||
//! 20ms mixer tick, smoothing out network jitter. When a packet is missing,
|
||||
//! the mixer can invoke codec PLC to conceal the gap.
|
||||
|
||||
use crate::mixer::RtpPacket;
|
||||
use std::collections::BTreeMap;
|
||||
|
||||
/// Per-leg jitter buffer. Collects RTP packets keyed by sequence number,
|
||||
/// delivers one frame per 20ms tick in sequence order.
|
||||
///
|
||||
/// Adaptive target depth: starts at 3 frames (60ms), adjusts between
|
||||
/// 2–6 frames based on observed jitter.
|
||||
pub struct JitterBuffer {
|
||||
/// Packets waiting for playout, keyed by seq number.
|
||||
buffer: BTreeMap<u16, RtpPacket>,
|
||||
/// Next expected sequence number for playout.
|
||||
next_seq: Option<u16>,
|
||||
/// Target buffer depth in frames (adaptive).
|
||||
target_depth: u32,
|
||||
/// Current fill level high-water mark (for adaptation).
|
||||
max_fill_seen: u32,
|
||||
/// Ticks since last adaptation adjustment.
|
||||
adapt_counter: u32,
|
||||
/// Consecutive ticks where buffer was empty (for ramp-up).
|
||||
empty_streak: u32,
|
||||
/// Consecutive ticks where buffer had excess (for ramp-down).
|
||||
excess_streak: u32,
|
||||
/// Whether we've started playout (initial fill complete).
|
||||
playing: bool,
|
||||
/// Number of frames consumed since start (for stats).
|
||||
frames_consumed: u64,
|
||||
/// Number of frames lost (gap in sequence).
|
||||
frames_lost: u64,
|
||||
}
|
||||
|
||||
/// What the mixer gets back each tick.
|
||||
pub enum JitterResult {
|
||||
/// A packet is available for decoding.
|
||||
Packet(RtpPacket),
|
||||
/// Packet was expected but missing — invoke PLC.
|
||||
Missing,
|
||||
/// Buffer is in initial fill phase — output silence.
|
||||
Filling,
|
||||
}
|
||||
|
||||
impl JitterBuffer {
|
||||
pub fn new() -> Self {
|
||||
Self {
|
||||
buffer: BTreeMap::new(),
|
||||
next_seq: None,
|
||||
target_depth: 3, // 60ms initial target
|
||||
max_fill_seen: 0,
|
||||
adapt_counter: 0,
|
||||
empty_streak: 0,
|
||||
excess_streak: 0,
|
||||
playing: false,
|
||||
frames_consumed: 0,
|
||||
frames_lost: 0,
|
||||
}
|
||||
}
|
||||
|
||||
/// Push a received RTP packet into the buffer.
|
||||
pub fn push(&mut self, pkt: RtpPacket) {
|
||||
// Ignore duplicates.
|
||||
if self.buffer.contains_key(&pkt.seq) {
|
||||
return;
|
||||
}
|
||||
|
||||
// Detect large forward seq jump (hold/resume, SSRC change).
|
||||
if let Some(next) = self.next_seq {
|
||||
let jump = pkt.seq.wrapping_sub(next);
|
||||
if jump > 1000 && jump < 0x8000 {
|
||||
// Massive forward jump — reset buffer.
|
||||
self.reset();
|
||||
self.next_seq = Some(pkt.seq);
|
||||
}
|
||||
}
|
||||
|
||||
if self.next_seq.is_none() {
|
||||
self.next_seq = Some(pkt.seq);
|
||||
}
|
||||
|
||||
self.buffer.insert(pkt.seq, pkt);
|
||||
}
|
||||
|
||||
/// Consume one frame for the current 20ms tick.
|
||||
/// Called once per mixer tick per leg.
|
||||
pub fn consume(&mut self) -> JitterResult {
|
||||
// Track fill level for adaptation.
|
||||
let fill = self.buffer.len() as u32;
|
||||
if fill > self.max_fill_seen {
|
||||
self.max_fill_seen = fill;
|
||||
}
|
||||
|
||||
// Initial fill phase: wait until we have target_depth packets.
|
||||
if !self.playing {
|
||||
if fill >= self.target_depth {
|
||||
self.playing = true;
|
||||
} else {
|
||||
return JitterResult::Filling;
|
||||
}
|
||||
}
|
||||
|
||||
let seq = match self.next_seq {
|
||||
Some(s) => s,
|
||||
None => return JitterResult::Filling,
|
||||
};
|
||||
|
||||
// Advance next_seq (wrapping u16).
|
||||
self.next_seq = Some(seq.wrapping_add(1));
|
||||
|
||||
// Try to pull the expected sequence number.
|
||||
if let Some(pkt) = self.buffer.remove(&seq) {
|
||||
self.frames_consumed += 1;
|
||||
self.empty_streak = 0;
|
||||
|
||||
// Adaptive: if buffer is consistently deep, we can tighten.
|
||||
if fill > self.target_depth + 2 {
|
||||
self.excess_streak += 1;
|
||||
} else {
|
||||
self.excess_streak = 0;
|
||||
}
|
||||
|
||||
JitterResult::Packet(pkt)
|
||||
} else {
|
||||
// Packet missing — PLC needed.
|
||||
self.frames_lost += 1;
|
||||
self.empty_streak += 1;
|
||||
self.excess_streak = 0;
|
||||
|
||||
JitterResult::Missing
|
||||
}
|
||||
}
|
||||
|
||||
/// Run adaptation logic. Call every tick; internally gates to ~1s intervals.
|
||||
pub fn adapt(&mut self) {
|
||||
self.adapt_counter += 1;
|
||||
if self.adapt_counter < 50 {
|
||||
return;
|
||||
}
|
||||
self.adapt_counter = 0;
|
||||
|
||||
// If we had many empty ticks, increase depth.
|
||||
if self.empty_streak > 3 && self.target_depth < 6 {
|
||||
self.target_depth += 1;
|
||||
}
|
||||
// If buffer consistently overfull, decrease depth.
|
||||
else if self.excess_streak > 25 && self.target_depth > 2 {
|
||||
self.target_depth -= 1;
|
||||
}
|
||||
|
||||
self.max_fill_seen = 0;
|
||||
}
|
||||
|
||||
/// Discard packets that are too old (seq far behind next_seq).
|
||||
/// Prevents unbounded memory growth from reordered/late packets.
|
||||
pub fn prune_stale(&mut self) {
|
||||
if let Some(next) = self.next_seq {
|
||||
// Remove anything more than 100 frames behind playout point.
|
||||
// Use wrapping arithmetic: if (next - seq) > 100, it's stale.
|
||||
let stale: Vec<u16> = self
|
||||
.buffer
|
||||
.keys()
|
||||
.filter(|&&seq| {
|
||||
let age = next.wrapping_sub(seq);
|
||||
age > 100 && age < 0x8000 // < 0x8000 means it's actually behind, not ahead
|
||||
})
|
||||
.copied()
|
||||
.collect();
|
||||
for seq in stale {
|
||||
self.buffer.remove(&seq);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/// Reset the buffer (e.g., after re-INVITE / hold-resume).
|
||||
pub fn reset(&mut self) {
|
||||
self.buffer.clear();
|
||||
self.next_seq = None;
|
||||
self.playing = false;
|
||||
self.empty_streak = 0;
|
||||
self.excess_streak = 0;
|
||||
self.adapt_counter = 0;
|
||||
}
|
||||
}
|
||||
100
rust/crates/proxy-engine/src/leg_io.rs
Normal file
100
rust/crates/proxy-engine/src/leg_io.rs
Normal file
@@ -0,0 +1,100 @@
|
||||
//! Leg I/O task spawners.
|
||||
//!
|
||||
//! Each SIP leg gets two tasks:
|
||||
//! - Inbound: recv_from on RTP socket → strip header → send RtpPacket to mixer channel
|
||||
//! - Outbound: recv encoded RTP from mixer channel → send_to remote media endpoint
|
||||
//!
|
||||
//! WebRTC leg I/O is handled inside webrtc_engine.rs (on_track + track.write).
|
||||
|
||||
use crate::mixer::RtpPacket;
|
||||
use std::net::SocketAddr;
|
||||
use std::sync::Arc;
|
||||
use tokio::net::UdpSocket;
|
||||
use tokio::sync::mpsc;
|
||||
|
||||
/// Channel pair for connecting a leg to the mixer.
|
||||
pub struct LegChannels {
|
||||
/// Mixer receives decoded packets from this leg.
|
||||
pub inbound_tx: mpsc::Sender<RtpPacket>,
|
||||
pub inbound_rx: mpsc::Receiver<RtpPacket>,
|
||||
/// Mixer sends encoded RTP to this leg.
|
||||
pub outbound_tx: mpsc::Sender<Vec<u8>>,
|
||||
pub outbound_rx: mpsc::Receiver<Vec<u8>>,
|
||||
}
|
||||
|
||||
/// Create a channel pair for a leg.
|
||||
pub fn create_leg_channels() -> LegChannels {
|
||||
let (inbound_tx, inbound_rx) = mpsc::channel::<RtpPacket>(64);
|
||||
let (outbound_tx, outbound_rx) = mpsc::channel::<Vec<u8>>(8);
|
||||
LegChannels {
|
||||
inbound_tx,
|
||||
inbound_rx,
|
||||
outbound_tx,
|
||||
outbound_rx,
|
||||
}
|
||||
}
|
||||
|
||||
/// Spawn the inbound I/O task for a SIP leg.
|
||||
/// Reads RTP from the socket, parses the variable-length header (RFC 3550),
|
||||
/// and sends the payload to the mixer.
|
||||
/// Returns the JoinHandle (exits when the inbound_tx channel is dropped).
|
||||
pub fn spawn_sip_inbound(
|
||||
rtp_socket: Arc<UdpSocket>,
|
||||
inbound_tx: mpsc::Sender<RtpPacket>,
|
||||
) -> tokio::task::JoinHandle<()> {
|
||||
tokio::spawn(async move {
|
||||
let mut buf = vec![0u8; 1500];
|
||||
loop {
|
||||
match rtp_socket.recv_from(&mut buf).await {
|
||||
Ok((n, _from)) => {
|
||||
if n < 12 {
|
||||
continue; // Too small for RTP header.
|
||||
}
|
||||
let pt = buf[1] & 0x7F;
|
||||
let marker = (buf[1] & 0x80) != 0;
|
||||
let seq = u16::from_be_bytes([buf[2], buf[3]]);
|
||||
let timestamp = u32::from_be_bytes([buf[4], buf[5], buf[6], buf[7]]);
|
||||
|
||||
// RFC 3550: header length = 12 + (CC * 4) + optional extension.
|
||||
let cc = (buf[0] & 0x0F) as usize;
|
||||
let has_extension = (buf[0] & 0x10) != 0;
|
||||
let mut offset = 12 + cc * 4;
|
||||
if has_extension {
|
||||
if offset + 4 > n {
|
||||
continue; // Malformed: extension header truncated.
|
||||
}
|
||||
let ext_len = u16::from_be_bytes([buf[offset + 2], buf[offset + 3]]) as usize;
|
||||
offset += 4 + ext_len * 4;
|
||||
}
|
||||
if offset >= n {
|
||||
continue; // No payload after header.
|
||||
}
|
||||
|
||||
let payload = buf[offset..n].to_vec();
|
||||
if payload.is_empty() {
|
||||
continue;
|
||||
}
|
||||
if inbound_tx.send(RtpPacket { payload, payload_type: pt, marker, seq, timestamp }).await.is_err() {
|
||||
break; // Channel closed — leg removed.
|
||||
}
|
||||
}
|
||||
Err(_) => break, // Socket error.
|
||||
}
|
||||
}
|
||||
})
|
||||
}
|
||||
|
||||
/// Spawn the outbound I/O task for a SIP leg.
|
||||
/// Reads encoded RTP packets from the mixer and sends them to the remote media endpoint.
|
||||
/// Returns the JoinHandle (exits when the outbound_rx channel is closed).
|
||||
pub fn spawn_sip_outbound(
|
||||
rtp_socket: Arc<UdpSocket>,
|
||||
remote_media: SocketAddr,
|
||||
mut outbound_rx: mpsc::Receiver<Vec<u8>>,
|
||||
) -> tokio::task::JoinHandle<()> {
|
||||
tokio::spawn(async move {
|
||||
while let Some(rtp_data) = outbound_rx.recv().await {
|
||||
let _ = rtp_socket.send_to(&rtp_data, remote_media).await;
|
||||
}
|
||||
})
|
||||
}
|
||||
@@ -10,13 +10,18 @@ mod audio_player;
|
||||
mod call;
|
||||
mod call_manager;
|
||||
mod config;
|
||||
mod dtmf;
|
||||
mod ipc;
|
||||
mod jitter_buffer;
|
||||
mod leg_io;
|
||||
mod mixer;
|
||||
mod provider;
|
||||
mod recorder;
|
||||
mod registrar;
|
||||
mod rtp;
|
||||
mod sip_leg;
|
||||
mod sip_transport;
|
||||
mod tool_leg;
|
||||
mod tts;
|
||||
mod voicemail;
|
||||
mod webrtc_engine;
|
||||
|
||||
@@ -35,14 +40,15 @@ use tokio::io::{AsyncBufReadExt, AsyncWriteExt, BufReader};
|
||||
use tokio::net::UdpSocket;
|
||||
use tokio::sync::{mpsc, Mutex};
|
||||
|
||||
/// Shared mutable state for the proxy engine.
|
||||
/// Shared mutable state for the proxy engine (SIP side).
|
||||
/// WebRTC is intentionally kept in a separate lock to avoid contention
|
||||
/// between SIP packet handlers and WebRTC command handlers.
|
||||
struct ProxyEngine {
|
||||
config: Option<AppConfig>,
|
||||
transport: Option<SipTransport>,
|
||||
provider_mgr: ProviderManager,
|
||||
registrar: Registrar,
|
||||
call_mgr: CallManager,
|
||||
webrtc: WebRtcEngine,
|
||||
rtp_pool: Option<RtpPortPool>,
|
||||
out_tx: OutTx,
|
||||
}
|
||||
@@ -55,7 +61,6 @@ impl ProxyEngine {
|
||||
provider_mgr: ProviderManager::new(out_tx.clone()),
|
||||
registrar: Registrar::new(out_tx.clone()),
|
||||
call_mgr: CallManager::new(out_tx.clone()),
|
||||
webrtc: WebRtcEngine::new(out_tx.clone()),
|
||||
rtp_pool: None,
|
||||
out_tx,
|
||||
}
|
||||
@@ -83,9 +88,15 @@ async fn main() {
|
||||
// Emit ready event.
|
||||
emit_event(&out_tx, "ready", serde_json::json!({}));
|
||||
|
||||
// Shared engine state.
|
||||
// Shared engine state (SIP side).
|
||||
let engine = Arc::new(Mutex::new(ProxyEngine::new(out_tx.clone())));
|
||||
|
||||
// WebRTC engine — separate lock to avoid deadlock with SIP handlers.
|
||||
let webrtc = Arc::new(Mutex::new(WebRtcEngine::new(out_tx.clone())));
|
||||
|
||||
// TTS engine — separate lock, lazy-loads model on first use.
|
||||
let tts_engine = Arc::new(Mutex::new(tts::TtsEngine::new()));
|
||||
|
||||
// Read commands from stdin.
|
||||
let stdin = tokio::io::stdin();
|
||||
let reader = BufReader::new(stdin);
|
||||
@@ -105,25 +116,47 @@ async fn main() {
|
||||
};
|
||||
|
||||
let engine = engine.clone();
|
||||
let webrtc = webrtc.clone();
|
||||
let tts_engine = tts_engine.clone();
|
||||
let out_tx = out_tx.clone();
|
||||
|
||||
// Handle commands — some are async, so we spawn.
|
||||
tokio::spawn(async move {
|
||||
handle_command(engine, &out_tx, cmd).await;
|
||||
handle_command(engine, webrtc, tts_engine, &out_tx, cmd).await;
|
||||
});
|
||||
}
|
||||
}
|
||||
|
||||
async fn handle_command(engine: Arc<Mutex<ProxyEngine>>, out_tx: &OutTx, cmd: Command) {
|
||||
async fn handle_command(
|
||||
engine: Arc<Mutex<ProxyEngine>>,
|
||||
webrtc: Arc<Mutex<WebRtcEngine>>,
|
||||
tts_engine: Arc<Mutex<tts::TtsEngine>>,
|
||||
out_tx: &OutTx,
|
||||
cmd: Command,
|
||||
) {
|
||||
match cmd.method.as_str() {
|
||||
// SIP commands — lock engine only.
|
||||
"configure" => handle_configure(engine, out_tx, &cmd).await,
|
||||
"hangup" => handle_hangup(engine, out_tx, &cmd).await,
|
||||
"make_call" => handle_make_call(engine, out_tx, &cmd).await,
|
||||
"get_status" => handle_get_status(engine, out_tx, &cmd).await,
|
||||
"webrtc_offer" => handle_webrtc_offer(engine, out_tx, &cmd).await,
|
||||
"webrtc_ice" => handle_webrtc_ice(engine, out_tx, &cmd).await,
|
||||
"webrtc_link" => handle_webrtc_link(engine, out_tx, &cmd).await,
|
||||
"webrtc_close" => handle_webrtc_close(engine, out_tx, &cmd).await,
|
||||
"add_leg" => handle_add_leg(engine, out_tx, &cmd).await,
|
||||
"remove_leg" => handle_remove_leg(engine, out_tx, &cmd).await,
|
||||
// WebRTC commands — lock webrtc only (no engine contention).
|
||||
"webrtc_offer" => handle_webrtc_offer(webrtc, out_tx, &cmd).await,
|
||||
"webrtc_ice" => handle_webrtc_ice(webrtc, out_tx, &cmd).await,
|
||||
"webrtc_close" => handle_webrtc_close(webrtc, out_tx, &cmd).await,
|
||||
// webrtc_link needs both: engine (for mixer channels) and webrtc (for session).
|
||||
"webrtc_link" => handle_webrtc_link(engine, webrtc, out_tx, &cmd).await,
|
||||
"add_device_leg" => handle_add_device_leg(engine, out_tx, &cmd).await,
|
||||
"transfer_leg" => handle_transfer_leg(engine, out_tx, &cmd).await,
|
||||
"replace_leg" => handle_replace_leg(engine, out_tx, &cmd).await,
|
||||
// Leg interaction and tool leg commands.
|
||||
"start_interaction" => handle_start_interaction(engine, out_tx, &cmd).await,
|
||||
"add_tool_leg" => handle_add_tool_leg(engine, out_tx, &cmd).await,
|
||||
"remove_tool_leg" => handle_remove_tool_leg(engine, out_tx, &cmd).await,
|
||||
"set_leg_metadata" => handle_set_leg_metadata(engine, out_tx, &cmd).await,
|
||||
// TTS command — lock tts_engine only (no SIP/WebRTC contention).
|
||||
"generate_tts" => handle_generate_tts(tts_engine, out_tx, &cmd).await,
|
||||
_ => respond_err(out_tx, &cmd.id, &format!("unknown command: {}", cmd.method)),
|
||||
}
|
||||
}
|
||||
@@ -246,14 +279,11 @@ async fn handle_sip_packet(
|
||||
}
|
||||
|
||||
// 3. Route to existing call by SIP Call-ID.
|
||||
// Check if this Call-ID belongs to an active call (avoids borrow conflict).
|
||||
if eng.call_mgr.has_call(msg.call_id()) {
|
||||
let config_ref = eng.config.as_ref().unwrap().clone();
|
||||
// Temporarily take registrar to avoid overlapping borrows.
|
||||
let registrar_dummy = Registrar::new(eng.out_tx.clone());
|
||||
if eng
|
||||
.call_mgr
|
||||
.route_sip_message(&msg, from_addr, socket, &config_ref, ®istrar_dummy)
|
||||
.route_sip_message(&msg, from_addr, socket, &config_ref)
|
||||
.await
|
||||
{
|
||||
return;
|
||||
@@ -298,7 +328,7 @@ async fn handle_sip_packet(
|
||||
..
|
||||
} = *eng;
|
||||
let rtp_pool = rtp_pool.as_mut().unwrap();
|
||||
let call_id = call_mgr
|
||||
let inbound = call_mgr
|
||||
.create_inbound_call(
|
||||
&msg,
|
||||
from_addr,
|
||||
@@ -312,7 +342,7 @@ async fn handle_sip_packet(
|
||||
)
|
||||
.await;
|
||||
|
||||
if let Some(call_id) = call_id {
|
||||
if let Some(inbound) = inbound {
|
||||
// Emit event so TypeScript knows about the call (for dashboard, IVR routing, etc).
|
||||
let from_header = msg.get_header("From").unwrap_or("");
|
||||
let from_uri = SipMessage::extract_uri(from_header).unwrap_or("Unknown");
|
||||
@@ -325,10 +355,11 @@ async fn handle_sip_packet(
|
||||
&eng.out_tx,
|
||||
"incoming_call",
|
||||
serde_json::json!({
|
||||
"call_id": call_id,
|
||||
"call_id": inbound.call_id,
|
||||
"from_uri": from_uri,
|
||||
"to_number": called_number,
|
||||
"provider_id": provider_id,
|
||||
"ring_browsers": inbound.ring_browsers,
|
||||
}),
|
||||
);
|
||||
}
|
||||
@@ -351,7 +382,7 @@ async fn handle_sip_packet(
|
||||
let route_result = config_ref.resolve_outbound_route(
|
||||
&dialed_number,
|
||||
device_id.as_deref(),
|
||||
&|pid: &str| {
|
||||
&|_pid: &str| {
|
||||
// Can't call async here — use a sync check.
|
||||
// For now, assume all configured providers are available.
|
||||
true
|
||||
@@ -359,11 +390,14 @@ async fn handle_sip_packet(
|
||||
);
|
||||
|
||||
if let Some(route) = route_result {
|
||||
let public_ip = if let Some(ps_arc) = eng.provider_mgr.find_by_address(&from_addr).await {
|
||||
// Look up provider state by config ID (not by device address).
|
||||
let (public_ip, registered_aor) = if let Some(ps_arc) =
|
||||
eng.provider_mgr.find_by_provider_id(&route.provider.id).await
|
||||
{
|
||||
let ps = ps_arc.lock().await;
|
||||
ps.public_ip.clone()
|
||||
(ps.public_ip.clone(), ps.registered_aor.clone())
|
||||
} else {
|
||||
None
|
||||
(None, format!("sip:{}@{}", route.provider.username, route.provider.domain))
|
||||
};
|
||||
|
||||
let ProxyEngine {
|
||||
@@ -373,7 +407,7 @@ async fn handle_sip_packet(
|
||||
} = *eng;
|
||||
let rtp_pool = rtp_pool.as_mut().unwrap();
|
||||
let call_id = call_mgr
|
||||
.create_outbound_passthrough(
|
||||
.create_device_outbound_call(
|
||||
&msg,
|
||||
from_addr,
|
||||
&route.provider,
|
||||
@@ -381,6 +415,7 @@ async fn handle_sip_packet(
|
||||
rtp_pool,
|
||||
socket,
|
||||
public_ip.as_deref(),
|
||||
®istered_aor,
|
||||
)
|
||||
.await;
|
||||
|
||||
@@ -418,13 +453,6 @@ async fn handle_sip_packet(
|
||||
);
|
||||
}
|
||||
|
||||
/// Handle `get_status` — return active call statuses from Rust.
|
||||
async fn handle_get_status(engine: Arc<Mutex<ProxyEngine>>, out_tx: &OutTx, cmd: &Command) {
|
||||
let eng = engine.lock().await;
|
||||
let calls = eng.call_mgr.get_all_statuses();
|
||||
respond_ok(out_tx, &cmd.id, serde_json::json!({ "calls": calls }));
|
||||
}
|
||||
|
||||
/// Handle `make_call` — initiate an outbound call to a number via a provider.
|
||||
async fn handle_make_call(engine: Arc<Mutex<ProxyEngine>>, out_tx: &OutTx, cmd: &Command) {
|
||||
let number = match cmd.params.get("number").and_then(|v| v.as_str()) {
|
||||
@@ -524,7 +552,8 @@ async fn handle_hangup(engine: Arc<Mutex<ProxyEngine>>, out_tx: &OutTx, cmd: &Co
|
||||
}
|
||||
|
||||
/// Handle `webrtc_offer` — browser sends SDP offer, we create PeerConnection and return answer.
|
||||
async fn handle_webrtc_offer(engine: Arc<Mutex<ProxyEngine>>, out_tx: &OutTx, cmd: &Command) {
|
||||
/// Uses only the WebRTC lock — no contention with SIP handlers.
|
||||
async fn handle_webrtc_offer(webrtc: Arc<Mutex<WebRtcEngine>>, out_tx: &OutTx, cmd: &Command) {
|
||||
let session_id = match cmd.params.get("session_id").and_then(|v| v.as_str()) {
|
||||
Some(s) => s.to_string(),
|
||||
None => { respond_err(out_tx, &cmd.id, "missing session_id"); return; }
|
||||
@@ -534,8 +563,8 @@ async fn handle_webrtc_offer(engine: Arc<Mutex<ProxyEngine>>, out_tx: &OutTx, cm
|
||||
None => { respond_err(out_tx, &cmd.id, "missing sdp"); return; }
|
||||
};
|
||||
|
||||
let mut eng = engine.lock().await;
|
||||
match eng.webrtc.handle_offer(&session_id, &offer_sdp).await {
|
||||
let mut wrtc = webrtc.lock().await;
|
||||
match wrtc.handle_offer(&session_id, &offer_sdp).await {
|
||||
Ok(answer_sdp) => {
|
||||
respond_ok(out_tx, &cmd.id, serde_json::json!({
|
||||
"session_id": session_id,
|
||||
@@ -547,7 +576,8 @@ async fn handle_webrtc_offer(engine: Arc<Mutex<ProxyEngine>>, out_tx: &OutTx, cm
|
||||
}
|
||||
|
||||
/// Handle `webrtc_ice` — forward ICE candidate from browser to Rust PeerConnection.
|
||||
async fn handle_webrtc_ice(engine: Arc<Mutex<ProxyEngine>>, out_tx: &OutTx, cmd: &Command) {
|
||||
/// Uses only the WebRTC lock.
|
||||
async fn handle_webrtc_ice(webrtc: Arc<Mutex<WebRtcEngine>>, out_tx: &OutTx, cmd: &Command) {
|
||||
let session_id = match cmd.params.get("session_id").and_then(|v| v.as_str()) {
|
||||
Some(s) => s.to_string(),
|
||||
None => { respond_err(out_tx, &cmd.id, "missing session_id"); return; }
|
||||
@@ -556,15 +586,22 @@ async fn handle_webrtc_ice(engine: Arc<Mutex<ProxyEngine>>, out_tx: &OutTx, cmd:
|
||||
let sdp_mid = cmd.params.get("sdp_mid").and_then(|v| v.as_str());
|
||||
let sdp_mline_index = cmd.params.get("sdp_mline_index").and_then(|v| v.as_u64()).map(|v| v as u16);
|
||||
|
||||
let eng = engine.lock().await;
|
||||
match eng.webrtc.add_ice_candidate(&session_id, candidate, sdp_mid, sdp_mline_index).await {
|
||||
let wrtc = webrtc.lock().await;
|
||||
match wrtc.add_ice_candidate(&session_id, candidate, sdp_mid, sdp_mline_index).await {
|
||||
Ok(()) => respond_ok(out_tx, &cmd.id, serde_json::json!({})),
|
||||
Err(e) => respond_err(out_tx, &cmd.id, &e),
|
||||
}
|
||||
}
|
||||
|
||||
/// Handle `webrtc_link` — link a WebRTC session to a SIP call for audio bridging.
|
||||
async fn handle_webrtc_link(engine: Arc<Mutex<ProxyEngine>>, out_tx: &OutTx, cmd: &Command) {
|
||||
/// Handle `webrtc_link` — link a WebRTC session to a call's mixer for audio bridging.
|
||||
/// Creates channels, adds WebRTC leg to the call, wires the WebRTC engine.
|
||||
/// Locks are never held simultaneously — no deadlock possible.
|
||||
async fn handle_webrtc_link(
|
||||
engine: Arc<Mutex<ProxyEngine>>,
|
||||
webrtc: Arc<Mutex<WebRtcEngine>>,
|
||||
out_tx: &OutTx,
|
||||
cmd: &Command,
|
||||
) {
|
||||
let session_id = match cmd.params.get("session_id").and_then(|v| v.as_str()) {
|
||||
Some(s) => s.to_string(),
|
||||
None => { respond_err(out_tx, &cmd.id, "missing session_id"); return; }
|
||||
@@ -573,34 +610,73 @@ async fn handle_webrtc_link(engine: Arc<Mutex<ProxyEngine>>, out_tx: &OutTx, cmd
|
||||
Some(s) => s.to_string(),
|
||||
None => { respond_err(out_tx, &cmd.id, "missing call_id"); return; }
|
||||
};
|
||||
let provider_addr = match cmd.params.get("provider_media_addr").and_then(|v| v.as_str()) {
|
||||
Some(s) => s.to_string(),
|
||||
None => { respond_err(out_tx, &cmd.id, "missing provider_media_addr"); return; }
|
||||
};
|
||||
let provider_port = match cmd.params.get("provider_media_port").and_then(|v| v.as_u64()) {
|
||||
Some(p) => p as u16,
|
||||
None => { respond_err(out_tx, &cmd.id, "missing provider_media_port"); return; }
|
||||
};
|
||||
let sip_pt = cmd.params.get("sip_pt").and_then(|v| v.as_u64()).unwrap_or(9) as u8;
|
||||
|
||||
let provider_media: SocketAddr = match format!("{provider_addr}:{provider_port}").parse() {
|
||||
Ok(a) => a,
|
||||
Err(e) => { respond_err(out_tx, &cmd.id, &format!("bad address: {e}")); return; }
|
||||
};
|
||||
// Create channels for the WebRTC leg.
|
||||
let channels = crate::leg_io::create_leg_channels();
|
||||
|
||||
let mut eng = engine.lock().await;
|
||||
let sip_socket = match &eng.transport {
|
||||
Some(t) => t.socket(),
|
||||
None => { respond_err(out_tx, &cmd.id, "not initialized"); return; }
|
||||
};
|
||||
// Briefly lock engine to add the WebRTC leg to the call's mixer.
|
||||
{
|
||||
let eng = engine.lock().await;
|
||||
let call = match eng.call_mgr.calls.get(&call_id) {
|
||||
Some(c) => c,
|
||||
None => {
|
||||
respond_err(out_tx, &cmd.id, &format!("call {call_id} not found"));
|
||||
return;
|
||||
}
|
||||
};
|
||||
// Add to mixer via channel.
|
||||
call.add_leg_to_mixer(
|
||||
&session_id,
|
||||
codec_lib::PT_OPUS,
|
||||
channels.inbound_rx,
|
||||
channels.outbound_tx,
|
||||
)
|
||||
.await;
|
||||
} // engine lock released
|
||||
|
||||
let bridge_info = crate::webrtc_engine::SipBridgeInfo {
|
||||
provider_media,
|
||||
sip_pt,
|
||||
sip_socket,
|
||||
};
|
||||
// Lock webrtc to wire the channels.
|
||||
let mut wrtc = webrtc.lock().await;
|
||||
if wrtc
|
||||
.link_to_mixer(&session_id, &call_id, channels.inbound_tx, channels.outbound_rx)
|
||||
.await
|
||||
{
|
||||
// Also store the WebRTC leg info in the call.
|
||||
drop(wrtc); // Release webrtc lock before re-acquiring engine.
|
||||
{
|
||||
let mut eng = engine.lock().await;
|
||||
if let Some(call) = eng.call_mgr.calls.get_mut(&call_id) {
|
||||
call.legs.insert(
|
||||
session_id.clone(),
|
||||
crate::call::LegInfo {
|
||||
id: session_id.clone(),
|
||||
kind: crate::call::LegKind::WebRtc,
|
||||
state: crate::call::LegState::Connected,
|
||||
codec_pt: codec_lib::PT_OPUS,
|
||||
sip_leg: None,
|
||||
sip_call_id: None,
|
||||
webrtc_session_id: Some(session_id.clone()),
|
||||
rtp_socket: None,
|
||||
rtp_port: 0,
|
||||
public_ip: None,
|
||||
remote_media: None,
|
||||
signaling_addr: None,
|
||||
metadata: std::collections::HashMap::new(),
|
||||
},
|
||||
);
|
||||
}
|
||||
}
|
||||
|
||||
emit_event(out_tx, "leg_added", serde_json::json!({
|
||||
"call_id": call_id,
|
||||
"leg_id": session_id,
|
||||
"kind": "webrtc",
|
||||
"state": "connected",
|
||||
"codec": "Opus",
|
||||
"rtpPort": 0,
|
||||
"remoteMedia": null,
|
||||
"metadata": {},
|
||||
}));
|
||||
|
||||
if eng.webrtc.link_to_sip(&session_id, &call_id, bridge_info).await {
|
||||
respond_ok(out_tx, &cmd.id, serde_json::json!({
|
||||
"session_id": session_id,
|
||||
"call_id": call_id,
|
||||
@@ -611,16 +687,557 @@ async fn handle_webrtc_link(engine: Arc<Mutex<ProxyEngine>>, out_tx: &OutTx, cmd
|
||||
}
|
||||
}
|
||||
|
||||
/// Handle `add_leg` — add a new SIP leg to an existing call.
|
||||
async fn handle_add_leg(engine: Arc<Mutex<ProxyEngine>>, out_tx: &OutTx, cmd: &Command) {
|
||||
let call_id = match cmd.params.get("call_id").and_then(|v| v.as_str()) {
|
||||
Some(s) => s.to_string(),
|
||||
None => { respond_err(out_tx, &cmd.id, "missing call_id"); return; }
|
||||
};
|
||||
let number = match cmd.params.get("number").and_then(|v| v.as_str()) {
|
||||
Some(n) => n.to_string(),
|
||||
None => { respond_err(out_tx, &cmd.id, "missing number"); return; }
|
||||
};
|
||||
let provider_id = cmd.params.get("provider_id").and_then(|v| v.as_str());
|
||||
|
||||
let mut eng = engine.lock().await;
|
||||
let config_ref = match &eng.config {
|
||||
Some(c) => c.clone(),
|
||||
None => { respond_err(out_tx, &cmd.id, "not configured"); return; }
|
||||
};
|
||||
|
||||
// Resolve provider.
|
||||
let provider_config = if let Some(pid) = provider_id {
|
||||
config_ref.providers.iter().find(|p| p.id == pid).cloned()
|
||||
} else {
|
||||
config_ref.resolve_outbound_route(&number, None, &|_| true).map(|r| r.provider)
|
||||
};
|
||||
|
||||
let provider_config = match provider_config {
|
||||
Some(p) => p,
|
||||
None => { respond_err(out_tx, &cmd.id, "no provider available"); return; }
|
||||
};
|
||||
|
||||
// Get registered AOR.
|
||||
let registered_aor = if let Some(ps_arc) = eng.provider_mgr.find_by_address(
|
||||
&provider_config.outbound_proxy.to_socket_addr().unwrap_or_else(|| "0.0.0.0:0".parse().unwrap())
|
||||
).await {
|
||||
let ps = ps_arc.lock().await;
|
||||
ps.registered_aor.clone()
|
||||
} else {
|
||||
format!("sip:{}@{}", provider_config.username, provider_config.domain)
|
||||
};
|
||||
|
||||
let public_ip = if let Some(ps_arc) = eng.provider_mgr.find_by_address(
|
||||
&provider_config.outbound_proxy.to_socket_addr().unwrap_or_else(|| "0.0.0.0:0".parse().unwrap())
|
||||
).await {
|
||||
let ps = ps_arc.lock().await;
|
||||
ps.public_ip.clone()
|
||||
} else {
|
||||
None
|
||||
};
|
||||
|
||||
let socket = match &eng.transport {
|
||||
Some(t) => t.socket(),
|
||||
None => { respond_err(out_tx, &cmd.id, "not initialized"); return; }
|
||||
};
|
||||
|
||||
let ProxyEngine { ref mut call_mgr, ref mut rtp_pool, .. } = *eng;
|
||||
let rtp_pool = rtp_pool.as_mut().unwrap();
|
||||
|
||||
let leg_id = call_mgr.add_external_leg(
|
||||
&call_id, &number, &provider_config, &config_ref,
|
||||
rtp_pool, &socket, public_ip.as_deref(), ®istered_aor,
|
||||
).await;
|
||||
|
||||
match leg_id {
|
||||
Some(lid) => respond_ok(out_tx, &cmd.id, serde_json::json!({ "leg_id": lid })),
|
||||
None => respond_err(out_tx, &cmd.id, "failed to add leg"),
|
||||
}
|
||||
}
|
||||
|
||||
/// Handle `add_device_leg` — add a local SIP device to an existing call.
|
||||
async fn handle_add_device_leg(engine: Arc<Mutex<ProxyEngine>>, out_tx: &OutTx, cmd: &Command) {
|
||||
let call_id = match cmd.params.get("call_id").and_then(|v| v.as_str()) {
|
||||
Some(s) => s.to_string(),
|
||||
None => { respond_err(out_tx, &cmd.id, "missing call_id"); return; }
|
||||
};
|
||||
let device_id = match cmd.params.get("device_id").and_then(|v| v.as_str()) {
|
||||
Some(s) => s.to_string(),
|
||||
None => { respond_err(out_tx, &cmd.id, "missing device_id"); return; }
|
||||
};
|
||||
|
||||
let mut eng = engine.lock().await;
|
||||
let config_ref = match &eng.config {
|
||||
Some(c) => c.clone(),
|
||||
None => { respond_err(out_tx, &cmd.id, "not configured"); return; }
|
||||
};
|
||||
let socket = match &eng.transport {
|
||||
Some(t) => t.socket(),
|
||||
None => { respond_err(out_tx, &cmd.id, "not initialized"); return; }
|
||||
};
|
||||
|
||||
let ProxyEngine { ref registrar, ref mut call_mgr, ref mut rtp_pool, .. } = *eng;
|
||||
let rtp_pool = rtp_pool.as_mut().unwrap();
|
||||
|
||||
let leg_id = call_mgr.add_device_leg(
|
||||
&call_id, &device_id, registrar, &config_ref, rtp_pool, &socket,
|
||||
).await;
|
||||
|
||||
match leg_id {
|
||||
Some(lid) => respond_ok(out_tx, &cmd.id, serde_json::json!({ "leg_id": lid })),
|
||||
None => respond_err(out_tx, &cmd.id, "failed to add device leg — device not registered or call not found"),
|
||||
}
|
||||
}
|
||||
|
||||
/// Handle `transfer_leg` — move a leg from one call to another.
|
||||
async fn handle_transfer_leg(engine: Arc<Mutex<ProxyEngine>>, out_tx: &OutTx, cmd: &Command) {
|
||||
let source_call_id = match cmd.params.get("source_call_id").and_then(|v| v.as_str()) {
|
||||
Some(s) => s.to_string(),
|
||||
None => { respond_err(out_tx, &cmd.id, "missing source_call_id"); return; }
|
||||
};
|
||||
let leg_id = match cmd.params.get("leg_id").and_then(|v| v.as_str()) {
|
||||
Some(s) => s.to_string(),
|
||||
None => { respond_err(out_tx, &cmd.id, "missing leg_id"); return; }
|
||||
};
|
||||
let target_call_id = match cmd.params.get("target_call_id").and_then(|v| v.as_str()) {
|
||||
Some(s) => s.to_string(),
|
||||
None => { respond_err(out_tx, &cmd.id, "missing target_call_id"); return; }
|
||||
};
|
||||
|
||||
let mut eng = engine.lock().await;
|
||||
if eng.call_mgr.transfer_leg(&source_call_id, &leg_id, &target_call_id).await {
|
||||
respond_ok(out_tx, &cmd.id, serde_json::json!({}));
|
||||
} else {
|
||||
respond_err(out_tx, &cmd.id, "transfer failed — call or leg not found");
|
||||
}
|
||||
}
|
||||
|
||||
/// Handle `replace_leg` — terminate a leg and dial a replacement into the same call.
|
||||
async fn handle_replace_leg(engine: Arc<Mutex<ProxyEngine>>, out_tx: &OutTx, cmd: &Command) {
|
||||
let call_id = match cmd.params.get("call_id").and_then(|v| v.as_str()) {
|
||||
Some(s) => s.to_string(),
|
||||
None => { respond_err(out_tx, &cmd.id, "missing call_id"); return; }
|
||||
};
|
||||
let old_leg_id = match cmd.params.get("old_leg_id").and_then(|v| v.as_str()) {
|
||||
Some(s) => s.to_string(),
|
||||
None => { respond_err(out_tx, &cmd.id, "missing old_leg_id"); return; }
|
||||
};
|
||||
let number = match cmd.params.get("number").and_then(|v| v.as_str()) {
|
||||
Some(n) => n.to_string(),
|
||||
None => { respond_err(out_tx, &cmd.id, "missing number"); return; }
|
||||
};
|
||||
let provider_id = cmd.params.get("provider_id").and_then(|v| v.as_str());
|
||||
|
||||
let mut eng = engine.lock().await;
|
||||
let config_ref = match &eng.config {
|
||||
Some(c) => c.clone(),
|
||||
None => { respond_err(out_tx, &cmd.id, "not configured"); return; }
|
||||
};
|
||||
let socket = match &eng.transport {
|
||||
Some(t) => t.socket(),
|
||||
None => { respond_err(out_tx, &cmd.id, "not initialized"); return; }
|
||||
};
|
||||
|
||||
// Resolve provider.
|
||||
let provider_config = if let Some(pid) = provider_id {
|
||||
config_ref.providers.iter().find(|p| p.id == pid).cloned()
|
||||
} else {
|
||||
config_ref.resolve_outbound_route(&number, None, &|_| true).map(|r| r.provider)
|
||||
};
|
||||
let provider_config = match provider_config {
|
||||
Some(p) => p,
|
||||
None => { respond_err(out_tx, &cmd.id, "no provider available"); return; }
|
||||
};
|
||||
|
||||
let (public_ip, registered_aor) = if let Some(ps_arc) = eng.provider_mgr.find_by_provider_id(&provider_config.id).await {
|
||||
let ps = ps_arc.lock().await;
|
||||
(ps.public_ip.clone(), ps.registered_aor.clone())
|
||||
} else {
|
||||
(None, format!("sip:{}@{}", provider_config.username, provider_config.domain))
|
||||
};
|
||||
|
||||
let ProxyEngine { ref mut call_mgr, ref mut rtp_pool, .. } = *eng;
|
||||
let rtp_pool = rtp_pool.as_mut().unwrap();
|
||||
|
||||
let new_leg_id = call_mgr.replace_leg(
|
||||
&call_id, &old_leg_id, &number, &provider_config, &config_ref,
|
||||
rtp_pool, &socket, public_ip.as_deref(), ®istered_aor,
|
||||
).await;
|
||||
|
||||
match new_leg_id {
|
||||
Some(lid) => respond_ok(out_tx, &cmd.id, serde_json::json!({ "new_leg_id": lid })),
|
||||
None => respond_err(out_tx, &cmd.id, "replace failed — call ended or dial failed"),
|
||||
}
|
||||
}
|
||||
|
||||
/// Handle `remove_leg` — remove a leg from a call.
|
||||
async fn handle_remove_leg(engine: Arc<Mutex<ProxyEngine>>, out_tx: &OutTx, cmd: &Command) {
|
||||
let call_id = match cmd.params.get("call_id").and_then(|v| v.as_str()) {
|
||||
Some(s) => s.to_string(),
|
||||
None => { respond_err(out_tx, &cmd.id, "missing call_id"); return; }
|
||||
};
|
||||
let leg_id = match cmd.params.get("leg_id").and_then(|v| v.as_str()) {
|
||||
Some(s) => s.to_string(),
|
||||
None => { respond_err(out_tx, &cmd.id, "missing leg_id"); return; }
|
||||
};
|
||||
|
||||
let mut eng = engine.lock().await;
|
||||
let socket = match &eng.transport {
|
||||
Some(t) => t.socket(),
|
||||
None => { respond_err(out_tx, &cmd.id, "not initialized"); return; }
|
||||
};
|
||||
|
||||
if eng.call_mgr.remove_leg(&call_id, &leg_id, &socket).await {
|
||||
respond_ok(out_tx, &cmd.id, serde_json::json!({}));
|
||||
} else {
|
||||
respond_err(out_tx, &cmd.id, &format!("call/leg not found"));
|
||||
}
|
||||
}
|
||||
|
||||
/// Handle `webrtc_close` — close a WebRTC session.
|
||||
async fn handle_webrtc_close(engine: Arc<Mutex<ProxyEngine>>, out_tx: &OutTx, cmd: &Command) {
|
||||
/// Uses only the WebRTC lock.
|
||||
async fn handle_webrtc_close(webrtc: Arc<Mutex<WebRtcEngine>>, out_tx: &OutTx, cmd: &Command) {
|
||||
let session_id = match cmd.params.get("session_id").and_then(|v| v.as_str()) {
|
||||
Some(s) => s.to_string(),
|
||||
None => { respond_err(out_tx, &cmd.id, "missing session_id"); return; }
|
||||
};
|
||||
|
||||
let mut eng = engine.lock().await;
|
||||
match eng.webrtc.close_session(&session_id).await {
|
||||
let mut wrtc = webrtc.lock().await;
|
||||
match wrtc.close_session(&session_id).await {
|
||||
Ok(()) => respond_ok(out_tx, &cmd.id, serde_json::json!({})),
|
||||
Err(e) => respond_err(out_tx, &cmd.id, &e),
|
||||
}
|
||||
}
|
||||
|
||||
// ---------------------------------------------------------------------------
|
||||
// Leg interaction & tool leg commands
|
||||
// ---------------------------------------------------------------------------
|
||||
|
||||
/// Handle `start_interaction` — isolate a leg, play a prompt, collect DTMF.
|
||||
/// This command blocks until the interaction completes (digit, timeout, or cancel).
|
||||
async fn handle_start_interaction(
|
||||
engine: Arc<Mutex<ProxyEngine>>,
|
||||
out_tx: &OutTx,
|
||||
cmd: &Command,
|
||||
) {
|
||||
let call_id = match cmd.params.get("call_id").and_then(|v| v.as_str()) {
|
||||
Some(s) => s.to_string(),
|
||||
None => { respond_err(out_tx, &cmd.id, "missing call_id"); return; }
|
||||
};
|
||||
let leg_id = match cmd.params.get("leg_id").and_then(|v| v.as_str()) {
|
||||
Some(s) => s.to_string(),
|
||||
None => { respond_err(out_tx, &cmd.id, "missing leg_id"); return; }
|
||||
};
|
||||
let prompt_wav = match cmd.params.get("prompt_wav").and_then(|v| v.as_str()) {
|
||||
Some(s) => s.to_string(),
|
||||
None => { respond_err(out_tx, &cmd.id, "missing prompt_wav"); return; }
|
||||
};
|
||||
let expected_digits: Vec<char> = cmd
|
||||
.params
|
||||
.get("expected_digits")
|
||||
.and_then(|v| v.as_str())
|
||||
.unwrap_or("12")
|
||||
.chars()
|
||||
.collect();
|
||||
let timeout_ms = cmd
|
||||
.params
|
||||
.get("timeout_ms")
|
||||
.and_then(|v| v.as_u64())
|
||||
.unwrap_or(15000) as u32;
|
||||
|
||||
// Load prompt audio from WAV file.
|
||||
let prompt_frames = match crate::audio_player::load_prompt_pcm_frames(&prompt_wav) {
|
||||
Ok(f) => f,
|
||||
Err(e) => {
|
||||
respond_err(out_tx, &cmd.id, &format!("prompt load failed: {e}"));
|
||||
return;
|
||||
}
|
||||
};
|
||||
|
||||
// Create oneshot channel for the result.
|
||||
let (result_tx, result_rx) = tokio::sync::oneshot::channel();
|
||||
|
||||
// Send StartInteraction to the mixer.
|
||||
{
|
||||
let eng = engine.lock().await;
|
||||
let call = match eng.call_mgr.calls.get(&call_id) {
|
||||
Some(c) => c,
|
||||
None => {
|
||||
respond_err(out_tx, &cmd.id, &format!("call {call_id} not found"));
|
||||
return;
|
||||
}
|
||||
};
|
||||
let _ = call
|
||||
.mixer_cmd_tx
|
||||
.send(crate::mixer::MixerCommand::StartInteraction {
|
||||
leg_id: leg_id.clone(),
|
||||
prompt_pcm_frames: prompt_frames,
|
||||
expected_digits: expected_digits.clone(),
|
||||
timeout_ms,
|
||||
result_tx,
|
||||
})
|
||||
.await;
|
||||
} // engine lock released — we block on the oneshot, not the lock.
|
||||
|
||||
// Await the interaction result (blocks this task until complete).
|
||||
let safety_timeout = tokio::time::Duration::from_millis(timeout_ms as u64 + 30000);
|
||||
let result = match tokio::time::timeout(safety_timeout, result_rx).await {
|
||||
Ok(Ok(r)) => r,
|
||||
Ok(Err(_)) => crate::mixer::InteractionResult::Cancelled, // oneshot dropped
|
||||
Err(_) => crate::mixer::InteractionResult::Timeout, // safety timeout
|
||||
};
|
||||
|
||||
// Store consent result in leg metadata.
|
||||
let (result_str, digit_str) = match &result {
|
||||
crate::mixer::InteractionResult::Digit(d) => ("digit", Some(d.to_string())),
|
||||
crate::mixer::InteractionResult::Timeout => ("timeout", None),
|
||||
crate::mixer::InteractionResult::Cancelled => ("cancelled", None),
|
||||
};
|
||||
|
||||
{
|
||||
let mut eng = engine.lock().await;
|
||||
if let Some(call) = eng.call_mgr.calls.get_mut(&call_id) {
|
||||
if let Some(leg) = call.legs.get_mut(&leg_id) {
|
||||
leg.metadata.insert(
|
||||
"last_interaction_result".to_string(),
|
||||
serde_json::json!(result_str),
|
||||
);
|
||||
if let Some(ref d) = digit_str {
|
||||
leg.metadata.insert(
|
||||
"last_interaction_digit".to_string(),
|
||||
serde_json::json!(d),
|
||||
);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
let mut resp = serde_json::json!({ "result": result_str });
|
||||
if let Some(d) = digit_str {
|
||||
resp["digit"] = serde_json::json!(d);
|
||||
}
|
||||
respond_ok(out_tx, &cmd.id, resp);
|
||||
}
|
||||
|
||||
/// Handle `add_tool_leg` — add a recording or transcription tool leg to a call.
|
||||
async fn handle_add_tool_leg(
|
||||
engine: Arc<Mutex<ProxyEngine>>,
|
||||
out_tx: &OutTx,
|
||||
cmd: &Command,
|
||||
) {
|
||||
let call_id = match cmd.params.get("call_id").and_then(|v| v.as_str()) {
|
||||
Some(s) => s.to_string(),
|
||||
None => { respond_err(out_tx, &cmd.id, "missing call_id"); return; }
|
||||
};
|
||||
let tool_type_str = match cmd.params.get("tool_type").and_then(|v| v.as_str()) {
|
||||
Some(s) => s.to_string(),
|
||||
None => { respond_err(out_tx, &cmd.id, "missing tool_type"); return; }
|
||||
};
|
||||
|
||||
let tool_type = match tool_type_str.as_str() {
|
||||
"recording" => crate::mixer::ToolType::Recording,
|
||||
"transcription" => crate::mixer::ToolType::Transcription,
|
||||
other => {
|
||||
respond_err(out_tx, &cmd.id, &format!("unknown tool_type: {other}"));
|
||||
return;
|
||||
}
|
||||
};
|
||||
|
||||
let tool_leg_id = format!("{call_id}-tool-{}", rand::random::<u32>());
|
||||
|
||||
// Spawn the appropriate background task.
|
||||
let (audio_tx, _task_handle) = match tool_type {
|
||||
crate::mixer::ToolType::Recording => {
|
||||
let base_dir = cmd
|
||||
.params
|
||||
.get("config")
|
||||
.and_then(|c| c.get("base_dir"))
|
||||
.and_then(|v| v.as_str())
|
||||
.unwrap_or(".nogit/recordings")
|
||||
.to_string();
|
||||
crate::tool_leg::spawn_recording_tool(
|
||||
tool_leg_id.clone(),
|
||||
call_id.clone(),
|
||||
base_dir,
|
||||
out_tx.clone(),
|
||||
)
|
||||
}
|
||||
crate::mixer::ToolType::Transcription => {
|
||||
crate::tool_leg::spawn_transcription_tool(
|
||||
tool_leg_id.clone(),
|
||||
call_id.clone(),
|
||||
out_tx.clone(),
|
||||
)
|
||||
}
|
||||
};
|
||||
|
||||
// Send AddToolLeg to the mixer and register in call.
|
||||
{
|
||||
let mut eng = engine.lock().await;
|
||||
let call = match eng.call_mgr.calls.get_mut(&call_id) {
|
||||
Some(c) => c,
|
||||
None => {
|
||||
respond_err(out_tx, &cmd.id, &format!("call {call_id} not found"));
|
||||
return;
|
||||
}
|
||||
};
|
||||
|
||||
let _ = call
|
||||
.mixer_cmd_tx
|
||||
.send(crate::mixer::MixerCommand::AddToolLeg {
|
||||
leg_id: tool_leg_id.clone(),
|
||||
tool_type,
|
||||
audio_tx,
|
||||
})
|
||||
.await;
|
||||
|
||||
// Register tool leg in the call's leg map.
|
||||
let mut metadata = std::collections::HashMap::new();
|
||||
metadata.insert(
|
||||
"tool_type".to_string(),
|
||||
serde_json::json!(tool_type_str),
|
||||
);
|
||||
call.legs.insert(
|
||||
tool_leg_id.clone(),
|
||||
crate::call::LegInfo {
|
||||
id: tool_leg_id.clone(),
|
||||
kind: crate::call::LegKind::Tool,
|
||||
state: crate::call::LegState::Connected,
|
||||
codec_pt: 0,
|
||||
sip_leg: None,
|
||||
sip_call_id: None,
|
||||
webrtc_session_id: None,
|
||||
rtp_socket: None,
|
||||
rtp_port: 0,
|
||||
public_ip: None,
|
||||
remote_media: None,
|
||||
signaling_addr: None,
|
||||
metadata,
|
||||
},
|
||||
);
|
||||
}
|
||||
|
||||
emit_event(
|
||||
out_tx,
|
||||
"leg_added",
|
||||
serde_json::json!({
|
||||
"call_id": call_id,
|
||||
"leg_id": tool_leg_id,
|
||||
"kind": "tool",
|
||||
"state": "connected",
|
||||
"codec": null,
|
||||
"rtpPort": 0,
|
||||
"remoteMedia": null,
|
||||
"metadata": { "tool_type": tool_type_str },
|
||||
}),
|
||||
);
|
||||
|
||||
respond_ok(
|
||||
out_tx,
|
||||
&cmd.id,
|
||||
serde_json::json!({ "tool_leg_id": tool_leg_id }),
|
||||
);
|
||||
}
|
||||
|
||||
/// Handle `remove_tool_leg` — remove a tool leg from a call.
|
||||
async fn handle_remove_tool_leg(
|
||||
engine: Arc<Mutex<ProxyEngine>>,
|
||||
out_tx: &OutTx,
|
||||
cmd: &Command,
|
||||
) {
|
||||
let call_id = match cmd.params.get("call_id").and_then(|v| v.as_str()) {
|
||||
Some(s) => s.to_string(),
|
||||
None => { respond_err(out_tx, &cmd.id, "missing call_id"); return; }
|
||||
};
|
||||
let tool_leg_id = match cmd.params.get("tool_leg_id").and_then(|v| v.as_str()) {
|
||||
Some(s) => s.to_string(),
|
||||
None => { respond_err(out_tx, &cmd.id, "missing tool_leg_id"); return; }
|
||||
};
|
||||
|
||||
let mut eng = engine.lock().await;
|
||||
let call = match eng.call_mgr.calls.get_mut(&call_id) {
|
||||
Some(c) => c,
|
||||
None => {
|
||||
respond_err(out_tx, &cmd.id, &format!("call {call_id} not found"));
|
||||
return;
|
||||
}
|
||||
};
|
||||
|
||||
// Remove from mixer (drops audio_tx → background task finalizes).
|
||||
let _ = call
|
||||
.mixer_cmd_tx
|
||||
.send(crate::mixer::MixerCommand::RemoveToolLeg {
|
||||
leg_id: tool_leg_id.clone(),
|
||||
})
|
||||
.await;
|
||||
|
||||
// Remove from call's leg map.
|
||||
call.legs.remove(&tool_leg_id);
|
||||
|
||||
emit_event(
|
||||
out_tx,
|
||||
"leg_removed",
|
||||
serde_json::json!({
|
||||
"call_id": call_id,
|
||||
"leg_id": tool_leg_id,
|
||||
}),
|
||||
);
|
||||
|
||||
respond_ok(out_tx, &cmd.id, serde_json::json!({}));
|
||||
}
|
||||
|
||||
/// Handle `set_leg_metadata` — set a metadata key on a leg.
|
||||
async fn handle_set_leg_metadata(
|
||||
engine: Arc<Mutex<ProxyEngine>>,
|
||||
out_tx: &OutTx,
|
||||
cmd: &Command,
|
||||
) {
|
||||
let call_id = match cmd.params.get("call_id").and_then(|v| v.as_str()) {
|
||||
Some(s) => s.to_string(),
|
||||
None => { respond_err(out_tx, &cmd.id, "missing call_id"); return; }
|
||||
};
|
||||
let leg_id = match cmd.params.get("leg_id").and_then(|v| v.as_str()) {
|
||||
Some(s) => s.to_string(),
|
||||
None => { respond_err(out_tx, &cmd.id, "missing leg_id"); return; }
|
||||
};
|
||||
let key = match cmd.params.get("key").and_then(|v| v.as_str()) {
|
||||
Some(s) => s.to_string(),
|
||||
None => { respond_err(out_tx, &cmd.id, "missing key"); return; }
|
||||
};
|
||||
let value = match cmd.params.get("value") {
|
||||
Some(v) => v.clone(),
|
||||
None => { respond_err(out_tx, &cmd.id, "missing value"); return; }
|
||||
};
|
||||
|
||||
let mut eng = engine.lock().await;
|
||||
let call = match eng.call_mgr.calls.get_mut(&call_id) {
|
||||
Some(c) => c,
|
||||
None => {
|
||||
respond_err(out_tx, &cmd.id, &format!("call {call_id} not found"));
|
||||
return;
|
||||
}
|
||||
};
|
||||
let leg = match call.legs.get_mut(&leg_id) {
|
||||
Some(l) => l,
|
||||
None => {
|
||||
respond_err(out_tx, &cmd.id, &format!("leg {leg_id} not found"));
|
||||
return;
|
||||
}
|
||||
};
|
||||
|
||||
leg.metadata.insert(key, value);
|
||||
respond_ok(out_tx, &cmd.id, serde_json::json!({}));
|
||||
}
|
||||
|
||||
/// Handle `generate_tts` — synthesize text to a WAV file using Kokoro TTS.
|
||||
async fn handle_generate_tts(
|
||||
tts_engine: Arc<Mutex<tts::TtsEngine>>,
|
||||
out_tx: &OutTx,
|
||||
cmd: &Command,
|
||||
) {
|
||||
let mut tts = tts_engine.lock().await;
|
||||
match tts.generate(&cmd.params).await {
|
||||
Ok(result) => respond_ok(out_tx, &cmd.id, result),
|
||||
Err(e) => respond_err(out_tx, &cmd.id, &e),
|
||||
}
|
||||
}
|
||||
|
||||
617
rust/crates/proxy-engine/src/mixer.rs
Normal file
617
rust/crates/proxy-engine/src/mixer.rs
Normal file
@@ -0,0 +1,617 @@
|
||||
//! Audio mixer — mix-minus engine for multiparty calls.
|
||||
//!
|
||||
//! Each Call spawns one mixer task. Legs communicate with the mixer via
|
||||
//! tokio mpsc channels — no shared mutable state, no lock contention.
|
||||
//!
|
||||
//! Internal bus format: 48kHz f32 PCM (960 samples per 20ms frame).
|
||||
//! All encoding/decoding happens at leg boundaries. Per-leg inbound denoising at 48kHz.
|
||||
//!
|
||||
//! The mixer runs a 20ms tick loop:
|
||||
//! 1. Drain inbound channels, decode to f32, resample to 48kHz, denoise per-leg
|
||||
//! 2. Compute total mix (sum of all **participant** legs' f32 PCM as f64)
|
||||
//! 3. For each participant leg: mix-minus = total - own, resample to leg codec rate, encode, send
|
||||
//! 4. For each isolated leg: play prompt frame or silence, check DTMF
|
||||
//! 5. For each tool leg: send per-source unmerged audio batch
|
||||
//! 6. Forward DTMF between participant legs only
|
||||
|
||||
use crate::ipc::{emit_event, OutTx};
|
||||
use crate::jitter_buffer::{JitterBuffer, JitterResult};
|
||||
use crate::rtp::{build_rtp_header, rtp_clock_increment};
|
||||
use codec_lib::{codec_sample_rate, new_denoiser, TranscodeState};
|
||||
use nnnoiseless::DenoiseState;
|
||||
use std::collections::{HashMap, VecDeque};
|
||||
use tokio::sync::{mpsc, oneshot};
|
||||
use tokio::task::JoinHandle;
|
||||
use tokio::time::{self, Duration, MissedTickBehavior};
|
||||
|
||||
/// Mixing sample rate — 48kHz. Opus is native, G.722 needs 3× upsample, G.711 needs 6× upsample.
|
||||
/// All processing (denoising, mixing) happens at this rate in f32 for maximum quality.
|
||||
const MIX_RATE: u32 = 48000;
|
||||
/// Samples per 20ms frame at the mixing rate.
|
||||
const MIX_FRAME_SIZE: usize = 960; // 48000 * 0.020
|
||||
|
||||
/// A raw RTP payload received from a leg (no RTP header).
|
||||
pub struct RtpPacket {
|
||||
pub payload: Vec<u8>,
|
||||
pub payload_type: u8,
|
||||
/// RTP marker bit (first packet of a DTMF event, etc.).
|
||||
pub marker: bool,
|
||||
/// RTP sequence number for reordering.
|
||||
pub seq: u16,
|
||||
/// RTP timestamp from the original packet header.
|
||||
///
|
||||
/// Set on inbound RTP but not yet consumed downstream — reserved for
|
||||
/// future jitter/sync work in the mixer.
|
||||
#[allow(dead_code)]
|
||||
pub timestamp: u32,
|
||||
}
|
||||
|
||||
// ---------------------------------------------------------------------------
|
||||
// Leg roles
|
||||
// ---------------------------------------------------------------------------
|
||||
|
||||
/// What role a leg currently plays in the mixer.
|
||||
enum LegRole {
|
||||
/// Normal participant: contributes to mix, receives mix-minus.
|
||||
Participant,
|
||||
/// Temporarily isolated for IVR/consent interaction.
|
||||
Isolated(IsolationState),
|
||||
}
|
||||
|
||||
struct IsolationState {
|
||||
/// PCM frames at MIX_RATE (960 samples each, 48kHz f32) queued for playback.
|
||||
prompt_frames: VecDeque<Vec<f32>>,
|
||||
/// Digits that complete the interaction (e.g., ['1', '2']).
|
||||
expected_digits: Vec<char>,
|
||||
/// Ticks remaining before timeout (decremented each tick after prompt ends).
|
||||
timeout_ticks_remaining: u32,
|
||||
/// Whether we've finished playing the prompt.
|
||||
prompt_done: bool,
|
||||
/// Channel to send the result back to the command handler.
|
||||
result_tx: Option<oneshot::Sender<InteractionResult>>,
|
||||
}
|
||||
|
||||
/// Result of a leg interaction (consent prompt, IVR, etc.).
|
||||
pub enum InteractionResult {
|
||||
/// The participant pressed one of the expected digits.
|
||||
Digit(char),
|
||||
/// No digit was received within the timeout.
|
||||
Timeout,
|
||||
/// The leg was removed or the call tore down before completion.
|
||||
Cancelled,
|
||||
}
|
||||
|
||||
// ---------------------------------------------------------------------------
|
||||
// Tool legs
|
||||
// ---------------------------------------------------------------------------
|
||||
|
||||
/// Type of tool leg.
|
||||
#[derive(Debug, Clone, Copy)]
|
||||
pub enum ToolType {
|
||||
Recording,
|
||||
Transcription,
|
||||
}
|
||||
|
||||
/// Per-source audio delivered to a tool leg each mixer tick.
|
||||
pub struct ToolAudioBatch {
|
||||
pub sources: Vec<ToolAudioSource>,
|
||||
}
|
||||
|
||||
/// One participant's 20ms audio frame.
|
||||
pub struct ToolAudioSource {
|
||||
pub leg_id: String,
|
||||
/// PCM at 48kHz f32, MIX_FRAME_SIZE (960) samples.
|
||||
pub pcm_48k: Vec<f32>,
|
||||
}
|
||||
|
||||
/// Internal storage for a tool leg inside the mixer.
|
||||
struct ToolLegSlot {
|
||||
#[allow(dead_code)]
|
||||
tool_type: ToolType,
|
||||
audio_tx: mpsc::Sender<ToolAudioBatch>,
|
||||
}
|
||||
|
||||
// ---------------------------------------------------------------------------
|
||||
// Commands
|
||||
// ---------------------------------------------------------------------------
|
||||
|
||||
/// Commands sent to the mixer task via a control channel.
|
||||
pub enum MixerCommand {
|
||||
/// Add a new participant leg to the mix.
|
||||
AddLeg {
|
||||
leg_id: String,
|
||||
codec_pt: u8,
|
||||
inbound_rx: mpsc::Receiver<RtpPacket>,
|
||||
outbound_tx: mpsc::Sender<Vec<u8>>,
|
||||
},
|
||||
/// Remove a leg from the mix (channels are dropped, I/O tasks exit).
|
||||
RemoveLeg { leg_id: String },
|
||||
/// Shut down the mixer.
|
||||
Shutdown,
|
||||
|
||||
/// Isolate a leg and start an interaction (consent prompt, IVR).
|
||||
/// The leg is removed from the mix and hears the prompt instead.
|
||||
/// DTMF from the leg is checked against expected_digits.
|
||||
StartInteraction {
|
||||
leg_id: String,
|
||||
/// PCM frames at MIX_RATE (48kHz f32), each 960 samples.
|
||||
prompt_pcm_frames: Vec<Vec<f32>>,
|
||||
expected_digits: Vec<char>,
|
||||
timeout_ms: u32,
|
||||
result_tx: oneshot::Sender<InteractionResult>,
|
||||
},
|
||||
|
||||
/// Add a tool leg that receives per-source unmerged audio.
|
||||
AddToolLeg {
|
||||
leg_id: String,
|
||||
tool_type: ToolType,
|
||||
audio_tx: mpsc::Sender<ToolAudioBatch>,
|
||||
},
|
||||
/// Remove a tool leg (drops the channel, background task finalizes).
|
||||
RemoveToolLeg { leg_id: String },
|
||||
}
|
||||
|
||||
// ---------------------------------------------------------------------------
|
||||
// Mixer internals
|
||||
// ---------------------------------------------------------------------------
|
||||
|
||||
/// Internal per-leg state inside the mixer.
|
||||
struct MixerLegSlot {
|
||||
codec_pt: u8,
|
||||
transcoder: TranscodeState,
|
||||
/// Per-leg inbound denoiser (48kHz, 480-sample frames).
|
||||
denoiser: Box<DenoiseState<'static>>,
|
||||
inbound_rx: mpsc::Receiver<RtpPacket>,
|
||||
outbound_tx: mpsc::Sender<Vec<u8>>,
|
||||
/// Last decoded+denoised PCM frame at MIX_RATE (960 samples, 48kHz f32).
|
||||
last_pcm_frame: Vec<f32>,
|
||||
/// Number of consecutive ticks with no inbound packet.
|
||||
silent_ticks: u32,
|
||||
/// Per-leg jitter buffer for packet reordering and timing.
|
||||
jitter: JitterBuffer,
|
||||
// RTP output state.
|
||||
rtp_seq: u16,
|
||||
rtp_ts: u32,
|
||||
rtp_ssrc: u32,
|
||||
/// Current role of this leg in the mixer.
|
||||
role: LegRole,
|
||||
}
|
||||
|
||||
/// Spawn the mixer task for a call. Returns the command sender and task handle.
|
||||
pub fn spawn_mixer(
|
||||
call_id: String,
|
||||
out_tx: OutTx,
|
||||
) -> (mpsc::Sender<MixerCommand>, JoinHandle<()>) {
|
||||
let (cmd_tx, cmd_rx) = mpsc::channel::<MixerCommand>(32);
|
||||
|
||||
let handle = tokio::spawn(async move {
|
||||
mixer_loop(call_id, cmd_rx, out_tx).await;
|
||||
});
|
||||
|
||||
(cmd_tx, handle)
|
||||
}
|
||||
|
||||
/// The 20ms mixing loop.
|
||||
async fn mixer_loop(
|
||||
call_id: String,
|
||||
mut cmd_rx: mpsc::Receiver<MixerCommand>,
|
||||
out_tx: OutTx,
|
||||
) {
|
||||
let mut legs: HashMap<String, MixerLegSlot> = HashMap::new();
|
||||
let mut tool_legs: HashMap<String, ToolLegSlot> = HashMap::new();
|
||||
let mut interval = time::interval(Duration::from_millis(20));
|
||||
interval.set_missed_tick_behavior(MissedTickBehavior::Skip);
|
||||
|
||||
loop {
|
||||
interval.tick().await;
|
||||
|
||||
// ── 1. Process control commands (non-blocking). ─────────────
|
||||
loop {
|
||||
match cmd_rx.try_recv() {
|
||||
Ok(MixerCommand::AddLeg {
|
||||
leg_id,
|
||||
codec_pt,
|
||||
inbound_rx,
|
||||
outbound_tx,
|
||||
}) => {
|
||||
let transcoder = match TranscodeState::new() {
|
||||
Ok(t) => t,
|
||||
Err(e) => {
|
||||
emit_event(
|
||||
&out_tx,
|
||||
"mixer_error",
|
||||
serde_json::json!({
|
||||
"call_id": call_id,
|
||||
"leg_id": leg_id,
|
||||
"error": format!("codec init: {e}"),
|
||||
}),
|
||||
);
|
||||
continue;
|
||||
}
|
||||
};
|
||||
legs.insert(
|
||||
leg_id,
|
||||
MixerLegSlot {
|
||||
codec_pt,
|
||||
transcoder,
|
||||
denoiser: new_denoiser(),
|
||||
inbound_rx,
|
||||
outbound_tx,
|
||||
last_pcm_frame: vec![0.0f32; MIX_FRAME_SIZE],
|
||||
silent_ticks: 0,
|
||||
rtp_seq: 0,
|
||||
rtp_ts: 0,
|
||||
rtp_ssrc: rand::random(),
|
||||
role: LegRole::Participant,
|
||||
jitter: JitterBuffer::new(),
|
||||
},
|
||||
);
|
||||
}
|
||||
Ok(MixerCommand::RemoveLeg { leg_id }) => {
|
||||
// If the leg is isolated, send Cancelled before dropping.
|
||||
if let Some(slot) = legs.get_mut(&leg_id) {
|
||||
if let LegRole::Isolated(ref mut state) = slot.role {
|
||||
if let Some(tx) = state.result_tx.take() {
|
||||
let _ = tx.send(InteractionResult::Cancelled);
|
||||
}
|
||||
}
|
||||
}
|
||||
legs.remove(&leg_id);
|
||||
// Channels drop → I/O tasks exit cleanly.
|
||||
}
|
||||
Ok(MixerCommand::Shutdown) => {
|
||||
// Cancel all outstanding interactions before shutting down.
|
||||
for slot in legs.values_mut() {
|
||||
if let LegRole::Isolated(ref mut state) = slot.role {
|
||||
if let Some(tx) = state.result_tx.take() {
|
||||
let _ = tx.send(InteractionResult::Cancelled);
|
||||
}
|
||||
}
|
||||
}
|
||||
return;
|
||||
}
|
||||
Ok(MixerCommand::StartInteraction {
|
||||
leg_id,
|
||||
prompt_pcm_frames,
|
||||
expected_digits,
|
||||
timeout_ms,
|
||||
result_tx,
|
||||
}) => {
|
||||
if let Some(slot) = legs.get_mut(&leg_id) {
|
||||
// Cancel any existing interaction first.
|
||||
if let LegRole::Isolated(ref mut old_state) = slot.role {
|
||||
if let Some(tx) = old_state.result_tx.take() {
|
||||
let _ = tx.send(InteractionResult::Cancelled);
|
||||
}
|
||||
}
|
||||
let timeout_ticks = timeout_ms / 20;
|
||||
slot.role = LegRole::Isolated(IsolationState {
|
||||
prompt_frames: VecDeque::from(prompt_pcm_frames),
|
||||
expected_digits,
|
||||
timeout_ticks_remaining: timeout_ticks,
|
||||
prompt_done: false,
|
||||
result_tx: Some(result_tx),
|
||||
});
|
||||
} else {
|
||||
// Leg not found — immediately cancel.
|
||||
let _ = result_tx.send(InteractionResult::Cancelled);
|
||||
}
|
||||
}
|
||||
Ok(MixerCommand::AddToolLeg {
|
||||
leg_id,
|
||||
tool_type,
|
||||
audio_tx,
|
||||
}) => {
|
||||
tool_legs.insert(leg_id, ToolLegSlot { tool_type, audio_tx });
|
||||
}
|
||||
Ok(MixerCommand::RemoveToolLeg { leg_id }) => {
|
||||
tool_legs.remove(&leg_id);
|
||||
// Dropping the ToolLegSlot drops audio_tx → background task sees channel close.
|
||||
}
|
||||
Err(mpsc::error::TryRecvError::Empty) => break,
|
||||
Err(mpsc::error::TryRecvError::Disconnected) => return,
|
||||
}
|
||||
}
|
||||
|
||||
if legs.is_empty() && tool_legs.is_empty() {
|
||||
continue;
|
||||
}
|
||||
|
||||
// ── 2. Drain inbound packets, decode to 48kHz f32 PCM. ────
|
||||
// DTMF (PT 101) packets are collected separately.
|
||||
// Audio packets are sorted by sequence number and decoded
|
||||
// in order to maintain codec state (critical for G.722 ADPCM).
|
||||
let leg_ids: Vec<String> = legs.keys().cloned().collect();
|
||||
let mut dtmf_forward: Vec<(String, RtpPacket)> = Vec::new();
|
||||
|
||||
for lid in &leg_ids {
|
||||
let slot = legs.get_mut(lid).unwrap();
|
||||
|
||||
// Step 2a: Drain all pending packets into the jitter buffer.
|
||||
let mut got_audio = false;
|
||||
loop {
|
||||
match slot.inbound_rx.try_recv() {
|
||||
Ok(pkt) => {
|
||||
if pkt.payload_type == 101 {
|
||||
dtmf_forward.push((lid.clone(), pkt));
|
||||
} else {
|
||||
got_audio = true;
|
||||
slot.jitter.push(pkt);
|
||||
}
|
||||
}
|
||||
Err(_) => break,
|
||||
}
|
||||
}
|
||||
|
||||
// Step 2b: Consume exactly one frame from the jitter buffer.
|
||||
match slot.jitter.consume() {
|
||||
JitterResult::Packet(pkt) => {
|
||||
match slot.transcoder.decode_to_f32(&pkt.payload, pkt.payload_type) {
|
||||
Ok((pcm, rate)) => {
|
||||
let pcm_48k = if rate == MIX_RATE {
|
||||
pcm
|
||||
} else {
|
||||
slot.transcoder
|
||||
.resample_f32(&pcm, rate, MIX_RATE)
|
||||
.unwrap_or_else(|_| vec![0.0f32; MIX_FRAME_SIZE])
|
||||
};
|
||||
let processed = if slot.codec_pt != codec_lib::PT_OPUS {
|
||||
TranscodeState::denoise_f32(&mut slot.denoiser, &pcm_48k)
|
||||
} else {
|
||||
pcm_48k
|
||||
};
|
||||
let mut frame = processed;
|
||||
frame.resize(MIX_FRAME_SIZE, 0.0);
|
||||
slot.last_pcm_frame = frame;
|
||||
}
|
||||
Err(_) => {}
|
||||
}
|
||||
}
|
||||
JitterResult::Missing => {
|
||||
// Invoke Opus PLC or fade for non-Opus codecs.
|
||||
if slot.codec_pt == codec_lib::PT_OPUS {
|
||||
match slot.transcoder.opus_plc(MIX_FRAME_SIZE) {
|
||||
Ok(pcm) => {
|
||||
slot.last_pcm_frame = pcm;
|
||||
}
|
||||
Err(_) => {
|
||||
for s in slot.last_pcm_frame.iter_mut() {
|
||||
*s *= 0.8;
|
||||
}
|
||||
}
|
||||
}
|
||||
} else {
|
||||
// Non-Opus: fade last frame toward silence.
|
||||
for s in slot.last_pcm_frame.iter_mut() {
|
||||
*s *= 0.85;
|
||||
}
|
||||
}
|
||||
}
|
||||
JitterResult::Filling => {
|
||||
slot.last_pcm_frame = vec![0.0f32; MIX_FRAME_SIZE];
|
||||
}
|
||||
}
|
||||
|
||||
// Run jitter adaptation + prune stale packets.
|
||||
slot.jitter.adapt();
|
||||
slot.jitter.prune_stale();
|
||||
|
||||
// Silent ticks: based on actual network reception, not jitter buffer state.
|
||||
if got_audio || dtmf_forward.iter().any(|(src, _)| src == lid) {
|
||||
slot.silent_ticks = 0;
|
||||
} else {
|
||||
slot.silent_ticks += 1;
|
||||
}
|
||||
if slot.silent_ticks > 150 {
|
||||
slot.last_pcm_frame = vec![0.0f32; MIX_FRAME_SIZE];
|
||||
}
|
||||
}
|
||||
|
||||
// ── 3. Compute total mix from PARTICIPANT legs only. ────────
|
||||
// Accumulate as f64 to prevent precision loss when summing f32.
|
||||
let mut total_mix = vec![0.0f64; MIX_FRAME_SIZE];
|
||||
for slot in legs.values() {
|
||||
if matches!(slot.role, LegRole::Participant) {
|
||||
for (i, &s) in slot.last_pcm_frame.iter().enumerate().take(MIX_FRAME_SIZE) {
|
||||
total_mix[i] += s as f64;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// ── 4. Per-leg output. ──────────────────────────────────────
|
||||
// Collect interaction completions to apply after the loop
|
||||
// (can't mutate role while iterating mutably for encode).
|
||||
let mut completed_interactions: Vec<(String, InteractionResult)> = Vec::new();
|
||||
|
||||
for (lid, slot) in legs.iter_mut() {
|
||||
match &mut slot.role {
|
||||
LegRole::Participant => {
|
||||
// Mix-minus: total minus this leg's own contribution, clamped to [-1.0, 1.0].
|
||||
let mut mix_minus = Vec::with_capacity(MIX_FRAME_SIZE);
|
||||
for i in 0..MIX_FRAME_SIZE {
|
||||
let sample =
|
||||
(total_mix[i] - slot.last_pcm_frame[i] as f64) as f32;
|
||||
mix_minus.push(sample.clamp(-1.0, 1.0));
|
||||
}
|
||||
|
||||
// Resample from 48kHz to the leg's codec native rate.
|
||||
let target_rate = codec_sample_rate(slot.codec_pt);
|
||||
let resampled = if target_rate == MIX_RATE {
|
||||
mix_minus
|
||||
} else {
|
||||
slot.transcoder
|
||||
.resample_f32(&mix_minus, MIX_RATE, target_rate)
|
||||
.unwrap_or_default()
|
||||
};
|
||||
|
||||
// Encode to the leg's codec (f32 → i16 → codec inside encode_from_f32).
|
||||
let encoded =
|
||||
match slot.transcoder.encode_from_f32(&resampled, slot.codec_pt) {
|
||||
Ok(e) if !e.is_empty() => e,
|
||||
_ => continue,
|
||||
};
|
||||
|
||||
// Build RTP packet with header.
|
||||
let header =
|
||||
build_rtp_header(slot.codec_pt, slot.rtp_seq, slot.rtp_ts, slot.rtp_ssrc);
|
||||
let mut rtp = header.to_vec();
|
||||
rtp.extend_from_slice(&encoded);
|
||||
|
||||
slot.rtp_seq = slot.rtp_seq.wrapping_add(1);
|
||||
slot.rtp_ts = slot.rtp_ts.wrapping_add(rtp_clock_increment(slot.codec_pt));
|
||||
|
||||
// Non-blocking send — drop frame if channel is full.
|
||||
let _ = slot.outbound_tx.try_send(rtp);
|
||||
}
|
||||
LegRole::Isolated(state) => {
|
||||
// Check for DTMF digit from this leg.
|
||||
let mut matched_digit: Option<char> = None;
|
||||
for (src_lid, dtmf_pkt) in &dtmf_forward {
|
||||
if src_lid == lid && dtmf_pkt.payload.len() >= 4 {
|
||||
let event_id = dtmf_pkt.payload[0];
|
||||
let end_bit = (dtmf_pkt.payload[1] & 0x80) != 0;
|
||||
if end_bit {
|
||||
const EVENT_CHARS: &[char] = &[
|
||||
'0', '1', '2', '3', '4', '5', '6', '7', '8', '9', '*', '#',
|
||||
'A', 'B', 'C', 'D',
|
||||
];
|
||||
if let Some(&ch) = EVENT_CHARS.get(event_id as usize) {
|
||||
if state.expected_digits.contains(&ch) {
|
||||
matched_digit = Some(ch);
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
if let Some(digit) = matched_digit {
|
||||
// Interaction complete — digit matched.
|
||||
completed_interactions
|
||||
.push((lid.clone(), InteractionResult::Digit(digit)));
|
||||
} else {
|
||||
// Play prompt frame or silence.
|
||||
let pcm_frame = if let Some(frame) = state.prompt_frames.pop_front() {
|
||||
frame
|
||||
} else {
|
||||
state.prompt_done = true;
|
||||
vec![0.0f32; MIX_FRAME_SIZE]
|
||||
};
|
||||
|
||||
// Encode prompt frame to the leg's codec.
|
||||
let target_rate = codec_sample_rate(slot.codec_pt);
|
||||
let resampled = if target_rate == MIX_RATE {
|
||||
pcm_frame
|
||||
} else {
|
||||
slot.transcoder
|
||||
.resample_f32(&pcm_frame, MIX_RATE, target_rate)
|
||||
.unwrap_or_default()
|
||||
};
|
||||
|
||||
if let Ok(encoded) =
|
||||
slot.transcoder.encode_from_f32(&resampled, slot.codec_pt)
|
||||
{
|
||||
if !encoded.is_empty() {
|
||||
let header = build_rtp_header(
|
||||
slot.codec_pt,
|
||||
slot.rtp_seq,
|
||||
slot.rtp_ts,
|
||||
slot.rtp_ssrc,
|
||||
);
|
||||
let mut rtp = header.to_vec();
|
||||
rtp.extend_from_slice(&encoded);
|
||||
slot.rtp_seq = slot.rtp_seq.wrapping_add(1);
|
||||
slot.rtp_ts = slot
|
||||
.rtp_ts
|
||||
.wrapping_add(rtp_clock_increment(slot.codec_pt));
|
||||
let _ = slot.outbound_tx.try_send(rtp);
|
||||
}
|
||||
}
|
||||
|
||||
// Check timeout (only after prompt finishes).
|
||||
if state.prompt_done {
|
||||
if state.timeout_ticks_remaining == 0 {
|
||||
completed_interactions
|
||||
.push((lid.clone(), InteractionResult::Timeout));
|
||||
} else {
|
||||
state.timeout_ticks_remaining -= 1;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Apply completed interactions — revert legs to Participant.
|
||||
for (lid, result) in completed_interactions {
|
||||
if let Some(slot) = legs.get_mut(&lid) {
|
||||
if let LegRole::Isolated(ref mut state) = slot.role {
|
||||
if let Some(tx) = state.result_tx.take() {
|
||||
let _ = tx.send(result);
|
||||
}
|
||||
}
|
||||
slot.role = LegRole::Participant;
|
||||
}
|
||||
}
|
||||
|
||||
// ── 5. Distribute per-source audio to tool legs. ────────────
|
||||
if !tool_legs.is_empty() {
|
||||
// Collect participant PCM frames (computed in step 2).
|
||||
let sources: Vec<ToolAudioSource> = legs
|
||||
.iter()
|
||||
.filter(|(_, s)| matches!(s.role, LegRole::Participant))
|
||||
.map(|(lid, s)| ToolAudioSource {
|
||||
leg_id: lid.clone(),
|
||||
pcm_48k: s.last_pcm_frame.clone(),
|
||||
})
|
||||
.collect();
|
||||
|
||||
for tool in tool_legs.values() {
|
||||
let batch = ToolAudioBatch {
|
||||
sources: sources
|
||||
.iter()
|
||||
.map(|s| ToolAudioSource {
|
||||
leg_id: s.leg_id.clone(),
|
||||
pcm_48k: s.pcm_48k.clone(),
|
||||
})
|
||||
.collect(),
|
||||
};
|
||||
// Non-blocking send — drop batch if tool can't keep up.
|
||||
let _ = tool.audio_tx.try_send(batch);
|
||||
}
|
||||
}
|
||||
|
||||
// ── 6. Forward DTMF packets between participant legs only. ──
|
||||
for (source_lid, dtmf_pkt) in &dtmf_forward {
|
||||
// Skip if the source is an isolated leg (its DTMF was handled in step 4).
|
||||
if let Some(src_slot) = legs.get(source_lid) {
|
||||
if matches!(src_slot.role, LegRole::Isolated(_)) {
|
||||
continue;
|
||||
}
|
||||
}
|
||||
for (target_lid, target_slot) in legs.iter_mut() {
|
||||
if target_lid == source_lid {
|
||||
continue; // Don't echo DTMF back to sender.
|
||||
}
|
||||
// Don't forward to isolated legs.
|
||||
if matches!(target_slot.role, LegRole::Isolated(_)) {
|
||||
continue;
|
||||
}
|
||||
let mut header = build_rtp_header(
|
||||
101,
|
||||
target_slot.rtp_seq,
|
||||
target_slot.rtp_ts,
|
||||
target_slot.rtp_ssrc,
|
||||
);
|
||||
if dtmf_pkt.marker {
|
||||
header[1] |= 0x80; // Set marker bit.
|
||||
}
|
||||
let mut rtp_out = header.to_vec();
|
||||
rtp_out.extend_from_slice(&dtmf_pkt.payload);
|
||||
target_slot.rtp_seq = target_slot.rtp_seq.wrapping_add(1);
|
||||
// Don't increment rtp_ts for DTMF — it shares timestamp context with audio.
|
||||
let _ = target_slot.outbound_tx.try_send(rtp_out);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
@@ -321,15 +321,15 @@ impl ProviderManager {
|
||||
None
|
||||
}
|
||||
|
||||
/// Check if a provider is currently registered.
|
||||
pub async fn is_registered(&self, provider_id: &str) -> bool {
|
||||
/// Find a provider by its config ID (e.g. "easybell").
|
||||
pub async fn find_by_provider_id(&self, provider_id: &str) -> Option<Arc<Mutex<ProviderState>>> {
|
||||
for ps_arc in &self.providers {
|
||||
let ps = ps_arc.lock().await;
|
||||
if ps.config.id == provider_id {
|
||||
return ps.is_registered;
|
||||
return Some(ps_arc.clone());
|
||||
}
|
||||
}
|
||||
false
|
||||
None
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
@@ -55,6 +55,56 @@ impl Recorder {
|
||||
})
|
||||
}
|
||||
|
||||
/// Create a recorder that writes raw PCM at a given sample rate.
|
||||
/// Used by tool legs that already have decoded PCM (no RTP processing needed).
|
||||
pub fn new_pcm(file_path: &str, sample_rate: u32, max_duration_ms: Option<u64>) -> Result<Self, String> {
|
||||
if let Some(parent) = Path::new(file_path).parent() {
|
||||
std::fs::create_dir_all(parent)
|
||||
.map_err(|e| format!("create dir: {e}"))?;
|
||||
}
|
||||
|
||||
let spec = hound::WavSpec {
|
||||
channels: 1,
|
||||
sample_rate,
|
||||
bits_per_sample: 16,
|
||||
sample_format: hound::SampleFormat::Int,
|
||||
};
|
||||
|
||||
let writer = hound::WavWriter::create(file_path, spec)
|
||||
.map_err(|e| format!("create WAV {file_path}: {e}"))?;
|
||||
|
||||
// source_pt is unused for PCM recording; set to 0.
|
||||
let transcoder = TranscodeState::new().map_err(|e| format!("codec init: {e}"))?;
|
||||
let max_samples = max_duration_ms.map(|ms| (sample_rate as u64 * ms) / 1000);
|
||||
|
||||
Ok(Self {
|
||||
writer,
|
||||
transcoder,
|
||||
source_pt: 0,
|
||||
total_samples: 0,
|
||||
sample_rate,
|
||||
max_samples,
|
||||
file_path: file_path.to_string(),
|
||||
})
|
||||
}
|
||||
|
||||
/// Write raw PCM samples directly (no RTP decoding).
|
||||
/// Returns true if recording should continue, false if max duration reached.
|
||||
pub fn write_pcm(&mut self, samples: &[i16]) -> bool {
|
||||
for &sample in samples {
|
||||
if self.writer.write_sample(sample).is_err() {
|
||||
return false;
|
||||
}
|
||||
self.total_samples += 1;
|
||||
if let Some(max) = self.max_samples {
|
||||
if self.total_samples >= max {
|
||||
return false;
|
||||
}
|
||||
}
|
||||
}
|
||||
true
|
||||
}
|
||||
|
||||
/// Process an incoming RTP packet (full packet with header).
|
||||
/// Returns true if recording should continue, false if max duration reached.
|
||||
pub fn process_rtp(&mut self, data: &[u8]) -> bool {
|
||||
@@ -128,5 +178,8 @@ impl Recorder {
|
||||
pub struct RecordingResult {
|
||||
pub file_path: String,
|
||||
pub duration_ms: u64,
|
||||
// Running-sample total kept for parity with the TS recorder; not yet
|
||||
// surfaced through any event or dashboard field.
|
||||
#[allow(dead_code)]
|
||||
pub total_samples: u64,
|
||||
}
|
||||
|
||||
@@ -19,11 +19,19 @@ const MAX_EXPIRES: u32 = 300;
|
||||
#[derive(Debug, Clone)]
|
||||
pub struct RegisteredDevice {
|
||||
pub device_id: String,
|
||||
// These fields are populated at REGISTER time for logging/debugging but are
|
||||
// not read back — device identity flows via the `device_registered` push
|
||||
// event, not via struct queries. Kept behind allow(dead_code) because
|
||||
// removing them would churn handle_register for no runtime benefit.
|
||||
#[allow(dead_code)]
|
||||
pub display_name: String,
|
||||
#[allow(dead_code)]
|
||||
pub extension: String,
|
||||
pub contact_addr: SocketAddr,
|
||||
#[allow(dead_code)]
|
||||
pub registered_at: Instant,
|
||||
pub expires_at: Instant,
|
||||
#[allow(dead_code)]
|
||||
pub aor: String,
|
||||
}
|
||||
|
||||
@@ -134,11 +142,6 @@ impl Registrar {
|
||||
Some(entry.contact_addr)
|
||||
}
|
||||
|
||||
/// Check if a source address belongs to a known device.
|
||||
pub fn is_known_device_address(&self, addr: &str) -> bool {
|
||||
self.devices.iter().any(|d| d.expected_address == addr)
|
||||
}
|
||||
|
||||
/// Find a registered device by its source IP address.
|
||||
pub fn find_by_address(&self, addr: &SocketAddr) -> Option<&RegisteredDevice> {
|
||||
let ip = addr.ip().to_string();
|
||||
@@ -146,26 +149,4 @@ impl Registrar {
|
||||
e.contact_addr.ip().to_string() == ip && Instant::now() <= e.expires_at
|
||||
})
|
||||
}
|
||||
|
||||
/// Get all device statuses for the dashboard.
|
||||
pub fn get_all_statuses(&self) -> Vec<serde_json::Value> {
|
||||
let now = Instant::now();
|
||||
let mut result = Vec::new();
|
||||
|
||||
for dc in &self.devices {
|
||||
let reg = self.registered.get(&dc.id);
|
||||
let connected = reg.map(|r| now <= r.expires_at).unwrap_or(false);
|
||||
result.push(serde_json::json!({
|
||||
"id": dc.id,
|
||||
"displayName": dc.display_name,
|
||||
"address": reg.filter(|_| connected).map(|r| r.contact_addr.ip().to_string()),
|
||||
"port": reg.filter(|_| connected).map(|r| r.contact_addr.port()),
|
||||
"aor": reg.map(|r| r.aor.as_str()).unwrap_or(""),
|
||||
"connected": connected,
|
||||
"isBrowser": false,
|
||||
}));
|
||||
}
|
||||
|
||||
result
|
||||
}
|
||||
}
|
||||
|
||||
@@ -1,17 +1,19 @@
|
||||
//! RTP port pool and media forwarding.
|
||||
//! RTP port pool for media sockets.
|
||||
//!
|
||||
//! Manages a pool of even-numbered UDP ports for RTP media.
|
||||
//! Each port gets a bound tokio UdpSocket. Supports:
|
||||
//! - Direct forwarding (SIP-to-SIP, no transcoding)
|
||||
//! - Transcoding forwarding (via codec-lib, e.g. G.722 ↔ Opus)
|
||||
//! - Silence generation
|
||||
//! - NAT priming
|
||||
//! Manages a pool of even-numbered UDP ports for RTP media. `allocate()`
|
||||
//! hands back an `Arc<UdpSocket>` to the caller (stored on the owning
|
||||
//! `LegInfo`), while the pool itself keeps only a `Weak<UdpSocket>`. When
|
||||
//! the call terminates and `LegInfo` is dropped, the strong refcount
|
||||
//! reaches zero, the socket is closed, and `allocate()` prunes the dead
|
||||
//! weak ref the next time it scans that slot — so the port automatically
|
||||
//! becomes available for reuse without any explicit `release()` plumbing.
|
||||
//!
|
||||
//! Ported from ts/call/rtp-port-pool.ts + sip-leg.ts RTP handling.
|
||||
//! This fixes the previous leak where the pool held `Arc<UdpSocket>` and
|
||||
//! `release()` was never called, eventually exhausting the port range and
|
||||
//! causing "503 Service Unavailable" on new calls.
|
||||
|
||||
use std::collections::HashMap;
|
||||
use std::net::SocketAddr;
|
||||
use std::sync::Arc;
|
||||
use std::sync::{Arc, Weak};
|
||||
use tokio::net::UdpSocket;
|
||||
|
||||
/// A single RTP port allocation.
|
||||
@@ -24,7 +26,7 @@ pub struct RtpAllocation {
|
||||
pub struct RtpPortPool {
|
||||
min: u16,
|
||||
max: u16,
|
||||
allocated: HashMap<u16, Arc<UdpSocket>>,
|
||||
allocated: HashMap<u16, Weak<UdpSocket>>,
|
||||
}
|
||||
|
||||
impl RtpPortPool {
|
||||
@@ -41,11 +43,19 @@ impl RtpPortPool {
|
||||
pub async fn allocate(&mut self) -> Option<RtpAllocation> {
|
||||
let mut port = self.min;
|
||||
while port < self.max {
|
||||
// Prune a dead weak ref at this slot: if the last strong Arc
|
||||
// (held by the owning LegInfo) was dropped when the call ended,
|
||||
// the socket is already closed and the slot is free again.
|
||||
if let Some(weak) = self.allocated.get(&port) {
|
||||
if weak.strong_count() == 0 {
|
||||
self.allocated.remove(&port);
|
||||
}
|
||||
}
|
||||
if !self.allocated.contains_key(&port) {
|
||||
match UdpSocket::bind(format!("0.0.0.0:{port}")).await {
|
||||
Ok(sock) => {
|
||||
let sock = Arc::new(sock);
|
||||
self.allocated.insert(port, sock.clone());
|
||||
self.allocated.insert(port, Arc::downgrade(&sock));
|
||||
return Some(RtpAllocation { port, socket: sock });
|
||||
}
|
||||
Err(_) => {
|
||||
@@ -57,83 +67,6 @@ impl RtpPortPool {
|
||||
}
|
||||
None // Pool exhausted.
|
||||
}
|
||||
|
||||
/// Release a port back to the pool.
|
||||
pub fn release(&mut self, port: u16) {
|
||||
self.allocated.remove(&port);
|
||||
// Socket is dropped when the last Arc reference goes away.
|
||||
}
|
||||
|
||||
pub fn size(&self) -> usize {
|
||||
self.allocated.len()
|
||||
}
|
||||
|
||||
pub fn capacity(&self) -> usize {
|
||||
((self.max - self.min) / 2) as usize
|
||||
}
|
||||
}
|
||||
|
||||
/// An active RTP relay between two endpoints.
|
||||
/// Receives on `local_socket` and forwards to `remote_addr`.
|
||||
pub struct RtpRelay {
|
||||
pub local_port: u16,
|
||||
pub local_socket: Arc<UdpSocket>,
|
||||
pub remote_addr: Option<SocketAddr>,
|
||||
/// If set, transcode packets using this codec session before forwarding.
|
||||
pub transcode: Option<TranscodeConfig>,
|
||||
/// Packets received counter.
|
||||
pub pkt_received: u64,
|
||||
/// Packets sent counter.
|
||||
pub pkt_sent: u64,
|
||||
}
|
||||
|
||||
pub struct TranscodeConfig {
|
||||
pub from_pt: u8,
|
||||
pub to_pt: u8,
|
||||
pub session_id: String,
|
||||
}
|
||||
|
||||
impl RtpRelay {
|
||||
pub fn new(port: u16, socket: Arc<UdpSocket>) -> Self {
|
||||
Self {
|
||||
local_port: port,
|
||||
local_socket: socket,
|
||||
remote_addr: None,
|
||||
transcode: None,
|
||||
pkt_received: 0,
|
||||
pkt_sent: 0,
|
||||
}
|
||||
}
|
||||
|
||||
pub fn set_remote(&mut self, addr: SocketAddr) {
|
||||
self.remote_addr = Some(addr);
|
||||
}
|
||||
}
|
||||
|
||||
/// Send a 1-byte NAT priming packet to open a pinhole.
|
||||
pub async fn prime_nat(socket: &UdpSocket, remote: SocketAddr) {
|
||||
let _ = socket.send_to(&[0u8], remote).await;
|
||||
}
|
||||
|
||||
/// Build an RTP silence frame for PCMU (payload type 0).
|
||||
pub fn silence_frame_pcmu() -> Vec<u8> {
|
||||
// 12-byte RTP header + 160 bytes of µ-law silence (0xFF)
|
||||
let mut frame = vec![0u8; 172];
|
||||
frame[0] = 0x80; // V=2
|
||||
frame[1] = 0; // PT=0 (PCMU)
|
||||
// seq, timestamp, ssrc left as 0 — caller should set these
|
||||
frame[12..].fill(0xFF); // µ-law silence
|
||||
frame
|
||||
}
|
||||
|
||||
/// Build an RTP silence frame for G.722 (payload type 9).
|
||||
pub fn silence_frame_g722() -> Vec<u8> {
|
||||
// 12-byte RTP header + 160 bytes of G.722 silence
|
||||
let mut frame = vec![0u8; 172];
|
||||
frame[0] = 0x80; // V=2
|
||||
frame[1] = 9; // PT=9 (G.722)
|
||||
// G.722 silence: all zeros is valid silence
|
||||
frame
|
||||
}
|
||||
|
||||
/// Build an RTP header with the given parameters.
|
||||
|
||||
476
rust/crates/proxy-engine/src/sip_leg.rs
Normal file
476
rust/crates/proxy-engine/src/sip_leg.rs
Normal file
@@ -0,0 +1,476 @@
|
||||
//! SipLeg — manages one side of a B2BUA call.
|
||||
//!
|
||||
//! Handles the full INVITE lifecycle:
|
||||
//! - Send INVITE with SDP
|
||||
//! - Handle 407 Proxy Authentication (digest auth retry)
|
||||
//! - Handle 200 OK (ACK, learn media endpoint)
|
||||
//! - Handle BYE/CANCEL (teardown)
|
||||
//! - Track SIP dialog state (early → confirmed → terminated)
|
||||
//!
|
||||
//! Ported from ts/call/sip-leg.ts.
|
||||
|
||||
use sip_proto::dialog::{DialogState, SipDialog};
|
||||
use sip_proto::helpers::{
|
||||
build_sdp, compute_digest_auth, generate_branch, generate_tag, parse_digest_challenge,
|
||||
parse_sdp_endpoint, SdpOptions,
|
||||
};
|
||||
use sip_proto::message::{RequestOptions, SipMessage};
|
||||
use std::net::SocketAddr;
|
||||
use tokio::net::UdpSocket;
|
||||
|
||||
/// State of a SIP leg.
|
||||
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
|
||||
pub enum LegState {
|
||||
Inviting,
|
||||
Ringing,
|
||||
Connected,
|
||||
Terminating,
|
||||
Terminated,
|
||||
}
|
||||
|
||||
/// Configuration for creating a SIP leg.
|
||||
pub struct SipLegConfig {
|
||||
/// Proxy LAN IP (for Via, Contact, SDP).
|
||||
pub lan_ip: String,
|
||||
/// Proxy LAN port.
|
||||
pub lan_port: u16,
|
||||
/// Public IP (for provider-facing legs).
|
||||
pub public_ip: Option<String>,
|
||||
/// SIP target endpoint (provider outbound proxy or device address).
|
||||
pub sip_target: SocketAddr,
|
||||
/// Provider credentials (for 407 auth).
|
||||
// username is carried for parity with the provider config but digest auth
|
||||
// rebuilds the username from the registered AOR, so this slot is never read.
|
||||
#[allow(dead_code)]
|
||||
pub username: Option<String>,
|
||||
pub password: Option<String>,
|
||||
pub registered_aor: Option<String>,
|
||||
/// Codec payload types to offer.
|
||||
pub codecs: Vec<u8>,
|
||||
/// Our RTP port for SDP.
|
||||
pub rtp_port: u16,
|
||||
}
|
||||
|
||||
/// A SIP leg with full dialog management.
|
||||
pub struct SipLeg {
|
||||
// Leg identity is tracked via the enclosing LegInfo's key in the call's
|
||||
// leg map; SipLeg itself never reads this field back. Kept to preserve
|
||||
// the (id, config) constructor shape used by the call manager.
|
||||
#[allow(dead_code)]
|
||||
pub id: String,
|
||||
pub state: LegState,
|
||||
pub config: SipLegConfig,
|
||||
pub dialog: Option<SipDialog>,
|
||||
|
||||
/// The INVITE we sent (needed for CANCEL and 407 ACK).
|
||||
invite: Option<SipMessage>,
|
||||
/// Original unauthenticated INVITE (for re-ACKing retransmitted 407s).
|
||||
orig_invite: Option<SipMessage>,
|
||||
/// Whether we've attempted digest auth.
|
||||
auth_attempted: bool,
|
||||
|
||||
/// Remote media endpoint (learned from SDP in 200 OK).
|
||||
pub remote_media: Option<SocketAddr>,
|
||||
}
|
||||
|
||||
impl SipLeg {
|
||||
pub fn new(id: String, config: SipLegConfig) -> Self {
|
||||
Self {
|
||||
id,
|
||||
state: LegState::Inviting,
|
||||
config,
|
||||
dialog: None,
|
||||
invite: None,
|
||||
orig_invite: None,
|
||||
auth_attempted: false,
|
||||
remote_media: None,
|
||||
}
|
||||
}
|
||||
|
||||
/// Build and send an INVITE to establish this leg.
|
||||
pub async fn send_invite(
|
||||
&mut self,
|
||||
from_uri: &str,
|
||||
to_uri: &str,
|
||||
sip_call_id: &str,
|
||||
socket: &UdpSocket,
|
||||
) {
|
||||
let ip = self
|
||||
.config
|
||||
.public_ip
|
||||
.as_deref()
|
||||
.unwrap_or(&self.config.lan_ip);
|
||||
|
||||
let sdp = build_sdp(&SdpOptions {
|
||||
ip,
|
||||
port: self.config.rtp_port,
|
||||
payload_types: &self.config.codecs,
|
||||
..Default::default()
|
||||
});
|
||||
|
||||
let invite = SipMessage::create_request(
|
||||
"INVITE",
|
||||
to_uri,
|
||||
RequestOptions {
|
||||
via_host: ip.to_string(),
|
||||
via_port: self.config.lan_port,
|
||||
via_transport: None,
|
||||
via_branch: Some(generate_branch()),
|
||||
from_uri: from_uri.to_string(),
|
||||
from_display_name: None,
|
||||
from_tag: Some(generate_tag()),
|
||||
to_uri: to_uri.to_string(),
|
||||
to_display_name: None,
|
||||
to_tag: None,
|
||||
call_id: Some(sip_call_id.to_string()),
|
||||
cseq: Some(1),
|
||||
contact: Some(format!("<sip:{ip}:{}>", self.config.lan_port)),
|
||||
max_forwards: Some(70),
|
||||
body: Some(sdp),
|
||||
content_type: Some("application/sdp".to_string()),
|
||||
extra_headers: Some(vec![
|
||||
("User-Agent".to_string(), "SipRouter/1.0".to_string()),
|
||||
]),
|
||||
},
|
||||
);
|
||||
|
||||
self.dialog = Some(SipDialog::from_uac_invite(&invite, ip, self.config.lan_port));
|
||||
self.invite = Some(invite.clone());
|
||||
self.state = LegState::Inviting;
|
||||
|
||||
let _ = socket.send_to(&invite.serialize(), self.config.sip_target).await;
|
||||
}
|
||||
|
||||
/// Handle an incoming SIP message routed to this leg.
|
||||
/// Returns an optional reply to send (e.g. ACK, auth retry INVITE).
|
||||
pub fn handle_message(&mut self, msg: &SipMessage) -> SipLegAction {
|
||||
if msg.is_response() {
|
||||
self.handle_response(msg)
|
||||
} else {
|
||||
self.handle_request(msg)
|
||||
}
|
||||
}
|
||||
|
||||
fn handle_response(&mut self, msg: &SipMessage) -> SipLegAction {
|
||||
let code = msg.status_code().unwrap_or(0);
|
||||
let cseq_method = msg.cseq_method().unwrap_or("").to_uppercase();
|
||||
|
||||
if cseq_method != "INVITE" {
|
||||
return SipLegAction::None;
|
||||
}
|
||||
|
||||
// Handle retransmitted 407 for the original unauthenticated INVITE.
|
||||
if self.auth_attempted {
|
||||
if let Some(dialog) = &self.dialog {
|
||||
let response_cseq: u32 = msg
|
||||
.get_header("CSeq")
|
||||
.and_then(|s| s.split_whitespace().next())
|
||||
.and_then(|s| s.parse().ok())
|
||||
.unwrap_or(0);
|
||||
if response_cseq < dialog.local_cseq && code >= 400 {
|
||||
// ACK the retransmitted error response.
|
||||
if let Some(orig) = &self.orig_invite {
|
||||
let ack = build_non_2xx_ack(orig, msg);
|
||||
return SipLegAction::Send(ack.serialize());
|
||||
}
|
||||
return SipLegAction::None;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Handle 407 Proxy Authentication Required.
|
||||
if code == 407 {
|
||||
return self.handle_auth_challenge(msg);
|
||||
}
|
||||
|
||||
// Update dialog state.
|
||||
if let Some(dialog) = &mut self.dialog {
|
||||
dialog.process_response(msg);
|
||||
}
|
||||
|
||||
if code == 180 || code == 183 {
|
||||
self.state = LegState::Ringing;
|
||||
SipLegAction::StateChange(LegState::Ringing)
|
||||
} else if code >= 200 && code < 300 {
|
||||
// ACK the 200 OK.
|
||||
let ack_buf = if let Some(dialog) = &self.dialog {
|
||||
let ack = dialog.create_ack();
|
||||
Some(ack.serialize())
|
||||
} else {
|
||||
None
|
||||
};
|
||||
|
||||
// If already connected (200 retransmit), just re-ACK.
|
||||
if self.state == LegState::Connected {
|
||||
return match ack_buf {
|
||||
Some(buf) => SipLegAction::Send(buf),
|
||||
None => SipLegAction::None,
|
||||
};
|
||||
}
|
||||
|
||||
// Learn media endpoint from SDP.
|
||||
if msg.has_sdp_body() {
|
||||
if let Some(ep) = parse_sdp_endpoint(&msg.body) {
|
||||
if let Ok(addr) = format!("{}:{}", ep.address, ep.port).parse() {
|
||||
self.remote_media = Some(addr);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
self.state = LegState::Connected;
|
||||
|
||||
match ack_buf {
|
||||
Some(buf) => SipLegAction::ConnectedWithAck(buf),
|
||||
None => SipLegAction::StateChange(LegState::Connected),
|
||||
}
|
||||
} else if code >= 300 {
|
||||
self.state = LegState::Terminated;
|
||||
if let Some(dialog) = &mut self.dialog {
|
||||
dialog.terminate();
|
||||
}
|
||||
SipLegAction::Terminated(format!("rejected_{code}"))
|
||||
} else {
|
||||
SipLegAction::None // 1xx provisional
|
||||
}
|
||||
}
|
||||
|
||||
fn handle_auth_challenge(&mut self, msg: &SipMessage) -> SipLegAction {
|
||||
if self.auth_attempted {
|
||||
self.state = LegState::Terminated;
|
||||
if let Some(dialog) = &mut self.dialog {
|
||||
dialog.terminate();
|
||||
}
|
||||
return SipLegAction::Terminated("auth_rejected".to_string());
|
||||
}
|
||||
self.auth_attempted = true;
|
||||
|
||||
let challenge_header = match msg.get_header("Proxy-Authenticate") {
|
||||
Some(h) => h,
|
||||
None => {
|
||||
self.state = LegState::Terminated;
|
||||
return SipLegAction::Terminated("407_no_challenge".to_string());
|
||||
}
|
||||
};
|
||||
|
||||
let challenge = match parse_digest_challenge(challenge_header) {
|
||||
Some(c) => c,
|
||||
None => {
|
||||
self.state = LegState::Terminated;
|
||||
return SipLegAction::Terminated("407_bad_challenge".to_string());
|
||||
}
|
||||
};
|
||||
|
||||
let password = match &self.config.password {
|
||||
Some(p) => p.clone(),
|
||||
None => {
|
||||
self.state = LegState::Terminated;
|
||||
return SipLegAction::Terminated("407_no_password".to_string());
|
||||
}
|
||||
};
|
||||
|
||||
let aor = match &self.config.registered_aor {
|
||||
Some(a) => a.clone(),
|
||||
None => {
|
||||
self.state = LegState::Terminated;
|
||||
return SipLegAction::Terminated("407_no_aor".to_string());
|
||||
}
|
||||
};
|
||||
|
||||
let username = aor
|
||||
.trim_start_matches("sip:")
|
||||
.trim_start_matches("sips:")
|
||||
.split('@')
|
||||
.next()
|
||||
.unwrap_or("")
|
||||
.to_string();
|
||||
|
||||
let dest_uri = self
|
||||
.invite
|
||||
.as_ref()
|
||||
.and_then(|i| i.request_uri())
|
||||
.unwrap_or("")
|
||||
.to_string();
|
||||
|
||||
let auth_value = compute_digest_auth(
|
||||
&username,
|
||||
&password,
|
||||
&challenge.realm,
|
||||
&challenge.nonce,
|
||||
"INVITE",
|
||||
&dest_uri,
|
||||
challenge.algorithm.as_deref(),
|
||||
challenge.opaque.as_deref(),
|
||||
);
|
||||
|
||||
// ACK the 407.
|
||||
let mut ack_buf = None;
|
||||
if let Some(invite) = &self.invite {
|
||||
let ack = build_non_2xx_ack(invite, msg);
|
||||
ack_buf = Some(ack.serialize());
|
||||
}
|
||||
|
||||
// Save original INVITE for retransmission handling.
|
||||
self.orig_invite = self.invite.clone();
|
||||
|
||||
// Build authenticated INVITE with same From tag, CSeq=2.
|
||||
let ip = self
|
||||
.config
|
||||
.public_ip
|
||||
.as_deref()
|
||||
.unwrap_or(&self.config.lan_ip);
|
||||
let from_tag = self
|
||||
.dialog
|
||||
.as_ref()
|
||||
.map(|d| d.local_tag.clone())
|
||||
.unwrap_or_else(generate_tag);
|
||||
|
||||
let sdp = build_sdp(&SdpOptions {
|
||||
ip,
|
||||
port: self.config.rtp_port,
|
||||
payload_types: &self.config.codecs,
|
||||
..Default::default()
|
||||
});
|
||||
|
||||
let call_id = self
|
||||
.dialog
|
||||
.as_ref()
|
||||
.map(|d| d.call_id.clone())
|
||||
.unwrap_or_default();
|
||||
|
||||
let invite_auth = SipMessage::create_request(
|
||||
"INVITE",
|
||||
&dest_uri,
|
||||
RequestOptions {
|
||||
via_host: ip.to_string(),
|
||||
via_port: self.config.lan_port,
|
||||
via_transport: None,
|
||||
via_branch: Some(generate_branch()),
|
||||
from_uri: aor,
|
||||
from_display_name: None,
|
||||
from_tag: Some(from_tag),
|
||||
to_uri: dest_uri.clone(),
|
||||
to_display_name: None,
|
||||
to_tag: None,
|
||||
call_id: Some(call_id),
|
||||
cseq: Some(2),
|
||||
contact: Some(format!("<sip:{ip}:{}>", self.config.lan_port)),
|
||||
max_forwards: Some(70),
|
||||
body: Some(sdp),
|
||||
content_type: Some("application/sdp".to_string()),
|
||||
extra_headers: Some(vec![
|
||||
("Proxy-Authorization".to_string(), auth_value),
|
||||
("User-Agent".to_string(), "SipRouter/1.0".to_string()),
|
||||
]),
|
||||
},
|
||||
);
|
||||
|
||||
self.invite = Some(invite_auth.clone());
|
||||
if let Some(dialog) = &mut self.dialog {
|
||||
dialog.local_cseq = 2;
|
||||
}
|
||||
|
||||
// Return both the ACK for the 407 and the new authenticated INVITE.
|
||||
let invite_buf = invite_auth.serialize();
|
||||
SipLegAction::AuthRetry {
|
||||
ack_407: ack_buf,
|
||||
invite_with_auth: invite_buf,
|
||||
}
|
||||
}
|
||||
|
||||
fn handle_request(&mut self, msg: &SipMessage) -> SipLegAction {
|
||||
let method = msg.method().unwrap_or("");
|
||||
|
||||
if method == "BYE" {
|
||||
let ok = SipMessage::create_response(200, "OK", msg, None);
|
||||
self.state = LegState::Terminated;
|
||||
if let Some(dialog) = &mut self.dialog {
|
||||
dialog.terminate();
|
||||
}
|
||||
return SipLegAction::SendAndTerminate(ok.serialize(), "bye".to_string());
|
||||
}
|
||||
|
||||
if method == "INFO" {
|
||||
let ok = SipMessage::create_response(200, "OK", msg, None);
|
||||
return SipLegAction::Send(ok.serialize());
|
||||
}
|
||||
|
||||
SipLegAction::None
|
||||
}
|
||||
|
||||
/// Build a BYE or CANCEL to tear down this leg.
|
||||
pub fn build_hangup(&mut self) -> Option<Vec<u8>> {
|
||||
let dialog = self.dialog.as_mut()?;
|
||||
|
||||
let msg = if dialog.state == DialogState::Confirmed {
|
||||
dialog.create_request("BYE", None, None, None)
|
||||
} else if dialog.state == DialogState::Early {
|
||||
if let Some(invite) = &self.invite {
|
||||
dialog.create_cancel(invite)
|
||||
} else {
|
||||
return None;
|
||||
}
|
||||
} else {
|
||||
return None;
|
||||
};
|
||||
|
||||
self.state = LegState::Terminating;
|
||||
dialog.terminate();
|
||||
Some(msg.serialize())
|
||||
}
|
||||
}
|
||||
|
||||
/// Actions produced by the SipLeg message handler.
|
||||
pub enum SipLegAction {
|
||||
/// No action needed.
|
||||
None,
|
||||
/// Send a SIP message (ACK, 200 OK to INFO, etc.).
|
||||
Send(Vec<u8>),
|
||||
/// Leg state changed.
|
||||
StateChange(LegState),
|
||||
/// Connected — send this ACK.
|
||||
ConnectedWithAck(Vec<u8>),
|
||||
/// Terminated with a reason.
|
||||
Terminated(String),
|
||||
/// Send 200 OK and terminate.
|
||||
SendAndTerminate(Vec<u8>, String),
|
||||
/// 407 auth retry — send ACK for 407, then send new INVITE with auth.
|
||||
AuthRetry {
|
||||
ack_407: Option<Vec<u8>>,
|
||||
invite_with_auth: Vec<u8>,
|
||||
},
|
||||
}
|
||||
|
||||
/// Build an ACK for a non-2xx response (same transaction as the INVITE).
|
||||
fn build_non_2xx_ack(original_invite: &SipMessage, response: &SipMessage) -> SipMessage {
|
||||
let via = original_invite.get_header("Via").unwrap_or("").to_string();
|
||||
let from = original_invite
|
||||
.get_header("From")
|
||||
.unwrap_or("")
|
||||
.to_string();
|
||||
let to = response.get_header("To").unwrap_or("").to_string();
|
||||
let call_id = original_invite.call_id().to_string();
|
||||
let cseq_num: u32 = original_invite
|
||||
.get_header("CSeq")
|
||||
.and_then(|s| s.split_whitespace().next())
|
||||
.and_then(|s| s.parse().ok())
|
||||
.unwrap_or(1);
|
||||
|
||||
let ruri = original_invite
|
||||
.request_uri()
|
||||
.unwrap_or("sip:unknown")
|
||||
.to_string();
|
||||
|
||||
SipMessage::new(
|
||||
format!("ACK {ruri} SIP/2.0"),
|
||||
vec![
|
||||
("Via".to_string(), via),
|
||||
("From".to_string(), from),
|
||||
("To".to_string(), to),
|
||||
("Call-ID".to_string(), call_id),
|
||||
("CSeq".to_string(), format!("{cseq_num} ACK")),
|
||||
("Max-Forwards".to_string(), "70".to_string()),
|
||||
("Content-Length".to_string(), "0".to_string()),
|
||||
],
|
||||
String::new(),
|
||||
)
|
||||
}
|
||||
@@ -27,22 +27,6 @@ impl SipTransport {
|
||||
self.socket.clone()
|
||||
}
|
||||
|
||||
/// Send a raw SIP message to a destination.
|
||||
pub async fn send_to(&self, data: &[u8], dest: SocketAddr) -> Result<usize, String> {
|
||||
self.socket
|
||||
.send_to(data, dest)
|
||||
.await
|
||||
.map_err(|e| format!("send to {dest}: {e}"))
|
||||
}
|
||||
|
||||
/// Send a raw SIP message to an address:port pair.
|
||||
pub async fn send_to_addr(&self, data: &[u8], addr: &str, port: u16) -> Result<usize, String> {
|
||||
let dest: SocketAddr = format!("{addr}:{port}")
|
||||
.parse()
|
||||
.map_err(|e| format!("bad address {addr}:{port}: {e}"))?;
|
||||
self.send_to(data, dest).await
|
||||
}
|
||||
|
||||
/// Spawn the UDP receive loop. Calls the handler for every received packet.
|
||||
pub fn spawn_receiver<F>(
|
||||
&self,
|
||||
|
||||
143
rust/crates/proxy-engine/src/tool_leg.rs
Normal file
143
rust/crates/proxy-engine/src/tool_leg.rs
Normal file
@@ -0,0 +1,143 @@
|
||||
//! Tool leg consumers — background tasks that process per-source unmerged audio.
|
||||
//!
|
||||
//! Tool legs are observer legs that receive individual audio streams from each
|
||||
//! participant in a call. The mixer pipes `ToolAudioBatch` every 20ms containing
|
||||
//! each participant's decoded PCM@48kHz f32 tagged with source leg ID.
|
||||
//!
|
||||
//! Consumers:
|
||||
//! - **Recording**: writes per-source WAV files for speaker-separated recording.
|
||||
//! - **Transcription**: stub for future Whisper integration (accumulates audio in Rust).
|
||||
|
||||
use crate::ipc::{emit_event, OutTx};
|
||||
use crate::mixer::ToolAudioBatch;
|
||||
use crate::recorder::Recorder;
|
||||
use std::collections::HashMap;
|
||||
use tokio::sync::mpsc;
|
||||
use tokio::task::JoinHandle;
|
||||
|
||||
// ---------------------------------------------------------------------------
|
||||
// Recording consumer
|
||||
// ---------------------------------------------------------------------------
|
||||
|
||||
/// Spawn a recording tool leg that writes per-source WAV files.
|
||||
///
|
||||
/// Returns the channel sender (for the mixer to send batches) and the task handle.
|
||||
/// When the channel is closed (tool leg removed), all WAV files are finalized
|
||||
/// and a `tool_recording_done` event is emitted.
|
||||
pub fn spawn_recording_tool(
|
||||
tool_leg_id: String,
|
||||
call_id: String,
|
||||
base_dir: String,
|
||||
out_tx: OutTx,
|
||||
) -> (mpsc::Sender<ToolAudioBatch>, JoinHandle<()>) {
|
||||
let (tx, mut rx) = mpsc::channel::<ToolAudioBatch>(64);
|
||||
|
||||
let handle = tokio::spawn(async move {
|
||||
let mut recorders: HashMap<String, Recorder> = HashMap::new();
|
||||
|
||||
while let Some(batch) = rx.recv().await {
|
||||
for source in &batch.sources {
|
||||
// Skip silence-only frames (near-zero = no audio activity).
|
||||
let has_audio = source.pcm_48k.iter().any(|&s| s.abs() > 1e-6);
|
||||
if !has_audio && !recorders.contains_key(&source.leg_id) {
|
||||
continue; // Don't create a file for silence-only sources.
|
||||
}
|
||||
|
||||
let recorder = recorders.entry(source.leg_id.clone()).or_insert_with(|| {
|
||||
let path = format!("{}/{}-{}.wav", base_dir, call_id, source.leg_id);
|
||||
Recorder::new_pcm(&path, 48000, None).unwrap_or_else(|e| {
|
||||
panic!("failed to create recorder for {}: {e}", source.leg_id);
|
||||
})
|
||||
});
|
||||
|
||||
// Convert f32 [-1.0, 1.0] to i16 for WAV writing.
|
||||
let pcm_i16: Vec<i16> = source.pcm_48k
|
||||
.iter()
|
||||
.map(|&s| (s * 32767.0).round().clamp(-32768.0, 32767.0) as i16)
|
||||
.collect();
|
||||
if !recorder.write_pcm(&pcm_i16) {
|
||||
// Max duration reached — stop recording this source.
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Channel closed — finalize all recordings.
|
||||
let mut files = Vec::new();
|
||||
for (leg_id, rec) in recorders {
|
||||
let result = rec.stop();
|
||||
files.push(serde_json::json!({
|
||||
"source_leg_id": leg_id,
|
||||
"file_path": result.file_path,
|
||||
"duration_ms": result.duration_ms,
|
||||
}));
|
||||
}
|
||||
|
||||
emit_event(
|
||||
&out_tx,
|
||||
"tool_recording_done",
|
||||
serde_json::json!({
|
||||
"call_id": call_id,
|
||||
"tool_leg_id": tool_leg_id,
|
||||
"files": files,
|
||||
}),
|
||||
);
|
||||
});
|
||||
|
||||
(tx, handle)
|
||||
}
|
||||
|
||||
// ---------------------------------------------------------------------------
|
||||
// Transcription consumer (stub — real plumbing, stub consumer)
|
||||
// ---------------------------------------------------------------------------
|
||||
|
||||
/// Spawn a transcription tool leg.
|
||||
///
|
||||
/// The plumbing is fully real: it receives per-source unmerged PCM@48kHz f32 from
|
||||
/// the mixer every 20ms. The consumer is a stub that accumulates audio and
|
||||
/// reports metadata on close. Future: will stream to a Whisper HTTP endpoint.
|
||||
pub fn spawn_transcription_tool(
|
||||
tool_leg_id: String,
|
||||
call_id: String,
|
||||
out_tx: OutTx,
|
||||
) -> (mpsc::Sender<ToolAudioBatch>, JoinHandle<()>) {
|
||||
let (tx, mut rx) = mpsc::channel::<ToolAudioBatch>(64);
|
||||
|
||||
let handle = tokio::spawn(async move {
|
||||
// Track per-source sample counts for duration reporting.
|
||||
let mut source_samples: HashMap<String, u64> = HashMap::new();
|
||||
|
||||
while let Some(batch) = rx.recv().await {
|
||||
for source in &batch.sources {
|
||||
*source_samples.entry(source.leg_id.clone()).or_insert(0) +=
|
||||
source.pcm_48k.len() as u64;
|
||||
|
||||
// TODO: Future — accumulate chunks and stream to Whisper endpoint.
|
||||
// For now, the audio is received and counted but not processed.
|
||||
}
|
||||
}
|
||||
|
||||
// Channel closed — report metadata.
|
||||
let sources: Vec<serde_json::Value> = source_samples
|
||||
.iter()
|
||||
.map(|(leg_id, samples)| {
|
||||
serde_json::json!({
|
||||
"source_leg_id": leg_id,
|
||||
"duration_ms": (samples * 1000) / 48000,
|
||||
})
|
||||
})
|
||||
.collect();
|
||||
|
||||
emit_event(
|
||||
&out_tx,
|
||||
"tool_transcription_done",
|
||||
serde_json::json!({
|
||||
"call_id": call_id,
|
||||
"tool_leg_id": tool_leg_id,
|
||||
"sources": sources,
|
||||
}),
|
||||
);
|
||||
});
|
||||
|
||||
(tx, handle)
|
||||
}
|
||||
138
rust/crates/proxy-engine/src/tts.rs
Normal file
138
rust/crates/proxy-engine/src/tts.rs
Normal file
@@ -0,0 +1,138 @@
|
||||
//! Text-to-speech engine — synthesizes text to WAV files using Kokoro neural TTS.
|
||||
//!
|
||||
//! The model is loaded lazily on first use. If the model/voices files are not
|
||||
//! present, the generate command returns an error and the TS side falls back
|
||||
//! to espeak-ng.
|
||||
|
||||
use kokoro_tts::{KokoroTts, Voice};
|
||||
use std::path::Path;
|
||||
|
||||
/// Wraps the Kokoro TTS engine with lazy model loading.
|
||||
pub struct TtsEngine {
|
||||
tts: Option<KokoroTts>,
|
||||
/// Path that was used to load the current model (for cache invalidation).
|
||||
loaded_model_path: String,
|
||||
loaded_voices_path: String,
|
||||
}
|
||||
|
||||
impl TtsEngine {
|
||||
pub fn new() -> Self {
|
||||
Self {
|
||||
tts: None,
|
||||
loaded_model_path: String::new(),
|
||||
loaded_voices_path: String::new(),
|
||||
}
|
||||
}
|
||||
|
||||
/// Generate a WAV file from text.
|
||||
///
|
||||
/// Params (from IPC JSON):
|
||||
/// - `model`: path to the ONNX model file
|
||||
/// - `voices`: path to the voices.bin file
|
||||
/// - `voice`: voice name (e.g. "af_bella")
|
||||
/// - `text`: text to synthesize
|
||||
/// - `output`: output WAV file path
|
||||
pub async fn generate(&mut self, params: &serde_json::Value) -> Result<serde_json::Value, String> {
|
||||
let model_path = params.get("model").and_then(|v| v.as_str())
|
||||
.ok_or("missing 'model' param")?;
|
||||
let voices_path = params.get("voices").and_then(|v| v.as_str())
|
||||
.ok_or("missing 'voices' param")?;
|
||||
let voice_name = params.get("voice").and_then(|v| v.as_str())
|
||||
.unwrap_or("af_bella");
|
||||
let text = params.get("text").and_then(|v| v.as_str())
|
||||
.ok_or("missing 'text' param")?;
|
||||
let output_path = params.get("output").and_then(|v| v.as_str())
|
||||
.ok_or("missing 'output' param")?;
|
||||
|
||||
if text.is_empty() {
|
||||
return Err("empty text".into());
|
||||
}
|
||||
|
||||
// Check that model/voices files exist.
|
||||
if !Path::new(model_path).exists() {
|
||||
return Err(format!("model not found: {model_path}"));
|
||||
}
|
||||
if !Path::new(voices_path).exists() {
|
||||
return Err(format!("voices not found: {voices_path}"));
|
||||
}
|
||||
|
||||
// Lazy-load or reload if paths changed.
|
||||
if self.tts.is_none()
|
||||
|| self.loaded_model_path != model_path
|
||||
|| self.loaded_voices_path != voices_path
|
||||
{
|
||||
eprintln!("[tts] loading model: {model_path}");
|
||||
let tts = KokoroTts::new(model_path, voices_path)
|
||||
.await
|
||||
.map_err(|e| format!("model load failed: {e:?}"))?;
|
||||
self.tts = Some(tts);
|
||||
self.loaded_model_path = model_path.to_string();
|
||||
self.loaded_voices_path = voices_path.to_string();
|
||||
}
|
||||
|
||||
let tts = self.tts.as_ref().unwrap();
|
||||
let voice = select_voice(voice_name);
|
||||
|
||||
eprintln!("[tts] synthesizing voice '{voice_name}': \"{text}\"");
|
||||
let (samples, duration) = tts.synth(text, voice)
|
||||
.await
|
||||
.map_err(|e| format!("synthesis failed: {e:?}"))?;
|
||||
eprintln!("[tts] synthesized {} samples in {duration:?}", samples.len());
|
||||
|
||||
// Write 24kHz 16-bit mono WAV.
|
||||
let spec = hound::WavSpec {
|
||||
channels: 1,
|
||||
sample_rate: 24000,
|
||||
bits_per_sample: 16,
|
||||
sample_format: hound::SampleFormat::Int,
|
||||
};
|
||||
|
||||
let mut writer = hound::WavWriter::create(output_path, spec)
|
||||
.map_err(|e| format!("WAV create failed: {e}"))?;
|
||||
for &sample in &samples {
|
||||
let s16 = (sample * 32767.0).round().clamp(-32768.0, 32767.0) as i16;
|
||||
writer.write_sample(s16).map_err(|e| format!("WAV write: {e}"))?;
|
||||
}
|
||||
writer.finalize().map_err(|e| format!("WAV finalize: {e}"))?;
|
||||
|
||||
eprintln!("[tts] wrote {output_path}");
|
||||
Ok(serde_json::json!({ "output": output_path }))
|
||||
}
|
||||
}
|
||||
|
||||
/// Map voice name string to Kokoro Voice enum variant.
|
||||
fn select_voice(name: &str) -> Voice {
|
||||
match name {
|
||||
"af_bella" => Voice::AfBella(1.0),
|
||||
"af_heart" => Voice::AfHeart(1.0),
|
||||
"af_jessica" => Voice::AfJessica(1.0),
|
||||
"af_nicole" => Voice::AfNicole(1.0),
|
||||
"af_nova" => Voice::AfNova(1.0),
|
||||
"af_sarah" => Voice::AfSarah(1.0),
|
||||
"af_sky" => Voice::AfSky(1.0),
|
||||
"af_river" => Voice::AfRiver(1.0),
|
||||
"af_alloy" => Voice::AfAlloy(1.0),
|
||||
"af_aoede" => Voice::AfAoede(1.0),
|
||||
"af_kore" => Voice::AfKore(1.0),
|
||||
"am_adam" => Voice::AmAdam(1.0),
|
||||
"am_echo" => Voice::AmEcho(1.0),
|
||||
"am_eric" => Voice::AmEric(1.0),
|
||||
"am_fenrir" => Voice::AmFenrir(1.0),
|
||||
"am_liam" => Voice::AmLiam(1.0),
|
||||
"am_michael" => Voice::AmMichael(1.0),
|
||||
"am_onyx" => Voice::AmOnyx(1.0),
|
||||
"am_puck" => Voice::AmPuck(1.0),
|
||||
"bf_alice" => Voice::BfAlice(1.0),
|
||||
"bf_emma" => Voice::BfEmma(1.0),
|
||||
"bf_isabella" => Voice::BfIsabella(1.0),
|
||||
"bf_lily" => Voice::BfLily(1.0),
|
||||
"bm_daniel" => Voice::BmDaniel(1.0),
|
||||
"bm_fable" => Voice::BmFable(1.0),
|
||||
"bm_george" => Voice::BmGeorge(1.0),
|
||||
"bm_lewis" => Voice::BmLewis(1.0),
|
||||
_ => {
|
||||
eprintln!("[tts] unknown voice '{name}', falling back to af_bella");
|
||||
Voice::AfBella(1.0)
|
||||
}
|
||||
}
|
||||
}
|
||||
@@ -1,16 +1,17 @@
|
||||
//! WebRTC engine — manages browser PeerConnections with SIP audio bridging.
|
||||
//! WebRTC engine — manages browser PeerConnections.
|
||||
//!
|
||||
//! Browser Opus audio → Rust PeerConnection → transcode via codec-lib → SIP RTP
|
||||
//! SIP RTP → transcode via codec-lib → Rust PeerConnection → Browser Opus
|
||||
//! Audio bridging is now channel-based:
|
||||
//! - Browser Opus audio → on_track → mixer inbound channel
|
||||
//! - Mixer outbound channel → Opus RTP → TrackLocalStaticRTP → browser
|
||||
//!
|
||||
//! The mixer handles all transcoding. The WebRTC engine just shuttles raw Opus.
|
||||
|
||||
use crate::ipc::{emit_event, OutTx};
|
||||
use crate::rtp::{build_rtp_header, rtp_clock_increment};
|
||||
use codec_lib::{TranscodeState, PT_G722, PT_OPUS};
|
||||
use crate::mixer::RtpPacket;
|
||||
use codec_lib::PT_OPUS;
|
||||
use std::collections::HashMap;
|
||||
use std::net::SocketAddr;
|
||||
use std::sync::Arc;
|
||||
use tokio::net::UdpSocket;
|
||||
use tokio::sync::Mutex;
|
||||
use tokio::sync::{mpsc, Mutex};
|
||||
use webrtc::api::media_engine::MediaEngine;
|
||||
use webrtc::api::APIBuilder;
|
||||
use webrtc::ice_transport::ice_candidate::RTCIceCandidateInit;
|
||||
@@ -22,24 +23,14 @@ use webrtc::rtp_transceiver::rtp_codec::RTCRtpCodecCapability;
|
||||
use webrtc::track::track_local::track_local_static_rtp::TrackLocalStaticRTP;
|
||||
use webrtc::track::track_local::{TrackLocal, TrackLocalWriter};
|
||||
|
||||
/// SIP-side bridge info for a WebRTC session.
|
||||
#[derive(Clone)]
|
||||
pub struct SipBridgeInfo {
|
||||
/// Provider's media endpoint (RTP destination).
|
||||
pub provider_media: SocketAddr,
|
||||
/// Provider's codec payload type (e.g. 9 for G.722).
|
||||
pub sip_pt: u8,
|
||||
/// The SIP UDP socket for sending RTP to the provider.
|
||||
pub sip_socket: Arc<UdpSocket>,
|
||||
}
|
||||
|
||||
/// A managed WebRTC session.
|
||||
struct WebRtcSession {
|
||||
pc: Arc<RTCPeerConnection>,
|
||||
local_track: Arc<TrackLocalStaticRTP>,
|
||||
call_id: Option<String>,
|
||||
/// SIP bridge — set when the session is linked to a call.
|
||||
sip_bridge: Arc<Mutex<Option<SipBridgeInfo>>>,
|
||||
/// Channel sender for forwarding browser Opus audio to the mixer.
|
||||
/// Set when the session is linked to a call via link_to_mixer().
|
||||
mixer_tx: Arc<Mutex<Option<mpsc::Sender<RtpPacket>>>>,
|
||||
}
|
||||
|
||||
/// Manages all WebRTC sessions.
|
||||
@@ -56,7 +47,7 @@ impl WebRtcEngine {
|
||||
}
|
||||
}
|
||||
|
||||
/// Handle a WebRTC offer from a browser.
|
||||
/// Handle a WebRTC offer from a browser — create PeerConnection, return SDP answer.
|
||||
pub async fn handle_offer(
|
||||
&mut self,
|
||||
session_id: &str,
|
||||
@@ -99,8 +90,9 @@ impl WebRtcEngine {
|
||||
.await
|
||||
.map_err(|e| format!("add track: {e}"))?;
|
||||
|
||||
// Shared SIP bridge info (populated when linked to a call).
|
||||
let sip_bridge: Arc<Mutex<Option<SipBridgeInfo>>> = Arc::new(Mutex::new(None));
|
||||
// Shared mixer channel sender (populated when linked to a call).
|
||||
let mixer_tx: Arc<Mutex<Option<mpsc::Sender<RtpPacket>>>> =
|
||||
Arc::new(Mutex::new(None));
|
||||
|
||||
// ICE candidate handler.
|
||||
let out_tx_ice = self.out_tx.clone();
|
||||
@@ -151,14 +143,14 @@ impl WebRtcEngine {
|
||||
}));
|
||||
|
||||
// Track handler — receives Opus audio from the browser.
|
||||
// When SIP bridge is set, transcodes and forwards to provider.
|
||||
// Forwards raw Opus payload to the mixer channel (when linked).
|
||||
let out_tx_track = self.out_tx.clone();
|
||||
let sid_track = session_id.to_string();
|
||||
let sip_bridge_for_track = sip_bridge.clone();
|
||||
let mixer_tx_for_track = mixer_tx.clone();
|
||||
pc.on_track(Box::new(move |track, _receiver, _transceiver| {
|
||||
let out_tx = out_tx_track.clone();
|
||||
let sid = sid_track.clone();
|
||||
let bridge = sip_bridge_for_track.clone();
|
||||
let mixer_tx = mixer_tx_for_track.clone();
|
||||
Box::pin(async move {
|
||||
let codec_info = track.codec();
|
||||
emit_event(
|
||||
@@ -171,8 +163,8 @@ impl WebRtcEngine {
|
||||
}),
|
||||
);
|
||||
|
||||
// Spawn the browser→SIP audio forwarding task.
|
||||
tokio::spawn(browser_to_sip_loop(track, bridge, out_tx, sid));
|
||||
// Spawn browser→mixer forwarding task.
|
||||
tokio::spawn(browser_to_mixer_loop(track, mixer_tx, out_tx, sid));
|
||||
})
|
||||
}));
|
||||
|
||||
@@ -199,67 +191,41 @@ impl WebRtcEngine {
|
||||
pc,
|
||||
local_track,
|
||||
call_id: None,
|
||||
sip_bridge,
|
||||
mixer_tx,
|
||||
},
|
||||
);
|
||||
|
||||
Ok(answer_sdp)
|
||||
}
|
||||
|
||||
/// Link a WebRTC session to a SIP call — sets up the audio bridge.
|
||||
pub async fn link_to_sip(
|
||||
/// Link a WebRTC session to a call's mixer via channels.
|
||||
/// - `inbound_tx`: browser audio goes TO the mixer through this channel
|
||||
/// - `outbound_rx`: mixed audio comes FROM the mixer through this channel
|
||||
pub async fn link_to_mixer(
|
||||
&mut self,
|
||||
session_id: &str,
|
||||
call_id: &str,
|
||||
bridge_info: SipBridgeInfo,
|
||||
inbound_tx: mpsc::Sender<RtpPacket>,
|
||||
outbound_rx: mpsc::Receiver<Vec<u8>>,
|
||||
) -> bool {
|
||||
if let Some(session) = self.sessions.get_mut(session_id) {
|
||||
session.call_id = Some(call_id.to_string());
|
||||
let mut bridge = session.sip_bridge.lock().await;
|
||||
*bridge = Some(bridge_info);
|
||||
true
|
||||
} else {
|
||||
false
|
||||
}
|
||||
}
|
||||
let session = match self.sessions.get_mut(session_id) {
|
||||
Some(s) => s,
|
||||
None => return false,
|
||||
};
|
||||
|
||||
/// Send transcoded audio from the SIP side to the browser.
|
||||
/// Called by the RTP relay when it receives a packet from the provider.
|
||||
pub async fn forward_sip_to_browser(
|
||||
&self,
|
||||
session_id: &str,
|
||||
sip_rtp_payload: &[u8],
|
||||
sip_pt: u8,
|
||||
) -> Result<(), String> {
|
||||
let session = self
|
||||
.sessions
|
||||
.get(session_id)
|
||||
.ok_or_else(|| format!("session {session_id} not found"))?;
|
||||
session.call_id = Some(call_id.to_string());
|
||||
|
||||
// Transcode SIP codec → Opus.
|
||||
// We create a temporary TranscodeState per packet for simplicity.
|
||||
// TODO: Use a per-session persistent state for proper codec continuity.
|
||||
let mut transcoder = TranscodeState::new().map_err(|e| format!("codec: {e}"))?;
|
||||
let opus_payload = transcoder
|
||||
.transcode(sip_rtp_payload, sip_pt, PT_OPUS, Some("to_browser"))
|
||||
.map_err(|e| format!("transcode: {e}"))?;
|
||||
|
||||
if opus_payload.is_empty() {
|
||||
return Ok(());
|
||||
// Set the mixer sender so the on_track loop starts forwarding.
|
||||
{
|
||||
let mut tx = session.mixer_tx.lock().await;
|
||||
*tx = Some(inbound_tx);
|
||||
}
|
||||
|
||||
// Build RTP header for Opus.
|
||||
// TODO: Track seq/ts/ssrc per session for proper continuity.
|
||||
let header = build_rtp_header(PT_OPUS, 0, 0, 0);
|
||||
let mut packet = header.to_vec();
|
||||
packet.extend_from_slice(&opus_payload);
|
||||
// Spawn mixer→browser outbound task.
|
||||
let local_track = session.local_track.clone();
|
||||
tokio::spawn(mixer_to_browser_loop(outbound_rx, local_track));
|
||||
|
||||
session
|
||||
.local_track
|
||||
.write(&packet)
|
||||
.await
|
||||
.map(|_| ())
|
||||
.map_err(|e| format!("write: {e}"))
|
||||
true
|
||||
}
|
||||
|
||||
pub async fn add_ice_candidate(
|
||||
@@ -294,90 +260,51 @@ impl WebRtcEngine {
|
||||
}
|
||||
Ok(())
|
||||
}
|
||||
|
||||
pub fn has_session(&self, session_id: &str) -> bool {
|
||||
self.sessions.contains_key(session_id)
|
||||
}
|
||||
}
|
||||
|
||||
/// Browser → SIP audio forwarding loop.
|
||||
/// Reads Opus RTP from the browser, transcodes to the SIP codec, sends to provider.
|
||||
async fn browser_to_sip_loop(
|
||||
/// Browser → Mixer audio forwarding loop.
|
||||
/// Reads Opus RTP from the browser track, sends raw Opus payload to the mixer channel.
|
||||
async fn browser_to_mixer_loop(
|
||||
track: Arc<webrtc::track::track_remote::TrackRemote>,
|
||||
sip_bridge: Arc<Mutex<Option<SipBridgeInfo>>>,
|
||||
mixer_tx: Arc<Mutex<Option<mpsc::Sender<RtpPacket>>>>,
|
||||
out_tx: OutTx,
|
||||
session_id: String,
|
||||
) {
|
||||
// Create a persistent codec state for this direction.
|
||||
let mut transcoder = match TranscodeState::new() {
|
||||
Ok(t) => t,
|
||||
Err(e) => {
|
||||
emit_event(
|
||||
&out_tx,
|
||||
"webrtc_error",
|
||||
serde_json::json!({ "session_id": session_id, "error": format!("codec init: {e}") }),
|
||||
);
|
||||
return;
|
||||
}
|
||||
};
|
||||
|
||||
let mut buf = vec![0u8; 1500];
|
||||
let mut count = 0u64;
|
||||
let mut to_sip_seq: u16 = 0;
|
||||
let mut to_sip_ts: u32 = 0;
|
||||
let to_sip_ssrc: u32 = rand::random();
|
||||
|
||||
loop {
|
||||
match track.read(&mut buf).await {
|
||||
Ok((rtp_packet, _attributes)) => {
|
||||
count += 1;
|
||||
|
||||
// Get the SIP bridge info (may not be set yet if call isn't linked).
|
||||
let bridge = sip_bridge.lock().await;
|
||||
let bridge_info = match bridge.as_ref() {
|
||||
Some(b) => b.clone(),
|
||||
None => continue, // Not linked to a SIP call yet — drop the packet.
|
||||
};
|
||||
drop(bridge); // Release lock before doing I/O.
|
||||
|
||||
// Extract Opus payload from the RTP packet (skip 12-byte header).
|
||||
let payload = &rtp_packet.payload;
|
||||
if payload.is_empty() {
|
||||
continue;
|
||||
}
|
||||
|
||||
// Transcode Opus → SIP codec (e.g. G.722).
|
||||
let sip_payload = match transcoder.transcode(
|
||||
payload,
|
||||
PT_OPUS,
|
||||
bridge_info.sip_pt,
|
||||
Some("to_sip"),
|
||||
) {
|
||||
Ok(p) if !p.is_empty() => p,
|
||||
_ => continue,
|
||||
};
|
||||
|
||||
// Build SIP RTP packet.
|
||||
let header = build_rtp_header(bridge_info.sip_pt, to_sip_seq, to_sip_ts, to_sip_ssrc);
|
||||
let mut sip_rtp = header.to_vec();
|
||||
sip_rtp.extend_from_slice(&sip_payload);
|
||||
|
||||
to_sip_seq = to_sip_seq.wrapping_add(1);
|
||||
to_sip_ts = to_sip_ts.wrapping_add(rtp_clock_increment(bridge_info.sip_pt));
|
||||
|
||||
// Send to provider.
|
||||
let _ = bridge_info
|
||||
.sip_socket
|
||||
.send_to(&sip_rtp, bridge_info.provider_media)
|
||||
.await;
|
||||
// Send raw Opus payload to mixer (if linked).
|
||||
let tx = mixer_tx.lock().await;
|
||||
if let Some(ref tx) = *tx {
|
||||
let _ = tx
|
||||
.send(RtpPacket {
|
||||
payload: payload.to_vec(),
|
||||
payload_type: PT_OPUS,
|
||||
marker: rtp_packet.header.marker,
|
||||
seq: rtp_packet.header.sequence_number,
|
||||
timestamp: rtp_packet.header.timestamp,
|
||||
})
|
||||
.await;
|
||||
}
|
||||
drop(tx);
|
||||
|
||||
if count == 1 || count == 50 || count % 500 == 0 {
|
||||
emit_event(
|
||||
&out_tx,
|
||||
"webrtc_audio_tx",
|
||||
"webrtc_audio_rx",
|
||||
serde_json::json!({
|
||||
"session_id": session_id,
|
||||
"direction": "browser_to_sip",
|
||||
"direction": "browser_to_mixer",
|
||||
"packet_count": count,
|
||||
}),
|
||||
);
|
||||
@@ -387,3 +314,14 @@ async fn browser_to_sip_loop(
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/// Mixer → Browser audio forwarding loop.
|
||||
/// Reads Opus-encoded RTP packets from the mixer and writes to the WebRTC track.
|
||||
async fn mixer_to_browser_loop(
|
||||
mut outbound_rx: mpsc::Receiver<Vec<u8>>,
|
||||
local_track: Arc<TrackLocalStaticRTP>,
|
||||
) {
|
||||
while let Some(rtp_data) = outbound_rx.recv().await {
|
||||
let _ = local_track.write(&rtp_data).await;
|
||||
}
|
||||
}
|
||||
|
||||
@@ -197,10 +197,11 @@ pub fn compute_digest_auth(
|
||||
|
||||
use crate::Endpoint;
|
||||
|
||||
/// Parse the audio media port and connection address from an SDP body.
|
||||
/// Parse the audio media port, connection address, and preferred codec from an SDP body.
|
||||
pub fn parse_sdp_endpoint(sdp: &str) -> Option<Endpoint> {
|
||||
let mut addr: Option<&str> = None;
|
||||
let mut port: Option<u16> = None;
|
||||
let mut codec_pt: Option<u8> = None;
|
||||
|
||||
let normalized = sdp.replace("\r\n", "\n");
|
||||
for raw in normalized.split('\n') {
|
||||
@@ -208,10 +209,16 @@ pub fn parse_sdp_endpoint(sdp: &str) -> Option<Endpoint> {
|
||||
if let Some(rest) = line.strip_prefix("c=IN IP4 ") {
|
||||
addr = Some(rest.trim());
|
||||
} else if let Some(rest) = line.strip_prefix("m=audio ") {
|
||||
// m=audio <port> RTP/AVP <pt1> [<pt2> ...]
|
||||
let parts: Vec<&str> = rest.split_whitespace().collect();
|
||||
if !parts.is_empty() {
|
||||
port = parts[0].parse().ok();
|
||||
}
|
||||
// parts[1] is "RTP/AVP" or similar, parts[2..] are payload types.
|
||||
// The first PT is the preferred codec.
|
||||
if parts.len() > 2 {
|
||||
codec_pt = parts[2].parse::<u8>().ok();
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
@@ -219,6 +226,7 @@ pub fn parse_sdp_endpoint(sdp: &str) -> Option<Endpoint> {
|
||||
(Some(a), Some(p)) => Some(Endpoint {
|
||||
address: a.to_string(),
|
||||
port: p,
|
||||
codec_pt,
|
||||
}),
|
||||
_ => None,
|
||||
}
|
||||
|
||||
@@ -9,9 +9,11 @@ pub mod dialog;
|
||||
pub mod helpers;
|
||||
pub mod rewrite;
|
||||
|
||||
/// Network endpoint (address + port).
|
||||
/// Network endpoint (address + port + optional negotiated codec).
|
||||
#[derive(Debug, Clone, PartialEq, Eq)]
|
||||
pub struct Endpoint {
|
||||
pub address: String,
|
||||
pub port: u16,
|
||||
/// First payload type from the SDP `m=audio` line (the preferred codec).
|
||||
pub codec_pt: Option<u8>,
|
||||
}
|
||||
|
||||
@@ -92,7 +92,7 @@ pub fn rewrite_sdp(body: &str, ip: &str, port: u16) -> (String, Option<Endpoint>
|
||||
.collect();
|
||||
|
||||
let original = match (orig_addr, orig_port) {
|
||||
(Some(a), Some(p)) => Some(Endpoint { address: a, port: p }),
|
||||
(Some(a), Some(p)) => Some(Endpoint { address: a, port: p, codec_pt: None }),
|
||||
_ => None,
|
||||
};
|
||||
|
||||
|
||||
@@ -1,18 +0,0 @@
|
||||
[package]
|
||||
name = "tts-engine"
|
||||
version = "0.1.0"
|
||||
edition = "2021"
|
||||
|
||||
[[bin]]
|
||||
name = "tts-engine"
|
||||
path = "src/main.rs"
|
||||
|
||||
[dependencies]
|
||||
kokoro-tts = { version = "0.3", default-features = false }
|
||||
# Pin to rc.11 matching kokoro-tts's expectation; enable vendored TLS to avoid system libssl-dev.
|
||||
ort = { version = "=2.0.0-rc.11", default-features = false, features = [
|
||||
"std", "download-binaries", "copy-dylibs", "ndarray",
|
||||
"tls-native-vendored"
|
||||
] }
|
||||
tokio = { version = "1", features = ["rt-multi-thread", "macros"] }
|
||||
hound = "3.5"
|
||||
@@ -1,149 +0,0 @@
|
||||
/// TTS engine CLI — synthesizes text to a WAV file using Kokoro neural TTS.
|
||||
///
|
||||
/// Usage:
|
||||
/// echo "Hello world" | tts-engine --model kokoro-v1.0.onnx --voices voices.bin --output out.wav
|
||||
/// tts-engine --model kokoro-v1.0.onnx --voices voices.bin --output out.wav --text "Hello world"
|
||||
///
|
||||
/// Outputs 24kHz 16-bit mono WAV.
|
||||
|
||||
use kokoro_tts::{KokoroTts, Voice};
|
||||
use std::io::{self, Read};
|
||||
|
||||
fn parse_args() -> Result<(String, String, String, String, Option<String>), String> {
|
||||
let args: Vec<String> = std::env::args().collect();
|
||||
let mut model = String::new();
|
||||
let mut voices = String::new();
|
||||
let mut output = String::new();
|
||||
let mut text: Option<String> = None;
|
||||
let mut voice_name: Option<String> = None;
|
||||
|
||||
let mut i = 1;
|
||||
while i < args.len() {
|
||||
match args[i].as_str() {
|
||||
"--model" => { i += 1; model = args.get(i).cloned().unwrap_or_default(); }
|
||||
"--voices" => { i += 1; voices = args.get(i).cloned().unwrap_or_default(); }
|
||||
"--output" | "--output_file" => { i += 1; output = args.get(i).cloned().unwrap_or_default(); }
|
||||
"--text" => { i += 1; text = args.get(i).cloned(); }
|
||||
"--voice" => { i += 1; voice_name = args.get(i).cloned(); }
|
||||
_ => {}
|
||||
}
|
||||
i += 1;
|
||||
}
|
||||
|
||||
if model.is_empty() { return Err("--model required".into()); }
|
||||
if voices.is_empty() { return Err("--voices required".into()); }
|
||||
if output.is_empty() { return Err("--output required".into()); }
|
||||
|
||||
let voice_str = voice_name.unwrap_or_else(|| "af_bella".into());
|
||||
|
||||
Ok((model, voices, output, voice_str, text))
|
||||
}
|
||||
|
||||
fn select_voice(name: &str) -> Voice {
|
||||
match name {
|
||||
"af_bella" => Voice::AfBella(1.0),
|
||||
"af_heart" => Voice::AfHeart(1.0),
|
||||
"af_jessica" => Voice::AfJessica(1.0),
|
||||
"af_nicole" => Voice::AfNicole(1.0),
|
||||
"af_nova" => Voice::AfNova(1.0),
|
||||
"af_sarah" => Voice::AfSarah(1.0),
|
||||
"af_sky" => Voice::AfSky(1.0),
|
||||
"af_river" => Voice::AfRiver(1.0),
|
||||
"af_alloy" => Voice::AfAlloy(1.0),
|
||||
"af_aoede" => Voice::AfAoede(1.0),
|
||||
"af_kore" => Voice::AfKore(1.0),
|
||||
"am_adam" => Voice::AmAdam(1.0),
|
||||
"am_echo" => Voice::AmEcho(1.0),
|
||||
"am_eric" => Voice::AmEric(1.0),
|
||||
"am_fenrir" => Voice::AmFenrir(1.0),
|
||||
"am_liam" => Voice::AmLiam(1.0),
|
||||
"am_michael" => Voice::AmMichael(1.0),
|
||||
"am_onyx" => Voice::AmOnyx(1.0),
|
||||
"am_puck" => Voice::AmPuck(1.0),
|
||||
"bf_alice" => Voice::BfAlice(1.0),
|
||||
"bf_emma" => Voice::BfEmma(1.0),
|
||||
"bf_isabella" => Voice::BfIsabella(1.0),
|
||||
"bf_lily" => Voice::BfLily(1.0),
|
||||
"bm_daniel" => Voice::BmDaniel(1.0),
|
||||
"bm_fable" => Voice::BmFable(1.0),
|
||||
"bm_george" => Voice::BmGeorge(1.0),
|
||||
"bm_lewis" => Voice::BmLewis(1.0),
|
||||
_ => {
|
||||
eprintln!("[tts-engine] unknown voice '{}', falling back to af_bella", name);
|
||||
Voice::AfBella(1.0)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
#[tokio::main]
|
||||
async fn main() {
|
||||
let (model_path, voices_path, output_path, voice_name, text_arg) = match parse_args() {
|
||||
Ok(v) => v,
|
||||
Err(e) => {
|
||||
eprintln!("Error: {}", e);
|
||||
eprintln!("Usage: tts-engine --model <model.onnx> --voices <voices.bin> --output <output.wav> [--text <text>] [--voice <voice_name>]");
|
||||
std::process::exit(1);
|
||||
}
|
||||
};
|
||||
|
||||
// Get text from --text arg or stdin.
|
||||
let text = match text_arg {
|
||||
Some(t) => t,
|
||||
None => {
|
||||
let mut buf = String::new();
|
||||
io::stdin().read_to_string(&mut buf).expect("failed to read stdin");
|
||||
buf.trim().to_string()
|
||||
}
|
||||
};
|
||||
|
||||
if text.is_empty() {
|
||||
eprintln!("[tts-engine] no text provided");
|
||||
std::process::exit(1);
|
||||
}
|
||||
|
||||
eprintln!("[tts-engine] loading model: {}", model_path);
|
||||
let tts = match KokoroTts::new(&model_path, &voices_path).await {
|
||||
Ok(t) => t,
|
||||
Err(e) => {
|
||||
eprintln!("[tts-engine] failed to load model: {:?}", e);
|
||||
std::process::exit(1);
|
||||
}
|
||||
};
|
||||
|
||||
let voice = select_voice(&voice_name);
|
||||
eprintln!("[tts-engine] synthesizing with voice '{}': \"{}\"", voice_name, text);
|
||||
|
||||
let (samples, duration) = match tts.synth(&text, voice).await {
|
||||
Ok(r) => r,
|
||||
Err(e) => {
|
||||
eprintln!("[tts-engine] synthesis failed: {:?}", e);
|
||||
std::process::exit(1);
|
||||
}
|
||||
};
|
||||
|
||||
eprintln!("[tts-engine] synthesized {} samples in {:?}", samples.len(), duration);
|
||||
|
||||
// Write WAV: 24kHz, 16-bit, mono (same format announcement.ts expects).
|
||||
let spec = hound::WavSpec {
|
||||
channels: 1,
|
||||
sample_rate: 24000,
|
||||
bits_per_sample: 16,
|
||||
sample_format: hound::SampleFormat::Int,
|
||||
};
|
||||
|
||||
let mut writer = match hound::WavWriter::create(&output_path, spec) {
|
||||
Ok(w) => w,
|
||||
Err(e) => {
|
||||
eprintln!("[tts-engine] failed to create WAV: {}", e);
|
||||
std::process::exit(1);
|
||||
}
|
||||
};
|
||||
|
||||
for &sample in &samples {
|
||||
let s16 = (sample * 32767.0).round().clamp(-32768.0, 32767.0) as i16;
|
||||
writer.write_sample(s16).unwrap();
|
||||
}
|
||||
writer.finalize().unwrap();
|
||||
|
||||
eprintln!("[tts-engine] wrote {}", output_path);
|
||||
}
|
||||
@@ -3,6 +3,6 @@
|
||||
*/
|
||||
export const commitinfo = {
|
||||
name: 'siprouter',
|
||||
version: '1.12.0',
|
||||
version: '1.20.2',
|
||||
description: 'undefined'
|
||||
}
|
||||
|
||||
@@ -1,59 +1,22 @@
|
||||
/**
|
||||
* TTS announcement module — pre-generates audio announcements using espeak-ng
|
||||
* and caches them as encoded RTP packets for playback during call setup.
|
||||
* TTS announcement module — generates announcement WAV files at startup.
|
||||
*
|
||||
* On startup, generates the announcement WAV via espeak-ng (formant-based TTS
|
||||
* with highly accurate pronunciation), encodes each 20ms frame to G.722 (for
|
||||
* SIP) and Opus (for WebRTC) via the Rust transcoder, and caches the packets.
|
||||
* Engine priority: espeak-ng (formant TTS, fast) → Kokoro neural TTS via
|
||||
* proxy-engine → disabled.
|
||||
*
|
||||
* Falls back to the Rust tts-engine (Kokoro neural TTS) if espeak-ng is not
|
||||
* installed, and disables announcements if neither is available.
|
||||
* The generated WAV is left on disk for Rust's audio_player / start_interaction
|
||||
* to play during calls. No encoding or RTP playback happens in TypeScript.
|
||||
*/
|
||||
|
||||
import { execSync } from 'node:child_process';
|
||||
import fs from 'node:fs';
|
||||
import path from 'node:path';
|
||||
import { Buffer } from 'node:buffer';
|
||||
import { encodePcm, isCodecReady } from './opusbridge.ts';
|
||||
|
||||
/** RTP clock increment per 20ms frame for each codec. */
|
||||
function rtpClockIncrement(pt: number): number {
|
||||
if (pt === 111) return 960;
|
||||
if (pt === 9) return 160;
|
||||
return 160;
|
||||
}
|
||||
|
||||
/** Build a fresh RTP header. */
|
||||
function buildRtpHeader(pt: number, seq: number, ts: number, ssrc: number, marker: boolean): Buffer {
|
||||
const hdr = Buffer.alloc(12);
|
||||
hdr[0] = 0x80;
|
||||
hdr[1] = (marker ? 0x80 : 0) | (pt & 0x7f);
|
||||
hdr.writeUInt16BE(seq & 0xffff, 2);
|
||||
hdr.writeUInt32BE(ts >>> 0, 4);
|
||||
hdr.writeUInt32BE(ssrc >>> 0, 8);
|
||||
return hdr;
|
||||
}
|
||||
|
||||
// ---------------------------------------------------------------------------
|
||||
// Types
|
||||
// ---------------------------------------------------------------------------
|
||||
|
||||
/** A pre-encoded announcement ready for RTP playback. */
|
||||
export interface IAnnouncementCache {
|
||||
/** G.722 encoded frames (each is a 20ms frame payload, no RTP header). */
|
||||
g722Frames: Buffer[];
|
||||
/** Opus encoded frames for WebRTC playback. */
|
||||
opusFrames: Buffer[];
|
||||
/** Total duration in milliseconds. */
|
||||
durationMs: number;
|
||||
}
|
||||
import { sendProxyCommand, isProxyReady } from './proxybridge.ts';
|
||||
|
||||
// ---------------------------------------------------------------------------
|
||||
// State
|
||||
// ---------------------------------------------------------------------------
|
||||
|
||||
let cachedAnnouncement: IAnnouncementCache | null = null;
|
||||
|
||||
const TTS_DIR = path.join(process.cwd(), '.nogit', 'tts');
|
||||
const ANNOUNCEMENT_TEXT = "Hello. I'm connecting your call now.";
|
||||
const CACHE_WAV = path.join(TTS_DIR, 'announcement.wav');
|
||||
@@ -64,12 +27,10 @@ const KOKORO_VOICES = 'voices.bin';
|
||||
const KOKORO_VOICE = 'af_bella';
|
||||
|
||||
// ---------------------------------------------------------------------------
|
||||
// Initialization
|
||||
// TTS generators
|
||||
// ---------------------------------------------------------------------------
|
||||
|
||||
/**
|
||||
* Check if espeak-ng is available on the system.
|
||||
*/
|
||||
/** Check if espeak-ng is available on the system. */
|
||||
function isEspeakAvailable(): boolean {
|
||||
try {
|
||||
execSync('which espeak-ng', { stdio: 'pipe' });
|
||||
@@ -79,10 +40,7 @@ function isEspeakAvailable(): boolean {
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Generate announcement WAV via espeak-ng (primary engine).
|
||||
* Returns true on success.
|
||||
*/
|
||||
/** Generate announcement WAV via espeak-ng (primary engine). */
|
||||
function generateViaEspeak(wavPath: string, text: string, log: (msg: string) => void): boolean {
|
||||
log('[tts] generating announcement audio via espeak-ng...');
|
||||
try {
|
||||
@@ -98,11 +56,8 @@ function generateViaEspeak(wavPath: string, text: string, log: (msg: string) =>
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Generate announcement WAV via Kokoro TTS (fallback engine).
|
||||
* Returns true on success.
|
||||
*/
|
||||
function generateViaKokoro(wavPath: string, text: string, log: (msg: string) => void): boolean {
|
||||
/** Generate announcement WAV via Kokoro TTS (fallback, runs inside proxy-engine). */
|
||||
async function generateViaKokoro(wavPath: string, text: string, log: (msg: string) => void): Promise<boolean> {
|
||||
const modelPath = path.join(TTS_DIR, KOKORO_MODEL);
|
||||
const voicesPath = path.join(TTS_DIR, KOKORO_VOICES);
|
||||
|
||||
@@ -111,25 +66,21 @@ function generateViaKokoro(wavPath: string, text: string, log: (msg: string) =>
|
||||
return false;
|
||||
}
|
||||
|
||||
const root = process.cwd();
|
||||
const ttsBinPaths = [
|
||||
path.join(root, 'dist_rust', 'tts-engine'),
|
||||
path.join(root, 'rust', 'target', 'release', 'tts-engine'),
|
||||
path.join(root, 'rust', 'target', 'debug', 'tts-engine'),
|
||||
];
|
||||
const ttsBin = ttsBinPaths.find((p) => fs.existsSync(p));
|
||||
if (!ttsBin) {
|
||||
log('[tts] tts-engine binary not found — Kokoro fallback unavailable');
|
||||
if (!isProxyReady()) {
|
||||
log('[tts] proxy-engine not ready — Kokoro fallback unavailable');
|
||||
return false;
|
||||
}
|
||||
|
||||
log('[tts] generating announcement audio via Kokoro TTS (fallback)...');
|
||||
try {
|
||||
execSync(
|
||||
`"${ttsBin}" --model "${modelPath}" --voices "${voicesPath}" --voice "${KOKORO_VOICE}" --output "${wavPath}" --text "${text}"`,
|
||||
{ timeout: 120000, stdio: 'pipe' },
|
||||
);
|
||||
log('[tts] Kokoro WAV generated');
|
||||
await sendProxyCommand('generate_tts', {
|
||||
model: modelPath,
|
||||
voices: voicesPath,
|
||||
voice: KOKORO_VOICE,
|
||||
text,
|
||||
output: wavPath,
|
||||
});
|
||||
log('[tts] Kokoro WAV generated (via proxy-engine)');
|
||||
return true;
|
||||
} catch (e: any) {
|
||||
log(`[tts] Kokoro failed: ${e.message}`);
|
||||
@@ -137,40 +88,13 @@ function generateViaKokoro(wavPath: string, text: string, log: (msg: string) =>
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Read a WAV file and detect its sample rate from the fmt chunk.
|
||||
* Returns { pcm, sampleRate } or null on failure.
|
||||
*/
|
||||
function readWavWithRate(wavPath: string): { pcm: Buffer; sampleRate: number } | null {
|
||||
const wav = fs.readFileSync(wavPath);
|
||||
if (wav.length < 44) return null;
|
||||
if (wav.toString('ascii', 0, 4) !== 'RIFF') return null;
|
||||
if (wav.toString('ascii', 8, 12) !== 'WAVE') return null;
|
||||
|
||||
let sampleRate = 22050; // default
|
||||
let offset = 12;
|
||||
let pcm: Buffer | null = null;
|
||||
|
||||
while (offset < wav.length - 8) {
|
||||
const chunkId = wav.toString('ascii', offset, offset + 4);
|
||||
const chunkSize = wav.readUInt32LE(offset + 4);
|
||||
if (chunkId === 'fmt ') {
|
||||
sampleRate = wav.readUInt32LE(offset + 12);
|
||||
}
|
||||
if (chunkId === 'data') {
|
||||
pcm = wav.subarray(offset + 8, offset + 8 + chunkSize);
|
||||
}
|
||||
offset += 8 + chunkSize;
|
||||
if (offset % 2 !== 0) offset++;
|
||||
}
|
||||
|
||||
if (!pcm) return null;
|
||||
return { pcm, sampleRate };
|
||||
}
|
||||
// ---------------------------------------------------------------------------
|
||||
// Initialization
|
||||
// ---------------------------------------------------------------------------
|
||||
|
||||
/**
|
||||
* Pre-generate the announcement audio and encode to G.722 + Opus frames.
|
||||
* Must be called after the codec bridge is initialized.
|
||||
* Pre-generate the announcement WAV file.
|
||||
* Must be called after the proxy engine is initialized.
|
||||
*
|
||||
* Engine priority: espeak-ng → Kokoro → disabled.
|
||||
*/
|
||||
@@ -178,7 +102,6 @@ export async function initAnnouncement(log: (msg: string) => void): Promise<bool
|
||||
fs.mkdirSync(TTS_DIR, { recursive: true });
|
||||
|
||||
try {
|
||||
// Generate WAV if not cached.
|
||||
if (!fs.existsSync(CACHE_WAV)) {
|
||||
let generated = false;
|
||||
|
||||
@@ -189,9 +112,9 @@ export async function initAnnouncement(log: (msg: string) => void): Promise<bool
|
||||
log('[tts] espeak-ng not installed — trying Kokoro fallback');
|
||||
}
|
||||
|
||||
// Fall back to Kokoro.
|
||||
// Fall back to Kokoro (via proxy-engine).
|
||||
if (!generated) {
|
||||
generated = generateViaKokoro(CACHE_WAV, ANNOUNCEMENT_TEXT, log);
|
||||
generated = await generateViaKokoro(CACHE_WAV, ANNOUNCEMENT_TEXT, log);
|
||||
}
|
||||
|
||||
if (!generated) {
|
||||
@@ -200,49 +123,7 @@ export async function initAnnouncement(log: (msg: string) => void): Promise<bool
|
||||
}
|
||||
}
|
||||
|
||||
// Read WAV and extract raw PCM + sample rate.
|
||||
const result = readWavWithRate(CACHE_WAV);
|
||||
if (!result) {
|
||||
log('[tts] failed to parse WAV file');
|
||||
return false;
|
||||
}
|
||||
|
||||
const { pcm, sampleRate } = result;
|
||||
|
||||
// Wait for codec bridge to be ready.
|
||||
if (!isCodecReady()) {
|
||||
log('[tts] codec bridge not ready — will retry');
|
||||
return false;
|
||||
}
|
||||
|
||||
// Encode in 20ms chunks. The Rust encoder resamples to each codec's native rate.
|
||||
const FRAME_SAMPLES = Math.floor(sampleRate * 0.02);
|
||||
const FRAME_BYTES = FRAME_SAMPLES * 2; // 16-bit = 2 bytes per sample
|
||||
const totalFrames = Math.floor(pcm.length / FRAME_BYTES);
|
||||
|
||||
const g722Frames: Buffer[] = [];
|
||||
const opusFrames: Buffer[] = [];
|
||||
|
||||
log(`[tts] encoding ${totalFrames} frames (${FRAME_SAMPLES} samples/frame @ ${sampleRate}Hz)...`);
|
||||
for (let i = 0; i < totalFrames; i++) {
|
||||
const framePcm = pcm.subarray(i * FRAME_BYTES, (i + 1) * FRAME_BYTES);
|
||||
const pcmBuf = Buffer.from(framePcm);
|
||||
const [g722, opus] = await Promise.all([
|
||||
encodePcm(pcmBuf, sampleRate, 9), // G.722 for SIP devices
|
||||
encodePcm(pcmBuf, sampleRate, 111), // Opus for WebRTC browsers
|
||||
]);
|
||||
if (g722) g722Frames.push(g722);
|
||||
if (opus) opusFrames.push(opus);
|
||||
if (!g722 && !opus && i < 3) log(`[tts] frame ${i} encode failed`);
|
||||
}
|
||||
|
||||
cachedAnnouncement = {
|
||||
g722Frames,
|
||||
opusFrames,
|
||||
durationMs: totalFrames * 20,
|
||||
};
|
||||
|
||||
log(`[tts] announcement cached: ${g722Frames.length} frames (${(totalFrames * 20 / 1000).toFixed(1)}s)`);
|
||||
log('[tts] announcement WAV ready');
|
||||
return true;
|
||||
} catch (e: any) {
|
||||
log(`[tts] init error: ${e.message}`);
|
||||
@@ -250,100 +131,7 @@ export async function initAnnouncement(log: (msg: string) => void): Promise<bool
|
||||
}
|
||||
}
|
||||
|
||||
// ---------------------------------------------------------------------------
|
||||
// Playback
|
||||
// ---------------------------------------------------------------------------
|
||||
|
||||
/**
|
||||
* Play the pre-cached announcement to an RTP endpoint.
|
||||
*
|
||||
* @param sendPacket - function to send a raw RTP packet
|
||||
* @param ssrc - SSRC to use in RTP headers
|
||||
* @param onDone - called when the announcement finishes
|
||||
* @returns a cancel function, or null if no announcement is cached
|
||||
*/
|
||||
export function playAnnouncement(
|
||||
sendPacket: (pkt: Buffer) => void,
|
||||
ssrc: number,
|
||||
onDone?: () => void,
|
||||
): (() => void) | null {
|
||||
if (!cachedAnnouncement || cachedAnnouncement.g722Frames.length === 0) {
|
||||
onDone?.();
|
||||
return null;
|
||||
}
|
||||
|
||||
const frames = cachedAnnouncement.g722Frames;
|
||||
const PT = 9; // G.722
|
||||
let frameIdx = 0;
|
||||
let seq = Math.floor(Math.random() * 0xffff);
|
||||
let rtpTs = Math.floor(Math.random() * 0xffffffff);
|
||||
|
||||
const timer = setInterval(() => {
|
||||
if (frameIdx >= frames.length) {
|
||||
clearInterval(timer);
|
||||
onDone?.();
|
||||
return;
|
||||
}
|
||||
|
||||
const payload = frames[frameIdx];
|
||||
const hdr = buildRtpHeader(PT, seq & 0xffff, rtpTs >>> 0, ssrc >>> 0, frameIdx === 0);
|
||||
const pkt = Buffer.concat([hdr, payload]);
|
||||
sendPacket(pkt);
|
||||
|
||||
seq++;
|
||||
rtpTs += rtpClockIncrement(PT);
|
||||
frameIdx++;
|
||||
}, 20);
|
||||
|
||||
// Return cancel function.
|
||||
return () => clearInterval(timer);
|
||||
/** Get the path to the cached announcement WAV, or null if not generated. */
|
||||
export function getAnnouncementWavPath(): string | null {
|
||||
return fs.existsSync(CACHE_WAV) ? CACHE_WAV : null;
|
||||
}
|
||||
|
||||
/**
|
||||
* Play pre-cached Opus announcement to a WebRTC PeerConnection sender.
|
||||
*
|
||||
* @param sendRtpPacket - function to send a raw RTP packet via sender.sendRtp()
|
||||
* @param ssrc - SSRC to use in RTP headers
|
||||
* @param onDone - called when announcement finishes
|
||||
* @returns cancel function, or null if no announcement cached
|
||||
*/
|
||||
export function playAnnouncementToWebRtc(
|
||||
sendRtpPacket: (pkt: Buffer) => void,
|
||||
ssrc: number,
|
||||
counters: { seq: number; ts: number },
|
||||
onDone?: () => void,
|
||||
): (() => void) | null {
|
||||
if (!cachedAnnouncement || cachedAnnouncement.opusFrames.length === 0) {
|
||||
onDone?.();
|
||||
return null;
|
||||
}
|
||||
|
||||
const frames = cachedAnnouncement.opusFrames;
|
||||
const PT = 111; // Opus
|
||||
let frameIdx = 0;
|
||||
|
||||
const timer = setInterval(() => {
|
||||
if (frameIdx >= frames.length) {
|
||||
clearInterval(timer);
|
||||
onDone?.();
|
||||
return;
|
||||
}
|
||||
|
||||
const payload = frames[frameIdx];
|
||||
const hdr = buildRtpHeader(PT, counters.seq & 0xffff, counters.ts >>> 0, ssrc >>> 0, frameIdx === 0);
|
||||
const pkt = Buffer.concat([hdr, payload]);
|
||||
sendRtpPacket(pkt);
|
||||
|
||||
counters.seq++;
|
||||
counters.ts += 960; // Opus at 48kHz: 960 samples per 20ms
|
||||
frameIdx++;
|
||||
}, 20);
|
||||
|
||||
return () => clearInterval(timer);
|
||||
}
|
||||
|
||||
/** Check if an announcement is cached and ready. */
|
||||
export function isAnnouncementReady(): boolean {
|
||||
return cachedAnnouncement !== null && cachedAnnouncement.g722Frames.length > 0;
|
||||
}
|
||||
|
||||
|
||||
@@ -1,55 +1,31 @@
|
||||
/**
|
||||
* PromptCache — manages multiple named audio prompts for IVR and voicemail.
|
||||
* PromptCache — manages named audio prompt WAV files for IVR and voicemail.
|
||||
*
|
||||
* Each prompt is pre-encoded as both G.722 frames (for SIP legs) and Opus
|
||||
* frames (for WebRTC legs), ready for 20ms RTP playback.
|
||||
* Generates WAV files via espeak-ng (primary) or Kokoro TTS through the
|
||||
* proxy-engine (fallback). Also supports loading pre-existing WAV files
|
||||
* and programmatic tone generation.
|
||||
*
|
||||
* Supports three sources:
|
||||
* 1. TTS generation via espeak-ng (primary) or Kokoro (fallback)
|
||||
* 2. Loading from a pre-existing WAV file
|
||||
* 3. Programmatic tone generation (beep, etc.)
|
||||
*
|
||||
* The existing announcement.ts system continues to work independently;
|
||||
* this module provides generalized prompt management for IVR/voicemail.
|
||||
* All audio playback happens in Rust (audio_player / start_interaction).
|
||||
* This module only manages WAV files on disk.
|
||||
*/
|
||||
|
||||
import { execSync } from 'node:child_process';
|
||||
import fs from 'node:fs';
|
||||
import path from 'node:path';
|
||||
import { Buffer } from 'node:buffer';
|
||||
import { encodePcm, isCodecReady } from '../opusbridge.ts';
|
||||
|
||||
/** RTP clock increment per 20ms frame for each codec. */
|
||||
function rtpClockIncrement(pt: number): number {
|
||||
if (pt === 111) return 960;
|
||||
if (pt === 9) return 160;
|
||||
return 160;
|
||||
}
|
||||
|
||||
/** Build a fresh RTP header. */
|
||||
function buildRtpHeader(pt: number, seq: number, ts: number, ssrc: number, marker: boolean): Buffer {
|
||||
const hdr = Buffer.alloc(12);
|
||||
hdr[0] = 0x80;
|
||||
hdr[1] = (marker ? 0x80 : 0) | (pt & 0x7f);
|
||||
hdr.writeUInt16BE(seq & 0xffff, 2);
|
||||
hdr.writeUInt32BE(ts >>> 0, 4);
|
||||
hdr.writeUInt32BE(ssrc >>> 0, 8);
|
||||
return hdr;
|
||||
}
|
||||
import { sendProxyCommand, isProxyReady } from '../proxybridge.ts';
|
||||
|
||||
// ---------------------------------------------------------------------------
|
||||
// Types
|
||||
// ---------------------------------------------------------------------------
|
||||
|
||||
/** A pre-encoded prompt ready for RTP playback. */
|
||||
/** A cached prompt — just a WAV file path and metadata. */
|
||||
export interface ICachedPrompt {
|
||||
/** Unique prompt identifier. */
|
||||
id: string;
|
||||
/** G.722 encoded frames (20ms each, no RTP header). */
|
||||
g722Frames: Buffer[];
|
||||
/** Opus encoded frames (20ms each, no RTP header). */
|
||||
opusFrames: Buffer[];
|
||||
/** Total duration in milliseconds. */
|
||||
/** Path to the WAV file on disk. */
|
||||
wavPath: string;
|
||||
/** Total duration in milliseconds (approximate, from WAV header). */
|
||||
durationMs: number;
|
||||
}
|
||||
|
||||
@@ -82,84 +58,61 @@ function generateViaEspeak(wavPath: string, text: string): boolean {
|
||||
}
|
||||
}
|
||||
|
||||
/** Generate WAV via Kokoro TTS. */
|
||||
function generateViaKokoro(wavPath: string, text: string, voice: string): boolean {
|
||||
/** Generate WAV via Kokoro TTS (runs inside proxy-engine). */
|
||||
async function generateViaKokoro(wavPath: string, text: string, voice: string): Promise<boolean> {
|
||||
const modelPath = path.join(TTS_DIR, 'kokoro-v1.0.onnx');
|
||||
const voicesPath = path.join(TTS_DIR, 'voices.bin');
|
||||
if (!fs.existsSync(modelPath) || !fs.existsSync(voicesPath)) return false;
|
||||
|
||||
const root = process.cwd();
|
||||
const ttsBin = [
|
||||
path.join(root, 'dist_rust', 'tts-engine'),
|
||||
path.join(root, 'rust', 'target', 'release', 'tts-engine'),
|
||||
path.join(root, 'rust', 'target', 'debug', 'tts-engine'),
|
||||
].find((p) => fs.existsSync(p));
|
||||
if (!ttsBin) return false;
|
||||
if (!isProxyReady()) return false;
|
||||
|
||||
try {
|
||||
execSync(
|
||||
`"${ttsBin}" --model "${modelPath}" --voices "${voicesPath}" --voice "${voice}" --output "${wavPath}" --text "${text}"`,
|
||||
{ timeout: 120000, stdio: 'pipe' },
|
||||
);
|
||||
await sendProxyCommand('generate_tts', {
|
||||
model: modelPath,
|
||||
voices: voicesPath,
|
||||
voice,
|
||||
text,
|
||||
output: wavPath,
|
||||
});
|
||||
return true;
|
||||
} catch {
|
||||
return false;
|
||||
}
|
||||
}
|
||||
|
||||
/** Read a WAV file and return raw PCM + sample rate. */
|
||||
function readWavWithRate(wavPath: string): { pcm: Buffer; sampleRate: number } | null {
|
||||
const wav = fs.readFileSync(wavPath);
|
||||
if (wav.length < 44) return null;
|
||||
if (wav.toString('ascii', 0, 4) !== 'RIFF') return null;
|
||||
if (wav.toString('ascii', 8, 12) !== 'WAVE') return null;
|
||||
/** Read a WAV file's duration from its header. */
|
||||
function getWavDurationMs(wavPath: string): number {
|
||||
try {
|
||||
const wav = fs.readFileSync(wavPath);
|
||||
if (wav.length < 44) return 0;
|
||||
if (wav.toString('ascii', 0, 4) !== 'RIFF') return 0;
|
||||
|
||||
let sampleRate = 22050;
|
||||
let pcm: Buffer | null = null;
|
||||
let offset = 12;
|
||||
let sampleRate = 16000;
|
||||
let dataSize = 0;
|
||||
let bitsPerSample = 16;
|
||||
let channels = 1;
|
||||
let offset = 12;
|
||||
|
||||
while (offset < wav.length - 8) {
|
||||
const chunkId = wav.toString('ascii', offset, offset + 4);
|
||||
const chunkSize = wav.readUInt32LE(offset + 4);
|
||||
if (chunkId === 'fmt ') {
|
||||
sampleRate = wav.readUInt32LE(offset + 12);
|
||||
while (offset < wav.length - 8) {
|
||||
const chunkId = wav.toString('ascii', offset, offset + 4);
|
||||
const chunkSize = wav.readUInt32LE(offset + 4);
|
||||
if (chunkId === 'fmt ') {
|
||||
channels = wav.readUInt16LE(offset + 10);
|
||||
sampleRate = wav.readUInt32LE(offset + 12);
|
||||
bitsPerSample = wav.readUInt16LE(offset + 22);
|
||||
}
|
||||
if (chunkId === 'data') {
|
||||
dataSize = chunkSize;
|
||||
}
|
||||
offset += 8 + chunkSize;
|
||||
if (offset % 2 !== 0) offset++;
|
||||
}
|
||||
if (chunkId === 'data') {
|
||||
pcm = wav.subarray(offset + 8, offset + 8 + chunkSize);
|
||||
}
|
||||
offset += 8 + chunkSize;
|
||||
if (offset % 2 !== 0) offset++;
|
||||
|
||||
const bytesPerSample = (bitsPerSample / 8) * channels;
|
||||
const totalSamples = bytesPerSample > 0 ? dataSize / bytesPerSample : 0;
|
||||
return sampleRate > 0 ? Math.round((totalSamples / sampleRate) * 1000) : 0;
|
||||
} catch {
|
||||
return 0;
|
||||
}
|
||||
|
||||
return pcm ? { pcm, sampleRate } : null;
|
||||
}
|
||||
|
||||
/** Encode raw PCM frames to G.722 + Opus. */
|
||||
async function encodePcmFrames(
|
||||
pcm: Buffer,
|
||||
sampleRate: number,
|
||||
log: (msg: string) => void,
|
||||
): Promise<{ g722Frames: Buffer[]; opusFrames: Buffer[] } | null> {
|
||||
if (!isCodecReady()) return null;
|
||||
|
||||
const frameSamples = Math.floor(sampleRate * 0.02); // 20ms
|
||||
const frameBytes = frameSamples * 2; // 16-bit
|
||||
const totalFrames = Math.floor(pcm.length / frameBytes);
|
||||
|
||||
const g722Frames: Buffer[] = [];
|
||||
const opusFrames: Buffer[] = [];
|
||||
|
||||
for (let i = 0; i < totalFrames; i++) {
|
||||
const framePcm = Buffer.from(pcm.subarray(i * frameBytes, (i + 1) * frameBytes));
|
||||
const [g722, opus] = await Promise.all([
|
||||
encodePcm(framePcm, sampleRate, 9), // G.722
|
||||
encodePcm(framePcm, sampleRate, 111), // Opus
|
||||
]);
|
||||
if (g722) g722Frames.push(g722);
|
||||
if (opus) opusFrames.push(opus);
|
||||
}
|
||||
|
||||
return { g722Frames, opusFrames };
|
||||
}
|
||||
|
||||
// ---------------------------------------------------------------------------
|
||||
@@ -195,7 +148,7 @@ export class PromptCache {
|
||||
}
|
||||
|
||||
/**
|
||||
* Generate a TTS prompt and cache it.
|
||||
* Generate a TTS prompt WAV and cache its path.
|
||||
* Uses espeak-ng (primary) or Kokoro (fallback).
|
||||
*/
|
||||
async generatePrompt(id: string, text: string, voice = 'af_bella'): Promise<ICachedPrompt | null> {
|
||||
@@ -207,14 +160,14 @@ export class PromptCache {
|
||||
this.espeakAvailable = isEspeakAvailable();
|
||||
}
|
||||
|
||||
// Generate WAV.
|
||||
let generated = false;
|
||||
// Generate WAV if not already on disk.
|
||||
if (!fs.existsSync(wavPath)) {
|
||||
let generated = false;
|
||||
if (this.espeakAvailable) {
|
||||
generated = generateViaEspeak(wavPath, text);
|
||||
}
|
||||
if (!generated) {
|
||||
generated = generateViaKokoro(wavPath, text, voice);
|
||||
generated = await generateViaKokoro(wavPath, text, voice);
|
||||
}
|
||||
if (!generated) {
|
||||
this.log(`[prompt-cache] failed to generate TTS for "${id}"`);
|
||||
@@ -223,49 +176,22 @@ export class PromptCache {
|
||||
this.log(`[prompt-cache] generated WAV for "${id}"`);
|
||||
}
|
||||
|
||||
return this.loadWavPrompt(id, wavPath);
|
||||
return this.registerWav(id, wavPath);
|
||||
}
|
||||
|
||||
/**
|
||||
* Load a WAV file as a prompt and cache it.
|
||||
* Load a pre-existing WAV file as a prompt.
|
||||
*/
|
||||
async loadWavPrompt(id: string, wavPath: string): Promise<ICachedPrompt | null> {
|
||||
if (!fs.existsSync(wavPath)) {
|
||||
this.log(`[prompt-cache] WAV not found: ${wavPath}`);
|
||||
return null;
|
||||
}
|
||||
|
||||
const result = readWavWithRate(wavPath);
|
||||
if (!result) {
|
||||
this.log(`[prompt-cache] failed to parse WAV: ${wavPath}`);
|
||||
return null;
|
||||
}
|
||||
|
||||
const encoded = await encodePcmFrames(result.pcm, result.sampleRate, this.log);
|
||||
if (!encoded) {
|
||||
this.log(`[prompt-cache] encoding failed for "${id}" (codec bridge not ready?)`);
|
||||
return null;
|
||||
}
|
||||
|
||||
const durationMs = encoded.g722Frames.length * 20;
|
||||
const prompt: ICachedPrompt = {
|
||||
id,
|
||||
g722Frames: encoded.g722Frames,
|
||||
opusFrames: encoded.opusFrames,
|
||||
durationMs,
|
||||
};
|
||||
|
||||
this.prompts.set(id, prompt);
|
||||
this.log(`[prompt-cache] cached "${id}": ${encoded.g722Frames.length} frames (${(durationMs / 1000).toFixed(1)}s)`);
|
||||
return prompt;
|
||||
return this.registerWav(id, wavPath);
|
||||
}
|
||||
|
||||
/**
|
||||
* Generate a beep tone prompt (sine wave).
|
||||
* @param id - prompt ID
|
||||
* @param freqHz - tone frequency (default 1000 Hz)
|
||||
* @param durationMs - tone duration (default 500ms)
|
||||
* @param amplitude - 16-bit amplitude (default 8000)
|
||||
* Generate a beep tone WAV and cache it.
|
||||
*/
|
||||
async generateBeep(
|
||||
id: string,
|
||||
@@ -273,149 +199,77 @@ export class PromptCache {
|
||||
durationMs = 500,
|
||||
amplitude = 8000,
|
||||
): Promise<ICachedPrompt | null> {
|
||||
// Generate at 16kHz for decent quality.
|
||||
const sampleRate = 16000;
|
||||
const totalSamples = Math.floor((sampleRate * durationMs) / 1000);
|
||||
const pcm = Buffer.alloc(totalSamples * 2);
|
||||
fs.mkdirSync(TTS_DIR, { recursive: true });
|
||||
const wavPath = path.join(TTS_DIR, `prompt-${id}.wav`);
|
||||
|
||||
for (let i = 0; i < totalSamples; i++) {
|
||||
const t = i / sampleRate;
|
||||
// Apply a short fade-in/fade-out to avoid click artifacts.
|
||||
const fadeLen = Math.floor(sampleRate * 0.01); // 10ms fade
|
||||
let envelope = 1.0;
|
||||
if (i < fadeLen) envelope = i / fadeLen;
|
||||
else if (i > totalSamples - fadeLen) envelope = (totalSamples - i) / fadeLen;
|
||||
if (!fs.existsSync(wavPath)) {
|
||||
// Generate 16kHz 16-bit mono sine wave WAV.
|
||||
const sampleRate = 16000;
|
||||
const totalSamples = Math.floor((sampleRate * durationMs) / 1000);
|
||||
const pcm = Buffer.alloc(totalSamples * 2);
|
||||
|
||||
const sample = Math.round(Math.sin(2 * Math.PI * freqHz * t) * amplitude * envelope);
|
||||
pcm.writeInt16LE(Math.max(-32768, Math.min(32767, sample)), i * 2);
|
||||
for (let i = 0; i < totalSamples; i++) {
|
||||
const t = i / sampleRate;
|
||||
const fadeLen = Math.floor(sampleRate * 0.01); // 10ms fade
|
||||
let envelope = 1.0;
|
||||
if (i < fadeLen) envelope = i / fadeLen;
|
||||
else if (i > totalSamples - fadeLen) envelope = (totalSamples - i) / fadeLen;
|
||||
|
||||
const sample = Math.round(Math.sin(2 * Math.PI * freqHz * t) * amplitude * envelope);
|
||||
pcm.writeInt16LE(Math.max(-32768, Math.min(32767, sample)), i * 2);
|
||||
}
|
||||
|
||||
// Write WAV file.
|
||||
const headerSize = 44;
|
||||
const dataSize = pcm.length;
|
||||
const wav = Buffer.alloc(headerSize + dataSize);
|
||||
|
||||
// RIFF header
|
||||
wav.write('RIFF', 0);
|
||||
wav.writeUInt32LE(36 + dataSize, 4);
|
||||
wav.write('WAVE', 8);
|
||||
|
||||
// fmt chunk
|
||||
wav.write('fmt ', 12);
|
||||
wav.writeUInt32LE(16, 16); // chunk size
|
||||
wav.writeUInt16LE(1, 20); // PCM format
|
||||
wav.writeUInt16LE(1, 22); // mono
|
||||
wav.writeUInt32LE(sampleRate, 24);
|
||||
wav.writeUInt32LE(sampleRate * 2, 28); // byte rate
|
||||
wav.writeUInt16LE(2, 32); // block align
|
||||
wav.writeUInt16LE(16, 34); // bits per sample
|
||||
|
||||
// data chunk
|
||||
wav.write('data', 36);
|
||||
wav.writeUInt32LE(dataSize, 40);
|
||||
pcm.copy(wav, 44);
|
||||
|
||||
fs.writeFileSync(wavPath, wav);
|
||||
this.log(`[prompt-cache] beep WAV generated for "${id}"`);
|
||||
}
|
||||
|
||||
const encoded = await encodePcmFrames(pcm, sampleRate, this.log);
|
||||
if (!encoded) {
|
||||
this.log(`[prompt-cache] beep encoding failed for "${id}"`);
|
||||
return null;
|
||||
}
|
||||
|
||||
const actualDuration = encoded.g722Frames.length * 20;
|
||||
const prompt: ICachedPrompt = {
|
||||
id,
|
||||
g722Frames: encoded.g722Frames,
|
||||
opusFrames: encoded.opusFrames,
|
||||
durationMs: actualDuration,
|
||||
};
|
||||
|
||||
this.prompts.set(id, prompt);
|
||||
this.log(`[prompt-cache] beep "${id}" cached: ${actualDuration}ms @ ${freqHz}Hz`);
|
||||
return prompt;
|
||||
return this.registerWav(id, wavPath);
|
||||
}
|
||||
|
||||
/**
|
||||
* Remove a prompt from the cache.
|
||||
*/
|
||||
/** Remove a prompt from the cache. */
|
||||
remove(id: string): void {
|
||||
this.prompts.delete(id);
|
||||
}
|
||||
|
||||
/**
|
||||
* Clear all cached prompts.
|
||||
*/
|
||||
/** Clear all cached prompts. */
|
||||
clear(): void {
|
||||
this.prompts.clear();
|
||||
}
|
||||
}
|
||||
|
||||
// ---------------------------------------------------------------------------
|
||||
// Standalone playback helpers (for use by SystemLeg)
|
||||
// ---------------------------------------------------------------------------
|
||||
// -------------------------------------------------------------------------
|
||||
// Internal
|
||||
// -------------------------------------------------------------------------
|
||||
|
||||
/**
|
||||
* Play a cached prompt's G.722 frames as RTP packets at 20ms intervals.
|
||||
*
|
||||
* @param prompt - the cached prompt to play
|
||||
* @param sendPacket - function to send a raw RTP packet (12-byte header + payload)
|
||||
* @param ssrc - SSRC for RTP headers
|
||||
* @param onDone - called when playback finishes
|
||||
* @returns cancel function, or null if prompt has no G.722 frames
|
||||
*/
|
||||
export function playPromptG722(
|
||||
prompt: ICachedPrompt,
|
||||
sendPacket: (pkt: Buffer) => void,
|
||||
ssrc: number,
|
||||
onDone?: () => void,
|
||||
): (() => void) | null {
|
||||
if (prompt.g722Frames.length === 0) {
|
||||
onDone?.();
|
||||
return null;
|
||||
private registerWav(id: string, wavPath: string): ICachedPrompt {
|
||||
const durationMs = getWavDurationMs(wavPath);
|
||||
const prompt: ICachedPrompt = { id, wavPath, durationMs };
|
||||
this.prompts.set(id, prompt);
|
||||
this.log(`[prompt-cache] cached "${id}": ${wavPath} (${(durationMs / 1000).toFixed(1)}s)`);
|
||||
return prompt;
|
||||
}
|
||||
|
||||
const frames = prompt.g722Frames;
|
||||
const PT = 9;
|
||||
let frameIdx = 0;
|
||||
let seq = Math.floor(Math.random() * 0xffff);
|
||||
let rtpTs = Math.floor(Math.random() * 0xffffffff);
|
||||
|
||||
const timer = setInterval(() => {
|
||||
if (frameIdx >= frames.length) {
|
||||
clearInterval(timer);
|
||||
onDone?.();
|
||||
return;
|
||||
}
|
||||
|
||||
const payload = frames[frameIdx];
|
||||
const hdr = buildRtpHeader(PT, seq & 0xffff, rtpTs >>> 0, ssrc >>> 0, frameIdx === 0);
|
||||
const pkt = Buffer.concat([hdr, payload]);
|
||||
sendPacket(pkt);
|
||||
|
||||
seq++;
|
||||
rtpTs += rtpClockIncrement(PT);
|
||||
frameIdx++;
|
||||
}, 20);
|
||||
|
||||
return () => clearInterval(timer);
|
||||
}
|
||||
|
||||
/**
|
||||
* Play a cached prompt's Opus frames as RTP packets at 20ms intervals.
|
||||
*
|
||||
* @param prompt - the cached prompt to play
|
||||
* @param sendPacket - function to send a raw RTP packet
|
||||
* @param ssrc - SSRC for RTP headers
|
||||
* @param counters - shared seq/ts counters (mutated in place for seamless transitions)
|
||||
* @param onDone - called when playback finishes
|
||||
* @returns cancel function, or null if prompt has no Opus frames
|
||||
*/
|
||||
export function playPromptOpus(
|
||||
prompt: ICachedPrompt,
|
||||
sendPacket: (pkt: Buffer) => void,
|
||||
ssrc: number,
|
||||
counters: { seq: number; ts: number },
|
||||
onDone?: () => void,
|
||||
): (() => void) | null {
|
||||
if (prompt.opusFrames.length === 0) {
|
||||
onDone?.();
|
||||
return null;
|
||||
}
|
||||
|
||||
const frames = prompt.opusFrames;
|
||||
const PT = 111;
|
||||
let frameIdx = 0;
|
||||
|
||||
const timer = setInterval(() => {
|
||||
if (frameIdx >= frames.length) {
|
||||
clearInterval(timer);
|
||||
onDone?.();
|
||||
return;
|
||||
}
|
||||
|
||||
const payload = frames[frameIdx];
|
||||
const hdr = buildRtpHeader(PT, counters.seq & 0xffff, counters.ts >>> 0, ssrc >>> 0, frameIdx === 0);
|
||||
const pkt = Buffer.concat([hdr, payload]);
|
||||
sendPacket(pkt);
|
||||
|
||||
counters.seq++;
|
||||
counters.ts += 960; // Opus 48kHz: 960 samples per 20ms
|
||||
frameIdx++;
|
||||
}, 20);
|
||||
|
||||
return () => clearInterval(timer);
|
||||
}
|
||||
|
||||
@@ -128,14 +128,19 @@ async function handleRequest(
|
||||
}
|
||||
}
|
||||
|
||||
// API: add leg to call.
|
||||
// API: add a SIP device to a call (mid-call INVITE to desk phone).
|
||||
if (url.pathname.startsWith('/api/call/') && url.pathname.endsWith('/addleg') && method === 'POST') {
|
||||
try {
|
||||
const callId = url.pathname.split('/')[3];
|
||||
const body = await readJsonBody(req);
|
||||
if (!body?.deviceId) return sendJson(res, { ok: false, error: 'missing deviceId' }, 400);
|
||||
const ok = callManager?.addDeviceToCall(callId, body.deviceId) ?? false;
|
||||
return sendJson(res, { ok });
|
||||
const { addDeviceLeg } = await import('./proxybridge.ts');
|
||||
const legId = await addDeviceLeg(callId, body.deviceId);
|
||||
if (legId) {
|
||||
return sendJson(res, { ok: true, legId });
|
||||
} else {
|
||||
return sendJson(res, { ok: false, error: 'device not registered or call not found' }, 404);
|
||||
}
|
||||
} catch (e: any) {
|
||||
return sendJson(res, { ok: false, error: e.message }, 400);
|
||||
}
|
||||
@@ -147,8 +152,9 @@ async function handleRequest(
|
||||
const callId = url.pathname.split('/')[3];
|
||||
const body = await readJsonBody(req);
|
||||
if (!body?.number) return sendJson(res, { ok: false, error: 'missing number' }, 400);
|
||||
const ok = callManager?.addExternalToCall(callId, body.number, body.providerId) ?? false;
|
||||
return sendJson(res, { ok });
|
||||
const { addLeg: addLegFn } = await import('./proxybridge.ts');
|
||||
const legId = await addLegFn(callId, body.number, body.providerId);
|
||||
return sendJson(res, { ok: !!legId, legId });
|
||||
} catch (e: any) {
|
||||
return sendJson(res, { ok: false, error: e.message }, 400);
|
||||
}
|
||||
@@ -160,22 +166,22 @@ async function handleRequest(
|
||||
const callId = url.pathname.split('/')[3];
|
||||
const body = await readJsonBody(req);
|
||||
if (!body?.legId) return sendJson(res, { ok: false, error: 'missing legId' }, 400);
|
||||
const ok = callManager?.removeLegFromCall(callId, body.legId) ?? false;
|
||||
const { removeLeg: removeLegFn } = await import('./proxybridge.ts');
|
||||
const ok = await removeLegFn(callId, body.legId);
|
||||
return sendJson(res, { ok });
|
||||
} catch (e: any) {
|
||||
return sendJson(res, { ok: false, error: e.message }, 400);
|
||||
}
|
||||
}
|
||||
|
||||
// API: transfer leg.
|
||||
// API: transfer leg (not yet implemented).
|
||||
if (url.pathname === '/api/transfer' && method === 'POST') {
|
||||
try {
|
||||
const body = await readJsonBody(req);
|
||||
if (!body?.sourceCallId || !body?.legId || !body?.targetCallId) {
|
||||
return sendJson(res, { ok: false, error: 'missing sourceCallId, legId, or targetCallId' }, 400);
|
||||
}
|
||||
const ok = callManager?.transferLeg(body.sourceCallId, body.legId, body.targetCallId) ?? false;
|
||||
return sendJson(res, { ok });
|
||||
return sendJson(res, { ok: false, error: 'not yet implemented' }, 501);
|
||||
} catch (e: any) {
|
||||
return sendJson(res, { ok: false, error: e.message }, 400);
|
||||
}
|
||||
@@ -339,11 +345,13 @@ export function initWebUi(
|
||||
onHangupCall: (callId: string) => boolean,
|
||||
onConfigSaved?: () => void,
|
||||
callManager?: CallManager,
|
||||
voiceboxManager?: VoiceboxManager,
|
||||
/** WebRTC signaling handlers — forwarded to Rust proxy-engine. */
|
||||
onWebRtcOffer?: (sessionId: string, sdp: string, ws: WebSocket) => Promise<void>,
|
||||
onWebRtcIce?: (sessionId: string, candidate: any) => Promise<void>,
|
||||
onWebRtcClose?: (sessionId: string) => Promise<void>,
|
||||
voiceboxManager?: VoiceboxManager,
|
||||
/** Called when browser sends webrtc-accept (callId + sessionId linking). */
|
||||
onWebRtcAccept?: (callId: string, sessionId: string) => void,
|
||||
): void {
|
||||
const WEB_PORT = 3060;
|
||||
|
||||
@@ -382,6 +390,7 @@ export function initWebUi(
|
||||
if (msg.type === 'webrtc-offer' && msg.sessionId) {
|
||||
// Forward to Rust proxy-engine for WebRTC handling.
|
||||
if (onWebRtcOffer) {
|
||||
log(`[webrtc-ws] offer msg keys: ${Object.keys(msg).join(',')}, sdp type: ${typeof msg.sdp}, sdp len: ${msg.sdp?.length || 0}`);
|
||||
onWebRtcOffer(msg.sessionId, msg.sdp, socket as any).catch((e: any) =>
|
||||
log(`[webrtc] offer error: ${e.message}`));
|
||||
}
|
||||
@@ -394,8 +403,10 @@ export function initWebUi(
|
||||
onWebRtcClose(msg.sessionId).catch(() => {});
|
||||
}
|
||||
} else if (msg.type === 'webrtc-accept' && msg.callId) {
|
||||
// TODO: Wire to Rust call linking.
|
||||
log(`[webrtc] accept: call=${msg.callId} session=${msg.sessionId || 'none'}`);
|
||||
if (onWebRtcAccept && msg.sessionId) {
|
||||
onWebRtcAccept(msg.callId, msg.sessionId);
|
||||
}
|
||||
} else if (msg.type?.startsWith('webrtc-')) {
|
||||
msg._remoteIp = remoteIp;
|
||||
handleWebRtcSignaling(socket as any, msg);
|
||||
|
||||
199
ts/opusbridge.ts
199
ts/opusbridge.ts
@@ -1,199 +0,0 @@
|
||||
/**
|
||||
* Audio transcoding bridge — uses smartrust to communicate with the Rust
|
||||
* opus-codec binary, which handles Opus ↔ G.722 ↔ PCMU/PCMA transcoding.
|
||||
*
|
||||
* All codec conversion happens in Rust (libopus + SpanDSP G.722 port).
|
||||
* The TypeScript side just passes raw payloads back and forth.
|
||||
*/
|
||||
|
||||
import path from 'node:path';
|
||||
import { RustBridge } from '@push.rocks/smartrust';
|
||||
|
||||
// ---------------------------------------------------------------------------
|
||||
// Command type map for smartrust
|
||||
// ---------------------------------------------------------------------------
|
||||
|
||||
type TCodecCommands = {
|
||||
init: {
|
||||
params: Record<string, never>;
|
||||
result: Record<string, never>;
|
||||
};
|
||||
create_session: {
|
||||
params: { session_id: string };
|
||||
result: Record<string, never>;
|
||||
};
|
||||
destroy_session: {
|
||||
params: { session_id: string };
|
||||
result: Record<string, never>;
|
||||
};
|
||||
transcode: {
|
||||
params: { data_b64: string; from_pt: number; to_pt: number; session_id?: string; direction?: string };
|
||||
result: { data_b64: string };
|
||||
};
|
||||
encode_pcm: {
|
||||
params: { data_b64: string; sample_rate: number; to_pt: number; session_id?: string };
|
||||
result: { data_b64: string };
|
||||
};
|
||||
};
|
||||
|
||||
// ---------------------------------------------------------------------------
|
||||
// Bridge singleton
|
||||
// ---------------------------------------------------------------------------
|
||||
|
||||
let bridge: RustBridge<TCodecCommands> | null = null;
|
||||
let initialized = false;
|
||||
|
||||
function buildLocalPaths(): string[] {
|
||||
const root = process.cwd();
|
||||
return [
|
||||
path.join(root, 'dist_rust', 'opus-codec'),
|
||||
path.join(root, 'rust', 'target', 'release', 'opus-codec'),
|
||||
path.join(root, 'rust', 'target', 'debug', 'opus-codec'),
|
||||
];
|
||||
}
|
||||
|
||||
let logFn: ((msg: string) => void) | undefined;
|
||||
|
||||
/**
|
||||
* Initialize the audio transcoding bridge. Spawns the Rust binary.
|
||||
*/
|
||||
export async function initCodecBridge(log?: (msg: string) => void): Promise<boolean> {
|
||||
if (initialized && bridge) return true;
|
||||
logFn = log;
|
||||
|
||||
try {
|
||||
bridge = new RustBridge<TCodecCommands>({
|
||||
binaryName: 'opus-codec',
|
||||
localPaths: buildLocalPaths(),
|
||||
});
|
||||
|
||||
const spawned = await bridge.spawn();
|
||||
if (!spawned) {
|
||||
log?.('[codec] failed to spawn opus-codec binary');
|
||||
bridge = null;
|
||||
return false;
|
||||
}
|
||||
|
||||
// Auto-restart: reset state when the Rust process exits so the next
|
||||
// transcode attempt triggers re-initialization instead of silent failure.
|
||||
bridge.on('exit', () => {
|
||||
logFn?.('[codec] Rust audio transcoder process exited — will re-init on next use');
|
||||
bridge = null;
|
||||
initialized = false;
|
||||
});
|
||||
|
||||
await bridge.sendCommand('init', {} as any);
|
||||
initialized = true;
|
||||
log?.('[codec] Rust audio transcoder initialized (Opus + G.722 + PCMU/PCMA)');
|
||||
return true;
|
||||
} catch (e: any) {
|
||||
log?.(`[codec] init error: ${e.message}`);
|
||||
bridge = null;
|
||||
return false;
|
||||
}
|
||||
}
|
||||
|
||||
// ---------------------------------------------------------------------------
|
||||
// Session management — per-call codec isolation
|
||||
// ---------------------------------------------------------------------------
|
||||
|
||||
/**
|
||||
* Create an isolated codec session. Each session gets its own Opus/G.722
|
||||
* encoder/decoder state, preventing concurrent calls from corrupting each
|
||||
* other's stateful codec predictions.
|
||||
*/
|
||||
export async function createSession(sessionId: string): Promise<boolean> {
|
||||
if (!bridge || !initialized) {
|
||||
// Attempt auto-reinit if bridge died.
|
||||
const ok = await initCodecBridge(logFn);
|
||||
if (!ok) return false;
|
||||
}
|
||||
try {
|
||||
await bridge!.sendCommand('create_session', { session_id: sessionId });
|
||||
return true;
|
||||
} catch (e: any) {
|
||||
logFn?.(`[codec] create_session error: ${e?.message || e}`);
|
||||
return false;
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Destroy a codec session, freeing its encoder/decoder state.
|
||||
*/
|
||||
export async function destroySession(sessionId: string): Promise<void> {
|
||||
if (!bridge || !initialized) return;
|
||||
try {
|
||||
await bridge.sendCommand('destroy_session', { session_id: sessionId });
|
||||
} catch {
|
||||
// Best-effort cleanup.
|
||||
}
|
||||
}
|
||||
|
||||
// ---------------------------------------------------------------------------
|
||||
// Transcoding
|
||||
// ---------------------------------------------------------------------------
|
||||
|
||||
/**
|
||||
* Transcode an RTP payload between two codecs.
|
||||
* All codec work (Opus, G.722, PCMU, PCMA) + resampling happens in Rust.
|
||||
*
|
||||
* @param data - raw RTP payload (no header)
|
||||
* @param fromPT - source payload type (0=PCMU, 8=PCMA, 9=G.722, 111=Opus)
|
||||
* @param toPT - target payload type
|
||||
* @param sessionId - optional session for isolated codec state
|
||||
* @returns transcoded payload, or null on failure
|
||||
*/
|
||||
export async function transcode(data: Buffer, fromPT: number, toPT: number, sessionId?: string, direction?: string): Promise<Buffer | null> {
|
||||
if (!bridge || !initialized) return null;
|
||||
try {
|
||||
const params: any = {
|
||||
data_b64: data.toString('base64'),
|
||||
from_pt: fromPT,
|
||||
to_pt: toPT,
|
||||
};
|
||||
if (sessionId) params.session_id = sessionId;
|
||||
if (direction) params.direction = direction;
|
||||
const result = await bridge.sendCommand('transcode', params);
|
||||
return Buffer.from(result.data_b64, 'base64');
|
||||
} catch {
|
||||
return null;
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Encode raw 16-bit PCM to a target codec.
|
||||
* @param pcmData - raw 16-bit LE PCM bytes
|
||||
* @param sampleRate - input sample rate (e.g. 22050 for Piper TTS)
|
||||
* @param toPT - target payload type (9=G.722, 111=Opus, 0=PCMU, 8=PCMA)
|
||||
* @param sessionId - optional session for isolated codec state
|
||||
*/
|
||||
export async function encodePcm(pcmData: Buffer, sampleRate: number, toPT: number, sessionId?: string): Promise<Buffer | null> {
|
||||
if (!bridge || !initialized) return null;
|
||||
try {
|
||||
const params: any = {
|
||||
data_b64: pcmData.toString('base64'),
|
||||
sample_rate: sampleRate,
|
||||
to_pt: toPT,
|
||||
};
|
||||
if (sessionId) params.session_id = sessionId;
|
||||
const result = await bridge.sendCommand('encode_pcm', params);
|
||||
return Buffer.from(result.data_b64, 'base64');
|
||||
} catch (e: any) {
|
||||
console.error('[encodePcm] error:', e?.message || e);
|
||||
return null;
|
||||
}
|
||||
}
|
||||
|
||||
/** Check if the codec bridge is ready. */
|
||||
export function isCodecReady(): boolean {
|
||||
return initialized && bridge !== null;
|
||||
}
|
||||
|
||||
/** Shut down the codec bridge. */
|
||||
export function shutdownCodecBridge(): void {
|
||||
if (bridge) {
|
||||
try { bridge.kill(); } catch { /* ignore */ }
|
||||
bridge = null;
|
||||
initialized = false;
|
||||
}
|
||||
}
|
||||
@@ -41,6 +41,48 @@ type TProxyCommands = {
|
||||
params: { call_id: string };
|
||||
result: { file_path: string; duration_ms: number };
|
||||
};
|
||||
add_device_leg: {
|
||||
params: { call_id: string; device_id: string };
|
||||
result: { leg_id: string };
|
||||
};
|
||||
transfer_leg: {
|
||||
params: { source_call_id: string; leg_id: string; target_call_id: string };
|
||||
result: Record<string, never>;
|
||||
};
|
||||
replace_leg: {
|
||||
params: { call_id: string; old_leg_id: string; number: string; provider_id?: string };
|
||||
result: { new_leg_id: string };
|
||||
};
|
||||
start_interaction: {
|
||||
params: {
|
||||
call_id: string;
|
||||
leg_id: string;
|
||||
prompt_wav: string;
|
||||
expected_digits: string;
|
||||
timeout_ms: number;
|
||||
};
|
||||
result: { result: 'digit' | 'timeout' | 'cancelled'; digit?: string };
|
||||
};
|
||||
add_tool_leg: {
|
||||
params: {
|
||||
call_id: string;
|
||||
tool_type: 'recording' | 'transcription';
|
||||
config?: Record<string, unknown>;
|
||||
};
|
||||
result: { tool_leg_id: string };
|
||||
};
|
||||
remove_tool_leg: {
|
||||
params: { call_id: string; tool_leg_id: string };
|
||||
result: Record<string, never>;
|
||||
};
|
||||
set_leg_metadata: {
|
||||
params: { call_id: string; leg_id: string; key: string; value: unknown };
|
||||
result: Record<string, never>;
|
||||
};
|
||||
generate_tts: {
|
||||
params: { model: string; voices: string; voice: string; text: string; output: string };
|
||||
result: { output: string };
|
||||
};
|
||||
};
|
||||
|
||||
// ---------------------------------------------------------------------------
|
||||
@@ -52,6 +94,11 @@ export interface IIncomingCallEvent {
|
||||
from_uri: string;
|
||||
to_number: string;
|
||||
provider_id: string;
|
||||
/** Whether registered browsers should see a `webrtc-incoming` toast for
|
||||
* this call. Set by the Rust engine from the matched inbound route's
|
||||
* `ringBrowsers` flag (defaults to `true` when no route matches, so
|
||||
* deployments without explicit routes preserve pre-routing behavior). */
|
||||
ring_browsers?: boolean;
|
||||
}
|
||||
|
||||
export interface IOutboundCallEvent {
|
||||
@@ -92,8 +139,22 @@ let logFn: ((msg: string) => void) | undefined;
|
||||
|
||||
function buildLocalPaths(): string[] {
|
||||
const root = process.cwd();
|
||||
// Map Node's process.arch to tsrust's friendly target name.
|
||||
// tsrust writes multi-target binaries as <bin>_<os>_<arch>,
|
||||
// e.g. proxy-engine_linux_amd64 / proxy-engine_linux_arm64.
|
||||
const archSuffix =
|
||||
process.arch === 'arm64' ? 'linux_arm64' :
|
||||
process.arch === 'x64' ? 'linux_amd64' :
|
||||
null;
|
||||
const multiTarget = archSuffix
|
||||
? [path.join(root, 'dist_rust', `proxy-engine_${archSuffix}`)]
|
||||
: [];
|
||||
return [
|
||||
// 1. Multi-target output matching the running host arch (Docker image, CI, multi-target dev).
|
||||
...multiTarget,
|
||||
// 2. Single-target (unsuffixed) output — legacy/fallback when tsrust runs without targets.
|
||||
path.join(root, 'dist_rust', 'proxy-engine'),
|
||||
// 3. Direct cargo builds for dev iteration.
|
||||
path.join(root, 'rust', 'target', 'release', 'proxy-engine'),
|
||||
path.join(root, 'rust', 'target', 'debug', 'proxy-engine'),
|
||||
];
|
||||
@@ -238,6 +299,38 @@ export async function webrtcLink(sessionId: string, callId: string, providerMedi
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Add an external SIP leg to an existing call (multiparty).
|
||||
*/
|
||||
export async function addLeg(callId: string, number: string, providerId?: string): Promise<string | null> {
|
||||
if (!bridge || !initialized) return null;
|
||||
try {
|
||||
const result = await bridge.sendCommand('add_leg', {
|
||||
call_id: callId,
|
||||
number,
|
||||
provider_id: providerId,
|
||||
} as any);
|
||||
return (result as any)?.leg_id || null;
|
||||
} catch (e: any) {
|
||||
logFn?.(`[proxy-engine] add_leg error: ${e?.message || e}`);
|
||||
return null;
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Remove a leg from a call.
|
||||
*/
|
||||
export async function removeLeg(callId: string, legId: string): Promise<boolean> {
|
||||
if (!bridge || !initialized) return false;
|
||||
try {
|
||||
await bridge.sendCommand('remove_leg', { call_id: callId, leg_id: legId } as any);
|
||||
return true;
|
||||
} catch (e: any) {
|
||||
logFn?.(`[proxy-engine] remove_leg error: ${e?.message || e}`);
|
||||
return false;
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Close a WebRTC session.
|
||||
*/
|
||||
@@ -248,11 +341,170 @@ export async function webrtcClose(sessionId: string): Promise<void> {
|
||||
} catch { /* ignore */ }
|
||||
}
|
||||
|
||||
// ---------------------------------------------------------------------------
|
||||
// Device leg & interaction commands
|
||||
// ---------------------------------------------------------------------------
|
||||
|
||||
/**
|
||||
* Add a local SIP device to an existing call (mid-call INVITE to desk phone).
|
||||
*/
|
||||
export async function addDeviceLeg(callId: string, deviceId: string): Promise<string | null> {
|
||||
if (!bridge || !initialized) return null;
|
||||
try {
|
||||
const result = await bridge.sendCommand('add_device_leg', {
|
||||
call_id: callId,
|
||||
device_id: deviceId,
|
||||
} as any);
|
||||
return (result as any)?.leg_id || null;
|
||||
} catch (e: any) {
|
||||
logFn?.(`[proxy-engine] add_device_leg error: ${e?.message || e}`);
|
||||
return null;
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Transfer a leg from one call to another (leg stays connected, switches mixer).
|
||||
*/
|
||||
export async function transferLeg(
|
||||
sourceCallId: string,
|
||||
legId: string,
|
||||
targetCallId: string,
|
||||
): Promise<boolean> {
|
||||
if (!bridge || !initialized) return false;
|
||||
try {
|
||||
await bridge.sendCommand('transfer_leg', {
|
||||
source_call_id: sourceCallId,
|
||||
leg_id: legId,
|
||||
target_call_id: targetCallId,
|
||||
} as any);
|
||||
return true;
|
||||
} catch (e: any) {
|
||||
logFn?.(`[proxy-engine] transfer_leg error: ${e?.message || e}`);
|
||||
return false;
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Replace a leg: terminate the old leg and dial a new number into the same call.
|
||||
*/
|
||||
export async function replaceLeg(
|
||||
callId: string,
|
||||
oldLegId: string,
|
||||
number: string,
|
||||
providerId?: string,
|
||||
): Promise<string | null> {
|
||||
if (!bridge || !initialized) return null;
|
||||
try {
|
||||
const result = await bridge.sendCommand('replace_leg', {
|
||||
call_id: callId,
|
||||
old_leg_id: oldLegId,
|
||||
number,
|
||||
provider_id: providerId,
|
||||
} as any);
|
||||
return (result as any)?.new_leg_id || null;
|
||||
} catch (e: any) {
|
||||
logFn?.(`[proxy-engine] replace_leg error: ${e?.message || e}`);
|
||||
return null;
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Start an interaction on a specific leg — isolate it, play a prompt, collect DTMF.
|
||||
* Blocks until the interaction completes (digit pressed, timeout, or cancelled).
|
||||
*/
|
||||
export async function startInteraction(
|
||||
callId: string,
|
||||
legId: string,
|
||||
promptWav: string,
|
||||
expectedDigits: string,
|
||||
timeoutMs: number,
|
||||
): Promise<{ result: 'digit' | 'timeout' | 'cancelled'; digit?: string } | null> {
|
||||
if (!bridge || !initialized) return null;
|
||||
try {
|
||||
const result = await bridge.sendCommand('start_interaction', {
|
||||
call_id: callId,
|
||||
leg_id: legId,
|
||||
prompt_wav: promptWav,
|
||||
expected_digits: expectedDigits,
|
||||
timeout_ms: timeoutMs,
|
||||
} as any);
|
||||
return result as any;
|
||||
} catch (e: any) {
|
||||
logFn?.(`[proxy-engine] start_interaction error: ${e?.message || e}`);
|
||||
return null;
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Add a tool leg (recording or transcription) to a call.
|
||||
* Tool legs receive per-source unmerged audio from all participants.
|
||||
*/
|
||||
export async function addToolLeg(
|
||||
callId: string,
|
||||
toolType: 'recording' | 'transcription',
|
||||
config?: Record<string, unknown>,
|
||||
): Promise<string | null> {
|
||||
if (!bridge || !initialized) return null;
|
||||
try {
|
||||
const result = await bridge.sendCommand('add_tool_leg', {
|
||||
call_id: callId,
|
||||
tool_type: toolType,
|
||||
config,
|
||||
} as any);
|
||||
return (result as any)?.tool_leg_id || null;
|
||||
} catch (e: any) {
|
||||
logFn?.(`[proxy-engine] add_tool_leg error: ${e?.message || e}`);
|
||||
return null;
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Remove a tool leg from a call. Triggers finalization (WAV files, metadata).
|
||||
*/
|
||||
export async function removeToolLeg(callId: string, toolLegId: string): Promise<boolean> {
|
||||
if (!bridge || !initialized) return false;
|
||||
try {
|
||||
await bridge.sendCommand('remove_tool_leg', {
|
||||
call_id: callId,
|
||||
tool_leg_id: toolLegId,
|
||||
} as any);
|
||||
return true;
|
||||
} catch (e: any) {
|
||||
logFn?.(`[proxy-engine] remove_tool_leg error: ${e?.message || e}`);
|
||||
return false;
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Set a metadata key-value pair on a leg.
|
||||
*/
|
||||
export async function setLegMetadata(
|
||||
callId: string,
|
||||
legId: string,
|
||||
key: string,
|
||||
value: unknown,
|
||||
): Promise<boolean> {
|
||||
if (!bridge || !initialized) return false;
|
||||
try {
|
||||
await bridge.sendCommand('set_leg_metadata', {
|
||||
call_id: callId,
|
||||
leg_id: legId,
|
||||
key,
|
||||
value,
|
||||
} as any);
|
||||
return true;
|
||||
} catch (e: any) {
|
||||
logFn?.(`[proxy-engine] set_leg_metadata error: ${e?.message || e}`);
|
||||
return false;
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Subscribe to an event from the proxy engine.
|
||||
* Event names: incoming_call, outbound_device_call, call_ringing,
|
||||
* call_answered, call_ended, provider_registered, device_registered,
|
||||
* dtmf_digit, recording_done, sip_unhandled
|
||||
* dtmf_digit, recording_done, tool_recording_done, tool_transcription_done,
|
||||
* leg_added, leg_removed, sip_unhandled
|
||||
*/
|
||||
export function onProxyEvent(event: string, handler: (data: any) => void): void {
|
||||
if (!bridge) throw new Error('proxy engine not initialized');
|
||||
@@ -264,6 +516,15 @@ export function isProxyReady(): boolean {
|
||||
return initialized && bridge !== null;
|
||||
}
|
||||
|
||||
/** Send an arbitrary command to the proxy engine bridge. */
|
||||
export async function sendProxyCommand<K extends keyof TProxyCommands>(
|
||||
method: K,
|
||||
params: TProxyCommands[K]['params'],
|
||||
): Promise<TProxyCommands[K]['result']> {
|
||||
if (!bridge || !initialized) throw new Error('proxy engine not initialized');
|
||||
return bridge.sendCommand(method as string, params as any) as any;
|
||||
}
|
||||
|
||||
/** Shut down the proxy engine. */
|
||||
export function shutdownProxyEngine(): void {
|
||||
if (bridge) {
|
||||
|
||||
224
ts/sipproxy.ts
224
ts/sipproxy.ts
@@ -24,7 +24,6 @@ import {
|
||||
getAllBrowserDeviceIds,
|
||||
getBrowserDeviceWs,
|
||||
} from './webrtcbridge.ts';
|
||||
import { initCodecBridge } from './opusbridge.ts';
|
||||
import { initAnnouncement } from './announcement.ts';
|
||||
import { PromptCache } from './call/prompt-cache.ts';
|
||||
import { VoiceboxManager } from './voicebox.ts';
|
||||
@@ -37,7 +36,10 @@ import {
|
||||
shutdownProxyEngine,
|
||||
webrtcOffer,
|
||||
webrtcIce,
|
||||
webrtcLink,
|
||||
webrtcClose,
|
||||
addLeg,
|
||||
removeLeg,
|
||||
} from './proxybridge.ts';
|
||||
import type {
|
||||
IIncomingCallEvent,
|
||||
@@ -93,6 +95,16 @@ interface IDeviceStatus {
|
||||
isBrowser: boolean;
|
||||
}
|
||||
|
||||
interface IActiveLeg {
|
||||
id: string;
|
||||
type: 'sip-device' | 'sip-provider' | 'webrtc' | 'tool';
|
||||
state: string;
|
||||
codec: string | null;
|
||||
rtpPort: number | null;
|
||||
remoteMedia: string | null;
|
||||
metadata: Record<string, unknown>;
|
||||
}
|
||||
|
||||
interface IActiveCall {
|
||||
id: string;
|
||||
direction: string;
|
||||
@@ -101,6 +113,13 @@ interface IActiveCall {
|
||||
providerUsed: string | null;
|
||||
state: string;
|
||||
startedAt: number;
|
||||
legs: Map<string, IActiveLeg>;
|
||||
}
|
||||
|
||||
interface IHistoryLeg {
|
||||
id: string;
|
||||
type: string;
|
||||
metadata: Record<string, unknown>;
|
||||
}
|
||||
|
||||
interface ICallHistoryEntry {
|
||||
@@ -110,6 +129,7 @@ interface ICallHistoryEntry {
|
||||
calleeNumber: string | null;
|
||||
startedAt: number;
|
||||
duration: number;
|
||||
legs: IHistoryLeg[];
|
||||
}
|
||||
|
||||
const providerStatuses = new Map<string, IProviderStatus>();
|
||||
@@ -118,6 +138,12 @@ const activeCalls = new Map<string, IActiveCall>();
|
||||
const callHistory: ICallHistoryEntry[] = [];
|
||||
const MAX_HISTORY = 100;
|
||||
|
||||
// WebRTC session ↔ call linking state.
|
||||
// Both pieces (session accept + call media info) can arrive in any order.
|
||||
const webrtcSessionToCall = new Map<string, string>(); // sessionId → callId
|
||||
const webrtcCallToSession = new Map<string, string>(); // callId → sessionId
|
||||
const pendingCallMedia = new Map<string, { addr: string; port: number; sipPt: number }>(); // callId → provider media info
|
||||
|
||||
// Initialize provider statuses from config (all start as unregistered).
|
||||
for (const p of appConfig.providers) {
|
||||
providerStatuses.set(p.id, {
|
||||
@@ -178,7 +204,18 @@ function getStatus() {
|
||||
calls: [...activeCalls.values()].map((c) => ({
|
||||
...c,
|
||||
duration: Math.floor((Date.now() - c.startedAt) / 1000),
|
||||
legs: [],
|
||||
legs: [...c.legs.values()].map((l) => ({
|
||||
id: l.id,
|
||||
type: l.type,
|
||||
state: l.state,
|
||||
codec: l.codec,
|
||||
rtpPort: l.rtpPort,
|
||||
remoteMedia: l.remoteMedia,
|
||||
metadata: l.metadata || {},
|
||||
pktSent: 0,
|
||||
pktReceived: 0,
|
||||
transcoding: false,
|
||||
})),
|
||||
})),
|
||||
callHistory,
|
||||
contacts: appConfig.contacts || [],
|
||||
@@ -233,17 +270,26 @@ async function startProxyEngine(): Promise<void> {
|
||||
providerUsed: data.provider_id,
|
||||
state: 'ringing',
|
||||
startedAt: Date.now(),
|
||||
legs: new Map(),
|
||||
});
|
||||
|
||||
// Notify browsers of incoming call.
|
||||
const browserIds = getAllBrowserDeviceIds();
|
||||
for (const bid of browserIds) {
|
||||
sendToBrowserDevice(bid, {
|
||||
type: 'webrtc-incoming',
|
||||
callId: data.call_id,
|
||||
from: data.from_uri,
|
||||
deviceId: bid,
|
||||
});
|
||||
// Notify browsers of the incoming call, but only if the matched inbound
|
||||
// route asked for it. `ring_browsers !== false` preserves today's
|
||||
// ring-by-default behavior for any Rust release that predates this
|
||||
// field or for the fallback "no route matched" case (where Rust still
|
||||
// sends `true`). Note: this is an informational toast — browsers do
|
||||
// NOT race the SIP device to answer. First-to-answer-wins requires
|
||||
// a multi-leg fork which is not yet implemented.
|
||||
if (data.ring_browsers !== false) {
|
||||
const browserIds = getAllBrowserDeviceIds();
|
||||
for (const bid of browserIds) {
|
||||
sendToBrowserDevice(bid, {
|
||||
type: 'webrtc-incoming',
|
||||
callId: data.call_id,
|
||||
from: data.from_uri,
|
||||
deviceId: bid,
|
||||
});
|
||||
}
|
||||
}
|
||||
});
|
||||
|
||||
@@ -257,6 +303,7 @@ async function startProxyEngine(): Promise<void> {
|
||||
providerUsed: null,
|
||||
state: 'setting-up',
|
||||
startedAt: Date.now(),
|
||||
legs: new Map(),
|
||||
});
|
||||
});
|
||||
|
||||
@@ -270,7 +317,19 @@ async function startProxyEngine(): Promise<void> {
|
||||
providerUsed: data.provider_id,
|
||||
state: 'setting-up',
|
||||
startedAt: Date.now(),
|
||||
legs: new Map(),
|
||||
});
|
||||
|
||||
// Notify all browser devices — they can connect via WebRTC to listen/talk.
|
||||
const browserIds = getAllBrowserDeviceIds();
|
||||
for (const bid of browserIds) {
|
||||
sendToBrowserDevice(bid, {
|
||||
type: 'webrtc-incoming',
|
||||
callId: data.call_id,
|
||||
from: data.number,
|
||||
deviceId: bid,
|
||||
});
|
||||
}
|
||||
});
|
||||
|
||||
onProxyEvent('call_ringing', (data: { call_id: string }) => {
|
||||
@@ -278,11 +337,46 @@ async function startProxyEngine(): Promise<void> {
|
||||
if (call) call.state = 'ringing';
|
||||
});
|
||||
|
||||
onProxyEvent('call_answered', (data: { call_id: string }) => {
|
||||
onProxyEvent('call_answered', (data: { call_id: string; provider_media_addr?: string; provider_media_port?: number; sip_pt?: number }) => {
|
||||
const call = activeCalls.get(data.call_id);
|
||||
if (call) {
|
||||
call.state = 'connected';
|
||||
log(`[call] ${data.call_id} connected`);
|
||||
|
||||
// Enrich provider leg with media info from the answered event.
|
||||
if (data.provider_media_addr && data.provider_media_port) {
|
||||
for (const leg of call.legs.values()) {
|
||||
if (leg.type === 'sip-provider') {
|
||||
leg.remoteMedia = `${data.provider_media_addr}:${data.provider_media_port}`;
|
||||
if (data.sip_pt !== undefined) {
|
||||
const codecNames: Record<number, string> = { 0: 'PCMU', 8: 'PCMA', 9: 'G.722', 111: 'Opus' };
|
||||
leg.codec = codecNames[data.sip_pt] || `PT${data.sip_pt}`;
|
||||
}
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Try to link WebRTC session to this call for audio bridging.
|
||||
if (data.provider_media_addr && data.provider_media_port) {
|
||||
const sessionId = webrtcCallToSession.get(data.call_id);
|
||||
if (sessionId) {
|
||||
// Both session and media info available — link now.
|
||||
const sipPt = data.sip_pt ?? 9;
|
||||
log(`[webrtc] linking session=${sessionId.slice(0, 8)} to call=${data.call_id} media=${data.provider_media_addr}:${data.provider_media_port} pt=${sipPt}`);
|
||||
webrtcLink(sessionId, data.call_id, data.provider_media_addr, data.provider_media_port, sipPt).then((ok) => {
|
||||
log(`[webrtc] link result: ${ok}`);
|
||||
});
|
||||
} else {
|
||||
// Session not yet accepted — store media info for when it arrives.
|
||||
pendingCallMedia.set(data.call_id, {
|
||||
addr: data.provider_media_addr,
|
||||
port: data.provider_media_port,
|
||||
sipPt: data.sip_pt ?? 9,
|
||||
});
|
||||
log(`[webrtc] media info cached for call=${data.call_id}, waiting for session accept`);
|
||||
}
|
||||
}
|
||||
});
|
||||
|
||||
@@ -290,6 +384,15 @@ async function startProxyEngine(): Promise<void> {
|
||||
const call = activeCalls.get(data.call_id);
|
||||
if (call) {
|
||||
log(`[call] ${data.call_id} ended: ${data.reason} (${data.duration}s)`);
|
||||
// Snapshot legs with metadata for history.
|
||||
const historyLegs: IHistoryLeg[] = [];
|
||||
for (const [, leg] of call.legs) {
|
||||
historyLegs.push({
|
||||
id: leg.id,
|
||||
type: leg.type,
|
||||
metadata: leg.metadata || {},
|
||||
});
|
||||
}
|
||||
// Move to history.
|
||||
callHistory.unshift({
|
||||
id: call.id,
|
||||
@@ -298,9 +401,22 @@ async function startProxyEngine(): Promise<void> {
|
||||
calleeNumber: call.calleeNumber,
|
||||
startedAt: call.startedAt,
|
||||
duration: data.duration,
|
||||
legs: historyLegs,
|
||||
});
|
||||
if (callHistory.length > MAX_HISTORY) callHistory.pop();
|
||||
activeCalls.delete(data.call_id);
|
||||
|
||||
// Notify browser(s) that the call ended.
|
||||
broadcastWs('webrtc-call-ended', { callId: data.call_id });
|
||||
|
||||
// Clean up WebRTC session mappings.
|
||||
const sessionId = webrtcCallToSession.get(data.call_id);
|
||||
if (sessionId) {
|
||||
webrtcCallToSession.delete(data.call_id);
|
||||
webrtcSessionToCall.delete(sessionId);
|
||||
webrtcClose(sessionId).catch(() => {});
|
||||
}
|
||||
pendingCallMedia.delete(data.call_id);
|
||||
}
|
||||
});
|
||||
|
||||
@@ -308,6 +424,52 @@ async function startProxyEngine(): Promise<void> {
|
||||
log(`[sip] unhandled ${data.method_or_status} Call-ID=${data.call_id?.slice(0, 20)} from=${data.from_addr}:${data.from_port}`);
|
||||
});
|
||||
|
||||
// Leg events (multiparty) — update shadow state so the dashboard shows legs.
|
||||
onProxyEvent('leg_added', (data: any) => {
|
||||
log(`[leg] added: call=${data.call_id} leg=${data.leg_id} kind=${data.kind} state=${data.state}`);
|
||||
const call = activeCalls.get(data.call_id);
|
||||
if (call) {
|
||||
call.legs.set(data.leg_id, {
|
||||
id: data.leg_id,
|
||||
type: data.kind,
|
||||
state: data.state,
|
||||
codec: data.codec ?? null,
|
||||
rtpPort: data.rtpPort ?? null,
|
||||
remoteMedia: data.remoteMedia ?? null,
|
||||
metadata: data.metadata || {},
|
||||
});
|
||||
}
|
||||
});
|
||||
|
||||
onProxyEvent('leg_removed', (data: any) => {
|
||||
log(`[leg] removed: call=${data.call_id} leg=${data.leg_id}`);
|
||||
activeCalls.get(data.call_id)?.legs.delete(data.leg_id);
|
||||
});
|
||||
|
||||
onProxyEvent('leg_state_changed', (data: any) => {
|
||||
log(`[leg] state: call=${data.call_id} leg=${data.leg_id} → ${data.state}`);
|
||||
const call = activeCalls.get(data.call_id);
|
||||
if (!call) return;
|
||||
const leg = call.legs.get(data.leg_id);
|
||||
if (leg) {
|
||||
leg.state = data.state;
|
||||
if (data.metadata) leg.metadata = data.metadata;
|
||||
} else {
|
||||
// Initial legs (provider/device) don't emit leg_added — create on first state change.
|
||||
const legId: string = data.leg_id;
|
||||
const type = legId.includes('-prov') ? 'sip-provider' : legId.includes('-dev') ? 'sip-device' : 'webrtc';
|
||||
call.legs.set(data.leg_id, {
|
||||
id: data.leg_id,
|
||||
type,
|
||||
state: data.state,
|
||||
codec: null,
|
||||
rtpPort: null,
|
||||
remoteMedia: null,
|
||||
metadata: data.metadata || {},
|
||||
});
|
||||
}
|
||||
});
|
||||
|
||||
// WebRTC events from Rust — forward ICE candidates to browser via WebSocket.
|
||||
onProxyEvent('webrtc_ice_candidate', (data: any) => {
|
||||
// Find the browser's WebSocket by session ID and send the ICE candidate.
|
||||
@@ -368,9 +530,8 @@ async function startProxyEngine(): Promise<void> {
|
||||
const deviceList = appConfig.devices.map((d) => d.displayName).join(', ');
|
||||
log(`proxy engine started | LAN ${appConfig.proxy.lanIp}:${appConfig.proxy.lanPort} | providers: ${providerList} | devices: ${deviceList}`);
|
||||
|
||||
// Initialize audio codec bridge (still needed for WebRTC transcoding).
|
||||
// Generate TTS audio (WAV files on disk, played by Rust audio_player).
|
||||
try {
|
||||
await initCodecBridge(log);
|
||||
await initAnnouncement(log);
|
||||
|
||||
// Pre-generate prompts.
|
||||
@@ -392,7 +553,7 @@ async function startProxyEngine(): Promise<void> {
|
||||
}
|
||||
log(`[startup] prompts cached: ${promptCache.listIds().join(', ') || 'none'}`);
|
||||
} catch (e) {
|
||||
log(`[codec] init failed: ${e}`);
|
||||
log(`[tts] init failed: ${e}`);
|
||||
}
|
||||
}
|
||||
|
||||
@@ -418,6 +579,7 @@ initWebUi(
|
||||
providerUsed: providerId || null,
|
||||
state: 'setting-up',
|
||||
startedAt: Date.now(),
|
||||
legs: new Map(),
|
||||
});
|
||||
} else {
|
||||
log(`[dashboard] call failed for ${number}`);
|
||||
@@ -467,14 +629,22 @@ initWebUi(
|
||||
}
|
||||
},
|
||||
undefined, // callManager — legacy, replaced by Rust proxy-engine
|
||||
voiceboxManager,
|
||||
voiceboxManager, // voiceboxManager
|
||||
// WebRTC signaling → forwarded to Rust proxy-engine.
|
||||
async (sessionId, sdp, ws) => {
|
||||
log(`[webrtc] offer from browser session=${sessionId.slice(0, 8)}`);
|
||||
log(`[webrtc] offer from browser session=${sessionId.slice(0, 8)} sdp_type=${typeof sdp} sdp_len=${sdp?.length || 0}`);
|
||||
if (!sdp || typeof sdp !== 'string' || sdp.length < 10) {
|
||||
log(`[webrtc] WARNING: invalid SDP (type=${typeof sdp}), skipping offer`);
|
||||
return;
|
||||
}
|
||||
log(`[webrtc] sending offer to Rust (${sdp.length}b)...`);
|
||||
const result = await webrtcOffer(sessionId, sdp);
|
||||
log(`[webrtc] Rust result: ${JSON.stringify(result)?.slice(0, 200)}`);
|
||||
if (result?.sdp) {
|
||||
ws.send(JSON.stringify({ type: 'webrtc-answer', sessionId, sdp: result.sdp }));
|
||||
log(`[webrtc] answer sent to browser session=${sessionId.slice(0, 8)}`);
|
||||
} else {
|
||||
log(`[webrtc] ERROR: no answer SDP from Rust`);
|
||||
}
|
||||
},
|
||||
async (sessionId, candidate) => {
|
||||
@@ -483,6 +653,26 @@ initWebUi(
|
||||
async (sessionId) => {
|
||||
await webrtcClose(sessionId);
|
||||
},
|
||||
// onWebRtcAccept — browser has accepted a call, linking session to call.
|
||||
(callId: string, sessionId: string) => {
|
||||
log(`[webrtc] accept: callId=${callId} sessionId=${sessionId.slice(0, 8)}`);
|
||||
|
||||
// Store bidirectional mapping.
|
||||
webrtcSessionToCall.set(sessionId, callId);
|
||||
webrtcCallToSession.set(callId, sessionId);
|
||||
|
||||
// Check if we already have media info for this call (provider answered first).
|
||||
const media = pendingCallMedia.get(callId);
|
||||
if (media) {
|
||||
pendingCallMedia.delete(callId);
|
||||
log(`[webrtc] linking session=${sessionId.slice(0, 8)} to call=${callId} media=${media.addr}:${media.port} pt=${media.sipPt}`);
|
||||
webrtcLink(sessionId, callId, media.addr, media.port, media.sipPt).then((ok) => {
|
||||
log(`[webrtc] link result: ${ok}`);
|
||||
});
|
||||
} else {
|
||||
log(`[webrtc] session ${sessionId.slice(0, 8)} accepted, waiting for call_answered media info`);
|
||||
}
|
||||
},
|
||||
);
|
||||
|
||||
// ---------------------------------------------------------------------------
|
||||
|
||||
@@ -3,6 +3,6 @@
|
||||
*/
|
||||
export const commitinfo = {
|
||||
name: 'siprouter',
|
||||
version: '1.12.0',
|
||||
version: '1.20.2',
|
||||
description: 'undefined'
|
||||
}
|
||||
|
||||
@@ -18,7 +18,7 @@ const VIEW_TABS = [
|
||||
{ name: 'Phone', iconName: 'lucide:headset', element: SipproxyViewPhone },
|
||||
{ name: 'Routes', iconName: 'lucide:route', element: SipproxyViewRoutes },
|
||||
{ name: 'Voicemail', iconName: 'lucide:voicemail', element: SipproxyViewVoicemail },
|
||||
{ name: 'IVR', iconName: 'lucide:list-tree', element: SipproxyViewIvr },
|
||||
{ name: 'IVR', iconName: 'lucide:ListTree', element: SipproxyViewIvr },
|
||||
{ name: 'Contacts', iconName: 'lucide:contactRound', element: SipproxyViewContacts },
|
||||
{ name: 'Providers', iconName: 'lucide:server', element: SipproxyViewProviders },
|
||||
{ name: 'Log', iconName: 'lucide:scrollText', element: SipproxyViewLog },
|
||||
|
||||
@@ -422,7 +422,7 @@ export class SipproxyViewCalls extends DeesElement {
|
||||
menuOptions: [
|
||||
{
|
||||
name: 'Transfer',
|
||||
iconName: 'lucide:arrow-right-left',
|
||||
iconName: 'lucide:ArrowRightLeft',
|
||||
action: async (modalRef: any) => {
|
||||
if (!targetCallId || !targetLegId) {
|
||||
deesCatalog.DeesToast.error('Please select both a target call and a leg');
|
||||
@@ -620,7 +620,7 @@ export class SipproxyViewCalls extends DeesElement {
|
||||
title: 'Inbound',
|
||||
value: inboundCount,
|
||||
type: 'number',
|
||||
icon: 'lucide:phone-incoming',
|
||||
icon: 'lucide:PhoneIncoming',
|
||||
description: 'Incoming calls',
|
||||
},
|
||||
{
|
||||
@@ -628,7 +628,7 @@ export class SipproxyViewCalls extends DeesElement {
|
||||
title: 'Outbound',
|
||||
value: outboundCount,
|
||||
type: 'number',
|
||||
icon: 'lucide:phone-outgoing',
|
||||
icon: 'lucide:PhoneOutgoing',
|
||||
description: 'Outgoing calls',
|
||||
},
|
||||
];
|
||||
|
||||
@@ -140,7 +140,7 @@ export class SipproxyViewIvr extends DeesElement {
|
||||
title: 'Total Menus',
|
||||
value: ivr.menus.length,
|
||||
type: 'number',
|
||||
icon: 'lucide:list-tree',
|
||||
icon: 'lucide:ListTree',
|
||||
description: 'IVR menu definitions',
|
||||
},
|
||||
{
|
||||
@@ -148,7 +148,7 @@ export class SipproxyViewIvr extends DeesElement {
|
||||
title: 'Entry Menu',
|
||||
value: entryMenu?.name || '(none)',
|
||||
type: 'text' as any,
|
||||
icon: 'lucide:door-open',
|
||||
icon: 'lucide:DoorOpen',
|
||||
description: entryMenu ? `ID: ${entryMenu.id}` : 'No entry menu set',
|
||||
},
|
||||
{
|
||||
@@ -156,7 +156,7 @@ export class SipproxyViewIvr extends DeesElement {
|
||||
title: 'Status',
|
||||
value: ivr.enabled ? 'Enabled' : 'Disabled',
|
||||
type: 'text' as any,
|
||||
icon: ivr.enabled ? 'lucide:check-circle' : 'lucide:x-circle',
|
||||
icon: ivr.enabled ? 'lucide:CheckCircle' : 'lucide:XCircle',
|
||||
color: ivr.enabled ? 'hsl(142.1 76.2% 36.3%)' : 'hsl(0 84.2% 60.2%)',
|
||||
description: ivr.enabled ? 'IVR is active' : 'IVR is inactive',
|
||||
},
|
||||
@@ -228,7 +228,7 @@ export class SipproxyViewIvr extends DeesElement {
|
||||
},
|
||||
{
|
||||
name: 'Set as Entry',
|
||||
iconName: 'lucide:door-open' as any,
|
||||
iconName: 'lucide:DoorOpen' as any,
|
||||
type: ['inRow'] as any,
|
||||
actionFunc: async ({ item }: { item: IIvrMenu }) => {
|
||||
await this.setEntryMenu(item.id);
|
||||
@@ -236,7 +236,7 @@ export class SipproxyViewIvr extends DeesElement {
|
||||
},
|
||||
{
|
||||
name: 'Delete',
|
||||
iconName: 'lucide:trash-2' as any,
|
||||
iconName: 'lucide:Trash2' as any,
|
||||
type: ['inRow'] as any,
|
||||
actionFunc: async ({ item }: { item: IIvrMenu }) => {
|
||||
await this.confirmDeleteMenu(item);
|
||||
@@ -295,7 +295,7 @@ export class SipproxyViewIvr extends DeesElement {
|
||||
},
|
||||
{
|
||||
name: 'Delete',
|
||||
iconName: 'lucide:trash-2',
|
||||
iconName: 'lucide:Trash2',
|
||||
action: async (modalRef: any) => {
|
||||
const ivr = this.getIvrConfig();
|
||||
const menus = ivr.menus.filter((m) => m.id !== menu.id);
|
||||
|
||||
@@ -107,7 +107,7 @@ export class SipproxyViewOverview extends DeesElement {
|
||||
title: 'Inbound Calls',
|
||||
value: inboundCalls,
|
||||
type: 'number',
|
||||
icon: 'lucide:phone-incoming',
|
||||
icon: 'lucide:PhoneIncoming',
|
||||
description: 'Currently active',
|
||||
},
|
||||
{
|
||||
@@ -115,7 +115,7 @@ export class SipproxyViewOverview extends DeesElement {
|
||||
title: 'Outbound Calls',
|
||||
value: outboundCalls,
|
||||
type: 'number',
|
||||
icon: 'lucide:phone-outgoing',
|
||||
icon: 'lucide:PhoneOutgoing',
|
||||
description: 'Currently active',
|
||||
},
|
||||
{
|
||||
|
||||
@@ -86,7 +86,7 @@ export class SipproxyViewProviders extends DeesElement {
|
||||
title: 'Registered',
|
||||
value: registered,
|
||||
type: 'number',
|
||||
icon: 'lucide:check-circle',
|
||||
icon: 'lucide:CheckCircle',
|
||||
color: 'hsl(142.1 76.2% 36.3%)',
|
||||
description: 'Active registrations',
|
||||
},
|
||||
@@ -95,7 +95,7 @@ export class SipproxyViewProviders extends DeesElement {
|
||||
title: 'Unregistered',
|
||||
value: unregistered,
|
||||
type: 'number',
|
||||
icon: 'lucide:alert-circle',
|
||||
icon: 'lucide:AlertCircle',
|
||||
color: unregistered > 0 ? 'hsl(0 84.2% 60.2%)' : undefined,
|
||||
description: unregistered > 0 ? 'Needs attention' : 'All healthy',
|
||||
},
|
||||
@@ -153,7 +153,7 @@ export class SipproxyViewProviders extends DeesElement {
|
||||
},
|
||||
{
|
||||
name: 'Delete',
|
||||
iconName: 'lucide:trash-2',
|
||||
iconName: 'lucide:Trash2',
|
||||
type: ['inRow'] as any,
|
||||
actionFunc: async (actionData: any) => {
|
||||
await this.confirmDelete(actionData.item);
|
||||
@@ -579,7 +579,7 @@ export class SipproxyViewProviders extends DeesElement {
|
||||
},
|
||||
{
|
||||
name: 'Delete',
|
||||
iconName: 'lucide:trash-2',
|
||||
iconName: 'lucide:Trash2',
|
||||
action: async (modalRef: any) => {
|
||||
try {
|
||||
const result = await appState.apiSaveConfig({
|
||||
|
||||
@@ -239,7 +239,7 @@ export class SipproxyViewVoicemail extends DeesElement {
|
||||
},
|
||||
{
|
||||
name: 'Delete',
|
||||
iconName: 'lucide:trash-2',
|
||||
iconName: 'lucide:Trash2',
|
||||
action: async (modalRef: any) => {
|
||||
try {
|
||||
await fetch(
|
||||
@@ -281,7 +281,7 @@ export class SipproxyViewVoicemail extends DeesElement {
|
||||
title: 'Unheard Messages',
|
||||
value: unheard,
|
||||
type: 'number',
|
||||
icon: 'lucide:bell-ring',
|
||||
icon: 'lucide:BellRing',
|
||||
color: unheard > 0 ? 'hsl(0 84.2% 60.2%)' : 'hsl(142.1 76.2% 36.3%)',
|
||||
description: unheard > 0 ? 'Needs attention' : 'All caught up',
|
||||
},
|
||||
@@ -372,7 +372,7 @@ export class SipproxyViewVoicemail extends DeesElement {
|
||||
},
|
||||
{
|
||||
name: 'Delete',
|
||||
iconName: 'lucide:trash-2',
|
||||
iconName: 'lucide:Trash2',
|
||||
type: ['inRow'] as any,
|
||||
actionFunc: async (actionData: any) => {
|
||||
await this.deleteMessage(actionData.item as IVoicemailMessage);
|
||||
|
||||
@@ -20,7 +20,7 @@ export interface IDeviceStatus {
|
||||
|
||||
export interface ILegStatus {
|
||||
id: string;
|
||||
type: 'sip-device' | 'sip-provider' | 'webrtc';
|
||||
type: 'sip-device' | 'sip-provider' | 'webrtc' | 'tool';
|
||||
state: string;
|
||||
remoteMedia: { address: string; port: number } | null;
|
||||
rtpPort: number | null;
|
||||
@@ -28,6 +28,7 @@ export interface ILegStatus {
|
||||
pktReceived: number;
|
||||
codec: string | null;
|
||||
transcoding: boolean;
|
||||
metadata?: Record<string, unknown>;
|
||||
}
|
||||
|
||||
export interface ICallStatus {
|
||||
@@ -42,6 +43,12 @@ export interface ICallStatus {
|
||||
legs: ILegStatus[];
|
||||
}
|
||||
|
||||
export interface IHistoryLeg {
|
||||
id: string;
|
||||
type: string;
|
||||
metadata: Record<string, unknown>;
|
||||
}
|
||||
|
||||
export interface ICallHistoryEntry {
|
||||
id: string;
|
||||
direction: 'inbound' | 'outbound' | 'internal';
|
||||
@@ -50,6 +57,7 @@ export interface ICallHistoryEntry {
|
||||
providerUsed: string | null;
|
||||
startedAt: number;
|
||||
duration: number;
|
||||
legs?: IHistoryLeg[];
|
||||
}
|
||||
|
||||
export interface IContact {
|
||||
|
||||
Reference in New Issue
Block a user