Compare commits
8 Commits
| Author | SHA1 | Date | |
|---|---|---|---|
| c3a63a4092 | |||
| 7c4756402e | |||
| b6950e11d2 | |||
| e4935fbf21 | |||
| f543ff1568 | |||
| c63a759689 | |||
| a02146633b | |||
| f78639dd19 |
29
changelog.md
29
changelog.md
@@ -1,5 +1,34 @@
|
|||||||
# Changelog
|
# Changelog
|
||||||
|
|
||||||
|
## 2026-04-10 - 1.19.0 - feat(proxy-engine,codec-lib)
|
||||||
|
add adaptive RTP jitter buffering with Opus packet loss concealment and stable 20ms resampling
|
||||||
|
|
||||||
|
- introduces a per-leg adaptive jitter buffer in the mixer to reorder RTP packets, gate initial playout, and deliver one frame per 20ms tick
|
||||||
|
- adds Opus PLC support to synthesize missing audio frames when packets are lost, with fade-based fallback handling for non-Opus codecs
|
||||||
|
- updates i16 and f32 resamplers to use canonical 20ms chunks so cached resamplers preserve filter state and avoid variable-size cache thrashing
|
||||||
|
|
||||||
|
## 2026-04-10 - 1.18.0 - feat(readme)
|
||||||
|
expand documentation for voicemail, IVR, audio engine, and API capabilities
|
||||||
|
|
||||||
|
- Updates the feature overview to document voicemail, IVR menus, call recording, enhanced TTS, and the 48kHz float audio engine
|
||||||
|
- Refreshes the architecture section to describe the TypeScript control plane, Rust proxy-engine data plane, and JSON-over-stdio IPC
|
||||||
|
- Clarifies REST API and WebSocket coverage with voicemail endpoints, incoming call events, and refined endpoint descriptions
|
||||||
|
|
||||||
|
## 2026-04-10 - 1.17.2 - fix(proxy-engine)
|
||||||
|
use negotiated SDP payload types when wiring SIP legs and enable default nnnoiseless features for telephony denoising
|
||||||
|
|
||||||
|
- Select the negotiated codec payload type from SDP answers instead of always using the first offered codec
|
||||||
|
- Preserve the device leg's preferred payload type from its own INVITE SDP when attaching it to the mixer
|
||||||
|
- Enable default nnnoiseless features in codec-lib and proxy-engine dependencies
|
||||||
|
|
||||||
|
## 2026-04-10 - 1.17.1 - fix(proxy-engine,codec-lib,sip-proto,ts)
|
||||||
|
preserve negotiated media details and improve RTP audio handling across call legs
|
||||||
|
|
||||||
|
- Use native Opus float encode/decode to avoid unnecessary i16 quantization in the f32 audio path.
|
||||||
|
- Parse full RTP headers including extensions and sequence numbers, then sort inbound packets before decoding to keep codec state stable for out-of-order audio.
|
||||||
|
- Capture negotiated codec payload types from SDP offers and answers and include codec, RTP port, remote media, and metadata in leg_added events.
|
||||||
|
- Emit leg_state_changed and leg_removed events more consistently so the dashboard reflects leg lifecycle updates accurately.
|
||||||
|
|
||||||
## 2026-04-10 - 1.17.0 - feat(proxy-engine)
|
## 2026-04-10 - 1.17.0 - feat(proxy-engine)
|
||||||
upgrade the internal audio bus to 48kHz f32 with per-leg denoising and improve SIP leg routing
|
upgrade the internal audio bus to 48kHz f32 with per-leg denoising and improve SIP leg routing
|
||||||
|
|
||||||
|
|||||||
BIN
nogit/voicemail/default/msg-1775840000387.wav
Normal file
BIN
nogit/voicemail/default/msg-1775840000387.wav
Normal file
Binary file not shown.
BIN
nogit/voicemail/default/msg-1775840014276.wav
Normal file
BIN
nogit/voicemail/default/msg-1775840014276.wav
Normal file
Binary file not shown.
BIN
nogit/voicemail/default/msg-1775840439400.wav
Normal file
BIN
nogit/voicemail/default/msg-1775840439400.wav
Normal file
Binary file not shown.
BIN
nogit/voicemail/default/msg-1775840447441.wav
Normal file
BIN
nogit/voicemail/default/msg-1775840447441.wav
Normal file
Binary file not shown.
BIN
nogit/voicemail/default/msg-1775840454835.wav
Normal file
BIN
nogit/voicemail/default/msg-1775840454835.wav
Normal file
Binary file not shown.
@@ -1,6 +1,6 @@
|
|||||||
{
|
{
|
||||||
"name": "siprouter",
|
"name": "siprouter",
|
||||||
"version": "1.17.0",
|
"version": "1.19.0",
|
||||||
"private": true,
|
"private": true,
|
||||||
"type": "module",
|
"type": "module",
|
||||||
"scripts": {
|
"scripts": {
|
||||||
|
|||||||
309
readme.md
309
readme.md
@@ -1,6 +1,6 @@
|
|||||||
# @serve.zone/siprouter
|
# @serve.zone/siprouter
|
||||||
|
|
||||||
A production-grade **SIP B2BUA + WebRTC bridge** built with TypeScript and Rust. Routes calls between SIP providers, SIP hardware devices, and browser softphones — with real-time codec transcoding, ML noise suppression, neural TTS announcements, and a slick web dashboard.
|
A production-grade **SIP B2BUA + WebRTC bridge** built with TypeScript and Rust. Routes calls between SIP providers, SIP hardware devices, and browser softphones — with real-time codec transcoding, ML noise suppression, neural TTS, voicemail, IVR menus, and a slick web dashboard.
|
||||||
|
|
||||||
## Issue Reporting and Security
|
## Issue Reporting and Security
|
||||||
|
|
||||||
@@ -12,14 +12,16 @@ For reporting bugs, issues, or security vulnerabilities, please visit [community
|
|||||||
|
|
||||||
siprouter sits between your SIP trunk providers and your endpoints — hardware phones, ATAs, browser softphones — and handles **everything** in between:
|
siprouter sits between your SIP trunk providers and your endpoints — hardware phones, ATAs, browser softphones — and handles **everything** in between:
|
||||||
|
|
||||||
- 📞 **SIP B2BUA** — Terminates and re-originates calls with full RFC 3261 dialog state management
|
- 📞 **SIP B2BUA** — Terminates and re-originates calls with full RFC 3261 dialog state management, digest auth, and SDP negotiation
|
||||||
- 🌐 **WebRTC Bridge** — Browser-based softphone with bidirectional audio to the SIP network
|
- 🌐 **WebRTC Bridge** — Browser-based softphone with bidirectional Opus audio to the SIP network
|
||||||
- 🎛️ **Multi-Provider Trunking** — Register with multiple SIP providers simultaneously (sipgate, easybell, o2, etc.)
|
- 🎛️ **Multi-Provider Trunking** — Register with multiple SIP providers simultaneously (sipgate, easybell, etc.) with automatic failover
|
||||||
- 🔊 **Rust Codec Engine** — Real-time Opus ↔ G.722 ↔ PCMU ↔ PCMA transcoding in native Rust
|
- 🎧 **48kHz f32 Audio Engine** — High-fidelity internal audio bus at 48kHz/32-bit float with native Opus float encode/decode, FFT-based resampling, and per-leg ML noise suppression
|
||||||
- 🤖 **ML Noise Suppression** — RNNoise denoiser with per-direction state (to SIP / to browser)
|
- 🔀 **N-Leg Mix-Minus Mixer** — Conference-grade mixing with dynamic leg add/remove, transfer, and per-source audio separation
|
||||||
- 🗣️ **Neural TTS** — Kokoro-powered "connecting your call" announcements, pre-encoded for instant playback
|
- 📧 **Voicemail** — Configurable voicemail boxes with TTS greetings, recording, and web playback
|
||||||
- 🔀 **Hub Model Calls** — N-leg calls with dynamic add/remove, transfer, and RTP fan-out
|
- 🔢 **IVR Menus** — DTMF-navigable interactive voice response with nested menus, routing actions, and custom prompts
|
||||||
- 🖥️ **Web Dashboard** — Real-time SPA with live call monitoring, browser phone, contact management, provider config
|
- 🗣️ **Neural TTS** — Kokoro-powered announcements and greetings with 25+ voice presets, backed by espeak-ng fallback
|
||||||
|
- 🎙️ **Call Recording** — Per-source separated WAV recording at 48kHz via tool legs
|
||||||
|
- 🖥️ **Web Dashboard** — Real-time SPA with 9 views: live calls, browser phone, routing, voicemail, IVR, contacts, providers, and streaming logs
|
||||||
|
|
||||||
---
|
---
|
||||||
|
|
||||||
@@ -35,32 +37,38 @@ siprouter sits between your SIP trunk providers and your endpoints — hardware
|
|||||||
┌──────────────────────────────────────┐
|
┌──────────────────────────────────────┐
|
||||||
│ siprouter │
|
│ siprouter │
|
||||||
│ │
|
│ │
|
||||||
│ ┌──────────┐ ┌──────────────────┐ │
|
│ TypeScript Control Plane │
|
||||||
│ │ Call Hub │ │ Rust Transcoder │ │
|
│ ┌────────────────────────────────┐ │
|
||||||
│ │ N legs │──│ Opus/G.722/PCM │ │
|
│ │ Config · WebRTC Signaling │ │
|
||||||
│ │ fan-out │ │ + RNNoise │ │
|
│ │ REST API · Web Dashboard │ │
|
||||||
│ └────┬─────┘ └──────────────────┘ │
|
│ │ Voicebox Manager · TTS Cache │ │
|
||||||
│ │ │
|
│ └────────────┬───────────────────┘ │
|
||||||
│ ┌────┴─────┐ ┌──────────────────┐ │
|
│ JSON-over-stdio IPC │
|
||||||
│ │ SIP Stack│ │ Kokoro TTS │ │
|
│ ┌────────────┴───────────────────┐ │
|
||||||
│ │ Dialog SM│ │ (ONNX Runtime) │ │
|
│ │ Rust proxy-engine (data plane) │ │
|
||||||
│ └────┬─────┘ └──────────────────┘ │
|
│ │ │ │
|
||||||
│ │ │
|
│ │ SIP Stack · Dialog SM · Auth │ │
|
||||||
│ ┌────┴──────────────────────────┐ │
|
│ │ Call Manager · N-Leg Mixer │ │
|
||||||
│ │ Local Registrar + Provider │ │
|
│ │ 48kHz f32 Bus · RNNoise │ │
|
||||||
│ │ Registration Engine │ │
|
│ │ Codec Engine · RTP Port Pool │ │
|
||||||
│ └───────────────────────────────┘ │
|
│ │ WebRTC Engine · Kokoro TTS │ │
|
||||||
└──────────┬──────────────┬────────────┘
|
│ │ Voicemail · IVR · Recording │ │
|
||||||
│ │
|
│ └────┬──────────────────┬────────┘ │
|
||||||
┌──────┴──────┐ ┌─────┴──────┐
|
└───────┤──────────────────┤───────────┘
|
||||||
│ SIP Devices │ │ SIP Trunk │
|
│ │
|
||||||
│ (HT801, etc)│ │ Providers │
|
┌──────┴──────┐ ┌──────┴──────┐
|
||||||
└─────────────┘ └────────────┘
|
│ SIP Devices │ │ SIP Trunk │
|
||||||
|
│ (HT801 etc) │ │ Providers │
|
||||||
|
└─────────────┘ └─────────────┘
|
||||||
```
|
```
|
||||||
|
|
||||||
### The Hub Model
|
### 🧠 Key Design Decisions
|
||||||
|
|
||||||
Every call is a **hub** with N legs. Each leg is either a `SipLeg` (hardware device or provider) or a `WebRtcLeg` (browser). RTP flows through the hub — each leg's received audio is forwarded to all other legs, with codec transcoding handled transparently by the Rust engine.
|
- **Hub Model** — Every call is a hub with N legs. Each leg is a `SipLeg` (device/provider) or `WebRtcLeg` (browser). Legs can be dynamically added, removed, or transferred without tearing down the call.
|
||||||
|
- **Rust Data Plane** — All SIP protocol handling, codec transcoding, mixing, and RTP I/O runs in native Rust for real-time performance. TypeScript handles config, signaling, REST API, and dashboard.
|
||||||
|
- **48kHz f32 Internal Bus** — Audio is processed at maximum quality internally. Encoding/decoding to wire format (G.722, PCMU, Opus) happens solely at the leg boundary.
|
||||||
|
- **Per-Session Codec Isolation** — Each call leg gets its own encoder/decoder/resampler/denoiser state — no cross-call corruption.
|
||||||
|
- **SDP Codec Negotiation** — Outbound encoding uses the codec actually negotiated in SDP answers, not just the first offered codec.
|
||||||
|
|
||||||
---
|
---
|
||||||
|
|
||||||
@@ -70,15 +78,16 @@ Every call is a **hub** with N legs. Each leg is either a `SipLeg` (hardware dev
|
|||||||
|
|
||||||
- **Node.js** ≥ 20 with `tsx` globally available
|
- **Node.js** ≥ 20 with `tsx` globally available
|
||||||
- **pnpm** for package management
|
- **pnpm** for package management
|
||||||
- **Rust** toolchain (for building the codec engine and TTS)
|
- **Rust** toolchain (for building the proxy engine)
|
||||||
|
- **espeak-ng** (optional, for TTS fallback)
|
||||||
|
|
||||||
### Install & Build
|
### Install & Build
|
||||||
|
|
||||||
```bash
|
```bash
|
||||||
# Clone and install
|
# Clone and install dependencies
|
||||||
pnpm install
|
pnpm install
|
||||||
|
|
||||||
# Build the Rust binaries (opus-codec + tts-engine)
|
# Build the Rust proxy-engine binary
|
||||||
pnpm run buildRust
|
pnpm run buildRust
|
||||||
|
|
||||||
# Bundle the web frontend
|
# Bundle the web frontend
|
||||||
@@ -87,57 +96,92 @@ pnpm run bundle
|
|||||||
|
|
||||||
### Configuration
|
### Configuration
|
||||||
|
|
||||||
Create `.nogit/config.json` with your setup:
|
Create `.nogit/config.json`:
|
||||||
|
|
||||||
```jsonc
|
```jsonc
|
||||||
{
|
{
|
||||||
"proxy": {
|
"proxy": {
|
||||||
"lanIp": "192.168.1.100", // Your server's LAN IP
|
"lanIp": "192.168.1.100", // Your server's LAN IP
|
||||||
"lanPort": 5070, // SIP signaling port
|
"lanPort": 5070, // SIP signaling port
|
||||||
"rtpPortRange": [20000, 20200],// RTP relay port pool (even ports)
|
"publicIpSeed": "stun.example.com", // STUN server for public IP discovery
|
||||||
"webUiPort": 3060 // Dashboard port
|
"rtpPortRange": { "min": 20000, "max": 20200 }, // RTP port pool (even ports)
|
||||||
|
"webUiPort": 3060 // Dashboard + REST API port
|
||||||
},
|
},
|
||||||
"providers": [
|
"providers": [
|
||||||
{
|
{
|
||||||
"id": "my-trunk",
|
"id": "my-trunk",
|
||||||
"name": "My SIP Provider",
|
"displayName": "My SIP Provider",
|
||||||
"host": "sip.provider.com",
|
"domain": "sip.provider.com",
|
||||||
"port": 5060,
|
"outboundProxy": { "address": "sip.provider.com", "port": 5060 },
|
||||||
"username": "user",
|
"username": "user",
|
||||||
"password": "pass",
|
"password": "pass",
|
||||||
"codecs": ["G.722", "PCMA", "PCMU"],
|
"codecs": [9, 0, 8, 101], // G.722, PCMU, PCMA, telephone-event
|
||||||
"registerExpiry": 3600
|
"registerIntervalSec": 300
|
||||||
}
|
}
|
||||||
],
|
],
|
||||||
"devices": [
|
"devices": [
|
||||||
{
|
{
|
||||||
"id": "desk-phone",
|
"id": "desk-phone",
|
||||||
"name": "Desk Phone",
|
"displayName": "Desk Phone",
|
||||||
"type": "sip"
|
"expectedAddress": "192.168.1.50",
|
||||||
|
"extension": "100"
|
||||||
}
|
}
|
||||||
],
|
],
|
||||||
"routing": {
|
"routing": {
|
||||||
"inbound": {
|
"routes": [
|
||||||
"default": { "target": "all-devices", "ringBrowser": true }
|
{
|
||||||
|
"id": "inbound-default",
|
||||||
|
"name": "Ring all devices",
|
||||||
|
"priority": 100,
|
||||||
|
"direction": "inbound",
|
||||||
|
"match": {},
|
||||||
|
"action": {
|
||||||
|
"targets": ["desk-phone"],
|
||||||
|
"ringBrowsers": true,
|
||||||
|
"voicemailBox": "main",
|
||||||
|
"noAnswerTimeout": 25
|
||||||
|
}
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"id": "outbound-default",
|
||||||
|
"name": "Route via trunk",
|
||||||
|
"priority": 100,
|
||||||
|
"direction": "outbound",
|
||||||
|
"match": {},
|
||||||
|
"action": { "provider": "my-trunk" }
|
||||||
|
}
|
||||||
|
]
|
||||||
|
},
|
||||||
|
"voiceboxes": [
|
||||||
|
{
|
||||||
|
"id": "main",
|
||||||
|
"enabled": true,
|
||||||
|
"greetingText": "Please leave a message after the beep.",
|
||||||
|
"greetingVoice": "af_bella",
|
||||||
|
"noAnswerTimeoutSec": 25,
|
||||||
|
"maxRecordingSec": 120,
|
||||||
|
"maxMessages": 50
|
||||||
}
|
}
|
||||||
}
|
],
|
||||||
|
"contacts": [
|
||||||
|
{ "id": "1", "name": "Alice", "number": "+491234567890", "starred": true }
|
||||||
|
]
|
||||||
}
|
}
|
||||||
```
|
```
|
||||||
|
|
||||||
### TTS Setup (Optional)
|
### TTS Setup (Optional)
|
||||||
|
|
||||||
For neural "connecting your call" announcements, download the Kokoro TTS model:
|
For neural announcements and voicemail greetings, download the Kokoro TTS model:
|
||||||
|
|
||||||
```bash
|
```bash
|
||||||
mkdir -p .nogit/tts
|
mkdir -p .nogit/tts
|
||||||
# Download the full-quality model (310MB) + voices (27MB)
|
|
||||||
curl -L -o .nogit/tts/kokoro-v1.0.onnx \
|
curl -L -o .nogit/tts/kokoro-v1.0.onnx \
|
||||||
https://github.com/mzdk100/kokoro/releases/download/V1.0/kokoro-v1.0.onnx
|
https://github.com/mzdk100/kokoro/releases/download/V1.0/kokoro-v1.0.onnx
|
||||||
curl -L -o .nogit/tts/voices.bin \
|
curl -L -o .nogit/tts/voices.bin \
|
||||||
https://github.com/mzdk100/kokoro/releases/download/V1.0/voices.bin
|
https://github.com/mzdk100/kokoro/releases/download/V1.0/voices.bin
|
||||||
```
|
```
|
||||||
|
|
||||||
If the model files aren't present, the announcement feature is simply disabled — everything else works fine.
|
Without the model files, TTS falls back to `espeak-ng`. Without either, announcements are skipped — everything else works fine.
|
||||||
|
|
||||||
### Run
|
### Run
|
||||||
|
|
||||||
@@ -145,7 +189,7 @@ If the model files aren't present, the announcement feature is simply disabled
|
|||||||
pnpm start
|
pnpm start
|
||||||
```
|
```
|
||||||
|
|
||||||
The SIP proxy starts on the configured port and the web dashboard is available at `http://<your-ip>:3060`.
|
The SIP proxy starts on the configured port and the web dashboard is available at `https://<your-ip>:3060`.
|
||||||
|
|
||||||
### HTTPS (Optional)
|
### HTTPS (Optional)
|
||||||
|
|
||||||
@@ -157,68 +201,91 @@ Place `cert.pem` and `key.pem` in `.nogit/` for TLS on the dashboard.
|
|||||||
|
|
||||||
```
|
```
|
||||||
siprouter/
|
siprouter/
|
||||||
├── ts/ # TypeScript source
|
├── ts/ # TypeScript control plane
|
||||||
│ ├── sipproxy.ts # Main entry — bootstraps everything
|
│ ├── sipproxy.ts # Main entry — bootstraps everything
|
||||||
│ ├── config.ts # Config loader & validation
|
│ ├── config.ts # Config loader & validation
|
||||||
│ ├── registrar.ts # Local SIP registrar for devices
|
│ ├── proxybridge.ts # Rust proxy-engine IPC bridge (smartrust)
|
||||||
│ ├── providerstate.ts # Per-provider upstream registration engine
|
│ ├── frontend.ts # Web dashboard HTTP/WS server + REST API
|
||||||
│ ├── frontend.ts # Web dashboard HTTP/WS server + REST API
|
│ ├── webrtcbridge.ts # WebRTC signaling layer
|
||||||
│ ├── webrtcbridge.ts # WebRTC signaling layer
|
│ ├── registrar.ts # Browser softphone registration
|
||||||
│ ├── opusbridge.ts # Rust IPC bridge (smartrust)
|
│ ├── announcement.ts # TTS announcement generator (espeak-ng / Kokoro)
|
||||||
│ ├── codec.ts # High-level RTP transcoding interface
|
│ ├── voicebox.ts # Voicemail box management
|
||||||
│ ├── announcement.ts # Neural TTS announcement generator
|
│ └── call/
|
||||||
│ ├── sip/ # Zero-dependency SIP protocol library
|
│ └── prompt-cache.ts # Named audio prompt WAV management
|
||||||
│ │ ├── message.ts # SIP message parser/builder/mutator
|
│
|
||||||
│ │ ├── dialog.ts # RFC 3261 dialog state machine
|
├── ts_web/ # Web frontend (Lit-based SPA)
|
||||||
│ │ ├── helpers.ts # SDP builder, digest auth, codec registry
|
│ ├── elements/ # Web components (9 dashboard views)
|
||||||
│ │ └── rewrite.ts # SIP URI + SDP body rewriting
|
│ └── state/ # App state, WebRTC client, notifications
|
||||||
│ └── call/ # Hub-model call management
|
│
|
||||||
│ ├── call-manager.ts # Central registry, factory, routing
|
├── rust/ # Rust workspace (the data plane)
|
||||||
│ ├── call.ts # Call hub — owns N legs, media fan-out
|
|
||||||
│ ├── sip-leg.ts # SIP device/provider connection
|
|
||||||
│ ├── webrtc-leg.ts # Browser WebRTC connection
|
|
||||||
│ └── rtp-port-pool.ts # UDP port allocation
|
|
||||||
├── ts_web/ # Web frontend (Lit-based SPA)
|
|
||||||
│ ├── elements/ # Web components (dashboard, phone, etc.)
|
|
||||||
│ └── state/ # App state, WebRTC client, notifications
|
|
||||||
├── rust/ # Rust workspace
|
|
||||||
│ └── crates/
|
│ └── crates/
|
||||||
│ ├── opus-codec/ # Real-time audio transcoder (Opus/G.722/PCM)
|
│ ├── codec-lib/ # Audio codec library (Opus/G.722/PCMU/PCMA)
|
||||||
│ └── tts-engine/ # Kokoro neural TTS CLI
|
│ ├── sip-proto/ # Zero-dependency SIP protocol library
|
||||||
├── html/ # Static HTML shell
|
│ └── proxy-engine/ # Main binary — SIP engine + mixer + RTP
|
||||||
├── .nogit/ # Secrets, config, models (gitignored)
|
│
|
||||||
└── dist_rust/ # Compiled Rust binaries (gitignored)
|
├── html/ # Static HTML shell
|
||||||
|
├── .nogit/ # Secrets, config, TTS models (gitignored)
|
||||||
|
└── dist_rust/ # Compiled Rust binary (gitignored)
|
||||||
```
|
```
|
||||||
|
|
||||||
---
|
---
|
||||||
|
|
||||||
## 🎧 Codec Engine (Rust)
|
## 🎧 Audio Engine (Rust)
|
||||||
|
|
||||||
The `opus-codec` binary handles all real-time audio processing via a JSON-over-stdio IPC protocol:
|
The `proxy-engine` binary handles all real-time audio processing with a **48kHz f32 internal bus** — encoding and decoding happens only at leg boundaries.
|
||||||
|
|
||||||
| Codec | Payload Type | Sample Rate | Use Case |
|
### Supported Codecs
|
||||||
|-------|-------------|-------------|----------|
|
|
||||||
| **Opus** | 111 | 48 kHz | WebRTC browsers |
|
| Codec | PT | Native Rate | Use Case |
|
||||||
| **G.722** | 9 | 16 kHz | HD SIP devices |
|
|-------|:--:|:-----------:|----------|
|
||||||
|
| **Opus** | 111 | 48 kHz | WebRTC browsers (native float encode/decode — zero i16 quantization) |
|
||||||
|
| **G.722** | 9 | 16 kHz | HD SIP devices & providers |
|
||||||
| **PCMU** (G.711 µ-law) | 0 | 8 kHz | Legacy SIP |
|
| **PCMU** (G.711 µ-law) | 0 | 8 kHz | Legacy SIP |
|
||||||
| **PCMA** (G.711 A-law) | 8 | 8 kHz | Legacy SIP |
|
| **PCMA** (G.711 A-law) | 8 | 8 kHz | Legacy SIP |
|
||||||
|
|
||||||
**Features:**
|
### Audio Pipeline
|
||||||
- Per-call isolated codec sessions (no cross-call state corruption)
|
|
||||||
- FFT-based sample rate conversion via `rubato`
|
```
|
||||||
- **RNNoise ML noise suppression** with per-direction state — denoises audio flowing to SIP separately from audio flowing to the browser
|
Inbound: Wire RTP → Decode → Resample to 48kHz → Denoise (RNNoise) → Mix Bus
|
||||||
- Raw PCM encoding for TTS frame processing
|
Outbound: Mix Bus → Mix-Minus → Resample to codec rate → Encode → Wire RTP
|
||||||
|
```
|
||||||
|
|
||||||
|
- **FFT-based resampling** via `rubato` — high-quality sinc interpolation with cached resampler state for seamless inter-frame continuity
|
||||||
|
- **ML noise suppression** via `nnnoiseless` (RNNoise) — per-leg inbound denoising with SIMD acceleration (AVX/SSE). Skipped for WebRTC legs (browsers already denoise via getUserMedia)
|
||||||
|
- **Mix-minus mixing** — each participant hears everyone except themselves, accumulated in f64 precision
|
||||||
|
- **In-tick packet reorder** — inbound RTP packets are sorted by sequence number before decoding, protecting G.722 ADPCM state from out-of-order delivery
|
||||||
|
- **RFC 3550 compliant header parsing** — properly handles CSRC lists and header extensions
|
||||||
|
|
||||||
---
|
---
|
||||||
|
|
||||||
## 🗣️ Neural TTS (Rust)
|
## 🗣️ Neural TTS
|
||||||
|
|
||||||
The `tts-engine` binary uses [Kokoro TTS](https://github.com/mzdk100/kokoro) (82M parameter neural model) to synthesize announcements at startup:
|
Announcements and voicemail greetings are synthesized using [Kokoro TTS](https://github.com/mzdk100/kokoro) — an 82M parameter neural model running via ONNX Runtime directly in the Rust process:
|
||||||
|
|
||||||
- **24 kHz, 16-bit mono** output
|
- **24 kHz, 16-bit mono** output
|
||||||
- **25+ voice presets** — American/British, male/female (e.g., `af_bella`, `am_adam`, `bf_emma`, `bm_george`)
|
- **25+ voice presets** — American/British, male/female (e.g., `af_bella`, `am_adam`, `bf_emma`, `bm_george`)
|
||||||
- **~800ms** synthesis time for a 3-second announcement
|
- **~800ms** synthesis time for a 3-second phrase
|
||||||
- Pre-encoded to G.722 + Opus for zero-latency RTP playback during call setup
|
- Lazy-loaded on first use — no startup cost if TTS is unused
|
||||||
|
- Falls back to `espeak-ng` if the ONNX model is not available
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
## 📧 Voicemail
|
||||||
|
|
||||||
|
- Configurable voicemail boxes with custom TTS greetings
|
||||||
|
- Automatic routing on no-answer timeout
|
||||||
|
- Recording with configurable max duration and message count
|
||||||
|
- Web dashboard playback and management
|
||||||
|
- WAV storage in `.nogit/voicemail/`
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
## 🔢 IVR (Interactive Voice Response)
|
||||||
|
|
||||||
|
- DTMF-navigable menus with configurable entries
|
||||||
|
- Actions: route to extension, route to voicemail, transfer, submenu, hangup, repeat prompt
|
||||||
|
- Custom TTS prompts per menu
|
||||||
|
- Nested menu support
|
||||||
|
|
||||||
---
|
---
|
||||||
|
|
||||||
@@ -228,33 +295,42 @@ The `tts-engine` binary uses [Kokoro TTS](https://github.com/mzdk100/kokoro) (82
|
|||||||
|
|
||||||
| View | Description |
|
| View | Description |
|
||||||
|------|-------------|
|
|------|-------------|
|
||||||
| **Overview** | Stats tiles — uptime, providers, devices, active calls |
|
| 📊 **Overview** | Stats tiles — uptime, providers, devices, active calls |
|
||||||
| **Calls** | Active calls with leg details, codec info, packet counters. Add/remove legs, transfer, hangup |
|
| 📞 **Calls** | Active calls with leg details, codec info, add/remove legs, transfer, hangup |
|
||||||
| **Phone** | Browser softphone — mic/speaker selection, audio meters, dial pad, incoming call popup |
|
| ☎️ **Phone** | Browser softphone — mic/speaker selection, audio meters, dial pad, incoming call popup |
|
||||||
| **Contacts** | Contact management with click-to-call |
|
| 🔀 **Routes** | Routing rule management — match/action model with priority |
|
||||||
| **Providers** | SIP trunk config with registration status |
|
| 📧 **Voicemail** | Voicemail box management + message playback |
|
||||||
| **Log** | Live streaming log viewer |
|
| 🔢 **IVR** | IVR menu builder — DTMF entries, TTS prompts, nested menus |
|
||||||
|
| 👤 **Contacts** | Contact management with click-to-call |
|
||||||
|
| 🔌 **Providers** | SIP trunk configuration and registration status |
|
||||||
|
| 📋 **Log** | Live streaming log viewer |
|
||||||
|
|
||||||
### REST API
|
### REST API
|
||||||
|
|
||||||
| Endpoint | Method | Description |
|
| Endpoint | Method | Description |
|
||||||
|----------|--------|-------------|
|
|----------|--------|-------------|
|
||||||
| `/api/status` | GET | Full system status (providers, devices, calls) |
|
| `/api/status` | GET | Full system status (providers, devices, calls, history) |
|
||||||
| `/api/call` | POST | Originate a call |
|
| `/api/call` | POST | Originate a call |
|
||||||
| `/api/hangup` | POST | Hang up a call |
|
| `/api/hangup` | POST | Hang up a call |
|
||||||
| `/api/call/:id/addleg` | POST | Add a leg to an active call |
|
| `/api/call/:id/addleg` | POST | Add a device leg to an active call |
|
||||||
| `/api/call/:id/addexternal` | POST | Add an external participant |
|
| `/api/call/:id/addexternal` | POST | Add an external participant via provider |
|
||||||
| `/api/call/:id/removeleg` | POST | Remove a leg from a call |
|
| `/api/call/:id/removeleg` | POST | Remove a leg from a call |
|
||||||
| `/api/transfer` | POST | Transfer a call |
|
| `/api/transfer` | POST | Transfer a call |
|
||||||
| `/api/config` | GET/POST | Read or update configuration (hot-reload) |
|
| `/api/config` | GET | Read current configuration |
|
||||||
|
| `/api/config` | POST | Update configuration (hot-reload) |
|
||||||
|
| `/api/voicemail/:box` | GET | List voicemail messages |
|
||||||
|
| `/api/voicemail/:box/:id` | DELETE | Delete a voicemail message |
|
||||||
|
| `/api/voicemail/:box/:id/audio` | GET | Stream voicemail audio |
|
||||||
|
|
||||||
### WebSocket Events
|
### WebSocket Events
|
||||||
|
|
||||||
Connect to `/ws` for real-time push:
|
Connect to `/ws` for real-time push:
|
||||||
|
|
||||||
```jsonc
|
```jsonc
|
||||||
{ "type": "status", "data": { ... } } // Full status snapshot (1s interval)
|
{ "type": "status", "data": { ... } } // Full status snapshot (1s interval)
|
||||||
{ "type": "log", "data": { "message": "..." } } // Log lines in real-time
|
{ "type": "log", "data": { "message": "..." } } // Log lines in real-time
|
||||||
|
{ "type": "incoming_call", "data": { ... } } // Incoming call notification
|
||||||
|
{ "type": "call_ended", "data": { ... } } // Call ended notification
|
||||||
```
|
```
|
||||||
|
|
||||||
---
|
---
|
||||||
@@ -264,7 +340,7 @@ Connect to `/ws` for real-time push:
|
|||||||
| Port | Protocol | Purpose |
|
| Port | Protocol | Purpose |
|
||||||
|------|----------|---------|
|
|------|----------|---------|
|
||||||
| 5070 (configurable) | UDP | SIP signaling |
|
| 5070 (configurable) | UDP | SIP signaling |
|
||||||
| 20000–20200 (configurable) | UDP | RTP relay (even ports, per-call allocation) |
|
| 20000–20200 (configurable) | UDP | RTP media (even ports, per-call allocation) |
|
||||||
| 3060 (configurable) | TCP | Web dashboard + WebSocket + REST API |
|
| 3060 (configurable) | TCP | Web dashboard + WebSocket + REST API |
|
||||||
|
|
||||||
---
|
---
|
||||||
@@ -275,23 +351,16 @@ Connect to `/ws` for real-time push:
|
|||||||
# Start in dev mode
|
# Start in dev mode
|
||||||
pnpm start
|
pnpm start
|
||||||
|
|
||||||
# Build Rust crates
|
# Build Rust proxy-engine
|
||||||
pnpm run buildRust
|
pnpm run buildRust
|
||||||
|
|
||||||
# Bundle web frontend
|
# Bundle web frontend
|
||||||
pnpm run bundle
|
pnpm run bundle
|
||||||
|
|
||||||
# Restart background server (build + bundle + restart)
|
# Build + bundle + restart background server
|
||||||
pnpm run restartBackground
|
pnpm run restartBackground
|
||||||
```
|
```
|
||||||
|
|
||||||
### Key Design Decisions
|
|
||||||
|
|
||||||
- **Hub Model** — Calls are N-leg hubs, not point-to-point. This enables multi-party, dynamic leg manipulation, and transfer without tearing down the call.
|
|
||||||
- **Zero-dependency SIP library** — `ts/sip/` is a pure data-level SIP stack (parse/build/mutate/serialize). No transport or timer logic — those live in the application layer.
|
|
||||||
- **Rust for the hot path** — Codec transcoding and noise suppression run in native Rust for real-time performance. TypeScript handles signaling and orchestration.
|
|
||||||
- **Per-session codec isolation** — Each call gets its own Opus/G.722 encoder/decoder state in the Rust process, preventing stateful codec prediction from leaking between concurrent calls.
|
|
||||||
|
|
||||||
---
|
---
|
||||||
|
|
||||||
## License and Legal Information
|
## License and Legal Information
|
||||||
|
|||||||
226
rust/Cargo.lock
generated
226
rust/Cargo.lock
generated
@@ -237,6 +237,17 @@ version = "1.1.2"
|
|||||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
checksum = "1505bd5d3d116872e7271a6d4e16d81d0c8570876c8de68093a09ac269d8aac0"
|
checksum = "1505bd5d3d116872e7271a6d4e16d81d0c8570876c8de68093a09ac269d8aac0"
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "atty"
|
||||||
|
version = "0.2.14"
|
||||||
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
|
checksum = "d9b39be18770d11421cdb1b9947a45dd3f37e93092cbf377614828a319d5fee8"
|
||||||
|
dependencies = [
|
||||||
|
"hermit-abi",
|
||||||
|
"libc",
|
||||||
|
"winapi",
|
||||||
|
]
|
||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
name = "audiopus"
|
name = "audiopus"
|
||||||
version = "0.3.0-rc.0"
|
version = "0.3.0-rc.0"
|
||||||
@@ -487,6 +498,31 @@ dependencies = [
|
|||||||
"inout",
|
"inout",
|
||||||
]
|
]
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "clap"
|
||||||
|
version = "3.2.25"
|
||||||
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
|
checksum = "4ea181bf566f71cb9a5d17a59e1871af638180a18fb0035c92ae62b705207123"
|
||||||
|
dependencies = [
|
||||||
|
"atty",
|
||||||
|
"bitflags 1.3.2",
|
||||||
|
"clap_lex",
|
||||||
|
"indexmap 1.9.3",
|
||||||
|
"once_cell",
|
||||||
|
"strsim",
|
||||||
|
"termcolor",
|
||||||
|
"textwrap",
|
||||||
|
]
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "clap_lex"
|
||||||
|
version = "0.2.4"
|
||||||
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
|
checksum = "2850f2f5a82cbf437dd5af4d49848fbdfc27c157c3d010345776f952765261c5"
|
||||||
|
dependencies = [
|
||||||
|
"os_str_bytes",
|
||||||
|
]
|
||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
name = "cmake"
|
name = "cmake"
|
||||||
version = "0.1.58"
|
version = "0.1.58"
|
||||||
@@ -700,6 +736,125 @@ version = "0.3.8"
|
|||||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
checksum = "06d2e3287df1c007e74221c49ca10a95d557349e54b3a75dc2fb14712c751f04"
|
checksum = "06d2e3287df1c007e74221c49ca10a95d557349e54b3a75dc2fb14712c751f04"
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "dasp"
|
||||||
|
version = "0.11.0"
|
||||||
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
|
checksum = "7381b67da416b639690ac77c73b86a7b5e64a29e31d1f75fb3b1102301ef355a"
|
||||||
|
dependencies = [
|
||||||
|
"dasp_envelope",
|
||||||
|
"dasp_frame",
|
||||||
|
"dasp_interpolate",
|
||||||
|
"dasp_peak",
|
||||||
|
"dasp_ring_buffer",
|
||||||
|
"dasp_rms",
|
||||||
|
"dasp_sample",
|
||||||
|
"dasp_signal",
|
||||||
|
"dasp_slice",
|
||||||
|
"dasp_window",
|
||||||
|
]
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "dasp_envelope"
|
||||||
|
version = "0.11.0"
|
||||||
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
|
checksum = "8ec617ce7016f101a87fe85ed44180839744265fae73bb4aa43e7ece1b7668b6"
|
||||||
|
dependencies = [
|
||||||
|
"dasp_frame",
|
||||||
|
"dasp_peak",
|
||||||
|
"dasp_ring_buffer",
|
||||||
|
"dasp_rms",
|
||||||
|
"dasp_sample",
|
||||||
|
]
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "dasp_frame"
|
||||||
|
version = "0.11.0"
|
||||||
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
|
checksum = "b2a3937f5fe2135702897535c8d4a5553f8b116f76c1529088797f2eee7c5cd6"
|
||||||
|
dependencies = [
|
||||||
|
"dasp_sample",
|
||||||
|
]
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "dasp_interpolate"
|
||||||
|
version = "0.11.0"
|
||||||
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
|
checksum = "7fc975a6563bb7ca7ec0a6c784ead49983a21c24835b0bc96eea11ee407c7486"
|
||||||
|
dependencies = [
|
||||||
|
"dasp_frame",
|
||||||
|
"dasp_ring_buffer",
|
||||||
|
"dasp_sample",
|
||||||
|
]
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "dasp_peak"
|
||||||
|
version = "0.11.0"
|
||||||
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
|
checksum = "5cf88559d79c21f3d8523d91250c397f9a15b5fc72fbb3f87fdb0a37b79915bf"
|
||||||
|
dependencies = [
|
||||||
|
"dasp_frame",
|
||||||
|
"dasp_sample",
|
||||||
|
]
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "dasp_ring_buffer"
|
||||||
|
version = "0.11.0"
|
||||||
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
|
checksum = "07d79e19b89618a543c4adec9c5a347fe378a19041699b3278e616e387511ea1"
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "dasp_rms"
|
||||||
|
version = "0.11.0"
|
||||||
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
|
checksum = "a6c5dcb30b7e5014486e2822537ea2beae50b19722ffe2ed7549ab03774575aa"
|
||||||
|
dependencies = [
|
||||||
|
"dasp_frame",
|
||||||
|
"dasp_ring_buffer",
|
||||||
|
"dasp_sample",
|
||||||
|
]
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "dasp_sample"
|
||||||
|
version = "0.11.0"
|
||||||
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
|
checksum = "0c87e182de0887fd5361989c677c4e8f5000cd9491d6d563161a8f3a5519fc7f"
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "dasp_signal"
|
||||||
|
version = "0.11.0"
|
||||||
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
|
checksum = "aa1ab7d01689c6ed4eae3d38fe1cea08cba761573fbd2d592528d55b421077e7"
|
||||||
|
dependencies = [
|
||||||
|
"dasp_envelope",
|
||||||
|
"dasp_frame",
|
||||||
|
"dasp_interpolate",
|
||||||
|
"dasp_peak",
|
||||||
|
"dasp_ring_buffer",
|
||||||
|
"dasp_rms",
|
||||||
|
"dasp_sample",
|
||||||
|
"dasp_window",
|
||||||
|
]
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "dasp_slice"
|
||||||
|
version = "0.11.0"
|
||||||
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
|
checksum = "4e1c7335d58e7baedafa516cb361360ff38d6f4d3f9d9d5ee2a2fc8e27178fa1"
|
||||||
|
dependencies = [
|
||||||
|
"dasp_frame",
|
||||||
|
"dasp_sample",
|
||||||
|
]
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "dasp_window"
|
||||||
|
version = "0.11.1"
|
||||||
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
|
checksum = "99ded7b88821d2ce4e8b842c9f1c86ac911891ab89443cc1de750cae764c5076"
|
||||||
|
dependencies = [
|
||||||
|
"dasp_sample",
|
||||||
|
]
|
||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
name = "data-encoding"
|
name = "data-encoding"
|
||||||
version = "2.10.0"
|
version = "2.10.0"
|
||||||
@@ -1214,6 +1369,12 @@ dependencies = [
|
|||||||
"subtle",
|
"subtle",
|
||||||
]
|
]
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "hashbrown"
|
||||||
|
version = "0.12.3"
|
||||||
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
|
checksum = "8a9ee70c43aaf417c914396645a0fa852624801b24ebb7ae78fe8272889ac888"
|
||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
name = "hashbrown"
|
name = "hashbrown"
|
||||||
version = "0.15.5"
|
version = "0.15.5"
|
||||||
@@ -1246,6 +1407,15 @@ version = "0.5.0"
|
|||||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
checksum = "2304e00983f87ffb38b55b444b5e3b60a884b5d30c0fca7d82fe33449bbe55ea"
|
checksum = "2304e00983f87ffb38b55b444b5e3b60a884b5d30c0fca7d82fe33449bbe55ea"
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "hermit-abi"
|
||||||
|
version = "0.1.19"
|
||||||
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
|
checksum = "62b467343b94ba476dcb2500d242dadbb39557df889310ac77c5d99100aaac33"
|
||||||
|
dependencies = [
|
||||||
|
"libc",
|
||||||
|
]
|
||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
name = "hex"
|
name = "hex"
|
||||||
version = "0.4.3"
|
version = "0.4.3"
|
||||||
@@ -1446,6 +1616,16 @@ dependencies = [
|
|||||||
"zstd",
|
"zstd",
|
||||||
]
|
]
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "indexmap"
|
||||||
|
version = "1.9.3"
|
||||||
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
|
checksum = "bd070e393353796e801d209ad339e89596eb4c8d430d18ede6a1cced8fafbd99"
|
||||||
|
dependencies = [
|
||||||
|
"autocfg",
|
||||||
|
"hashbrown 0.12.3",
|
||||||
|
]
|
||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
name = "indexmap"
|
name = "indexmap"
|
||||||
version = "2.14.0"
|
version = "2.14.0"
|
||||||
@@ -1739,7 +1919,13 @@ version = "0.5.2"
|
|||||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
checksum = "805d5964d1e7a0006a7fdced7dae75084d66d18b35f1dfe81bd76929b1f8da0c"
|
checksum = "805d5964d1e7a0006a7fdced7dae75084d66d18b35f1dfe81bd76929b1f8da0c"
|
||||||
dependencies = [
|
dependencies = [
|
||||||
|
"anyhow",
|
||||||
|
"clap",
|
||||||
|
"dasp",
|
||||||
|
"dasp_interpolate",
|
||||||
|
"dasp_ring_buffer",
|
||||||
"easyfft",
|
"easyfft",
|
||||||
|
"hound",
|
||||||
"once_cell",
|
"once_cell",
|
||||||
]
|
]
|
||||||
|
|
||||||
@@ -1905,6 +2091,12 @@ dependencies = [
|
|||||||
"ureq",
|
"ureq",
|
||||||
]
|
]
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "os_str_bytes"
|
||||||
|
version = "6.6.1"
|
||||||
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
|
checksum = "e2355d85b9a3786f481747ced0e0ff2ba35213a1f9bd406ed906554d7af805a1"
|
||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
name = "p256"
|
name = "p256"
|
||||||
version = "0.11.1"
|
version = "0.11.1"
|
||||||
@@ -2883,6 +3075,21 @@ dependencies = [
|
|||||||
"windows-sys",
|
"windows-sys",
|
||||||
]
|
]
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "termcolor"
|
||||||
|
version = "1.4.1"
|
||||||
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
|
checksum = "06794f8f6c5c898b3275aebefa6b8a1cb24cd2c6c79397ab15774837a0bc5755"
|
||||||
|
dependencies = [
|
||||||
|
"winapi-util",
|
||||||
|
]
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "textwrap"
|
||||||
|
version = "0.16.2"
|
||||||
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
|
checksum = "c13547615a44dc9c452a8a534638acdf07120d4b6847c8178705da06306a3057"
|
||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
name = "thiserror"
|
name = "thiserror"
|
||||||
version = "1.0.69"
|
version = "1.0.69"
|
||||||
@@ -3244,7 +3451,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index"
|
|||||||
checksum = "bb0e353e6a2fbdc176932bbaab493762eb1255a7900fe0fea1a2f96c296cc909"
|
checksum = "bb0e353e6a2fbdc176932bbaab493762eb1255a7900fe0fea1a2f96c296cc909"
|
||||||
dependencies = [
|
dependencies = [
|
||||||
"anyhow",
|
"anyhow",
|
||||||
"indexmap",
|
"indexmap 2.14.0",
|
||||||
"wasm-encoder",
|
"wasm-encoder",
|
||||||
"wasmparser",
|
"wasmparser",
|
||||||
]
|
]
|
||||||
@@ -3257,7 +3464,7 @@ checksum = "47b807c72e1bac69382b3a6fb3dbe8ea4c0ed87ff5629b8685ae6b9a611028fe"
|
|||||||
dependencies = [
|
dependencies = [
|
||||||
"bitflags 2.11.0",
|
"bitflags 2.11.0",
|
||||||
"hashbrown 0.15.5",
|
"hashbrown 0.15.5",
|
||||||
"indexmap",
|
"indexmap 2.14.0",
|
||||||
"semver",
|
"semver",
|
||||||
]
|
]
|
||||||
|
|
||||||
@@ -3515,6 +3722,15 @@ version = "0.4.0"
|
|||||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
checksum = "ac3b87c63620426dd9b991e5ce0329eff545bccbbb34f3be09ff6fb6ab51b7b6"
|
checksum = "ac3b87c63620426dd9b991e5ce0329eff545bccbbb34f3be09ff6fb6ab51b7b6"
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "winapi-util"
|
||||||
|
version = "0.1.11"
|
||||||
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
|
checksum = "c2a7b1c03c876122aa43f3020e6c3c3ee5c05081c9a00739faf7503aeba10d22"
|
||||||
|
dependencies = [
|
||||||
|
"windows-sys",
|
||||||
|
]
|
||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
name = "winapi-x86_64-pc-windows-gnu"
|
name = "winapi-x86_64-pc-windows-gnu"
|
||||||
version = "0.4.0"
|
version = "0.4.0"
|
||||||
@@ -3564,7 +3780,7 @@ checksum = "b7c566e0f4b284dd6561c786d9cb0142da491f46a9fbed79ea69cdad5db17f21"
|
|||||||
dependencies = [
|
dependencies = [
|
||||||
"anyhow",
|
"anyhow",
|
||||||
"heck",
|
"heck",
|
||||||
"indexmap",
|
"indexmap 2.14.0",
|
||||||
"prettyplease",
|
"prettyplease",
|
||||||
"syn 2.0.117",
|
"syn 2.0.117",
|
||||||
"wasm-metadata",
|
"wasm-metadata",
|
||||||
@@ -3595,7 +3811,7 @@ checksum = "9d66ea20e9553b30172b5e831994e35fbde2d165325bec84fc43dbf6f4eb9cb2"
|
|||||||
dependencies = [
|
dependencies = [
|
||||||
"anyhow",
|
"anyhow",
|
||||||
"bitflags 2.11.0",
|
"bitflags 2.11.0",
|
||||||
"indexmap",
|
"indexmap 2.14.0",
|
||||||
"log",
|
"log",
|
||||||
"serde",
|
"serde",
|
||||||
"serde_derive",
|
"serde_derive",
|
||||||
@@ -3614,7 +3830,7 @@ checksum = "ecc8ac4bc1dc3381b7f59c34f00b67e18f910c2c0f50015669dde7def656a736"
|
|||||||
dependencies = [
|
dependencies = [
|
||||||
"anyhow",
|
"anyhow",
|
||||||
"id-arena",
|
"id-arena",
|
||||||
"indexmap",
|
"indexmap 2.14.0",
|
||||||
"log",
|
"log",
|
||||||
"semver",
|
"semver",
|
||||||
"serde",
|
"serde",
|
||||||
|
|||||||
@@ -7,4 +7,4 @@ edition = "2021"
|
|||||||
audiopus = "0.3.0-rc.0"
|
audiopus = "0.3.0-rc.0"
|
||||||
ezk-g722 = "0.1"
|
ezk-g722 = "0.1"
|
||||||
rubato = "0.14"
|
rubato = "0.14"
|
||||||
nnnoiseless = { version = "0.5", default-features = false }
|
nnnoiseless = "0.5"
|
||||||
|
|||||||
@@ -142,8 +142,10 @@ impl TranscodeState {
|
|||||||
}
|
}
|
||||||
|
|
||||||
/// High-quality sample rate conversion using rubato FFT resampler.
|
/// High-quality sample rate conversion using rubato FFT resampler.
|
||||||
/// Resamplers are cached by (from_rate, to_rate, chunk_size) and reused,
|
///
|
||||||
/// maintaining proper inter-frame state for continuous audio streams.
|
/// To maintain continuous filter state, the resampler always processes at a
|
||||||
|
/// canonical chunk size (20ms at the source rate). This prevents cache
|
||||||
|
/// thrashing from variable input sizes and preserves inter-frame filter state.
|
||||||
pub fn resample(
|
pub fn resample(
|
||||||
&mut self,
|
&mut self,
|
||||||
pcm: &[i16],
|
pcm: &[i16],
|
||||||
@@ -154,28 +156,61 @@ impl TranscodeState {
|
|||||||
return Ok(pcm.to_vec());
|
return Ok(pcm.to_vec());
|
||||||
}
|
}
|
||||||
|
|
||||||
let chunk = pcm.len();
|
let canonical_chunk = (from_rate as usize) / 50; // 20ms
|
||||||
let key = (from_rate, to_rate, chunk);
|
let key = (from_rate, to_rate, canonical_chunk);
|
||||||
|
|
||||||
if !self.resamplers.contains_key(&key) {
|
if !self.resamplers.contains_key(&key) {
|
||||||
let r =
|
let r = FftFixedIn::<f64>::new(
|
||||||
FftFixedIn::<f64>::new(from_rate as usize, to_rate as usize, chunk, 1, 1)
|
from_rate as usize,
|
||||||
.map_err(|e| format!("resampler {from_rate}->{to_rate}: {e}"))?;
|
to_rate as usize,
|
||||||
|
canonical_chunk,
|
||||||
|
1,
|
||||||
|
1,
|
||||||
|
)
|
||||||
|
.map_err(|e| format!("resampler {from_rate}->{to_rate}: {e}"))?;
|
||||||
self.resamplers.insert(key, r);
|
self.resamplers.insert(key, r);
|
||||||
}
|
}
|
||||||
let resampler = self.resamplers.get_mut(&key).unwrap();
|
let resampler = self.resamplers.get_mut(&key).unwrap();
|
||||||
|
|
||||||
let float_in: Vec<f64> = pcm.iter().map(|&s| s as f64 / 32768.0).collect();
|
let mut output = Vec::with_capacity(
|
||||||
let input = vec![float_in];
|
(pcm.len() as f64 * to_rate as f64 / from_rate as f64).ceil() as usize + 16,
|
||||||
|
);
|
||||||
|
|
||||||
let result = resampler
|
let mut offset = 0;
|
||||||
.process(&input, None)
|
while offset < pcm.len() {
|
||||||
.map_err(|e| format!("resample {from_rate}->{to_rate}: {e}"))?;
|
let remaining = pcm.len() - offset;
|
||||||
|
let copy_len = remaining.min(canonical_chunk);
|
||||||
|
let mut chunk = vec![0.0f64; canonical_chunk];
|
||||||
|
for i in 0..copy_len {
|
||||||
|
chunk[i] = pcm[offset + i] as f64 / 32768.0;
|
||||||
|
}
|
||||||
|
|
||||||
Ok(result[0]
|
let input = vec![chunk];
|
||||||
.iter()
|
let result = resampler
|
||||||
.map(|&s| (s * 32767.0).round().clamp(-32768.0, 32767.0) as i16)
|
.process(&input, None)
|
||||||
.collect())
|
.map_err(|e| format!("resample {from_rate}->{to_rate}: {e}"))?;
|
||||||
|
|
||||||
|
if remaining < canonical_chunk {
|
||||||
|
let expected =
|
||||||
|
(copy_len as f64 * to_rate as f64 / from_rate as f64).round() as usize;
|
||||||
|
let take = expected.min(result[0].len());
|
||||||
|
output.extend(
|
||||||
|
result[0][..take]
|
||||||
|
.iter()
|
||||||
|
.map(|&s| (s * 32767.0).round().clamp(-32768.0, 32767.0) as i16),
|
||||||
|
);
|
||||||
|
} else {
|
||||||
|
output.extend(
|
||||||
|
result[0]
|
||||||
|
.iter()
|
||||||
|
.map(|&s| (s * 32767.0).round().clamp(-32768.0, 32767.0) as i16),
|
||||||
|
);
|
||||||
|
}
|
||||||
|
|
||||||
|
offset += canonical_chunk;
|
||||||
|
}
|
||||||
|
|
||||||
|
Ok(output)
|
||||||
}
|
}
|
||||||
|
|
||||||
/// Apply RNNoise ML noise suppression to 48kHz PCM audio.
|
/// Apply RNNoise ML noise suppression to 48kHz PCM audio.
|
||||||
@@ -301,23 +336,81 @@ impl TranscodeState {
|
|||||||
|
|
||||||
/// Decode an encoded audio payload to f32 PCM samples in [-1.0, 1.0].
|
/// Decode an encoded audio payload to f32 PCM samples in [-1.0, 1.0].
|
||||||
/// Returns (samples, sample_rate).
|
/// Returns (samples, sample_rate).
|
||||||
|
///
|
||||||
|
/// For Opus, uses native float decode (no i16 quantization).
|
||||||
|
/// For G.722/G.711, decodes to i16 then converts (codec is natively i16).
|
||||||
pub fn decode_to_f32(&mut self, data: &[u8], pt: u8) -> Result<(Vec<f32>, u32), String> {
|
pub fn decode_to_f32(&mut self, data: &[u8], pt: u8) -> Result<(Vec<f32>, u32), String> {
|
||||||
let (pcm_i16, rate) = self.decode_to_pcm(data, pt)?;
|
match pt {
|
||||||
let pcm_f32 = pcm_i16.iter().map(|&s| s as f32 / 32768.0).collect();
|
PT_OPUS => {
|
||||||
Ok((pcm_f32, rate))
|
let mut pcm = vec![0.0f32; 5760]; // up to 120ms at 48kHz
|
||||||
|
let packet =
|
||||||
|
OpusPacket::try_from(data).map_err(|e| format!("opus packet: {e}"))?;
|
||||||
|
let out =
|
||||||
|
MutSignals::try_from(&mut pcm[..]).map_err(|e| format!("opus signals: {e}"))?;
|
||||||
|
let n: usize = self
|
||||||
|
.opus_dec
|
||||||
|
.decode_float(Some(packet), out, false)
|
||||||
|
.map_err(|e| format!("opus decode_float: {e}"))?
|
||||||
|
.into();
|
||||||
|
pcm.truncate(n);
|
||||||
|
Ok((pcm, 48000))
|
||||||
|
}
|
||||||
|
_ => {
|
||||||
|
// G.722, PCMU, PCMA: natively i16 codecs — decode then convert.
|
||||||
|
let (pcm_i16, rate) = self.decode_to_pcm(data, pt)?;
|
||||||
|
let pcm_f32 = pcm_i16.iter().map(|&s| s as f32 / 32768.0).collect();
|
||||||
|
Ok((pcm_f32, rate))
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Opus packet loss concealment — synthesize one frame to fill a gap.
|
||||||
|
/// Returns f32 PCM at 48kHz. `frame_size` should be 960 for 20ms.
|
||||||
|
pub fn opus_plc(&mut self, frame_size: usize) -> Result<Vec<f32>, String> {
|
||||||
|
let mut pcm = vec![0.0f32; frame_size];
|
||||||
|
let out = MutSignals::try_from(&mut pcm[..])
|
||||||
|
.map_err(|e| format!("opus plc signals: {e}"))?;
|
||||||
|
let n: usize = self
|
||||||
|
.opus_dec
|
||||||
|
.decode_float(None::<OpusPacket<'_>>, out, false)
|
||||||
|
.map_err(|e| format!("opus plc: {e}"))?
|
||||||
|
.into();
|
||||||
|
pcm.truncate(n);
|
||||||
|
Ok(pcm)
|
||||||
}
|
}
|
||||||
|
|
||||||
/// Encode f32 PCM samples ([-1.0, 1.0]) to an audio codec.
|
/// Encode f32 PCM samples ([-1.0, 1.0]) to an audio codec.
|
||||||
|
///
|
||||||
|
/// For Opus, uses native float encode (no i16 quantization).
|
||||||
|
/// For G.722/G.711, converts to i16 then encodes (codec is natively i16).
|
||||||
pub fn encode_from_f32(&mut self, pcm: &[f32], pt: u8) -> Result<Vec<u8>, String> {
|
pub fn encode_from_f32(&mut self, pcm: &[f32], pt: u8) -> Result<Vec<u8>, String> {
|
||||||
let pcm_i16: Vec<i16> = pcm
|
match pt {
|
||||||
.iter()
|
PT_OPUS => {
|
||||||
.map(|&s| (s * 32767.0).round().clamp(-32768.0, 32767.0) as i16)
|
let mut buf = vec![0u8; 4000];
|
||||||
.collect();
|
let n: usize = self
|
||||||
self.encode_from_pcm(&pcm_i16, pt)
|
.opus_enc
|
||||||
|
.encode_float(pcm, &mut buf)
|
||||||
|
.map_err(|e| format!("opus encode_float: {e}"))?
|
||||||
|
.into();
|
||||||
|
buf.truncate(n);
|
||||||
|
Ok(buf)
|
||||||
|
}
|
||||||
|
_ => {
|
||||||
|
// G.722, PCMU, PCMA: natively i16 codecs.
|
||||||
|
let pcm_i16: Vec<i16> = pcm
|
||||||
|
.iter()
|
||||||
|
.map(|&s| (s * 32767.0).round().clamp(-32768.0, 32767.0) as i16)
|
||||||
|
.collect();
|
||||||
|
self.encode_from_pcm(&pcm_i16, pt)
|
||||||
|
}
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
/// High-quality sample rate conversion for f32 PCM using rubato FFT resampler.
|
/// High-quality sample rate conversion for f32 PCM using rubato FFT resampler.
|
||||||
/// Uses a separate cache from the i16 resampler.
|
///
|
||||||
|
/// To maintain continuous filter state, the resampler always processes at a
|
||||||
|
/// canonical chunk size (20ms at the source rate). This prevents cache
|
||||||
|
/// thrashing from variable input sizes and preserves inter-frame filter state.
|
||||||
pub fn resample_f32(
|
pub fn resample_f32(
|
||||||
&mut self,
|
&mut self,
|
||||||
pcm: &[f32],
|
pcm: &[f32],
|
||||||
@@ -328,23 +421,50 @@ impl TranscodeState {
|
|||||||
return Ok(pcm.to_vec());
|
return Ok(pcm.to_vec());
|
||||||
}
|
}
|
||||||
|
|
||||||
let chunk = pcm.len();
|
let canonical_chunk = (from_rate as usize) / 50; // 20ms
|
||||||
let key = (from_rate, to_rate, chunk);
|
let key = (from_rate, to_rate, canonical_chunk);
|
||||||
|
|
||||||
if !self.resamplers_f32.contains_key(&key) {
|
if !self.resamplers_f32.contains_key(&key) {
|
||||||
let r =
|
let r = FftFixedIn::<f32>::new(
|
||||||
FftFixedIn::<f32>::new(from_rate as usize, to_rate as usize, chunk, 1, 1)
|
from_rate as usize,
|
||||||
.map_err(|e| format!("resampler f32 {from_rate}->{to_rate}: {e}"))?;
|
to_rate as usize,
|
||||||
|
canonical_chunk,
|
||||||
|
1,
|
||||||
|
1,
|
||||||
|
)
|
||||||
|
.map_err(|e| format!("resampler f32 {from_rate}->{to_rate}: {e}"))?;
|
||||||
self.resamplers_f32.insert(key, r);
|
self.resamplers_f32.insert(key, r);
|
||||||
}
|
}
|
||||||
let resampler = self.resamplers_f32.get_mut(&key).unwrap();
|
let resampler = self.resamplers_f32.get_mut(&key).unwrap();
|
||||||
|
|
||||||
let input = vec![pcm.to_vec()];
|
let mut output = Vec::with_capacity(
|
||||||
let result = resampler
|
(pcm.len() as f64 * to_rate as f64 / from_rate as f64).ceil() as usize + 16,
|
||||||
.process(&input, None)
|
);
|
||||||
.map_err(|e| format!("resample f32 {from_rate}->{to_rate}: {e}"))?;
|
|
||||||
|
|
||||||
Ok(result[0].clone())
|
let mut offset = 0;
|
||||||
|
while offset < pcm.len() {
|
||||||
|
let remaining = pcm.len() - offset;
|
||||||
|
let mut chunk = vec![0.0f32; canonical_chunk];
|
||||||
|
let copy_len = remaining.min(canonical_chunk);
|
||||||
|
chunk[..copy_len].copy_from_slice(&pcm[offset..offset + copy_len]);
|
||||||
|
|
||||||
|
let input = vec![chunk];
|
||||||
|
let result = resampler
|
||||||
|
.process(&input, None)
|
||||||
|
.map_err(|e| format!("resample f32 {from_rate}->{to_rate}: {e}"))?;
|
||||||
|
|
||||||
|
if remaining < canonical_chunk {
|
||||||
|
let expected =
|
||||||
|
(copy_len as f64 * to_rate as f64 / from_rate as f64).round() as usize;
|
||||||
|
output.extend_from_slice(&result[0][..expected.min(result[0].len())]);
|
||||||
|
} else {
|
||||||
|
output.extend_from_slice(&result[0]);
|
||||||
|
}
|
||||||
|
|
||||||
|
offset += canonical_chunk;
|
||||||
|
}
|
||||||
|
|
||||||
|
Ok(output)
|
||||||
}
|
}
|
||||||
|
|
||||||
/// Apply RNNoise ML noise suppression to 48kHz f32 PCM audio.
|
/// Apply RNNoise ML noise suppression to 48kHz f32 PCM audio.
|
||||||
|
|||||||
@@ -10,7 +10,7 @@ path = "src/main.rs"
|
|||||||
[dependencies]
|
[dependencies]
|
||||||
codec-lib = { path = "../codec-lib" }
|
codec-lib = { path = "../codec-lib" }
|
||||||
sip-proto = { path = "../sip-proto" }
|
sip-proto = { path = "../sip-proto" }
|
||||||
nnnoiseless = { version = "0.5", default-features = false }
|
nnnoiseless = "0.5"
|
||||||
tokio = { version = "1", features = ["full"] }
|
tokio = { version = "1", features = ["full"] }
|
||||||
serde = { version = "1", features = ["derive"] }
|
serde = { version = "1", features = ["derive"] }
|
||||||
serde_json = "1"
|
serde_json = "1"
|
||||||
|
|||||||
@@ -20,6 +20,35 @@ use std::net::SocketAddr;
|
|||||||
use std::sync::Arc;
|
use std::sync::Arc;
|
||||||
use tokio::net::UdpSocket;
|
use tokio::net::UdpSocket;
|
||||||
|
|
||||||
|
/// Emit a `leg_added` event with full leg information.
|
||||||
|
/// Free function (not a method) to avoid `&self` borrow conflicts when `self.calls` is borrowed.
|
||||||
|
fn emit_leg_added_event(tx: &OutTx, call_id: &str, leg: &LegInfo) {
|
||||||
|
let metadata: serde_json::Value = if leg.metadata.is_empty() {
|
||||||
|
serde_json::json!({})
|
||||||
|
} else {
|
||||||
|
serde_json::Value::Object(
|
||||||
|
leg.metadata
|
||||||
|
.iter()
|
||||||
|
.map(|(k, v)| (k.clone(), v.clone()))
|
||||||
|
.collect(),
|
||||||
|
)
|
||||||
|
};
|
||||||
|
emit_event(
|
||||||
|
tx,
|
||||||
|
"leg_added",
|
||||||
|
serde_json::json!({
|
||||||
|
"call_id": call_id,
|
||||||
|
"leg_id": leg.id,
|
||||||
|
"kind": leg.kind.as_str(),
|
||||||
|
"state": leg.state.as_str(),
|
||||||
|
"codec": sip_proto::helpers::codec_name(leg.codec_pt),
|
||||||
|
"rtpPort": leg.rtp_port,
|
||||||
|
"remoteMedia": leg.remote_media.map(|a| format!("{}:{}", a.ip(), a.port())),
|
||||||
|
"metadata": metadata,
|
||||||
|
}),
|
||||||
|
);
|
||||||
|
}
|
||||||
|
|
||||||
pub struct CallManager {
|
pub struct CallManager {
|
||||||
/// All active calls, keyed by internal call ID.
|
/// All active calls, keyed by internal call ID.
|
||||||
pub calls: HashMap<String, Call>,
|
pub calls: HashMap<String, Call>,
|
||||||
@@ -167,7 +196,17 @@ impl CallManager {
|
|||||||
};
|
};
|
||||||
// Mutable borrow on call/leg is now released.
|
// Mutable borrow on call/leg is now released.
|
||||||
|
|
||||||
let sip_pt = codecs.first().copied().unwrap_or(9);
|
let mut sip_pt = codecs.first().copied().unwrap_or(9);
|
||||||
|
|
||||||
|
// If the message has SDP (e.g., 200 OK answer), use the negotiated codec
|
||||||
|
// instead of the offered one.
|
||||||
|
if msg.has_sdp_body() {
|
||||||
|
if let Some(ep) = parse_sdp_endpoint(&msg.body) {
|
||||||
|
if let Some(pt) = ep.codec_pt {
|
||||||
|
sip_pt = pt;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
match action {
|
match action {
|
||||||
SipLegAction::None => {}
|
SipLegAction::None => {}
|
||||||
@@ -265,14 +304,27 @@ impl CallManager {
|
|||||||
dev_leg.state = LegState::Connected;
|
dev_leg.state = LegState::Connected;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
emit_event(
|
||||||
|
&self.out_tx,
|
||||||
|
"leg_state_changed",
|
||||||
|
serde_json::json!({ "call_id": call_id, "leg_id": dev_leg_id, "state": "connected" }),
|
||||||
|
);
|
||||||
|
|
||||||
// Wire device leg to mixer.
|
// Wire device leg to mixer.
|
||||||
|
// Use the device's preferred codec from its INVITE SDP,
|
||||||
|
// not the provider's negotiated codec.
|
||||||
|
let dev_pt = device_invite
|
||||||
|
.has_sdp_body()
|
||||||
|
.then(|| parse_sdp_endpoint(&device_invite.body))
|
||||||
|
.flatten()
|
||||||
|
.and_then(|ep| ep.codec_pt)
|
||||||
|
.unwrap_or(sip_pt);
|
||||||
if let Some(dev_remote_addr) = dev_remote {
|
if let Some(dev_remote_addr) = dev_remote {
|
||||||
let dev_channels = create_leg_channels();
|
let dev_channels = create_leg_channels();
|
||||||
spawn_sip_inbound(dev_rtp_socket.clone(), dev_channels.inbound_tx);
|
spawn_sip_inbound(dev_rtp_socket.clone(), dev_channels.inbound_tx);
|
||||||
spawn_sip_outbound(dev_rtp_socket, dev_remote_addr, dev_channels.outbound_rx);
|
spawn_sip_outbound(dev_rtp_socket, dev_remote_addr, dev_channels.outbound_rx);
|
||||||
if let Some(call) = self.calls.get(call_id) {
|
if let Some(call) = self.calls.get(call_id) {
|
||||||
call.add_leg_to_mixer(&dev_leg_id, sip_pt, dev_channels.inbound_rx, dev_channels.outbound_tx)
|
call.add_leg_to_mixer(&dev_leg_id, dev_pt, dev_channels.inbound_rx, dev_channels.outbound_tx)
|
||||||
.await;
|
.await;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@@ -324,6 +376,8 @@ impl CallManager {
|
|||||||
leg.state = LegState::Terminated;
|
leg.state = LegState::Terminated;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
emit_event(&self.out_tx, "leg_state_changed",
|
||||||
|
serde_json::json!({ "call_id": call_id, "leg_id": leg_id, "state": "terminated" }));
|
||||||
emit_event(&self.out_tx, "call_ended",
|
emit_event(&self.out_tx, "call_ended",
|
||||||
serde_json::json!({ "call_id": call_id, "reason": reason, "duration": duration }));
|
serde_json::json!({ "call_id": call_id, "reason": reason, "duration": duration }));
|
||||||
self.terminate_call(call_id).await;
|
self.terminate_call(call_id).await;
|
||||||
@@ -529,21 +583,30 @@ impl CallManager {
|
|||||||
if let Some(leg) = call.legs.get_mut(this_leg_id) {
|
if let Some(leg) = call.legs.get_mut(this_leg_id) {
|
||||||
leg.state = LegState::Ringing;
|
leg.state = LegState::Ringing;
|
||||||
}
|
}
|
||||||
|
emit_event(&self.out_tx, "leg_state_changed",
|
||||||
|
serde_json::json!({ "call_id": call_id, "leg_id": this_leg_id, "state": "ringing" }));
|
||||||
} else if code >= 200 && code < 300 {
|
} else if code >= 200 && code < 300 {
|
||||||
let mut needs_wiring = false;
|
let mut needs_wiring = false;
|
||||||
if let Some(leg) = call.legs.get_mut(this_leg_id) {
|
if let Some(leg) = call.legs.get_mut(this_leg_id) {
|
||||||
leg.state = LegState::Connected;
|
leg.state = LegState::Connected;
|
||||||
// Learn remote media from SDP.
|
// Learn remote media and negotiated codec from SDP answer.
|
||||||
if msg.has_sdp_body() {
|
if msg.has_sdp_body() {
|
||||||
if let Some(ep) = parse_sdp_endpoint(&msg.body) {
|
if let Some(ep) = parse_sdp_endpoint(&msg.body) {
|
||||||
if let Ok(addr) = format!("{}:{}", ep.address, ep.port).parse() {
|
if let Ok(addr) = format!("{}:{}", ep.address, ep.port).parse() {
|
||||||
leg.remote_media = Some(addr);
|
leg.remote_media = Some(addr);
|
||||||
}
|
}
|
||||||
|
// Use the codec from the SDP answer (what the remote actually selected).
|
||||||
|
if let Some(pt) = ep.codec_pt {
|
||||||
|
leg.codec_pt = pt;
|
||||||
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
needs_wiring = true;
|
needs_wiring = true;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
emit_event(&self.out_tx, "leg_state_changed",
|
||||||
|
serde_json::json!({ "call_id": call_id, "leg_id": this_leg_id, "state": "connected" }));
|
||||||
|
|
||||||
if call.state != CallState::Connected {
|
if call.state != CallState::Connected {
|
||||||
call.state = CallState::Connected;
|
call.state = CallState::Connected;
|
||||||
emit_event(&self.out_tx, "call_answered", serde_json::json!({ "call_id": call_id }));
|
emit_event(&self.out_tx, "call_answered", serde_json::json!({ "call_id": call_id }));
|
||||||
@@ -689,15 +752,19 @@ impl CallManager {
|
|||||||
call.callee_number = Some(called_number);
|
call.callee_number = Some(called_number);
|
||||||
call.state = CallState::Ringing;
|
call.state = CallState::Ringing;
|
||||||
|
|
||||||
let codec_pt = provider_config.codecs.first().copied().unwrap_or(9);
|
let mut codec_pt = provider_config.codecs.first().copied().unwrap_or(9);
|
||||||
|
|
||||||
// Provider leg — extract media from SDP.
|
// Provider leg — extract media and negotiated codec from SDP.
|
||||||
let mut provider_media: Option<SocketAddr> = None;
|
let mut provider_media: Option<SocketAddr> = None;
|
||||||
if invite.has_sdp_body() {
|
if invite.has_sdp_body() {
|
||||||
if let Some(ep) = parse_sdp_endpoint(&invite.body) {
|
if let Some(ep) = parse_sdp_endpoint(&invite.body) {
|
||||||
if let Ok(addr) = format!("{}:{}", ep.address, ep.port).parse() {
|
if let Ok(addr) = format!("{}:{}", ep.address, ep.port).parse() {
|
||||||
provider_media = Some(addr);
|
provider_media = Some(addr);
|
||||||
}
|
}
|
||||||
|
// Use the codec from the provider's SDP offer (what they actually want to use).
|
||||||
|
if let Some(pt) = ep.codec_pt {
|
||||||
|
codec_pt = pt;
|
||||||
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -767,6 +834,16 @@ impl CallManager {
|
|||||||
// Store the call.
|
// Store the call.
|
||||||
self.calls.insert(call_id.clone(), call);
|
self.calls.insert(call_id.clone(), call);
|
||||||
|
|
||||||
|
// Emit leg_added for both initial legs.
|
||||||
|
if let Some(call) = self.calls.get(&call_id) {
|
||||||
|
if let Some(leg) = call.legs.get(&provider_leg_id) {
|
||||||
|
emit_leg_added_event(&self.out_tx, &call_id, leg);
|
||||||
|
}
|
||||||
|
if let Some(leg) = call.legs.get(&device_leg_id) {
|
||||||
|
emit_leg_added_event(&self.out_tx, &call_id, leg);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
Some(call_id)
|
Some(call_id)
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -854,6 +931,14 @@ impl CallManager {
|
|||||||
.insert(sip_call_id, (call_id.clone(), leg_id));
|
.insert(sip_call_id, (call_id.clone(), leg_id));
|
||||||
|
|
||||||
self.calls.insert(call_id.clone(), call);
|
self.calls.insert(call_id.clone(), call);
|
||||||
|
|
||||||
|
// Emit leg_added for the provider leg.
|
||||||
|
if let Some(call) = self.calls.get(&call_id) {
|
||||||
|
for leg in call.legs.values() {
|
||||||
|
emit_leg_added_event(&self.out_tx, &call_id, leg);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
Some(call_id)
|
Some(call_id)
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -1002,6 +1087,14 @@ impl CallManager {
|
|||||||
.insert(provider_sip_call_id, (call_id.clone(), provider_leg_id));
|
.insert(provider_sip_call_id, (call_id.clone(), provider_leg_id));
|
||||||
|
|
||||||
self.calls.insert(call_id.clone(), call);
|
self.calls.insert(call_id.clone(), call);
|
||||||
|
|
||||||
|
// Emit leg_added for both initial legs (device + provider).
|
||||||
|
if let Some(call) = self.calls.get(&call_id) {
|
||||||
|
for leg in call.legs.values() {
|
||||||
|
emit_leg_added_event(&self.out_tx, &call_id, leg);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
Some(call_id)
|
Some(call_id)
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -1069,17 +1162,11 @@ impl CallManager {
|
|||||||
let call = self.calls.get_mut(call_id).unwrap();
|
let call = self.calls.get_mut(call_id).unwrap();
|
||||||
call.legs.insert(leg_id.clone(), leg_info);
|
call.legs.insert(leg_id.clone(), leg_info);
|
||||||
|
|
||||||
emit_event(
|
if let Some(call) = self.calls.get(call_id) {
|
||||||
&self.out_tx,
|
if let Some(leg) = call.legs.get(&leg_id) {
|
||||||
"leg_added",
|
emit_leg_added_event(&self.out_tx, call_id, leg);
|
||||||
serde_json::json!({
|
}
|
||||||
"call_id": call_id,
|
}
|
||||||
"leg_id": leg_id,
|
|
||||||
"kind": "sip-provider",
|
|
||||||
"state": "inviting",
|
|
||||||
"number": number,
|
|
||||||
}),
|
|
||||||
);
|
|
||||||
|
|
||||||
Some(leg_id)
|
Some(leg_id)
|
||||||
}
|
}
|
||||||
@@ -1145,17 +1232,11 @@ impl CallManager {
|
|||||||
let call = self.calls.get_mut(call_id).unwrap();
|
let call = self.calls.get_mut(call_id).unwrap();
|
||||||
call.legs.insert(leg_id.clone(), leg_info);
|
call.legs.insert(leg_id.clone(), leg_info);
|
||||||
|
|
||||||
emit_event(
|
if let Some(call) = self.calls.get(call_id) {
|
||||||
&self.out_tx,
|
if let Some(leg) = call.legs.get(&leg_id) {
|
||||||
"leg_added",
|
emit_leg_added_event(&self.out_tx, call_id, leg);
|
||||||
serde_json::json!({
|
}
|
||||||
"call_id": call_id,
|
}
|
||||||
"leg_id": leg_id,
|
|
||||||
"kind": "sip-device",
|
|
||||||
"state": "inviting",
|
|
||||||
"device_id": device_id,
|
|
||||||
}),
|
|
||||||
);
|
|
||||||
|
|
||||||
Some(leg_id)
|
Some(leg_id)
|
||||||
}
|
}
|
||||||
@@ -1242,6 +1323,13 @@ impl CallManager {
|
|||||||
None => return false,
|
None => return false,
|
||||||
};
|
};
|
||||||
|
|
||||||
|
// Emit leg_removed for source call.
|
||||||
|
emit_event(
|
||||||
|
&self.out_tx,
|
||||||
|
"leg_removed",
|
||||||
|
serde_json::json!({ "call_id": source_call_id, "leg_id": leg_id }),
|
||||||
|
);
|
||||||
|
|
||||||
// Update SIP index to point to the target call.
|
// Update SIP index to point to the target call.
|
||||||
if let Some(sip_cid) = &leg_info.sip_call_id {
|
if let Some(sip_cid) = &leg_info.sip_call_id {
|
||||||
self.sip_index.insert(
|
self.sip_index.insert(
|
||||||
@@ -1274,15 +1362,12 @@ impl CallManager {
|
|||||||
let target_call = self.calls.get_mut(target_call_id).unwrap();
|
let target_call = self.calls.get_mut(target_call_id).unwrap();
|
||||||
target_call.legs.insert(leg_id.to_string(), leg_info);
|
target_call.legs.insert(leg_id.to_string(), leg_info);
|
||||||
|
|
||||||
emit_event(
|
// Emit leg_added for target call.
|
||||||
&self.out_tx,
|
if let Some(target) = self.calls.get(target_call_id) {
|
||||||
"leg_transferred",
|
if let Some(leg) = target.legs.get(leg_id) {
|
||||||
serde_json::json!({
|
emit_leg_added_event(&self.out_tx, target_call_id, leg);
|
||||||
"leg_id": leg_id,
|
}
|
||||||
"source_call_id": source_call_id,
|
}
|
||||||
"target_call_id": target_call_id,
|
|
||||||
}),
|
|
||||||
);
|
|
||||||
|
|
||||||
// Check if source call has too few legs remaining.
|
// Check if source call has too few legs remaining.
|
||||||
let source_call = self.calls.get(source_call_id).unwrap();
|
let source_call = self.calls.get(source_call_id).unwrap();
|
||||||
@@ -1385,6 +1470,11 @@ impl CallManager {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
leg.state = LegState::Terminated;
|
leg.state = LegState::Terminated;
|
||||||
|
emit_event(
|
||||||
|
&self.out_tx,
|
||||||
|
"leg_state_changed",
|
||||||
|
serde_json::json!({ "call_id": call_id, "leg_id": leg.id, "state": "terminated" }),
|
||||||
|
);
|
||||||
}
|
}
|
||||||
|
|
||||||
emit_event(
|
emit_event(
|
||||||
@@ -1503,6 +1593,13 @@ impl CallManager {
|
|||||||
);
|
);
|
||||||
self.calls.insert(call_id.to_string(), call);
|
self.calls.insert(call_id.to_string(), call);
|
||||||
|
|
||||||
|
// Emit leg_added for the provider leg.
|
||||||
|
if let Some(call) = self.calls.get(call_id) {
|
||||||
|
for leg in call.legs.values() {
|
||||||
|
emit_leg_added_event(&self.out_tx, call_id, leg);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
// Build recording path.
|
// Build recording path.
|
||||||
let timestamp = std::time::SystemTime::now()
|
let timestamp = std::time::SystemTime::now()
|
||||||
.duration_since(std::time::UNIX_EPOCH)
|
.duration_since(std::time::UNIX_EPOCH)
|
||||||
|
|||||||
188
rust/crates/proxy-engine/src/jitter_buffer.rs
Normal file
188
rust/crates/proxy-engine/src/jitter_buffer.rs
Normal file
@@ -0,0 +1,188 @@
|
|||||||
|
//! Per-leg adaptive jitter buffer for the audio mixer.
|
||||||
|
//!
|
||||||
|
//! Sits between inbound RTP packet reception and the mixer's decode step.
|
||||||
|
//! Reorders packets by sequence number and delivers exactly one frame per
|
||||||
|
//! 20ms mixer tick, smoothing out network jitter. When a packet is missing,
|
||||||
|
//! the mixer can invoke codec PLC to conceal the gap.
|
||||||
|
|
||||||
|
use crate::mixer::RtpPacket;
|
||||||
|
use std::collections::BTreeMap;
|
||||||
|
|
||||||
|
/// Per-leg jitter buffer. Collects RTP packets keyed by sequence number,
|
||||||
|
/// delivers one frame per 20ms tick in sequence order.
|
||||||
|
///
|
||||||
|
/// Adaptive target depth: starts at 3 frames (60ms), adjusts between
|
||||||
|
/// 2–6 frames based on observed jitter.
|
||||||
|
pub struct JitterBuffer {
|
||||||
|
/// Packets waiting for playout, keyed by seq number.
|
||||||
|
buffer: BTreeMap<u16, RtpPacket>,
|
||||||
|
/// Next expected sequence number for playout.
|
||||||
|
next_seq: Option<u16>,
|
||||||
|
/// Target buffer depth in frames (adaptive).
|
||||||
|
target_depth: u32,
|
||||||
|
/// Current fill level high-water mark (for adaptation).
|
||||||
|
max_fill_seen: u32,
|
||||||
|
/// Ticks since last adaptation adjustment.
|
||||||
|
adapt_counter: u32,
|
||||||
|
/// Consecutive ticks where buffer was empty (for ramp-up).
|
||||||
|
empty_streak: u32,
|
||||||
|
/// Consecutive ticks where buffer had excess (for ramp-down).
|
||||||
|
excess_streak: u32,
|
||||||
|
/// Whether we've started playout (initial fill complete).
|
||||||
|
playing: bool,
|
||||||
|
/// Number of frames consumed since start (for stats).
|
||||||
|
frames_consumed: u64,
|
||||||
|
/// Number of frames lost (gap in sequence).
|
||||||
|
frames_lost: u64,
|
||||||
|
}
|
||||||
|
|
||||||
|
/// What the mixer gets back each tick.
|
||||||
|
pub enum JitterResult {
|
||||||
|
/// A packet is available for decoding.
|
||||||
|
Packet(RtpPacket),
|
||||||
|
/// Packet was expected but missing — invoke PLC.
|
||||||
|
Missing,
|
||||||
|
/// Buffer is in initial fill phase — output silence.
|
||||||
|
Filling,
|
||||||
|
}
|
||||||
|
|
||||||
|
impl JitterBuffer {
|
||||||
|
pub fn new() -> Self {
|
||||||
|
Self {
|
||||||
|
buffer: BTreeMap::new(),
|
||||||
|
next_seq: None,
|
||||||
|
target_depth: 3, // 60ms initial target
|
||||||
|
max_fill_seen: 0,
|
||||||
|
adapt_counter: 0,
|
||||||
|
empty_streak: 0,
|
||||||
|
excess_streak: 0,
|
||||||
|
playing: false,
|
||||||
|
frames_consumed: 0,
|
||||||
|
frames_lost: 0,
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Push a received RTP packet into the buffer.
|
||||||
|
pub fn push(&mut self, pkt: RtpPacket) {
|
||||||
|
// Ignore duplicates.
|
||||||
|
if self.buffer.contains_key(&pkt.seq) {
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
|
||||||
|
// Detect large forward seq jump (hold/resume, SSRC change).
|
||||||
|
if let Some(next) = self.next_seq {
|
||||||
|
let jump = pkt.seq.wrapping_sub(next);
|
||||||
|
if jump > 1000 && jump < 0x8000 {
|
||||||
|
// Massive forward jump — reset buffer.
|
||||||
|
self.reset();
|
||||||
|
self.next_seq = Some(pkt.seq);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
if self.next_seq.is_none() {
|
||||||
|
self.next_seq = Some(pkt.seq);
|
||||||
|
}
|
||||||
|
|
||||||
|
self.buffer.insert(pkt.seq, pkt);
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Consume one frame for the current 20ms tick.
|
||||||
|
/// Called once per mixer tick per leg.
|
||||||
|
pub fn consume(&mut self) -> JitterResult {
|
||||||
|
// Track fill level for adaptation.
|
||||||
|
let fill = self.buffer.len() as u32;
|
||||||
|
if fill > self.max_fill_seen {
|
||||||
|
self.max_fill_seen = fill;
|
||||||
|
}
|
||||||
|
|
||||||
|
// Initial fill phase: wait until we have target_depth packets.
|
||||||
|
if !self.playing {
|
||||||
|
if fill >= self.target_depth {
|
||||||
|
self.playing = true;
|
||||||
|
} else {
|
||||||
|
return JitterResult::Filling;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
let seq = match self.next_seq {
|
||||||
|
Some(s) => s,
|
||||||
|
None => return JitterResult::Filling,
|
||||||
|
};
|
||||||
|
|
||||||
|
// Advance next_seq (wrapping u16).
|
||||||
|
self.next_seq = Some(seq.wrapping_add(1));
|
||||||
|
|
||||||
|
// Try to pull the expected sequence number.
|
||||||
|
if let Some(pkt) = self.buffer.remove(&seq) {
|
||||||
|
self.frames_consumed += 1;
|
||||||
|
self.empty_streak = 0;
|
||||||
|
|
||||||
|
// Adaptive: if buffer is consistently deep, we can tighten.
|
||||||
|
if fill > self.target_depth + 2 {
|
||||||
|
self.excess_streak += 1;
|
||||||
|
} else {
|
||||||
|
self.excess_streak = 0;
|
||||||
|
}
|
||||||
|
|
||||||
|
JitterResult::Packet(pkt)
|
||||||
|
} else {
|
||||||
|
// Packet missing — PLC needed.
|
||||||
|
self.frames_lost += 1;
|
||||||
|
self.empty_streak += 1;
|
||||||
|
self.excess_streak = 0;
|
||||||
|
|
||||||
|
JitterResult::Missing
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Run adaptation logic. Call every tick; internally gates to ~1s intervals.
|
||||||
|
pub fn adapt(&mut self) {
|
||||||
|
self.adapt_counter += 1;
|
||||||
|
if self.adapt_counter < 50 {
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
self.adapt_counter = 0;
|
||||||
|
|
||||||
|
// If we had many empty ticks, increase depth.
|
||||||
|
if self.empty_streak > 3 && self.target_depth < 6 {
|
||||||
|
self.target_depth += 1;
|
||||||
|
}
|
||||||
|
// If buffer consistently overfull, decrease depth.
|
||||||
|
else if self.excess_streak > 25 && self.target_depth > 2 {
|
||||||
|
self.target_depth -= 1;
|
||||||
|
}
|
||||||
|
|
||||||
|
self.max_fill_seen = 0;
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Discard packets that are too old (seq far behind next_seq).
|
||||||
|
/// Prevents unbounded memory growth from reordered/late packets.
|
||||||
|
pub fn prune_stale(&mut self) {
|
||||||
|
if let Some(next) = self.next_seq {
|
||||||
|
// Remove anything more than 100 frames behind playout point.
|
||||||
|
// Use wrapping arithmetic: if (next - seq) > 100, it's stale.
|
||||||
|
let stale: Vec<u16> = self
|
||||||
|
.buffer
|
||||||
|
.keys()
|
||||||
|
.filter(|&&seq| {
|
||||||
|
let age = next.wrapping_sub(seq);
|
||||||
|
age > 100 && age < 0x8000 // < 0x8000 means it's actually behind, not ahead
|
||||||
|
})
|
||||||
|
.copied()
|
||||||
|
.collect();
|
||||||
|
for seq in stale {
|
||||||
|
self.buffer.remove(&seq);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Reset the buffer (e.g., after re-INVITE / hold-resume).
|
||||||
|
pub fn reset(&mut self) {
|
||||||
|
self.buffer.clear();
|
||||||
|
self.next_seq = None;
|
||||||
|
self.playing = false;
|
||||||
|
self.empty_streak = 0;
|
||||||
|
self.excess_streak = 0;
|
||||||
|
self.adapt_counter = 0;
|
||||||
|
}
|
||||||
|
}
|
||||||
@@ -35,7 +35,8 @@ pub fn create_leg_channels() -> LegChannels {
|
|||||||
}
|
}
|
||||||
|
|
||||||
/// Spawn the inbound I/O task for a SIP leg.
|
/// Spawn the inbound I/O task for a SIP leg.
|
||||||
/// Reads RTP from the socket, strips the 12-byte header, sends payload to the mixer.
|
/// Reads RTP from the socket, parses the variable-length header (RFC 3550),
|
||||||
|
/// and sends the payload to the mixer.
|
||||||
/// Returns the JoinHandle (exits when the inbound_tx channel is dropped).
|
/// Returns the JoinHandle (exits when the inbound_tx channel is dropped).
|
||||||
pub fn spawn_sip_inbound(
|
pub fn spawn_sip_inbound(
|
||||||
rtp_socket: Arc<UdpSocket>,
|
rtp_socket: Arc<UdpSocket>,
|
||||||
@@ -51,12 +52,29 @@ pub fn spawn_sip_inbound(
|
|||||||
}
|
}
|
||||||
let pt = buf[1] & 0x7F;
|
let pt = buf[1] & 0x7F;
|
||||||
let marker = (buf[1] & 0x80) != 0;
|
let marker = (buf[1] & 0x80) != 0;
|
||||||
|
let seq = u16::from_be_bytes([buf[2], buf[3]]);
|
||||||
let timestamp = u32::from_be_bytes([buf[4], buf[5], buf[6], buf[7]]);
|
let timestamp = u32::from_be_bytes([buf[4], buf[5], buf[6], buf[7]]);
|
||||||
let payload = buf[12..n].to_vec();
|
|
||||||
|
// RFC 3550: header length = 12 + (CC * 4) + optional extension.
|
||||||
|
let cc = (buf[0] & 0x0F) as usize;
|
||||||
|
let has_extension = (buf[0] & 0x10) != 0;
|
||||||
|
let mut offset = 12 + cc * 4;
|
||||||
|
if has_extension {
|
||||||
|
if offset + 4 > n {
|
||||||
|
continue; // Malformed: extension header truncated.
|
||||||
|
}
|
||||||
|
let ext_len = u16::from_be_bytes([buf[offset + 2], buf[offset + 3]]) as usize;
|
||||||
|
offset += 4 + ext_len * 4;
|
||||||
|
}
|
||||||
|
if offset >= n {
|
||||||
|
continue; // No payload after header.
|
||||||
|
}
|
||||||
|
|
||||||
|
let payload = buf[offset..n].to_vec();
|
||||||
if payload.is_empty() {
|
if payload.is_empty() {
|
||||||
continue;
|
continue;
|
||||||
}
|
}
|
||||||
if inbound_tx.send(RtpPacket { payload, payload_type: pt, marker, timestamp }).await.is_err() {
|
if inbound_tx.send(RtpPacket { payload, payload_type: pt, marker, seq, timestamp }).await.is_err() {
|
||||||
break; // Channel closed — leg removed.
|
break; // Channel closed — leg removed.
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -12,6 +12,7 @@ mod call_manager;
|
|||||||
mod config;
|
mod config;
|
||||||
mod dtmf;
|
mod dtmf;
|
||||||
mod ipc;
|
mod ipc;
|
||||||
|
mod jitter_buffer;
|
||||||
mod leg_io;
|
mod leg_io;
|
||||||
mod mixer;
|
mod mixer;
|
||||||
mod provider;
|
mod provider;
|
||||||
@@ -677,6 +678,10 @@ async fn handle_webrtc_link(
|
|||||||
"leg_id": session_id,
|
"leg_id": session_id,
|
||||||
"kind": "webrtc",
|
"kind": "webrtc",
|
||||||
"state": "connected",
|
"state": "connected",
|
||||||
|
"codec": "Opus",
|
||||||
|
"rtpPort": 0,
|
||||||
|
"remoteMedia": null,
|
||||||
|
"metadata": {},
|
||||||
}));
|
}));
|
||||||
|
|
||||||
respond_ok(out_tx, &cmd.id, serde_json::json!({
|
respond_ok(out_tx, &cmd.id, serde_json::json!({
|
||||||
@@ -1125,8 +1130,11 @@ async fn handle_add_tool_leg(
|
|||||||
"call_id": call_id,
|
"call_id": call_id,
|
||||||
"leg_id": tool_leg_id,
|
"leg_id": tool_leg_id,
|
||||||
"kind": "tool",
|
"kind": "tool",
|
||||||
"tool_type": tool_type_str,
|
|
||||||
"state": "connected",
|
"state": "connected",
|
||||||
|
"codec": null,
|
||||||
|
"rtpPort": 0,
|
||||||
|
"remoteMedia": null,
|
||||||
|
"metadata": { "tool_type": tool_type_str },
|
||||||
}),
|
}),
|
||||||
);
|
);
|
||||||
|
|
||||||
|
|||||||
@@ -15,6 +15,7 @@
|
|||||||
//! 6. Forward DTMF between participant legs only
|
//! 6. Forward DTMF between participant legs only
|
||||||
|
|
||||||
use crate::ipc::{emit_event, OutTx};
|
use crate::ipc::{emit_event, OutTx};
|
||||||
|
use crate::jitter_buffer::{JitterBuffer, JitterResult};
|
||||||
use crate::rtp::{build_rtp_header, rtp_clock_increment};
|
use crate::rtp::{build_rtp_header, rtp_clock_increment};
|
||||||
use codec_lib::{codec_sample_rate, new_denoiser, TranscodeState};
|
use codec_lib::{codec_sample_rate, new_denoiser, TranscodeState};
|
||||||
use nnnoiseless::DenoiseState;
|
use nnnoiseless::DenoiseState;
|
||||||
@@ -35,6 +36,8 @@ pub struct RtpPacket {
|
|||||||
pub payload_type: u8,
|
pub payload_type: u8,
|
||||||
/// RTP marker bit (first packet of a DTMF event, etc.).
|
/// RTP marker bit (first packet of a DTMF event, etc.).
|
||||||
pub marker: bool,
|
pub marker: bool,
|
||||||
|
/// RTP sequence number for reordering.
|
||||||
|
pub seq: u16,
|
||||||
/// RTP timestamp from the original packet header.
|
/// RTP timestamp from the original packet header.
|
||||||
pub timestamp: u32,
|
pub timestamp: u32,
|
||||||
}
|
}
|
||||||
@@ -162,6 +165,8 @@ struct MixerLegSlot {
|
|||||||
last_pcm_frame: Vec<f32>,
|
last_pcm_frame: Vec<f32>,
|
||||||
/// Number of consecutive ticks with no inbound packet.
|
/// Number of consecutive ticks with no inbound packet.
|
||||||
silent_ticks: u32,
|
silent_ticks: u32,
|
||||||
|
/// Per-leg jitter buffer for packet reordering and timing.
|
||||||
|
jitter: JitterBuffer,
|
||||||
// RTP output state.
|
// RTP output state.
|
||||||
rtp_seq: u16,
|
rtp_seq: u16,
|
||||||
rtp_ts: u32,
|
rtp_ts: u32,
|
||||||
@@ -236,6 +241,7 @@ async fn mixer_loop(
|
|||||||
rtp_ts: 0,
|
rtp_ts: 0,
|
||||||
rtp_ssrc: rand::random(),
|
rtp_ssrc: rand::random(),
|
||||||
role: LegRole::Participant,
|
role: LegRole::Participant,
|
||||||
|
jitter: JitterBuffer::new(),
|
||||||
},
|
},
|
||||||
);
|
);
|
||||||
}
|
}
|
||||||
@@ -319,63 +325,93 @@ async fn mixer_loop(
|
|||||||
continue;
|
continue;
|
||||||
}
|
}
|
||||||
|
|
||||||
// ── 2. Drain inbound packets, decode to 16kHz PCM. ─────────
|
// ── 2. Drain inbound packets, decode to 48kHz f32 PCM. ────
|
||||||
// DTMF (PT 101) packets are collected separately.
|
// DTMF (PT 101) packets are collected separately.
|
||||||
|
// Audio packets are sorted by sequence number and decoded
|
||||||
|
// in order to maintain codec state (critical for G.722 ADPCM).
|
||||||
let leg_ids: Vec<String> = legs.keys().cloned().collect();
|
let leg_ids: Vec<String> = legs.keys().cloned().collect();
|
||||||
let mut dtmf_forward: Vec<(String, RtpPacket)> = Vec::new();
|
let mut dtmf_forward: Vec<(String, RtpPacket)> = Vec::new();
|
||||||
|
|
||||||
for lid in &leg_ids {
|
for lid in &leg_ids {
|
||||||
let slot = legs.get_mut(lid).unwrap();
|
let slot = legs.get_mut(lid).unwrap();
|
||||||
|
|
||||||
// Drain channel — collect DTMF packets separately, keep latest audio.
|
// Step 2a: Drain all pending packets into the jitter buffer.
|
||||||
let mut latest_audio: Option<RtpPacket> = None;
|
let mut got_audio = false;
|
||||||
loop {
|
loop {
|
||||||
match slot.inbound_rx.try_recv() {
|
match slot.inbound_rx.try_recv() {
|
||||||
Ok(pkt) => {
|
Ok(pkt) => {
|
||||||
if pkt.payload_type == 101 {
|
if pkt.payload_type == 101 {
|
||||||
// DTMF telephone-event: collect for processing.
|
|
||||||
dtmf_forward.push((lid.clone(), pkt));
|
dtmf_forward.push((lid.clone(), pkt));
|
||||||
} else {
|
} else {
|
||||||
latest_audio = Some(pkt);
|
got_audio = true;
|
||||||
|
slot.jitter.push(pkt);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
Err(_) => break,
|
Err(_) => break,
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
if let Some(pkt) = latest_audio {
|
// Step 2b: Consume exactly one frame from the jitter buffer.
|
||||||
slot.silent_ticks = 0;
|
match slot.jitter.consume() {
|
||||||
match slot.transcoder.decode_to_f32(&pkt.payload, pkt.payload_type) {
|
JitterResult::Packet(pkt) => {
|
||||||
Ok((pcm, rate)) => {
|
match slot.transcoder.decode_to_f32(&pkt.payload, pkt.payload_type) {
|
||||||
// Resample to 48kHz mixing rate if needed.
|
Ok((pcm, rate)) => {
|
||||||
let pcm_48k = if rate == MIX_RATE {
|
let pcm_48k = if rate == MIX_RATE {
|
||||||
pcm
|
pcm
|
||||||
} else {
|
} else {
|
||||||
slot.transcoder
|
slot.transcoder
|
||||||
.resample_f32(&pcm, rate, MIX_RATE)
|
.resample_f32(&pcm, rate, MIX_RATE)
|
||||||
.unwrap_or_else(|_| vec![0.0f32; MIX_FRAME_SIZE])
|
.unwrap_or_else(|_| vec![0.0f32; MIX_FRAME_SIZE])
|
||||||
};
|
};
|
||||||
// Per-leg inbound denoising at 48kHz.
|
let processed = if slot.codec_pt != codec_lib::PT_OPUS {
|
||||||
let denoised = TranscodeState::denoise_f32(&mut slot.denoiser, &pcm_48k);
|
TranscodeState::denoise_f32(&mut slot.denoiser, &pcm_48k)
|
||||||
// Pad or truncate to exactly MIX_FRAME_SIZE.
|
} else {
|
||||||
let mut frame = denoised;
|
pcm_48k
|
||||||
frame.resize(MIX_FRAME_SIZE, 0.0);
|
};
|
||||||
slot.last_pcm_frame = frame;
|
let mut frame = processed;
|
||||||
}
|
frame.resize(MIX_FRAME_SIZE, 0.0);
|
||||||
Err(_) => {
|
slot.last_pcm_frame = frame;
|
||||||
// Decode failed — use silence.
|
}
|
||||||
slot.last_pcm_frame = vec![0.0f32; MIX_FRAME_SIZE];
|
Err(_) => {}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
} else if dtmf_forward.iter().any(|(src, _)| src == lid) {
|
JitterResult::Missing => {
|
||||||
// Got DTMF but no audio — don't bump silent_ticks (DTMF counts as activity).
|
// Invoke Opus PLC or fade for non-Opus codecs.
|
||||||
|
if slot.codec_pt == codec_lib::PT_OPUS {
|
||||||
|
match slot.transcoder.opus_plc(MIX_FRAME_SIZE) {
|
||||||
|
Ok(pcm) => {
|
||||||
|
slot.last_pcm_frame = pcm;
|
||||||
|
}
|
||||||
|
Err(_) => {
|
||||||
|
for s in slot.last_pcm_frame.iter_mut() {
|
||||||
|
*s *= 0.8;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
} else {
|
||||||
|
// Non-Opus: fade last frame toward silence.
|
||||||
|
for s in slot.last_pcm_frame.iter_mut() {
|
||||||
|
*s *= 0.85;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
JitterResult::Filling => {
|
||||||
|
slot.last_pcm_frame = vec![0.0f32; MIX_FRAME_SIZE];
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// Run jitter adaptation + prune stale packets.
|
||||||
|
slot.jitter.adapt();
|
||||||
|
slot.jitter.prune_stale();
|
||||||
|
|
||||||
|
// Silent ticks: based on actual network reception, not jitter buffer state.
|
||||||
|
if got_audio || dtmf_forward.iter().any(|(src, _)| src == lid) {
|
||||||
slot.silent_ticks = 0;
|
slot.silent_ticks = 0;
|
||||||
} else {
|
} else {
|
||||||
slot.silent_ticks += 1;
|
slot.silent_ticks += 1;
|
||||||
// After 150 ticks (3 seconds) of silence, zero out to avoid stale audio.
|
}
|
||||||
if slot.silent_ticks > 150 {
|
if slot.silent_ticks > 150 {
|
||||||
slot.last_pcm_frame = vec![0.0f32; MIX_FRAME_SIZE];
|
slot.last_pcm_frame = vec![0.0f32; MIX_FRAME_SIZE];
|
||||||
}
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|||||||
@@ -290,8 +290,9 @@ async fn browser_to_mixer_loop(
|
|||||||
.send(RtpPacket {
|
.send(RtpPacket {
|
||||||
payload: payload.to_vec(),
|
payload: payload.to_vec(),
|
||||||
payload_type: PT_OPUS,
|
payload_type: PT_OPUS,
|
||||||
marker: false,
|
marker: rtp_packet.header.marker,
|
||||||
timestamp: 0,
|
seq: rtp_packet.header.sequence_number,
|
||||||
|
timestamp: rtp_packet.header.timestamp,
|
||||||
})
|
})
|
||||||
.await;
|
.await;
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -197,10 +197,11 @@ pub fn compute_digest_auth(
|
|||||||
|
|
||||||
use crate::Endpoint;
|
use crate::Endpoint;
|
||||||
|
|
||||||
/// Parse the audio media port and connection address from an SDP body.
|
/// Parse the audio media port, connection address, and preferred codec from an SDP body.
|
||||||
pub fn parse_sdp_endpoint(sdp: &str) -> Option<Endpoint> {
|
pub fn parse_sdp_endpoint(sdp: &str) -> Option<Endpoint> {
|
||||||
let mut addr: Option<&str> = None;
|
let mut addr: Option<&str> = None;
|
||||||
let mut port: Option<u16> = None;
|
let mut port: Option<u16> = None;
|
||||||
|
let mut codec_pt: Option<u8> = None;
|
||||||
|
|
||||||
let normalized = sdp.replace("\r\n", "\n");
|
let normalized = sdp.replace("\r\n", "\n");
|
||||||
for raw in normalized.split('\n') {
|
for raw in normalized.split('\n') {
|
||||||
@@ -208,10 +209,16 @@ pub fn parse_sdp_endpoint(sdp: &str) -> Option<Endpoint> {
|
|||||||
if let Some(rest) = line.strip_prefix("c=IN IP4 ") {
|
if let Some(rest) = line.strip_prefix("c=IN IP4 ") {
|
||||||
addr = Some(rest.trim());
|
addr = Some(rest.trim());
|
||||||
} else if let Some(rest) = line.strip_prefix("m=audio ") {
|
} else if let Some(rest) = line.strip_prefix("m=audio ") {
|
||||||
|
// m=audio <port> RTP/AVP <pt1> [<pt2> ...]
|
||||||
let parts: Vec<&str> = rest.split_whitespace().collect();
|
let parts: Vec<&str> = rest.split_whitespace().collect();
|
||||||
if !parts.is_empty() {
|
if !parts.is_empty() {
|
||||||
port = parts[0].parse().ok();
|
port = parts[0].parse().ok();
|
||||||
}
|
}
|
||||||
|
// parts[1] is "RTP/AVP" or similar, parts[2..] are payload types.
|
||||||
|
// The first PT is the preferred codec.
|
||||||
|
if parts.len() > 2 {
|
||||||
|
codec_pt = parts[2].parse::<u8>().ok();
|
||||||
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -219,6 +226,7 @@ pub fn parse_sdp_endpoint(sdp: &str) -> Option<Endpoint> {
|
|||||||
(Some(a), Some(p)) => Some(Endpoint {
|
(Some(a), Some(p)) => Some(Endpoint {
|
||||||
address: a.to_string(),
|
address: a.to_string(),
|
||||||
port: p,
|
port: p,
|
||||||
|
codec_pt,
|
||||||
}),
|
}),
|
||||||
_ => None,
|
_ => None,
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -9,9 +9,11 @@ pub mod dialog;
|
|||||||
pub mod helpers;
|
pub mod helpers;
|
||||||
pub mod rewrite;
|
pub mod rewrite;
|
||||||
|
|
||||||
/// Network endpoint (address + port).
|
/// Network endpoint (address + port + optional negotiated codec).
|
||||||
#[derive(Debug, Clone, PartialEq, Eq)]
|
#[derive(Debug, Clone, PartialEq, Eq)]
|
||||||
pub struct Endpoint {
|
pub struct Endpoint {
|
||||||
pub address: String,
|
pub address: String,
|
||||||
pub port: u16,
|
pub port: u16,
|
||||||
|
/// First payload type from the SDP `m=audio` line (the preferred codec).
|
||||||
|
pub codec_pt: Option<u8>,
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -92,7 +92,7 @@ pub fn rewrite_sdp(body: &str, ip: &str, port: u16) -> (String, Option<Endpoint>
|
|||||||
.collect();
|
.collect();
|
||||||
|
|
||||||
let original = match (orig_addr, orig_port) {
|
let original = match (orig_addr, orig_port) {
|
||||||
(Some(a), Some(p)) => Some(Endpoint { address: a, port: p }),
|
(Some(a), Some(p)) => Some(Endpoint { address: a, port: p, codec_pt: None }),
|
||||||
_ => None,
|
_ => None,
|
||||||
};
|
};
|
||||||
|
|
||||||
|
|||||||
@@ -3,6 +3,6 @@
|
|||||||
*/
|
*/
|
||||||
export const commitinfo = {
|
export const commitinfo = {
|
||||||
name: 'siprouter',
|
name: 'siprouter',
|
||||||
version: '1.17.0',
|
version: '1.19.0',
|
||||||
description: 'undefined'
|
description: 'undefined'
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -425,9 +425,9 @@ async function startProxyEngine(): Promise<void> {
|
|||||||
id: data.leg_id,
|
id: data.leg_id,
|
||||||
type: data.kind,
|
type: data.kind,
|
||||||
state: data.state,
|
state: data.state,
|
||||||
codec: null,
|
codec: data.codec ?? null,
|
||||||
rtpPort: null,
|
rtpPort: data.rtpPort ?? null,
|
||||||
remoteMedia: null,
|
remoteMedia: data.remoteMedia ?? null,
|
||||||
metadata: data.metadata || {},
|
metadata: data.metadata || {},
|
||||||
});
|
});
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -3,6 +3,6 @@
|
|||||||
*/
|
*/
|
||||||
export const commitinfo = {
|
export const commitinfo = {
|
||||||
name: 'siprouter',
|
name: 'siprouter',
|
||||||
version: '1.17.0',
|
version: '1.19.0',
|
||||||
description: 'undefined'
|
description: 'undefined'
|
||||||
}
|
}
|
||||||
|
|||||||
Reference in New Issue
Block a user