16 Commits

Author SHA1 Message Date
21ffc1d017 v1.20.1
Some checks failed
Docker (tags) / release (push) Failing after 3s
2026-04-11 12:32:46 +00:00
2f16c5efae fix(docker): install required native build tools for Rust dependencies in the build image 2026-04-11 12:32:46 +00:00
254d7f3633 v1.20.0
Some checks failed
Docker (tags) / release (push) Failing after 3m53s
2026-04-11 12:01:54 +00:00
67537664df feat(docker): add multi-arch Docker build and tagged release pipeline 2026-04-11 12:01:54 +00:00
54129dcdae v1.19.2 2026-04-11 08:24:47 +00:00
8c6556dae3 fix(web-ui): normalize lucide icon names across SIP proxy views 2026-04-11 08:24:47 +00:00
291beb1da4 v1.19.1 2026-04-10 21:21:29 +00:00
79147f1e40 fix(readme): refresh documentation for jitter buffering, voicemail, and WebSocket signaling details 2026-04-10 21:21:29 +00:00
c3a63a4092 v1.19.0 2026-04-10 21:15:34 +00:00
7c4756402e feat(proxy-engine,codec-lib): add adaptive RTP jitter buffering with Opus packet loss concealment and stable 20ms resampling 2026-04-10 21:15:34 +00:00
b6950e11d2 v1.18.0 2026-04-10 17:25:34 +00:00
e4935fbf21 feat(readme): expand documentation for voicemail, IVR, audio engine, and API capabilities 2026-04-10 17:25:34 +00:00
f543ff1568 v1.17.2 2026-04-10 17:14:14 +00:00
c63a759689 fix(proxy-engine): use negotiated SDP payload types when wiring SIP legs and enable default nnnoiseless features for telephony denoising 2026-04-10 17:14:14 +00:00
a02146633b v1.17.1 2026-04-10 16:57:07 +00:00
f78639dd19 fix(proxy-engine,codec-lib,sip-proto,ts): preserve negotiated media details and improve RTP audio handling across call legs 2026-04-10 16:57:07 +00:00
36 changed files with 1475 additions and 694 deletions

16
.dockerignore Normal file
View File

@@ -0,0 +1,16 @@
node_modules/
.nogit/
nogit/
.git/
.playwright-mcp/
.vscode/
test/
dist_rust/
dist_ts_web/
rust/target/
sip_trace.log
sip_trace_*.log
proxy.out
proxy_v2.out
*.pid
.server.pid

View File

@@ -0,0 +1,32 @@
name: Docker (tags)
on:
push:
tags:
- '*'
env:
IMAGE: code.foss.global/host.today/ht-docker-node:dbase_dind
NPMCI_LOGIN_DOCKER_GITEA: ${{ github.server_url }}|${{ gitea.repository_owner }}|${{ secrets.GITEA_TOKEN }}
NPMCI_LOGIN_DOCKER_DOCKERREGISTRY: ${{ secrets.NPMCI_LOGIN_DOCKER_DOCKERREGISTRY }}
jobs:
release:
if: github.event_name == 'push' && startsWith(github.ref, 'refs/tags/')
runs-on: ubuntu-latest
container:
image: ${{ env.IMAGE }}
steps:
- uses: actions/checkout@v3
- name: Prepare
run: |
pnpm install -g pnpm
pnpm install -g @git.zone/tsdocker
- name: Release
run: |
tsdocker login
tsdocker build
tsdocker push

View File

@@ -8,5 +8,16 @@
"production": true
}
]
},
"@git.zone/tsrust": {
"targets": ["linux_amd64", "linux_arm64"]
},
"@git.zone/tsdocker": {
"registries": ["code.foss.global"],
"registryRepoMap": {
"code.foss.global": "serve.zone/siprouter",
"dockerregistry.lossless.digital": "serve.zone/siprouter"
},
"platforms": ["linux/amd64", "linux/arm64"]
}
}

74
Dockerfile Normal file
View File

@@ -0,0 +1,74 @@
# gitzone dockerfile_service
## STAGE 1 // BUILD
FROM code.foss.global/host.today/ht-docker-node:lts AS build
# System build tools that the Rust dep tree needs beyond the base image:
# - cmake : used by the `cmake` crate (transitive via ort_sys / a webrtc
# sub-crate) to build a C/C++ library from source when a
# prebuilt-binary download path doesn't apply.
# - pkg-config : used by audiopus_sys and other *-sys crates to locate libs
# on the native target (safe no-op if they vendor their own).
# These are normally pre-installed on dev machines but not in ht-docker-node:lts.
RUN apt-get update && apt-get install -y --no-install-recommends \
cmake \
pkg-config \
&& rm -rf /var/lib/apt/lists/*
# buildx sets TARGETARCH automatically for each platform it's building:
# linux/amd64 -> TARGETARCH=amd64
# linux/arm64 -> TARGETARCH=arm64
# We use it to tell tsrust to build ONLY the current container's arch. This
# overrides the `@git.zone/tsrust.targets` list in .smartconfig.json, which is
# right for local dev / CI (where you want both binaries) but wrong for per-
# platform Docker stages (each stage would then also try to cross-compile to
# the OTHER arch — which fails in the arm64 stage because no reverse cross-
# toolchain is installed).
#
# With --target set, tsrust builds a single target natively within whichever
# platform this stage is running under (native on amd64, QEMU-emulated on arm64).
ARG TARGETARCH
COPY ./ /app
WORKDIR /app
RUN pnpm config set store-dir .pnpm-store
RUN rm -rf node_modules && pnpm install
# tsrust --target takes precedence over .smartconfig.json's targets array.
# Writes dist_rust/proxy-engine_linux_amd64 or dist_rust/proxy-engine_linux_arm64.
# The TS layer (ts/proxybridge.ts buildLocalPaths) picks the right one at runtime
# via process.arch.
RUN pnpm exec tsrust --target linux_${TARGETARCH}
# Web bundle (esbuild — pure JS, uses the platform's native esbuild binary
# installed by pnpm above, so no cross-bundling concerns).
RUN pnpm run bundle
# Drop pnpm store to keep the image smaller. node_modules stays because the
# runtime entrypoint is tsx and siprouter has no separate dist_ts/ to run from.
RUN rm -rf .pnpm-store
## STAGE 2 // PRODUCTION
FROM code.foss.global/host.today/ht-docker-node:alpine-node AS production
# gcompat + libstdc++ let the glibc-linked proxy-engine binary run on Alpine.
RUN apk add --no-cache gcompat libstdc++
WORKDIR /app
COPY --from=build /app /app
ENV SIPROUTER_MODE=OCI_CONTAINER
ENV NODE_ENV=production
LABEL org.opencontainers.image.title="siprouter" \
org.opencontainers.image.description="SIP proxy with Rust data plane and WebRTC bridge" \
org.opencontainers.image.source="https://code.foss.global/serve.zone/siprouter"
# 5070 SIP signaling (UDP+TCP)
# 5061 SIP-TLS (optional, UDP+TCP)
# 3060 Web UI / WebSocket (HTTP or HTTPS, auto-detected from .nogit/cert.pem)
# 20000-20200/udp RTP media range (must match config.proxy.rtpPortRange)
EXPOSE 5070/udp 5070/tcp 5061/udp 5061/tcp 3060/tcp 20000-20200/udp
# exec replaces sh as PID 1 with tsx, so SIGINT/SIGTERM reach Node and
# ts/sipproxy.ts' shutdown handler (which calls shutdownProxyEngine) runs cleanly.
CMD ["sh", "-c", "exec ./node_modules/.bin/tsx ts/sipproxy.ts"]

View File

@@ -1,5 +1,62 @@
# Changelog
## 2026-04-11 - 1.20.1 - fix(docker)
install required native build tools for Rust dependencies in the build image
- Add cmake and pkg-config to the Docker build stage so Rust native dependencies can compile successfully in the container
- Document why these tools are needed for transitive Rust crates that build or detect native libraries
## 2026-04-11 - 1.20.0 - feat(docker)
add multi-arch Docker build and tagged release pipeline
- Add a production Dockerfile for building and running the SIP router with the Rust proxy engine and web bundle
- Configure tsdocker and tsrust for linux/amd64 and linux/arm64 image builds and registry mapping
- Add a tag-triggered Gitea workflow to build and push Docker images
- Update runtime binary resolution to load architecture-specific Rust artifacts in Docker and CI environments
- Add Docker-related package scripts, dependency updates, and ignore rules for container builds
## 2026-04-11 - 1.19.2 - fix(web-ui)
normalize lucide icon names across SIP proxy views
- Updates icon identifiers to the expected PascalCase lucide format in app navigation, calls, IVR, overview, providers, and voicemail views.
- Fixes UI icon rendering for stats cards and action menus such as transfer, delete, status, and call direction indicators.
## 2026-04-10 - 1.19.1 - fix(readme)
refresh documentation for jitter buffering, voicemail, and WebSocket signaling details
- Add adaptive jitter buffer and packet loss concealment details to the audio pipeline documentation
- Document voicemail unheard count and heard-state API endpoints
- Update WebSocket event and browser signaling examples to reflect current message types
## 2026-04-10 - 1.19.0 - feat(proxy-engine,codec-lib)
add adaptive RTP jitter buffering with Opus packet loss concealment and stable 20ms resampling
- introduces a per-leg adaptive jitter buffer in the mixer to reorder RTP packets, gate initial playout, and deliver one frame per 20ms tick
- adds Opus PLC support to synthesize missing audio frames when packets are lost, with fade-based fallback handling for non-Opus codecs
- updates i16 and f32 resamplers to use canonical 20ms chunks so cached resamplers preserve filter state and avoid variable-size cache thrashing
## 2026-04-10 - 1.18.0 - feat(readme)
expand documentation for voicemail, IVR, audio engine, and API capabilities
- Updates the feature overview to document voicemail, IVR menus, call recording, enhanced TTS, and the 48kHz float audio engine
- Refreshes the architecture section to describe the TypeScript control plane, Rust proxy-engine data plane, and JSON-over-stdio IPC
- Clarifies REST API and WebSocket coverage with voicemail endpoints, incoming call events, and refined endpoint descriptions
## 2026-04-10 - 1.17.2 - fix(proxy-engine)
use negotiated SDP payload types when wiring SIP legs and enable default nnnoiseless features for telephony denoising
- Select the negotiated codec payload type from SDP answers instead of always using the first offered codec
- Preserve the device leg's preferred payload type from its own INVITE SDP when attaching it to the mixer
- Enable default nnnoiseless features in codec-lib and proxy-engine dependencies
## 2026-04-10 - 1.17.1 - fix(proxy-engine,codec-lib,sip-proto,ts)
preserve negotiated media details and improve RTP audio handling across call legs
- Use native Opus float encode/decode to avoid unnecessary i16 quantization in the f32 audio path.
- Parse full RTP headers including extensions and sequence numbers, then sort inbound packets before decoding to keep codec state stable for out-of-order audio.
- Capture negotiated codec payload types from SDP offers and answers and include codec, RTP port, remote media, and metadata in leg_added events.
- Emit leg_state_changed and leg_removed events more consistently so the dashboard reflects leg lifecycle updates accurately.
## 2026-04-10 - 1.17.0 - feat(proxy-engine)
upgrade the internal audio bus to 48kHz f32 with per-leg denoising and improve SIP leg routing

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

View File

@@ -1,11 +1,14 @@
{
"name": "siprouter",
"version": "1.17.0",
"version": "1.20.1",
"private": true,
"type": "module",
"scripts": {
"bundle": "node node_modules/.pnpm/esbuild@0.27.7/node_modules/esbuild/bin/esbuild ts_web/index.ts --bundle --format=esm --outfile=dist_ts_web/bundle.js --platform=browser --target=es2022 --minify",
"buildRust": "tsrust",
"build": "pnpm run buildRust && pnpm run bundle",
"build:docker": "tsdocker build --verbose",
"release:docker": "tsdocker push --verbose",
"start": "tsx ts/sipproxy.ts",
"restartBackground": "pnpm run buildRust && pnpm run bundle; test -f .server.pid && kill $(cat .server.pid) 2>/dev/null; sleep 1; rm -f sip_trace.log proxy.out && nohup tsx ts/sipproxy.ts > proxy.out 2>&1 & echo $! > .server.pid; sleep 2; cat proxy.out"
},
@@ -14,10 +17,12 @@
"@design.estate/dees-element": "^2.2.4",
"@push.rocks/smartrust": "^1.3.2",
"@push.rocks/smartstate": "^2.3.0",
"tsx": "^4.21.0",
"ws": "^8.20.0"
},
"devDependencies": {
"@git.zone/tsbundle": "^2.10.0",
"@git.zone/tsdocker": "^2.2.4",
"@git.zone/tsrust": "^1.3.2",
"@git.zone/tswatch": "^3.3.2",
"@types/ws": "^8.18.1"

650
pnpm-lock.yaml generated

File diff suppressed because it is too large Load Diff

300
readme.md
View File

@@ -1,6 +1,6 @@
# @serve.zone/siprouter
A production-grade **SIP B2BUA + WebRTC bridge** built with TypeScript and Rust. Routes calls between SIP providers, SIP hardware devices, and browser softphones — with real-time codec transcoding, ML noise suppression, neural TTS announcements, and a slick web dashboard.
A production-grade **SIP B2BUA + WebRTC bridge** built with TypeScript and Rust. Routes calls between SIP providers, SIP hardware devices, and browser softphones — with real-time codec transcoding, adaptive jitter buffering, ML noise suppression, neural TTS, voicemail, IVR menus, and a slick web dashboard.
## Issue Reporting and Security
@@ -12,14 +12,17 @@ For reporting bugs, issues, or security vulnerabilities, please visit [community
siprouter sits between your SIP trunk providers and your endpoints — hardware phones, ATAs, browser softphones — and handles **everything** in between:
- 📞 **SIP B2BUA** — Terminates and re-originates calls with full RFC 3261 dialog state management
- 🌐 **WebRTC Bridge** — Browser-based softphone with bidirectional audio to the SIP network
- 🎛️ **Multi-Provider Trunking** — Register with multiple SIP providers simultaneously (sipgate, easybell, o2, etc.)
- 🔊 **Rust Codec Engine** — Real-time Opus ↔ G.722 ↔ PCMU ↔ PCMA transcoding in native Rust
- 🤖 **ML Noise Suppression** — RNNoise denoiser with per-direction state (to SIP / to browser)
- 🗣️ **Neural TTS** — Kokoro-powered "connecting your call" announcements, pre-encoded for instant playback
- 🔀 **Hub Model Calls** — N-leg calls with dynamic add/remove, transfer, and RTP fan-out
- 🖥️ **Web Dashboard** — Real-time SPA with live call monitoring, browser phone, contact management, provider config
- 📞 **SIP B2BUA** — Terminates and re-originates calls with full RFC 3261 dialog state management, digest auth, and SDP negotiation
- 🌐 **WebRTC Bridge** — Browser-based softphone with bidirectional Opus audio to the SIP network
- 🎛️ **Multi-Provider Trunking** — Register with multiple SIP providers simultaneously (sipgate, easybell, etc.) with automatic failover
- 🎧 **48kHz f32 Audio Engine** — High-fidelity internal audio bus at 48kHz/32-bit float with native Opus float encode/decode, FFT-based resampling, and per-leg ML noise suppression
- 🔀 **N-Leg Mix-Minus Mixer** — Conference-grade mixing with dynamic leg add/remove, transfer, and per-source audio separation
- 🎯 **Adaptive Jitter Buffer** — Per-leg jitter buffering with sequence-based reordering, adaptive depth (60120ms), Opus PLC for lost packets, and hold/resume detection
- 📧 **Voicemail** — Configurable voicemail boxes with TTS greetings, recording, and web playback
- 🔢 **IVR Menus** — DTMF-navigable interactive voice response with nested menus, routing actions, and custom prompts
- 🗣️ **Neural TTS** — Kokoro-powered announcements and greetings with 25+ voice presets, backed by espeak-ng fallback
- 🎙️ **Call Recording** — Per-source separated WAV recording at 48kHz via tool legs
- 🖥️ **Web Dashboard** — Real-time SPA with 9 views: live calls, browser phone, routing, voicemail, IVR, contacts, providers, and streaming logs
---
@@ -35,32 +38,38 @@ siprouter sits between your SIP trunk providers and your endpoints — hardware
┌──────────────────────────────────────┐
│ siprouter │
│ │
┌──────────┐ ┌──────────────────┐
│ Call Hub │ │ Rust Transcoder │
│ │ N legs │──│ Opus/G.722/PCM │ │
│ │ fan-out │ │ + RNNoise │ │
└────┬─────┘ └──────────────────┘
┌────┴─────┐ ┌──────────────────┐
│ SIP Stack│ │ Kokoro TTS │
│ │ Dialog SM│ │ (ONNX Runtime) │ │
└────┬─────┘ └──────────────────┘
┌────┴──────────────────────────┐
│ │ Local Registrar + Provider │
│ │ Registration Engine
└───────────────────────────────┘
└──────────┬──────────────┬────────────┘
TypeScript Control Plane
┌────────────────────────────────┐
│ │ Config · WebRTC Signaling │ │
│ │ REST API · Web Dashboard │ │
│ Voicebox Manager · TTS Cache │
└────────────┬───────────────────┘
JSON-over-stdio IPC
┌────────────┴───────────────────┐
│ │ Rust proxy-engine (data plane) │ │
│ │
│ SIP Stack · Dialog SM · Auth
│ Call Manager · N-Leg Mixer │
│ │ 48kHz f32 Bus · Jitter Buffer │ │
│ │ Codec Engine · RTP Port Pool │ │
│ WebRTC Engine · Kokoro TTS │
│ │ Voicemail · IVR · Recording │ │
│ └────┬──────────────────┬────────┘ │
└───────┤──────────────────┤───────────┘
│ │
┌──────┴──────┐ ─────┴──────┐
┌──────┴──────┐ ┌──────┴──────┐
│ SIP Devices │ │ SIP Trunk │
│ (HT801, etc)│ │ Providers │
└─────────────┘ ────────────┘
│ (HT801 etc) │ Providers
└─────────────┘ └─────────────┘
```
### The Hub Model
### 🧠 Key Design Decisions
Every call is a **hub** with N legs. Each leg is either a `SipLeg` (hardware device or provider) or a `WebRtcLeg` (browser). RTP flows through the hub — each leg's received audio is forwarded to all other legs, with codec transcoding handled transparently by the Rust engine.
- **Hub Model** — Every call is a hub with N legs. Each leg is a `SipLeg` (device/provider) or `WebRtcLeg` (browser). Legs can be dynamically added, removed, or transferred without tearing down the call.
- **Rust Data Plane** — All SIP protocol handling, codec transcoding, mixing, and RTP I/O runs in native Rust for real-time performance. TypeScript handles config, signaling, REST API, and dashboard.
- **48kHz f32 Internal Bus** — Audio is processed at maximum quality internally. Encoding/decoding to wire format (G.722, PCMU, Opus) happens solely at the leg boundary.
- **Per-Session Codec Isolation** — Each call leg gets its own encoder/decoder/resampler/denoiser state — no cross-call corruption.
- **SDP Codec Negotiation** — Outbound encoding uses the codec actually negotiated in SDP answers, not just the first offered codec.
---
@@ -70,15 +79,16 @@ Every call is a **hub** with N legs. Each leg is either a `SipLeg` (hardware dev
- **Node.js** ≥ 20 with `tsx` globally available
- **pnpm** for package management
- **Rust** toolchain (for building the codec engine and TTS)
- **Rust** toolchain (for building the proxy engine)
- **espeak-ng** (optional, for TTS fallback)
### Install & Build
```bash
# Clone and install
# Clone and install dependencies
pnpm install
# Build the Rust binaries (opus-codec + tts-engine)
# Build the Rust proxy-engine binary
pnpm run buildRust
# Bundle the web frontend
@@ -87,57 +97,92 @@ pnpm run bundle
### Configuration
Create `.nogit/config.json` with your setup:
Create `.nogit/config.json`:
```jsonc
{
"proxy": {
"lanIp": "192.168.1.100", // Your server's LAN IP
"lanPort": 5070, // SIP signaling port
"rtpPortRange": [20000, 20200],// RTP relay port pool (even ports)
"webUiPort": 3060 // Dashboard port
"publicIpSeed": "stun.example.com", // STUN server for public IP discovery
"rtpPortRange": { "min": 20000, "max": 20200 }, // RTP port pool (even ports)
"webUiPort": 3060 // Dashboard + REST API port
},
"providers": [
{
"id": "my-trunk",
"name": "My SIP Provider",
"host": "sip.provider.com",
"port": 5060,
"displayName": "My SIP Provider",
"domain": "sip.provider.com",
"outboundProxy": { "address": "sip.provider.com", "port": 5060 },
"username": "user",
"password": "pass",
"codecs": ["G.722", "PCMA", "PCMU"],
"registerExpiry": 3600
"codecs": [9, 0, 8, 101], // G.722, PCMU, PCMA, telephone-event
"registerIntervalSec": 300
}
],
"devices": [
{
"id": "desk-phone",
"name": "Desk Phone",
"type": "sip"
"displayName": "Desk Phone",
"expectedAddress": "192.168.1.50",
"extension": "100"
}
],
"routing": {
"inbound": {
"default": { "target": "all-devices", "ringBrowser": true }
"routes": [
{
"id": "inbound-default",
"name": "Ring all devices",
"priority": 100,
"direction": "inbound",
"match": {},
"action": {
"targets": ["desk-phone"],
"ringBrowsers": true,
"voicemailBox": "main",
"noAnswerTimeout": 25
}
},
{
"id": "outbound-default",
"name": "Route via trunk",
"priority": 100,
"direction": "outbound",
"match": {},
"action": { "provider": "my-trunk" }
}
]
},
"voiceboxes": [
{
"id": "main",
"enabled": true,
"greetingText": "Please leave a message after the beep.",
"greetingVoice": "af_bella",
"noAnswerTimeoutSec": 25,
"maxRecordingSec": 120,
"maxMessages": 50
}
],
"contacts": [
{ "id": "1", "name": "Alice", "number": "+491234567890", "starred": true }
]
}
```
### TTS Setup (Optional)
For neural "connecting your call" announcements, download the Kokoro TTS model:
For neural announcements and voicemail greetings, download the Kokoro TTS model:
```bash
mkdir -p .nogit/tts
# Download the full-quality model (310MB) + voices (27MB)
curl -L -o .nogit/tts/kokoro-v1.0.onnx \
https://github.com/mzdk100/kokoro/releases/download/V1.0/kokoro-v1.0.onnx
curl -L -o .nogit/tts/voices.bin \
https://github.com/mzdk100/kokoro/releases/download/V1.0/voices.bin
```
If the model files aren't present, the announcement feature is simply disabled — everything else works fine.
Without the model files, TTS falls back to `espeak-ng`. Without either, announcements are skipped — everything else works fine.
### Run
@@ -145,7 +190,7 @@ If the model files aren't present, the announcement feature is simply disabled
pnpm start
```
The SIP proxy starts on the configured port and the web dashboard is available at `http://<your-ip>:3060`.
The SIP proxy starts on the configured port and the web dashboard is available at `https://<your-ip>:3060`.
### HTTPS (Optional)
@@ -157,68 +202,93 @@ Place `cert.pem` and `key.pem` in `.nogit/` for TLS on the dashboard.
```
siprouter/
├── ts/ # TypeScript source
├── ts/ # TypeScript control plane
│ ├── sipproxy.ts # Main entry — bootstraps everything
│ ├── config.ts # Config loader & validation
│ ├── registrar.ts # Local SIP registrar for devices
│ ├── providerstate.ts # Per-provider upstream registration engine
│ ├── proxybridge.ts # Rust proxy-engine IPC bridge (smartrust)
│ ├── frontend.ts # Web dashboard HTTP/WS server + REST API
│ ├── webrtcbridge.ts # WebRTC signaling layer
│ ├── opusbridge.ts # Rust IPC bridge (smartrust)
│ ├── codec.ts # High-level RTP transcoding interface
│ ├── announcement.ts # Neural TTS announcement generator
── sip/ # Zero-dependency SIP protocol library
── message.ts # SIP message parser/builder/mutator
│ ├── dialog.ts # RFC 3261 dialog state machine
│ │ ├── helpers.ts # SDP builder, digest auth, codec registry
│ │ └── rewrite.ts # SIP URI + SDP body rewriting
│ └── call/ # Hub-model call management
│ ├── call-manager.ts # Central registry, factory, routing
│ ├── call.ts # Call hub — owns N legs, media fan-out
│ ├── sip-leg.ts # SIP device/provider connection
│ ├── webrtc-leg.ts # Browser WebRTC connection
│ └── rtp-port-pool.ts # UDP port allocation
│ ├── registrar.ts # Browser softphone registration
│ ├── announcement.ts # TTS announcement generator (espeak-ng / Kokoro)
│ ├── voicebox.ts # Voicemail box management
── call/
── prompt-cache.ts # Named audio prompt WAV management
├── ts_web/ # Web frontend (Lit-based SPA)
│ ├── elements/ # Web components (dashboard, phone, etc.)
│ ├── elements/ # Web components (9 dashboard views)
│ └── state/ # App state, WebRTC client, notifications
├── rust/ # Rust workspace
├── rust/ # Rust workspace (the data plane)
│ └── crates/
│ ├── opus-codec/ # Real-time audio transcoder (Opus/G.722/PCM)
── tts-engine/ # Kokoro neural TTS CLI
│ ├── codec-lib/ # Audio codec library (Opus/G.722/PCMU/PCMA)
── sip-proto/ # Zero-dependency SIP protocol library
│ └── proxy-engine/ # Main binary — SIP engine + mixer + RTP
├── html/ # Static HTML shell
├── .nogit/ # Secrets, config, models (gitignored)
└── dist_rust/ # Compiled Rust binaries (gitignored)
├── .nogit/ # Secrets, config, TTS models (gitignored)
└── dist_rust/ # Compiled Rust binary (gitignored)
```
---
## 🎧 Codec Engine (Rust)
## 🎧 Audio Engine (Rust)
The `opus-codec` binary handles all real-time audio processing via a JSON-over-stdio IPC protocol:
The `proxy-engine` binary handles all real-time audio processing with a **48kHz f32 internal bus** — encoding and decoding happens only at leg boundaries.
| Codec | Payload Type | Sample Rate | Use Case |
|-------|-------------|-------------|----------|
| **Opus** | 111 | 48 kHz | WebRTC browsers |
| **G.722** | 9 | 16 kHz | HD SIP devices |
### Supported Codecs
| Codec | PT | Native Rate | Use Case |
|-------|:--:|:-----------:|----------|
| **Opus** | 111 | 48 kHz | WebRTC browsers (native float encode/decode — zero i16 quantization) |
| **G.722** | 9 | 16 kHz | HD SIP devices & providers |
| **PCMU** (G.711 µ-law) | 0 | 8 kHz | Legacy SIP |
| **PCMA** (G.711 A-law) | 8 | 8 kHz | Legacy SIP |
**Features:**
- Per-call isolated codec sessions (no cross-call state corruption)
- FFT-based sample rate conversion via `rubato`
- **RNNoise ML noise suppression** with per-direction state — denoises audio flowing to SIP separately from audio flowing to the browser
- Raw PCM encoding for TTS frame processing
### Audio Pipeline
```
Inbound: Wire RTP → Jitter Buffer → Decode → Resample to 48kHz → Denoise (RNNoise) → Mix Bus
Outbound: Mix Bus → Mix-Minus → Resample to codec rate → Encode → Wire RTP
```
- **Adaptive jitter buffer** — per-leg `BTreeMap`-based buffer keyed by RTP sequence number. Delivers exactly one frame per 20ms mixer tick in sequence order. Adaptive target depth starts at 3 frames (60ms) and adjusts between 26 frames based on observed network jitter. Handles hold/resume by detecting large forward sequence jumps and resetting cleanly.
- **Packet loss concealment (PLC)** — on missing packets, Opus legs invoke the decoder's built-in PLC (`decode(None)`) to synthesize a smooth fill frame. Non-Opus legs (G.722, PCMU) apply exponential fade (0.85×) toward silence to avoid hard discontinuities.
- **FFT-based resampling** via `rubato` — high-quality sinc interpolation with canonical 20ms chunk sizes to ensure consistent resampler state across frames, preventing filter discontinuities
- **ML noise suppression** via `nnnoiseless` (RNNoise) — per-leg inbound denoising with SIMD acceleration (AVX/SSE). Skipped for WebRTC legs (browsers already denoise via getUserMedia)
- **Mix-minus mixing** — each participant hears everyone except themselves, accumulated in f64 precision
- **RFC 3550 compliant header parsing** — properly handles CSRC lists and header extensions
---
## 🗣️ Neural TTS (Rust)
## 🗣️ Neural TTS
The `tts-engine` binary uses [Kokoro TTS](https://github.com/mzdk100/kokoro) (82M parameter neural model) to synthesize announcements at startup:
Announcements and voicemail greetings are synthesized using [Kokoro TTS](https://github.com/mzdk100/kokoro) — an 82M parameter neural model running via ONNX Runtime directly in the Rust process:
- **24 kHz, 16-bit mono** output
- **25+ voice presets** — American/British, male/female (e.g., `af_bella`, `am_adam`, `bf_emma`, `bm_george`)
- **~800ms** synthesis time for a 3-second announcement
- Pre-encoded to G.722 + Opus for zero-latency RTP playback during call setup
- **~800ms** synthesis time for a 3-second phrase
- Lazy-loaded on first use — no startup cost if TTS is unused
- Falls back to `espeak-ng` if the ONNX model is not available
---
## 📧 Voicemail
- Configurable voicemail boxes with custom TTS greetings (text + voice) or uploaded WAV
- Automatic routing on no-answer timeout (configurable, default 25s)
- Recording with configurable max duration (default 120s) and message count limit (default 50)
- Unheard message tracking for MWI (message waiting indication)
- Web dashboard playback and management
- WAV storage in `.nogit/voicemail/`
---
## 🔢 IVR (Interactive Voice Response)
- DTMF-navigable menus with configurable entries
- Actions: route to extension, route to voicemail, transfer, submenu, hangup, repeat prompt
- Custom TTS prompts per menu
- Nested menu support
---
@@ -228,25 +298,34 @@ The `tts-engine` binary uses [Kokoro TTS](https://github.com/mzdk100/kokoro) (82
| View | Description |
|------|-------------|
| **Overview** | Stats tiles — uptime, providers, devices, active calls |
| **Calls** | Active calls with leg details, codec info, packet counters. Add/remove legs, transfer, hangup |
| **Phone** | Browser softphone — mic/speaker selection, audio meters, dial pad, incoming call popup |
| **Contacts** | Contact management with click-to-call |
| **Providers** | SIP trunk config with registration status |
| **Log** | Live streaming log viewer |
| 📊 **Overview** | Stats tiles — uptime, providers, devices, active calls |
| 📞 **Calls** | Active calls with leg details, codec info, add/remove legs, transfer, hangup |
| ☎️ **Phone** | Browser softphone — mic/speaker selection, audio meters, dial pad, incoming call popup |
| 🔀 **Routes** | Routing rule management — match/action model with priority |
| 📧 **Voicemail** | Voicemail box management + message playback |
| 🔢 **IVR** | IVR menu builder — DTMF entries, TTS prompts, nested menus |
| 👤 **Contacts** | Contact management with click-to-call |
| 🔌 **Providers** | SIP trunk configuration and registration status |
| 📋 **Log** | Live streaming log viewer |
### REST API
| Endpoint | Method | Description |
|----------|--------|-------------|
| `/api/status` | GET | Full system status (providers, devices, calls) |
| `/api/status` | GET | Full system status (providers, devices, calls, history) |
| `/api/call` | POST | Originate a call |
| `/api/hangup` | POST | Hang up a call |
| `/api/call/:id/addleg` | POST | Add a leg to an active call |
| `/api/call/:id/addexternal` | POST | Add an external participant |
| `/api/call/:id/addleg` | POST | Add a device leg to an active call |
| `/api/call/:id/addexternal` | POST | Add an external participant via provider |
| `/api/call/:id/removeleg` | POST | Remove a leg from a call |
| `/api/transfer` | POST | Transfer a call |
| `/api/config` | GET/POST | Read or update configuration (hot-reload) |
| `/api/config` | GET | Read current configuration |
| `/api/config` | POST | Update configuration (hot-reload) |
| `/api/voicemail/:box` | GET | List voicemail messages |
| `/api/voicemail/:box/unheard` | GET | Get unheard message count |
| `/api/voicemail/:box/:id/audio` | GET | Stream voicemail audio |
| `/api/voicemail/:box/:id/heard` | POST | Mark a voicemail message as heard |
| `/api/voicemail/:box/:id` | DELETE | Delete a voicemail message |
### WebSocket Events
@@ -255,6 +334,18 @@ Connect to `/ws` for real-time push:
```jsonc
{ "type": "status", "data": { ... } } // Full status snapshot (1s interval)
{ "type": "log", "data": { "message": "..." } } // Log lines in real-time
{ "type": "call-update", "data": { ... } } // Call state change notification
{ "type": "webrtc-answer", "data": { ... } } // WebRTC SDP answer for browser calls
{ "type": "webrtc-error", "data": { ... } } // WebRTC signaling error
```
Browser → server signaling:
```jsonc
{ "type": "webrtc-offer", "data": { ... } } // Browser sends SDP offer
{ "type": "webrtc-accept", "data": { ... } } // Browser accepts incoming call
{ "type": "webrtc-ice", "data": { ... } } // ICE candidate exchange
{ "type": "webrtc-hangup", "data": { ... } } // Browser hangs up
```
---
@@ -264,7 +355,7 @@ Connect to `/ws` for real-time push:
| Port | Protocol | Purpose |
|------|----------|---------|
| 5070 (configurable) | UDP | SIP signaling |
| 2000020200 (configurable) | UDP | RTP relay (even ports, per-call allocation) |
| 2000020200 (configurable) | UDP | RTP media (even ports, per-call allocation) |
| 3060 (configurable) | TCP | Web dashboard + WebSocket + REST API |
---
@@ -275,28 +366,21 @@ Connect to `/ws` for real-time push:
# Start in dev mode
pnpm start
# Build Rust crates
# Build Rust proxy-engine
pnpm run buildRust
# Bundle web frontend
pnpm run bundle
# Restart background server (build + bundle + restart)
# Build + bundle + restart background server
pnpm run restartBackground
```
### Key Design Decisions
- **Hub Model** — Calls are N-leg hubs, not point-to-point. This enables multi-party, dynamic leg manipulation, and transfer without tearing down the call.
- **Zero-dependency SIP library** — `ts/sip/` is a pure data-level SIP stack (parse/build/mutate/serialize). No transport or timer logic — those live in the application layer.
- **Rust for the hot path** — Codec transcoding and noise suppression run in native Rust for real-time performance. TypeScript handles signaling and orchestration.
- **Per-session codec isolation** — Each call gets its own Opus/G.722 encoder/decoder state in the Rust process, preventing stateful codec prediction from leaking between concurrent calls.
---
## License and Legal Information
This repository contains open-source code licensed under the MIT License. A copy of the license can be found in the [license](./license) file.
This repository contains open-source code licensed under the MIT License. A copy of the license can be found in the [LICENSE](./LICENSE) file.
**Please note:** The MIT License does not grant permission to use the trade names, trademarks, service marks, or product names of the project, except as required for reasonable and customary use in describing the origin of the work and reproducing the content of the NOTICE file.

226
rust/Cargo.lock generated
View File

@@ -237,6 +237,17 @@ version = "1.1.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "1505bd5d3d116872e7271a6d4e16d81d0c8570876c8de68093a09ac269d8aac0"
[[package]]
name = "atty"
version = "0.2.14"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "d9b39be18770d11421cdb1b9947a45dd3f37e93092cbf377614828a319d5fee8"
dependencies = [
"hermit-abi",
"libc",
"winapi",
]
[[package]]
name = "audiopus"
version = "0.3.0-rc.0"
@@ -487,6 +498,31 @@ dependencies = [
"inout",
]
[[package]]
name = "clap"
version = "3.2.25"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "4ea181bf566f71cb9a5d17a59e1871af638180a18fb0035c92ae62b705207123"
dependencies = [
"atty",
"bitflags 1.3.2",
"clap_lex",
"indexmap 1.9.3",
"once_cell",
"strsim",
"termcolor",
"textwrap",
]
[[package]]
name = "clap_lex"
version = "0.2.4"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "2850f2f5a82cbf437dd5af4d49848fbdfc27c157c3d010345776f952765261c5"
dependencies = [
"os_str_bytes",
]
[[package]]
name = "cmake"
version = "0.1.58"
@@ -700,6 +736,125 @@ version = "0.3.8"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "06d2e3287df1c007e74221c49ca10a95d557349e54b3a75dc2fb14712c751f04"
[[package]]
name = "dasp"
version = "0.11.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "7381b67da416b639690ac77c73b86a7b5e64a29e31d1f75fb3b1102301ef355a"
dependencies = [
"dasp_envelope",
"dasp_frame",
"dasp_interpolate",
"dasp_peak",
"dasp_ring_buffer",
"dasp_rms",
"dasp_sample",
"dasp_signal",
"dasp_slice",
"dasp_window",
]
[[package]]
name = "dasp_envelope"
version = "0.11.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "8ec617ce7016f101a87fe85ed44180839744265fae73bb4aa43e7ece1b7668b6"
dependencies = [
"dasp_frame",
"dasp_peak",
"dasp_ring_buffer",
"dasp_rms",
"dasp_sample",
]
[[package]]
name = "dasp_frame"
version = "0.11.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "b2a3937f5fe2135702897535c8d4a5553f8b116f76c1529088797f2eee7c5cd6"
dependencies = [
"dasp_sample",
]
[[package]]
name = "dasp_interpolate"
version = "0.11.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "7fc975a6563bb7ca7ec0a6c784ead49983a21c24835b0bc96eea11ee407c7486"
dependencies = [
"dasp_frame",
"dasp_ring_buffer",
"dasp_sample",
]
[[package]]
name = "dasp_peak"
version = "0.11.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "5cf88559d79c21f3d8523d91250c397f9a15b5fc72fbb3f87fdb0a37b79915bf"
dependencies = [
"dasp_frame",
"dasp_sample",
]
[[package]]
name = "dasp_ring_buffer"
version = "0.11.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "07d79e19b89618a543c4adec9c5a347fe378a19041699b3278e616e387511ea1"
[[package]]
name = "dasp_rms"
version = "0.11.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "a6c5dcb30b7e5014486e2822537ea2beae50b19722ffe2ed7549ab03774575aa"
dependencies = [
"dasp_frame",
"dasp_ring_buffer",
"dasp_sample",
]
[[package]]
name = "dasp_sample"
version = "0.11.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "0c87e182de0887fd5361989c677c4e8f5000cd9491d6d563161a8f3a5519fc7f"
[[package]]
name = "dasp_signal"
version = "0.11.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "aa1ab7d01689c6ed4eae3d38fe1cea08cba761573fbd2d592528d55b421077e7"
dependencies = [
"dasp_envelope",
"dasp_frame",
"dasp_interpolate",
"dasp_peak",
"dasp_ring_buffer",
"dasp_rms",
"dasp_sample",
"dasp_window",
]
[[package]]
name = "dasp_slice"
version = "0.11.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "4e1c7335d58e7baedafa516cb361360ff38d6f4d3f9d9d5ee2a2fc8e27178fa1"
dependencies = [
"dasp_frame",
"dasp_sample",
]
[[package]]
name = "dasp_window"
version = "0.11.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "99ded7b88821d2ce4e8b842c9f1c86ac911891ab89443cc1de750cae764c5076"
dependencies = [
"dasp_sample",
]
[[package]]
name = "data-encoding"
version = "2.10.0"
@@ -1214,6 +1369,12 @@ dependencies = [
"subtle",
]
[[package]]
name = "hashbrown"
version = "0.12.3"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "8a9ee70c43aaf417c914396645a0fa852624801b24ebb7ae78fe8272889ac888"
[[package]]
name = "hashbrown"
version = "0.15.5"
@@ -1246,6 +1407,15 @@ version = "0.5.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "2304e00983f87ffb38b55b444b5e3b60a884b5d30c0fca7d82fe33449bbe55ea"
[[package]]
name = "hermit-abi"
version = "0.1.19"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "62b467343b94ba476dcb2500d242dadbb39557df889310ac77c5d99100aaac33"
dependencies = [
"libc",
]
[[package]]
name = "hex"
version = "0.4.3"
@@ -1446,6 +1616,16 @@ dependencies = [
"zstd",
]
[[package]]
name = "indexmap"
version = "1.9.3"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "bd070e393353796e801d209ad339e89596eb4c8d430d18ede6a1cced8fafbd99"
dependencies = [
"autocfg",
"hashbrown 0.12.3",
]
[[package]]
name = "indexmap"
version = "2.14.0"
@@ -1739,7 +1919,13 @@ version = "0.5.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "805d5964d1e7a0006a7fdced7dae75084d66d18b35f1dfe81bd76929b1f8da0c"
dependencies = [
"anyhow",
"clap",
"dasp",
"dasp_interpolate",
"dasp_ring_buffer",
"easyfft",
"hound",
"once_cell",
]
@@ -1905,6 +2091,12 @@ dependencies = [
"ureq",
]
[[package]]
name = "os_str_bytes"
version = "6.6.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "e2355d85b9a3786f481747ced0e0ff2ba35213a1f9bd406ed906554d7af805a1"
[[package]]
name = "p256"
version = "0.11.1"
@@ -2883,6 +3075,21 @@ dependencies = [
"windows-sys",
]
[[package]]
name = "termcolor"
version = "1.4.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "06794f8f6c5c898b3275aebefa6b8a1cb24cd2c6c79397ab15774837a0bc5755"
dependencies = [
"winapi-util",
]
[[package]]
name = "textwrap"
version = "0.16.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "c13547615a44dc9c452a8a534638acdf07120d4b6847c8178705da06306a3057"
[[package]]
name = "thiserror"
version = "1.0.69"
@@ -3244,7 +3451,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "bb0e353e6a2fbdc176932bbaab493762eb1255a7900fe0fea1a2f96c296cc909"
dependencies = [
"anyhow",
"indexmap",
"indexmap 2.14.0",
"wasm-encoder",
"wasmparser",
]
@@ -3257,7 +3464,7 @@ checksum = "47b807c72e1bac69382b3a6fb3dbe8ea4c0ed87ff5629b8685ae6b9a611028fe"
dependencies = [
"bitflags 2.11.0",
"hashbrown 0.15.5",
"indexmap",
"indexmap 2.14.0",
"semver",
]
@@ -3515,6 +3722,15 @@ version = "0.4.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "ac3b87c63620426dd9b991e5ce0329eff545bccbbb34f3be09ff6fb6ab51b7b6"
[[package]]
name = "winapi-util"
version = "0.1.11"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "c2a7b1c03c876122aa43f3020e6c3c3ee5c05081c9a00739faf7503aeba10d22"
dependencies = [
"windows-sys",
]
[[package]]
name = "winapi-x86_64-pc-windows-gnu"
version = "0.4.0"
@@ -3564,7 +3780,7 @@ checksum = "b7c566e0f4b284dd6561c786d9cb0142da491f46a9fbed79ea69cdad5db17f21"
dependencies = [
"anyhow",
"heck",
"indexmap",
"indexmap 2.14.0",
"prettyplease",
"syn 2.0.117",
"wasm-metadata",
@@ -3595,7 +3811,7 @@ checksum = "9d66ea20e9553b30172b5e831994e35fbde2d165325bec84fc43dbf6f4eb9cb2"
dependencies = [
"anyhow",
"bitflags 2.11.0",
"indexmap",
"indexmap 2.14.0",
"log",
"serde",
"serde_derive",
@@ -3614,7 +3830,7 @@ checksum = "ecc8ac4bc1dc3381b7f59c34f00b67e18f910c2c0f50015669dde7def656a736"
dependencies = [
"anyhow",
"id-arena",
"indexmap",
"indexmap 2.14.0",
"log",
"semver",
"serde",

View File

@@ -7,4 +7,4 @@ edition = "2021"
audiopus = "0.3.0-rc.0"
ezk-g722 = "0.1"
rubato = "0.14"
nnnoiseless = { version = "0.5", default-features = false }
nnnoiseless = "0.5"

View File

@@ -142,8 +142,10 @@ impl TranscodeState {
}
/// High-quality sample rate conversion using rubato FFT resampler.
/// Resamplers are cached by (from_rate, to_rate, chunk_size) and reused,
/// maintaining proper inter-frame state for continuous audio streams.
///
/// To maintain continuous filter state, the resampler always processes at a
/// canonical chunk size (20ms at the source rate). This prevents cache
/// thrashing from variable input sizes and preserves inter-frame filter state.
pub fn resample(
&mut self,
pcm: &[i16],
@@ -154,28 +156,61 @@ impl TranscodeState {
return Ok(pcm.to_vec());
}
let chunk = pcm.len();
let key = (from_rate, to_rate, chunk);
let canonical_chunk = (from_rate as usize) / 50; // 20ms
let key = (from_rate, to_rate, canonical_chunk);
if !self.resamplers.contains_key(&key) {
let r =
FftFixedIn::<f64>::new(from_rate as usize, to_rate as usize, chunk, 1, 1)
let r = FftFixedIn::<f64>::new(
from_rate as usize,
to_rate as usize,
canonical_chunk,
1,
1,
)
.map_err(|e| format!("resampler {from_rate}->{to_rate}: {e}"))?;
self.resamplers.insert(key, r);
}
let resampler = self.resamplers.get_mut(&key).unwrap();
let float_in: Vec<f64> = pcm.iter().map(|&s| s as f64 / 32768.0).collect();
let input = vec![float_in];
let mut output = Vec::with_capacity(
(pcm.len() as f64 * to_rate as f64 / from_rate as f64).ceil() as usize + 16,
);
let mut offset = 0;
while offset < pcm.len() {
let remaining = pcm.len() - offset;
let copy_len = remaining.min(canonical_chunk);
let mut chunk = vec![0.0f64; canonical_chunk];
for i in 0..copy_len {
chunk[i] = pcm[offset + i] as f64 / 32768.0;
}
let input = vec![chunk];
let result = resampler
.process(&input, None)
.map_err(|e| format!("resample {from_rate}->{to_rate}: {e}"))?;
Ok(result[0]
if remaining < canonical_chunk {
let expected =
(copy_len as f64 * to_rate as f64 / from_rate as f64).round() as usize;
let take = expected.min(result[0].len());
output.extend(
result[0][..take]
.iter()
.map(|&s| (s * 32767.0).round().clamp(-32768.0, 32767.0) as i16)
.collect())
.map(|&s| (s * 32767.0).round().clamp(-32768.0, 32767.0) as i16),
);
} else {
output.extend(
result[0]
.iter()
.map(|&s| (s * 32767.0).round().clamp(-32768.0, 32767.0) as i16),
);
}
offset += canonical_chunk;
}
Ok(output)
}
/// Apply RNNoise ML noise suppression to 48kHz PCM audio.
@@ -301,23 +336,81 @@ impl TranscodeState {
/// Decode an encoded audio payload to f32 PCM samples in [-1.0, 1.0].
/// Returns (samples, sample_rate).
///
/// For Opus, uses native float decode (no i16 quantization).
/// For G.722/G.711, decodes to i16 then converts (codec is natively i16).
pub fn decode_to_f32(&mut self, data: &[u8], pt: u8) -> Result<(Vec<f32>, u32), String> {
match pt {
PT_OPUS => {
let mut pcm = vec![0.0f32; 5760]; // up to 120ms at 48kHz
let packet =
OpusPacket::try_from(data).map_err(|e| format!("opus packet: {e}"))?;
let out =
MutSignals::try_from(&mut pcm[..]).map_err(|e| format!("opus signals: {e}"))?;
let n: usize = self
.opus_dec
.decode_float(Some(packet), out, false)
.map_err(|e| format!("opus decode_float: {e}"))?
.into();
pcm.truncate(n);
Ok((pcm, 48000))
}
_ => {
// G.722, PCMU, PCMA: natively i16 codecs — decode then convert.
let (pcm_i16, rate) = self.decode_to_pcm(data, pt)?;
let pcm_f32 = pcm_i16.iter().map(|&s| s as f32 / 32768.0).collect();
Ok((pcm_f32, rate))
}
}
}
/// Opus packet loss concealment — synthesize one frame to fill a gap.
/// Returns f32 PCM at 48kHz. `frame_size` should be 960 for 20ms.
pub fn opus_plc(&mut self, frame_size: usize) -> Result<Vec<f32>, String> {
let mut pcm = vec![0.0f32; frame_size];
let out = MutSignals::try_from(&mut pcm[..])
.map_err(|e| format!("opus plc signals: {e}"))?;
let n: usize = self
.opus_dec
.decode_float(None::<OpusPacket<'_>>, out, false)
.map_err(|e| format!("opus plc: {e}"))?
.into();
pcm.truncate(n);
Ok(pcm)
}
/// Encode f32 PCM samples ([-1.0, 1.0]) to an audio codec.
///
/// For Opus, uses native float encode (no i16 quantization).
/// For G.722/G.711, converts to i16 then encodes (codec is natively i16).
pub fn encode_from_f32(&mut self, pcm: &[f32], pt: u8) -> Result<Vec<u8>, String> {
match pt {
PT_OPUS => {
let mut buf = vec![0u8; 4000];
let n: usize = self
.opus_enc
.encode_float(pcm, &mut buf)
.map_err(|e| format!("opus encode_float: {e}"))?
.into();
buf.truncate(n);
Ok(buf)
}
_ => {
// G.722, PCMU, PCMA: natively i16 codecs.
let pcm_i16: Vec<i16> = pcm
.iter()
.map(|&s| (s * 32767.0).round().clamp(-32768.0, 32767.0) as i16)
.collect();
self.encode_from_pcm(&pcm_i16, pt)
}
}
}
/// High-quality sample rate conversion for f32 PCM using rubato FFT resampler.
/// Uses a separate cache from the i16 resampler.
///
/// To maintain continuous filter state, the resampler always processes at a
/// canonical chunk size (20ms at the source rate). This prevents cache
/// thrashing from variable input sizes and preserves inter-frame filter state.
pub fn resample_f32(
&mut self,
pcm: &[f32],
@@ -328,23 +421,50 @@ impl TranscodeState {
return Ok(pcm.to_vec());
}
let chunk = pcm.len();
let key = (from_rate, to_rate, chunk);
let canonical_chunk = (from_rate as usize) / 50; // 20ms
let key = (from_rate, to_rate, canonical_chunk);
if !self.resamplers_f32.contains_key(&key) {
let r =
FftFixedIn::<f32>::new(from_rate as usize, to_rate as usize, chunk, 1, 1)
let r = FftFixedIn::<f32>::new(
from_rate as usize,
to_rate as usize,
canonical_chunk,
1,
1,
)
.map_err(|e| format!("resampler f32 {from_rate}->{to_rate}: {e}"))?;
self.resamplers_f32.insert(key, r);
}
let resampler = self.resamplers_f32.get_mut(&key).unwrap();
let input = vec![pcm.to_vec()];
let mut output = Vec::with_capacity(
(pcm.len() as f64 * to_rate as f64 / from_rate as f64).ceil() as usize + 16,
);
let mut offset = 0;
while offset < pcm.len() {
let remaining = pcm.len() - offset;
let mut chunk = vec![0.0f32; canonical_chunk];
let copy_len = remaining.min(canonical_chunk);
chunk[..copy_len].copy_from_slice(&pcm[offset..offset + copy_len]);
let input = vec![chunk];
let result = resampler
.process(&input, None)
.map_err(|e| format!("resample f32 {from_rate}->{to_rate}: {e}"))?;
Ok(result[0].clone())
if remaining < canonical_chunk {
let expected =
(copy_len as f64 * to_rate as f64 / from_rate as f64).round() as usize;
output.extend_from_slice(&result[0][..expected.min(result[0].len())]);
} else {
output.extend_from_slice(&result[0]);
}
offset += canonical_chunk;
}
Ok(output)
}
/// Apply RNNoise ML noise suppression to 48kHz f32 PCM audio.

View File

@@ -10,7 +10,7 @@ path = "src/main.rs"
[dependencies]
codec-lib = { path = "../codec-lib" }
sip-proto = { path = "../sip-proto" }
nnnoiseless = { version = "0.5", default-features = false }
nnnoiseless = "0.5"
tokio = { version = "1", features = ["full"] }
serde = { version = "1", features = ["derive"] }
serde_json = "1"

View File

@@ -20,6 +20,35 @@ use std::net::SocketAddr;
use std::sync::Arc;
use tokio::net::UdpSocket;
/// Emit a `leg_added` event with full leg information.
/// Free function (not a method) to avoid `&self` borrow conflicts when `self.calls` is borrowed.
fn emit_leg_added_event(tx: &OutTx, call_id: &str, leg: &LegInfo) {
let metadata: serde_json::Value = if leg.metadata.is_empty() {
serde_json::json!({})
} else {
serde_json::Value::Object(
leg.metadata
.iter()
.map(|(k, v)| (k.clone(), v.clone()))
.collect(),
)
};
emit_event(
tx,
"leg_added",
serde_json::json!({
"call_id": call_id,
"leg_id": leg.id,
"kind": leg.kind.as_str(),
"state": leg.state.as_str(),
"codec": sip_proto::helpers::codec_name(leg.codec_pt),
"rtpPort": leg.rtp_port,
"remoteMedia": leg.remote_media.map(|a| format!("{}:{}", a.ip(), a.port())),
"metadata": metadata,
}),
);
}
pub struct CallManager {
/// All active calls, keyed by internal call ID.
pub calls: HashMap<String, Call>,
@@ -167,7 +196,17 @@ impl CallManager {
};
// Mutable borrow on call/leg is now released.
let sip_pt = codecs.first().copied().unwrap_or(9);
let mut sip_pt = codecs.first().copied().unwrap_or(9);
// If the message has SDP (e.g., 200 OK answer), use the negotiated codec
// instead of the offered one.
if msg.has_sdp_body() {
if let Some(ep) = parse_sdp_endpoint(&msg.body) {
if let Some(pt) = ep.codec_pt {
sip_pt = pt;
}
}
}
match action {
SipLegAction::None => {}
@@ -265,14 +304,27 @@ impl CallManager {
dev_leg.state = LegState::Connected;
}
}
emit_event(
&self.out_tx,
"leg_state_changed",
serde_json::json!({ "call_id": call_id, "leg_id": dev_leg_id, "state": "connected" }),
);
// Wire device leg to mixer.
// Use the device's preferred codec from its INVITE SDP,
// not the provider's negotiated codec.
let dev_pt = device_invite
.has_sdp_body()
.then(|| parse_sdp_endpoint(&device_invite.body))
.flatten()
.and_then(|ep| ep.codec_pt)
.unwrap_or(sip_pt);
if let Some(dev_remote_addr) = dev_remote {
let dev_channels = create_leg_channels();
spawn_sip_inbound(dev_rtp_socket.clone(), dev_channels.inbound_tx);
spawn_sip_outbound(dev_rtp_socket, dev_remote_addr, dev_channels.outbound_rx);
if let Some(call) = self.calls.get(call_id) {
call.add_leg_to_mixer(&dev_leg_id, sip_pt, dev_channels.inbound_rx, dev_channels.outbound_tx)
call.add_leg_to_mixer(&dev_leg_id, dev_pt, dev_channels.inbound_rx, dev_channels.outbound_tx)
.await;
}
}
@@ -324,6 +376,8 @@ impl CallManager {
leg.state = LegState::Terminated;
}
}
emit_event(&self.out_tx, "leg_state_changed",
serde_json::json!({ "call_id": call_id, "leg_id": leg_id, "state": "terminated" }));
emit_event(&self.out_tx, "call_ended",
serde_json::json!({ "call_id": call_id, "reason": reason, "duration": duration }));
self.terminate_call(call_id).await;
@@ -529,21 +583,30 @@ impl CallManager {
if let Some(leg) = call.legs.get_mut(this_leg_id) {
leg.state = LegState::Ringing;
}
emit_event(&self.out_tx, "leg_state_changed",
serde_json::json!({ "call_id": call_id, "leg_id": this_leg_id, "state": "ringing" }));
} else if code >= 200 && code < 300 {
let mut needs_wiring = false;
if let Some(leg) = call.legs.get_mut(this_leg_id) {
leg.state = LegState::Connected;
// Learn remote media from SDP.
// Learn remote media and negotiated codec from SDP answer.
if msg.has_sdp_body() {
if let Some(ep) = parse_sdp_endpoint(&msg.body) {
if let Ok(addr) = format!("{}:{}", ep.address, ep.port).parse() {
leg.remote_media = Some(addr);
}
// Use the codec from the SDP answer (what the remote actually selected).
if let Some(pt) = ep.codec_pt {
leg.codec_pt = pt;
}
}
}
needs_wiring = true;
}
emit_event(&self.out_tx, "leg_state_changed",
serde_json::json!({ "call_id": call_id, "leg_id": this_leg_id, "state": "connected" }));
if call.state != CallState::Connected {
call.state = CallState::Connected;
emit_event(&self.out_tx, "call_answered", serde_json::json!({ "call_id": call_id }));
@@ -689,15 +752,19 @@ impl CallManager {
call.callee_number = Some(called_number);
call.state = CallState::Ringing;
let codec_pt = provider_config.codecs.first().copied().unwrap_or(9);
let mut codec_pt = provider_config.codecs.first().copied().unwrap_or(9);
// Provider leg — extract media from SDP.
// Provider leg — extract media and negotiated codec from SDP.
let mut provider_media: Option<SocketAddr> = None;
if invite.has_sdp_body() {
if let Some(ep) = parse_sdp_endpoint(&invite.body) {
if let Ok(addr) = format!("{}:{}", ep.address, ep.port).parse() {
provider_media = Some(addr);
}
// Use the codec from the provider's SDP offer (what they actually want to use).
if let Some(pt) = ep.codec_pt {
codec_pt = pt;
}
}
}
@@ -767,6 +834,16 @@ impl CallManager {
// Store the call.
self.calls.insert(call_id.clone(), call);
// Emit leg_added for both initial legs.
if let Some(call) = self.calls.get(&call_id) {
if let Some(leg) = call.legs.get(&provider_leg_id) {
emit_leg_added_event(&self.out_tx, &call_id, leg);
}
if let Some(leg) = call.legs.get(&device_leg_id) {
emit_leg_added_event(&self.out_tx, &call_id, leg);
}
}
Some(call_id)
}
@@ -854,6 +931,14 @@ impl CallManager {
.insert(sip_call_id, (call_id.clone(), leg_id));
self.calls.insert(call_id.clone(), call);
// Emit leg_added for the provider leg.
if let Some(call) = self.calls.get(&call_id) {
for leg in call.legs.values() {
emit_leg_added_event(&self.out_tx, &call_id, leg);
}
}
Some(call_id)
}
@@ -1002,6 +1087,14 @@ impl CallManager {
.insert(provider_sip_call_id, (call_id.clone(), provider_leg_id));
self.calls.insert(call_id.clone(), call);
// Emit leg_added for both initial legs (device + provider).
if let Some(call) = self.calls.get(&call_id) {
for leg in call.legs.values() {
emit_leg_added_event(&self.out_tx, &call_id, leg);
}
}
Some(call_id)
}
@@ -1069,17 +1162,11 @@ impl CallManager {
let call = self.calls.get_mut(call_id).unwrap();
call.legs.insert(leg_id.clone(), leg_info);
emit_event(
&self.out_tx,
"leg_added",
serde_json::json!({
"call_id": call_id,
"leg_id": leg_id,
"kind": "sip-provider",
"state": "inviting",
"number": number,
}),
);
if let Some(call) = self.calls.get(call_id) {
if let Some(leg) = call.legs.get(&leg_id) {
emit_leg_added_event(&self.out_tx, call_id, leg);
}
}
Some(leg_id)
}
@@ -1145,17 +1232,11 @@ impl CallManager {
let call = self.calls.get_mut(call_id).unwrap();
call.legs.insert(leg_id.clone(), leg_info);
emit_event(
&self.out_tx,
"leg_added",
serde_json::json!({
"call_id": call_id,
"leg_id": leg_id,
"kind": "sip-device",
"state": "inviting",
"device_id": device_id,
}),
);
if let Some(call) = self.calls.get(call_id) {
if let Some(leg) = call.legs.get(&leg_id) {
emit_leg_added_event(&self.out_tx, call_id, leg);
}
}
Some(leg_id)
}
@@ -1242,6 +1323,13 @@ impl CallManager {
None => return false,
};
// Emit leg_removed for source call.
emit_event(
&self.out_tx,
"leg_removed",
serde_json::json!({ "call_id": source_call_id, "leg_id": leg_id }),
);
// Update SIP index to point to the target call.
if let Some(sip_cid) = &leg_info.sip_call_id {
self.sip_index.insert(
@@ -1274,15 +1362,12 @@ impl CallManager {
let target_call = self.calls.get_mut(target_call_id).unwrap();
target_call.legs.insert(leg_id.to_string(), leg_info);
emit_event(
&self.out_tx,
"leg_transferred",
serde_json::json!({
"leg_id": leg_id,
"source_call_id": source_call_id,
"target_call_id": target_call_id,
}),
);
// Emit leg_added for target call.
if let Some(target) = self.calls.get(target_call_id) {
if let Some(leg) = target.legs.get(leg_id) {
emit_leg_added_event(&self.out_tx, target_call_id, leg);
}
}
// Check if source call has too few legs remaining.
let source_call = self.calls.get(source_call_id).unwrap();
@@ -1385,6 +1470,11 @@ impl CallManager {
}
}
leg.state = LegState::Terminated;
emit_event(
&self.out_tx,
"leg_state_changed",
serde_json::json!({ "call_id": call_id, "leg_id": leg.id, "state": "terminated" }),
);
}
emit_event(
@@ -1503,6 +1593,13 @@ impl CallManager {
);
self.calls.insert(call_id.to_string(), call);
// Emit leg_added for the provider leg.
if let Some(call) = self.calls.get(call_id) {
for leg in call.legs.values() {
emit_leg_added_event(&self.out_tx, call_id, leg);
}
}
// Build recording path.
let timestamp = std::time::SystemTime::now()
.duration_since(std::time::UNIX_EPOCH)

View File

@@ -0,0 +1,188 @@
//! Per-leg adaptive jitter buffer for the audio mixer.
//!
//! Sits between inbound RTP packet reception and the mixer's decode step.
//! Reorders packets by sequence number and delivers exactly one frame per
//! 20ms mixer tick, smoothing out network jitter. When a packet is missing,
//! the mixer can invoke codec PLC to conceal the gap.
use crate::mixer::RtpPacket;
use std::collections::BTreeMap;
/// Per-leg jitter buffer. Collects RTP packets keyed by sequence number,
/// delivers one frame per 20ms tick in sequence order.
///
/// Adaptive target depth: starts at 3 frames (60ms), adjusts between
/// 26 frames based on observed jitter.
pub struct JitterBuffer {
/// Packets waiting for playout, keyed by seq number.
buffer: BTreeMap<u16, RtpPacket>,
/// Next expected sequence number for playout.
next_seq: Option<u16>,
/// Target buffer depth in frames (adaptive).
target_depth: u32,
/// Current fill level high-water mark (for adaptation).
max_fill_seen: u32,
/// Ticks since last adaptation adjustment.
adapt_counter: u32,
/// Consecutive ticks where buffer was empty (for ramp-up).
empty_streak: u32,
/// Consecutive ticks where buffer had excess (for ramp-down).
excess_streak: u32,
/// Whether we've started playout (initial fill complete).
playing: bool,
/// Number of frames consumed since start (for stats).
frames_consumed: u64,
/// Number of frames lost (gap in sequence).
frames_lost: u64,
}
/// What the mixer gets back each tick.
pub enum JitterResult {
/// A packet is available for decoding.
Packet(RtpPacket),
/// Packet was expected but missing — invoke PLC.
Missing,
/// Buffer is in initial fill phase — output silence.
Filling,
}
impl JitterBuffer {
pub fn new() -> Self {
Self {
buffer: BTreeMap::new(),
next_seq: None,
target_depth: 3, // 60ms initial target
max_fill_seen: 0,
adapt_counter: 0,
empty_streak: 0,
excess_streak: 0,
playing: false,
frames_consumed: 0,
frames_lost: 0,
}
}
/// Push a received RTP packet into the buffer.
pub fn push(&mut self, pkt: RtpPacket) {
// Ignore duplicates.
if self.buffer.contains_key(&pkt.seq) {
return;
}
// Detect large forward seq jump (hold/resume, SSRC change).
if let Some(next) = self.next_seq {
let jump = pkt.seq.wrapping_sub(next);
if jump > 1000 && jump < 0x8000 {
// Massive forward jump — reset buffer.
self.reset();
self.next_seq = Some(pkt.seq);
}
}
if self.next_seq.is_none() {
self.next_seq = Some(pkt.seq);
}
self.buffer.insert(pkt.seq, pkt);
}
/// Consume one frame for the current 20ms tick.
/// Called once per mixer tick per leg.
pub fn consume(&mut self) -> JitterResult {
// Track fill level for adaptation.
let fill = self.buffer.len() as u32;
if fill > self.max_fill_seen {
self.max_fill_seen = fill;
}
// Initial fill phase: wait until we have target_depth packets.
if !self.playing {
if fill >= self.target_depth {
self.playing = true;
} else {
return JitterResult::Filling;
}
}
let seq = match self.next_seq {
Some(s) => s,
None => return JitterResult::Filling,
};
// Advance next_seq (wrapping u16).
self.next_seq = Some(seq.wrapping_add(1));
// Try to pull the expected sequence number.
if let Some(pkt) = self.buffer.remove(&seq) {
self.frames_consumed += 1;
self.empty_streak = 0;
// Adaptive: if buffer is consistently deep, we can tighten.
if fill > self.target_depth + 2 {
self.excess_streak += 1;
} else {
self.excess_streak = 0;
}
JitterResult::Packet(pkt)
} else {
// Packet missing — PLC needed.
self.frames_lost += 1;
self.empty_streak += 1;
self.excess_streak = 0;
JitterResult::Missing
}
}
/// Run adaptation logic. Call every tick; internally gates to ~1s intervals.
pub fn adapt(&mut self) {
self.adapt_counter += 1;
if self.adapt_counter < 50 {
return;
}
self.adapt_counter = 0;
// If we had many empty ticks, increase depth.
if self.empty_streak > 3 && self.target_depth < 6 {
self.target_depth += 1;
}
// If buffer consistently overfull, decrease depth.
else if self.excess_streak > 25 && self.target_depth > 2 {
self.target_depth -= 1;
}
self.max_fill_seen = 0;
}
/// Discard packets that are too old (seq far behind next_seq).
/// Prevents unbounded memory growth from reordered/late packets.
pub fn prune_stale(&mut self) {
if let Some(next) = self.next_seq {
// Remove anything more than 100 frames behind playout point.
// Use wrapping arithmetic: if (next - seq) > 100, it's stale.
let stale: Vec<u16> = self
.buffer
.keys()
.filter(|&&seq| {
let age = next.wrapping_sub(seq);
age > 100 && age < 0x8000 // < 0x8000 means it's actually behind, not ahead
})
.copied()
.collect();
for seq in stale {
self.buffer.remove(&seq);
}
}
}
/// Reset the buffer (e.g., after re-INVITE / hold-resume).
pub fn reset(&mut self) {
self.buffer.clear();
self.next_seq = None;
self.playing = false;
self.empty_streak = 0;
self.excess_streak = 0;
self.adapt_counter = 0;
}
}

View File

@@ -35,7 +35,8 @@ pub fn create_leg_channels() -> LegChannels {
}
/// Spawn the inbound I/O task for a SIP leg.
/// Reads RTP from the socket, strips the 12-byte header, sends payload to the mixer.
/// Reads RTP from the socket, parses the variable-length header (RFC 3550),
/// and sends the payload to the mixer.
/// Returns the JoinHandle (exits when the inbound_tx channel is dropped).
pub fn spawn_sip_inbound(
rtp_socket: Arc<UdpSocket>,
@@ -51,12 +52,29 @@ pub fn spawn_sip_inbound(
}
let pt = buf[1] & 0x7F;
let marker = (buf[1] & 0x80) != 0;
let seq = u16::from_be_bytes([buf[2], buf[3]]);
let timestamp = u32::from_be_bytes([buf[4], buf[5], buf[6], buf[7]]);
let payload = buf[12..n].to_vec();
// RFC 3550: header length = 12 + (CC * 4) + optional extension.
let cc = (buf[0] & 0x0F) as usize;
let has_extension = (buf[0] & 0x10) != 0;
let mut offset = 12 + cc * 4;
if has_extension {
if offset + 4 > n {
continue; // Malformed: extension header truncated.
}
let ext_len = u16::from_be_bytes([buf[offset + 2], buf[offset + 3]]) as usize;
offset += 4 + ext_len * 4;
}
if offset >= n {
continue; // No payload after header.
}
let payload = buf[offset..n].to_vec();
if payload.is_empty() {
continue;
}
if inbound_tx.send(RtpPacket { payload, payload_type: pt, marker, timestamp }).await.is_err() {
if inbound_tx.send(RtpPacket { payload, payload_type: pt, marker, seq, timestamp }).await.is_err() {
break; // Channel closed — leg removed.
}
}

View File

@@ -12,6 +12,7 @@ mod call_manager;
mod config;
mod dtmf;
mod ipc;
mod jitter_buffer;
mod leg_io;
mod mixer;
mod provider;
@@ -677,6 +678,10 @@ async fn handle_webrtc_link(
"leg_id": session_id,
"kind": "webrtc",
"state": "connected",
"codec": "Opus",
"rtpPort": 0,
"remoteMedia": null,
"metadata": {},
}));
respond_ok(out_tx, &cmd.id, serde_json::json!({
@@ -1125,8 +1130,11 @@ async fn handle_add_tool_leg(
"call_id": call_id,
"leg_id": tool_leg_id,
"kind": "tool",
"tool_type": tool_type_str,
"state": "connected",
"codec": null,
"rtpPort": 0,
"remoteMedia": null,
"metadata": { "tool_type": tool_type_str },
}),
);

View File

@@ -15,6 +15,7 @@
//! 6. Forward DTMF between participant legs only
use crate::ipc::{emit_event, OutTx};
use crate::jitter_buffer::{JitterBuffer, JitterResult};
use crate::rtp::{build_rtp_header, rtp_clock_increment};
use codec_lib::{codec_sample_rate, new_denoiser, TranscodeState};
use nnnoiseless::DenoiseState;
@@ -35,6 +36,8 @@ pub struct RtpPacket {
pub payload_type: u8,
/// RTP marker bit (first packet of a DTMF event, etc.).
pub marker: bool,
/// RTP sequence number for reordering.
pub seq: u16,
/// RTP timestamp from the original packet header.
pub timestamp: u32,
}
@@ -162,6 +165,8 @@ struct MixerLegSlot {
last_pcm_frame: Vec<f32>,
/// Number of consecutive ticks with no inbound packet.
silent_ticks: u32,
/// Per-leg jitter buffer for packet reordering and timing.
jitter: JitterBuffer,
// RTP output state.
rtp_seq: u16,
rtp_ts: u32,
@@ -236,6 +241,7 @@ async fn mixer_loop(
rtp_ts: 0,
rtp_ssrc: rand::random(),
role: LegRole::Participant,
jitter: JitterBuffer::new(),
},
);
}
@@ -319,35 +325,37 @@ async fn mixer_loop(
continue;
}
// ── 2. Drain inbound packets, decode to 16kHz PCM. ─────────
// ── 2. Drain inbound packets, decode to 48kHz f32 PCM. ────
// DTMF (PT 101) packets are collected separately.
// Audio packets are sorted by sequence number and decoded
// in order to maintain codec state (critical for G.722 ADPCM).
let leg_ids: Vec<String> = legs.keys().cloned().collect();
let mut dtmf_forward: Vec<(String, RtpPacket)> = Vec::new();
for lid in &leg_ids {
let slot = legs.get_mut(lid).unwrap();
// Drain channel — collect DTMF packets separately, keep latest audio.
let mut latest_audio: Option<RtpPacket> = None;
// Step 2a: Drain all pending packets into the jitter buffer.
let mut got_audio = false;
loop {
match slot.inbound_rx.try_recv() {
Ok(pkt) => {
if pkt.payload_type == 101 {
// DTMF telephone-event: collect for processing.
dtmf_forward.push((lid.clone(), pkt));
} else {
latest_audio = Some(pkt);
got_audio = true;
slot.jitter.push(pkt);
}
}
Err(_) => break,
}
}
if let Some(pkt) = latest_audio {
slot.silent_ticks = 0;
// Step 2b: Consume exactly one frame from the jitter buffer.
match slot.jitter.consume() {
JitterResult::Packet(pkt) => {
match slot.transcoder.decode_to_f32(&pkt.payload, pkt.payload_type) {
Ok((pcm, rate)) => {
// Resample to 48kHz mixing rate if needed.
let pcm_48k = if rate == MIX_RATE {
pcm
} else {
@@ -355,29 +363,57 @@ async fn mixer_loop(
.resample_f32(&pcm, rate, MIX_RATE)
.unwrap_or_else(|_| vec![0.0f32; MIX_FRAME_SIZE])
};
// Per-leg inbound denoising at 48kHz.
let denoised = TranscodeState::denoise_f32(&mut slot.denoiser, &pcm_48k);
// Pad or truncate to exactly MIX_FRAME_SIZE.
let mut frame = denoised;
let processed = if slot.codec_pt != codec_lib::PT_OPUS {
TranscodeState::denoise_f32(&mut slot.denoiser, &pcm_48k)
} else {
pcm_48k
};
let mut frame = processed;
frame.resize(MIX_FRAME_SIZE, 0.0);
slot.last_pcm_frame = frame;
}
Err(_) => {}
}
}
JitterResult::Missing => {
// Invoke Opus PLC or fade for non-Opus codecs.
if slot.codec_pt == codec_lib::PT_OPUS {
match slot.transcoder.opus_plc(MIX_FRAME_SIZE) {
Ok(pcm) => {
slot.last_pcm_frame = pcm;
}
Err(_) => {
// Decode failed — use silence.
for s in slot.last_pcm_frame.iter_mut() {
*s *= 0.8;
}
}
}
} else {
// Non-Opus: fade last frame toward silence.
for s in slot.last_pcm_frame.iter_mut() {
*s *= 0.85;
}
}
}
JitterResult::Filling => {
slot.last_pcm_frame = vec![0.0f32; MIX_FRAME_SIZE];
}
}
} else if dtmf_forward.iter().any(|(src, _)| src == lid) {
// Got DTMF but no audio — don't bump silent_ticks (DTMF counts as activity).
// Run jitter adaptation + prune stale packets.
slot.jitter.adapt();
slot.jitter.prune_stale();
// Silent ticks: based on actual network reception, not jitter buffer state.
if got_audio || dtmf_forward.iter().any(|(src, _)| src == lid) {
slot.silent_ticks = 0;
} else {
slot.silent_ticks += 1;
// After 150 ticks (3 seconds) of silence, zero out to avoid stale audio.
}
if slot.silent_ticks > 150 {
slot.last_pcm_frame = vec![0.0f32; MIX_FRAME_SIZE];
}
}
}
// ── 3. Compute total mix from PARTICIPANT legs only. ────────
// Accumulate as f64 to prevent precision loss when summing f32.

View File

@@ -290,8 +290,9 @@ async fn browser_to_mixer_loop(
.send(RtpPacket {
payload: payload.to_vec(),
payload_type: PT_OPUS,
marker: false,
timestamp: 0,
marker: rtp_packet.header.marker,
seq: rtp_packet.header.sequence_number,
timestamp: rtp_packet.header.timestamp,
})
.await;
}

View File

@@ -197,10 +197,11 @@ pub fn compute_digest_auth(
use crate::Endpoint;
/// Parse the audio media port and connection address from an SDP body.
/// Parse the audio media port, connection address, and preferred codec from an SDP body.
pub fn parse_sdp_endpoint(sdp: &str) -> Option<Endpoint> {
let mut addr: Option<&str> = None;
let mut port: Option<u16> = None;
let mut codec_pt: Option<u8> = None;
let normalized = sdp.replace("\r\n", "\n");
for raw in normalized.split('\n') {
@@ -208,10 +209,16 @@ pub fn parse_sdp_endpoint(sdp: &str) -> Option<Endpoint> {
if let Some(rest) = line.strip_prefix("c=IN IP4 ") {
addr = Some(rest.trim());
} else if let Some(rest) = line.strip_prefix("m=audio ") {
// m=audio <port> RTP/AVP <pt1> [<pt2> ...]
let parts: Vec<&str> = rest.split_whitespace().collect();
if !parts.is_empty() {
port = parts[0].parse().ok();
}
// parts[1] is "RTP/AVP" or similar, parts[2..] are payload types.
// The first PT is the preferred codec.
if parts.len() > 2 {
codec_pt = parts[2].parse::<u8>().ok();
}
}
}
@@ -219,6 +226,7 @@ pub fn parse_sdp_endpoint(sdp: &str) -> Option<Endpoint> {
(Some(a), Some(p)) => Some(Endpoint {
address: a.to_string(),
port: p,
codec_pt,
}),
_ => None,
}

View File

@@ -9,9 +9,11 @@ pub mod dialog;
pub mod helpers;
pub mod rewrite;
/// Network endpoint (address + port).
/// Network endpoint (address + port + optional negotiated codec).
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct Endpoint {
pub address: String,
pub port: u16,
/// First payload type from the SDP `m=audio` line (the preferred codec).
pub codec_pt: Option<u8>,
}

View File

@@ -92,7 +92,7 @@ pub fn rewrite_sdp(body: &str, ip: &str, port: u16) -> (String, Option<Endpoint>
.collect();
let original = match (orig_addr, orig_port) {
(Some(a), Some(p)) => Some(Endpoint { address: a, port: p }),
(Some(a), Some(p)) => Some(Endpoint { address: a, port: p, codec_pt: None }),
_ => None,
};

View File

@@ -3,6 +3,6 @@
*/
export const commitinfo = {
name: 'siprouter',
version: '1.17.0',
version: '1.20.1',
description: 'undefined'
}

View File

@@ -134,8 +134,22 @@ let logFn: ((msg: string) => void) | undefined;
function buildLocalPaths(): string[] {
const root = process.cwd();
// Map Node's process.arch to tsrust's friendly target name.
// tsrust writes multi-target binaries as <bin>_<os>_<arch>,
// e.g. proxy-engine_linux_amd64 / proxy-engine_linux_arm64.
const archSuffix =
process.arch === 'arm64' ? 'linux_arm64' :
process.arch === 'x64' ? 'linux_amd64' :
null;
const multiTarget = archSuffix
? [path.join(root, 'dist_rust', `proxy-engine_${archSuffix}`)]
: [];
return [
// 1. Multi-target output matching the running host arch (Docker image, CI, multi-target dev).
...multiTarget,
// 2. Single-target (unsuffixed) output — legacy/fallback when tsrust runs without targets.
path.join(root, 'dist_rust', 'proxy-engine'),
// 3. Direct cargo builds for dev iteration.
path.join(root, 'rust', 'target', 'release', 'proxy-engine'),
path.join(root, 'rust', 'target', 'debug', 'proxy-engine'),
];

View File

@@ -425,9 +425,9 @@ async function startProxyEngine(): Promise<void> {
id: data.leg_id,
type: data.kind,
state: data.state,
codec: null,
rtpPort: null,
remoteMedia: null,
codec: data.codec ?? null,
rtpPort: data.rtpPort ?? null,
remoteMedia: data.remoteMedia ?? null,
metadata: data.metadata || {},
});
}

View File

@@ -3,6 +3,6 @@
*/
export const commitinfo = {
name: 'siprouter',
version: '1.17.0',
version: '1.20.1',
description: 'undefined'
}

View File

@@ -18,7 +18,7 @@ const VIEW_TABS = [
{ name: 'Phone', iconName: 'lucide:headset', element: SipproxyViewPhone },
{ name: 'Routes', iconName: 'lucide:route', element: SipproxyViewRoutes },
{ name: 'Voicemail', iconName: 'lucide:voicemail', element: SipproxyViewVoicemail },
{ name: 'IVR', iconName: 'lucide:list-tree', element: SipproxyViewIvr },
{ name: 'IVR', iconName: 'lucide:ListTree', element: SipproxyViewIvr },
{ name: 'Contacts', iconName: 'lucide:contactRound', element: SipproxyViewContacts },
{ name: 'Providers', iconName: 'lucide:server', element: SipproxyViewProviders },
{ name: 'Log', iconName: 'lucide:scrollText', element: SipproxyViewLog },

View File

@@ -422,7 +422,7 @@ export class SipproxyViewCalls extends DeesElement {
menuOptions: [
{
name: 'Transfer',
iconName: 'lucide:arrow-right-left',
iconName: 'lucide:ArrowRightLeft',
action: async (modalRef: any) => {
if (!targetCallId || !targetLegId) {
deesCatalog.DeesToast.error('Please select both a target call and a leg');
@@ -620,7 +620,7 @@ export class SipproxyViewCalls extends DeesElement {
title: 'Inbound',
value: inboundCount,
type: 'number',
icon: 'lucide:phone-incoming',
icon: 'lucide:PhoneIncoming',
description: 'Incoming calls',
},
{
@@ -628,7 +628,7 @@ export class SipproxyViewCalls extends DeesElement {
title: 'Outbound',
value: outboundCount,
type: 'number',
icon: 'lucide:phone-outgoing',
icon: 'lucide:PhoneOutgoing',
description: 'Outgoing calls',
},
];

View File

@@ -140,7 +140,7 @@ export class SipproxyViewIvr extends DeesElement {
title: 'Total Menus',
value: ivr.menus.length,
type: 'number',
icon: 'lucide:list-tree',
icon: 'lucide:ListTree',
description: 'IVR menu definitions',
},
{
@@ -148,7 +148,7 @@ export class SipproxyViewIvr extends DeesElement {
title: 'Entry Menu',
value: entryMenu?.name || '(none)',
type: 'text' as any,
icon: 'lucide:door-open',
icon: 'lucide:DoorOpen',
description: entryMenu ? `ID: ${entryMenu.id}` : 'No entry menu set',
},
{
@@ -156,7 +156,7 @@ export class SipproxyViewIvr extends DeesElement {
title: 'Status',
value: ivr.enabled ? 'Enabled' : 'Disabled',
type: 'text' as any,
icon: ivr.enabled ? 'lucide:check-circle' : 'lucide:x-circle',
icon: ivr.enabled ? 'lucide:CheckCircle' : 'lucide:XCircle',
color: ivr.enabled ? 'hsl(142.1 76.2% 36.3%)' : 'hsl(0 84.2% 60.2%)',
description: ivr.enabled ? 'IVR is active' : 'IVR is inactive',
},
@@ -228,7 +228,7 @@ export class SipproxyViewIvr extends DeesElement {
},
{
name: 'Set as Entry',
iconName: 'lucide:door-open' as any,
iconName: 'lucide:DoorOpen' as any,
type: ['inRow'] as any,
actionFunc: async ({ item }: { item: IIvrMenu }) => {
await this.setEntryMenu(item.id);
@@ -236,7 +236,7 @@ export class SipproxyViewIvr extends DeesElement {
},
{
name: 'Delete',
iconName: 'lucide:trash-2' as any,
iconName: 'lucide:Trash2' as any,
type: ['inRow'] as any,
actionFunc: async ({ item }: { item: IIvrMenu }) => {
await this.confirmDeleteMenu(item);
@@ -295,7 +295,7 @@ export class SipproxyViewIvr extends DeesElement {
},
{
name: 'Delete',
iconName: 'lucide:trash-2',
iconName: 'lucide:Trash2',
action: async (modalRef: any) => {
const ivr = this.getIvrConfig();
const menus = ivr.menus.filter((m) => m.id !== menu.id);

View File

@@ -107,7 +107,7 @@ export class SipproxyViewOverview extends DeesElement {
title: 'Inbound Calls',
value: inboundCalls,
type: 'number',
icon: 'lucide:phone-incoming',
icon: 'lucide:PhoneIncoming',
description: 'Currently active',
},
{
@@ -115,7 +115,7 @@ export class SipproxyViewOverview extends DeesElement {
title: 'Outbound Calls',
value: outboundCalls,
type: 'number',
icon: 'lucide:phone-outgoing',
icon: 'lucide:PhoneOutgoing',
description: 'Currently active',
},
{

View File

@@ -86,7 +86,7 @@ export class SipproxyViewProviders extends DeesElement {
title: 'Registered',
value: registered,
type: 'number',
icon: 'lucide:check-circle',
icon: 'lucide:CheckCircle',
color: 'hsl(142.1 76.2% 36.3%)',
description: 'Active registrations',
},
@@ -95,7 +95,7 @@ export class SipproxyViewProviders extends DeesElement {
title: 'Unregistered',
value: unregistered,
type: 'number',
icon: 'lucide:alert-circle',
icon: 'lucide:AlertCircle',
color: unregistered > 0 ? 'hsl(0 84.2% 60.2%)' : undefined,
description: unregistered > 0 ? 'Needs attention' : 'All healthy',
},
@@ -153,7 +153,7 @@ export class SipproxyViewProviders extends DeesElement {
},
{
name: 'Delete',
iconName: 'lucide:trash-2',
iconName: 'lucide:Trash2',
type: ['inRow'] as any,
actionFunc: async (actionData: any) => {
await this.confirmDelete(actionData.item);
@@ -579,7 +579,7 @@ export class SipproxyViewProviders extends DeesElement {
},
{
name: 'Delete',
iconName: 'lucide:trash-2',
iconName: 'lucide:Trash2',
action: async (modalRef: any) => {
try {
const result = await appState.apiSaveConfig({

View File

@@ -239,7 +239,7 @@ export class SipproxyViewVoicemail extends DeesElement {
},
{
name: 'Delete',
iconName: 'lucide:trash-2',
iconName: 'lucide:Trash2',
action: async (modalRef: any) => {
try {
await fetch(
@@ -281,7 +281,7 @@ export class SipproxyViewVoicemail extends DeesElement {
title: 'Unheard Messages',
value: unheard,
type: 'number',
icon: 'lucide:bell-ring',
icon: 'lucide:BellRing',
color: unheard > 0 ? 'hsl(0 84.2% 60.2%)' : 'hsl(142.1 76.2% 36.3%)',
description: unheard > 0 ? 'Needs attention' : 'All caught up',
},
@@ -372,7 +372,7 @@ export class SipproxyViewVoicemail extends DeesElement {
},
{
name: 'Delete',
iconName: 'lucide:trash-2',
iconName: 'lucide:Trash2',
type: ['inRow'] as any,
actionFunc: async (actionData: any) => {
await this.deleteMessage(actionData.item as IVoicemailMessage);