From c40c726dc3f13a1f378fa63cfd0fa39f8feda34d Mon Sep 17 00:00:00 2001 From: Juergen Kunz Date: Sat, 11 Apr 2026 19:20:14 +0000 Subject: [PATCH] fix(readme): improve architecture and call flow documentation with Mermaid diagrams --- changelog.md | 6 +++ readme.md | 98 +++++++++++++++++++++++------------- ts/00_commitinfo_data.ts | 2 +- ts_web/00_commitinfo_data.ts | 2 +- 4 files changed, 70 insertions(+), 38 deletions(-) diff --git a/changelog.md b/changelog.md index ef866eb..d085024 100644 --- a/changelog.md +++ b/changelog.md @@ -1,5 +1,11 @@ # Changelog +## 2026-04-11 - 1.20.5 - fix(readme) +improve architecture and call flow documentation with Mermaid diagrams + +- Replace ASCII architecture and audio pipeline diagrams with Mermaid diagrams for better readability +- Document the WebRTC browser call setup sequence, including offer handling and session-to-call linking + ## 2026-04-11 - 1.20.4 - fix(deps) bump @design.estate/dees-catalog to ^3.71.1 diff --git a/readme.md b/readme.md index 9aa2a71..d343fe2 100644 --- a/readme.md +++ b/readme.md @@ -28,39 +28,26 @@ siprouter sits between your SIP trunk providers and your endpoints โ€” hardware ## ๐Ÿ—๏ธ Architecture -``` -โ”Œโ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ” -โ”‚ Browser Softphone โ”‚ -โ”‚ (WebRTC via WebSocket signaling) โ”‚ -โ””โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”ฌโ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”˜ - โ”‚ Opus/WebRTC - โ–ผ -โ”Œโ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ” -โ”‚ siprouter โ”‚ -โ”‚ โ”‚ -โ”‚ TypeScript Control Plane โ”‚ -โ”‚ โ”Œโ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ” โ”‚ -โ”‚ โ”‚ Config ยท WebRTC Signaling โ”‚ โ”‚ -โ”‚ โ”‚ REST API ยท Web Dashboard โ”‚ โ”‚ -โ”‚ โ”‚ Voicebox Manager ยท TTS Cache โ”‚ โ”‚ -โ”‚ 
โ””โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”ฌโ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”˜ โ”‚ -โ”‚ JSON-over-stdio IPC โ”‚ -โ”‚ โ”Œโ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”ดโ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ” โ”‚ -โ”‚ โ”‚ Rust proxy-engine (data plane) โ”‚ โ”‚ -โ”‚ โ”‚ โ”‚ โ”‚ -โ”‚ โ”‚ SIP Stack ยท Dialog SM ยท Auth โ”‚ โ”‚ -โ”‚ โ”‚ Call Manager ยท N-Leg Mixer โ”‚ โ”‚ -โ”‚ โ”‚ 48kHz f32 Bus ยท Jitter Buffer โ”‚ โ”‚ -โ”‚ โ”‚ Codec Engine ยท RTP Port Pool โ”‚ โ”‚ -โ”‚ โ”‚ WebRTC Engine ยท Kokoro TTS โ”‚ โ”‚ -โ”‚ โ”‚ Voicemail ยท IVR ยท Recording โ”‚ โ”‚ -โ”‚ โ””โ”€โ”€โ”€โ”€โ”ฌโ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”ฌโ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”˜ โ”‚ -โ””โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”คโ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”คโ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”˜ - โ”‚ โ”‚ - โ”Œโ”€โ”€โ”€โ”€โ”€โ”€โ”ดโ”€โ”€โ”€โ”€โ”€โ”€โ” โ”Œโ”€โ”€โ”€โ”€โ”€โ”€โ”ดโ”€โ”€โ”€โ”€โ”€โ”€โ” - โ”‚ SIP Devices โ”‚ โ”‚ SIP Trunk โ”‚ - โ”‚ (HT801 etc) โ”‚ โ”‚ Providers โ”‚ - โ””โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”˜ โ””โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”˜ +```mermaid +flowchart TB + Browser["๐ŸŒ Browser Softphone
(WebRTC via WebSocket signaling)"] + Devices["📞 SIP Devices
(HT801, desk phones, ATAs)"] + Trunks["☎️ SIP Trunk Providers
(sipgate, easybell, …)"] + + subgraph Router["siprouter"] + direction TB + subgraph TS["TypeScript Control Plane"] + TSBits["Config · WebRTC Signaling
REST API · Web Dashboard
Voicebox Manager · TTS Cache"] + end + subgraph Rust["Rust proxy-engine (data plane)"] + RustBits["SIP Stack · Dialog SM · Auth
Call Manager · N-Leg Mixer
48kHz f32 Bus · Jitter Buffer
Codec Engine · RTP Port Pool
WebRTC Engine · Kokoro TTS
Voicemail · IVR · Recording"] + end + TS <-->|"JSON-over-stdio IPC"| Rust + end + + Browser <-->|"Opus / WebRTC"| TS + Rust <-->|"SIP / RTP"| Devices + Rust <-->|"SIP / RTP"| Trunks ``` ### 🧠 Key Design Decisions @@ -71,6 +58,37 @@ siprouter sits between your SIP trunk providers and your endpoints — hardware - **Per-Session Codec Isolation** — Each call leg gets its own encoder/decoder/resampler/denoiser state — no cross-call corruption. - **SDP Codec Negotiation** — Outbound encoding uses the codec actually negotiated in SDP answers, not just the first offered codec. +### 📲 WebRTC Browser Call Flow + +Browser calls are set up in a strict three-step dance — the WebRTC leg cannot be attached at call-creation time because the browser's session ID is only known once the SDP offer arrives: + +```mermaid +sequenceDiagram + participant B as Browser + participant TS as TypeScript (sipproxy.ts) + participant R as Rust proxy-engine + participant P as SIP Provider + + B->>TS: POST /api/call + TS->>R: make_call (pending call, no WebRTC leg yet) + R-->>TS: call_created + TS-->>B: webrtc-incoming (callId) + + B->>TS: webrtc-offer (sessionId, SDP) + TS->>R: handle_webrtc_offer + R-->>TS: webrtc-answer (SDP) + TS-->>B: webrtc-answer + Note over R: Standalone WebRTC session
(not yet attached to call) + + B->>TS: webrtc_link (callId + sessionId) + TS->>R: link session → call + R->>R: wire WebRTC leg through mixer + R->>P: SIP INVITE + P-->>R: 200 OK + SDP + R-->>TS: call_answered + Note over B,P: Bidirectional Opus ↔ codec-transcoded
audio flows through the mixer +``` + --- ## ๐Ÿš€ Getting Started @@ -246,9 +264,17 @@ The `proxy-engine` binary handles all real-time audio processing with a **48kHz ### Audio Pipeline -``` -Inbound: Wire RTP โ†’ Jitter Buffer โ†’ Decode โ†’ Resample to 48kHz โ†’ Denoise (RNNoise) โ†’ Mix Bus -Outbound: Mix Bus โ†’ Mix-Minus โ†’ Resample to codec rate โ†’ Encode โ†’ Wire RTP +```mermaid +flowchart LR + subgraph Inbound["Inbound path (per leg)"] + direction LR + IN_RTP["Wire RTP"] --> IN_JB["Jitter Buffer"] --> IN_DEC["Decode"] --> IN_RS["Resample โ†’ 48 kHz"] --> IN_DN["Denoise (RNNoise)"] --> IN_BUS["Mix Bus"] + end + + subgraph Outbound["Outbound path (per leg)"] + direction LR + OUT_BUS["Mix Bus"] --> OUT_MM["Mix-Minus"] --> OUT_RS["Resample โ†’ codec rate"] --> OUT_ENC["Encode"] --> OUT_RTP["Wire RTP"] + end ``` - **Adaptive jitter buffer** โ€” per-leg `BTreeMap`-based buffer keyed by RTP sequence number. Delivers exactly one frame per 20ms mixer tick in sequence order. Adaptive target depth starts at 3 frames (60ms) and adjusts between 2โ€“6 frames based on observed network jitter. Handles hold/resume by detecting large forward sequence jumps and resetting cleanly. diff --git a/ts/00_commitinfo_data.ts b/ts/00_commitinfo_data.ts index 7c3ac7c..e350c80 100644 --- a/ts/00_commitinfo_data.ts +++ b/ts/00_commitinfo_data.ts @@ -3,6 +3,6 @@ */ export const commitinfo = { name: 'siprouter', - version: '1.20.4', + version: '1.20.5', description: 'undefined' } diff --git a/ts_web/00_commitinfo_data.ts b/ts_web/00_commitinfo_data.ts index 7c3ac7c..e350c80 100644 --- a/ts_web/00_commitinfo_data.ts +++ b/ts_web/00_commitinfo_data.ts @@ -3,6 +3,6 @@ */ export const commitinfo = { name: 'siprouter', - version: '1.20.4', + version: '1.20.5', description: 'undefined' }