From a9fdfe5733ea511c593d4ef9e4e15a715092a234 Mon Sep 17 00:00:00 2001 From: Juergen Kunz Date: Sat, 11 Apr 2026 19:02:52 +0000 Subject: [PATCH] fix(ts-config,proxybridge,voicebox): align voicebox config types and add missing proxy bridge command definitions --- CLAUDE.md | 114 ++++++++++++++++----- changelog.md | 8 ++ rust/.cargo/config.toml | 30 ++++++ rust/.cargo/crosslibs/aarch64/libstdc++.so | 1 + ts/00_commitinfo_data.ts | 2 +- ts/config.ts | 26 ++--- ts/proxybridge.ts | 38 ++++++- ts/sipproxy.ts | 2 +- ts/voicebox.ts | 43 ++++++-- ts_web/00_commitinfo_data.ts | 2 +- 10 files changed, 212 insertions(+), 54 deletions(-) create mode 100644 rust/.cargo/config.toml create mode 120000 rust/.cargo/crosslibs/aarch64/libstdc++.so diff --git a/CLAUDE.md b/CLAUDE.md index 4404402..0649526 100644 --- a/CLAUDE.md +++ b/CLAUDE.md @@ -1,41 +1,103 @@ # Project Notes -## Architecture: Hub Model (Call as Centerpiece) +## Architecture: Hub Model in Rust (Call as Centerpiece) -All call logic lives in `ts/call/`. The Call is the central entity with N legs. +The call hub lives in the Rust proxy-engine (`rust/crates/proxy-engine/`). TypeScript is the **control plane only** — it configures the engine, sends high-level commands (`hangup`, `make_call`, `webrtc_offer`, etc.), and receives events (`incoming_call`, `call_answered`, `device_registered`, `webrtc_audio_rx`, …). No raw SIP/RTP ever touches TypeScript. -### Key Files -- `ts/call/call-manager.ts` — singleton registry, factory methods, SIP routing -- `ts/call/call.ts` — the hub: owns legs, media forwarding -- `ts/call/sip-leg.ts` — SIP device/provider connection (wraps SipDialog) -- `ts/call/webrtc-leg.ts` — browser WebRTC connection (wraps werift PeerConnection) -- `ts/call/rtp-port-pool.ts` — unified RTP port pool -- `ts/sipproxy.ts` — thin bootstrap wiring everything together -- `ts/webrtcbridge.ts` — browser device registration (signaling only) +The `Call` is still the central entity: it owns N legs and a central mixer task that provides mix-minus audio to all participants. Legs can be `SipProvider`, `SipDevice`, `WebRtc`, or `Tool` (recording/transcription observer). -### WebRTC Browser Call Flow (Critical) +### Key Rust files (`rust/crates/proxy-engine/src/`) -The browser call flow has a specific signaling order that MUST be followed: +- `call_manager.rs` — singleton registry, call factory methods, SIP routing (inbound/outbound/passthrough), B2BUA state machine, inbound route resolution +- `call.rs` — the `Call` hub + `LegInfo` struct, owns legs and the mixer task +- `sip_leg.rs` — full SIP dialog management for B2BUA legs (INVITE, 407 auth retry, BYE, CANCEL, early media) +- `rtp.rs` — RTP port pool (uses `Weak` so calls auto-release ports on drop) + RTP header helpers +- `mixer.rs` — 20 ms-tick mix-minus engine (48 kHz f32 internal, per-leg transcoding via `codec-lib`, per-leg denoising) +- `jitter_buffer.rs` — per-leg reordering/packet-loss compensation +- `leg_io.rs` — spawns inbound/outbound RTP I/O tasks per SIP leg +- `webrtc_engine.rs` — browser WebRTC sessions (werift-rs based), ICE/DTLS/SRTP +- `provider.rs` — SIP trunk registrations, public-IP detection via Via `received=` +- `registrar.rs` — accepts REGISTER from SIP phones, tracks contacts (push-based device status) +- `config.rs` — `AppConfig` deserialized from TS, route resolvers (`resolve_outbound_route`, `resolve_inbound_route`) +- `main.rs` — IPC command dispatcher (`handle_command`), event emitter, top-level SIP packet router +- `sip_transport.rs` — owning wrapper around the main SIP UDP socket +- `voicemail.rs` / `recorder.rs` / `audio_player.rs` / `tts.rs` — media subsystems +- `tool_leg.rs` — per-source observer audio for recording/transcription tools +- `ipc.rs` — event-emission helper used throughout -1. `POST /api/call` with browser deviceId → CallManager creates Call, saves pending state, notifies browser via `webrtc-incoming` -2. Browser sends `webrtc-offer` (with its own `sessionId`) → CallManager creates a **standalone** WebRtcLeg (NOT attached to any call yet) -3. Browser sends `webrtc-accept` (with `callId` + `sessionId`) → CallManager links the standalone WebRtcLeg to the Call, then starts the SIP provider leg +### Key TS files (control plane) -**The WebRtcLeg CANNOT be created at call creation time** because the browser's session ID is unknown until the `webrtc-offer` arrives. +- `ts/sipproxy.ts` — entrypoint, wires the proxy engine bridge + web UI + WebRTC signaling +- `ts/proxybridge.ts` — `@push.rocks/smartrust` bridge to the Rust binary, typed `TProxyCommands` map +- `ts/config.ts` — JSON config loader (`IAppConfig`, `IProviderConfig`, etc.), sent to Rust via `configure` +- `ts/voicebox.ts` — voicemail metadata persistence (WAV files live in `.nogit/voicemail/{boxId}/`) +- `ts/webrtcbridge.ts` — browser WebSocket signaling, browser device registry (`deviceIdToWs`) +- `ts/call/prompt-cache.ts` — the only remaining file under `ts/call/` (IVR prompt caching) -### WebRTC Audio Return Channel (Critical) +### Rust SIP protocol library -The SIP→browser audio path works through the Call hub: +`rust/crates/sip-proto/` is a zero-dependency SIP data library (parse/build/mutate/serialize messages, dialog management, SDP helpers, digest auth). Do not add transport or timer logic there — it's purely data-level. -1. Provider sends RTP to SipLeg's socket -2. SipLeg's `onRtpReceived` fires → Call hub's `forwardRtp` -3. Call hub calls `webrtcLeg.sendRtp(data)` → which calls `forwardToBrowser()` -4. `forwardToBrowser` transcodes (G.722→Opus) and sends via `sender.sendRtp()` (WebRTC PeerConnection) +## Event-push architecture for device status -**`WebRtcLeg.sendRtp()` MUST feed into `forwardToBrowser()`** (the WebRTC PeerConnection path), NOT send to a UDP address. This was a bug that caused one-way audio. +Device status flows **via push events**, not pull-based IPC queries: -The browser→SIP direction works independently: `ontrack.onReceiveRtp` → `forwardToSip()` → transcodes → sends directly to provider's media endpoint via UDP. +1. Rust emits `device_registered` when a phone REGISTERs +2. TS `sipproxy.ts` maintains a `deviceStatuses` Map, updated from the event +3. Map snapshot goes into the WebSocket `status` broadcast +4. Web UI (`ts_web/elements/sipproxy-devices.ts`) reads it from the push stream -### SIP Protocol Library +There used to be a `get_status` pull IPC for this, but it was never called from TS and has been removed. If a new dashboard ever needs a pull-based snapshot, the push Map is the right source to read from. -`ts/sip/` is a zero-dependency SIP protocol library. Do not add transport or timer logic there — it's purely data-level (parse/build/mutate/serialize). +## Inbound routing (wired in Commit 4 of the cleanup PR) + +Inbound route resolution goes through `config.resolve_inbound_route(provider_id, called_number, caller_number)` inside `create_inbound_call` (call_manager.rs). The result carries a `ring_browsers` flag that propagates to the `incoming_call` event; `ts/sipproxy.ts` gates the `webrtc-incoming` browser fan-out behind that flag. + +**Known limitations / TODOs** (documented in code at `create_inbound_call`): +- Multi-target inbound fork is not yet implemented — only the first registered device from `route.device_ids` is rung. +- `ring_browsers` is **informational only**: browsers see a toast but do not race the SIP device to answer. True first-to-answer-wins requires a multi-leg fork + per-leg CANCEL, which is not built yet. +- `voicemail_box`, `ivr_menu_id`, `no_answer_timeout` are resolved but not yet honored downstream. + +## WebRTC Browser Call Flow (Critical) + +The browser call signaling order is strict: + +1. Browser initiates outbound via a TS API (e.g. `POST /api/call`) — TS creates a pending call in the Rust engine via `make_call` and notifies the browser with a `webrtc-incoming` push. +2. Browser sends `webrtc-offer` (with its own `sessionId`) → Rust `handle_webrtc_offer` creates a **standalone** WebRTC session (NOT attached to any call yet). +3. Browser sends `webrtc_link` (with `callId` + `sessionId`) → Rust links the standalone session to the Call and wires the WebRTC leg through the mixer. + +**The WebRTC leg cannot be fully attached at call-creation time** because the browser's session ID is unknown until the `webrtc-offer` arrives. + +### WebRTC audio return channel (Critical) + +The SIP→browser audio path goes through the mixer, not a direct RTP relay: + +1. Provider sends RTP → received on the provider leg's UDP socket (`leg_io::spawn_sip_inbound`) +2. Packet flows through `jitter_buffer` → mixer's inbound mpsc channel +3. Mixer decodes/resamples/denoises, computes mix-minus per leg +4. WebRTC leg receives its mix-minus frame, encodes to Opus, and pushes via the WebRTC engine's peer connection sender + +Browser→SIP works symmetrically: `ontrack.onReceiveRtp` → WebRTC leg's outbound mpsc → mixer → other legs' inbound channels. + +## SDP/Record-Route NAT (fixed in Commit 3 of the cleanup PR) + +The proxy tracks a `public_ip: Option` on every `LegInfo` (populated from provider-leg construction sites). When `route_passthrough_message` rewrites SDP (`c=` line) or emits a `Record-Route`, it picks `advertise_ip` based on the destination leg's kind: + +- `SipProvider` → `other.public_ip.unwrap_or(lan_ip)` (provider reaches us via public IP) +- `SipDevice` / `WebRtc` / `Tool` / `Media` → `lan_ip` (everything else is LAN or proxy-internal) + +This fixed a real NAT-traversal bug where the proxy advertised its RFC1918 LAN IP to the provider in SDP, causing one-way or no audio for device-originated inbound traffic behind NAT. + +## Build & development + +- **Build:** `pnpm run buildRust` (never `cargo build` directly — tsrust cross-compiles for both `x86_64-unknown-linux-gnu` and `aarch64-unknown-linux-gnu`) +- **Cross-compile setup:** the aarch64 target requires `gcc-aarch64-linux-gnu` + `libstdc++6-arm64-cross` (Debian/Ubuntu). See `rust/.cargo/config.toml` for the linker wiring. A committed symlink at `rust/.cargo/crosslibs/aarch64/libstdc++.so` → `/usr/aarch64-linux-gnu/lib/libstdc++.so.6` avoids needing the `libstdc++-13-dev-arm64-cross` package. +- **Bundle web UI:** `pnpm run bundle` (esbuild, output: `dist_ts_web/bundle.js`) +- **Full build:** `pnpm run build` (= `buildRust && bundle`) +- **Start server:** `pnpm run start` (runs `tsx ts/sipproxy.ts`) + +## Persistent files + +- `.nogit/config.json` — app config (providers, devices, routes, voiceboxes, IVR menus) +- `.nogit/voicemail/{boxId}/` — voicemail WAV files + `messages.json` index +- `.nogit/prompts/` — cached TTS prompts for IVR menus diff --git a/changelog.md b/changelog.md index 3574c8f..d94b38d 100644 --- a/changelog.md +++ b/changelog.md @@ -1,5 +1,13 @@ # Changelog +## 2026-04-11 - 1.20.3 - fix(ts-config,proxybridge,voicebox) +align voicebox config types and add missing proxy bridge command definitions + +- Reuses the canonical IVoiceboxConfig type from voicebox.ts in config.ts to eliminate duplicated type definitions and optionality mismatches. +- Makes voicemail timing and limits optional in voicebox config so defaults can be applied consistently during initialization. +- Adds VoiceboxManager.addMessage and updates recording handling to use it directly for persisted voicemail metadata. +- Extends proxy bridge command typings with add_leg, remove_leg, and WebRTC signaling commands, and tightens sendCommand typing. + ## 2026-04-11 - 1.20.2 - fix(proxy-engine) fix inbound route browser ringing and provider-facing SDP advertisement while preventing RTP port exhaustion diff --git a/rust/.cargo/config.toml b/rust/.cargo/config.toml new file mode 100644 index 0000000..ff42eeb --- /dev/null +++ b/rust/.cargo/config.toml @@ -0,0 +1,30 @@ +# Cross-compile configuration for the proxy-engine crate. +# +# tsrust builds for both x86_64-unknown-linux-gnu and aarch64-unknown-linux-gnu +# from an x86_64 host. Without this config, cargo invokes the host `cc` to +# link aarch64 objects and fails with +# rust-lld: error: is incompatible with elf64-x86-64 +# +# Required Debian/Ubuntu packages for the aarch64 target to work: +# sudo apt install gcc-aarch64-linux-gnu g++-aarch64-linux-gnu \ +# libc6-dev-arm64-cross libstdc++6-arm64-cross +# +# The `libstdc++.so` dev symlink (needed by the -lstdc++ flag that the +# kokoro-tts/ort build scripts emit) is provided by this repo at +# ./crosslibs/aarch64/libstdc++.so, pointing at the versioned shared +# library installed by `libstdc++6-arm64-cross`. This avoids requiring +# the `libstdc++-13-dev-arm64-cross` package, which is not always +# installed alongside the runtime. + +[target.aarch64-unknown-linux-gnu] +linker = "aarch64-linux-gnu-gcc" +rustflags = ["-C", "link-arg=-L.cargo/crosslibs/aarch64"] + +# Tell cc-rs-based build scripts (ring, zstd-sys, audiopus_sys, ort-sys) to +# use the aarch64 cross toolchain when compiling C sources for the aarch64 +# target. Without these, they'd default to the host `cc` and produce x86_64 +# objects that the aarch64 linker then rejects. +[env] +CC_aarch64_unknown_linux_gnu = "aarch64-linux-gnu-gcc" +CXX_aarch64_unknown_linux_gnu = "aarch64-linux-gnu-g++" +AR_aarch64_unknown_linux_gnu = "aarch64-linux-gnu-ar" diff --git a/rust/.cargo/crosslibs/aarch64/libstdc++.so b/rust/.cargo/crosslibs/aarch64/libstdc++.so new file mode 120000 index 0000000..929d76b --- /dev/null +++ b/rust/.cargo/crosslibs/aarch64/libstdc++.so @@ -0,0 +1 @@ +/usr/aarch64-linux-gnu/lib/libstdc++.so.6 \ No newline at end of file diff --git a/ts/00_commitinfo_data.ts b/ts/00_commitinfo_data.ts index 81222c6..8ab7e5c 100644 --- a/ts/00_commitinfo_data.ts +++ b/ts/00_commitinfo_data.ts @@ -3,6 +3,6 @@ */ export const commitinfo = { name: 'siprouter', - version: '1.20.2', + version: '1.20.3', description: 'undefined' } diff --git a/ts/config.ts b/ts/config.ts index bcc6cb1..6f7a8bc 100644 --- a/ts/config.ts +++ b/ts/config.ts @@ -8,6 +8,7 @@ import fs from 'node:fs'; import path from 'node:path'; +import type { IVoiceboxConfig } from './voicebox.js'; // --------------------------------------------------------------------------- // Shared types (previously in ts/sip/types.ts, now inlined) @@ -160,24 +161,13 @@ export interface IContact { // Voicebox configuration // --------------------------------------------------------------------------- -export interface IVoiceboxConfig { - /** Unique ID — typically matches device ID or extension. */ - id: string; - /** Whether this voicebox is active. */ - enabled: boolean; - /** Custom TTS greeting text. */ - greetingText?: string; - /** TTS voice ID (default 'af_bella'). */ - greetingVoice?: string; - /** Path to uploaded WAV greeting (overrides TTS). */ - greetingWavPath?: string; - /** Seconds to wait before routing to voicemail (default 25). */ - noAnswerTimeoutSec?: number; - /** Maximum recording duration in seconds (default 120). */ - maxRecordingSec?: number; - /** Maximum stored messages per box (default 50). */ - maxMessages?: number; -} +// Canonical definition lives in voicebox.ts (imported at the top of this +// file) — re-exported here so consumers can import everything from a +// single config module without pulling in the voicebox implementation. +// This used to be a duplicated interface and caused +// "number | undefined is not assignable to number" type errors when +// passing config.voiceboxes into VoiceboxManager.init(). +export type { IVoiceboxConfig }; // --------------------------------------------------------------------------- // IVR configuration diff --git a/ts/proxybridge.ts b/ts/proxybridge.ts index 96e87e1..2c15863 100644 --- a/ts/proxybridge.ts +++ b/ts/proxybridge.ts @@ -41,6 +41,14 @@ type TProxyCommands = { params: { call_id: string }; result: { file_path: string; duration_ms: number }; }; + add_leg: { + params: { call_id: string; number: string; provider_id?: string }; + result: { leg_id: string }; + }; + remove_leg: { + params: { call_id: string; leg_id: string }; + result: Record; + }; add_device_leg: { params: { call_id: string; device_id: string }; result: { leg_id: string }; @@ -83,6 +91,34 @@ type TProxyCommands = { params: { model: string; voices: string; voice: string; text: string; output: string }; result: { output: string }; }; + // WebRTC signaling — bridged from the browser via the TS control plane. + webrtc_offer: { + params: { session_id: string; sdp: string }; + result: { sdp: string }; + }; + webrtc_ice: { + params: { + session_id: string; + candidate: string; + sdp_mid?: string; + sdp_mline_index?: number; + }; + result: Record; + }; + webrtc_link: { + params: { + session_id: string; + call_id: string; + provider_media_addr: string; + provider_media_port: number; + sip_pt?: number; + }; + result: Record; + }; + webrtc_close: { + params: { session_id: string }; + result: Record; + }; }; // --------------------------------------------------------------------------- @@ -522,7 +558,7 @@ export async function sendProxyCommand( params: TProxyCommands[K]['params'], ): Promise { if (!bridge || !initialized) throw new Error('proxy engine not initialized'); - return bridge.sendCommand(method as string, params as any) as any; + return bridge.sendCommand(method, params) as Promise; } /** Shut down the proxy engine. */ diff --git a/ts/sipproxy.ts b/ts/sipproxy.ts index 2f75de4..7769004 100644 --- a/ts/sipproxy.ts +++ b/ts/sipproxy.ts @@ -501,7 +501,7 @@ async function startProxyEngine(): Promise { onProxyEvent('recording_done', (data: any) => { log(`[voicemail] recording done: ${data.file_path} (${data.duration_ms}ms) caller=${data.caller_number}`); // Save voicemail metadata via VoiceboxManager. - voiceboxManager.addMessage?.('default', { + voiceboxManager.addMessage('default', { callerNumber: data.caller_number || 'Unknown', callerName: null, fileName: data.file_path, diff --git a/ts/voicebox.ts b/ts/voicebox.ts index 04ecc3c..c54b0d3 100644 --- a/ts/voicebox.ts +++ b/ts/voicebox.ts @@ -29,12 +29,14 @@ export interface IVoiceboxConfig { greetingVoice?: string; /** Path to uploaded WAV greeting (overrides TTS). */ greetingWavPath?: string; - /** Seconds to wait before routing to voicemail (default 25). */ - noAnswerTimeoutSec: number; - /** Maximum recording duration in seconds (default 120). */ - maxRecordingSec: number; - /** Maximum stored messages per box (default 50). */ - maxMessages: number; + /** Seconds to wait before routing to voicemail. Defaults to 25 when + * absent — both the config loader and `VoiceboxManager.init` apply + * the default via `??=`. */ + noAnswerTimeoutSec?: number; + /** Maximum recording duration in seconds. Defaults to 120. */ + maxRecordingSec?: number; + /** Maximum stored messages per box. Defaults to 50. */ + maxMessages?: number; } export interface IVoicemailMessage { @@ -148,6 +150,35 @@ export class VoiceboxManager { // Message CRUD // ------------------------------------------------------------------------- + /** + * Convenience wrapper around `saveMessage` — used by the `recording_done` + * event handler, which has a raw recording path + caller info and needs + * to persist metadata. Generates `id`, sets `timestamp = now`, defaults + * `heard = false`, and normalizes `fileName` to a basename (the WAV is + * expected to already live in the box's directory). + */ + addMessage( + boxId: string, + info: { + callerNumber: string; + callerName?: string | null; + fileName: string; + durationMs: number; + }, + ): void { + const msg: IVoicemailMessage = { + id: crypto.randomUUID(), + boxId, + callerNumber: info.callerNumber, + callerName: info.callerName ?? undefined, + timestamp: Date.now(), + durationMs: info.durationMs, + fileName: path.basename(info.fileName), + heard: false, + }; + this.saveMessage(msg); + } + /** * Save a new voicemail message. * The WAV file should already exist at the expected path. diff --git a/ts_web/00_commitinfo_data.ts b/ts_web/00_commitinfo_data.ts index 81222c6..8ab7e5c 100644 --- a/ts_web/00_commitinfo_data.ts +++ b/ts_web/00_commitinfo_data.ts @@ -3,6 +3,6 @@ */ export const commitinfo = { name: 'siprouter', - version: '1.20.2', + version: '1.20.3', description: 'undefined' }