Compare commits
53 Commits
| Author | SHA1 | Date | |
|---|---|---|---|
| 3c010a3b1b | |||
| 88768f0586 | |||
| 0d82a626b5 | |||
| 30d056f376 | |||
| 89ae12318e | |||
| feb3514de4 | |||
| adfc4726fd | |||
| 06c86d7e81 | |||
| cff70ab179 | |||
| 51f7560730 | |||
| 5a280c5c41 | |||
| 59d8c2557c | |||
| cfadd7a2b6 | |||
| 80f710f6d8 | |||
| 9ea57cd659 | |||
| c40c726dc3 | |||
| 37ba7501fa | |||
| 24924a1aea | |||
| 7ed76a9488 | |||
| a9fdfe5733 | |||
| 6fcdf4291a | |||
| 81441e7853 | |||
| 21ffc1d017 | |||
| 2f16c5efae | |||
| 254d7f3633 | |||
| 67537664df | |||
| 54129dcdae | |||
| 8c6556dae3 | |||
| 291beb1da4 | |||
| 79147f1e40 | |||
| c3a63a4092 | |||
| 7c4756402e | |||
| b6950e11d2 | |||
| e4935fbf21 | |||
| f543ff1568 | |||
| c63a759689 | |||
| a02146633b | |||
| f78639dd19 | |||
| 2aca5f1510 | |||
| 73b28f5f57 | |||
| 10ad432a4c | |||
| 66112091a2 | |||
| c9ae747c95 | |||
| 45f9b9c15c | |||
| 7d59361352 | |||
| 6a130db7c7 | |||
| 93f671f1f9 | |||
| 36eab44e28 | |||
| 9e5aa35fee | |||
| 82f2742db5 | |||
| 239e2ac81d | |||
| ad253f823f | |||
| 3132ba8cbb |
16
.dockerignore
Normal file
16
.dockerignore
Normal file
@@ -0,0 +1,16 @@
|
||||
node_modules/
|
||||
.nogit/
|
||||
nogit/
|
||||
.git/
|
||||
.playwright-mcp/
|
||||
.vscode/
|
||||
test/
|
||||
dist_rust/
|
||||
dist_ts_web/
|
||||
rust/target/
|
||||
sip_trace.log
|
||||
sip_trace_*.log
|
||||
proxy.out
|
||||
proxy_v2.out
|
||||
*.pid
|
||||
.server.pid
|
||||
32
.gitea/workflows/docker_tags.yaml
Normal file
32
.gitea/workflows/docker_tags.yaml
Normal file
@@ -0,0 +1,32 @@
|
||||
name: Docker (tags)
|
||||
|
||||
on:
|
||||
push:
|
||||
tags:
|
||||
- '*'
|
||||
|
||||
env:
|
||||
IMAGE: code.foss.global/host.today/ht-docker-node:dbase_dind
|
||||
NPMCI_LOGIN_DOCKER_GITEA: ${{ github.server_url }}|${{ gitea.repository_owner }}|${{ secrets.GITEA_TOKEN }}
|
||||
NPMCI_LOGIN_DOCKER_DOCKERREGISTRY: ${{ secrets.NPMCI_LOGIN_DOCKER_DOCKERREGISTRY }}
|
||||
|
||||
jobs:
|
||||
release:
|
||||
if: github.event_name == 'push' && startsWith(github.ref, 'refs/tags/')
|
||||
runs-on: ubuntu-latest
|
||||
container:
|
||||
image: ${{ env.IMAGE }}
|
||||
|
||||
steps:
|
||||
- uses: actions/checkout@v3
|
||||
|
||||
- name: Prepare
|
||||
run: |
|
||||
pnpm install -g pnpm
|
||||
pnpm install -g @git.zone/tsdocker
|
||||
|
||||
- name: Release
|
||||
run: |
|
||||
tsdocker login
|
||||
tsdocker build
|
||||
tsdocker push
|
||||
@@ -8,5 +8,16 @@
|
||||
"production": true
|
||||
}
|
||||
]
|
||||
},
|
||||
"@git.zone/tsrust": {
|
||||
"targets": ["linux_amd64", "linux_arm64"]
|
||||
},
|
||||
"@git.zone/tsdocker": {
|
||||
"registries": ["code.foss.global"],
|
||||
"registryRepoMap": {
|
||||
"code.foss.global": "serve.zone/siprouter",
|
||||
"dockerregistry.lossless.digital": "serve.zone/siprouter"
|
||||
},
|
||||
"platforms": ["linux/amd64", "linux/arm64"]
|
||||
}
|
||||
}
|
||||
|
||||
114
CLAUDE.md
114
CLAUDE.md
@@ -1,41 +1,103 @@
|
||||
# Project Notes
|
||||
|
||||
## Architecture: Hub Model (Call as Centerpiece)
|
||||
## Architecture: Hub Model in Rust (Call as Centerpiece)
|
||||
|
||||
All call logic lives in `ts/call/`. The Call is the central entity with N legs.
|
||||
The call hub lives in the Rust proxy-engine (`rust/crates/proxy-engine/`). TypeScript is the **control plane only** — it configures the engine, sends high-level commands (`hangup`, `make_call`, `webrtc_offer`, etc.), and receives events (`incoming_call`, `call_answered`, `device_registered`, `webrtc_audio_rx`, …). No raw SIP/RTP ever touches TypeScript.
|
||||
|
||||
### Key Files
|
||||
- `ts/call/call-manager.ts` — singleton registry, factory methods, SIP routing
|
||||
- `ts/call/call.ts` — the hub: owns legs, media forwarding
|
||||
- `ts/call/sip-leg.ts` — SIP device/provider connection (wraps SipDialog)
|
||||
- `ts/call/webrtc-leg.ts` — browser WebRTC connection (wraps werift PeerConnection)
|
||||
- `ts/call/rtp-port-pool.ts` — unified RTP port pool
|
||||
- `ts/sipproxy.ts` — thin bootstrap wiring everything together
|
||||
- `ts/webrtcbridge.ts` — browser device registration (signaling only)
|
||||
The `Call` is still the central entity: it owns N legs and a central mixer task that provides mix-minus audio to all participants. Legs can be `SipProvider`, `SipDevice`, `WebRtc`, or `Tool` (recording/transcription observer).
|
||||
|
||||
### WebRTC Browser Call Flow (Critical)
|
||||
### Key Rust files (`rust/crates/proxy-engine/src/`)
|
||||
|
||||
The browser call flow has a specific signaling order that MUST be followed:
|
||||
- `call_manager.rs` — singleton registry, call factory methods, SIP routing (inbound/outbound/passthrough), B2BUA state machine, inbound route resolution
|
||||
- `call.rs` — the `Call` hub + `LegInfo` struct, owns legs and the mixer task
|
||||
- `sip_leg.rs` — full SIP dialog management for B2BUA legs (INVITE, 407 auth retry, BYE, CANCEL, early media)
|
||||
- `rtp.rs` — RTP port pool (uses `Weak<UdpSocket>` so calls auto-release ports on drop) + RTP header helpers
|
||||
- `mixer.rs` — 20 ms-tick mix-minus engine (48 kHz f32 internal, per-leg transcoding via `codec-lib`, per-leg denoising)
|
||||
- `jitter_buffer.rs` — per-leg reordering/packet-loss compensation
|
||||
- `leg_io.rs` — spawns inbound/outbound RTP I/O tasks per SIP leg
|
||||
- `webrtc_engine.rs` — browser WebRTC sessions (werift-rs based), ICE/DTLS/SRTP
|
||||
- `provider.rs` — SIP trunk registrations, public-IP detection via Via `received=`
|
||||
- `registrar.rs` — accepts REGISTER from SIP phones, tracks contacts (push-based device status)
|
||||
- `config.rs` — `AppConfig` deserialized from TS, route resolvers (`resolve_outbound_route`, `resolve_inbound_route`)
|
||||
- `main.rs` — IPC command dispatcher (`handle_command`), event emitter, top-level SIP packet router
|
||||
- `sip_transport.rs` — owning wrapper around the main SIP UDP socket
|
||||
- `voicemail.rs` / `recorder.rs` / `audio_player.rs` / `tts.rs` — media subsystems
|
||||
- `tool_leg.rs` — per-source observer audio for recording/transcription tools
|
||||
- `ipc.rs` — event-emission helper used throughout
|
||||
|
||||
1. `POST /api/call` with browser deviceId → CallManager creates Call, saves pending state, notifies browser via `webrtc-incoming`
|
||||
2. Browser sends `webrtc-offer` (with its own `sessionId`) → CallManager creates a **standalone** WebRtcLeg (NOT attached to any call yet)
|
||||
3. Browser sends `webrtc-accept` (with `callId` + `sessionId`) → CallManager links the standalone WebRtcLeg to the Call, then starts the SIP provider leg
|
||||
### Key TS files (control plane)
|
||||
|
||||
**The WebRtcLeg CANNOT be created at call creation time** because the browser's session ID is unknown until the `webrtc-offer` arrives.
|
||||
- `ts/sipproxy.ts` — entrypoint, wires the proxy engine bridge + web UI + WebRTC signaling
|
||||
- `ts/proxybridge.ts` — `@push.rocks/smartrust` bridge to the Rust binary, typed `TProxyCommands` map
|
||||
- `ts/config.ts` — JSON config loader (`IAppConfig`, `IProviderConfig`, etc.), sent to Rust via `configure`
|
||||
- `ts/voicebox.ts` — voicemail metadata persistence (WAV files live in `.nogit/voicemail/{boxId}/`)
|
||||
- `ts/webrtcbridge.ts` — browser WebSocket signaling, browser device registry (`deviceIdToWs`)
|
||||
- `ts/call/prompt-cache.ts` — the only remaining file under `ts/call/` (IVR prompt caching)
|
||||
|
||||
### WebRTC Audio Return Channel (Critical)
|
||||
### Rust SIP protocol library
|
||||
|
||||
The SIP→browser audio path works through the Call hub:
|
||||
`rust/crates/sip-proto/` is a zero-dependency SIP data library (parse/build/mutate/serialize messages, dialog management, SDP helpers, digest auth). Do not add transport or timer logic there — it's purely data-level.
|
||||
|
||||
1. Provider sends RTP to SipLeg's socket
|
||||
2. SipLeg's `onRtpReceived` fires → Call hub's `forwardRtp`
|
||||
3. Call hub calls `webrtcLeg.sendRtp(data)` → which calls `forwardToBrowser()`
|
||||
4. `forwardToBrowser` transcodes (G.722→Opus) and sends via `sender.sendRtp()` (WebRTC PeerConnection)
|
||||
## Event-push architecture for device status
|
||||
|
||||
**`WebRtcLeg.sendRtp()` MUST feed into `forwardToBrowser()`** (the WebRTC PeerConnection path), NOT send to a UDP address. This was a bug that caused one-way audio.
|
||||
Device status flows **via push events**, not pull-based IPC queries:
|
||||
|
||||
The browser→SIP direction works independently: `ontrack.onReceiveRtp` → `forwardToSip()` → transcodes → sends directly to provider's media endpoint via UDP.
|
||||
1. Rust emits `device_registered` when a phone REGISTERs
|
||||
2. TS `sipproxy.ts` maintains a `deviceStatuses` Map, updated from the event
|
||||
3. Map snapshot goes into the WebSocket `status` broadcast
|
||||
4. Web UI (`ts_web/elements/sipproxy-devices.ts`) reads it from the push stream
|
||||
|
||||
### SIP Protocol Library
|
||||
There used to be a `get_status` pull IPC for this, but it was never called from TS and has been removed. If a new dashboard ever needs a pull-based snapshot, the push Map is the right source to read from.
|
||||
|
||||
`ts/sip/` is a zero-dependency SIP protocol library. Do not add transport or timer logic there — it's purely data-level (parse/build/mutate/serialize).
|
||||
## Inbound routing (wired in Commit 4 of the cleanup PR)
|
||||
|
||||
Inbound route resolution goes through `config.resolve_inbound_route(provider_id, called_number, caller_number)` inside `create_inbound_call` (call_manager.rs). The result carries a `ring_browsers` flag that propagates to the `incoming_call` event; `ts/sipproxy.ts` gates the `webrtc-incoming` browser fan-out behind that flag.
|
||||
|
||||
**Known limitations / TODOs** (documented in code at `create_inbound_call`):
|
||||
- Multi-target inbound fork is not yet implemented — only the first registered device from `route.device_ids` is rung.
|
||||
- `ring_browsers` is **informational only**: browsers see a toast but do not race the SIP device to answer. True first-to-answer-wins requires a multi-leg fork + per-leg CANCEL, which is not built yet.
|
||||
- `voicemail_box`, `ivr_menu_id`, `no_answer_timeout` are resolved but not yet honored downstream.
|
||||
|
||||
## WebRTC Browser Call Flow (Critical)
|
||||
|
||||
The browser call signaling order is strict:
|
||||
|
||||
1. Browser initiates outbound via a TS API (e.g. `POST /api/call`) — TS creates a pending call in the Rust engine via `make_call` and notifies the browser with a `webrtc-incoming` push.
|
||||
2. Browser sends `webrtc-offer` (with its own `sessionId`) → Rust `handle_webrtc_offer` creates a **standalone** WebRTC session (NOT attached to any call yet).
|
||||
3. Browser sends `webrtc_link` (with `callId` + `sessionId`) → Rust links the standalone session to the Call and wires the WebRTC leg through the mixer.
|
||||
|
||||
**The WebRTC leg cannot be fully attached at call-creation time** because the browser's session ID is unknown until the `webrtc-offer` arrives.
|
||||
|
||||
### WebRTC audio return channel (Critical)
|
||||
|
||||
The SIP→browser audio path goes through the mixer, not a direct RTP relay:
|
||||
|
||||
1. Provider sends RTP → received on the provider leg's UDP socket (`leg_io::spawn_sip_inbound`)
|
||||
2. Packet flows through `jitter_buffer` → mixer's inbound mpsc channel
|
||||
3. Mixer decodes/resamples/denoises, computes mix-minus per leg
|
||||
4. WebRTC leg receives its mix-minus frame, encodes to Opus, and pushes via the WebRTC engine's peer connection sender
|
||||
|
||||
Browser→SIP works symmetrically: `ontrack.onReceiveRtp` → WebRTC leg's outbound mpsc → mixer → other legs' inbound channels.
|
||||
|
||||
## SDP/Record-Route NAT (fixed in Commit 3 of the cleanup PR)
|
||||
|
||||
The proxy tracks a `public_ip: Option<String>` on every `LegInfo` (populated from provider-leg construction sites). When `route_passthrough_message` rewrites SDP (`c=` line) or emits a `Record-Route`, it picks `advertise_ip` based on the destination leg's kind:
|
||||
|
||||
- `SipProvider` → `other.public_ip.unwrap_or(lan_ip)` (provider reaches us via public IP)
|
||||
- `SipDevice` / `WebRtc` / `Tool` / `Media` → `lan_ip` (everything else is LAN or proxy-internal)
|
||||
|
||||
This fixed a real NAT-traversal bug where the proxy advertised its RFC1918 LAN IP to the provider in SDP, causing one-way or no audio for device-originated inbound traffic behind NAT.
|
||||
|
||||
## Build & development
|
||||
|
||||
- **Build:** `pnpm run buildRust` (never `cargo build` directly — tsrust cross-compiles for both `x86_64-unknown-linux-gnu` and `aarch64-unknown-linux-gnu`)
|
||||
- **Cross-compile setup:** the aarch64 target requires `gcc-aarch64-linux-gnu` + `libstdc++6-arm64-cross` (Debian/Ubuntu). See `rust/.cargo/config.toml` for the linker wiring. A committed symlink at `rust/.cargo/crosslibs/aarch64/libstdc++.so` → `/usr/aarch64-linux-gnu/lib/libstdc++.so.6` avoids needing the `libstdc++-13-dev-arm64-cross` package.
|
||||
- **Bundle web UI:** `pnpm run bundle` (esbuild, output: `dist_ts_web/bundle.js`)
|
||||
- **Full build:** `pnpm run build` (= `buildRust && bundle`)
|
||||
- **Start server:** `pnpm run start` (runs `tsx ts/sipproxy.ts`)
|
||||
|
||||
## Persistent files
|
||||
|
||||
- `.nogit/config.json` — app config (providers, devices, routes, voiceboxes, IVR menus)
|
||||
- `.nogit/voicemail/{boxId}/` — voicemail WAV files + `messages.json` index
|
||||
- `.nogit/prompts/` — cached TTS prompts for IVR menus
|
||||
|
||||
74
Dockerfile
Normal file
74
Dockerfile
Normal file
@@ -0,0 +1,74 @@
|
||||
# gitzone dockerfile_service
|
||||
## STAGE 1 // BUILD
|
||||
FROM code.foss.global/host.today/ht-docker-node:lts AS build
|
||||
|
||||
# System build tools that the Rust dep tree needs beyond the base image:
|
||||
# - cmake : used by the `cmake` crate (transitive via ort_sys / a webrtc
|
||||
# sub-crate) to build a C/C++ library from source when a
|
||||
# prebuilt-binary download path doesn't apply.
|
||||
# - pkg-config : used by audiopus_sys and other *-sys crates to locate libs
|
||||
# on the native target (safe no-op if they vendor their own).
|
||||
# These are normally pre-installed on dev machines but not in ht-docker-node:lts.
|
||||
RUN apt-get update && apt-get install -y --no-install-recommends \
|
||||
cmake \
|
||||
pkg-config \
|
||||
&& rm -rf /var/lib/apt/lists/*
|
||||
|
||||
# buildx sets TARGETARCH automatically for each platform it's building:
|
||||
# linux/amd64 -> TARGETARCH=amd64
|
||||
# linux/arm64 -> TARGETARCH=arm64
|
||||
# We use it to tell tsrust to build ONLY the current container's arch. This
|
||||
# overrides the `@git.zone/tsrust.targets` list in .smartconfig.json, which is
|
||||
# right for local dev / CI (where you want both binaries) but wrong for per-
|
||||
# platform Docker stages (each stage would then also try to cross-compile to
|
||||
# the OTHER arch — which fails in the arm64 stage because no reverse cross-
|
||||
# toolchain is installed).
|
||||
#
|
||||
# With --target set, tsrust builds a single target natively within whichever
|
||||
# platform this stage is running under (native on amd64, QEMU-emulated on arm64).
|
||||
ARG TARGETARCH
|
||||
|
||||
COPY ./ /app
|
||||
WORKDIR /app
|
||||
RUN pnpm config set store-dir .pnpm-store
|
||||
RUN rm -rf node_modules && pnpm install
|
||||
|
||||
# tsrust --target takes precedence over .smartconfig.json's targets array.
|
||||
# Writes dist_rust/proxy-engine_linux_amd64 or dist_rust/proxy-engine_linux_arm64.
|
||||
# The TS layer (ts/proxybridge.ts buildLocalPaths) picks the right one at runtime
|
||||
# via process.arch.
|
||||
RUN pnpm exec tsrust --target linux_${TARGETARCH}
|
||||
|
||||
# Web bundle (esbuild — pure JS, uses the platform's native esbuild binary
|
||||
# installed by pnpm above, so no cross-bundling concerns).
|
||||
RUN pnpm run bundle
|
||||
|
||||
# Drop pnpm store to keep the image smaller. node_modules stays because the
|
||||
# runtime entrypoint is tsx and siprouter has no separate dist_ts/ to run from.
|
||||
RUN rm -rf .pnpm-store
|
||||
|
||||
## STAGE 2 // PRODUCTION
|
||||
FROM code.foss.global/host.today/ht-docker-node:alpine-node AS production
|
||||
|
||||
# gcompat + libstdc++ let the glibc-linked proxy-engine binary run on Alpine.
|
||||
RUN apk add --no-cache gcompat libstdc++
|
||||
|
||||
WORKDIR /app
|
||||
COPY --from=build /app /app
|
||||
|
||||
ENV SIPROUTER_MODE=OCI_CONTAINER
|
||||
ENV NODE_ENV=production
|
||||
|
||||
LABEL org.opencontainers.image.title="siprouter" \
|
||||
org.opencontainers.image.description="SIP proxy with Rust data plane and WebRTC bridge" \
|
||||
org.opencontainers.image.source="https://code.foss.global/serve.zone/siprouter"
|
||||
|
||||
# 5070 SIP signaling (UDP+TCP)
|
||||
# 5061 SIP-TLS (optional, UDP+TCP)
|
||||
# 3060 Web UI / WebSocket (HTTP or HTTPS, auto-detected from .nogit/cert.pem)
|
||||
# 20000-20200/udp RTP media range (must match config.proxy.rtpPortRange)
|
||||
EXPOSE 5070/udp 5070/tcp 5061/udp 5061/tcp 3060/tcp 20000-20200/udp
|
||||
|
||||
# exec replaces sh as PID 1 with tsx, so SIGINT/SIGTERM reach Node and
|
||||
# ts/sipproxy.ts' shutdown handler (which calls shutdownProxyEngine) runs cleanly.
|
||||
CMD ["sh", "-c", "exec ./node_modules/.bin/tsx ts/sipproxy.ts"]
|
||||
200
changelog.md
200
changelog.md
@@ -1,5 +1,205 @@
|
||||
# Changelog
|
||||
|
||||
## 2026-04-14 - 1.25.2 - fix(proxy-engine)
|
||||
improve inbound SIP routing diagnostics and enrich leg media state reporting
|
||||
|
||||
- Extract inbound called numbers from DID-related SIP headers when the request URI contains a provider account username.
|
||||
- Emit detailed sip_unhandled diagnostics for inbound route misses, missing devices, and RTP allocation failures.
|
||||
- Include codec, RTP port, remote media, and metadata in leg state change events and preserve those fields in runtime status/history views.
|
||||
- Match hostname-based providers against resolved inbound source IPs to accept provider traffic sent from resolved addresses.
|
||||
- Invalidate cached TTS WAV metadata across engine restarts and vendor the kokoro-tts crate via a local patch.
|
||||
|
||||
## 2026-04-14 - 1.25.1 - fix(proxy-engine)
|
||||
respect explicit inbound route targets and store voicemail in the configured mailbox
|
||||
|
||||
- Prevent inbound routes with an explicit empty target list from ringing arbitrary registered devices by distinguishing omitted targets from empty targets.
|
||||
- Route unrouted or no-target inbound calls to voicemail with a generated unrouted greeting instead of falling back to random devices.
|
||||
- Pass voicemail box identifiers through proxy events and runtime handling so recordings are saved and indexed under the correct mailbox instead of always using default.
|
||||
|
||||
## 2026-04-14 - 1.25.0 - feat(proxy-engine)
|
||||
add live TTS streaming interactions and incoming number range support
|
||||
|
||||
- add a new start_tts_interaction command and bridge API to begin IVR or leg interactions before full TTS rendering completes
|
||||
- stream synthesized TTS chunks into the mixer with cancellation handling so prompts can stop cleanly on digit match, leg removal, or shutdown
|
||||
- extract PCM-to-mixer frame conversion for reusable live prompt processing
|
||||
- extend routing pattern matching to support numeric number ranges like start..end, including + prefixed values
|
||||
- add incomingNumbers config typing and frontend config update support for single, range, and regex number modes
|
||||
|
||||
## 2026-04-14 - 1.24.0 - feat(routing)
|
||||
require explicit inbound DID routes and normalize SIP identities for provider-based number matching
|
||||
|
||||
- Inbound route resolution now returns no match unless a configured inbound route explicitly matches the provider and called number.
|
||||
- Normalized routing identities were added for SIP/TEL URIs so inbound DIDs and outbound dialed numbers match consistently across provider-specific formats.
|
||||
- Call handling and incoming call events now use normalized numbers, improving routing accuracy for shared trunk providers.
|
||||
- Route configuration docs and the web route editor were updated to support explicit inbound DID ownership, voicemail fallback, and IVR selection.
|
||||
- Mixer RTP handling was enhanced to better support variable packet durations, timestamp-based gap fill, and non-blocking output drop reporting.
|
||||
|
||||
## 2026-04-14 - 1.23.0 - feat(runtime)
|
||||
refactor runtime state and proxy event handling for typed WebRTC linking and shared status models
|
||||
|
||||
- extract proxy event handling into dedicated runtime modules for status tracking and WebRTC session-to-call linking
|
||||
- introduce shared typed proxy event and status interfaces used by both backend and web UI
|
||||
- update web UI server initialization to use structured options and await async config save hooks
|
||||
- simplify browser signaling by routing WebRTC offer/ICE handling through frontend-to-Rust integration
|
||||
- align device status rendering with the new address/port fields in dashboard views
|
||||
|
||||
## 2026-04-12 - 1.22.0 - feat(proxy-engine)
|
||||
add on-demand TTS caching for voicemail and IVR prompts
|
||||
|
||||
- Route inbound calls directly to configured IVR menus and track them with a dedicated IVR call state
|
||||
- Generate voicemail greetings and IVR menu prompts inside the Rust proxy engine on demand instead of precomputing prompts in TypeScript
|
||||
- Add cacheable TTS output with sidecar metadata and enable Kokoro CMUdict support for improved prompt generation
|
||||
- Extend proxy configuration to include voiceboxes and IVR menus, and update documentation to reflect Kokoro-only prompt generation
|
||||
|
||||
## 2026-04-11 - 1.21.0 - feat(providers)
|
||||
replace provider creation modal with a guided multi-step setup flow
|
||||
|
||||
- Adds a stepper-based provider creation flow with provider type selection, connection, credentials, advanced settings, and review steps.
|
||||
- Applies built-in templates for Sipgate and O2/Alice from the selected provider type instead of separate add actions.
|
||||
- Adds a final review step with generated provider ID preview and duplicate ID collision handling before saving.
|
||||
|
||||
## 2026-04-11 - 1.20.5 - fix(readme)
|
||||
improve architecture and call flow documentation with Mermaid diagrams
|
||||
|
||||
- Replace ASCII architecture and audio pipeline diagrams with Mermaid diagrams for better readability
|
||||
- Document the WebRTC browser call setup sequence, including offer handling and session-to-call linking
|
||||
|
||||
## 2026-04-11 - 1.20.4 - fix(deps)
|
||||
bump @design.estate/dees-catalog to ^3.71.1
|
||||
|
||||
- Updates the @design.estate/dees-catalog dependency from ^3.70.0 to ^3.71.1 in package.json.
|
||||
|
||||
## 2026-04-11 - 1.20.3 - fix(ts-config,proxybridge,voicebox)
|
||||
align voicebox config types and add missing proxy bridge command definitions
|
||||
|
||||
- Reuses the canonical IVoiceboxConfig type from voicebox.ts in config.ts to eliminate duplicated type definitions and optionality mismatches.
|
||||
- Makes voicemail timing and limits optional in voicebox config so defaults can be applied consistently during initialization.
|
||||
- Adds VoiceboxManager.addMessage and updates recording handling to use it directly for persisted voicemail metadata.
|
||||
- Extends proxy bridge command typings with add_leg, remove_leg, and WebRTC signaling commands, and tightens sendCommand typing.
|
||||
|
||||
## 2026-04-11 - 1.20.2 - fix(proxy-engine)
|
||||
fix inbound route browser ringing and provider-facing SDP advertisement while preventing RTP port exhaustion
|
||||
|
||||
- Honor inbound routing `ringBrowsers` when emitting incoming call events so browser toast notifications can be suppressed per route.
|
||||
- Rewrite SDP and Record-Route using the destination leg's routable address, using `public_ip` for provider legs and LAN IP for device and internal legs.
|
||||
- Store provider leg public IP metadata on legs to support correct per-destination SIP message rewriting.
|
||||
- Change the RTP port pool to track sockets with `Weak<UdpSocket>` so ports are reclaimed automatically after calls end, avoiding leaked allocations and eventual 503 failures on new calls.
|
||||
- Remove unused dashboard/status, DTMF, relay, and transport helper code paths as part of engine cleanup.
|
||||
|
||||
## 2026-04-11 - 1.20.1 - fix(docker)
|
||||
install required native build tools for Rust dependencies in the build image
|
||||
|
||||
- Add cmake and pkg-config to the Docker build stage so Rust native dependencies can compile successfully in the container
|
||||
- Document why these tools are needed for transitive Rust crates that build or detect native libraries
|
||||
|
||||
## 2026-04-11 - 1.20.0 - feat(docker)
|
||||
add multi-arch Docker build and tagged release pipeline
|
||||
|
||||
- Add a production Dockerfile for building and running the SIP router with the Rust proxy engine and web bundle
|
||||
- Configure tsdocker and tsrust for linux/amd64 and linux/arm64 image builds and registry mapping
|
||||
- Add a tag-triggered Gitea workflow to build and push Docker images
|
||||
- Update runtime binary resolution to load architecture-specific Rust artifacts in Docker and CI environments
|
||||
- Add Docker-related package scripts, dependency updates, and ignore rules for container builds
|
||||
|
||||
## 2026-04-11 - 1.19.2 - fix(web-ui)
|
||||
normalize lucide icon names across SIP proxy views
|
||||
|
||||
- Updates icon identifiers to the expected PascalCase lucide format in app navigation, calls, IVR, overview, providers, and voicemail views.
|
||||
- Fixes UI icon rendering for stats cards and action menus such as transfer, delete, status, and call direction indicators.
|
||||
|
||||
## 2026-04-10 - 1.19.1 - fix(readme)
|
||||
refresh documentation for jitter buffering, voicemail, and WebSocket signaling details
|
||||
|
||||
- Add adaptive jitter buffer and packet loss concealment details to the audio pipeline documentation
|
||||
- Document voicemail unheard count and heard-state API endpoints
|
||||
- Update WebSocket event and browser signaling examples to reflect current message types
|
||||
|
||||
## 2026-04-10 - 1.19.0 - feat(proxy-engine,codec-lib)
|
||||
add adaptive RTP jitter buffering with Opus packet loss concealment and stable 20ms resampling
|
||||
|
||||
- introduces a per-leg adaptive jitter buffer in the mixer to reorder RTP packets, gate initial playout, and deliver one frame per 20ms tick
|
||||
- adds Opus PLC support to synthesize missing audio frames when packets are lost, with fade-based fallback handling for non-Opus codecs
|
||||
- updates i16 and f32 resamplers to use canonical 20ms chunks so cached resamplers preserve filter state and avoid variable-size cache thrashing
|
||||
|
||||
## 2026-04-10 - 1.18.0 - feat(readme)
|
||||
expand documentation for voicemail, IVR, audio engine, and API capabilities
|
||||
|
||||
- Updates the feature overview to document voicemail, IVR menus, call recording, enhanced TTS, and the 48kHz float audio engine
|
||||
- Refreshes the architecture section to describe the TypeScript control plane, Rust proxy-engine data plane, and JSON-over-stdio IPC
|
||||
- Clarifies REST API and WebSocket coverage with voicemail endpoints, incoming call events, and refined endpoint descriptions
|
||||
|
||||
## 2026-04-10 - 1.17.2 - fix(proxy-engine)
|
||||
use negotiated SDP payload types when wiring SIP legs and enable default nnnoiseless features for telephony denoising
|
||||
|
||||
- Select the negotiated codec payload type from SDP answers instead of always using the first offered codec
|
||||
- Preserve the device leg's preferred payload type from its own INVITE SDP when attaching it to the mixer
|
||||
- Enable default nnnoiseless features in codec-lib and proxy-engine dependencies
|
||||
|
||||
## 2026-04-10 - 1.17.1 - fix(proxy-engine,codec-lib,sip-proto,ts)
|
||||
preserve negotiated media details and improve RTP audio handling across call legs
|
||||
|
||||
- Use native Opus float encode/decode to avoid unnecessary i16 quantization in the f32 audio path.
|
||||
- Parse full RTP headers including extensions and sequence numbers, then sort inbound packets before decoding to keep codec state stable for out-of-order audio.
|
||||
- Capture negotiated codec payload types from SDP offers and answers and include codec, RTP port, remote media, and metadata in leg_added events.
|
||||
- Emit leg_state_changed and leg_removed events more consistently so the dashboard reflects leg lifecycle updates accurately.
|
||||
|
||||
## 2026-04-10 - 1.17.0 - feat(proxy-engine)
|
||||
upgrade the internal audio bus to 48kHz f32 with per-leg denoising and improve SIP leg routing
|
||||
|
||||
- switch mixer, prompt playback, and tool leg audio handling from 16kHz i16 to 48kHz f32 for higher-quality internal processing
|
||||
- add f32 decode/encode and resampling support plus standalone RNNoise denoiser creation in codec-lib
|
||||
- apply per-leg inbound noise suppression in the mixer before mix-minus generation
|
||||
- fix passthrough call routing by matching the actual leg from the signaling source address when Call-IDs are shared
|
||||
- correct dialed number extraction from bare SIP request URIs by parsing the user part directly
|
||||
|
||||
## 2026-04-10 - 1.16.0 - feat(proxy-engine)
|
||||
integrate Kokoro TTS generation into proxy-engine and simplify TypeScript prompt handling to use cached WAV files
|
||||
|
||||
- adds a generate_tts command to proxy-engine with lazy-loaded Kokoro model support and WAV output generation
|
||||
- removes standalone opus-codec and tts-engine workspace binaries by consolidating TTS generation into proxy-engine
|
||||
- updates announcement and prompt cache flows to generate and cache WAV files on disk instead of pre-encoding RTP frames in TypeScript
|
||||
|
||||
## 2026-04-10 - 1.15.0 - feat(proxy-engine)
|
||||
add device leg, leg transfer, and leg replacement call controls
|
||||
|
||||
- adds proxy-engine commands and call manager support for inviting a registered SIP device into an active call
|
||||
- supports transferring an existing leg between calls while preserving the active connection and updating mixer routing
|
||||
- supports replacing a call leg by removing the current leg and dialing a new outbound destination
|
||||
- wires the frontend add-leg API and TypeScript bridge to the new device leg and leg control commands
|
||||
|
||||
## 2026-04-10 - 1.14.0 - feat(proxy-engine)
|
||||
add multiparty call mixing with dynamic SIP and WebRTC leg management
|
||||
|
||||
- replace passthrough call handling with a mixer-backed call model that tracks multiple legs and exposes leg status in call state output
|
||||
- add mixer and leg I/O infrastructure to bridge SIP RTP and WebRTC audio through channel-based mix-minus processing
|
||||
- introduce add_leg and remove_leg proxy commands and wire frontend bridge APIs to manage external call legs
|
||||
- emit leg lifecycle events for observability and mark unimplemented device-leg and transfer HTTP endpoints with 501 responses
|
||||
|
||||
## 2026-04-10 - 1.13.0 - feat(proxy-engine,webrtc)
|
||||
add B2BUA SIP leg handling and WebRTC call bridging for outbound calls
|
||||
|
||||
- introduce a new SipLeg module to manage outbound provider dialogs, including INVITE lifecycle, digest auth retries, ACK handling, media endpoint tracking, and termination
|
||||
- store outbound dashboard calls as B2BUA calls in the call manager and emit provider media details on call_answered for bridge setup
|
||||
- separate SIP and WebRTC engine locking to avoid contention and deadlocks while linking sessions to call RTP sockets
|
||||
- add bidirectional RTP bridging between provider SIP media and browser WebRTC audio using the allocated RTP socket
|
||||
- wire browser webrtc-accept events in the frontend and sipproxy so session-to-call linking can occur when media and acceptance arrive in either order
|
||||
|
||||
## 2026-04-10 - 1.12.0 - feat(proxy-engine)
|
||||
add Rust-based outbound calling, WebRTC bridging, and voicemail handling
|
||||
|
||||
- adds outbound call origination through the Rust proxy engine with dashboard make_call support
|
||||
- routes unanswered inbound calls to voicemail, including greeting playback, beep generation, and WAV message recording
|
||||
- introduces Rust WebRTC session handling and SIP audio bridging, replacing the previous TypeScript WebRTC path
|
||||
- moves SIP registration and routing responsibilities further into the Rust proxy engine and removes legacy TypeScript call/SIP modules
|
||||
|
||||
## 2026-04-10 - 1.11.0 - feat(rust-proxy-engine)
|
||||
add a Rust SIP proxy engine with shared SIP and codec libraries
|
||||
|
||||
- add new Rust workspace crates for proxy-engine, sip-proto, and codec-lib
|
||||
- move transcoding logic out of opus-codec into reusable codec-lib and keep opus-codec as a thin CLI wrapper
|
||||
- implement SIP message parsing, dialog handling, SDP/URI rewriting, provider registration, device registration, call management, RTP relay, and DTMF detection in Rust
|
||||
- add a TypeScript proxy bridge and update the SIP proxy entrypoint to spawn and configure the Rust engine as the SIP data plane
|
||||
|
||||
## 2026-04-10 - 1.10.0 - feat(call, voicemail, ivr)
|
||||
add voicemail and IVR call flows with DTMF handling, prompt playback, recording, and dashboard management
|
||||
|
||||
|
||||
BIN
nogit/voicemail/default/msg-1775825168199.wav
Normal file
BIN
nogit/voicemail/default/msg-1775825168199.wav
Normal file
Binary file not shown.
BIN
nogit/voicemail/default/msg-1775840000387.wav
Normal file
BIN
nogit/voicemail/default/msg-1775840000387.wav
Normal file
Binary file not shown.
BIN
nogit/voicemail/default/msg-1775840014276.wav
Normal file
BIN
nogit/voicemail/default/msg-1775840014276.wav
Normal file
Binary file not shown.
BIN
nogit/voicemail/default/msg-1775840439400.wav
Normal file
BIN
nogit/voicemail/default/msg-1775840439400.wav
Normal file
Binary file not shown.
BIN
nogit/voicemail/default/msg-1775840447441.wav
Normal file
BIN
nogit/voicemail/default/msg-1775840447441.wav
Normal file
Binary file not shown.
BIN
nogit/voicemail/default/msg-1775840454835.wav
Normal file
BIN
nogit/voicemail/default/msg-1775840454835.wav
Normal file
Binary file not shown.
10
package.json
10
package.json
@@ -1,24 +1,28 @@
|
||||
{
|
||||
"name": "siprouter",
|
||||
"version": "1.10.0",
|
||||
"version": "1.25.2",
|
||||
"private": true,
|
||||
"type": "module",
|
||||
"scripts": {
|
||||
"bundle": "node node_modules/.pnpm/esbuild@0.27.7/node_modules/esbuild/bin/esbuild ts_web/index.ts --bundle --format=esm --outfile=dist_ts_web/bundle.js --platform=browser --target=es2022 --minify",
|
||||
"buildRust": "tsrust",
|
||||
"build": "pnpm run buildRust && pnpm run bundle",
|
||||
"build:docker": "tsdocker build --verbose",
|
||||
"release:docker": "tsdocker push --verbose",
|
||||
"start": "tsx ts/sipproxy.ts",
|
||||
"restartBackground": "pnpm run buildRust && pnpm run bundle; test -f .server.pid && kill $(cat .server.pid) 2>/dev/null; sleep 1; rm -f sip_trace.log proxy.out && nohup tsx ts/sipproxy.ts > proxy.out 2>&1 & echo $! > .server.pid; sleep 2; cat proxy.out"
|
||||
},
|
||||
"dependencies": {
|
||||
"@design.estate/dees-catalog": "^3.70.0",
|
||||
"@design.estate/dees-catalog": "^3.77.0",
|
||||
"@design.estate/dees-element": "^2.2.4",
|
||||
"@push.rocks/smartrust": "^1.3.2",
|
||||
"@push.rocks/smartstate": "^2.3.0",
|
||||
"werift": "^0.22.9",
|
||||
"tsx": "^4.21.0",
|
||||
"ws": "^8.20.0"
|
||||
},
|
||||
"devDependencies": {
|
||||
"@git.zone/tsbundle": "^2.10.0",
|
||||
"@git.zone/tsdocker": "^2.2.4",
|
||||
"@git.zone/tsrust": "^1.3.2",
|
||||
"@git.zone/tswatch": "^3.3.2",
|
||||
"@types/ws": "^8.18.1"
|
||||
|
||||
686
pnpm-lock.yaml
generated
686
pnpm-lock.yaml
generated
File diff suppressed because it is too large
Load Diff
388
readme.md
388
readme.md
@@ -1,6 +1,6 @@
|
||||
# @serve.zone/siprouter
|
||||
|
||||
A production-grade **SIP B2BUA + WebRTC bridge** built with TypeScript and Rust. Routes calls between SIP providers, SIP hardware devices, and browser softphones — with real-time codec transcoding, ML noise suppression, neural TTS announcements, and a slick web dashboard.
|
||||
A production-grade **SIP B2BUA + WebRTC bridge** built with TypeScript and Rust. Routes calls between SIP providers, SIP hardware devices, and browser softphones — with real-time codec transcoding, adaptive jitter buffering, ML noise suppression, neural TTS, voicemail, IVR menus, and a slick web dashboard.
|
||||
|
||||
## Issue Reporting and Security
|
||||
|
||||
@@ -12,55 +12,82 @@ For reporting bugs, issues, or security vulnerabilities, please visit [community
|
||||
|
||||
siprouter sits between your SIP trunk providers and your endpoints — hardware phones, ATAs, browser softphones — and handles **everything** in between:
|
||||
|
||||
- 📞 **SIP B2BUA** — Terminates and re-originates calls with full RFC 3261 dialog state management
|
||||
- 🌐 **WebRTC Bridge** — Browser-based softphone with bidirectional audio to the SIP network
|
||||
- 🎛️ **Multi-Provider Trunking** — Register with multiple SIP providers simultaneously (sipgate, easybell, o2, etc.)
|
||||
- 🔊 **Rust Codec Engine** — Real-time Opus ↔ G.722 ↔ PCMU ↔ PCMA transcoding in native Rust
|
||||
- 🤖 **ML Noise Suppression** — RNNoise denoiser with per-direction state (to SIP / to browser)
|
||||
- 🗣️ **Neural TTS** — Kokoro-powered "connecting your call" announcements, pre-encoded for instant playback
|
||||
- 🔀 **Hub Model Calls** — N-leg calls with dynamic add/remove, transfer, and RTP fan-out
|
||||
- 🖥️ **Web Dashboard** — Real-time SPA with live call monitoring, browser phone, contact management, provider config
|
||||
- 📞 **SIP B2BUA** — Terminates and re-originates calls with full RFC 3261 dialog state management, digest auth, and SDP negotiation
|
||||
- 🌐 **WebRTC Bridge** — Browser-based softphone with bidirectional Opus audio to the SIP network
|
||||
- 🎛️ **Multi-Provider Trunking** — Register with multiple SIP providers simultaneously (sipgate, easybell, etc.) with automatic failover
|
||||
- 🎧 **48kHz f32 Audio Engine** — High-fidelity internal audio bus at 48kHz/32-bit float with native Opus float encode/decode, FFT-based resampling, and per-leg ML noise suppression
|
||||
- 🔀 **N-Leg Mix-Minus Mixer** — Conference-grade mixing with dynamic leg add/remove, transfer, and per-source audio separation
|
||||
- 🎯 **Adaptive Jitter Buffer** — Per-leg jitter buffering with sequence-based reordering, adaptive depth (60–120ms), Opus PLC for lost packets, and hold/resume detection
|
||||
- 📧 **Voicemail** — Configurable voicemail boxes with TTS greetings, recording, and web playback
|
||||
- 🔢 **IVR Menus** — DTMF-navigable interactive voice response with nested menus, routing actions, and custom prompts
|
||||
- 🗣️ **Neural TTS** — Kokoro-powered greetings and IVR prompts with 25+ voice presets
|
||||
- 🎙️ **Call Recording** — Per-source separated WAV recording at 48kHz via tool legs
|
||||
- 🖥️ **Web Dashboard** — Real-time SPA with 9 views: live calls, browser phone, routing, voicemail, IVR, contacts, providers, and streaming logs
|
||||
|
||||
---
|
||||
|
||||
## 🏗️ Architecture
|
||||
|
||||
```
|
||||
┌─────────────────────────────────────┐
|
||||
│ Browser Softphone │
|
||||
│ (WebRTC via WebSocket signaling) │
|
||||
└──────────────┬──────────────────────┘
|
||||
│ Opus/WebRTC
|
||||
▼
|
||||
┌──────────────────────────────────────┐
|
||||
│ siprouter │
|
||||
│ │
|
||||
│ ┌──────────┐ ┌──────────────────┐ │
|
||||
│ │ Call Hub │ │ Rust Transcoder │ │
|
||||
│ │ N legs │──│ Opus/G.722/PCM │ │
|
||||
│ │ fan-out │ │ + RNNoise │ │
|
||||
│ └────┬─────┘ └──────────────────┘ │
|
||||
│ │ │
|
||||
│ ┌────┴─────┐ ┌──────────────────┐ │
|
||||
│ │ SIP Stack│ │ Kokoro TTS │ │
|
||||
│ │ Dialog SM│ │ (ONNX Runtime) │ │
|
||||
│ └────┬─────┘ └──────────────────┘ │
|
||||
│ │ │
|
||||
│ ┌────┴──────────────────────────┐ │
|
||||
│ │ Local Registrar + Provider │ │
|
||||
│ │ Registration Engine │ │
|
||||
│ └───────────────────────────────┘ │
|
||||
└──────────┬──────────────┬────────────┘
|
||||
│ │
|
||||
┌──────┴──────┐ ┌─────┴──────┐
|
||||
│ SIP Devices │ │ SIP Trunk │
|
||||
│ (HT801, etc)│ │ Providers │
|
||||
└─────────────┘ └────────────┘
|
||||
```mermaid
|
||||
flowchart TB
|
||||
Browser["🌐 Browser Softphone<br/>(WebRTC via WebSocket signaling)"]
|
||||
Devices["📞 SIP Devices<br/>(HT801, desk phones, ATAs)"]
|
||||
Trunks["☎️ SIP Trunk Providers<br/>(sipgate, easybell, …)"]
|
||||
|
||||
subgraph Router["siprouter"]
|
||||
direction TB
|
||||
subgraph TS["TypeScript Control Plane"]
|
||||
TSBits["Config · WebRTC Signaling<br/>REST API · Web Dashboard<br/>Voicebox Manager · TTS Cache"]
|
||||
end
|
||||
subgraph Rust["Rust proxy-engine (data plane)"]
|
||||
RustBits["SIP Stack · Dialog SM · Auth<br/>Call Manager · N-Leg Mixer<br/>48kHz f32 Bus · Jitter Buffer<br/>Codec Engine · RTP Port Pool<br/>WebRTC Engine · Kokoro TTS<br/>Voicemail · IVR · Recording"]
|
||||
end
|
||||
TS <-->|"JSON-over-stdio IPC"| Rust
|
||||
end
|
||||
|
||||
Browser <-->|"Opus / WebRTC"| TS
|
||||
Rust <-->|"SIP / RTP"| Devices
|
||||
Rust <-->|"SIP / RTP"| Trunks
|
||||
```
|
||||
|
||||
### The Hub Model
|
||||
### 🧠 Key Design Decisions
|
||||
|
||||
Every call is a **hub** with N legs. Each leg is either a `SipLeg` (hardware device or provider) or a `WebRtcLeg` (browser). RTP flows through the hub — each leg's received audio is forwarded to all other legs, with codec transcoding handled transparently by the Rust engine.
|
||||
- **Hub Model** — Every call is a hub with N legs. Each leg is a `SipLeg` (device/provider) or `WebRtcLeg` (browser). Legs can be dynamically added, removed, or transferred without tearing down the call.
|
||||
- **Rust Data Plane** — All SIP protocol handling, codec transcoding, mixing, and RTP I/O runs in native Rust for real-time performance. TypeScript handles config, signaling, REST API, and dashboard.
|
||||
- **48kHz f32 Internal Bus** — Audio is processed at maximum quality internally. Encoding/decoding to wire format (G.722, PCMU, Opus) happens solely at the leg boundary.
|
||||
- **Per-Session Codec Isolation** — Each call leg gets its own encoder/decoder/resampler/denoiser state — no cross-call corruption.
|
||||
- **SDP Codec Negotiation** — Outbound encoding uses the codec actually negotiated in SDP answers, not just the first offered codec.
|
||||
|
||||
### 📲 WebRTC Browser Call Flow
|
||||
|
||||
Browser calls are set up in a strict three-step dance — the WebRTC leg cannot be attached at call-creation time because the browser's session ID is only known once the SDP offer arrives:
|
||||
|
||||
```mermaid
|
||||
sequenceDiagram
|
||||
participant B as Browser
|
||||
participant TS as TypeScript (sipproxy.ts)
|
||||
participant R as Rust proxy-engine
|
||||
participant P as SIP Provider
|
||||
|
||||
B->>TS: POST /api/call
|
||||
TS->>R: make_call (pending call, no WebRTC leg yet)
|
||||
R-->>TS: call_created
|
||||
TS-->>B: webrtc-incoming (callId)
|
||||
|
||||
B->>TS: webrtc-offer (sessionId, SDP)
|
||||
TS->>R: handle_webrtc_offer
|
||||
R-->>TS: webrtc-answer (SDP)
|
||||
TS-->>B: webrtc-answer
|
||||
Note over R: Standalone WebRTC session<br/>(not yet attached to call)
|
||||
|
||||
B->>TS: webrtc_link (callId + sessionId)
|
||||
TS->>R: link session → call
|
||||
R->>R: wire WebRTC leg through mixer
|
||||
R->>P: SIP INVITE
|
||||
P-->>R: 200 OK + SDP
|
||||
R-->>TS: call_answered
|
||||
Note over B,P: Bidirectional Opus ↔ codec-transcoded<br/>audio flows through the mixer
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
@@ -70,15 +97,15 @@ Every call is a **hub** with N legs. Each leg is either a `SipLeg` (hardware dev
|
||||
|
||||
- **Node.js** ≥ 20 with `tsx` globally available
|
||||
- **pnpm** for package management
|
||||
- **Rust** toolchain (for building the codec engine and TTS)
|
||||
- **Rust** toolchain (for building the proxy engine)
|
||||
|
||||
### Install & Build
|
||||
|
||||
```bash
|
||||
# Clone and install
|
||||
# Clone and install dependencies
|
||||
pnpm install
|
||||
|
||||
# Build the Rust binaries (opus-codec + tts-engine)
|
||||
# Build the Rust proxy-engine binary
|
||||
pnpm run buildRust
|
||||
|
||||
# Bundle the web frontend
|
||||
@@ -87,57 +114,111 @@ pnpm run bundle
|
||||
|
||||
### Configuration
|
||||
|
||||
Create `.nogit/config.json` with your setup:
|
||||
Create `.nogit/config.json`:
|
||||
|
||||
```jsonc
|
||||
{
|
||||
"proxy": {
|
||||
"lanIp": "192.168.1.100", // Your server's LAN IP
|
||||
"lanPort": 5070, // SIP signaling port
|
||||
"rtpPortRange": [20000, 20200],// RTP relay port pool (even ports)
|
||||
"webUiPort": 3060 // Dashboard port
|
||||
"lanIp": "192.168.1.100", // Your server's LAN IP
|
||||
"lanPort": 5070, // SIP signaling port
|
||||
"publicIpSeed": "stun.example.com", // STUN server for public IP discovery
|
||||
"rtpPortRange": { "min": 20000, "max": 20200 }, // RTP port pool (even ports)
|
||||
"webUiPort": 3060 // Dashboard + REST API port
|
||||
},
|
||||
"providers": [
|
||||
{
|
||||
"id": "my-trunk",
|
||||
"name": "My SIP Provider",
|
||||
"host": "sip.provider.com",
|
||||
"port": 5060,
|
||||
"displayName": "My SIP Provider",
|
||||
"domain": "sip.provider.com",
|
||||
"outboundProxy": { "address": "sip.provider.com", "port": 5060 },
|
||||
"username": "user",
|
||||
"password": "pass",
|
||||
"codecs": ["G.722", "PCMA", "PCMU"],
|
||||
"registerExpiry": 3600
|
||||
"codecs": [9, 0, 8, 101], // G.722, PCMU, PCMA, telephone-event
|
||||
"registerIntervalSec": 300
|
||||
}
|
||||
],
|
||||
"devices": [
|
||||
{
|
||||
"id": "desk-phone",
|
||||
"name": "Desk Phone",
|
||||
"type": "sip"
|
||||
"displayName": "Desk Phone",
|
||||
"expectedAddress": "192.168.1.50",
|
||||
"extension": "100"
|
||||
}
|
||||
],
|
||||
"routing": {
|
||||
"inbound": {
|
||||
"default": { "target": "all-devices", "ringBrowser": true }
|
||||
"routes": [
|
||||
{
|
||||
"id": "inbound-main-did",
|
||||
"name": "Main DID",
|
||||
"priority": 200,
|
||||
"enabled": true,
|
||||
"match": {
|
||||
"direction": "inbound",
|
||||
"sourceProvider": "my-trunk",
|
||||
"numberPattern": "+49421219694"
|
||||
},
|
||||
"action": {
|
||||
"targets": ["desk-phone"],
|
||||
"ringBrowsers": true,
|
||||
"voicemailBox": "main"
|
||||
}
|
||||
},
|
||||
{
|
||||
"id": "inbound-support-did",
|
||||
"name": "Support DID",
|
||||
"priority": 190,
|
||||
"enabled": true,
|
||||
"match": {
|
||||
"direction": "inbound",
|
||||
"sourceProvider": "my-trunk",
|
||||
"numberPattern": "+49421219695"
|
||||
},
|
||||
"action": {
|
||||
"ivrMenuId": "support-menu"
|
||||
}
|
||||
},
|
||||
{
|
||||
"id": "outbound-default",
|
||||
"name": "Route via trunk",
|
||||
"priority": 100,
|
||||
"enabled": true,
|
||||
"match": { "direction": "outbound" },
|
||||
"action": { "provider": "my-trunk" }
|
||||
}
|
||||
]
|
||||
},
|
||||
"voiceboxes": [
|
||||
{
|
||||
"id": "main",
|
||||
"enabled": true,
|
||||
"greetingText": "Please leave a message after the beep.",
|
||||
"greetingVoice": "af_bella",
|
||||
"noAnswerTimeoutSec": 25,
|
||||
"maxRecordingSec": 120,
|
||||
"maxMessages": 50
|
||||
}
|
||||
}
|
||||
],
|
||||
"contacts": [
|
||||
{ "id": "1", "name": "Alice", "number": "+491234567890", "starred": true }
|
||||
]
|
||||
}
|
||||
```
|
||||
|
||||
Inbound number ownership is explicit: add one inbound route per DID (or DID prefix) and scope it with `sourceProvider` when a provider delivers multiple external numbers.
|
||||
|
||||
### TTS Setup (Optional)
|
||||
|
||||
For neural "connecting your call" announcements, download the Kokoro TTS model:
|
||||
For neural voicemail greetings and IVR prompts, download the Kokoro TTS model:
|
||||
|
||||
```bash
|
||||
mkdir -p .nogit/tts
|
||||
# Download the full-quality model (310MB) + voices (27MB)
|
||||
curl -L -o .nogit/tts/kokoro-v1.0.onnx \
|
||||
https://github.com/mzdk100/kokoro/releases/download/V1.0/kokoro-v1.0.onnx
|
||||
curl -L -o .nogit/tts/voices.bin \
|
||||
https://github.com/mzdk100/kokoro/releases/download/V1.0/voices.bin
|
||||
```
|
||||
|
||||
If the model files aren't present, the announcement feature is simply disabled — everything else works fine.
|
||||
Without the model files, TTS prompts (IVR menus, voicemail greetings) are skipped — everything else works fine.
|
||||
|
||||
### Run
|
||||
|
||||
@@ -145,7 +226,7 @@ If the model files aren't present, the announcement feature is simply disabled
|
||||
pnpm start
|
||||
```
|
||||
|
||||
The SIP proxy starts on the configured port and the web dashboard is available at `http://<your-ip>:3060`.
|
||||
The SIP proxy starts on the configured port and the web dashboard is available at `https://<your-ip>:3060`.
|
||||
|
||||
### HTTPS (Optional)
|
||||
|
||||
@@ -157,68 +238,99 @@ Place `cert.pem` and `key.pem` in `.nogit/` for TLS on the dashboard.
|
||||
|
||||
```
|
||||
siprouter/
|
||||
├── ts/ # TypeScript source
|
||||
│ ├── sipproxy.ts # Main entry — bootstraps everything
|
||||
│ ├── config.ts # Config loader & validation
|
||||
│ ├── registrar.ts # Local SIP registrar for devices
|
||||
│ ├── providerstate.ts # Per-provider upstream registration engine
|
||||
│ ├── frontend.ts # Web dashboard HTTP/WS server + REST API
|
||||
│ ├── webrtcbridge.ts # WebRTC signaling layer
|
||||
│ ├── opusbridge.ts # Rust IPC bridge (smartrust)
|
||||
│ ├── codec.ts # High-level RTP transcoding interface
|
||||
│ ├── announcement.ts # Neural TTS announcement generator
|
||||
│ ├── sip/ # Zero-dependency SIP protocol library
|
||||
│ │ ├── message.ts # SIP message parser/builder/mutator
|
||||
│ │ ├── dialog.ts # RFC 3261 dialog state machine
|
||||
│ │ ├── helpers.ts # SDP builder, digest auth, codec registry
|
||||
│ │ └── rewrite.ts # SIP URI + SDP body rewriting
|
||||
│ └── call/ # Hub-model call management
|
||||
│ ├── call-manager.ts # Central registry, factory, routing
|
||||
│ ├── call.ts # Call hub — owns N legs, media fan-out
|
||||
│ ├── sip-leg.ts # SIP device/provider connection
|
||||
│ ├── webrtc-leg.ts # Browser WebRTC connection
|
||||
│ └── rtp-port-pool.ts # UDP port allocation
|
||||
├── ts_web/ # Web frontend (Lit-based SPA)
|
||||
│ ├── elements/ # Web components (dashboard, phone, etc.)
|
||||
│ └── state/ # App state, WebRTC client, notifications
|
||||
├── rust/ # Rust workspace
|
||||
├── ts/ # TypeScript control plane
|
||||
│ ├── sipproxy.ts # Main entry — bootstraps everything
|
||||
│ ├── config.ts # Config loader & validation
|
||||
│ ├── proxybridge.ts # Rust proxy-engine IPC bridge (smartrust)
|
||||
│ ├── frontend.ts # Web dashboard HTTP/WS server + REST API
|
||||
│ ├── webrtcbridge.ts # WebRTC signaling layer
|
||||
│ ├── registrar.ts # Browser softphone registration
|
||||
│ ├── voicebox.ts # Voicemail box management
|
||||
│ └── call/
|
||||
│ └── prompt-cache.ts # Named audio prompt WAV management
|
||||
│
|
||||
├── ts_web/ # Web frontend (Lit-based SPA)
|
||||
│ ├── elements/ # Web components (9 dashboard views)
|
||||
│ └── state/ # App state, WebRTC client, notifications
|
||||
│
|
||||
├── rust/ # Rust workspace (the data plane)
|
||||
│ └── crates/
|
||||
│ ├── opus-codec/ # Real-time audio transcoder (Opus/G.722/PCM)
|
||||
│ └── tts-engine/ # Kokoro neural TTS CLI
|
||||
├── html/ # Static HTML shell
|
||||
├── .nogit/ # Secrets, config, models (gitignored)
|
||||
└── dist_rust/ # Compiled Rust binaries (gitignored)
|
||||
│ ├── codec-lib/ # Audio codec library (Opus/G.722/PCMU/PCMA)
|
||||
│ ├── sip-proto/ # Zero-dependency SIP protocol library
|
||||
│ └── proxy-engine/ # Main binary — SIP engine + mixer + RTP
|
||||
│
|
||||
├── html/ # Static HTML shell
|
||||
├── .nogit/ # Secrets, config, TTS models (gitignored)
|
||||
└── dist_rust/ # Compiled Rust binary (gitignored)
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
## 🎧 Codec Engine (Rust)
|
||||
## 🎧 Audio Engine (Rust)
|
||||
|
||||
The `opus-codec` binary handles all real-time audio processing via a JSON-over-stdio IPC protocol:
|
||||
The `proxy-engine` binary handles all real-time audio processing with a **48kHz f32 internal bus** — encoding and decoding happens only at leg boundaries.
|
||||
|
||||
| Codec | Payload Type | Sample Rate | Use Case |
|
||||
|-------|-------------|-------------|----------|
|
||||
| **Opus** | 111 | 48 kHz | WebRTC browsers |
|
||||
| **G.722** | 9 | 16 kHz | HD SIP devices |
|
||||
### Supported Codecs
|
||||
|
||||
| Codec | PT | Native Rate | Use Case |
|
||||
|-------|:--:|:-----------:|----------|
|
||||
| **Opus** | 111 | 48 kHz | WebRTC browsers (native float encode/decode — zero i16 quantization) |
|
||||
| **G.722** | 9 | 16 kHz | HD SIP devices & providers |
|
||||
| **PCMU** (G.711 µ-law) | 0 | 8 kHz | Legacy SIP |
|
||||
| **PCMA** (G.711 A-law) | 8 | 8 kHz | Legacy SIP |
|
||||
|
||||
**Features:**
|
||||
- Per-call isolated codec sessions (no cross-call state corruption)
|
||||
- FFT-based sample rate conversion via `rubato`
|
||||
- **RNNoise ML noise suppression** with per-direction state — denoises audio flowing to SIP separately from audio flowing to the browser
|
||||
- Raw PCM encoding for TTS frame processing
|
||||
### Audio Pipeline
|
||||
|
||||
```mermaid
|
||||
flowchart LR
|
||||
subgraph Inbound["Inbound path (per leg)"]
|
||||
direction LR
|
||||
IN_RTP["Wire RTP"] --> IN_JB["Jitter Buffer"] --> IN_DEC["Decode"] --> IN_RS["Resample → 48 kHz"] --> IN_DN["Denoise (RNNoise)"] --> IN_BUS["Mix Bus"]
|
||||
end
|
||||
|
||||
subgraph Outbound["Outbound path (per leg)"]
|
||||
direction LR
|
||||
OUT_BUS["Mix Bus"] --> OUT_MM["Mix-Minus"] --> OUT_RS["Resample → codec rate"] --> OUT_ENC["Encode"] --> OUT_RTP["Wire RTP"]
|
||||
end
|
||||
```
|
||||
|
||||
- **Adaptive jitter buffer** — per-leg `BTreeMap`-based buffer keyed by RTP sequence number. Delivers exactly one frame per 20ms mixer tick in sequence order. Adaptive target depth starts at 3 frames (60ms) and adjusts between 2–6 frames based on observed network jitter. Handles hold/resume by detecting large forward sequence jumps and resetting cleanly.
|
||||
- **Packet loss concealment (PLC)** — on missing packets, Opus legs invoke the decoder's built-in PLC (`decode(None)`) to synthesize a smooth fill frame. Non-Opus legs (G.722, PCMU) apply exponential fade (0.85×) toward silence to avoid hard discontinuities.
|
||||
- **FFT-based resampling** via `rubato` — high-quality sinc interpolation with canonical 20ms chunk sizes to ensure consistent resampler state across frames, preventing filter discontinuities
|
||||
- **ML noise suppression** via `nnnoiseless` (RNNoise) — per-leg inbound denoising with SIMD acceleration (AVX/SSE). Skipped for WebRTC legs (browsers already denoise via getUserMedia)
|
||||
- **Mix-minus mixing** — each participant hears everyone except themselves, accumulated in f64 precision
|
||||
- **RFC 3550 compliant header parsing** — properly handles CSRC lists and header extensions
|
||||
|
||||
---
|
||||
|
||||
## 🗣️ Neural TTS (Rust)
|
||||
## 🗣️ Neural TTS
|
||||
|
||||
The `tts-engine` binary uses [Kokoro TTS](https://github.com/mzdk100/kokoro) (82M parameter neural model) to synthesize announcements at startup:
|
||||
Voicemail greetings and IVR prompts are synthesized using [Kokoro TTS](https://github.com/mzdk100/kokoro) — an 82M parameter neural model running via ONNX Runtime directly in the Rust process:
|
||||
|
||||
- **24 kHz, 16-bit mono** output
|
||||
- **25+ voice presets** — American/British, male/female (e.g., `af_bella`, `am_adam`, `bf_emma`, `bm_george`)
|
||||
- **~800ms** synthesis time for a 3-second announcement
|
||||
- Pre-encoded to G.722 + Opus for zero-latency RTP playback during call setup
|
||||
- **~800ms** synthesis time for a 3-second phrase
|
||||
- Lazy-loaded on first use — no startup cost if TTS is unused
|
||||
|
||||
---
|
||||
|
||||
## 📧 Voicemail
|
||||
|
||||
- Configurable voicemail boxes with custom TTS greetings (text + voice) or uploaded WAV
|
||||
- Automatic routing on no-answer timeout (configurable, default 25s)
|
||||
- Recording with configurable max duration (default 120s) and message count limit (default 50)
|
||||
- Unheard message tracking for MWI (message waiting indication)
|
||||
- Web dashboard playback and management
|
||||
- WAV storage in `.nogit/voicemail/`
|
||||
|
||||
---
|
||||
|
||||
## 🔢 IVR (Interactive Voice Response)
|
||||
|
||||
- DTMF-navigable menus with configurable entries
|
||||
- Actions: route to extension, route to voicemail, transfer, submenu, hangup, repeat prompt
|
||||
- Custom TTS prompts per menu
|
||||
- Nested menu support
|
||||
|
||||
---
|
||||
|
||||
@@ -228,33 +340,54 @@ The `tts-engine` binary uses [Kokoro TTS](https://github.com/mzdk100/kokoro) (82
|
||||
|
||||
| View | Description |
|
||||
|------|-------------|
|
||||
| **Overview** | Stats tiles — uptime, providers, devices, active calls |
|
||||
| **Calls** | Active calls with leg details, codec info, packet counters. Add/remove legs, transfer, hangup |
|
||||
| **Phone** | Browser softphone — mic/speaker selection, audio meters, dial pad, incoming call popup |
|
||||
| **Contacts** | Contact management with click-to-call |
|
||||
| **Providers** | SIP trunk config with registration status |
|
||||
| **Log** | Live streaming log viewer |
|
||||
| 📊 **Overview** | Stats tiles — uptime, providers, devices, active calls |
|
||||
| 📞 **Calls** | Active calls with leg details, codec info, add/remove legs, transfer, hangup |
|
||||
| ☎️ **Phone** | Browser softphone — mic/speaker selection, audio meters, dial pad, incoming call popup |
|
||||
| 🔀 **Routes** | Routing rule management — match/action model with priority |
|
||||
| 📧 **Voicemail** | Voicemail box management + message playback |
|
||||
| 🔢 **IVR** | IVR menu builder — DTMF entries, TTS prompts, nested menus |
|
||||
| 👤 **Contacts** | Contact management with click-to-call |
|
||||
| 🔌 **Providers** | SIP trunk configuration and registration status |
|
||||
| 📋 **Log** | Live streaming log viewer |
|
||||
|
||||
### REST API
|
||||
|
||||
| Endpoint | Method | Description |
|
||||
|----------|--------|-------------|
|
||||
| `/api/status` | GET | Full system status (providers, devices, calls) |
|
||||
| `/api/status` | GET | Full system status (providers, devices, calls, history) |
|
||||
| `/api/call` | POST | Originate a call |
|
||||
| `/api/hangup` | POST | Hang up a call |
|
||||
| `/api/call/:id/addleg` | POST | Add a leg to an active call |
|
||||
| `/api/call/:id/addexternal` | POST | Add an external participant |
|
||||
| `/api/call/:id/addleg` | POST | Add a device leg to an active call |
|
||||
| `/api/call/:id/addexternal` | POST | Add an external participant via provider |
|
||||
| `/api/call/:id/removeleg` | POST | Remove a leg from a call |
|
||||
| `/api/transfer` | POST | Transfer a call |
|
||||
| `/api/config` | GET/POST | Read or update configuration (hot-reload) |
|
||||
| `/api/config` | GET | Read current configuration |
|
||||
| `/api/config` | POST | Update configuration (hot-reload) |
|
||||
| `/api/voicemail/:box` | GET | List voicemail messages |
|
||||
| `/api/voicemail/:box/unheard` | GET | Get unheard message count |
|
||||
| `/api/voicemail/:box/:id/audio` | GET | Stream voicemail audio |
|
||||
| `/api/voicemail/:box/:id/heard` | POST | Mark a voicemail message as heard |
|
||||
| `/api/voicemail/:box/:id` | DELETE | Delete a voicemail message |
|
||||
|
||||
### WebSocket Events
|
||||
|
||||
Connect to `/ws` for real-time push:
|
||||
|
||||
```jsonc
|
||||
{ "type": "status", "data": { ... } } // Full status snapshot (1s interval)
|
||||
{ "type": "status", "data": { ... } } // Full status snapshot (1s interval)
|
||||
{ "type": "log", "data": { "message": "..." } } // Log lines in real-time
|
||||
{ "type": "call-update", "data": { ... } } // Call state change notification
|
||||
{ "type": "webrtc-answer", "data": { ... } } // WebRTC SDP answer for browser calls
|
||||
{ "type": "webrtc-error", "data": { ... } } // WebRTC signaling error
|
||||
```
|
||||
|
||||
Browser → server signaling:
|
||||
|
||||
```jsonc
|
||||
{ "type": "webrtc-offer", "data": { ... } } // Browser sends SDP offer
|
||||
{ "type": "webrtc-accept", "data": { ... } } // Browser accepts incoming call
|
||||
{ "type": "webrtc-ice", "data": { ... } } // ICE candidate exchange
|
||||
{ "type": "webrtc-hangup", "data": { ... } } // Browser hangs up
|
||||
```
|
||||
|
||||
---
|
||||
@@ -264,7 +397,7 @@ Connect to `/ws` for real-time push:
|
||||
| Port | Protocol | Purpose |
|
||||
|------|----------|---------|
|
||||
| 5070 (configurable) | UDP | SIP signaling |
|
||||
| 20000–20200 (configurable) | UDP | RTP relay (even ports, per-call allocation) |
|
||||
| 20000–20200 (configurable) | UDP | RTP media (even ports, per-call allocation) |
|
||||
| 3060 (configurable) | TCP | Web dashboard + WebSocket + REST API |
|
||||
|
||||
---
|
||||
@@ -275,28 +408,21 @@ Connect to `/ws` for real-time push:
|
||||
# Start in dev mode
|
||||
pnpm start
|
||||
|
||||
# Build Rust crates
|
||||
# Build Rust proxy-engine
|
||||
pnpm run buildRust
|
||||
|
||||
# Bundle web frontend
|
||||
pnpm run bundle
|
||||
|
||||
# Restart background server (build + bundle + restart)
|
||||
# Build + bundle + restart background server
|
||||
pnpm run restartBackground
|
||||
```
|
||||
|
||||
### Key Design Decisions
|
||||
|
||||
- **Hub Model** — Calls are N-leg hubs, not point-to-point. This enables multi-party, dynamic leg manipulation, and transfer without tearing down the call.
|
||||
- **Zero-dependency SIP library** — `ts/sip/` is a pure data-level SIP stack (parse/build/mutate/serialize). No transport or timer logic — those live in the application layer.
|
||||
- **Rust for the hot path** — Codec transcoding and noise suppression run in native Rust for real-time performance. TypeScript handles signaling and orchestration.
|
||||
- **Per-session codec isolation** — Each call gets its own Opus/G.722 encoder/decoder state in the Rust process, preventing stateful codec prediction from leaking between concurrent calls.
|
||||
|
||||
---
|
||||
|
||||
## License and Legal Information
|
||||
|
||||
This repository contains open-source code licensed under the MIT License. A copy of the license can be found in the [license](./license) file.
|
||||
This repository contains open-source code licensed under the MIT License. A copy of the license can be found in the [LICENSE](./LICENSE) file.
|
||||
|
||||
**Please note:** The MIT License does not grant permission to use the trade names, trademarks, service marks, or product names of the project, except as required for reasonable and customary use in describing the origin of the work and reproducing the content of the NOTICE file.
|
||||
|
||||
|
||||
30
rust/.cargo/config.toml
Normal file
30
rust/.cargo/config.toml
Normal file
@@ -0,0 +1,30 @@
|
||||
# Cross-compile configuration for the proxy-engine crate.
|
||||
#
|
||||
# tsrust builds for both x86_64-unknown-linux-gnu and aarch64-unknown-linux-gnu
|
||||
# from an x86_64 host. Without this config, cargo invokes the host `cc` to
|
||||
# link aarch64 objects and fails with
|
||||
# rust-lld: error: <obj.o> is incompatible with elf64-x86-64
|
||||
#
|
||||
# Required Debian/Ubuntu packages for the aarch64 target to work:
|
||||
# sudo apt install gcc-aarch64-linux-gnu g++-aarch64-linux-gnu \
|
||||
# libc6-dev-arm64-cross libstdc++6-arm64-cross
|
||||
#
|
||||
# The `libstdc++.so` dev symlink (needed by the -lstdc++ flag that the
|
||||
# kokoro-tts/ort build scripts emit) is provided by this repo at
|
||||
# ./crosslibs/aarch64/libstdc++.so, pointing at the versioned shared
|
||||
# library installed by `libstdc++6-arm64-cross`. This avoids requiring
|
||||
# the `libstdc++-13-dev-arm64-cross` package, which is not always
|
||||
# installed alongside the runtime.
|
||||
|
||||
[target.aarch64-unknown-linux-gnu]
|
||||
linker = "aarch64-linux-gnu-gcc"
|
||||
rustflags = ["-C", "link-arg=-L.cargo/crosslibs/aarch64"]
|
||||
|
||||
# Tell cc-rs-based build scripts (ring, zstd-sys, audiopus_sys, ort-sys) to
|
||||
# use the aarch64 cross toolchain when compiling C sources for the aarch64
|
||||
# target. Without these, they'd default to the host `cc` and produce x86_64
|
||||
# objects that the aarch64 linker then rejects.
|
||||
[env]
|
||||
CC_aarch64_unknown_linux_gnu = "aarch64-linux-gnu-gcc"
|
||||
CXX_aarch64_unknown_linux_gnu = "aarch64-linux-gnu-g++"
|
||||
AR_aarch64_unknown_linux_gnu = "aarch64-linux-gnu-ar"
|
||||
1
rust/.cargo/crosslibs/aarch64/libstdc++.so
Symbolic link
1
rust/.cargo/crosslibs/aarch64/libstdc++.so
Symbolic link
@@ -0,0 +1 @@
|
||||
/usr/aarch64-linux-gnu/lib/libstdc++.so.6
|
||||
2211
rust/Cargo.lock
generated
2211
rust/Cargo.lock
generated
File diff suppressed because it is too large
Load Diff
@@ -1,7 +1,14 @@
|
||||
[workspace]
|
||||
members = ["crates/opus-codec", "crates/tts-engine"]
|
||||
members = [
|
||||
"crates/codec-lib",
|
||||
"crates/sip-proto",
|
||||
"crates/proxy-engine",
|
||||
]
|
||||
resolver = "2"
|
||||
|
||||
[profile.release]
|
||||
opt-level = 3
|
||||
lto = true
|
||||
|
||||
[patch.crates-io]
|
||||
kokoro-tts = { path = "vendor/kokoro-tts" }
|
||||
|
||||
10
rust/crates/codec-lib/Cargo.toml
Normal file
10
rust/crates/codec-lib/Cargo.toml
Normal file
@@ -0,0 +1,10 @@
|
||||
[package]
|
||||
name = "codec-lib"
|
||||
version = "0.1.0"
|
||||
edition = "2021"
|
||||
|
||||
[dependencies]
|
||||
audiopus = "0.3.0-rc.0"
|
||||
ezk-g722 = "0.1"
|
||||
rubato = "0.14"
|
||||
nnnoiseless = "0.5"
|
||||
545
rust/crates/codec-lib/src/lib.rs
Normal file
545
rust/crates/codec-lib/src/lib.rs
Normal file
@@ -0,0 +1,545 @@
|
||||
//! Audio codec library for the SIP router.
|
||||
//!
|
||||
//! Handles Opus ↔ G.722 ↔ PCMU/PCMA transcoding with ML noise suppression.
|
||||
//! Used by the `proxy-engine` binary for all audio transcoding.
|
||||
|
||||
use audiopus::coder::{Decoder as OpusDecoder, Encoder as OpusEncoder};
|
||||
use audiopus::packet::Packet as OpusPacket;
|
||||
use audiopus::{Application, Bitrate as OpusBitrate, Channels, MutSignals, SampleRate};
|
||||
use ezk_g722::libg722::{self, Bitrate};
|
||||
use nnnoiseless::DenoiseState;
|
||||
use rubato::{FftFixedIn, Resampler};
|
||||
use std::collections::HashMap;
|
||||
|
||||
// ---- Payload type constants ------------------------------------------------
|
||||
|
||||
pub const PT_PCMU: u8 = 0;
|
||||
pub const PT_PCMA: u8 = 8;
|
||||
pub const PT_G722: u8 = 9;
|
||||
pub const PT_OPUS: u8 = 111;
|
||||
|
||||
/// Return the native sample rate for a given payload type.
|
||||
pub fn codec_sample_rate(pt: u8) -> u32 {
|
||||
match pt {
|
||||
PT_OPUS => 48000,
|
||||
PT_G722 => 16000,
|
||||
_ => 8000, // PCMU, PCMA
|
||||
}
|
||||
}
|
||||
|
||||
// ---- G.711 µ-law (PCMU) ---------------------------------------------------
|
||||
|
||||
pub fn mulaw_encode(sample: i16) -> u8 {
|
||||
const BIAS: i16 = 0x84;
|
||||
const CLIP: i16 = 32635;
|
||||
let sign = if sample < 0 { 0x80u8 } else { 0 };
|
||||
let mut s = (sample as i32).unsigned_abs().min(CLIP as u32) as i16;
|
||||
s += BIAS;
|
||||
let mut exp = 7u8;
|
||||
let mut mask = 0x4000i16;
|
||||
while exp > 0 && (s & mask) == 0 {
|
||||
exp -= 1;
|
||||
mask >>= 1;
|
||||
}
|
||||
let mantissa = ((s >> (exp + 3)) & 0x0f) as u8;
|
||||
!(sign | (exp << 4) | mantissa)
|
||||
}
|
||||
|
||||
pub fn mulaw_decode(mulaw: u8) -> i16 {
|
||||
let v = !mulaw;
|
||||
let sign = v & 0x80;
|
||||
let exp = (v >> 4) & 0x07;
|
||||
let mantissa = v & 0x0f;
|
||||
// Use i32 to avoid overflow when exp=7, mantissa=15 (result > i16::MAX).
|
||||
let mut sample = (((mantissa as i32) << 4) + 0x84) << exp;
|
||||
sample -= 0x84;
|
||||
let sample = if sign != 0 { -sample } else { sample };
|
||||
sample.clamp(-32768, 32767) as i16
|
||||
}
|
||||
|
||||
// ---- G.711 A-law (PCMA) ---------------------------------------------------
|
||||
|
||||
pub fn alaw_encode(sample: i16) -> u8 {
|
||||
let sign = if sample >= 0 { 0x80u8 } else { 0 };
|
||||
let s = (sample as i32).unsigned_abs().min(32767) as i16;
|
||||
let mut exp = 7u8;
|
||||
let mut mask = 0x4000i16;
|
||||
while exp > 0 && (s & mask) == 0 {
|
||||
exp -= 1;
|
||||
mask >>= 1;
|
||||
}
|
||||
let mantissa = if exp > 0 {
|
||||
((s >> (exp + 3)) & 0x0f) as u8
|
||||
} else {
|
||||
((s >> 4) & 0x0f) as u8
|
||||
};
|
||||
(sign | (exp << 4) | mantissa) ^ 0x55
|
||||
}
|
||||
|
||||
pub fn alaw_decode(alaw: u8) -> i16 {
|
||||
let v = alaw ^ 0x55;
|
||||
let sign = v & 0x80;
|
||||
let exp = (v >> 4) & 0x07;
|
||||
let mantissa = v & 0x0f;
|
||||
// Use i32 to avoid overflow for extreme values.
|
||||
let sample = if exp == 0 {
|
||||
((mantissa as i32) << 4) + 8
|
||||
} else {
|
||||
(((mantissa as i32) << 4) + 0x108) << (exp - 1)
|
||||
};
|
||||
let sample = if sign != 0 { sample } else { -sample };
|
||||
sample.clamp(-32768, 32767) as i16
|
||||
}
|
||||
|
||||
// ---- TranscodeState --------------------------------------------------------
|
||||
|
||||
/// Per-session codec state holding Opus, G.722, resampler, and denoiser instances.
|
||||
///
|
||||
/// Each concurrent call should get its own `TranscodeState` to prevent stateful
|
||||
/// codecs (Opus, G.722 ADPCM) from corrupting each other.
|
||||
pub struct TranscodeState {
|
||||
opus_enc: OpusEncoder,
|
||||
opus_dec: OpusDecoder,
|
||||
g722_enc: libg722::encoder::Encoder,
|
||||
g722_dec: libg722::decoder::Decoder,
|
||||
/// Cached FFT resamplers keyed by (from_rate, to_rate, chunk_size).
|
||||
resamplers: HashMap<(u32, u32, usize), FftFixedIn<f64>>,
|
||||
/// Cached f32 FFT resamplers keyed by (from_rate, to_rate, chunk_size).
|
||||
resamplers_f32: HashMap<(u32, u32, usize), FftFixedIn<f32>>,
|
||||
/// ML noise suppression for the SIP-bound direction.
|
||||
denoiser_to_sip: Box<DenoiseState<'static>>,
|
||||
/// ML noise suppression for the browser-bound direction.
|
||||
denoiser_to_browser: Box<DenoiseState<'static>>,
|
||||
}
|
||||
|
||||
impl TranscodeState {
|
||||
/// Create a new transcoding session with fresh codec state.
|
||||
pub fn new() -> Result<Self, String> {
|
||||
let mut opus_enc = OpusEncoder::new(SampleRate::Hz48000, Channels::Mono, Application::Voip)
|
||||
.map_err(|e| format!("opus encoder: {e}"))?;
|
||||
opus_enc
|
||||
.set_complexity(5)
|
||||
.map_err(|e| format!("opus set_complexity: {e}"))?;
|
||||
opus_enc
|
||||
.set_bitrate(OpusBitrate::BitsPerSecond(24000))
|
||||
.map_err(|e| format!("opus set_bitrate: {e}"))?;
|
||||
let opus_dec = OpusDecoder::new(SampleRate::Hz48000, Channels::Mono)
|
||||
.map_err(|e| format!("opus decoder: {e}"))?;
|
||||
let g722_enc = libg722::encoder::Encoder::new(Bitrate::Mode1_64000, false, false);
|
||||
let g722_dec = libg722::decoder::Decoder::new(Bitrate::Mode1_64000, false, false);
|
||||
|
||||
Ok(Self {
|
||||
opus_enc,
|
||||
opus_dec,
|
||||
g722_enc,
|
||||
g722_dec,
|
||||
resamplers: HashMap::new(),
|
||||
resamplers_f32: HashMap::new(),
|
||||
denoiser_to_sip: DenoiseState::new(),
|
||||
denoiser_to_browser: DenoiseState::new(),
|
||||
})
|
||||
}
|
||||
|
||||
/// High-quality sample rate conversion using rubato FFT resampler.
|
||||
///
|
||||
/// To maintain continuous filter state, the resampler always processes at a
|
||||
/// canonical chunk size (20ms at the source rate). This prevents cache
|
||||
/// thrashing from variable input sizes and preserves inter-frame filter state.
|
||||
pub fn resample(
|
||||
&mut self,
|
||||
pcm: &[i16],
|
||||
from_rate: u32,
|
||||
to_rate: u32,
|
||||
) -> Result<Vec<i16>, String> {
|
||||
if from_rate == to_rate || pcm.is_empty() {
|
||||
return Ok(pcm.to_vec());
|
||||
}
|
||||
|
||||
let canonical_chunk = (from_rate as usize) / 50; // 20ms
|
||||
let key = (from_rate, to_rate, canonical_chunk);
|
||||
|
||||
if !self.resamplers.contains_key(&key) {
|
||||
let r =
|
||||
FftFixedIn::<f64>::new(from_rate as usize, to_rate as usize, canonical_chunk, 1, 1)
|
||||
.map_err(|e| format!("resampler {from_rate}->{to_rate}: {e}"))?;
|
||||
self.resamplers.insert(key, r);
|
||||
}
|
||||
let resampler = self.resamplers.get_mut(&key).unwrap();
|
||||
|
||||
let mut output = Vec::with_capacity(
|
||||
(pcm.len() as f64 * to_rate as f64 / from_rate as f64).ceil() as usize + 16,
|
||||
);
|
||||
|
||||
let mut offset = 0;
|
||||
while offset < pcm.len() {
|
||||
let remaining = pcm.len() - offset;
|
||||
let copy_len = remaining.min(canonical_chunk);
|
||||
let mut chunk = vec![0.0f64; canonical_chunk];
|
||||
for i in 0..copy_len {
|
||||
chunk[i] = pcm[offset + i] as f64 / 32768.0;
|
||||
}
|
||||
|
||||
let input = vec![chunk];
|
||||
let result = resampler
|
||||
.process(&input, None)
|
||||
.map_err(|e| format!("resample {from_rate}->{to_rate}: {e}"))?;
|
||||
|
||||
if remaining < canonical_chunk {
|
||||
let expected =
|
||||
(copy_len as f64 * to_rate as f64 / from_rate as f64).round() as usize;
|
||||
let take = expected.min(result[0].len());
|
||||
output.extend(
|
||||
result[0][..take]
|
||||
.iter()
|
||||
.map(|&s| (s * 32767.0).round().clamp(-32768.0, 32767.0) as i16),
|
||||
);
|
||||
} else {
|
||||
output.extend(
|
||||
result[0]
|
||||
.iter()
|
||||
.map(|&s| (s * 32767.0).round().clamp(-32768.0, 32767.0) as i16),
|
||||
);
|
||||
}
|
||||
|
||||
offset += canonical_chunk;
|
||||
}
|
||||
|
||||
Ok(output)
|
||||
}
|
||||
|
||||
/// Apply RNNoise ML noise suppression to 48kHz PCM audio.
|
||||
/// Processes in 480-sample (10ms) frames. State persists across calls.
|
||||
pub fn denoise(denoiser: &mut DenoiseState, pcm: &[i16]) -> Vec<i16> {
|
||||
let frame_size = DenoiseState::FRAME_SIZE; // 480
|
||||
let total = pcm.len();
|
||||
let whole = (total / frame_size) * frame_size;
|
||||
let mut output = Vec::with_capacity(total);
|
||||
let mut out_buf = [0.0f32; 480];
|
||||
|
||||
for offset in (0..whole).step_by(frame_size) {
|
||||
let input: Vec<f32> = pcm[offset..offset + frame_size]
|
||||
.iter()
|
||||
.map(|&s| s as f32)
|
||||
.collect();
|
||||
denoiser.process_frame(&mut out_buf, &input);
|
||||
output.extend(
|
||||
out_buf
|
||||
.iter()
|
||||
.map(|&s| s.round().clamp(-32768.0, 32767.0) as i16),
|
||||
);
|
||||
}
|
||||
if whole < total {
|
||||
output.extend_from_slice(&pcm[whole..]);
|
||||
}
|
||||
output
|
||||
}
|
||||
|
||||
/// Transcode audio payload from one codec to another.
|
||||
///
|
||||
/// `direction`: `Some("to_sip")` or `Some("to_browser")` selects per-direction
|
||||
/// denoiser. `None` skips denoising (backward compat).
|
||||
pub fn transcode(
|
||||
&mut self,
|
||||
data: &[u8],
|
||||
from_pt: u8,
|
||||
to_pt: u8,
|
||||
direction: Option<&str>,
|
||||
) -> Result<Vec<u8>, String> {
|
||||
if from_pt == to_pt {
|
||||
return Ok(data.to_vec());
|
||||
}
|
||||
|
||||
let (pcm, rate) = self.decode_to_pcm(data, from_pt)?;
|
||||
|
||||
let processed = if let Some(dir) = direction {
|
||||
let pcm_48k = self.resample(&pcm, rate, 48000)?;
|
||||
let denoiser = match dir {
|
||||
"to_sip" => &mut self.denoiser_to_sip,
|
||||
_ => &mut self.denoiser_to_browser,
|
||||
};
|
||||
let denoised = Self::denoise(denoiser, &pcm_48k);
|
||||
let target_rate = codec_sample_rate(to_pt);
|
||||
self.resample(&denoised, 48000, target_rate)?
|
||||
} else {
|
||||
let target_rate = codec_sample_rate(to_pt);
|
||||
if rate == target_rate {
|
||||
pcm
|
||||
} else {
|
||||
self.resample(&pcm, rate, target_rate)?
|
||||
}
|
||||
};
|
||||
|
||||
self.encode_from_pcm(&processed, to_pt)
|
||||
}
|
||||
|
||||
/// Decode an encoded audio payload to raw 16-bit PCM samples.
|
||||
/// Returns (samples, sample_rate).
|
||||
pub fn decode_to_pcm(&mut self, data: &[u8], pt: u8) -> Result<(Vec<i16>, u32), String> {
|
||||
match pt {
|
||||
PT_OPUS => {
|
||||
let mut pcm = vec![0i16; 5760]; // up to 120ms at 48kHz
|
||||
let packet = OpusPacket::try_from(data).map_err(|e| format!("opus packet: {e}"))?;
|
||||
let out =
|
||||
MutSignals::try_from(&mut pcm[..]).map_err(|e| format!("opus signals: {e}"))?;
|
||||
let n: usize = self
|
||||
.opus_dec
|
||||
.decode(Some(packet), out, false)
|
||||
.map_err(|e| format!("opus decode: {e}"))?
|
||||
.into();
|
||||
pcm.truncate(n);
|
||||
Ok((pcm, 48000))
|
||||
}
|
||||
PT_G722 => {
|
||||
let pcm = self.g722_dec.decode(data);
|
||||
Ok((pcm, 16000))
|
||||
}
|
||||
PT_PCMU => {
|
||||
let pcm: Vec<i16> = data.iter().map(|&b| mulaw_decode(b)).collect();
|
||||
Ok((pcm, 8000))
|
||||
}
|
||||
PT_PCMA => {
|
||||
let pcm: Vec<i16> = data.iter().map(|&b| alaw_decode(b)).collect();
|
||||
Ok((pcm, 8000))
|
||||
}
|
||||
_ => Err(format!("unsupported source PT {pt}")),
|
||||
}
|
||||
}
|
||||
|
||||
/// Encode raw PCM samples to an audio codec.
|
||||
pub fn encode_from_pcm(&mut self, pcm: &[i16], pt: u8) -> Result<Vec<u8>, String> {
|
||||
match pt {
|
||||
PT_OPUS => {
|
||||
let mut buf = vec![0u8; 4000];
|
||||
let n: usize = self
|
||||
.opus_enc
|
||||
.encode(pcm, &mut buf)
|
||||
.map_err(|e| format!("opus encode: {e}"))?
|
||||
.into();
|
||||
buf.truncate(n);
|
||||
Ok(buf)
|
||||
}
|
||||
PT_G722 => Ok(self.g722_enc.encode(pcm)),
|
||||
PT_PCMU => Ok(pcm.iter().map(|&s| mulaw_encode(s)).collect()),
|
||||
PT_PCMA => Ok(pcm.iter().map(|&s| alaw_encode(s)).collect()),
|
||||
_ => Err(format!("unsupported target PT {pt}")),
|
||||
}
|
||||
}
|
||||
|
||||
// ---- f32 API for high-quality internal bus ----------------------------
|
||||
|
||||
/// Decode an encoded audio payload to f32 PCM samples in [-1.0, 1.0].
|
||||
/// Returns (samples, sample_rate).
|
||||
///
|
||||
/// For Opus, uses native float decode (no i16 quantization).
|
||||
/// For G.722/G.711, decodes to i16 then converts (codec is natively i16).
|
||||
pub fn decode_to_f32(&mut self, data: &[u8], pt: u8) -> Result<(Vec<f32>, u32), String> {
|
||||
match pt {
|
||||
PT_OPUS => {
|
||||
let mut pcm = vec![0.0f32; 5760]; // up to 120ms at 48kHz
|
||||
let packet = OpusPacket::try_from(data).map_err(|e| format!("opus packet: {e}"))?;
|
||||
let out =
|
||||
MutSignals::try_from(&mut pcm[..]).map_err(|e| format!("opus signals: {e}"))?;
|
||||
let n: usize = self
|
||||
.opus_dec
|
||||
.decode_float(Some(packet), out, false)
|
||||
.map_err(|e| format!("opus decode_float: {e}"))?
|
||||
.into();
|
||||
pcm.truncate(n);
|
||||
Ok((pcm, 48000))
|
||||
}
|
||||
_ => {
|
||||
// G.722, PCMU, PCMA: natively i16 codecs — decode then convert.
|
||||
let (pcm_i16, rate) = self.decode_to_pcm(data, pt)?;
|
||||
let pcm_f32 = pcm_i16.iter().map(|&s| s as f32 / 32768.0).collect();
|
||||
Ok((pcm_f32, rate))
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/// Opus packet loss concealment — synthesize one frame to fill a gap.
|
||||
/// Returns f32 PCM at 48kHz. `frame_size` should be 960 for 20ms.
|
||||
pub fn opus_plc(&mut self, frame_size: usize) -> Result<Vec<f32>, String> {
|
||||
let mut pcm = vec![0.0f32; frame_size];
|
||||
let out =
|
||||
MutSignals::try_from(&mut pcm[..]).map_err(|e| format!("opus plc signals: {e}"))?;
|
||||
let n: usize = self
|
||||
.opus_dec
|
||||
.decode_float(None::<OpusPacket<'_>>, out, false)
|
||||
.map_err(|e| format!("opus plc: {e}"))?
|
||||
.into();
|
||||
pcm.truncate(n);
|
||||
Ok(pcm)
|
||||
}
|
||||
|
||||
/// Encode f32 PCM samples ([-1.0, 1.0]) to an audio codec.
|
||||
///
|
||||
/// For Opus, uses native float encode (no i16 quantization).
|
||||
/// For G.722/G.711, converts to i16 then encodes (codec is natively i16).
|
||||
pub fn encode_from_f32(&mut self, pcm: &[f32], pt: u8) -> Result<Vec<u8>, String> {
|
||||
match pt {
|
||||
PT_OPUS => {
|
||||
let mut buf = vec![0u8; 4000];
|
||||
let n: usize = self
|
||||
.opus_enc
|
||||
.encode_float(pcm, &mut buf)
|
||||
.map_err(|e| format!("opus encode_float: {e}"))?
|
||||
.into();
|
||||
buf.truncate(n);
|
||||
Ok(buf)
|
||||
}
|
||||
_ => {
|
||||
// G.722, PCMU, PCMA: natively i16 codecs.
|
||||
let pcm_i16: Vec<i16> = pcm
|
||||
.iter()
|
||||
.map(|&s| (s * 32767.0).round().clamp(-32768.0, 32767.0) as i16)
|
||||
.collect();
|
||||
self.encode_from_pcm(&pcm_i16, pt)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/// High-quality sample rate conversion for f32 PCM using rubato FFT resampler.
|
||||
///
|
||||
/// To maintain continuous filter state, the resampler always processes at a
|
||||
/// canonical chunk size (20ms at the source rate). This prevents cache
|
||||
/// thrashing from variable input sizes and preserves inter-frame filter state.
|
||||
pub fn resample_f32(
|
||||
&mut self,
|
||||
pcm: &[f32],
|
||||
from_rate: u32,
|
||||
to_rate: u32,
|
||||
) -> Result<Vec<f32>, String> {
|
||||
if from_rate == to_rate || pcm.is_empty() {
|
||||
return Ok(pcm.to_vec());
|
||||
}
|
||||
|
||||
let canonical_chunk = (from_rate as usize) / 50; // 20ms
|
||||
let key = (from_rate, to_rate, canonical_chunk);
|
||||
|
||||
if !self.resamplers_f32.contains_key(&key) {
|
||||
let r =
|
||||
FftFixedIn::<f32>::new(from_rate as usize, to_rate as usize, canonical_chunk, 1, 1)
|
||||
.map_err(|e| format!("resampler f32 {from_rate}->{to_rate}: {e}"))?;
|
||||
self.resamplers_f32.insert(key, r);
|
||||
}
|
||||
let resampler = self.resamplers_f32.get_mut(&key).unwrap();
|
||||
|
||||
let mut output = Vec::with_capacity(
|
||||
(pcm.len() as f64 * to_rate as f64 / from_rate as f64).ceil() as usize + 16,
|
||||
);
|
||||
|
||||
let mut offset = 0;
|
||||
while offset < pcm.len() {
|
||||
let remaining = pcm.len() - offset;
|
||||
let mut chunk = vec![0.0f32; canonical_chunk];
|
||||
let copy_len = remaining.min(canonical_chunk);
|
||||
chunk[..copy_len].copy_from_slice(&pcm[offset..offset + copy_len]);
|
||||
|
||||
let input = vec![chunk];
|
||||
let result = resampler
|
||||
.process(&input, None)
|
||||
.map_err(|e| format!("resample f32 {from_rate}->{to_rate}: {e}"))?;
|
||||
|
||||
if remaining < canonical_chunk {
|
||||
let expected =
|
||||
(copy_len as f64 * to_rate as f64 / from_rate as f64).round() as usize;
|
||||
output.extend_from_slice(&result[0][..expected.min(result[0].len())]);
|
||||
} else {
|
||||
output.extend_from_slice(&result[0]);
|
||||
}
|
||||
|
||||
offset += canonical_chunk;
|
||||
}
|
||||
|
||||
Ok(output)
|
||||
}
|
||||
|
||||
/// Apply RNNoise ML noise suppression to 48kHz f32 PCM audio.
|
||||
/// Processes in 480-sample (10ms) frames. State persists across calls.
|
||||
/// Operates natively in f32 — no i16 conversion overhead.
|
||||
pub fn denoise_f32(denoiser: &mut DenoiseState, pcm: &[f32]) -> Vec<f32> {
|
||||
let frame_size = DenoiseState::FRAME_SIZE; // 480
|
||||
let total = pcm.len();
|
||||
let whole = (total / frame_size) * frame_size;
|
||||
let mut output = Vec::with_capacity(total);
|
||||
let mut out_buf = [0.0f32; 480];
|
||||
|
||||
// nnnoiseless expects f32 samples scaled as i16 range (-32768..32767).
|
||||
for offset in (0..whole).step_by(frame_size) {
|
||||
let input: Vec<f32> = pcm[offset..offset + frame_size]
|
||||
.iter()
|
||||
.map(|&s| s * 32768.0)
|
||||
.collect();
|
||||
denoiser.process_frame(&mut out_buf, &input);
|
||||
output.extend(out_buf.iter().map(|&s| s / 32768.0));
|
||||
}
|
||||
if whole < total {
|
||||
output.extend_from_slice(&pcm[whole..]);
|
||||
}
|
||||
output
|
||||
}
|
||||
}
|
||||
|
||||
/// Create a new standalone denoiser for per-leg inbound processing.
|
||||
pub fn new_denoiser() -> Box<DenoiseState<'static>> {
|
||||
DenoiseState::new()
|
||||
}
|
||||
|
||||
#[cfg(test)]
|
||||
mod tests {
|
||||
use super::*;
|
||||
|
||||
#[test]
|
||||
fn mulaw_roundtrip() {
|
||||
for sample in [-32768i16, -1000, -1, 0, 1, 1000, 32767] {
|
||||
let encoded = mulaw_encode(sample);
|
||||
let decoded = mulaw_decode(encoded);
|
||||
// µ-law is lossy; verify the decoded value is close.
|
||||
assert!(
|
||||
(sample as i32 - decoded as i32).abs() < 1000,
|
||||
"µ-law roundtrip failed for {sample}: got {decoded}"
|
||||
);
|
||||
}
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn alaw_roundtrip() {
|
||||
for sample in [-32768i16, -1000, -1, 0, 1, 1000, 32767] {
|
||||
let encoded = alaw_encode(sample);
|
||||
let decoded = alaw_decode(encoded);
|
||||
assert!(
|
||||
(sample as i32 - decoded as i32).abs() < 1000,
|
||||
"A-law roundtrip failed for {sample}: got {decoded}"
|
||||
);
|
||||
}
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn codec_sample_rates() {
|
||||
assert_eq!(codec_sample_rate(PT_OPUS), 48000);
|
||||
assert_eq!(codec_sample_rate(PT_G722), 16000);
|
||||
assert_eq!(codec_sample_rate(PT_PCMU), 8000);
|
||||
assert_eq!(codec_sample_rate(PT_PCMA), 8000);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn transcode_same_pt_is_passthrough() {
|
||||
let mut st = TranscodeState::new().unwrap();
|
||||
let data = vec![0u8; 160];
|
||||
let result = st.transcode(&data, PT_PCMU, PT_PCMU, None).unwrap();
|
||||
assert_eq!(result, data);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn pcmu_to_pcma_roundtrip() {
|
||||
let mut st = TranscodeState::new().unwrap();
|
||||
// 160 bytes = 20ms of PCMU at 8kHz
|
||||
let pcmu_data: Vec<u8> = (0..160)
|
||||
.map(|i| mulaw_encode((i as i16 * 200) - 16000))
|
||||
.collect();
|
||||
let pcma = st.transcode(&pcmu_data, PT_PCMU, PT_PCMA, None).unwrap();
|
||||
assert_eq!(pcma.len(), 160); // Same frame size
|
||||
let back = st.transcode(&pcma, PT_PCMA, PT_PCMU, None).unwrap();
|
||||
assert_eq!(back.len(), 160);
|
||||
}
|
||||
}
|
||||
@@ -1,17 +0,0 @@
|
||||
[package]
|
||||
name = "opus-codec"
|
||||
version = "0.2.0"
|
||||
edition = "2021"
|
||||
|
||||
[[bin]]
|
||||
name = "opus-codec"
|
||||
path = "src/main.rs"
|
||||
|
||||
[dependencies]
|
||||
audiopus = "0.3.0-rc.0"
|
||||
ezk-g722 = "0.1"
|
||||
rubato = "0.14"
|
||||
serde = { version = "1", features = ["derive"] }
|
||||
serde_json = "1"
|
||||
base64 = "0.22"
|
||||
nnnoiseless = { version = "0.5", default-features = false }
|
||||
@@ -1,464 +0,0 @@
|
||||
/// Audio transcoding bridge for smartrust.
|
||||
///
|
||||
/// Handles Opus ↔ G.722 ↔ PCMU transcoding for the SIP router.
|
||||
/// Uses audiopus (libopus) for Opus and ezk-g722 (SpanDSP port) for G.722.
|
||||
///
|
||||
/// Supports per-session codec state so concurrent calls don't corrupt each
|
||||
/// other's stateful codecs (Opus, G.722 ADPCM).
|
||||
///
|
||||
/// Protocol:
|
||||
/// -> {"id":"1","method":"init","params":{}}
|
||||
/// <- {"id":"1","success":true,"result":{}}
|
||||
/// -> {"id":"2","method":"create_session","params":{"session_id":"call-abc"}}
|
||||
/// <- {"id":"2","success":true,"result":{}}
|
||||
/// -> {"id":"3","method":"transcode","params":{"session_id":"call-abc","data_b64":"...","from_pt":111,"to_pt":9}}
|
||||
/// <- {"id":"3","success":true,"result":{"data_b64":"..."}}
|
||||
/// -> {"id":"4","method":"destroy_session","params":{"session_id":"call-abc"}}
|
||||
/// <- {"id":"4","success":true,"result":{}}
|
||||
|
||||
use audiopus::coder::{Decoder as OpusDecoder, Encoder as OpusEncoder};
|
||||
use audiopus::packet::Packet as OpusPacket;
|
||||
use audiopus::{Application, Bitrate as OpusBitrate, Channels, MutSignals, SampleRate};
|
||||
use base64::engine::general_purpose::STANDARD as B64;
|
||||
use base64::Engine as _;
|
||||
use ezk_g722::libg722::{self, Bitrate};
|
||||
use nnnoiseless::DenoiseState;
|
||||
use rubato::{FftFixedIn, Resampler};
|
||||
use serde::Deserialize;
|
||||
use std::collections::HashMap;
|
||||
use std::io::{self, BufRead, Write};
|
||||
|
||||
// Payload type constants.
|
||||
const PT_PCMU: u8 = 0;
|
||||
const PT_PCMA: u8 = 8;
|
||||
const PT_G722: u8 = 9;
|
||||
const PT_OPUS: u8 = 111;
|
||||
|
||||
#[derive(Deserialize)]
|
||||
struct Request {
|
||||
id: String,
|
||||
method: String,
|
||||
#[serde(default)]
|
||||
params: serde_json::Value,
|
||||
}
|
||||
|
||||
fn respond(out: &mut impl Write, id: &str, success: bool, result: Option<serde_json::Value>, error: Option<&str>) {
|
||||
let mut resp = serde_json::json!({ "id": id, "success": success });
|
||||
if let Some(r) = result { resp["result"] = r; }
|
||||
if let Some(e) = error { resp["error"] = serde_json::Value::String(e.to_string()); }
|
||||
let _ = writeln!(out, "{}", resp);
|
||||
let _ = out.flush();
|
||||
}
|
||||
|
||||
// ---------------------------------------------------------------------------
|
||||
// Codec state
|
||||
// ---------------------------------------------------------------------------
|
||||
|
||||
struct TranscodeState {
|
||||
opus_enc: OpusEncoder,
|
||||
opus_dec: OpusDecoder,
|
||||
g722_enc: libg722::encoder::Encoder,
|
||||
g722_dec: libg722::decoder::Decoder,
|
||||
// Cached FFT resamplers keyed by (from_rate, to_rate, chunk_size).
|
||||
resamplers: HashMap<(u32, u32, usize), FftFixedIn<f64>>,
|
||||
// Per-direction ML noise suppression (RNNoise). Separate state per direction
|
||||
// prevents the RNN hidden state from being corrupted by interleaved audio streams.
|
||||
denoiser_to_sip: Box<DenoiseState<'static>>,
|
||||
denoiser_to_browser: Box<DenoiseState<'static>>,
|
||||
}
|
||||
|
||||
impl TranscodeState {
|
||||
fn new() -> Result<Self, String> {
|
||||
let mut opus_enc = OpusEncoder::new(SampleRate::Hz48000, Channels::Mono, Application::Voip)
|
||||
.map_err(|e| format!("opus encoder: {e}"))?;
|
||||
// Telephony-grade tuning: complexity 5 is sufficient for voice bridged to G.722.
|
||||
opus_enc.set_complexity(5).map_err(|e| format!("opus set_complexity: {e}"))?;
|
||||
opus_enc.set_bitrate(OpusBitrate::BitsPerSecond(24000)).map_err(|e| format!("opus set_bitrate: {e}"))?;
|
||||
let opus_dec = OpusDecoder::new(SampleRate::Hz48000, Channels::Mono)
|
||||
.map_err(|e| format!("opus decoder: {e}"))?;
|
||||
let g722_enc = libg722::encoder::Encoder::new(Bitrate::Mode1_64000, false, false);
|
||||
let g722_dec = libg722::decoder::Decoder::new(Bitrate::Mode1_64000, false, false);
|
||||
|
||||
Ok(Self {
|
||||
opus_enc, opus_dec, g722_enc, g722_dec,
|
||||
resamplers: HashMap::new(),
|
||||
denoiser_to_sip: DenoiseState::new(),
|
||||
denoiser_to_browser: DenoiseState::new(),
|
||||
})
|
||||
}
|
||||
|
||||
/// High-quality sample rate conversion using rubato FFT resampler.
|
||||
/// Resamplers are cached by (from_rate, to_rate, chunk_size) and reused,
|
||||
/// maintaining proper inter-frame state for continuous audio streams.
|
||||
fn resample(&mut self, pcm: &[i16], from_rate: u32, to_rate: u32) -> Result<Vec<i16>, String> {
|
||||
if from_rate == to_rate || pcm.is_empty() {
|
||||
return Ok(pcm.to_vec());
|
||||
}
|
||||
|
||||
let chunk = pcm.len();
|
||||
let key = (from_rate, to_rate, chunk);
|
||||
|
||||
// Get or create cached resampler for this rate pair + chunk size.
|
||||
if !self.resamplers.contains_key(&key) {
|
||||
let r = FftFixedIn::<f64>::new(from_rate as usize, to_rate as usize, chunk, 1, 1)
|
||||
.map_err(|e| format!("resampler {from_rate}->{to_rate}: {e}"))?;
|
||||
self.resamplers.insert(key, r);
|
||||
}
|
||||
let resampler = self.resamplers.get_mut(&key).unwrap();
|
||||
|
||||
// i16 → f64 normalized to [-1.0, 1.0]
|
||||
let float_in: Vec<f64> = pcm.iter().map(|&s| s as f64 / 32768.0).collect();
|
||||
let input = vec![float_in];
|
||||
|
||||
let result = resampler.process(&input, None)
|
||||
.map_err(|e| format!("resample {from_rate}->{to_rate}: {e}"))?;
|
||||
|
||||
// f64 → i16
|
||||
Ok(result[0].iter()
|
||||
.map(|&s| (s * 32767.0).round().clamp(-32768.0, 32767.0) as i16)
|
||||
.collect())
|
||||
}
|
||||
|
||||
/// Apply RNNoise ML noise suppression to 48kHz PCM audio.
|
||||
/// Processes in 480-sample (10ms) frames. State persists across calls.
|
||||
fn denoise(denoiser: &mut DenoiseState, pcm: &[i16]) -> Vec<i16> {
|
||||
let frame_size = DenoiseState::FRAME_SIZE; // 480
|
||||
let total = pcm.len();
|
||||
// Round down to whole frames — don't process partial frames to avoid
|
||||
// injecting artificial silence into the RNN state.
|
||||
let whole = (total / frame_size) * frame_size;
|
||||
let mut output = Vec::with_capacity(total);
|
||||
let mut out_buf = [0.0f32; 480];
|
||||
|
||||
for offset in (0..whole).step_by(frame_size) {
|
||||
let input: Vec<f32> = pcm[offset..offset + frame_size]
|
||||
.iter().map(|&s| s as f32).collect();
|
||||
denoiser.process_frame(&mut out_buf, &input);
|
||||
output.extend(out_buf.iter()
|
||||
.map(|&s| s.round().clamp(-32768.0, 32767.0) as i16));
|
||||
}
|
||||
// Pass through any trailing partial-frame samples unmodified.
|
||||
if whole < total {
|
||||
output.extend_from_slice(&pcm[whole..]);
|
||||
}
|
||||
output
|
||||
}
|
||||
|
||||
/// Transcode audio payload from one codec to another.
|
||||
/// `direction`: "to_sip" or "to_browser" — selects the per-direction denoiser.
|
||||
/// If None, denoising is skipped (backward compat).
|
||||
fn transcode(&mut self, data: &[u8], from_pt: u8, to_pt: u8, direction: Option<&str>) -> Result<Vec<u8>, String> {
|
||||
if from_pt == to_pt {
|
||||
return Ok(data.to_vec());
|
||||
}
|
||||
|
||||
// Decode to PCM (at source sample rate).
|
||||
let (pcm, rate) = self.decode_to_pcm(data, from_pt)?;
|
||||
|
||||
// Apply noise suppression if direction is specified.
|
||||
let processed = if let Some(dir) = direction {
|
||||
// Resample to 48kHz for denoising (no-op when already 48kHz).
|
||||
let pcm_48k = self.resample(&pcm, rate, 48000)?;
|
||||
let denoiser = match dir {
|
||||
"to_sip" => &mut self.denoiser_to_sip,
|
||||
_ => &mut self.denoiser_to_browser,
|
||||
};
|
||||
let denoised = Self::denoise(denoiser, &pcm_48k);
|
||||
// Resample to target rate (no-op when target is 48kHz).
|
||||
let target_rate = codec_sample_rate(to_pt);
|
||||
self.resample(&denoised, 48000, target_rate)?
|
||||
} else {
|
||||
// No denoising — direct resample.
|
||||
let target_rate = codec_sample_rate(to_pt);
|
||||
if rate == target_rate { pcm } else { self.resample(&pcm, rate, target_rate)? }
|
||||
};
|
||||
|
||||
// Encode from PCM.
|
||||
self.encode_from_pcm(&processed, to_pt)
|
||||
}
|
||||
|
||||
fn decode_to_pcm(&mut self, data: &[u8], pt: u8) -> Result<(Vec<i16>, u32), String> {
|
||||
match pt {
|
||||
PT_OPUS => {
|
||||
let mut pcm = vec![0i16; 5760]; // up to 120ms at 48kHz (RFC 6716 max)
|
||||
let packet = OpusPacket::try_from(data)
|
||||
.map_err(|e| format!("opus packet: {e}"))?;
|
||||
let out = MutSignals::try_from(&mut pcm[..])
|
||||
.map_err(|e| format!("opus signals: {e}"))?;
|
||||
let n: usize = self.opus_dec.decode(Some(packet), out, false)
|
||||
.map_err(|e| format!("opus decode: {e}"))?.into();
|
||||
pcm.truncate(n);
|
||||
Ok((pcm, 48000))
|
||||
}
|
||||
PT_G722 => {
|
||||
let pcm = self.g722_dec.decode(data);
|
||||
Ok((pcm, 16000))
|
||||
}
|
||||
PT_PCMU => {
|
||||
let pcm: Vec<i16> = data.iter().map(|&b| mulaw_decode(b)).collect();
|
||||
Ok((pcm, 8000))
|
||||
}
|
||||
PT_PCMA => {
|
||||
let pcm: Vec<i16> = data.iter().map(|&b| alaw_decode(b)).collect();
|
||||
Ok((pcm, 8000))
|
||||
}
|
||||
_ => Err(format!("unsupported source PT {pt}")),
|
||||
}
|
||||
}
|
||||
|
||||
fn encode_from_pcm(&mut self, pcm: &[i16], pt: u8) -> Result<Vec<u8>, String> {
|
||||
match pt {
|
||||
PT_OPUS => {
|
||||
let mut buf = vec![0u8; 4000];
|
||||
let n: usize = self.opus_enc.encode(pcm, &mut buf)
|
||||
.map_err(|e| format!("opus encode: {e}"))?.into();
|
||||
buf.truncate(n);
|
||||
Ok(buf)
|
||||
}
|
||||
PT_G722 => {
|
||||
Ok(self.g722_enc.encode(pcm))
|
||||
}
|
||||
PT_PCMU => {
|
||||
Ok(pcm.iter().map(|&s| mulaw_encode(s)).collect())
|
||||
}
|
||||
PT_PCMA => {
|
||||
Ok(pcm.iter().map(|&s| alaw_encode(s)).collect())
|
||||
}
|
||||
_ => Err(format!("unsupported target PT {pt}")),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
fn codec_sample_rate(pt: u8) -> u32 {
|
||||
match pt {
|
||||
PT_OPUS => 48000,
|
||||
PT_G722 => 16000,
|
||||
_ => 8000, // PCMU, PCMA
|
||||
}
|
||||
}
|
||||
|
||||
// ---------------------------------------------------------------------------
|
||||
// G.711 µ-law (PCMU)
|
||||
// ---------------------------------------------------------------------------
|
||||
|
||||
fn mulaw_encode(sample: i16) -> u8 {
|
||||
const BIAS: i16 = 0x84;
|
||||
const CLIP: i16 = 32635;
|
||||
let sign = if sample < 0 { 0x80u8 } else { 0 };
|
||||
// Use i32 to avoid overflow when sample == i16::MIN (-32768).
|
||||
let mut s = (sample as i32).unsigned_abs().min(CLIP as u32) as i16;
|
||||
s += BIAS;
|
||||
let mut exp = 7u8;
|
||||
let mut mask = 0x4000i16;
|
||||
while exp > 0 && (s & mask) == 0 { exp -= 1; mask >>= 1; }
|
||||
let mantissa = ((s >> (exp + 3)) & 0x0f) as u8;
|
||||
!(sign | (exp << 4) | mantissa)
|
||||
}
|
||||
|
||||
fn mulaw_decode(mulaw: u8) -> i16 {
|
||||
let v = !mulaw;
|
||||
let sign = v & 0x80;
|
||||
let exp = (v >> 4) & 0x07;
|
||||
let mantissa = v & 0x0f;
|
||||
let mut sample = (((mantissa as i16) << 4) + 0x84) << exp;
|
||||
sample -= 0x84;
|
||||
if sign != 0 { -sample } else { sample }
|
||||
}
|
||||
|
||||
// ---------------------------------------------------------------------------
|
||||
// G.711 A-law (PCMA)
|
||||
// ---------------------------------------------------------------------------
|
||||
|
||||
fn alaw_encode(sample: i16) -> u8 {
|
||||
let sign = if sample >= 0 { 0x80u8 } else { 0 };
|
||||
// Use i32 to avoid overflow when sample == i16::MIN (-32768).
|
||||
let s = (sample as i32).unsigned_abs().min(32767) as i16;
|
||||
let mut exp = 7u8;
|
||||
let mut mask = 0x4000i16;
|
||||
while exp > 0 && (s & mask) == 0 { exp -= 1; mask >>= 1; }
|
||||
let mantissa = if exp > 0 { ((s >> (exp + 3)) & 0x0f) as u8 } else { ((s >> 4) & 0x0f) as u8 };
|
||||
(sign | (exp << 4) | mantissa) ^ 0x55
|
||||
}
|
||||
|
||||
fn alaw_decode(alaw: u8) -> i16 {
|
||||
let v = alaw ^ 0x55;
|
||||
let sign = v & 0x80;
|
||||
let exp = (v >> 4) & 0x07;
|
||||
let mantissa = v & 0x0f;
|
||||
let sample = if exp == 0 {
|
||||
((mantissa as i16) << 4) + 8
|
||||
} else {
|
||||
(((mantissa as i16) << 4) + 0x108) << (exp - 1)
|
||||
};
|
||||
if sign != 0 { sample } else { -sample }
|
||||
}
|
||||
|
||||
// ---------------------------------------------------------------------------
|
||||
// Main loop
|
||||
// ---------------------------------------------------------------------------
|
||||
|
||||
/// Resolve a session: if session_id is provided, look it up in the sessions map;
|
||||
/// otherwise fall back to the default state (backward compat with `init`).
|
||||
fn get_session<'a>(
|
||||
sessions: &'a mut HashMap<String, TranscodeState>,
|
||||
default: &'a mut Option<TranscodeState>,
|
||||
params: &serde_json::Value,
|
||||
) -> Option<&'a mut TranscodeState> {
|
||||
if let Some(sid) = params.get("session_id").and_then(|v| v.as_str()) {
|
||||
sessions.get_mut(sid)
|
||||
} else {
|
||||
default.as_mut()
|
||||
}
|
||||
}
|
||||
|
||||
/// Entry point: a line-oriented request/response loop over stdio.
///
/// Protocol: one JSON request per line on stdin, one JSON response per
/// line on stdout (written via `respond`). A `{"event":"ready"}` line is
/// emitted first so the parent process knows the engine is up. Write
/// errors are deliberately ignored (`let _ =`) — if stdout is gone, the
/// parent has died anyway.
fn main() {
    let stdin = io::stdin();
    let stdout = io::stdout();
    let mut out = io::BufWriter::new(stdout.lock());

    // Handshake: announce readiness before accepting requests.
    let _ = writeln!(out, r#"{{"event":"ready","data":{{}}}}"#);
    let _ = out.flush();

    // Default state for backward-compat `init` (no session_id).
    let mut default_state: Option<TranscodeState> = None;
    // Per-session codec state for concurrent call isolation.
    let mut sessions: HashMap<String, TranscodeState> = HashMap::new();

    for line in stdin.lock().lines() {
        // Skip blank lines; stop entirely on a read error (EOF / parent gone).
        let line = match line {
            Ok(l) if !l.trim().is_empty() => l,
            Ok(_) => continue,
            Err(_) => break,
        };

        // Malformed JSON is reported with an empty request id — we cannot
        // know the id of a request we failed to parse.
        let req: Request = match serde_json::from_str(&line) {
            Ok(r) => r,
            Err(e) => {
                respond(&mut out, "", false, None, Some(&format!("parse: {e}")));
                continue;
            }
        };

        match req.method.as_str() {
            // Backward-compat: init the default (shared) session.
            "init" => {
                match TranscodeState::new() {
                    Ok(s) => {
                        default_state = Some(s);
                        respond(&mut out, &req.id, true, Some(serde_json::json!({})), None);
                    }
                    Err(e) => respond(&mut out, &req.id, false, None, Some(&e)),
                }
            }

            // Create an isolated session with its own codec state.
            "create_session" => {
                let session_id = match req.params.get("session_id").and_then(|v| v.as_str()) {
                    Some(s) => s.to_string(),
                    None => { respond(&mut out, &req.id, false, None, Some("missing session_id")); continue; }
                };
                // Idempotent: re-creating an existing session succeeds
                // without resetting its codec state.
                if sessions.contains_key(&session_id) {
                    respond(&mut out, &req.id, true, Some(serde_json::json!({})), None);
                    continue;
                }
                match TranscodeState::new() {
                    Ok(s) => {
                        sessions.insert(session_id, s);
                        respond(&mut out, &req.id, true, Some(serde_json::json!({})), None);
                    }
                    Err(e) => respond(&mut out, &req.id, false, None, Some(&e)),
                }
            }

            // Destroy a session, freeing its codec state.
            "destroy_session" => {
                let session_id = match req.params.get("session_id").and_then(|v| v.as_str()) {
                    Some(s) => s,
                    None => { respond(&mut out, &req.id, false, None, Some("missing session_id")); continue; }
                };
                // Removing an unknown session is not an error (idempotent).
                sessions.remove(session_id);
                respond(&mut out, &req.id, true, Some(serde_json::json!({})), None);
            }

            // Transcode: uses session_id if provided, else default state.
            "transcode" => {
                let st = match get_session(&mut sessions, &mut default_state, &req.params) {
                    Some(s) => s,
                    None => { respond(&mut out, &req.id, false, None, Some("not initialized (no session or default state)")); continue; }
                };
                let data_b64 = match req.params.get("data_b64").and_then(|v| v.as_str()) {
                    Some(s) => s,
                    None => { respond(&mut out, &req.id, false, None, Some("missing data_b64")); continue; }
                };
                // Payload types default to 0 (PCMU per RFC 3551) when absent.
                let from_pt = req.params.get("from_pt").and_then(|v| v.as_u64()).unwrap_or(0) as u8;
                let to_pt = req.params.get("to_pt").and_then(|v| v.as_u64()).unwrap_or(0) as u8;
                let direction = req.params.get("direction").and_then(|v| v.as_str());

                let data = match B64.decode(data_b64) {
                    Ok(b) => b,
                    Err(e) => { respond(&mut out, &req.id, false, None, Some(&format!("b64: {e}"))); continue; }
                };

                match st.transcode(&data, from_pt, to_pt, direction) {
                    Ok(result) => {
                        respond(&mut out, &req.id, true, Some(serde_json::json!({ "data_b64": B64.encode(&result) })), None);
                    }
                    Err(e) => respond(&mut out, &req.id, false, None, Some(&e)),
                }
            }

            // Encode raw 16-bit PCM to a target codec.
            // Params: data_b64 (raw PCM bytes, 16-bit LE), sample_rate (input Hz), to_pt
            // Optional: session_id for isolated codec state.
            "encode_pcm" => {
                let st = match get_session(&mut sessions, &mut default_state, &req.params) {
                    Some(s) => s,
                    None => { respond(&mut out, &req.id, false, None, Some("not initialized (no session or default state)")); continue; }
                };
                let data_b64 = match req.params.get("data_b64").and_then(|v| v.as_str()) {
                    Some(s) => s,
                    None => { respond(&mut out, &req.id, false, None, Some("missing data_b64")); continue; }
                };
                // Defaults: 22050 Hz input (presumably the TTS output rate —
                // confirm against the caller) and payload type 9 (G.722).
                let sample_rate = req.params.get("sample_rate").and_then(|v| v.as_u64()).unwrap_or(22050) as u32;
                let to_pt = req.params.get("to_pt").and_then(|v| v.as_u64()).unwrap_or(9) as u8;

                let data = match B64.decode(data_b64) {
                    Ok(b) => b,
                    Err(e) => { respond(&mut out, &req.id, false, None, Some(&format!("b64: {e}"))); continue; }
                };

                // An odd byte count cannot be 16-bit LE samples; reject early.
                if data.len() % 2 != 0 {
                    respond(&mut out, &req.id, false, None, Some("PCM data has odd byte count (expected 16-bit LE samples)"));
                    continue;
                }

                // Convert raw bytes to i16 samples.
                let pcm: Vec<i16> = data.chunks_exact(2)
                    .map(|c| i16::from_le_bytes([c[0], c[1]]))
                    .collect();

                // Resample to target codec's sample rate.
                let target_rate = codec_sample_rate(to_pt);
                let resampled = match st.resample(&pcm, sample_rate, target_rate) {
                    Ok(r) => r,
                    Err(e) => { respond(&mut out, &req.id, false, None, Some(&e)); continue; }
                };

                // Encode to target codec (reuse encode_from_pcm).
                match st.encode_from_pcm(&resampled, to_pt) {
                    Ok(encoded) => {
                        respond(&mut out, &req.id, true, Some(serde_json::json!({ "data_b64": B64.encode(&encoded) })), None);
                    }
                    Err(e) => { respond(&mut out, &req.id, false, None, Some(&e)); continue; }
                }
            }

            // Legacy commands (kept for backward compat).
            "encode" | "decode" => {
                respond(&mut out, &req.id, false, None, Some("use 'transcode' command instead"));
            }

            _ => respond(&mut out, &req.id, false, None, Some(&format!("unknown: {}", req.method))),
        }
    }
}
|
||||
26
rust/crates/proxy-engine/Cargo.toml
Normal file
26
rust/crates/proxy-engine/Cargo.toml
Normal file
@@ -0,0 +1,26 @@
|
||||
# Manifest for the proxy-engine binary (SIP/RTP media proxy engine).
[package]
name = "proxy-engine"
version = "0.1.0"
edition = "2021"

[[bin]]
name = "proxy-engine"
path = "src/main.rs"

[dependencies]
# Workspace crates: codec transcoding and SIP message parsing.
codec-lib = { path = "../codec-lib" }
sip-proto = { path = "../sip-proto" }
# Noise suppression.
nnnoiseless = "0.5"
tokio = { version = "1", features = ["full"] }
serde = { version = "1", features = ["derive"] }
serde_json = "1"
base64 = "0.22"
regex-lite = "0.1"
webrtc = "0.8"
rand = "0.8"
# WAV reading/writing (prompts, voicemail recordings).
hound = "3.5"
# TTS engine; cmudict feature selects dictionary-based grapheme-to-phoneme.
kokoro-tts = { version = "0.3", default-features = false, features = ["use-cmudict"] }
# ONNX Runtime pinned to an exact release candidate — rc versions do not
# follow semver, so a floating requirement could break the build.
ort = { version = "=2.0.0-rc.11", default-features = false, features = [
    "std", "download-binaries", "copy-dylibs", "ndarray",
    "tls-native-vendored"
] }
|
||||
243
rust/crates/proxy-engine/src/audio_player.rs
Normal file
243
rust/crates/proxy-engine/src/audio_player.rs
Normal file
@@ -0,0 +1,243 @@
|
||||
//! Audio player — reads a WAV file and streams it as RTP packets.
//! Also provides prompt preparation for the leg interaction system.

use crate::rtp::{build_rtp_header, rtp_clock_increment};
use codec_lib::{codec_sample_rate, TranscodeState};
use std::net::SocketAddr;
use std::path::Path;
use std::sync::Arc;
use tokio::net::UdpSocket;
use tokio::time::{self, Duration};

/// Mixing sample rate used by the mixer (must stay in sync with mixer::MIX_RATE).
const MIX_RATE: u32 = 48000;
/// Samples per 20ms frame at the mixing rate (48000 Hz / 50 frames per second).
const MIX_FRAME_SIZE: usize = 960;
|
||||
|
||||
/// Play a WAV file as RTP to a destination.
/// Returns when playback is complete.
///
/// Reads the whole file into memory, resamples it to the codec's native
/// rate, then paces 20ms frames onto `socket` toward `dest`, encoding each
/// frame with a fresh `TranscodeState`. Returns the number of RTP frames
/// actually sent.
///
/// NOTE(review): samples are read without regard to channel count, so a
/// stereo WAV would play as interleaved mono at double speed — confirm
/// prompt files are always mono.
pub async fn play_wav_file(
    file_path: &str,
    socket: Arc<UdpSocket>,
    dest: SocketAddr,
    codec_pt: u8,
    ssrc: u32,
) -> Result<u32, String> {
    let path = Path::new(file_path);
    if !path.exists() {
        return Err(format!("WAV file not found: {file_path}"));
    }

    // Read WAV file.
    let mut reader =
        hound::WavReader::open(path).map_err(|e| format!("open WAV {file_path}: {e}"))?;
    let spec = reader.spec();
    let wav_rate = spec.sample_rate;

    // Read all samples as i16. Only 16-bit int and 32-bit float WAVs are
    // supported; floats are scaled and clamped into i16 range. Unreadable
    // samples are silently dropped (filter_map).
    let samples: Vec<i16> = if spec.bits_per_sample == 16 {
        reader.samples::<i16>().filter_map(|s| s.ok()).collect()
    } else if spec.bits_per_sample == 32 && spec.sample_format == hound::SampleFormat::Float {
        reader
            .samples::<f32>()
            .filter_map(|s| s.ok())
            .map(|s| (s * 32767.0).round().clamp(-32768.0, 32767.0) as i16)
            .collect()
    } else {
        return Err(format!(
            "unsupported WAV format: {}bit {:?}",
            spec.bits_per_sample, spec.sample_format
        ));
    };

    // An empty file is not an error: zero frames were sent.
    if samples.is_empty() {
        return Ok(0);
    }

    // Create codec state for encoding.
    let mut transcoder = TranscodeState::new().map_err(|e| format!("codec init: {e}"))?;

    // Resample to target codec rate.
    let target_rate = codec_sample_rate(codec_pt);
    let resampled = if wav_rate != target_rate {
        transcoder
            .resample(&samples, wav_rate, target_rate)
            .map_err(|e| format!("resample: {e}"))?
    } else {
        samples
    };

    // Calculate frame size (20ms of audio at target rate).
    let frame_samples = (target_rate as usize) / 50; // 20ms = 1/50 second

    // Stream as RTP at 20ms intervals.
    let mut seq: u16 = 0;
    let mut ts: u32 = 0;
    let mut offset = 0;
    let mut interval = time::interval(Duration::from_millis(20));
    let mut frames_sent = 0u32;

    while offset < resampled.len() {
        // Pace output in real time: one frame per 20ms tick.
        interval.tick().await;

        let end = (offset + frame_samples).min(resampled.len());
        let frame = &resampled[offset..end];

        // Pad short final frame with silence.
        let frame_data = if frame.len() < frame_samples {
            let mut padded = frame.to_vec();
            padded.resize(frame_samples, 0);
            padded
        } else {
            frame.to_vec()
        };

        // Encode to target codec; failed or empty encodes skip the frame.
        // NOTE(review): the skip path does not advance seq/ts, so a skipped
        // frame shifts the RTP timeline — confirm encode failures cannot
        // occur mid-stream for supported payload types.
        let encoded = match transcoder.encode_from_pcm(&frame_data, codec_pt) {
            Ok(e) if !e.is_empty() => e,
            _ => {
                offset += frame_samples;
                continue;
            }
        };

        // Build RTP packet: fixed header followed by the codec payload.
        let header = build_rtp_header(codec_pt, seq, ts, ssrc);
        let mut packet = header.to_vec();
        packet.extend_from_slice(&encoded);

        // Send errors are ignored — RTP is best-effort.
        let _ = socket.send_to(&packet, dest).await;

        seq = seq.wrapping_add(1);
        ts = ts.wrapping_add(rtp_clock_increment(codec_pt));
        offset += frame_samples;
        frames_sent += 1;
    }

    Ok(frames_sent)
}
|
||||
|
||||
/// Generate and play a beep tone (sine wave) as RTP.
|
||||
pub async fn play_beep(
|
||||
socket: Arc<UdpSocket>,
|
||||
dest: SocketAddr,
|
||||
codec_pt: u8,
|
||||
ssrc: u32,
|
||||
start_seq: u16,
|
||||
start_ts: u32,
|
||||
freq_hz: u32,
|
||||
duration_ms: u32,
|
||||
) -> Result<(u16, u32), String> {
|
||||
let mut transcoder = TranscodeState::new().map_err(|e| format!("codec init: {e}"))?;
|
||||
let target_rate = codec_sample_rate(codec_pt);
|
||||
let frame_samples = (target_rate as usize) / 50;
|
||||
let total_samples = (target_rate as usize * duration_ms as usize) / 1000;
|
||||
|
||||
// Generate sine wave.
|
||||
let amplitude = 16000i16;
|
||||
let sine: Vec<i16> = (0..total_samples)
|
||||
.map(|i| {
|
||||
let t = i as f64 / target_rate as f64;
|
||||
(amplitude as f64 * (2.0 * std::f64::consts::PI * freq_hz as f64 * t).sin()) as i16
|
||||
})
|
||||
.collect();
|
||||
|
||||
let mut seq = start_seq;
|
||||
let mut ts = start_ts;
|
||||
let mut offset = 0;
|
||||
let mut interval = time::interval(Duration::from_millis(20));
|
||||
|
||||
while offset < sine.len() {
|
||||
interval.tick().await;
|
||||
|
||||
let end = (offset + frame_samples).min(sine.len());
|
||||
let mut frame = sine[offset..end].to_vec();
|
||||
frame.resize(frame_samples, 0);
|
||||
|
||||
let encoded = match transcoder.encode_from_pcm(&frame, codec_pt) {
|
||||
Ok(e) if !e.is_empty() => e,
|
||||
_ => {
|
||||
offset += frame_samples;
|
||||
continue;
|
||||
}
|
||||
};
|
||||
|
||||
let header = build_rtp_header(codec_pt, seq, ts, ssrc);
|
||||
let mut packet = header.to_vec();
|
||||
packet.extend_from_slice(&encoded);
|
||||
let _ = socket.send_to(&packet, dest).await;
|
||||
|
||||
seq = seq.wrapping_add(1);
|
||||
ts = ts.wrapping_add(rtp_clock_increment(codec_pt));
|
||||
offset += frame_samples;
|
||||
}
|
||||
|
||||
Ok((seq, ts))
|
||||
}
|
||||
|
||||
/// Load a WAV file and split it into 20ms f32 PCM frames at 48kHz.
|
||||
/// Used by the leg interaction system to prepare prompt audio for the mixer.
|
||||
pub fn load_prompt_pcm_frames(wav_path: &str) -> Result<Vec<Vec<f32>>, String> {
|
||||
let path = Path::new(wav_path);
|
||||
if !path.exists() {
|
||||
return Err(format!("WAV file not found: {wav_path}"));
|
||||
}
|
||||
|
||||
let mut reader =
|
||||
hound::WavReader::open(path).map_err(|e| format!("open WAV {wav_path}: {e}"))?;
|
||||
let spec = reader.spec();
|
||||
let wav_rate = spec.sample_rate;
|
||||
|
||||
// Read all samples as f32 in [-1.0, 1.0].
|
||||
let samples: Vec<f32> = if spec.bits_per_sample == 16 {
|
||||
reader
|
||||
.samples::<i16>()
|
||||
.filter_map(|s| s.ok())
|
||||
.map(|s| s as f32 / 32768.0)
|
||||
.collect()
|
||||
} else if spec.bits_per_sample == 32 && spec.sample_format == hound::SampleFormat::Float {
|
||||
reader.samples::<f32>().filter_map(|s| s.ok()).collect()
|
||||
} else {
|
||||
return Err(format!(
|
||||
"unsupported WAV format: {}bit {:?}",
|
||||
spec.bits_per_sample, spec.sample_format
|
||||
));
|
||||
};
|
||||
|
||||
if samples.is_empty() {
|
||||
return Ok(vec![]);
|
||||
}
|
||||
|
||||
pcm_to_mix_frames(&samples, wav_rate)
|
||||
}
|
||||
|
||||
/// Convert PCM samples at an arbitrary rate into 48kHz 20ms mixer frames.
|
||||
pub fn pcm_to_mix_frames(samples: &[f32], sample_rate: u32) -> Result<Vec<Vec<f32>>, String> {
|
||||
if samples.is_empty() {
|
||||
return Ok(vec![]);
|
||||
}
|
||||
|
||||
// Resample to MIX_RATE (48kHz) if needed.
|
||||
let resampled = if sample_rate != MIX_RATE {
|
||||
let mut transcoder = TranscodeState::new().map_err(|e| format!("codec init: {e}"))?;
|
||||
transcoder
|
||||
.resample_f32(samples, sample_rate, MIX_RATE)
|
||||
.map_err(|e| format!("resample: {e}"))?
|
||||
} else {
|
||||
samples.to_vec()
|
||||
};
|
||||
|
||||
// Split into MIX_FRAME_SIZE (960) sample frames.
|
||||
let mut frames = Vec::new();
|
||||
let mut offset = 0;
|
||||
while offset < resampled.len() {
|
||||
let end = (offset + MIX_FRAME_SIZE).min(resampled.len());
|
||||
let mut frame = resampled[offset..end].to_vec();
|
||||
// Pad short final frame with silence.
|
||||
frame.resize(MIX_FRAME_SIZE, 0.0);
|
||||
frames.push(frame);
|
||||
offset += MIX_FRAME_SIZE;
|
||||
}
|
||||
|
||||
Ok(frames)
|
||||
}
|
||||
245
rust/crates/proxy-engine/src/call.rs
Normal file
245
rust/crates/proxy-engine/src/call.rs
Normal file
@@ -0,0 +1,245 @@
|
||||
//! Call hub — owns N legs and a mixer task.
//!
//! Every call has a central mixer that provides mix-minus audio to all
//! participants. Legs can be added and removed dynamically mid-call.

use crate::mixer::{MixerCommand, RtpPacket};
use crate::sip_leg::SipLeg;
use sip_proto::message::SipMessage;
use std::collections::HashMap;
use std::net::SocketAddr;
use std::sync::Arc;
use std::time::Instant;
use tokio::net::UdpSocket;
use tokio::sync::mpsc;
use tokio::task::JoinHandle;

/// Identifier of a single leg, unique within its call.
pub type LegId = String;
|
||||
|
||||
/// Call state machine.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum CallState {
    SettingUp,
    Ringing,
    Connected,
    Voicemail,
    Ivr,
    Terminated,
}

impl CallState {
    /// Wire-format string for events/dashboards. Not currently emitted —
    /// call state changes flow as typed events (`call_answered`, etc.) —
    /// but kept for future status-snapshot work.
    #[allow(dead_code)]
    pub fn as_str(&self) -> &'static str {
        match *self {
            CallState::SettingUp => "setting-up",
            CallState::Ringing => "ringing",
            CallState::Connected => "connected",
            CallState::Voicemail => "voicemail",
            CallState::Ivr => "ivr",
            CallState::Terminated => "terminated",
        }
    }
}
|
||||
|
||||
/// Which side originated the call.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum CallDirection {
    Inbound,
    Outbound,
}

impl CallDirection {
    /// Wire-format string. See CallState::as_str.
    #[allow(dead_code)]
    pub fn as_str(&self) -> &'static str {
        match *self {
            CallDirection::Inbound => "inbound",
            CallDirection::Outbound => "outbound",
        }
    }
}
|
||||
|
||||
/// The type of a call leg.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum LegKind {
    SipProvider,
    SipDevice,
    WebRtc,
    /// Voicemail playback, IVR prompt playback, recording — not yet wired up
    /// as a distinct leg kind (those paths currently use the mixer's role
    /// system instead). Kept behind allow so adding a real media leg later
    /// doesn't require re-introducing the variant.
    #[allow(dead_code)]
    Media,
    /// Observer leg for recording, transcription, etc.
    Tool,
}

impl LegKind {
    /// Wire-format label for this leg kind.
    pub fn as_str(&self) -> &'static str {
        match *self {
            LegKind::SipProvider => "sip-provider",
            LegKind::SipDevice => "sip-device",
            LegKind::WebRtc => "webrtc",
            LegKind::Media => "media",
            LegKind::Tool => "tool",
        }
    }
}
|
||||
|
||||
/// Per-leg state.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum LegState {
    Inviting,
    Ringing,
    Connected,
    Terminated,
}

impl LegState {
    /// Wire-format label for this leg state.
    pub fn as_str(&self) -> &'static str {
        match *self {
            LegState::Inviting => "inviting",
            LegState::Ringing => "ringing",
            LegState::Connected => "connected",
            LegState::Terminated => "terminated",
        }
    }
}
|
||||
|
||||
/// Information about a single leg in a call.
pub struct LegInfo {
    /// Unique id of this leg within its call.
    pub id: LegId,
    /// What kind of participant this leg is (SIP device/provider, WebRTC, …).
    pub kind: LegKind,
    /// Current signaling state of the leg.
    pub state: LegState,
    /// Negotiated RTP payload type for this leg's media.
    pub codec_pt: u8,

    /// For SIP legs: the SIP dialog manager (handles 407 auth, BYE, etc).
    pub sip_leg: Option<SipLeg>,
    /// For SIP legs: the SIP Call-ID for message routing.
    pub sip_call_id: Option<String>,
    /// For WebRTC legs: the session ID in WebRtcEngine.
    ///
    /// Populated at leg creation but not yet consumed by the hub —
    /// WebRTC session lookup currently goes through the session registry
    /// directly. Kept for introspection/debugging.
    #[allow(dead_code)]
    pub webrtc_session_id: Option<String>,
    /// The RTP socket allocated for this leg.
    pub rtp_socket: Option<Arc<UdpSocket>>,
    /// The RTP port number.
    pub rtp_port: u16,
    /// Public IP to advertise in SDP/Record-Route when THIS leg is the
    /// destination of a rewrite. Populated only for provider legs; `None`
    /// for LAN SIP devices, WebRTC browsers, media, and tool legs (which
    /// are reachable via `lan_ip`). See `route_passthrough_message` for
    /// the per-destination advertise-IP logic.
    pub public_ip: Option<String>,
    /// The remote media endpoint (learned from SDP or address learning).
    pub remote_media: Option<SocketAddr>,
    /// SIP signaling address (provider or device).
    pub signaling_addr: Option<SocketAddr>,

    /// Flexible key-value metadata (consent state, tool config, etc.).
    /// Persisted into call history on call end.
    pub metadata: HashMap<String, serde_json::Value>,
}
|
||||
|
||||
/// A multiparty call with N legs and a central mixer.
pub struct Call {
    // Duplicated from the HashMap key in CallManager. Kept for future
    // status-snapshot work.
    #[allow(dead_code)]
    pub id: String,
    /// Current position in the call state machine.
    pub state: CallState,
    // Populated at call creation but not currently consumed — dashboard
    // pull snapshots are gone (push events only).
    #[allow(dead_code)]
    pub direction: CallDirection,
    /// Creation instant; basis for `duration_secs`.
    pub created_at: Instant,

    // Metadata.
    /// Caller identity (when known).
    pub caller_number: Option<String>,
    /// Callee identity (when known).
    pub callee_number: Option<String>,
    #[allow(dead_code)]
    pub provider_id: String,

    /// Original INVITE from the device (for device-originated outbound calls).
    /// Used to construct proper 180/200/error responses back to the device.
    pub device_invite: Option<SipMessage>,

    /// All legs in this call, keyed by leg ID.
    pub legs: HashMap<LegId, LegInfo>,

    /// Channel to send commands to the mixer task.
    pub mixer_cmd_tx: mpsc::Sender<MixerCommand>,

    /// Handle to the mixer task (aborted on call teardown).
    mixer_task: Option<JoinHandle<()>>,
}
|
||||
|
||||
impl Call {
|
||||
pub fn new(
|
||||
id: String,
|
||||
direction: CallDirection,
|
||||
provider_id: String,
|
||||
mixer_cmd_tx: mpsc::Sender<MixerCommand>,
|
||||
mixer_task: JoinHandle<()>,
|
||||
) -> Self {
|
||||
Self {
|
||||
id,
|
||||
state: CallState::SettingUp,
|
||||
direction,
|
||||
created_at: Instant::now(),
|
||||
caller_number: None,
|
||||
callee_number: None,
|
||||
provider_id,
|
||||
device_invite: None,
|
||||
legs: HashMap::new(),
|
||||
mixer_cmd_tx,
|
||||
mixer_task: Some(mixer_task),
|
||||
}
|
||||
}
|
||||
|
||||
/// Add a leg to the mixer. Sends the AddLeg command with channel endpoints.
|
||||
pub async fn add_leg_to_mixer(
|
||||
&self,
|
||||
leg_id: &str,
|
||||
codec_pt: u8,
|
||||
inbound_rx: mpsc::Receiver<RtpPacket>,
|
||||
outbound_tx: mpsc::Sender<Vec<u8>>,
|
||||
) {
|
||||
let _ = self
|
||||
.mixer_cmd_tx
|
||||
.send(MixerCommand::AddLeg {
|
||||
leg_id: leg_id.to_string(),
|
||||
codec_pt,
|
||||
inbound_rx,
|
||||
outbound_tx,
|
||||
})
|
||||
.await;
|
||||
}
|
||||
|
||||
/// Remove a leg from the mixer.
|
||||
pub async fn remove_leg_from_mixer(&self, leg_id: &str) {
|
||||
let _ = self
|
||||
.mixer_cmd_tx
|
||||
.send(MixerCommand::RemoveLeg {
|
||||
leg_id: leg_id.to_string(),
|
||||
})
|
||||
.await;
|
||||
}
|
||||
|
||||
pub fn duration_secs(&self) -> u64 {
|
||||
self.created_at.elapsed().as_secs()
|
||||
}
|
||||
|
||||
/// Shut down the mixer and abort its task.
|
||||
pub async fn shutdown_mixer(&mut self) {
|
||||
let _ = self.mixer_cmd_tx.send(MixerCommand::Shutdown).await;
|
||||
if let Some(handle) = self.mixer_task.take() {
|
||||
handle.abort();
|
||||
}
|
||||
}
|
||||
}
|
||||
2263
rust/crates/proxy-engine/src/call_manager.rs
Normal file
2263
rust/crates/proxy-engine/src/call_manager.rs
Normal file
File diff suppressed because it is too large
Load Diff
704
rust/crates/proxy-engine/src/config.rs
Normal file
704
rust/crates/proxy-engine/src/config.rs
Normal file
@@ -0,0 +1,704 @@
|
||||
//! Configuration types received from the TypeScript control plane.
|
||||
//!
|
||||
//! TypeScript loads config from `.nogit/config.json` and sends it to the
|
||||
//! proxy engine via the `configure` command. These types mirror the TS interfaces.
|
||||
|
||||
use serde::Deserialize;
|
||||
use sip_proto::message::SipMessage;
|
||||
use std::net::SocketAddr;
|
||||
|
||||
/// Network endpoint.
|
||||
#[derive(Debug, Clone, Deserialize)]
|
||||
pub struct Endpoint {
|
||||
pub address: String,
|
||||
pub port: u16,
|
||||
}
|
||||
|
||||
impl Endpoint {
|
||||
/// Resolve to a SocketAddr. Handles both IP addresses and hostnames.
|
||||
pub fn to_socket_addr(&self) -> Option<SocketAddr> {
|
||||
// Try direct parse first (IP address).
|
||||
if let Ok(addr) = format!("{}:{}", self.address, self.port).parse() {
|
||||
return Some(addr);
|
||||
}
|
||||
// DNS resolution for hostnames.
|
||||
use std::net::ToSocketAddrs;
|
||||
format!("{}:{}", self.address, self.port)
|
||||
.to_socket_addrs()
|
||||
.ok()
|
||||
.and_then(|mut addrs| addrs.next())
|
||||
}
|
||||
}
|
||||
|
||||
/// Provider quirks for codec/protocol workarounds.
//
// Deserialized from provider config for TS parity. Early-media silence
// injection and related workarounds are not yet ported to the Rust engine,
// so every field is populated by serde but not yet consumed.
#[allow(dead_code)]
#[derive(Debug, Clone, Deserialize)]
pub struct Quirks {
    /// Whether to inject silence during early media (not yet honored here).
    #[serde(rename = "earlyMediaSilence")]
    pub early_media_silence: bool,
    /// RTP payload type to use for the injected silence, when enabled.
    #[serde(rename = "silencePayloadType")]
    pub silence_payload_type: Option<u8>,
    /// Cap on how many silence packets to inject, when enabled.
    #[serde(rename = "silenceMaxPackets")]
    pub silence_max_packets: Option<u32>,
}
|
||||
|
||||
/// A SIP trunk provider configuration.
#[derive(Debug, Clone, Deserialize)]
pub struct ProviderConfig {
    /// Stable identifier used to reference this provider from routes.
    pub id: String,
    // UI label — populated by serde for parity with the TS config, not
    // consumed at runtime.
    #[allow(dead_code)]
    #[serde(rename = "displayName")]
    pub display_name: String,
    /// SIP domain of the trunk.
    pub domain: String,
    /// Where to send outbound SIP traffic for this provider.
    #[serde(rename = "outboundProxy")]
    pub outbound_proxy: Endpoint,
    /// Credentials for registration/auth challenges.
    pub username: String,
    pub password: String,
    /// How often to re-REGISTER, in seconds.
    #[serde(rename = "registerIntervalSec")]
    pub register_interval_sec: u32,
    /// Allowed RTP payload types, in preference order (presumably —
    /// confirm against the SDP negotiation code).
    pub codecs: Vec<u8>,
    // Workaround knobs populated by serde but not yet acted upon — see Quirks.
    #[allow(dead_code)]
    pub quirks: Quirks,
}
|
||||
|
||||
/// A SIP device (phone) configuration.
#[derive(Debug, Clone, Deserialize)]
pub struct DeviceConfig {
    /// Stable identifier used to reference this device from routes.
    pub id: String,
    /// Human-readable label for dashboards/UI.
    #[serde(rename = "displayName")]
    pub display_name: String,
    /// LAN address the device is expected to register/call from.
    #[serde(rename = "expectedAddress")]
    pub expected_address: String,
    /// Internal extension number assigned to this device.
    pub extension: String,
}
|
||||
|
||||
/// Route match criteria.
#[derive(Debug, Clone, Deserialize)]
pub struct RouteMatch {
    /// Call direction this rule applies to.
    pub direction: String, // "inbound" | "outbound"
    /// Optional filter on the called number (None = match any).
    #[serde(rename = "numberPattern")]
    pub number_pattern: Option<String>,
    /// Optional filter on the caller identity (None = match any).
    #[serde(rename = "callerPattern")]
    pub caller_pattern: Option<String>,
    /// Optional restriction to calls arriving via a specific provider.
    #[serde(rename = "sourceProvider")]
    pub source_provider: Option<String>,
    /// Optional restriction to calls originated by a specific device.
    #[serde(rename = "sourceDevice")]
    pub source_device: Option<String>,
}
|
||||
|
||||
/// Route action.
#[derive(Debug, Clone, Deserialize)]
// Several fields (voicemail_box, ivr_menu_id, no_answer_timeout) are read
// by resolve_inbound_route but not yet honored downstream — see the
// multi-target TODO in CallManager::create_inbound_call.
#[allow(dead_code)]
pub struct RouteAction {
    /// Device/extension targets to ring for inbound calls.
    pub targets: Option<Vec<String>>,
    /// Whether to also ring registered WebRTC browsers.
    #[serde(rename = "ringBrowsers")]
    pub ring_browsers: Option<bool>,
    /// Voicebox to fall back to (read but not yet honored downstream).
    #[serde(rename = "voicemailBox")]
    pub voicemail_box: Option<String>,
    /// IVR menu to enter (read but not yet honored downstream).
    #[serde(rename = "ivrMenuId")]
    pub ivr_menu_id: Option<String>,
    /// Seconds to ring before giving up (read but not yet honored).
    #[serde(rename = "noAnswerTimeout")]
    pub no_answer_timeout: Option<u32>,
    /// Provider to use for outbound calls.
    pub provider: Option<String>,
    /// Providers to try, in order, if the primary fails.
    #[serde(rename = "failoverProviders")]
    pub failover_providers: Option<Vec<String>>,
    /// Prefix to strip from the dialed number before sending.
    #[serde(rename = "stripPrefix")]
    pub strip_prefix: Option<String>,
    /// Prefix to prepend to the dialed number before sending.
    #[serde(rename = "prependPrefix")]
    pub prepend_prefix: Option<String>,
}
|
||||
|
||||
/// A routing rule.
#[derive(Debug, Clone, Deserialize)]
pub struct Route {
    // `id` and `name` are UI identifiers, populated by serde but not
    // consumed by the resolvers.
    #[allow(dead_code)]
    pub id: String,
    #[allow(dead_code)]
    pub name: String,
    /// Rule ordering key — confirm sort direction in the route resolver.
    pub priority: i32,
    /// Whether this rule participates in routing at all.
    pub enabled: bool,
    /// Criteria a call must satisfy for this rule to apply.
    #[serde(rename = "match")]
    pub match_criteria: RouteMatch,
    /// What to do when the rule matches.
    pub action: RouteAction,
}
|
||||
|
||||
/// Proxy network settings.
#[derive(Debug, Clone, Deserialize)]
pub struct ProxyConfig {
    /// LAN IP the proxy binds/advertises toward local devices.
    #[serde(rename = "lanIp")]
    pub lan_ip: String,
    /// SIP listening port on the LAN side.
    #[serde(rename = "lanPort")]
    pub lan_port: u16,
    /// Optional initial guess for the public IP (presumably refined at
    /// runtime — confirm against the public-IP discovery code).
    #[serde(rename = "publicIpSeed")]
    pub public_ip_seed: Option<String>,
    /// Inclusive UDP port range from which RTP sockets are allocated.
    #[serde(rename = "rtpPortRange")]
    pub rtp_port_range: RtpPortRange,
}
|
||||
|
||||
/// Inclusive bounds for RTP port allocation.
#[derive(Debug, Clone, Deserialize)]
pub struct RtpPortRange {
    pub min: u16,
    pub max: u16,
}
|
||||
|
||||
/// Full application config pushed from TypeScript.
#[derive(Debug, Clone, Deserialize)]
pub struct AppConfig {
    pub proxy: ProxyConfig,
    pub providers: Vec<ProviderConfig>,
    pub devices: Vec<DeviceConfig>,
    pub routing: RoutingConfig,
    /// Optional sections default to empty/None so older configs still parse.
    #[serde(default)]
    pub voiceboxes: Vec<VoiceboxConfig>,
    #[serde(default)]
    pub ivr: Option<IvrConfig>,
}
|
||||
|
||||
/// Wrapper for the list of routing rules.
#[derive(Debug, Clone, Deserialize)]
pub struct RoutingConfig {
    pub routes: Vec<Route>,
}
|
||||
|
||||
// ---------------------------------------------------------------------------
|
||||
// Voicebox config
|
||||
// ---------------------------------------------------------------------------
|
||||
|
||||
/// Configuration for one voicemail box.
#[allow(dead_code)]
#[derive(Debug, Clone, Deserialize)]
pub struct VoiceboxConfig {
    pub id: String,
    /// Defaults to false when absent.
    #[serde(default)]
    pub enabled: bool,
    /// Greeting as text (presumably rendered via TTS — confirm) …
    #[serde(rename = "greetingText")]
    pub greeting_text: Option<String>,
    /// … with an optional voice selection …
    #[serde(rename = "greetingVoice")]
    pub greeting_voice: Option<String>,
    /// … or as a pre-rendered WAV file on disk.
    #[serde(rename = "greetingWavPath")]
    pub greeting_wav_path: Option<String>,
    /// Cap on recorded message length, in seconds.
    #[serde(rename = "maxRecordingSec")]
    pub max_recording_sec: Option<u32>,
}
|
||||
|
||||
// ---------------------------------------------------------------------------
|
||||
// IVR config
|
||||
// ---------------------------------------------------------------------------
|
||||
|
||||
/// Top-level IVR configuration: a set of menus plus the entry point.
#[allow(dead_code)]
#[derive(Debug, Clone, Deserialize)]
pub struct IvrConfig {
    pub enabled: bool,
    pub menus: Vec<IvrMenuConfig>,
    /// Id of the menu callers land on first; must match a `menus` entry.
    #[serde(rename = "entryMenuId")]
    pub entry_menu_id: String,
}
|
||||
|
||||
/// One IVR menu: a spoken prompt plus digit-keyed entries.
#[derive(Debug, Clone, Deserialize)]
pub struct IvrMenuConfig {
    pub id: String,
    /// Prompt text (presumably rendered via TTS — confirm).
    #[serde(rename = "promptText")]
    pub prompt_text: String,
    /// Optional voice selection for the prompt.
    #[serde(rename = "promptVoice")]
    pub prompt_voice: Option<String>,
    pub entries: Vec<IvrMenuEntry>,
    /// Seconds to wait for a digit before timing out.
    #[serde(rename = "timeoutSec")]
    pub timeout_sec: Option<u32>,
}
|
||||
|
||||
/// A single DTMF-digit binding within an IVR menu.
#[allow(dead_code)]
#[derive(Debug, Clone, Deserialize)]
pub struct IvrMenuEntry {
    /// The DTMF digit that triggers this entry.
    pub digit: String,
    /// Action keyword (interpreted by the IVR engine — semantics not
    /// visible here).
    pub action: String,
    /// Action-specific target (e.g. an extension or menu id).
    pub target: Option<String>,
}
|
||||
|
||||
// ---------------------------------------------------------------------------
|
||||
// Pattern matching (ported from ts/config.ts)
|
||||
// ---------------------------------------------------------------------------
|
||||
|
||||
/// Extract the URI user part and normalize phone-like identities for routing.
|
||||
///
|
||||
/// This keeps inbound route matching stable across provider-specific URI shapes,
|
||||
/// e.g. `sip:+49 421 219694@trunk.example` and `sip:0049421219694@trunk.example`
|
||||
/// both normalize to `+49421219694`.
|
||||
pub fn normalize_routing_identity(value: &str) -> String {
|
||||
let extracted = SipMessage::extract_uri_user(value).unwrap_or(value).trim();
|
||||
if extracted.is_empty() {
|
||||
return String::new();
|
||||
}
|
||||
|
||||
let mut digits = String::new();
|
||||
let mut saw_plus = false;
|
||||
|
||||
for (idx, ch) in extracted.chars().enumerate() {
|
||||
if ch.is_ascii_digit() {
|
||||
digits.push(ch);
|
||||
continue;
|
||||
}
|
||||
|
||||
if ch == '+' && idx == 0 {
|
||||
saw_plus = true;
|
||||
continue;
|
||||
}
|
||||
|
||||
if matches!(ch, ' ' | '\t' | '-' | '.' | '/' | '(' | ')') {
|
||||
continue;
|
||||
}
|
||||
|
||||
return extracted.to_string();
|
||||
}
|
||||
|
||||
if digits.is_empty() {
|
||||
return extracted.to_string();
|
||||
}
|
||||
if saw_plus {
|
||||
return format!("+{digits}");
|
||||
}
|
||||
if digits.starts_with("00") && digits.len() > 2 {
|
||||
return format!("+{}", &digits[2..]);
|
||||
}
|
||||
|
||||
digits
|
||||
}
|
||||
|
||||
/// Heuristic: a routing identity is "phone-like" when it consists only of
/// ASCII digits and '+' signs and carries at least six digits.
fn looks_like_phone_identity(value: &str) -> bool {
    let mut digit_count = 0usize;
    for ch in value.chars() {
        if ch.is_ascii_digit() {
            digit_count += 1;
        } else if ch != '+' {
            return false;
        }
    }
    digit_count >= 6
}
|
||||
|
||||
/// Pick the best inbound called-number identity from common SIP headers.
|
||||
///
|
||||
/// Some providers deliver the DID in `To` / `P-Called-Party-ID` while the
|
||||
/// request URI contains an account username. Prefer a phone-like identity when
|
||||
/// present; otherwise fall back to the request URI user part.
|
||||
pub fn extract_inbound_called_number(msg: &SipMessage) -> String {
|
||||
let request_uri = normalize_routing_identity(msg.request_uri().unwrap_or(""));
|
||||
if looks_like_phone_identity(&request_uri) {
|
||||
return request_uri;
|
||||
}
|
||||
|
||||
for header_name in [
|
||||
"P-Called-Party-ID",
|
||||
"X-Called-Party-ID",
|
||||
"Diversion",
|
||||
"History-Info",
|
||||
"To",
|
||||
] {
|
||||
let candidate = normalize_routing_identity(msg.get_header(header_name).unwrap_or(""));
|
||||
if looks_like_phone_identity(&candidate) {
|
||||
return candidate;
|
||||
}
|
||||
}
|
||||
|
||||
request_uri
|
||||
}
|
||||
|
||||
/// Parse one endpoint (or the candidate value) of a numeric range pattern.
/// Returns `(has_plus_prefix, digit_string)`, or `None` when the trimmed
/// input is not an optional '+' followed by one or more ASCII digits.
fn parse_numeric_range_value(value: &str) -> Option<(bool, &str)> {
    let trimmed = value.trim();
    let (has_plus, digits) = match trimmed.strip_prefix('+') {
        Some(rest) => (true, rest),
        None => (false, trimmed),
    };
    if digits.is_empty() {
        return None;
    }
    if digits.bytes().all(|b| b.is_ascii_digit()) {
        Some((has_plus, digits))
    } else {
        None
    }
}

/// Match `value` against a `start..end` numeric range pattern.
///
/// Both endpoints and the value must agree on the '+' prefix and have the
/// same digit length; lexicographic comparison on equal-length digit strings
/// is then identical to numeric comparison. Degenerate ranges (start > end)
/// never match.
fn matches_numeric_range_pattern(pattern: &str, value: &str) -> bool {
    let (start_raw, end_raw) = match pattern.split_once("..") {
        Some(parts) => parts,
        None => return false,
    };

    let parsed = (
        parse_numeric_range_value(start_raw),
        parse_numeric_range_value(end_raw),
        parse_numeric_range_value(value),
    );
    let ((start_plus, start_digits), (end_plus, end_digits), (value_plus, value_digits)) =
        match parsed {
            (Some(s), Some(e), Some(v)) => (s, e, v),
            _ => return false,
        };

    start_plus == end_plus
        && value_plus == start_plus
        && start_digits.len() == end_digits.len()
        && value_digits.len() == start_digits.len()
        && start_digits <= end_digits
        && start_digits <= value_digits
        && value_digits <= end_digits
}
|
||||
|
||||
/// Test a value against a pattern string.
|
||||
/// - None/empty: matches everything (wildcard)
|
||||
/// - `start..end`: numeric range match
|
||||
/// - Trailing '*': prefix match
|
||||
/// - Starts with '/': regex match
|
||||
/// - Otherwise: exact match
|
||||
pub fn matches_pattern(pattern: Option<&str>, value: &str) -> bool {
|
||||
let pattern = match pattern {
|
||||
None => return true,
|
||||
Some(p) if p.is_empty() => return true,
|
||||
Some(p) => p,
|
||||
};
|
||||
|
||||
// Prefix match: "+49*"
|
||||
if pattern.ends_with('*') {
|
||||
return value.starts_with(&pattern[..pattern.len() - 1]);
|
||||
}
|
||||
|
||||
if matches_numeric_range_pattern(pattern, value) {
|
||||
return true;
|
||||
}
|
||||
|
||||
// Regex match: "/^\\+49/" or "/pattern/i"
|
||||
if pattern.starts_with('/') {
|
||||
if let Some(last_slash) = pattern[1..].rfind('/') {
|
||||
let re_str = &pattern[1..1 + last_slash];
|
||||
let flags = &pattern[2 + last_slash..];
|
||||
let case_insensitive = flags.contains('i');
|
||||
if let Ok(re) = if case_insensitive {
|
||||
regex_lite::Regex::new(&format!("(?i){re_str}"))
|
||||
} else {
|
||||
regex_lite::Regex::new(re_str)
|
||||
} {
|
||||
return re.is_match(value);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Exact match.
|
||||
value == pattern
|
||||
}
|
||||
|
||||
/// Result of resolving an outbound route.
pub struct OutboundRouteResult {
    /// Owned copy of the matched provider's configuration.
    pub provider: ProviderConfig,
    // TODO: prefix rewriting is unfinished — this is computed but the
    // caller ignores it and uses the raw dialed number.
    #[allow(dead_code)]
    pub transformed_number: String,
}
|
||||
|
||||
/// Result of resolving an inbound route.
//
// `device_ids`, `ring_all_devices`, and `ring_browsers` are consumed by
// create_inbound_call.
// The remaining fields (voicemail_box, ivr_menu_id, no_answer_timeout)
// are resolved but not yet acted upon — see the multi-target TODO.
#[allow(dead_code)]
pub struct InboundRouteResult {
    /// Explicit ring targets; empty when `ring_all_devices` is set.
    pub device_ids: Vec<String>,
    /// True when the route names no explicit targets (ring everything).
    pub ring_all_devices: bool,
    /// Whether browser/WebRTC endpoints should also ring.
    pub ring_browsers: bool,
    /// Voicebox id to divert to (not yet consumed).
    pub voicemail_box: Option<String>,
    /// IVR menu id to enter (not yet consumed).
    pub ivr_menu_id: Option<String>,
    /// Seconds to ring before the no-answer action (not yet consumed).
    pub no_answer_timeout: Option<u32>,
}
|
||||
|
||||
impl AppConfig {
    /// Resolve which provider to use for an outbound call.
    ///
    /// Enabled outbound routes are evaluated in descending `priority` order.
    /// The first route whose number pattern and optional source-device
    /// constraint match wins; inside it, the primary provider is tried
    /// before any failover providers, skipping providers that are unknown
    /// or not currently registered (per `is_provider_registered`).
    ///
    /// Falls back to the first configured provider — with the number left
    /// untransformed — when no route yields a usable provider. Returns
    /// `None` only when no providers are configured at all.
    pub fn resolve_outbound_route(
        &self,
        dialed_number: &str,
        source_device_id: Option<&str>,
        is_provider_registered: &dyn Fn(&str) -> bool,
    ) -> Option<OutboundRouteResult> {
        let mut routes: Vec<&Route> = self
            .routing
            .routes
            .iter()
            .filter(|r| r.enabled && r.match_criteria.direction == "outbound")
            .collect();
        // Higher priority value wins — sort descending.
        routes.sort_by(|a, b| b.priority.cmp(&a.priority));

        for route in &routes {
            let m = &route.match_criteria;

            if !matches_pattern(m.number_pattern.as_deref(), dialed_number) {
                continue;
            }
            // A source_device constraint requires an exact device-id match.
            if let Some(sd) = &m.source_device {
                if source_device_id != Some(sd.as_str()) {
                    continue;
                }
            }

            // Find a registered provider.
            let mut candidates: Vec<&str> = Vec::new();
            if let Some(p) = &route.action.provider {
                candidates.push(p);
            }
            if let Some(fps) = &route.action.failover_providers {
                candidates.extend(fps.iter().map(|s| s.as_str()));
            }

            for pid in candidates {
                let provider = match self.providers.iter().find(|p| p.id == pid) {
                    Some(p) => p,
                    None => continue,
                };
                if !is_provider_registered(pid) {
                    continue;
                }

                // Apply the route's number rewriting: strip first, then prepend.
                let mut num = dialed_number.to_string();
                if let Some(strip) = &route.action.strip_prefix {
                    if num.starts_with(strip.as_str()) {
                        num = num[strip.len()..].to_string();
                    }
                }
                if let Some(prepend) = &route.action.prepend_prefix {
                    num = format!("{prepend}{num}");
                }

                return Some(OutboundRouteResult {
                    provider: provider.clone(),
                    transformed_number: num,
                });
            }
        }

        // Fallback: first provider.
        self.providers.first().map(|p| OutboundRouteResult {
            provider: p.clone(),
            transformed_number: dialed_number.to_string(),
        })
    }

    /// Resolve which devices to ring for an inbound call.
    ///
    /// Enabled inbound routes are evaluated in descending `priority` order;
    /// a route matches when its optional source-provider constraint, its
    /// called-number pattern, and its caller pattern all match. Unlike the
    /// outbound path, there is no fallback: `None` means "no route matched"
    /// and the caller decides what to do with the call.
    pub fn resolve_inbound_route(
        &self,
        provider_id: &str,
        called_number: &str,
        caller_number: &str,
    ) -> Option<InboundRouteResult> {
        let mut routes: Vec<&Route> = self
            .routing
            .routes
            .iter()
            .filter(|r| r.enabled && r.match_criteria.direction == "inbound")
            .collect();
        // Higher priority value wins — sort descending.
        routes.sort_by(|a, b| b.priority.cmp(&a.priority));

        for route in &routes {
            let m = &route.match_criteria;

            if let Some(sp) = &m.source_provider {
                if sp != provider_id {
                    continue;
                }
            }
            if !matches_pattern(m.number_pattern.as_deref(), called_number) {
                continue;
            }
            if !matches_pattern(m.caller_pattern.as_deref(), caller_number) {
                continue;
            }

            // Absent targets means "ring every configured device".
            let explicit_targets = route.action.targets.clone();
            return Some(InboundRouteResult {
                device_ids: explicit_targets.clone().unwrap_or_default(),
                ring_all_devices: explicit_targets.is_none(),
                ring_browsers: route.action.ring_browsers.unwrap_or(false),
                voicemail_box: route.action.voicemail_box.clone(),
                ivr_menu_id: route.action.ivr_menu_id.clone(),
                no_answer_timeout: route.action.no_answer_timeout,
            });
        }

        None
    }
}
|
||||
|
||||
#[cfg(test)]
mod tests {
    use super::*;

    // Minimal one-provider / one-device AppConfig; only `routes` varies
    // between tests.
    fn test_app_config(routes: Vec<Route>) -> AppConfig {
        AppConfig {
            proxy: ProxyConfig {
                lan_ip: "127.0.0.1".to_string(),
                lan_port: 5070,
                public_ip_seed: None,
                rtp_port_range: RtpPortRange {
                    min: 20_000,
                    max: 20_100,
                },
            },
            providers: vec![ProviderConfig {
                id: "provider-a".to_string(),
                display_name: "Provider A".to_string(),
                domain: "example.com".to_string(),
                outbound_proxy: Endpoint {
                    address: "example.com".to_string(),
                    port: 5060,
                },
                username: "user".to_string(),
                password: "pass".to_string(),
                register_interval_sec: 300,
                codecs: vec![9],
                quirks: Quirks {
                    early_media_silence: false,
                    silence_payload_type: None,
                    silence_max_packets: None,
                },
            }],
            devices: vec![DeviceConfig {
                id: "desk".to_string(),
                display_name: "Desk".to_string(),
                expected_address: "127.0.0.1".to_string(),
                extension: "100".to_string(),
            }],
            routing: RoutingConfig { routes },
            voiceboxes: vec![],
            ivr: None,
        }
    }

    // Phone-like identities are normalized (separators dropped, 00 → '+');
    // non-phone identities pass through untouched.
    #[test]
    fn normalize_routing_identity_extracts_uri_user_and_phone_number() {
        assert_eq!(
            normalize_routing_identity("sip:0049 421 219694@voip.easybell.de"),
            "+49421219694"
        );
        assert_eq!(
            normalize_routing_identity("<tel:+49 (421) 219694>"),
            "+49421219694"
        );
        assert_eq!(normalize_routing_identity("sip:100@pbx.local"), "100");
        assert_eq!(normalize_routing_identity("sip:alice@pbx.local"), "alice");
    }

    // Inbound routing has no implicit fallback — no routes, no match.
    #[test]
    fn resolve_inbound_route_requires_explicit_match() {
        let cfg = test_app_config(vec![]);
        assert!(cfg
            .resolve_inbound_route("provider-a", "+49421219694", "+491701234567")
            .is_none());
    }

    // Two DIDs on the same provider resolve to different routes/actions
    // based on their number patterns.
    #[test]
    fn resolve_inbound_route_matches_per_number_on_shared_provider() {
        let cfg = test_app_config(vec![
            Route {
                id: "main".to_string(),
                name: "Main DID".to_string(),
                priority: 200,
                enabled: true,
                match_criteria: RouteMatch {
                    direction: "inbound".to_string(),
                    number_pattern: Some("+49421219694".to_string()),
                    caller_pattern: None,
                    source_provider: Some("provider-a".to_string()),
                    source_device: None,
                },
                action: RouteAction {
                    targets: Some(vec!["desk".to_string()]),
                    ring_browsers: Some(true),
                    voicemail_box: None,
                    ivr_menu_id: None,
                    no_answer_timeout: None,
                    provider: None,
                    failover_providers: None,
                    strip_prefix: None,
                    prepend_prefix: None,
                },
            },
            Route {
                id: "support".to_string(),
                name: "Support DID".to_string(),
                priority: 100,
                enabled: true,
                match_criteria: RouteMatch {
                    direction: "inbound".to_string(),
                    number_pattern: Some("+49421219695".to_string()),
                    caller_pattern: None,
                    source_provider: Some("provider-a".to_string()),
                    source_device: None,
                },
                action: RouteAction {
                    targets: None,
                    ring_browsers: Some(false),
                    voicemail_box: Some("support-box".to_string()),
                    ivr_menu_id: None,
                    no_answer_timeout: Some(20),
                    provider: None,
                    failover_providers: None,
                    strip_prefix: None,
                    prepend_prefix: None,
                },
            },
        ]);

        let main = cfg
            .resolve_inbound_route("provider-a", "+49421219694", "+491701234567")
            .expect("main DID should match");
        assert_eq!(main.device_ids, vec!["desk".to_string()]);
        assert!(main.ring_browsers);

        let support = cfg
            .resolve_inbound_route("provider-a", "+49421219695", "+491701234567")
            .expect("support DID should match");
        assert_eq!(support.voicemail_box.as_deref(), Some("support-box"));
        assert_eq!(support.no_answer_timeout, Some(20));
        assert!(!support.ring_browsers);
    }

    // R-URI user is an account name; the phone-like To header wins.
    #[test]
    fn extract_inbound_called_number_prefers_did_headers_over_username_ruri() {
        let raw = b"INVITE sip:2830573e1@proxy.example SIP/2.0\r\nTo: <sip:+4942116767548@proxy.example>\r\nFrom: <sip:+491701234567@provider.example>;tag=abc\r\nCall-ID: test-1\r\nCSeq: 1 INVITE\r\nContent-Length: 0\r\n\r\n";
        let msg = SipMessage::parse(raw).expect("invite should parse");
        assert_eq!(extract_inbound_called_number(&msg), "+4942116767548");
    }

    // R-URI user is already phone-like, so headers are never consulted.
    #[test]
    fn extract_inbound_called_number_keeps_phone_ruri_when_already_present() {
        let raw = b"INVITE sip:042116767548@proxy.example SIP/2.0\r\nTo: <sip:2830573e1@proxy.example>\r\nFrom: <sip:+491701234567@provider.example>;tag=abc\r\nCall-ID: test-2\r\nCSeq: 1 INVITE\r\nContent-Length: 0\r\n\r\n";
        let msg = SipMessage::parse(raw).expect("invite should parse");
        assert_eq!(extract_inbound_called_number(&msg), "042116767548");
    }

    // Range patterns: inclusive bounds, and the '+' prefix must agree
    // between the pattern endpoints and the candidate value.
    #[test]
    fn matches_pattern_supports_numeric_ranges() {
        assert!(matches_pattern(
            Some("042116767546..042116767548"),
            "042116767547"
        ));
        assert!(!matches_pattern(
            Some("042116767546..042116767548"),
            "042116767549"
        ));
        assert!(matches_pattern(
            Some("+4942116767546..+4942116767548"),
            "+4942116767547"
        ));
        assert!(!matches_pattern(
            Some("+4942116767546..+4942116767548"),
            "042116767547"
        ));
    }
}
|
||||
53
rust/crates/proxy-engine/src/ipc.rs
Normal file
53
rust/crates/proxy-engine/src/ipc.rs
Normal file
@@ -0,0 +1,53 @@
|
||||
//! IPC protocol — command dispatch and event emission.
|
||||
//!
|
||||
//! All communication with the TypeScript control plane goes through
|
||||
//! JSON-line messages on stdin/stdout (smartrust protocol).
|
||||
|
||||
use serde::Deserialize;
|
||||
use tokio::sync::mpsc;
|
||||
|
||||
/// Sender for serialized stdout output.
|
||||
pub type OutTx = mpsc::UnboundedSender<String>;
|
||||
|
||||
/// A command received from the TypeScript control plane.
///
/// Wire shape: `{ "id": ..., "method": ..., "params": ... }`.
#[derive(Deserialize)]
pub struct Command {
    /// Correlation id echoed back by `respond`.
    pub id: String,
    /// Method name used for dispatch.
    pub method: String,
    /// Free-form parameters; `serde_json::Value::Null` when the key is
    /// absent (via `#[serde(default)]`).
    #[serde(default)]
    pub params: serde_json::Value,
}
|
||||
|
||||
/// Send a response to a command.
|
||||
pub fn respond(
|
||||
tx: &OutTx,
|
||||
id: &str,
|
||||
success: bool,
|
||||
result: Option<serde_json::Value>,
|
||||
error: Option<&str>,
|
||||
) {
|
||||
let mut resp = serde_json::json!({ "id": id, "success": success });
|
||||
if let Some(r) = result {
|
||||
resp["result"] = r;
|
||||
}
|
||||
if let Some(e) = error {
|
||||
resp["error"] = serde_json::Value::String(e.to_string());
|
||||
}
|
||||
let _ = tx.send(resp.to_string());
|
||||
}
|
||||
|
||||
/// Send a success response.
///
/// Convenience wrapper around [`respond`] with `success = true` and no error.
pub fn respond_ok(tx: &OutTx, id: &str, result: serde_json::Value) {
    respond(tx, id, true, Some(result), None);
}

/// Send an error response.
///
/// Convenience wrapper around [`respond`] with `success = false` and no result.
pub fn respond_err(tx: &OutTx, id: &str, error: &str) {
    respond(tx, id, false, None, Some(error));
}
|
||||
|
||||
/// Emit an event to the TypeScript control plane.
|
||||
pub fn emit_event(tx: &OutTx, event: &str, data: serde_json::Value) {
|
||||
let msg = serde_json::json!({ "event": event, "data": data });
|
||||
let _ = tx.send(msg.to_string());
|
||||
}
|
||||
188
rust/crates/proxy-engine/src/jitter_buffer.rs
Normal file
188
rust/crates/proxy-engine/src/jitter_buffer.rs
Normal file
@@ -0,0 +1,188 @@
|
||||
//! Per-leg adaptive jitter buffer for the audio mixer.
|
||||
//!
|
||||
//! Sits between inbound RTP packet reception and the mixer's decode step.
|
||||
//! Reorders packets by sequence number and delivers exactly one frame per
|
||||
//! 20ms mixer tick, smoothing out network jitter. When a packet is missing,
|
||||
//! the mixer can invoke codec PLC to conceal the gap.
|
||||
|
||||
use crate::mixer::RtpPacket;
|
||||
use std::collections::BTreeMap;
|
||||
|
||||
/// Per-leg jitter buffer. Collects RTP packets keyed by sequence number,
/// delivers one frame per 20ms tick in sequence order.
///
/// Adaptive target depth: starts at 3 frames (60ms), adjusts between
/// 2–6 frames based on observed jitter.
pub struct JitterBuffer {
    /// Packets waiting for playout, keyed by seq number. BTreeMap keeps
    /// them sequence-ordered.
    buffer: BTreeMap<u16, RtpPacket>,
    /// Next expected sequence number for playout; None until the first push.
    next_seq: Option<u16>,
    /// Target buffer depth in frames (adaptive, 2..=6).
    target_depth: u32,
    /// Current fill level high-water mark (for adaptation).
    max_fill_seen: u32,
    /// Ticks since last adaptation adjustment (gates `adapt` to ~1s).
    adapt_counter: u32,
    /// Consecutive ticks where buffer was empty (for ramp-up).
    empty_streak: u32,
    /// Consecutive ticks where buffer had excess (for ramp-down).
    excess_streak: u32,
    /// Whether we've started playout (initial fill complete).
    playing: bool,
    /// Number of frames consumed since start (for stats).
    frames_consumed: u64,
    /// Number of frames lost (gap in sequence).
    frames_lost: u64,
}
|
||||
|
||||
/// What the mixer gets back each tick.
pub enum JitterResult {
    /// A packet is available for decoding.
    Packet(RtpPacket),
    /// Packet was expected but missing — invoke PLC (packet loss concealment).
    Missing,
    /// Buffer is in initial fill phase — output silence.
    Filling,
}
|
||||
|
||||
impl JitterBuffer {
|
||||
pub fn new() -> Self {
|
||||
Self {
|
||||
buffer: BTreeMap::new(),
|
||||
next_seq: None,
|
||||
target_depth: 3, // 60ms initial target
|
||||
max_fill_seen: 0,
|
||||
adapt_counter: 0,
|
||||
empty_streak: 0,
|
||||
excess_streak: 0,
|
||||
playing: false,
|
||||
frames_consumed: 0,
|
||||
frames_lost: 0,
|
||||
}
|
||||
}
|
||||
|
||||
/// Push a received RTP packet into the buffer.
|
||||
pub fn push(&mut self, pkt: RtpPacket) {
|
||||
// Ignore duplicates.
|
||||
if self.buffer.contains_key(&pkt.seq) {
|
||||
return;
|
||||
}
|
||||
|
||||
// Detect large forward seq jump (hold/resume, SSRC change).
|
||||
if let Some(next) = self.next_seq {
|
||||
let jump = pkt.seq.wrapping_sub(next);
|
||||
if jump > 1000 && jump < 0x8000 {
|
||||
// Massive forward jump — reset buffer.
|
||||
self.reset();
|
||||
self.next_seq = Some(pkt.seq);
|
||||
}
|
||||
}
|
||||
|
||||
if self.next_seq.is_none() {
|
||||
self.next_seq = Some(pkt.seq);
|
||||
}
|
||||
|
||||
self.buffer.insert(pkt.seq, pkt);
|
||||
}
|
||||
|
||||
/// Consume one frame for the current 20ms tick.
|
||||
/// Called once per mixer tick per leg.
|
||||
pub fn consume(&mut self) -> JitterResult {
|
||||
// Track fill level for adaptation.
|
||||
let fill = self.buffer.len() as u32;
|
||||
if fill > self.max_fill_seen {
|
||||
self.max_fill_seen = fill;
|
||||
}
|
||||
|
||||
// Initial fill phase: wait until we have target_depth packets.
|
||||
if !self.playing {
|
||||
if fill >= self.target_depth {
|
||||
self.playing = true;
|
||||
} else {
|
||||
return JitterResult::Filling;
|
||||
}
|
||||
}
|
||||
|
||||
let seq = match self.next_seq {
|
||||
Some(s) => s,
|
||||
None => return JitterResult::Filling,
|
||||
};
|
||||
|
||||
// Advance next_seq (wrapping u16).
|
||||
self.next_seq = Some(seq.wrapping_add(1));
|
||||
|
||||
// Try to pull the expected sequence number.
|
||||
if let Some(pkt) = self.buffer.remove(&seq) {
|
||||
self.frames_consumed += 1;
|
||||
self.empty_streak = 0;
|
||||
|
||||
// Adaptive: if buffer is consistently deep, we can tighten.
|
||||
if fill > self.target_depth + 2 {
|
||||
self.excess_streak += 1;
|
||||
} else {
|
||||
self.excess_streak = 0;
|
||||
}
|
||||
|
||||
JitterResult::Packet(pkt)
|
||||
} else {
|
||||
// Packet missing — PLC needed.
|
||||
self.frames_lost += 1;
|
||||
self.empty_streak += 1;
|
||||
self.excess_streak = 0;
|
||||
|
||||
JitterResult::Missing
|
||||
}
|
||||
}
|
||||
|
||||
/// Run adaptation logic. Call every tick; internally gates to ~1s intervals.
|
||||
pub fn adapt(&mut self) {
|
||||
self.adapt_counter += 1;
|
||||
if self.adapt_counter < 50 {
|
||||
return;
|
||||
}
|
||||
self.adapt_counter = 0;
|
||||
|
||||
// If we had many empty ticks, increase depth.
|
||||
if self.empty_streak > 3 && self.target_depth < 6 {
|
||||
self.target_depth += 1;
|
||||
}
|
||||
// If buffer consistently overfull, decrease depth.
|
||||
else if self.excess_streak > 25 && self.target_depth > 2 {
|
||||
self.target_depth -= 1;
|
||||
}
|
||||
|
||||
self.max_fill_seen = 0;
|
||||
}
|
||||
|
||||
/// Discard packets that are too old (seq far behind next_seq).
|
||||
/// Prevents unbounded memory growth from reordered/late packets.
|
||||
pub fn prune_stale(&mut self) {
|
||||
if let Some(next) = self.next_seq {
|
||||
// Remove anything more than 100 frames behind playout point.
|
||||
// Use wrapping arithmetic: if (next - seq) > 100, it's stale.
|
||||
let stale: Vec<u16> = self
|
||||
.buffer
|
||||
.keys()
|
||||
.filter(|&&seq| {
|
||||
let age = next.wrapping_sub(seq);
|
||||
age > 100 && age < 0x8000 // < 0x8000 means it's actually behind, not ahead
|
||||
})
|
||||
.copied()
|
||||
.collect();
|
||||
for seq in stale {
|
||||
self.buffer.remove(&seq);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/// Reset the buffer (e.g., after re-INVITE / hold-resume).
|
||||
pub fn reset(&mut self) {
|
||||
self.buffer.clear();
|
||||
self.next_seq = None;
|
||||
self.playing = false;
|
||||
self.empty_streak = 0;
|
||||
self.excess_streak = 0;
|
||||
self.adapt_counter = 0;
|
||||
}
|
||||
}
|
||||
111
rust/crates/proxy-engine/src/leg_io.rs
Normal file
111
rust/crates/proxy-engine/src/leg_io.rs
Normal file
@@ -0,0 +1,111 @@
|
||||
//! Leg I/O task spawners.
|
||||
//!
|
||||
//! Each SIP leg gets two tasks:
|
||||
//! - Inbound: recv_from on RTP socket → strip header → send RtpPacket to mixer channel
|
||||
//! - Outbound: recv encoded RTP from mixer channel → send_to remote media endpoint
|
||||
//!
|
||||
//! WebRTC leg I/O is handled inside webrtc_engine.rs (on_track + track.write).
|
||||
|
||||
use crate::mixer::RtpPacket;
|
||||
use std::net::SocketAddr;
|
||||
use std::sync::Arc;
|
||||
use tokio::net::UdpSocket;
|
||||
use tokio::sync::mpsc;
|
||||
|
||||
/// Channel pair for connecting a leg to the mixer.
pub struct LegChannels {
    /// Mixer receives decoded packets from this leg.
    pub inbound_tx: mpsc::Sender<RtpPacket>,
    pub inbound_rx: mpsc::Receiver<RtpPacket>,
    /// Mixer sends encoded RTP to this leg.
    pub outbound_tx: mpsc::Sender<Vec<u8>>,
    pub outbound_rx: mpsc::Receiver<Vec<u8>>,
}

/// Create a channel pair for a leg.
///
/// Inbound is buffered deeper (64) than outbound (8): the mixer drains
/// inbound in bursts per tick, while outbound is paced one frame at a time.
pub fn create_leg_channels() -> LegChannels {
    let (inbound_tx, inbound_rx) = mpsc::channel::<RtpPacket>(64);
    let (outbound_tx, outbound_rx) = mpsc::channel::<Vec<u8>>(8);
    LegChannels {
        inbound_tx,
        inbound_rx,
        outbound_tx,
        outbound_rx,
    }
}
|
||||
|
||||
/// Spawn the inbound I/O task for a SIP leg.
|
||||
/// Reads RTP from the socket, parses the variable-length header (RFC 3550),
|
||||
/// and sends the payload to the mixer.
|
||||
/// Returns the JoinHandle (exits when the inbound_tx channel is dropped).
|
||||
pub fn spawn_sip_inbound(
|
||||
rtp_socket: Arc<UdpSocket>,
|
||||
inbound_tx: mpsc::Sender<RtpPacket>,
|
||||
) -> tokio::task::JoinHandle<()> {
|
||||
tokio::spawn(async move {
|
||||
let mut buf = vec![0u8; 1500];
|
||||
loop {
|
||||
match rtp_socket.recv_from(&mut buf).await {
|
||||
Ok((n, _from)) => {
|
||||
if n < 12 {
|
||||
continue; // Too small for RTP header.
|
||||
}
|
||||
let pt = buf[1] & 0x7F;
|
||||
let marker = (buf[1] & 0x80) != 0;
|
||||
let seq = u16::from_be_bytes([buf[2], buf[3]]);
|
||||
let timestamp = u32::from_be_bytes([buf[4], buf[5], buf[6], buf[7]]);
|
||||
|
||||
// RFC 3550: header length = 12 + (CC * 4) + optional extension.
|
||||
let cc = (buf[0] & 0x0F) as usize;
|
||||
let has_extension = (buf[0] & 0x10) != 0;
|
||||
let mut offset = 12 + cc * 4;
|
||||
if has_extension {
|
||||
if offset + 4 > n {
|
||||
continue; // Malformed: extension header truncated.
|
||||
}
|
||||
let ext_len =
|
||||
u16::from_be_bytes([buf[offset + 2], buf[offset + 3]]) as usize;
|
||||
offset += 4 + ext_len * 4;
|
||||
}
|
||||
if offset >= n {
|
||||
continue; // No payload after header.
|
||||
}
|
||||
|
||||
let payload = buf[offset..n].to_vec();
|
||||
if payload.is_empty() {
|
||||
continue;
|
||||
}
|
||||
if inbound_tx
|
||||
.send(RtpPacket {
|
||||
payload,
|
||||
payload_type: pt,
|
||||
marker,
|
||||
seq,
|
||||
timestamp,
|
||||
})
|
||||
.await
|
||||
.is_err()
|
||||
{
|
||||
break; // Channel closed — leg removed.
|
||||
}
|
||||
}
|
||||
Err(_) => break, // Socket error.
|
||||
}
|
||||
}
|
||||
})
|
||||
}
|
||||
|
||||
/// Spawn the outbound I/O task for a SIP leg.
|
||||
/// Reads encoded RTP packets from the mixer and sends them to the remote media endpoint.
|
||||
/// Returns the JoinHandle (exits when the outbound_rx channel is closed).
|
||||
pub fn spawn_sip_outbound(
|
||||
rtp_socket: Arc<UdpSocket>,
|
||||
remote_media: SocketAddr,
|
||||
mut outbound_rx: mpsc::Receiver<Vec<u8>>,
|
||||
) -> tokio::task::JoinHandle<()> {
|
||||
tokio::spawn(async move {
|
||||
while let Some(rtp_data) = outbound_rx.recv().await {
|
||||
let _ = rtp_socket.send_to(&rtp_data, remote_media).await;
|
||||
}
|
||||
})
|
||||
}
|
||||
1608
rust/crates/proxy-engine/src/main.rs
Normal file
1608
rust/crates/proxy-engine/src/main.rs
Normal file
File diff suppressed because it is too large
Load Diff
908
rust/crates/proxy-engine/src/mixer.rs
Normal file
908
rust/crates/proxy-engine/src/mixer.rs
Normal file
@@ -0,0 +1,908 @@
|
||||
//! Audio mixer — mix-minus engine for multiparty calls.
|
||||
//!
|
||||
//! Each Call spawns one mixer task. Legs communicate with the mixer via
|
||||
//! tokio mpsc channels — no shared mutable state, no lock contention.
|
||||
//!
|
||||
//! Internal bus format: 48kHz f32 PCM (960 samples per 20ms frame).
|
||||
//! All encoding/decoding happens at leg boundaries. Per-leg inbound denoising at 48kHz.
|
||||
//!
|
||||
//! The mixer runs a 20ms tick loop:
|
||||
//! 1. Drain inbound channels, reorder RTP, decode variable-duration packets to 48kHz,
|
||||
//! and queue them in per-leg PCM buffers
|
||||
//! 2. Compute total mix (sum of all **participant** legs' f32 PCM as f64)
|
||||
//! 3. For each participant leg: mix-minus = total - own, resample to leg codec rate, encode, send
|
||||
//! 4. For each isolated leg: play prompt frame or silence, check DTMF
|
||||
//! 5. For each tool leg: send per-source unmerged audio batch
|
||||
//! 6. Forward DTMF between participant legs only
|
||||
|
||||
use crate::ipc::{emit_event, OutTx};
|
||||
use crate::jitter_buffer::{JitterBuffer, JitterResult};
|
||||
use crate::rtp::{build_rtp_header, rtp_clock_increment, rtp_clock_rate};
|
||||
use crate::tts::TtsStreamMessage;
|
||||
use codec_lib::{codec_sample_rate, new_denoiser, TranscodeState};
|
||||
use nnnoiseless::DenoiseState;
|
||||
use std::collections::{HashMap, VecDeque};
|
||||
use tokio::sync::{mpsc, oneshot, watch};
|
||||
use tokio::task::JoinHandle;
|
||||
use tokio::time::{self, Duration, MissedTickBehavior};
|
||||
|
||||
/// Mixing sample rate — 48kHz. Opus is native, G.722 needs 3× upsample, G.711 needs 6× upsample.
/// All processing (denoising, mixing) happens at this rate in f32 for maximum quality.
const MIX_RATE: u32 = 48000;
/// Samples per 20ms frame at the mixing rate.
const MIX_FRAME_SIZE: usize = 960; // 48000 * 0.020
/// Safety cap for how much timestamp-derived gap fill we synthesize at once.
const MAX_GAP_FILL_SAMPLES: usize = MIX_FRAME_SIZE * 6; // 120ms
/// Bound how many decode / concealment steps a leg can consume in one tick,
/// so one misbehaving leg cannot stall the whole mixer tick.
const MAX_PACKET_STEPS_PER_TICK: usize = 24;
/// Report the first output drop immediately, then every N drops.
const DROP_REPORT_INTERVAL: u64 = 50;
|
||||
|
||||
/// A raw RTP payload received from a leg (no RTP header).
pub struct RtpPacket {
    /// Codec payload bytes, header already stripped.
    pub payload: Vec<u8>,
    /// RTP payload type (7 bits) identifying the codec.
    pub payload_type: u8,
    /// RTP marker bit (first packet of a DTMF event, etc.).
    pub marker: bool,
    /// RTP sequence number for reordering.
    pub seq: u16,
    /// RTP timestamp from the original packet header.
    pub timestamp: u32,
}
|
||||
|
||||
// ---------------------------------------------------------------------------
|
||||
// Leg roles
|
||||
// ---------------------------------------------------------------------------
|
||||
|
||||
/// What role a leg currently plays in the mixer.
enum LegRole {
    /// Normal participant: contributes to mix, receives mix-minus.
    Participant,
    /// Temporarily isolated for IVR/consent interaction — hears only its
    /// prompt, contributes nothing to the mix.
    Isolated(IsolationState),
}

/// State of one in-progress isolated interaction (prompt + digit capture).
struct IsolationState {
    /// PCM frames at MIX_RATE (960 samples each, 48kHz f32) queued for playback.
    prompt_frames: VecDeque<Vec<f32>>,
    /// Live TTS frames arrive here while playback is already in progress.
    prompt_stream_rx: Option<mpsc::Receiver<TtsStreamMessage>>,
    /// Cancels the background TTS producer when the interaction ends early.
    prompt_cancel_tx: Option<watch::Sender<bool>>,
    /// Whether the live prompt stream has ended.
    prompt_stream_finished: bool,
    /// Digits that complete the interaction (e.g., ['1', '2']).
    expected_digits: Vec<char>,
    /// Ticks remaining before timeout (decremented each tick after prompt ends).
    timeout_ticks_remaining: u32,
    /// Whether we've finished playing the prompt.
    prompt_done: bool,
    /// Channel to send the result back to the command handler; Option so it
    /// can be taken exactly once.
    result_tx: Option<oneshot::Sender<InteractionResult>>,
}
|
||||
|
||||
/// Result of a leg interaction (consent prompt, IVR, etc.).
pub enum InteractionResult {
    /// The participant pressed one of the expected digits.
    Digit(char),
    /// No digit was received within the timeout.
    Timeout,
    /// The leg was removed or the call tore down before completion.
    Cancelled,
}
|
||||
|
||||
// ---------------------------------------------------------------------------
|
||||
// Tool legs
|
||||
// ---------------------------------------------------------------------------
|
||||
|
||||
/// Type of tool leg.
///
/// The mixer currently delivers the same per-source audio to both kinds;
/// the variant is kept for labeling/diagnostics (see `ToolLegSlot`).
#[derive(Debug, Clone, Copy)]
pub enum ToolType {
    Recording,
    Transcription,
}
|
||||
|
||||
/// Per-source audio delivered to a tool leg each mixer tick.
pub struct ToolAudioBatch {
    /// One entry per participant leg for this 20ms tick.
    pub sources: Vec<ToolAudioSource>,
}
|
||||
|
||||
/// One participant's 20ms audio frame.
pub struct ToolAudioSource {
    /// Identifier of the participant leg this frame came from.
    pub leg_id: String,
    /// PCM at 48kHz f32, MIX_FRAME_SIZE (960) samples.
    pub pcm_48k: Vec<f32>,
}
|
||||
|
||||
/// Internal storage for a tool leg inside the mixer.
struct ToolLegSlot {
    /// Kept for labeling/diagnostics; not read by the mixer itself.
    #[allow(dead_code)]
    tool_type: ToolType,
    /// Per-tick audio batches are pushed here non-blockingly.
    audio_tx: mpsc::Sender<ToolAudioBatch>,
    /// Batches dropped because the channel was full or closed.
    dropped_batches: u64,
}
|
||||
|
||||
// ---------------------------------------------------------------------------
|
||||
// Commands
|
||||
// ---------------------------------------------------------------------------
|
||||
|
||||
/// Commands sent to the mixer task via a control channel.
///
/// Commands are drained non-blockingly at the start of each 20ms tick.
pub enum MixerCommand {
    /// Add a new participant leg to the mix.
    AddLeg {
        leg_id: String,
        /// RTP payload type identifying the leg's negotiated codec.
        codec_pt: u8,
        /// Inbound RTP payloads (header already stripped) for this leg.
        inbound_rx: mpsc::Receiver<RtpPacket>,
        /// Fully formed RTP packets (header + payload) to send to this leg.
        outbound_tx: mpsc::Sender<Vec<u8>>,
    },
    /// Remove a leg from the mix (channels are dropped, I/O tasks exit).
    RemoveLeg { leg_id: String },
    /// Shut down the mixer.
    Shutdown,

    /// Isolate a leg and start an interaction (consent prompt, IVR).
    /// The leg is removed from the mix and hears the prompt instead.
    /// DTMF from the leg is checked against expected_digits.
    StartInteraction {
        leg_id: String,
        /// PCM frames at MIX_RATE (48kHz f32), each 960 samples.
        prompt_pcm_frames: Vec<Vec<f32>>,
        /// Optional live prompt stream. Frames are appended as they are synthesized.
        prompt_stream_rx: Option<mpsc::Receiver<TtsStreamMessage>>,
        /// Optional cancellation handle for the live prompt stream.
        prompt_cancel_tx: Option<watch::Sender<bool>>,
        /// Digits that complete the interaction.
        expected_digits: Vec<char>,
        /// Timeout counted only after the prompt ends, in 20ms ticks
        /// (timeout_ms / 20).
        timeout_ms: u32,
        /// Resolved exactly once with Digit/Timeout/Cancelled.
        result_tx: oneshot::Sender<InteractionResult>,
    },

    /// Add a tool leg that receives per-source unmerged audio.
    AddToolLeg {
        leg_id: String,
        tool_type: ToolType,
        audio_tx: mpsc::Sender<ToolAudioBatch>,
    },
    /// Remove a tool leg (drops the channel, background task finalizes).
    RemoveToolLeg { leg_id: String },
}
|
||||
|
||||
// ---------------------------------------------------------------------------
|
||||
// Mixer internals
|
||||
// ---------------------------------------------------------------------------
|
||||
|
||||
/// Internal per-leg state inside the mixer.
struct MixerLegSlot {
    /// Negotiated RTP payload type for this leg's codec.
    codec_pt: u8,
    /// Decode/encode/resample state for this leg.
    transcoder: TranscodeState,
    /// Per-leg inbound denoiser (48kHz, 480-sample frames).
    denoiser: Box<DenoiseState<'static>>,
    inbound_rx: mpsc::Receiver<RtpPacket>,
    outbound_tx: mpsc::Sender<Vec<u8>>,
    /// Decoded PCM waiting for playout. Variable-duration RTP packets are
    /// decoded into this FIFO; the mixer consumes exactly one 20ms frame per tick.
    pcm_buffer: VecDeque<f32>,
    /// Last decoded+denoised PCM frame at MIX_RATE (960 samples, 48kHz f32).
    last_pcm_frame: Vec<f32>,
    /// Next RTP timestamp expected from the inbound stream.
    expected_rtp_timestamp: Option<u32>,
    /// Best-effort estimate of packet duration in RTP clock units.
    estimated_packet_ts: u32,
    /// Number of consecutive ticks with no inbound packet.
    silent_ticks: u32,
    /// Per-leg jitter buffer for packet reordering and timing.
    jitter: JitterBuffer,
    // RTP output state (sequence/timestamp/SSRC for packets we originate).
    rtp_seq: u16,
    rtp_ts: u32,
    rtp_ssrc: u32,
    /// Dropped outbound frames for this leg (queue full / closed).
    outbound_drops: u64,
    /// Current role of this leg in the mixer.
    role: LegRole,
}
|
||||
|
||||
fn mix_samples_to_rtp_ts(codec_pt: u8, mix_samples: usize) -> u32 {
|
||||
let clock_rate = rtp_clock_rate(codec_pt).max(1) as u64;
|
||||
(((mix_samples as u64 * clock_rate) + (MIX_RATE as u64 / 2)) / MIX_RATE as u64) as u32
|
||||
}
|
||||
|
||||
fn rtp_ts_to_mix_samples(codec_pt: u8, rtp_ts: u32) -> usize {
|
||||
let clock_rate = rtp_clock_rate(codec_pt).max(1) as u64;
|
||||
(((rtp_ts as u64 * MIX_RATE as u64) + (clock_rate / 2)) / clock_rate) as usize
|
||||
}
|
||||
|
||||
/// True when a wrapping RTP timestamp difference represents forward motion
/// (non-zero and in the lower half of the u32 range).
fn is_forward_rtp_delta(delta: u32) -> bool {
    (1..0x8000_0000).contains(&delta)
}
|
||||
|
||||
fn should_emit_drop_event(total_drops: u64) -> bool {
|
||||
total_drops == 1 || total_drops % DROP_REPORT_INTERVAL == 0
|
||||
}
|
||||
|
||||
fn emit_output_drop_event(
|
||||
out_tx: &OutTx,
|
||||
call_id: &str,
|
||||
leg_id: Option<&str>,
|
||||
tool_leg_id: Option<&str>,
|
||||
stream: &str,
|
||||
reason: &str,
|
||||
total_drops: u64,
|
||||
) {
|
||||
if !should_emit_drop_event(total_drops) {
|
||||
return;
|
||||
}
|
||||
|
||||
emit_event(
|
||||
out_tx,
|
||||
"mixer_output_drop",
|
||||
serde_json::json!({
|
||||
"call_id": call_id,
|
||||
"leg_id": leg_id,
|
||||
"tool_leg_id": tool_leg_id,
|
||||
"stream": stream,
|
||||
"reason": reason,
|
||||
"total_drops": total_drops,
|
||||
}),
|
||||
);
|
||||
}
|
||||
|
||||
fn fade_concealment_from_last_frame(slot: &mut MixerLegSlot, samples: usize, decay: f32) {
|
||||
let mut template = if slot.last_pcm_frame.is_empty() {
|
||||
vec![0.0f32; MIX_FRAME_SIZE]
|
||||
} else {
|
||||
slot.last_pcm_frame.clone()
|
||||
};
|
||||
|
||||
let mut remaining = samples;
|
||||
while remaining > 0 {
|
||||
for sample in &mut template {
|
||||
*sample *= decay;
|
||||
}
|
||||
let take = remaining.min(template.len());
|
||||
slot.pcm_buffer.extend(template.iter().take(take).copied());
|
||||
remaining -= take;
|
||||
}
|
||||
}
|
||||
|
||||
fn append_packet_loss_concealment(slot: &mut MixerLegSlot, samples: usize) {
|
||||
let mut remaining = samples.max(1);
|
||||
while remaining > 0 {
|
||||
let chunk = remaining.min(MIX_FRAME_SIZE);
|
||||
if slot.codec_pt == codec_lib::PT_OPUS {
|
||||
match slot.transcoder.opus_plc(chunk) {
|
||||
Ok(mut pcm) => {
|
||||
pcm.resize(chunk, 0.0);
|
||||
slot.pcm_buffer.extend(pcm);
|
||||
}
|
||||
Err(_) => fade_concealment_from_last_frame(slot, chunk, 0.8),
|
||||
}
|
||||
} else {
|
||||
fade_concealment_from_last_frame(slot, chunk, 0.85);
|
||||
}
|
||||
remaining -= chunk;
|
||||
}
|
||||
}
|
||||
|
||||
fn decode_packet_to_mix_pcm(slot: &mut MixerLegSlot, pkt: &RtpPacket) -> Option<Vec<f32>> {
|
||||
let (pcm, rate) = slot
|
||||
.transcoder
|
||||
.decode_to_f32(&pkt.payload, pkt.payload_type)
|
||||
.ok()?;
|
||||
|
||||
let pcm_48k = if rate == MIX_RATE {
|
||||
pcm
|
||||
} else {
|
||||
slot.transcoder
|
||||
.resample_f32(&pcm, rate, MIX_RATE)
|
||||
.unwrap_or_else(|_| vec![0.0f32; MIX_FRAME_SIZE])
|
||||
};
|
||||
|
||||
let processed = if slot.codec_pt != codec_lib::PT_OPUS {
|
||||
TranscodeState::denoise_f32(&mut slot.denoiser, &pcm_48k)
|
||||
} else {
|
||||
pcm_48k
|
||||
};
|
||||
|
||||
Some(processed)
|
||||
}
|
||||
|
||||
/// Decode an in-order inbound packet and append its PCM to the leg's playout
/// FIFO, filling any RTP-timestamp gap ahead of it with concealment first.
fn queue_inbound_packet(slot: &mut MixerLegSlot, pkt: RtpPacket) {
    if let Some(pcm_48k) = decode_packet_to_mix_pcm(slot, &pkt) {
        if pcm_48k.is_empty() {
            return;
        }

        // If this packet's timestamp is ahead of the expected one, packets
        // were lost: conceal small gaps, reset on very large ones.
        if let Some(expected_ts) = slot.expected_rtp_timestamp {
            // Wrapping subtraction handles RTP timestamp wraparound.
            let gap_ts = pkt.timestamp.wrapping_sub(expected_ts);
            if is_forward_rtp_delta(gap_ts) {
                let gap_samples = rtp_ts_to_mix_samples(slot.codec_pt, gap_ts);
                if gap_samples <= MAX_GAP_FILL_SAMPLES {
                    append_packet_loss_concealment(slot, gap_samples);
                } else {
                    // Gap too large to conceal (>120ms): drop buffered audio
                    // and resynchronize from this packet.
                    slot.pcm_buffer.clear();
                }
            }
        }

        // Refresh the packet-duration estimate and the next expected
        // timestamp from this packet's actual decoded length.
        let packet_ts = mix_samples_to_rtp_ts(slot.codec_pt, pcm_48k.len());
        if packet_ts > 0 {
            slot.estimated_packet_ts = packet_ts;
            slot.expected_rtp_timestamp = Some(pkt.timestamp.wrapping_add(packet_ts));
        }
        slot.pcm_buffer.extend(pcm_48k);
    }
}
|
||||
|
||||
/// Pull packets (or concealment) from the jitter buffer until the leg has at
/// least one 20ms frame buffered, bounded by MAX_PACKET_STEPS_PER_TICK so a
/// burst cannot stall the mixer tick.
fn fill_leg_playout_buffer(slot: &mut MixerLegSlot) {
    let mut steps = 0usize;
    while slot.pcm_buffer.len() < MIX_FRAME_SIZE && steps < MAX_PACKET_STEPS_PER_TICK {
        steps += 1;
        match slot.jitter.consume() {
            JitterResult::Packet(pkt) => queue_inbound_packet(slot, pkt),
            JitterResult::Missing => {
                // A packet is definitively missing: conceal roughly one
                // packet's worth of audio and advance the expected timestamp
                // past it so the next real packet isn't treated as a gap.
                let conceal_ts = slot
                    .estimated_packet_ts
                    .max(rtp_clock_increment(slot.codec_pt));
                let conceal_samples =
                    rtp_ts_to_mix_samples(slot.codec_pt, conceal_ts).clamp(1, MAX_GAP_FILL_SAMPLES);
                append_packet_loss_concealment(slot, conceal_samples);
                if let Some(expected_ts) = slot.expected_rtp_timestamp {
                    slot.expected_rtp_timestamp = Some(expected_ts.wrapping_add(conceal_ts));
                }
            }
            // Jitter buffer still warming up; leave the buffer short (the
            // frame taken below is padded with silence).
            JitterResult::Filling => break,
        }
    }
}
|
||||
|
||||
fn take_mix_frame(slot: &mut MixerLegSlot) -> Vec<f32> {
|
||||
let mut frame = Vec::with_capacity(MIX_FRAME_SIZE);
|
||||
while frame.len() < MIX_FRAME_SIZE {
|
||||
if let Some(sample) = slot.pcm_buffer.pop_front() {
|
||||
frame.push(sample);
|
||||
} else {
|
||||
frame.push(0.0);
|
||||
}
|
||||
}
|
||||
frame
|
||||
}
|
||||
|
||||
/// Soft-knee limiter for mixed samples: values at or below the knee pass
/// unchanged; values above it are compressed smoothly and capped at ±1.0.
fn soft_limit_sample(sample: f32) -> f32 {
    const KNEE: f32 = 0.85;

    let magnitude = sample.abs();
    if magnitude <= KNEE {
        return sample;
    }
    // Compress the part above the knee; the divisor grows with the excess,
    // so the output asymptotically approaches KNEE + (1 - KNEE).
    let over = magnitude - KNEE;
    let limited = KNEE + over / (1.0 + over / (1.0 - KNEE));
    limited.min(1.0).copysign(sample)
}
|
||||
|
||||
fn try_send_leg_output(
|
||||
out_tx: &OutTx,
|
||||
call_id: &str,
|
||||
leg_id: &str,
|
||||
slot: &mut MixerLegSlot,
|
||||
rtp: Vec<u8>,
|
||||
stream: &str,
|
||||
) {
|
||||
let reason = match slot.outbound_tx.try_send(rtp) {
|
||||
Ok(()) => return,
|
||||
Err(mpsc::error::TrySendError::Full(_)) => "full",
|
||||
Err(mpsc::error::TrySendError::Closed(_)) => "closed",
|
||||
};
|
||||
|
||||
slot.outbound_drops += 1;
|
||||
emit_output_drop_event(
|
||||
out_tx,
|
||||
call_id,
|
||||
Some(leg_id),
|
||||
None,
|
||||
stream,
|
||||
reason,
|
||||
slot.outbound_drops,
|
||||
);
|
||||
}
|
||||
|
||||
fn try_send_tool_output(
|
||||
out_tx: &OutTx,
|
||||
call_id: &str,
|
||||
tool_leg_id: &str,
|
||||
tool: &mut ToolLegSlot,
|
||||
batch: ToolAudioBatch,
|
||||
) {
|
||||
let reason = match tool.audio_tx.try_send(batch) {
|
||||
Ok(()) => return,
|
||||
Err(mpsc::error::TrySendError::Full(_)) => "full",
|
||||
Err(mpsc::error::TrySendError::Closed(_)) => "closed",
|
||||
};
|
||||
|
||||
tool.dropped_batches += 1;
|
||||
emit_output_drop_event(
|
||||
out_tx,
|
||||
call_id,
|
||||
None,
|
||||
Some(tool_leg_id),
|
||||
"tool-batch",
|
||||
reason,
|
||||
tool.dropped_batches,
|
||||
);
|
||||
}
|
||||
|
||||
fn cancel_prompt_producer(state: &mut IsolationState) {
|
||||
if let Some(cancel_tx) = state.prompt_cancel_tx.take() {
|
||||
let _ = cancel_tx.send(true);
|
||||
}
|
||||
}
|
||||
|
||||
fn cancel_isolated_interaction(state: &mut IsolationState) {
|
||||
cancel_prompt_producer(state);
|
||||
if let Some(tx) = state.result_tx.take() {
|
||||
let _ = tx.send(InteractionResult::Cancelled);
|
||||
}
|
||||
}
|
||||
|
||||
/// Drain any frames the live TTS producer has made available, without
/// blocking the mixer tick. Marks the stream finished on Finished/Failed or
/// when the producer disconnects.
fn drain_prompt_stream(
    out_tx: &OutTx,
    call_id: &str,
    leg_id: &str,
    state: &mut IsolationState,
) {
    loop {
        // Take the receiver for the duration of the poll; it is put back
        // only when the stream is still live, so an ended stream drops its
        // channel here.
        let Some(mut stream_rx) = state.prompt_stream_rx.take() else {
            return;
        };

        match stream_rx.try_recv() {
            Ok(TtsStreamMessage::Frames(frames)) => {
                // Queue new frames and keep polling for more this tick.
                state.prompt_frames.extend(frames);
                state.prompt_stream_rx = Some(stream_rx);
            }
            Ok(TtsStreamMessage::Finished) => {
                state.prompt_stream_finished = true;
                return;
            }
            Ok(TtsStreamMessage::Failed(error)) => {
                emit_event(
                    out_tx,
                    "mixer_error",
                    serde_json::json!({
                        "call_id": call_id,
                        "leg_id": leg_id,
                        "error": format!("tts stream failed: {error}"),
                    }),
                );
                // Treat a failed stream as finished so playback can conclude
                // with whatever frames were already queued.
                state.prompt_stream_finished = true;
                return;
            }
            Err(mpsc::error::TryRecvError::Empty) => {
                // Producer still running but nothing ready yet; try again
                // next tick.
                state.prompt_stream_rx = Some(stream_rx);
                return;
            }
            Err(mpsc::error::TryRecvError::Disconnected) => {
                state.prompt_stream_finished = true;
                return;
            }
        }
    }
}
|
||||
|
||||
/// Spawn the mixer task for a call. Returns the command sender and task handle.
|
||||
pub fn spawn_mixer(call_id: String, out_tx: OutTx) -> (mpsc::Sender<MixerCommand>, JoinHandle<()>) {
|
||||
let (cmd_tx, cmd_rx) = mpsc::channel::<MixerCommand>(32);
|
||||
|
||||
let handle = tokio::spawn(async move {
|
||||
mixer_loop(call_id, cmd_rx, out_tx).await;
|
||||
});
|
||||
|
||||
(cmd_tx, handle)
|
||||
}
|
||||
|
||||
/// The 20ms mixing loop.
///
/// Each tick: drain control commands, pull inbound RTP per leg, build the
/// full participant mix, send every participant its mix-minus (or its
/// isolation prompt), fan per-source audio out to tool legs, and forward
/// DTMF between participant legs.
async fn mixer_loop(call_id: String, mut cmd_rx: mpsc::Receiver<MixerCommand>, out_tx: OutTx) {
    let mut legs: HashMap<String, MixerLegSlot> = HashMap::new();
    let mut tool_legs: HashMap<String, ToolLegSlot> = HashMap::new();
    let mut interval = time::interval(Duration::from_millis(20));
    // If ticks are missed (task fell behind), skip them rather than bursting.
    interval.set_missed_tick_behavior(MissedTickBehavior::Skip);

    loop {
        interval.tick().await;

        // ── 1. Process control commands (non-blocking). ─────────────
        loop {
            match cmd_rx.try_recv() {
                Ok(MixerCommand::AddLeg {
                    leg_id,
                    codec_pt,
                    inbound_rx,
                    outbound_tx,
                }) => {
                    let transcoder = match TranscodeState::new() {
                        Ok(t) => t,
                        Err(e) => {
                            // Codec init failed: report and skip this leg.
                            emit_event(
                                &out_tx,
                                "mixer_error",
                                serde_json::json!({
                                    "call_id": call_id,
                                    "leg_id": leg_id,
                                    "error": format!("codec init: {e}"),
                                }),
                            );
                            continue;
                        }
                    };
                    legs.insert(
                        leg_id,
                        MixerLegSlot {
                            codec_pt,
                            transcoder,
                            denoiser: new_denoiser(),
                            inbound_rx,
                            outbound_tx,
                            pcm_buffer: VecDeque::new(),
                            last_pcm_frame: vec![0.0f32; MIX_FRAME_SIZE],
                            expected_rtp_timestamp: None,
                            estimated_packet_ts: rtp_clock_increment(codec_pt),
                            silent_ticks: 0,
                            rtp_seq: 0,
                            rtp_ts: 0,
                            rtp_ssrc: rand::random(),
                            outbound_drops: 0,
                            role: LegRole::Participant,
                            jitter: JitterBuffer::new(),
                        },
                    );
                }
                Ok(MixerCommand::RemoveLeg { leg_id }) => {
                    // If the leg is isolated, send Cancelled before dropping.
                    if let Some(slot) = legs.get_mut(&leg_id) {
                        if let LegRole::Isolated(ref mut state) = slot.role {
                            cancel_isolated_interaction(state);
                        }
                    }
                    legs.remove(&leg_id);
                    // Channels drop → I/O tasks exit cleanly.
                }
                Ok(MixerCommand::Shutdown) => {
                    // Cancel all outstanding interactions before shutting down.
                    for slot in legs.values_mut() {
                        if let LegRole::Isolated(ref mut state) = slot.role {
                            cancel_isolated_interaction(state);
                        }
                    }
                    return;
                }
                Ok(MixerCommand::StartInteraction {
                    leg_id,
                    prompt_pcm_frames,
                    prompt_stream_rx,
                    prompt_cancel_tx,
                    expected_digits,
                    timeout_ms,
                    result_tx,
                }) => {
                    if let Some(slot) = legs.get_mut(&leg_id) {
                        // Cancel any existing interaction first.
                        if let LegRole::Isolated(ref mut old_state) = slot.role {
                            cancel_isolated_interaction(old_state);
                        }
                        // Timeout is counted in 20ms mixer ticks.
                        let timeout_ticks = timeout_ms / 20;
                        slot.role = LegRole::Isolated(IsolationState {
                            prompt_frames: VecDeque::from(prompt_pcm_frames),
                            prompt_stream_rx,
                            prompt_cancel_tx,
                            prompt_stream_finished: false,
                            expected_digits,
                            timeout_ticks_remaining: timeout_ticks,
                            prompt_done: false,
                            result_tx: Some(result_tx),
                        });
                    } else {
                        // Leg not found — immediately cancel.
                        if let Some(cancel_tx) = prompt_cancel_tx {
                            let _ = cancel_tx.send(true);
                        }
                        let _ = result_tx.send(InteractionResult::Cancelled);
                    }
                }
                Ok(MixerCommand::AddToolLeg {
                    leg_id,
                    tool_type,
                    audio_tx,
                }) => {
                    tool_legs.insert(
                        leg_id,
                        ToolLegSlot {
                            tool_type,
                            audio_tx,
                            dropped_batches: 0,
                        },
                    );
                }
                Ok(MixerCommand::RemoveToolLeg { leg_id }) => {
                    tool_legs.remove(&leg_id);
                    // Dropping the ToolLegSlot drops audio_tx → background task sees channel close.
                }
                Err(mpsc::error::TryRecvError::Empty) => break,
                // Control channel gone: the owning side dropped us; exit.
                Err(mpsc::error::TryRecvError::Disconnected) => return,
            }
        }

        // Nothing to do this tick until at least one leg exists.
        if legs.is_empty() && tool_legs.is_empty() {
            continue;
        }

        // ── 2. Drain inbound packets, decode to 48kHz f32 PCM. ────
        // DTMF (PT 101) packets are collected separately.
        // Audio packets are sorted by sequence number and decoded
        // in order to maintain codec state (critical for G.722 ADPCM).
        let leg_ids: Vec<String> = legs.keys().cloned().collect();
        let mut dtmf_forward: Vec<(String, RtpPacket)> = Vec::new();

        for lid in &leg_ids {
            // Key came from legs.keys() this tick, so the entry exists.
            let slot = legs.get_mut(lid).unwrap();

            // Step 2a: Drain all pending packets into the jitter buffer.
            let mut got_audio = false;
            loop {
                match slot.inbound_rx.try_recv() {
                    Ok(pkt) => {
                        if pkt.payload_type == 101 {
                            dtmf_forward.push((lid.clone(), pkt));
                        } else {
                            got_audio = true;
                            slot.jitter.push(pkt);
                        }
                    }
                    Err(_) => break,
                }
            }

            // Step 2b: Decode enough RTP to cover one 20ms playout frame.
            // Variable-duration packets (10ms, 20ms, 60ms, ...) accumulate in
            // the per-leg PCM FIFO; we pop exactly one 20ms frame below.
            fill_leg_playout_buffer(slot);
            slot.last_pcm_frame = take_mix_frame(slot);

            // Run jitter adaptation + prune stale packets.
            slot.jitter.adapt();
            slot.jitter.prune_stale();

            // Silent ticks: based on actual network reception, not jitter buffer state.
            if got_audio || dtmf_forward.iter().any(|(src, _)| src == lid) {
                slot.silent_ticks = 0;
            } else {
                slot.silent_ticks += 1;
            }
            // After 150 silent ticks (~3s) reset playout state so a resuming
            // stream starts clean instead of concealing a huge gap.
            if slot.silent_ticks > 150 {
                slot.last_pcm_frame = vec![0.0f32; MIX_FRAME_SIZE];
                slot.pcm_buffer.clear();
                slot.expected_rtp_timestamp = None;
                slot.estimated_packet_ts = rtp_clock_increment(slot.codec_pt);
            }
        }

        // ── 3. Compute total mix from PARTICIPANT legs only. ────────
        // Accumulate as f64 to prevent precision loss when summing f32.
        let mut total_mix = vec![0.0f64; MIX_FRAME_SIZE];
        for slot in legs.values() {
            if matches!(slot.role, LegRole::Participant) {
                for (i, &s) in slot.last_pcm_frame.iter().enumerate().take(MIX_FRAME_SIZE) {
                    total_mix[i] += s as f64;
                }
            }
        }

        // ── 4. Per-leg output. ──────────────────────────────────────
        // Collect interaction completions to apply after the loop
        // (can't mutate role while iterating mutably for encode).
        let mut completed_interactions: Vec<(String, InteractionResult)> = Vec::new();

        for (lid, slot) in legs.iter_mut() {
            match &mut slot.role {
                LegRole::Participant => {
                    // Mix-minus: total minus this leg's own contribution.
                    // Apply a light soft limiter instead of hard clipping the sum.
                    let mut mix_minus = Vec::with_capacity(MIX_FRAME_SIZE);
                    for i in 0..MIX_FRAME_SIZE {
                        let sample = (total_mix[i] - slot.last_pcm_frame[i] as f64) as f32;
                        mix_minus.push(soft_limit_sample(sample));
                    }

                    // Resample from 48kHz to the leg's codec native rate.
                    let target_rate = codec_sample_rate(slot.codec_pt);
                    let resampled = if target_rate == MIX_RATE {
                        mix_minus
                    } else {
                        slot.transcoder
                            .resample_f32(&mix_minus, MIX_RATE, target_rate)
                            .unwrap_or_default()
                    };

                    // Encode to the leg's codec (f32 → i16 → codec inside encode_from_f32).
                    // An empty/failed encode skips output for this tick.
                    let encoded = match slot.transcoder.encode_from_f32(&resampled, slot.codec_pt) {
                        Ok(e) if !e.is_empty() => e,
                        _ => continue,
                    };

                    // Build RTP packet with header.
                    let header =
                        build_rtp_header(slot.codec_pt, slot.rtp_seq, slot.rtp_ts, slot.rtp_ssrc);
                    let mut rtp = header.to_vec();
                    rtp.extend_from_slice(&encoded);

                    slot.rtp_seq = slot.rtp_seq.wrapping_add(1);
                    slot.rtp_ts = slot.rtp_ts.wrapping_add(rtp_clock_increment(slot.codec_pt));

                    try_send_leg_output(&out_tx, &call_id, lid, slot, rtp, "participant-audio");
                }
                LegRole::Isolated(state) => {
                    // Pull any freshly synthesized TTS prompt frames first.
                    drain_prompt_stream(&out_tx, &call_id, lid, state);

                    // Check for DTMF digit from this leg.
                    let mut matched_digit: Option<char> = None;
                    for (src_lid, dtmf_pkt) in &dtmf_forward {
                        if src_lid == lid && dtmf_pkt.payload.len() >= 4 {
                            let event_id = dtmf_pkt.payload[0];
                            // Only act on the end-of-event packet (end bit set),
                            // so a held key registers once.
                            let end_bit = (dtmf_pkt.payload[1] & 0x80) != 0;
                            if end_bit {
                                const EVENT_CHARS: &[char] = &[
                                    '0', '1', '2', '3', '4', '5', '6', '7', '8', '9', '*', '#',
                                    'A', 'B', 'C', 'D',
                                ];
                                if let Some(&ch) = EVENT_CHARS.get(event_id as usize) {
                                    if state.expected_digits.contains(&ch) {
                                        matched_digit = Some(ch);
                                        break;
                                    }
                                }
                            }
                        }
                    }

                    if let Some(digit) = matched_digit {
                        // Interaction complete — digit matched.
                        completed_interactions.push((lid.clone(), InteractionResult::Digit(digit)));
                    } else {
                        // Play prompt frame, wait for live TTS, or move to timeout once the
                        // prompt stream has fully drained.
                        let pcm_frame = if let Some(frame) = state.prompt_frames.pop_front() {
                            frame
                        } else if !state.prompt_stream_finished {
                            // Stream still producing: play silence while waiting.
                            vec![0.0f32; MIX_FRAME_SIZE]
                        } else {
                            state.prompt_done = true;
                            vec![0.0f32; MIX_FRAME_SIZE]
                        };

                        // Encode prompt frame to the leg's codec.
                        let target_rate = codec_sample_rate(slot.codec_pt);
                        let resampled = if target_rate == MIX_RATE {
                            pcm_frame
                        } else {
                            slot.transcoder
                                .resample_f32(&pcm_frame, MIX_RATE, target_rate)
                                .unwrap_or_default()
                        };

                        let mut prompt_rtp: Option<Vec<u8>> = None;
                        if let Ok(encoded) =
                            slot.transcoder.encode_from_f32(&resampled, slot.codec_pt)
                        {
                            if !encoded.is_empty() {
                                let header = build_rtp_header(
                                    slot.codec_pt,
                                    slot.rtp_seq,
                                    slot.rtp_ts,
                                    slot.rtp_ssrc,
                                );
                                let mut rtp = header.to_vec();
                                rtp.extend_from_slice(&encoded);
                                slot.rtp_seq = slot.rtp_seq.wrapping_add(1);
                                slot.rtp_ts =
                                    slot.rtp_ts.wrapping_add(rtp_clock_increment(slot.codec_pt));
                                prompt_rtp = Some(rtp);
                            }
                        }

                        // Check timeout (only after prompt finishes).
                        if state.prompt_done {
                            if state.timeout_ticks_remaining == 0 {
                                completed_interactions
                                    .push((lid.clone(), InteractionResult::Timeout));
                            } else {
                                state.timeout_ticks_remaining -= 1;
                            }
                        }

                        if let Some(rtp) = prompt_rtp {
                            try_send_leg_output(
                                &out_tx,
                                &call_id,
                                lid,
                                slot,
                                rtp,
                                "isolated-prompt",
                            );
                        }
                    }
                }
            }
        }

        // Apply completed interactions — revert legs to Participant.
        for (lid, result) in completed_interactions {
            if let Some(slot) = legs.get_mut(&lid) {
                if let LegRole::Isolated(ref mut state) = slot.role {
                    cancel_prompt_producer(state);
                    if let Some(tx) = state.result_tx.take() {
                        let _ = tx.send(result);
                    }
                }
                slot.role = LegRole::Participant;
            }
        }

        // ── 5. Distribute per-source audio to tool legs. ────────────
        if !tool_legs.is_empty() {
            // Collect participant PCM frames (computed in step 2).
            let sources: Vec<ToolAudioSource> = legs
                .iter()
                .filter(|(_, s)| matches!(s.role, LegRole::Participant))
                .map(|(lid, s)| ToolAudioSource {
                    leg_id: lid.clone(),
                    pcm_48k: s.last_pcm_frame.clone(),
                })
                .collect();

            // Each tool leg gets its own copy of the batch.
            for (tool_leg_id, tool) in tool_legs.iter_mut() {
                let batch = ToolAudioBatch {
                    sources: sources
                        .iter()
                        .map(|s| ToolAudioSource {
                            leg_id: s.leg_id.clone(),
                            pcm_48k: s.pcm_48k.clone(),
                        })
                        .collect(),
                };
                try_send_tool_output(&out_tx, &call_id, tool_leg_id, tool, batch);
            }
        }

        // ── 6. Forward DTMF packets between participant legs only. ──
        for (source_lid, dtmf_pkt) in &dtmf_forward {
            // Skip if the source is an isolated leg (its DTMF was handled in step 4).
            if let Some(src_slot) = legs.get(source_lid) {
                if matches!(src_slot.role, LegRole::Isolated(_)) {
                    continue;
                }
            }
            for (target_lid, target_slot) in legs.iter_mut() {
                if target_lid == source_lid {
                    continue; // Don't echo DTMF back to sender.
                }
                // Don't forward to isolated legs.
                if matches!(target_slot.role, LegRole::Isolated(_)) {
                    continue;
                }
                let mut header = build_rtp_header(
                    101,
                    target_slot.rtp_seq,
                    target_slot.rtp_ts,
                    target_slot.rtp_ssrc,
                );
                if dtmf_pkt.marker {
                    header[1] |= 0x80; // Set marker bit.
                }
                let mut rtp_out = header.to_vec();
                rtp_out.extend_from_slice(&dtmf_pkt.payload);
                target_slot.rtp_seq = target_slot.rtp_seq.wrapping_add(1);
                // Don't increment rtp_ts for DTMF — it shares timestamp context with audio.
                try_send_leg_output(&out_tx, &call_id, target_lid, target_slot, rtp_out, "dtmf");
            }
        }
    }
}
|
||||
383
rust/crates/proxy-engine/src/provider.rs
Normal file
383
rust/crates/proxy-engine/src/provider.rs
Normal file
@@ -0,0 +1,383 @@
|
||||
//! Provider registration state machine.
|
||||
//!
|
||||
//! Handles the REGISTER cycle with upstream SIP providers:
|
||||
//! - Sends periodic REGISTER messages
|
||||
//! - Handles 401/407 Digest authentication challenges
|
||||
//! - Detects public IP from Via received= parameter
|
||||
//! - Emits registration state events to TypeScript
|
||||
//!
|
||||
//! Ported from ts/providerstate.ts.
|
||||
|
||||
use crate::config::ProviderConfig;
|
||||
use crate::ipc::{emit_event, OutTx};
|
||||
use sip_proto::helpers::{
|
||||
compute_digest_auth, generate_branch, generate_call_id, generate_tag, parse_digest_challenge,
|
||||
};
|
||||
use sip_proto::message::{RequestOptions, SipMessage};
|
||||
use std::net::SocketAddr;
|
||||
use std::sync::Arc;
|
||||
use tokio::net::UdpSocket;
|
||||
use tokio::sync::Mutex;
|
||||
use tokio::time::{self, Duration};
|
||||
|
||||
/// Runtime state for a single SIP provider.
pub struct ProviderState {
    /// Static provider settings/credentials from configuration.
    pub config: ProviderConfig,
    /// Public IP, seeded at construction and/or learned from the Via
    /// `received=` parameter; used in Via/Contact when present.
    pub public_ip: Option<String>,
    /// Whether the most recent REGISTER cycle got a 200 OK.
    pub is_registered: bool,
    /// Address-of-record: "sip:<username>@<domain>".
    pub registered_aor: String,

    // Registration transaction state.
    /// Call-ID reused for every REGISTER in this registration cycle.
    reg_call_id: String,
    /// CSeq counter, incremented before each REGISTER is built.
    reg_cseq: u32,
    /// From-tag reused for every REGISTER in this registration cycle.
    reg_from_tag: String,

    // Network.
    /// Local address; used in Via/Contact when no public IP is known.
    lan_ip: String,
    lan_port: u16,
}
|
||||
|
||||
impl ProviderState {
|
||||
pub fn new(config: ProviderConfig, public_ip_seed: Option<&str>) -> Self {
|
||||
let aor = format!("sip:{}@{}", config.username, config.domain);
|
||||
Self {
|
||||
public_ip: public_ip_seed.map(|s| s.to_string()),
|
||||
is_registered: false,
|
||||
registered_aor: aor,
|
||||
reg_call_id: generate_call_id(None),
|
||||
reg_cseq: 0,
|
||||
reg_from_tag: generate_tag(),
|
||||
lan_ip: String::new(),
|
||||
lan_port: 0,
|
||||
config,
|
||||
}
|
||||
}
|
||||
|
||||
/// Build the next REGISTER request for this provider (incrementing CSeq)
/// and return it serialized, ready to send upstream.
pub fn build_register(&mut self) -> Vec<u8> {
    self.reg_cseq += 1;
    // Prefer the detected public IP for Via/Contact; fall back to LAN IP.
    let pub_ip = self.public_ip.as_deref().unwrap_or(&self.lan_ip);

    let register = SipMessage::create_request(
        "REGISTER",
        &format!("sip:{}", self.config.domain),
        RequestOptions {
            via_host: pub_ip.to_string(),
            via_port: self.lan_port,
            via_transport: None,
            via_branch: Some(generate_branch()),
            from_uri: self.registered_aor.clone(),
            from_display_name: None,
            // Same Call-ID/From-tag for the whole registration cycle.
            from_tag: Some(self.reg_from_tag.clone()),
            to_uri: self.registered_aor.clone(),
            to_display_name: None,
            to_tag: None,
            call_id: Some(self.reg_call_id.clone()),
            cseq: Some(self.reg_cseq),
            contact: Some(format!(
                "<sip:{}@{}:{}>",
                self.config.username, pub_ip, self.lan_port
            )),
            max_forwards: Some(70),
            body: None,
            content_type: None,
            extra_headers: Some(vec![
                (
                    // Registration lifetime requested from the provider.
                    "Expires".to_string(),
                    self.config.register_interval_sec.to_string(),
                ),
                ("User-Agent".to_string(), "SipRouter/1.0".to_string()),
                (
                    "Allow".to_string(),
                    "INVITE, ACK, OPTIONS, CANCEL, BYE, SUBSCRIBE, NOTIFY, INFO, REFER, UPDATE"
                        .to_string(),
                ),
            ]),
        },
    );
    register.serialize()
}
|
||||
|
||||
/// Handle a SIP response that might be for this provider's REGISTER.
///
/// Returns:
/// - `None` — not ours (not a response, or Call-ID / CSeq method mismatch);
/// - `Some(vec![])` — consumed, nothing to send back;
/// - `Some(bytes)` — consumed; send these bytes (re-authenticated REGISTER).
pub fn handle_registration_response(&mut self, msg: &SipMessage) -> Option<Vec<u8>> {
    if !msg.is_response() {
        return None;
    }
    // Only responses matching our registration Call-ID belong to us.
    if msg.call_id() != self.reg_call_id {
        return None;
    }
    let cseq_method = msg.cseq_method().unwrap_or("");
    if !cseq_method.eq_ignore_ascii_case("REGISTER") {
        return None;
    }

    let code = msg.status_code().unwrap_or(0);

    if code == 200 {
        self.is_registered = true;
        return Some(Vec::new()); // consumed, no reply needed
    }

    if code == 401 || code == 407 {
        // 401 carries WWW-Authenticate; 407 carries Proxy-Authenticate.
        let challenge_header = if code == 401 {
            msg.get_header("WWW-Authenticate")
        } else {
            msg.get_header("Proxy-Authenticate")
        };

        let challenge_header = match challenge_header {
            Some(h) => h,
            None => return Some(Vec::new()), // consumed but no challenge
        };

        let challenge = match parse_digest_challenge(challenge_header) {
            Some(c) => c,
            None => return Some(Vec::new()),
        };

        let auth_value = compute_digest_auth(
            &self.config.username,
            &self.config.password,
            &challenge.realm,
            &challenge.nonce,
            "REGISTER",
            &format!("sip:{}", self.config.domain),
            challenge.algorithm.as_deref(),
            challenge.opaque.as_deref(),
        );

        // Resend REGISTER with auth credentials.
        // NOTE(review): every challenge triggers an immediate resend and no
        // retry cap is visible here — confirm the caller bounds repeated
        // 401s (e.g. bad credentials) so this cannot loop indefinitely.
        self.reg_cseq += 1;
        let pub_ip = self.public_ip.as_deref().unwrap_or(&self.lan_ip);

        // Header name mirrors the challenge type (401 vs 407).
        let auth_header_name = if code == 401 {
            "Authorization"
        } else {
            "Proxy-Authorization"
        };

        let register = SipMessage::create_request(
            "REGISTER",
            &format!("sip:{}", self.config.domain),
            RequestOptions {
                via_host: pub_ip.to_string(),
                via_port: self.lan_port,
                via_transport: None,
                via_branch: Some(generate_branch()),
                from_uri: self.registered_aor.clone(),
                from_display_name: None,
                from_tag: Some(self.reg_from_tag.clone()),
                to_uri: self.registered_aor.clone(),
                to_display_name: None,
                to_tag: None,
                call_id: Some(self.reg_call_id.clone()),
                cseq: Some(self.reg_cseq),
                contact: Some(format!(
                    "<sip:{}@{}:{}>",
                    self.config.username, pub_ip, self.lan_port
                )),
                max_forwards: Some(70),
                body: None,
                content_type: None,
                extra_headers: Some(vec![
                    (auth_header_name.to_string(), auth_value),
                    (
                        "Expires".to_string(),
                        self.config.register_interval_sec.to_string(),
                    ),
                    ("User-Agent".to_string(), "SipRouter/1.0".to_string()),
                    (
                        "Allow".to_string(),
                        "INVITE, ACK, OPTIONS, CANCEL, BYE, SUBSCRIBE, NOTIFY, INFO, REFER, UPDATE"
                            .to_string(),
                    ),
                ]),
            },
        );
        return Some(register.serialize());
    }

    // Any other 4xx/5xx/6xx: mark unregistered but still consume it.
    if code >= 400 {
        self.is_registered = false;
    }

    Some(Vec::new()) // consumed
}
|
||||
|
||||
/// Detect public IP from Via received= parameter.
|
||||
pub fn detect_public_ip(&mut self, via: &str) {
|
||||
if let Some(m) = via.find("received=") {
|
||||
let rest = &via[m + 9..];
|
||||
let end = rest
|
||||
.find(|c: char| !c.is_ascii_digit() && c != '.')
|
||||
.unwrap_or(rest.len());
|
||||
let ip = &rest[..end];
|
||||
if !ip.is_empty() && self.public_ip.as_deref() != Some(ip) {
|
||||
self.public_ip = Some(ip.to_string());
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
pub fn set_network(&mut self, lan_ip: &str, lan_port: u16) {
|
||||
self.lan_ip = lan_ip.to_string();
|
||||
self.lan_port = lan_port;
|
||||
}
|
||||
}
|
||||
|
||||
/// Manages all provider states and their registration cycles.
pub struct ProviderManager {
    /// One shared, lockable state per configured upstream provider.
    providers: Vec<Arc<Mutex<ProviderState>>>,
    /// Channel used to push events (e.g. registration state changes) upstream.
    out_tx: OutTx,
}
|
||||
|
||||
impl ProviderManager {
    /// Create an empty manager; providers are added via `configure`.
    pub fn new(out_tx: OutTx) -> Self {
        Self {
            providers: Vec::new(),
            out_tx,
        }
    }

    /// Initialize providers from config and start registration cycles.
    ///
    /// NOTE(review): `self.providers.clear()` drops the manager's handles, but
    /// any `provider_register_loop` tasks spawned by a previous call keep
    /// their own Arc clones and are never cancelled — re-configuring leaks
    /// the old registration loops. Consider a shutdown signal per task.
    pub async fn configure(
        &mut self,
        configs: &[ProviderConfig],
        public_ip_seed: Option<&str>,
        lan_ip: &str,
        lan_port: u16,
        socket: Arc<UdpSocket>,
    ) {
        self.providers.clear();

        for cfg in configs {
            let mut ps = ProviderState::new(cfg.clone(), public_ip_seed);
            ps.set_network(lan_ip, lan_port);
            let ps = Arc::new(Mutex::new(ps));
            self.providers.push(ps.clone());

            // Start the registration cycle.
            let socket = socket.clone();
            let out_tx = self.out_tx.clone();
            tokio::spawn(async move {
                provider_register_loop(ps, socket, out_tx).await;
            });
        }
    }

    /// Try to handle a SIP response as a provider registration response.
    /// Returns true if consumed.
    pub async fn handle_response(&self, msg: &SipMessage, socket: &UdpSocket) -> bool {
        for ps_arc in &self.providers {
            let mut ps = ps_arc.lock().await;
            // Snapshot so we can detect a registered/unregistered transition.
            let was_registered = ps.is_registered;
            if let Some(reply) = ps.handle_registration_response(msg) {
                // If there's a reply to send (e.g. auth retry).
                if !reply.is_empty() {
                    if let Some(dest) = ps.config.outbound_proxy.to_socket_addr() {
                        // Best-effort UDP send; errors are intentionally ignored.
                        let _ = socket.send_to(&reply, dest).await;
                    }
                }
                // Emit registration state change.
                if ps.is_registered != was_registered {
                    emit_event(
                        &self.out_tx,
                        "provider_registered",
                        serde_json::json!({
                            "provider_id": ps.config.id,
                            "registered": ps.is_registered,
                            "public_ip": ps.public_ip,
                        }),
                    );
                }
                return true;
            }
        }
        false
    }

    /// Find which provider sent a packet by matching source address.
    ///
    /// Matching is tried in order: exact configured host:port, configured
    /// host as a literal IP, then DNS-resolved variants of the host.
    pub async fn find_by_address(&self, addr: &SocketAddr) -> Option<Arc<Mutex<ProviderState>>> {
        for ps_arc in &self.providers {
            let ps = ps_arc.lock().await;
            let proxy_addr = format!(
                "{}:{}",
                ps.config.outbound_proxy.address, ps.config.outbound_proxy.port
            );
            if let Ok(expected) = proxy_addr.parse::<SocketAddr>() {
                if expected == *addr {
                    return Some(ps_arc.clone());
                }
            }
            // Also match by IP only (port may differ).
            if ps.config.outbound_proxy.address == addr.ip().to_string() {
                return Some(ps_arc.clone());
            }

            // Hostname-based providers (e.g. sipgate.de) often deliver inbound
            // INVITEs from resolved IPs rather than the literal configured host.
            // Resolve the proxy host and accept any matching IP/port variant.
            //
            // NOTE(review): `to_socket_addrs` performs synchronous DNS
            // resolution on the async runtime thread, while holding this
            // provider's lock. Consider `tokio::net::lookup_host` instead.
            use std::net::ToSocketAddrs;
            if let Ok(resolved) = format!(
                "{}:{}",
                ps.config.outbound_proxy.address, ps.config.outbound_proxy.port
            )
            .to_socket_addrs()
            {
                for resolved_addr in resolved {
                    if resolved_addr == *addr || resolved_addr.ip() == addr.ip() {
                        return Some(ps_arc.clone());
                    }
                }
            }
        }
        None
    }

    /// Find a provider by its config ID (e.g. "easybell").
    pub async fn find_by_provider_id(
        &self,
        provider_id: &str,
    ) -> Option<Arc<Mutex<ProviderState>>> {
        for ps_arc in &self.providers {
            let ps = ps_arc.lock().await;
            if ps.config.id == provider_id {
                return Some(ps_arc.clone());
            }
        }
        None
    }
}
|
||||
|
||||
/// Registration loop for a single provider.
|
||||
async fn provider_register_loop(
|
||||
ps: Arc<Mutex<ProviderState>>,
|
||||
socket: Arc<UdpSocket>,
|
||||
_out_tx: OutTx,
|
||||
) {
|
||||
// Initial registration.
|
||||
{
|
||||
let mut state = ps.lock().await;
|
||||
let register_buf = state.build_register();
|
||||
if let Some(dest) = state.config.outbound_proxy.to_socket_addr() {
|
||||
let _ = socket.send_to(®ister_buf, dest).await;
|
||||
}
|
||||
}
|
||||
|
||||
// Re-register periodically (85% of the interval).
|
||||
let interval_sec = {
|
||||
let state = ps.lock().await;
|
||||
(state.config.register_interval_sec as f64 * 0.85) as u64
|
||||
};
|
||||
let mut interval = time::interval(Duration::from_secs(interval_sec.max(30)));
|
||||
interval.tick().await; // skip first immediate tick
|
||||
|
||||
loop {
|
||||
interval.tick().await;
|
||||
let mut state = ps.lock().await;
|
||||
let register_buf = state.build_register();
|
||||
if let Some(dest) = state.config.outbound_proxy.to_socket_addr() {
|
||||
let _ = socket.send_to(®ister_buf, dest).await;
|
||||
}
|
||||
}
|
||||
}
|
||||
187
rust/crates/proxy-engine/src/recorder.rs
Normal file
187
rust/crates/proxy-engine/src/recorder.rs
Normal file
@@ -0,0 +1,187 @@
|
||||
//! Audio recorder — receives RTP packets and writes a WAV file.
|
||||
|
||||
use codec_lib::TranscodeState;
|
||||
use std::path::Path;
|
||||
|
||||
/// Active recording session.
pub struct Recorder {
    /// WAV sink; the RIFF header sizes are finalized when this is dropped.
    writer: hound::WavWriter<std::io::BufWriter<std::fs::File>>,
    /// Codec state used to decode RTP payloads and resample them.
    transcoder: TranscodeState,
    /// RTP payload type expected from the source (0 for raw-PCM recorders).
    source_pt: u8,
    /// Number of samples written so far.
    total_samples: u64,
    /// Output sample rate in Hz.
    sample_rate: u32,
    /// Optional cap on total_samples, derived from a max duration in ms.
    max_samples: Option<u64>,
    /// Destination path, echoed back in the final RecordingResult.
    file_path: String,
}
|
||||
|
||||
impl Recorder {
|
||||
/// Create a new recorder that writes to a WAV file.
|
||||
/// `source_pt` is the RTP payload type of the incoming audio.
|
||||
/// `max_duration_ms` optionally limits the recording length.
|
||||
pub fn new(
|
||||
file_path: &str,
|
||||
source_pt: u8,
|
||||
max_duration_ms: Option<u64>,
|
||||
) -> Result<Self, String> {
|
||||
// Ensure parent directory exists.
|
||||
if let Some(parent) = Path::new(file_path).parent() {
|
||||
std::fs::create_dir_all(parent).map_err(|e| format!("create dir: {e}"))?;
|
||||
}
|
||||
|
||||
let sample_rate = 8000u32; // Record at 8kHz (standard telephony)
|
||||
let spec = hound::WavSpec {
|
||||
channels: 1,
|
||||
sample_rate,
|
||||
bits_per_sample: 16,
|
||||
sample_format: hound::SampleFormat::Int,
|
||||
};
|
||||
|
||||
let writer = hound::WavWriter::create(file_path, spec)
|
||||
.map_err(|e| format!("create WAV {file_path}: {e}"))?;
|
||||
|
||||
let transcoder = TranscodeState::new().map_err(|e| format!("codec init: {e}"))?;
|
||||
|
||||
let max_samples = max_duration_ms.map(|ms| (sample_rate as u64 * ms) / 1000);
|
||||
|
||||
Ok(Self {
|
||||
writer,
|
||||
transcoder,
|
||||
source_pt,
|
||||
total_samples: 0,
|
||||
sample_rate,
|
||||
max_samples,
|
||||
file_path: file_path.to_string(),
|
||||
})
|
||||
}
|
||||
|
||||
/// Create a recorder that writes raw PCM at a given sample rate.
|
||||
/// Used by tool legs that already have decoded PCM (no RTP processing needed).
|
||||
pub fn new_pcm(
|
||||
file_path: &str,
|
||||
sample_rate: u32,
|
||||
max_duration_ms: Option<u64>,
|
||||
) -> Result<Self, String> {
|
||||
if let Some(parent) = Path::new(file_path).parent() {
|
||||
std::fs::create_dir_all(parent).map_err(|e| format!("create dir: {e}"))?;
|
||||
}
|
||||
|
||||
let spec = hound::WavSpec {
|
||||
channels: 1,
|
||||
sample_rate,
|
||||
bits_per_sample: 16,
|
||||
sample_format: hound::SampleFormat::Int,
|
||||
};
|
||||
|
||||
let writer = hound::WavWriter::create(file_path, spec)
|
||||
.map_err(|e| format!("create WAV {file_path}: {e}"))?;
|
||||
|
||||
// source_pt is unused for PCM recording; set to 0.
|
||||
let transcoder = TranscodeState::new().map_err(|e| format!("codec init: {e}"))?;
|
||||
let max_samples = max_duration_ms.map(|ms| (sample_rate as u64 * ms) / 1000);
|
||||
|
||||
Ok(Self {
|
||||
writer,
|
||||
transcoder,
|
||||
source_pt: 0,
|
||||
total_samples: 0,
|
||||
sample_rate,
|
||||
max_samples,
|
||||
file_path: file_path.to_string(),
|
||||
})
|
||||
}
|
||||
|
||||
/// Write raw PCM samples directly (no RTP decoding).
|
||||
/// Returns true if recording should continue, false if max duration reached.
|
||||
pub fn write_pcm(&mut self, samples: &[i16]) -> bool {
|
||||
for &sample in samples {
|
||||
if self.writer.write_sample(sample).is_err() {
|
||||
return false;
|
||||
}
|
||||
self.total_samples += 1;
|
||||
if let Some(max) = self.max_samples {
|
||||
if self.total_samples >= max {
|
||||
return false;
|
||||
}
|
||||
}
|
||||
}
|
||||
true
|
||||
}
|
||||
|
||||
/// Process an incoming RTP packet (full packet with header).
|
||||
/// Returns true if recording should continue, false if max duration reached.
|
||||
pub fn process_rtp(&mut self, data: &[u8]) -> bool {
|
||||
if data.len() <= 12 {
|
||||
return true; // Too short, skip.
|
||||
}
|
||||
|
||||
let pt = data[1] & 0x7F;
|
||||
// Skip telephone-event (DTMF) packets.
|
||||
if pt == 101 {
|
||||
return true;
|
||||
}
|
||||
|
||||
let payload = &data[12..];
|
||||
if payload.is_empty() {
|
||||
return true;
|
||||
}
|
||||
|
||||
// Decode to PCM.
|
||||
let (pcm, rate) = match self.transcoder.decode_to_pcm(payload, self.source_pt) {
|
||||
Ok(result) => result,
|
||||
Err(_) => return true, // Decode failed, skip packet.
|
||||
};
|
||||
|
||||
// Resample to 8kHz if needed.
|
||||
let pcm_8k = if rate != self.sample_rate {
|
||||
match self.transcoder.resample(&pcm, rate, self.sample_rate) {
|
||||
Ok(r) => r,
|
||||
Err(_) => return true,
|
||||
}
|
||||
} else {
|
||||
pcm
|
||||
};
|
||||
|
||||
// Write samples.
|
||||
for &sample in &pcm_8k {
|
||||
if let Err(_) = self.writer.write_sample(sample) {
|
||||
return false;
|
||||
}
|
||||
self.total_samples += 1;
|
||||
|
||||
if let Some(max) = self.max_samples {
|
||||
if self.total_samples >= max {
|
||||
return false; // Max duration reached.
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
true
|
||||
}
|
||||
|
||||
/// Stop recording and finalize the WAV file.
|
||||
pub fn stop(self) -> RecordingResult {
|
||||
let duration_ms = if self.sample_rate > 0 {
|
||||
(self.total_samples * 1000) / self.sample_rate as u64
|
||||
} else {
|
||||
0
|
||||
};
|
||||
|
||||
// Writer is finalized on drop (writes RIFF header sizes).
|
||||
drop(self.writer);
|
||||
|
||||
RecordingResult {
|
||||
file_path: self.file_path,
|
||||
duration_ms,
|
||||
total_samples: self.total_samples,
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/// Final stats for a finished recording, returned by `Recorder::stop`.
pub struct RecordingResult {
    /// Path of the finalized WAV file.
    pub file_path: String,
    /// Recorded audio length in milliseconds.
    pub duration_ms: u64,
    // Running-sample total kept for parity with the TS recorder; not yet
    // surfaced through any event or dashboard field.
    #[allow(dead_code)]
    pub total_samples: u64,
}
|
||||
146
rust/crates/proxy-engine/src/registrar.rs
Normal file
146
rust/crates/proxy-engine/src/registrar.rs
Normal file
@@ -0,0 +1,146 @@
|
||||
//! Device registrar — accepts REGISTER from SIP phones and tracks contacts.
|
||||
//!
|
||||
//! When a device sends REGISTER, the registrar responds with 200 OK
|
||||
//! and stores the device's current contact (source IP:port).
|
||||
//!
|
||||
//! Ported from ts/registrar.ts.
|
||||
|
||||
use crate::config::DeviceConfig;
|
||||
use crate::ipc::{emit_event, OutTx};
|
||||
use sip_proto::helpers::generate_tag;
|
||||
use sip_proto::message::{ResponseOptions, SipMessage};
|
||||
use std::collections::HashMap;
|
||||
use std::net::SocketAddr;
|
||||
use std::time::{Duration, Instant};
|
||||
|
||||
/// Upper bound (seconds) applied to a device's requested Expires value,
/// forcing phones to re-register at least every 5 minutes.
const MAX_EXPIRES: u32 = 300;

/// A registered device entry.
#[derive(Debug, Clone)]
pub struct RegisteredDevice {
    /// Config ID of the device (key into the registrar's map).
    pub device_id: String,
    // These fields are populated at REGISTER time for logging/debugging but are
    // not read back — device identity flows via the `device_registered` push
    // event, not via struct queries. Kept behind allow(dead_code) because
    // removing them would churn handle_register for no runtime benefit.
    #[allow(dead_code)]
    pub display_name: String,
    #[allow(dead_code)]
    pub extension: String,
    /// Source IP:port the device registered from; used to route to it.
    pub contact_addr: SocketAddr,
    #[allow(dead_code)]
    pub registered_at: Instant,
    /// Moment after which this registration is considered stale.
    pub expires_at: Instant,
    #[allow(dead_code)]
    pub aor: String,
}
|
||||
|
||||
/// Manages device registrations.
pub struct Registrar {
    /// Known device configs (from app config).
    devices: Vec<DeviceConfig>,
    /// Currently registered devices, keyed by device ID.
    registered: HashMap<String, RegisteredDevice>,
    /// Channel for pushing `device_registered` events upstream.
    out_tx: OutTx,
}
|
||||
|
||||
impl Registrar {
    /// Create an empty registrar; device configs are supplied via `configure`.
    pub fn new(out_tx: OutTx) -> Self {
        Self {
            devices: Vec::new(),
            registered: HashMap::new(),
            out_tx,
        }
    }

    /// Update the known device list from config.
    pub fn configure(&mut self, devices: &[DeviceConfig]) {
        self.devices = devices.to_vec();
    }

    /// Try to handle a SIP REGISTER from a device.
    /// Returns Some(response_bytes) if handled, None if not a known device.
    ///
    /// NOTE(review): devices are "authenticated" purely by source IP matching
    /// `expected_address` — no digest challenge is issued. Acceptable only on
    /// a trusted LAN; confirm that is the deployment assumption.
    pub fn handle_register(&mut self, msg: &SipMessage, from_addr: SocketAddr) -> Option<Vec<u8>> {
        if msg.method() != Some("REGISTER") {
            return None;
        }

        // Find the device by matching the source IP against expectedAddress.
        let from_ip = from_addr.ip().to_string();
        let device = self
            .devices
            .iter()
            .find(|d| d.expected_address == from_ip)?;

        // Address-of-record: taken from the From header, with a synthesized
        // fallback when the URI cannot be extracted.
        let from_header = msg.get_header("From").unwrap_or("");
        let aor = SipMessage::extract_uri(from_header)
            .map(|s| s.to_string())
            .unwrap_or_else(|| format!("sip:{}@{}", device.extension, from_ip));

        // Grant the requested lifetime (default 3600s) capped at MAX_EXPIRES.
        let expires_header = msg.get_header("Expires");
        let requested: u32 = expires_header.and_then(|s| s.parse().ok()).unwrap_or(3600);
        let expires = requested.min(MAX_EXPIRES);

        let entry = RegisteredDevice {
            device_id: device.id.clone(),
            display_name: device.display_name.clone(),
            extension: device.extension.clone(),
            contact_addr: from_addr,
            registered_at: Instant::now(),
            expires_at: Instant::now() + Duration::from_secs(expires as u64),
            aor: aor.clone(),
        };
        // Re-registration simply overwrites the previous entry.
        self.registered.insert(device.id.clone(), entry);

        // Emit event to TypeScript.
        emit_event(
            &self.out_tx,
            "device_registered",
            serde_json::json!({
                "device_id": device.id,
                "display_name": device.display_name,
                "address": from_ip,
                "port": from_addr.port(),
                "aor": aor,
                "expires": expires,
            }),
        );

        // Build 200 OK response.
        let contact = msg
            .get_header("Contact")
            .map(|s| s.to_string())
            .unwrap_or_else(|| format!("<sip:{}:{}>", from_ip, from_addr.port()));

        let response = SipMessage::create_response(
            200,
            "OK",
            msg,
            Some(ResponseOptions {
                to_tag: Some(generate_tag()),
                contact: Some(contact),
                extra_headers: Some(vec![("Expires".to_string(), expires.to_string())]),
                ..Default::default()
            }),
        );

        Some(response.serialize())
    }

    /// Get the contact address for a registered device.
    ///
    /// Expired entries are treated as absent but left in the map (lazy
    /// expiry); they are overwritten by the next REGISTER.
    pub fn get_device_contact(&self, device_id: &str) -> Option<SocketAddr> {
        let entry = self.registered.get(device_id)?;
        if Instant::now() > entry.expires_at {
            return None;
        }
        Some(entry.contact_addr)
    }

    /// Find a registered device by its source IP address.
    ///
    /// Matches on IP only (the source port is ignored), skipping expired
    /// entries.
    pub fn find_by_address(&self, addr: &SocketAddr) -> Option<&RegisteredDevice> {
        let ip = addr.ip().to_string();
        self.registered
            .values()
            .find(|e| e.contact_addr.ip().to_string() == ip && Instant::now() <= e.expires_at)
    }
}
|
||||
96
rust/crates/proxy-engine/src/rtp.rs
Normal file
96
rust/crates/proxy-engine/src/rtp.rs
Normal file
@@ -0,0 +1,96 @@
|
||||
//! RTP port pool for media sockets.
|
||||
//!
|
||||
//! Manages a pool of even-numbered UDP ports for RTP media. `allocate()`
|
||||
//! hands back an `Arc<UdpSocket>` to the caller (stored on the owning
|
||||
//! `LegInfo`), while the pool itself keeps only a `Weak<UdpSocket>`. When
|
||||
//! the call terminates and `LegInfo` is dropped, the strong refcount
|
||||
//! reaches zero, the socket is closed, and `allocate()` prunes the dead
|
||||
//! weak ref the next time it scans that slot — so the port automatically
|
||||
//! becomes available for reuse without any explicit `release()` plumbing.
|
||||
//!
|
||||
//! This fixes the previous leak where the pool held `Arc<UdpSocket>` and
|
||||
//! `release()` was never called, eventually exhausting the port range and
|
||||
//! causing "503 Service Unavailable" on new calls.
|
||||
|
||||
use std::collections::HashMap;
|
||||
use std::sync::{Arc, Weak};
|
||||
use tokio::net::UdpSocket;
|
||||
|
||||
/// A single RTP port allocation.
pub struct RtpAllocation {
    /// The even UDP port number that was bound.
    pub port: u16,
    /// The bound socket. The pool keeps only a Weak to it, so dropping the
    /// last Arc closes the socket and frees the port for reuse.
    pub socket: Arc<UdpSocket>,
}
|
||||
|
||||
/// RTP port pool — allocates even-numbered UDP ports.
pub struct RtpPortPool {
    /// Lowest candidate port (rounded up to even in `new`).
    min: u16,
    /// Exclusive upper bound for candidate ports.
    max: u16,
    /// Per-port weak handles; a Weak with no strong refs marks the slot
    /// as reclaimable.
    allocated: HashMap<u16, Weak<UdpSocket>>,
}
|
||||
|
||||
impl RtpPortPool {
|
||||
pub fn new(min: u16, max: u16) -> Self {
|
||||
let min = if min % 2 == 0 { min } else { min + 1 };
|
||||
Self {
|
||||
min,
|
||||
max,
|
||||
allocated: HashMap::new(),
|
||||
}
|
||||
}
|
||||
|
||||
/// Allocate an even-numbered port and bind a UDP socket.
|
||||
pub async fn allocate(&mut self) -> Option<RtpAllocation> {
|
||||
let mut port = self.min;
|
||||
while port < self.max {
|
||||
// Prune a dead weak ref at this slot: if the last strong Arc
|
||||
// (held by the owning LegInfo) was dropped when the call ended,
|
||||
// the socket is already closed and the slot is free again.
|
||||
if let Some(weak) = self.allocated.get(&port) {
|
||||
if weak.strong_count() == 0 {
|
||||
self.allocated.remove(&port);
|
||||
}
|
||||
}
|
||||
if !self.allocated.contains_key(&port) {
|
||||
match UdpSocket::bind(format!("0.0.0.0:{port}")).await {
|
||||
Ok(sock) => {
|
||||
let sock = Arc::new(sock);
|
||||
self.allocated.insert(port, Arc::downgrade(&sock));
|
||||
return Some(RtpAllocation { port, socket: sock });
|
||||
}
|
||||
Err(_) => {
|
||||
// Port in use, try next.
|
||||
}
|
||||
}
|
||||
}
|
||||
port += 2;
|
||||
}
|
||||
None // Pool exhausted.
|
||||
}
|
||||
}
|
||||
|
||||
/// Build a 12-byte RTP header: version 2, no padding, no extension, no
/// CSRC entries, marker bit clear.
pub fn build_rtp_header(pt: u8, seq: u16, timestamp: u32, ssrc: u32) -> [u8; 12] {
    let mut hdr = [0u8; 12];
    hdr[0] = 0x80; // V=2, P=0, X=0, CC=0
    hdr[1] = pt & 0x7F; // M=0, 7-bit payload type
    let seq_be = seq.to_be_bytes();
    hdr[2] = seq_be[0];
    hdr[3] = seq_be[1];
    hdr[4..8].copy_from_slice(&timestamp.to_be_bytes());
    hdr[8..12].copy_from_slice(&ssrc.to_be_bytes());
    hdr
}
|
||||
|
||||
/// Get the RTP clock increment per 20ms frame for a payload type.
|
||||
pub fn rtp_clock_increment(pt: u8) -> u32 {
|
||||
rtp_clock_rate(pt) / 50
|
||||
}
|
||||
|
||||
/// RTP clock rate in Hz for a payload type.
pub fn rtp_clock_rate(pt: u8) -> u32 {
    if pt == 111 {
        48_000 // Opus
    } else {
        // PCMU (0), PCMA (8), and G.722 (9 — which uses an 8kHz RTP clock
        // despite carrying 16kHz audio) all run at 8kHz; unknown payload
        // types default to the same.
        8_000
    }
}
|
||||
480
rust/crates/proxy-engine/src/sip_leg.rs
Normal file
480
rust/crates/proxy-engine/src/sip_leg.rs
Normal file
@@ -0,0 +1,480 @@
|
||||
//! SipLeg — manages one side of a B2BUA call.
|
||||
//!
|
||||
//! Handles the full INVITE lifecycle:
|
||||
//! - Send INVITE with SDP
|
||||
//! - Handle 407 Proxy Authentication (digest auth retry)
|
||||
//! - Handle 200 OK (ACK, learn media endpoint)
|
||||
//! - Handle BYE/CANCEL (teardown)
|
||||
//! - Track SIP dialog state (early → confirmed → terminated)
|
||||
//!
|
||||
//! Ported from ts/call/sip-leg.ts.
|
||||
|
||||
use sip_proto::dialog::{DialogState, SipDialog};
|
||||
use sip_proto::helpers::{
|
||||
build_sdp, compute_digest_auth, generate_branch, generate_tag, parse_digest_challenge,
|
||||
parse_sdp_endpoint, SdpOptions,
|
||||
};
|
||||
use sip_proto::message::{RequestOptions, SipMessage};
|
||||
use std::net::SocketAddr;
|
||||
use tokio::net::UdpSocket;
|
||||
|
||||
/// State of a SIP leg.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum LegState {
    /// INVITE sent, no provisional/final response processed yet.
    Inviting,
    /// 180 Ringing / 183 Session Progress received.
    Ringing,
    /// 2xx received and ACKed; the dialog is established.
    Connected,
    /// Teardown in progress.
    Terminating,
    /// Dialog finished, rejected, or failed.
    Terminated,
}
|
||||
|
||||
/// Configuration for creating a SIP leg.
pub struct SipLegConfig {
    /// Proxy LAN IP (for Via, Contact, SDP).
    pub lan_ip: String,
    /// Proxy LAN port.
    pub lan_port: u16,
    /// Public IP (for provider-facing legs); preferred over lan_ip when set.
    pub public_ip: Option<String>,
    /// SIP target endpoint (provider outbound proxy or device address).
    pub sip_target: SocketAddr,
    /// Provider credentials (for 407 auth).
    // username is carried for parity with the provider config but digest auth
    // rebuilds the username from the registered AOR, so this slot is never read.
    #[allow(dead_code)]
    pub username: Option<String>,
    /// Password used to answer a 407 digest challenge.
    pub password: Option<String>,
    /// Registered address-of-record; its user part is the digest username.
    pub registered_aor: Option<String>,
    /// Codec payload types to offer.
    pub codecs: Vec<u8>,
    /// Our RTP port for SDP.
    pub rtp_port: u16,
}
|
||||
|
||||
/// A SIP leg with full dialog management.
pub struct SipLeg {
    // Leg identity is tracked via the enclosing LegInfo's key in the call's
    // leg map; SipLeg itself never reads this field back. Kept to preserve
    // the (id, config) constructor shape used by the call manager.
    #[allow(dead_code)]
    pub id: String,
    /// Current lifecycle state of the leg.
    pub state: LegState,
    /// Network/credential parameters this leg was created with.
    pub config: SipLegConfig,
    /// SIP dialog, created when the INVITE is sent.
    pub dialog: Option<SipDialog>,

    /// The INVITE we sent (needed for CANCEL and 407 ACK).
    invite: Option<SipMessage>,
    /// Original unauthenticated INVITE (for re-ACKing retransmitted 407s).
    orig_invite: Option<SipMessage>,
    /// Whether we've attempted digest auth (one retry at most).
    auth_attempted: bool,

    /// Remote media endpoint (learned from SDP in 200 OK).
    pub remote_media: Option<SocketAddr>,
}
|
||||
|
||||
impl SipLeg {
|
||||
pub fn new(id: String, config: SipLegConfig) -> Self {
|
||||
Self {
|
||||
id,
|
||||
state: LegState::Inviting,
|
||||
config,
|
||||
dialog: None,
|
||||
invite: None,
|
||||
orig_invite: None,
|
||||
auth_attempted: false,
|
||||
remote_media: None,
|
||||
}
|
||||
}
|
||||
|
||||
/// Build and send an INVITE to establish this leg.
///
/// Creates the SDP offer (using the public IP when available, LAN IP
/// otherwise), records the resulting UAC dialog and the INVITE itself
/// (needed later for CANCEL / 407 handling), and transmits over UDP.
/// The send is best-effort: errors are deliberately ignored.
pub async fn send_invite(
    &mut self,
    from_uri: &str,
    to_uri: &str,
    sip_call_id: &str,
    socket: &UdpSocket,
) {
    // Provider-facing legs advertise the public IP; device-facing legs the LAN IP.
    let ip = self
        .config
        .public_ip
        .as_deref()
        .unwrap_or(&self.config.lan_ip);

    let sdp = build_sdp(&SdpOptions {
        ip,
        port: self.config.rtp_port,
        payload_types: &self.config.codecs,
        ..Default::default()
    });

    let invite = SipMessage::create_request(
        "INVITE",
        to_uri,
        RequestOptions {
            via_host: ip.to_string(),
            via_port: self.config.lan_port,
            via_transport: None,
            via_branch: Some(generate_branch()),
            from_uri: from_uri.to_string(),
            from_display_name: None,
            from_tag: Some(generate_tag()),
            to_uri: to_uri.to_string(),
            to_display_name: None,
            to_tag: None,
            call_id: Some(sip_call_id.to_string()),
            // Initial CSeq is 1; a digest-auth retry re-issues with CSeq 2.
            cseq: Some(1),
            contact: Some(format!("<sip:{ip}:{}>", self.config.lan_port)),
            max_forwards: Some(70),
            body: Some(sdp),
            content_type: Some("application/sdp".to_string()),
            extra_headers: Some(vec![(
                "User-Agent".to_string(),
                "SipRouter/1.0".to_string(),
            )]),
        },
    );

    // Track the dialog from our side (UAC) so responses can be correlated.
    self.dialog = Some(SipDialog::from_uac_invite(
        &invite,
        ip,
        self.config.lan_port,
    ));
    self.invite = Some(invite.clone());
    self.state = LegState::Inviting;

    let _ = socket
        .send_to(&invite.serialize(), self.config.sip_target)
        .await;
}
|
||||
|
||||
/// Handle an incoming SIP message routed to this leg.
|
||||
/// Returns an optional reply to send (e.g. ACK, auth retry INVITE).
|
||||
pub fn handle_message(&mut self, msg: &SipMessage) -> SipLegAction {
|
||||
if msg.is_response() {
|
||||
self.handle_response(msg)
|
||||
} else {
|
||||
self.handle_request(msg)
|
||||
}
|
||||
}
|
||||
|
||||
/// Process a response belonging to this leg's INVITE transaction and drive
/// the state machine: 180/183 → Ringing, 2xx → Connected (with ACK),
/// >=300 → Terminated. Non-INVITE responses are ignored here.
fn handle_response(&mut self, msg: &SipMessage) -> SipLegAction {
    let code = msg.status_code().unwrap_or(0);
    let cseq_method = msg.cseq_method().unwrap_or("").to_uppercase();

    if cseq_method != "INVITE" {
        return SipLegAction::None;
    }

    // Handle retransmitted 407 for the original unauthenticated INVITE.
    // A CSeq lower than our current one means the response answers the
    // pre-auth INVITE, so we just re-ACK it instead of retrying auth again.
    if self.auth_attempted {
        if let Some(dialog) = &self.dialog {
            let response_cseq: u32 = msg
                .get_header("CSeq")
                .and_then(|s| s.split_whitespace().next())
                .and_then(|s| s.parse().ok())
                .unwrap_or(0);
            if response_cseq < dialog.local_cseq && code >= 400 {
                // ACK the retransmitted error response.
                if let Some(orig) = &self.orig_invite {
                    let ack = build_non_2xx_ack(orig, msg);
                    return SipLegAction::Send(ack.serialize());
                }
                return SipLegAction::None;
            }
        }
    }

    // Handle 407 Proxy Authentication Required.
    if code == 407 {
        return self.handle_auth_challenge(msg);
    }

    // Update dialog state.
    if let Some(dialog) = &mut self.dialog {
        dialog.process_response(msg);
    }

    if code == 180 || code == 183 {
        self.state = LegState::Ringing;
        SipLegAction::StateChange(LegState::Ringing)
    } else if code >= 200 && code < 300 {
        // ACK the 200 OK.
        let ack_buf = if let Some(dialog) = &self.dialog {
            let ack = dialog.create_ack();
            Some(ack.serialize())
        } else {
            None
        };

        // If already connected (200 retransmit), just re-ACK.
        if self.state == LegState::Connected {
            return match ack_buf {
                Some(buf) => SipLegAction::Send(buf),
                None => SipLegAction::None,
            };
        }

        // Learn media endpoint from SDP.
        if msg.has_sdp_body() {
            if let Some(ep) = parse_sdp_endpoint(&msg.body) {
                if let Ok(addr) = format!("{}:{}", ep.address, ep.port).parse() {
                    self.remote_media = Some(addr);
                }
            }
        }

        self.state = LegState::Connected;

        match ack_buf {
            Some(buf) => SipLegAction::ConnectedWithAck(buf),
            None => SipLegAction::StateChange(LegState::Connected),
        }
    } else if code >= 300 {
        // Any other final response rejects the call; tear the dialog down.
        self.state = LegState::Terminated;
        if let Some(dialog) = &mut self.dialog {
            dialog.terminate();
        }
        SipLegAction::Terminated(format!("rejected_{code}"))
    } else {
        SipLegAction::None // 1xx provisional
    }
}
|
||||
|
||||
/// Answer a 407 Proxy Authentication Required: ACK the failure, compute a
/// digest Proxy-Authorization from the configured credentials, and re-issue
/// the INVITE in the same dialog (same Call-ID and From tag, CSeq bumped
/// to 2). Only one attempt is made — a second 407 terminates the leg.
fn handle_auth_challenge(&mut self, msg: &SipMessage) -> SipLegAction {
    // Second challenge after we already supplied credentials: give up.
    if self.auth_attempted {
        self.state = LegState::Terminated;
        if let Some(dialog) = &mut self.dialog {
            dialog.terminate();
        }
        return SipLegAction::Terminated("auth_rejected".to_string());
    }
    self.auth_attempted = true;

    let challenge_header = match msg.get_header("Proxy-Authenticate") {
        Some(h) => h,
        None => {
            self.state = LegState::Terminated;
            return SipLegAction::Terminated("407_no_challenge".to_string());
        }
    };

    let challenge = match parse_digest_challenge(challenge_header) {
        Some(c) => c,
        None => {
            self.state = LegState::Terminated;
            return SipLegAction::Terminated("407_bad_challenge".to_string());
        }
    };

    let password = match &self.config.password {
        Some(p) => p.clone(),
        None => {
            self.state = LegState::Terminated;
            return SipLegAction::Terminated("407_no_password".to_string());
        }
    };

    let aor = match &self.config.registered_aor {
        Some(a) => a.clone(),
        None => {
            self.state = LegState::Terminated;
            return SipLegAction::Terminated("407_no_aor".to_string());
        }
    };

    // Digest username is the user part of the registered AOR
    // (config.username is intentionally not consulted here).
    let username = aor
        .trim_start_matches("sip:")
        .trim_start_matches("sips:")
        .split('@')
        .next()
        .unwrap_or("")
        .to_string();

    // digest-uri must match the request-URI of the INVITE we are retrying.
    let dest_uri = self
        .invite
        .as_ref()
        .and_then(|i| i.request_uri())
        .unwrap_or("")
        .to_string();

    let auth_value = compute_digest_auth(
        &username,
        &password,
        &challenge.realm,
        &challenge.nonce,
        "INVITE",
        &dest_uri,
        challenge.algorithm.as_deref(),
        challenge.opaque.as_deref(),
    );

    // ACK the 407.
    let mut ack_buf = None;
    if let Some(invite) = &self.invite {
        let ack = build_non_2xx_ack(invite, msg);
        ack_buf = Some(ack.serialize());
    }

    // Save original INVITE for retransmission handling.
    self.orig_invite = self.invite.clone();

    // Build authenticated INVITE with same From tag, CSeq=2.
    let ip = self
        .config
        .public_ip
        .as_deref()
        .unwrap_or(&self.config.lan_ip);
    let from_tag = self
        .dialog
        .as_ref()
        .map(|d| d.local_tag.clone())
        .unwrap_or_else(generate_tag);

    let sdp = build_sdp(&SdpOptions {
        ip,
        port: self.config.rtp_port,
        payload_types: &self.config.codecs,
        ..Default::default()
    });

    let call_id = self
        .dialog
        .as_ref()
        .map(|d| d.call_id.clone())
        .unwrap_or_default();

    let invite_auth = SipMessage::create_request(
        "INVITE",
        &dest_uri,
        RequestOptions {
            via_host: ip.to_string(),
            via_port: self.config.lan_port,
            via_transport: None,
            // Fresh branch: the authenticated INVITE is a new transaction.
            via_branch: Some(generate_branch()),
            from_uri: aor,
            from_display_name: None,
            from_tag: Some(from_tag),
            to_uri: dest_uri.clone(),
            to_display_name: None,
            to_tag: None,
            call_id: Some(call_id),
            cseq: Some(2),
            contact: Some(format!("<sip:{ip}:{}>", self.config.lan_port)),
            max_forwards: Some(70),
            body: Some(sdp),
            content_type: Some("application/sdp".to_string()),
            extra_headers: Some(vec![
                ("Proxy-Authorization".to_string(), auth_value),
                ("User-Agent".to_string(), "SipRouter/1.0".to_string()),
            ]),
        },
    );

    self.invite = Some(invite_auth.clone());
    if let Some(dialog) = &mut self.dialog {
        dialog.local_cseq = 2;
    }

    // Return both the ACK for the 407 and the new authenticated INVITE.
    let invite_buf = invite_auth.serialize();
    SipLegAction::AuthRetry {
        ack_407: ack_buf,
        invite_with_auth: invite_buf,
    }
}
|
||||
|
||||
fn handle_request(&mut self, msg: &SipMessage) -> SipLegAction {
|
||||
let method = msg.method().unwrap_or("");
|
||||
|
||||
if method == "BYE" {
|
||||
let ok = SipMessage::create_response(200, "OK", msg, None);
|
||||
self.state = LegState::Terminated;
|
||||
if let Some(dialog) = &mut self.dialog {
|
||||
dialog.terminate();
|
||||
}
|
||||
return SipLegAction::SendAndTerminate(ok.serialize(), "bye".to_string());
|
||||
}
|
||||
|
||||
if method == "INFO" {
|
||||
let ok = SipMessage::create_response(200, "OK", msg, None);
|
||||
return SipLegAction::Send(ok.serialize());
|
||||
}
|
||||
|
||||
SipLegAction::None
|
||||
}
|
||||
|
||||
/// Build a BYE or CANCEL to tear down this leg.
|
||||
pub fn build_hangup(&mut self) -> Option<Vec<u8>> {
|
||||
let dialog = self.dialog.as_mut()?;
|
||||
|
||||
let msg = if dialog.state == DialogState::Confirmed {
|
||||
dialog.create_request("BYE", None, None, None)
|
||||
} else if dialog.state == DialogState::Early {
|
||||
if let Some(invite) = &self.invite {
|
||||
dialog.create_cancel(invite)
|
||||
} else {
|
||||
return None;
|
||||
}
|
||||
} else {
|
||||
return None;
|
||||
};
|
||||
|
||||
self.state = LegState::Terminating;
|
||||
dialog.terminate();
|
||||
Some(msg.serialize())
|
||||
}
|
||||
}
|
||||
|
||||
/// Actions produced by the SipLeg message handler.
pub enum SipLegAction {
    /// No action needed.
    None,
    /// Send a SIP message (ACK, 200 OK to INFO, etc.).
    Send(Vec<u8>),
    /// Leg state changed (no message to send).
    StateChange(LegState),
    /// Connected — send this serialized ACK.
    ConnectedWithAck(Vec<u8>),
    /// Terminated with a reason string (e.g. "auth_rejected", "407_no_challenge").
    Terminated(String),
    /// Send the serialized 200 OK, then terminate with the given reason.
    SendAndTerminate(Vec<u8>, String),
    /// 407 auth retry — send ACK for 407, then send new INVITE with auth.
    AuthRetry {
        /// Serialized ACK for the 407 (None when the original INVITE was not
        /// available to build it from).
        ack_407: Option<Vec<u8>>,
        /// Serialized re-INVITE carrying the Proxy-Authorization header.
        invite_with_auth: Vec<u8>,
    },
}
|
||||
|
||||
/// Build an ACK for a non-2xx response (same transaction as the INVITE).
|
||||
fn build_non_2xx_ack(original_invite: &SipMessage, response: &SipMessage) -> SipMessage {
|
||||
let via = original_invite.get_header("Via").unwrap_or("").to_string();
|
||||
let from = original_invite.get_header("From").unwrap_or("").to_string();
|
||||
let to = response.get_header("To").unwrap_or("").to_string();
|
||||
let call_id = original_invite.call_id().to_string();
|
||||
let cseq_num: u32 = original_invite
|
||||
.get_header("CSeq")
|
||||
.and_then(|s| s.split_whitespace().next())
|
||||
.and_then(|s| s.parse().ok())
|
||||
.unwrap_or(1);
|
||||
|
||||
let ruri = original_invite
|
||||
.request_uri()
|
||||
.unwrap_or("sip:unknown")
|
||||
.to_string();
|
||||
|
||||
SipMessage::new(
|
||||
format!("ACK {ruri} SIP/2.0"),
|
||||
vec![
|
||||
("Via".to_string(), via),
|
||||
("From".to_string(), from),
|
||||
("To".to_string(), to),
|
||||
("Call-ID".to_string(), call_id),
|
||||
("CSeq".to_string(), format!("{cseq_num} ACK")),
|
||||
("Max-Forwards".to_string(), "70".to_string()),
|
||||
("Content-Length".to_string(), "0".to_string()),
|
||||
],
|
||||
String::new(),
|
||||
)
|
||||
}
|
||||
49
rust/crates/proxy-engine/src/sip_transport.rs
Normal file
49
rust/crates/proxy-engine/src/sip_transport.rs
Normal file
@@ -0,0 +1,49 @@
|
||||
//! SIP UDP transport — owns the main SIP socket.
|
||||
//!
|
||||
//! Binds a UDP socket, receives SIP messages, and provides a send method.
|
||||
|
||||
use std::net::SocketAddr;
|
||||
use std::sync::Arc;
|
||||
use tokio::net::UdpSocket;
|
||||
|
||||
/// The SIP UDP transport layer.
///
/// Owns the single SIP UDP socket; the `Arc` lets the spawned receive loop
/// and outside senders share it.
pub struct SipTransport {
    /// The bound SIP socket, shared with the receiver task.
    socket: Arc<UdpSocket>,
}
|
||||
|
||||
impl SipTransport {
|
||||
/// Bind a UDP socket on the given address (e.g. "0.0.0.0:5070").
|
||||
pub async fn bind(bind_addr: &str) -> Result<Self, String> {
|
||||
let socket = UdpSocket::bind(bind_addr)
|
||||
.await
|
||||
.map_err(|e| format!("bind {bind_addr}: {e}"))?;
|
||||
Ok(Self {
|
||||
socket: Arc::new(socket),
|
||||
})
|
||||
}
|
||||
|
||||
/// Get a clone of the socket Arc for the receiver task.
|
||||
pub fn socket(&self) -> Arc<UdpSocket> {
|
||||
self.socket.clone()
|
||||
}
|
||||
|
||||
/// Spawn the UDP receive loop. Calls the handler for every received packet.
|
||||
pub fn spawn_receiver<F>(&self, handler: F)
|
||||
where
|
||||
F: Fn(&[u8], SocketAddr) + Send + 'static,
|
||||
{
|
||||
let socket = self.socket.clone();
|
||||
tokio::spawn(async move {
|
||||
let mut buf = vec![0u8; 65535];
|
||||
loop {
|
||||
match socket.recv_from(&mut buf).await {
|
||||
Ok((n, addr)) => handler(&buf[..n], addr),
|
||||
Err(e) => {
|
||||
eprintln!("[sip_transport] recv error: {e}");
|
||||
tokio::time::sleep(std::time::Duration::from_millis(100)).await;
|
||||
}
|
||||
}
|
||||
}
|
||||
});
|
||||
}
|
||||
}
|
||||
144
rust/crates/proxy-engine/src/tool_leg.rs
Normal file
144
rust/crates/proxy-engine/src/tool_leg.rs
Normal file
@@ -0,0 +1,144 @@
|
||||
//! Tool leg consumers — background tasks that process per-source unmerged audio.
|
||||
//!
|
||||
//! Tool legs are observer legs that receive individual audio streams from each
|
||||
//! participant in a call. The mixer pipes `ToolAudioBatch` every 20ms containing
|
||||
//! each participant's decoded PCM@48kHz f32 tagged with source leg ID.
|
||||
//!
|
||||
//! Consumers:
|
||||
//! - **Recording**: writes per-source WAV files for speaker-separated recording.
|
||||
//! - **Transcription**: stub for future Whisper integration (accumulates audio in Rust).
|
||||
|
||||
use crate::ipc::{emit_event, OutTx};
|
||||
use crate::mixer::ToolAudioBatch;
|
||||
use crate::recorder::Recorder;
|
||||
use std::collections::HashMap;
|
||||
use tokio::sync::mpsc;
|
||||
use tokio::task::JoinHandle;
|
||||
|
||||
// ---------------------------------------------------------------------------
|
||||
// Recording consumer
|
||||
// ---------------------------------------------------------------------------
|
||||
|
||||
/// Spawn a recording tool leg that writes per-source WAV files.
|
||||
///
|
||||
/// Returns the channel sender (for the mixer to send batches) and the task handle.
|
||||
/// When the channel is closed (tool leg removed), all WAV files are finalized
|
||||
/// and a `tool_recording_done` event is emitted.
|
||||
pub fn spawn_recording_tool(
|
||||
tool_leg_id: String,
|
||||
call_id: String,
|
||||
base_dir: String,
|
||||
out_tx: OutTx,
|
||||
) -> (mpsc::Sender<ToolAudioBatch>, JoinHandle<()>) {
|
||||
let (tx, mut rx) = mpsc::channel::<ToolAudioBatch>(64);
|
||||
|
||||
let handle = tokio::spawn(async move {
|
||||
let mut recorders: HashMap<String, Recorder> = HashMap::new();
|
||||
|
||||
while let Some(batch) = rx.recv().await {
|
||||
for source in &batch.sources {
|
||||
// Skip silence-only frames (near-zero = no audio activity).
|
||||
let has_audio = source.pcm_48k.iter().any(|&s| s.abs() > 1e-6);
|
||||
if !has_audio && !recorders.contains_key(&source.leg_id) {
|
||||
continue; // Don't create a file for silence-only sources.
|
||||
}
|
||||
|
||||
let recorder = recorders.entry(source.leg_id.clone()).or_insert_with(|| {
|
||||
let path = format!("{}/{}-{}.wav", base_dir, call_id, source.leg_id);
|
||||
Recorder::new_pcm(&path, 48000, None).unwrap_or_else(|e| {
|
||||
panic!("failed to create recorder for {}: {e}", source.leg_id);
|
||||
})
|
||||
});
|
||||
|
||||
// Convert f32 [-1.0, 1.0] to i16 for WAV writing.
|
||||
let pcm_i16: Vec<i16> = source
|
||||
.pcm_48k
|
||||
.iter()
|
||||
.map(|&s| (s * 32767.0).round().clamp(-32768.0, 32767.0) as i16)
|
||||
.collect();
|
||||
if !recorder.write_pcm(&pcm_i16) {
|
||||
// Max duration reached — stop recording this source.
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Channel closed — finalize all recordings.
|
||||
let mut files = Vec::new();
|
||||
for (leg_id, rec) in recorders {
|
||||
let result = rec.stop();
|
||||
files.push(serde_json::json!({
|
||||
"source_leg_id": leg_id,
|
||||
"file_path": result.file_path,
|
||||
"duration_ms": result.duration_ms,
|
||||
}));
|
||||
}
|
||||
|
||||
emit_event(
|
||||
&out_tx,
|
||||
"tool_recording_done",
|
||||
serde_json::json!({
|
||||
"call_id": call_id,
|
||||
"tool_leg_id": tool_leg_id,
|
||||
"files": files,
|
||||
}),
|
||||
);
|
||||
});
|
||||
|
||||
(tx, handle)
|
||||
}
|
||||
|
||||
// ---------------------------------------------------------------------------
|
||||
// Transcription consumer (stub — real plumbing, stub consumer)
|
||||
// ---------------------------------------------------------------------------
|
||||
|
||||
/// Spawn a transcription tool leg.
|
||||
///
|
||||
/// The plumbing is fully real: it receives per-source unmerged PCM@48kHz f32 from
|
||||
/// the mixer every 20ms. The consumer is a stub that accumulates audio and
|
||||
/// reports metadata on close. Future: will stream to a Whisper HTTP endpoint.
|
||||
pub fn spawn_transcription_tool(
|
||||
tool_leg_id: String,
|
||||
call_id: String,
|
||||
out_tx: OutTx,
|
||||
) -> (mpsc::Sender<ToolAudioBatch>, JoinHandle<()>) {
|
||||
let (tx, mut rx) = mpsc::channel::<ToolAudioBatch>(64);
|
||||
|
||||
let handle = tokio::spawn(async move {
|
||||
// Track per-source sample counts for duration reporting.
|
||||
let mut source_samples: HashMap<String, u64> = HashMap::new();
|
||||
|
||||
while let Some(batch) = rx.recv().await {
|
||||
for source in &batch.sources {
|
||||
*source_samples.entry(source.leg_id.clone()).or_insert(0) +=
|
||||
source.pcm_48k.len() as u64;
|
||||
|
||||
// TODO: Future — accumulate chunks and stream to Whisper endpoint.
|
||||
// For now, the audio is received and counted but not processed.
|
||||
}
|
||||
}
|
||||
|
||||
// Channel closed — report metadata.
|
||||
let sources: Vec<serde_json::Value> = source_samples
|
||||
.iter()
|
||||
.map(|(leg_id, samples)| {
|
||||
serde_json::json!({
|
||||
"source_leg_id": leg_id,
|
||||
"duration_ms": (samples * 1000) / 48000,
|
||||
})
|
||||
})
|
||||
.collect();
|
||||
|
||||
emit_event(
|
||||
&out_tx,
|
||||
"tool_transcription_done",
|
||||
serde_json::json!({
|
||||
"call_id": call_id,
|
||||
"tool_leg_id": tool_leg_id,
|
||||
"sources": sources,
|
||||
}),
|
||||
);
|
||||
});
|
||||
|
||||
(tx, handle)
|
||||
}
|
||||
392
rust/crates/proxy-engine/src/tts.rs
Normal file
392
rust/crates/proxy-engine/src/tts.rs
Normal file
@@ -0,0 +1,392 @@
|
||||
//! Text-to-speech engine — synthesizes text to WAV files using Kokoro neural TTS.
|
||||
//!
|
||||
//! The model is loaded lazily on first use. If the model/voices files are not
|
||||
//! present, the generate command returns an error and the caller skips the prompt.
|
||||
//!
|
||||
//! Caching is handled internally via a `.meta` sidecar file next to each WAV.
|
||||
//! When `cacheable` is true, the engine checks whether the existing WAV was
|
||||
//! generated from the same text+voice; if so it returns immediately (cache hit).
|
||||
//! Callers never need to check for cached files — that is entirely this module's
|
||||
//! responsibility.
|
||||
|
||||
use crate::audio_player::pcm_to_mix_frames;
|
||||
use kokoro_tts::{KokoroTts, Voice};
|
||||
use std::path::Path;
|
||||
use std::sync::Arc;
|
||||
use std::time::{SystemTime, UNIX_EPOCH};
|
||||
use tokio::sync::{mpsc, watch};
|
||||
|
||||
/// Default on-disk location of the Kokoro ONNX model.
pub const DEFAULT_MODEL_PATH: &str = ".nogit/tts/kokoro-v1.0.onnx";
/// Default on-disk location of the packed voice data.
pub const DEFAULT_VOICES_PATH: &str = ".nogit/tts/voices.bin";
/// Sample rate of the PCM Kokoro produces (used when converting to mix frames
/// and when writing WAV output).
const TTS_OUTPUT_RATE: u32 = 24000;
/// Hard upper bound on chunk length (chars) before forcing a split.
const MAX_CHUNK_CHARS: usize = 220;
/// Minimum chunk length (chars) before a sentence boundary may end a chunk.
const MIN_CHUNK_CHARS: usize = 80;
|
||||
|
||||
/// Messages produced by the background chunk-synthesis task of a live prompt.
pub enum TtsStreamMessage {
    /// Next batch of mixer-ready frames (shape defined by `pcm_to_mix_frames`).
    Frames(Vec<Vec<f32>>),
    /// No more chunks will be sent (all done, or synthesis was cancelled).
    Finished,
    /// A chunk failed to synthesize; carries the error text. No `Finished`
    /// follows a `Failed`.
    Failed(String),
}
|
||||
|
||||
/// Handle for a live (streaming) TTS prompt.
pub struct TtsLivePrompt {
    /// Frames for the first text chunk, synthesized synchronously up front so
    /// playback can start immediately.
    pub initial_frames: Vec<Vec<f32>>,
    /// Receives the remaining chunks as they are synthesized.
    pub stream_rx: mpsc::Receiver<TtsStreamMessage>,
    /// Send `true` to stop synthesizing further chunks.
    pub cancel_tx: watch::Sender<bool>,
}
|
||||
|
||||
/// Parameters for starting a live TTS prompt.
#[derive(Clone)]
pub struct TtsPromptRequest {
    /// Path to the Kokoro ONNX model file.
    pub model_path: String,
    /// Path to the packed voices file.
    pub voices_path: String,
    /// Voice name (e.g. "af_bella"); unknown names fall back to af_bella.
    pub voice_name: String,
    /// Text to synthesize.
    pub text: String,
}
|
||||
|
||||
/// Wraps the Kokoro TTS engine with lazy model loading.
pub struct TtsEngine {
    /// Loaded engine handle; `None` until the first successful load.
    tts: Option<Arc<KokoroTts>>,
    /// Path that was used to load the current model (for cache invalidation).
    loaded_model_path: String,
    /// Path that was used to load the current voices file (for cache invalidation).
    loaded_voices_path: String,
    /// On-disk TTS WAVs are cacheable only within a single engine lifetime.
    /// Every restart gets a new generation token, so prior process outputs are
    /// treated as stale and regenerated on first use.
    cache_generation: String,
}
|
||||
|
||||
impl TtsEngine {
    /// Create an engine with no model loaded and a fresh cache generation
    /// (nanoseconds since the epoch; "0" if the clock is before the epoch).
    pub fn new() -> Self {
        Self {
            tts: None,
            loaded_model_path: String::new(),
            loaded_voices_path: String::new(),
            cache_generation: SystemTime::now()
                .duration_since(UNIX_EPOCH)
                .map(|d| d.as_nanos().to_string())
                .unwrap_or_else(|_| "0".to_string()),
        }
    }

    /// Load the model/voices if not yet loaded or if either path changed;
    /// otherwise reuse the cached handle.
    ///
    /// Errors when either file is missing or the model fails to load.
    async fn ensure_loaded(
        &mut self,
        model_path: &str,
        voices_path: &str,
    ) -> Result<Arc<KokoroTts>, String> {
        if !Path::new(model_path).exists() {
            return Err(format!("model not found: {model_path}"));
        }
        if !Path::new(voices_path).exists() {
            return Err(format!("voices not found: {voices_path}"));
        }

        // Reload when nothing is loaded yet or the caller switched files.
        if self.tts.is_none()
            || self.loaded_model_path != model_path
            || self.loaded_voices_path != voices_path
        {
            eprintln!("[tts] loading model: {model_path}");
            let tts = Arc::new(
                KokoroTts::new(model_path, voices_path)
                    .await
                    .map_err(|e| format!("model load failed: {e:?}"))?,
            );
            self.tts = Some(tts);
            self.loaded_model_path = model_path.to_string();
            self.loaded_voices_path = voices_path.to_string();
        }

        // Safe: the branch above guarantees `tts` is Some here.
        Ok(self.tts.as_ref().unwrap().clone())
    }

    /// Start a live (streaming) prompt: synthesize the first chunk up front,
    /// then spawn a background task that streams the remaining chunks.
    ///
    /// Errors on empty/whitespace-only text, missing model files, or a
    /// synthesis failure on the first chunk.
    pub async fn start_live_prompt(
        &mut self,
        request: TtsPromptRequest,
    ) -> Result<TtsLivePrompt, String> {
        if request.text.trim().is_empty() {
            return Err("empty text".into());
        }

        let tts = self
            .ensure_loaded(&request.model_path, &request.voices_path)
            .await?;
        let voice = select_voice(&request.voice_name);
        let chunks = chunk_text(&request.text);
        if chunks.is_empty() {
            return Err("empty text".into());
        }

        // First chunk synchronously, so the caller has audio immediately.
        let initial_frames = synth_text_to_mix_frames(&tts, chunks[0].as_str(), voice).await?;
        let remaining_chunks: Vec<String> = chunks.into_iter().skip(1).collect();
        let (stream_tx, stream_rx) = mpsc::channel(8);
        let (cancel_tx, cancel_rx) = watch::channel(false);

        // Remaining chunks are synthesized in the background.
        tokio::spawn(async move {
            stream_live_prompt_chunks(tts, voice, remaining_chunks, stream_tx, cancel_rx).await;
        });

        Ok(TtsLivePrompt {
            initial_frames,
            stream_rx,
            cancel_tx,
        })
    }

    /// Generate a WAV file from text.
    ///
    /// Params (from IPC JSON):
    /// - `model`: path to the ONNX model file
    /// - `voices`: path to the voices.bin file
    /// - `voice`: voice name (e.g. "af_bella")
    /// - `text`: text to synthesize
    /// - `output`: output WAV file path
    /// - `cacheable`: if true, skip synthesis when the output WAV already
    ///   matches the same text+voice (checked via a `.meta` sidecar file)
    ///
    /// Returns `{"output": <path>}` on success; errors on missing params,
    /// empty text, load/synthesis failure, or a WAV write failure.
    pub async fn generate(
        &mut self,
        params: &serde_json::Value,
    ) -> Result<serde_json::Value, String> {
        let model_path = params
            .get("model")
            .and_then(|v| v.as_str())
            .ok_or("missing 'model' param")?;
        let voices_path = params
            .get("voices")
            .and_then(|v| v.as_str())
            .ok_or("missing 'voices' param")?;
        let voice_name = params
            .get("voice")
            .and_then(|v| v.as_str())
            .unwrap_or("af_bella");
        let text = params
            .get("text")
            .and_then(|v| v.as_str())
            .ok_or("missing 'text' param")?;
        let output_path = params
            .get("output")
            .and_then(|v| v.as_str())
            .ok_or("missing 'output' param")?;
        let cacheable = params
            .get("cacheable")
            .and_then(|v| v.as_bool())
            .unwrap_or(false);

        if text.is_empty() {
            return Err("empty text".into());
        }

        // Cache check: if cacheable and the sidecar matches, return immediately.
        if cacheable && self.is_cache_hit(output_path, text, voice_name) {
            eprintln!("[tts] cache hit: {output_path}");
            return Ok(serde_json::json!({ "output": output_path }));
        }

        // Ensure parent directory exists (best-effort; WAV create reports errors).
        if let Some(parent) = Path::new(output_path).parent() {
            let _ = std::fs::create_dir_all(parent);
        }

        let tts = self.ensure_loaded(model_path, voices_path).await?;
        let voice = select_voice(voice_name);

        eprintln!("[tts] synthesizing WAV voice '{voice_name}' to {output_path}");
        let (samples, duration) = tts
            .synth(text, voice)
            .await
            .map_err(|e| format!("synthesis failed: {e:?}"))?;
        eprintln!(
            "[tts] synthesized {} samples in {duration:?}",
            samples.len()
        );

        // Write 24kHz 16-bit mono WAV.
        let spec = hound::WavSpec {
            channels: 1,
            sample_rate: 24000,
            bits_per_sample: 16,
            sample_format: hound::SampleFormat::Int,
        };

        let mut writer = hound::WavWriter::create(output_path, spec)
            .map_err(|e| format!("WAV create failed: {e}"))?;
        for &sample in &samples {
            // f32 [-1.0, 1.0] → i16, clamped to avoid overflow on out-of-range input.
            let s16 = (sample * 32767.0).round().clamp(-32768.0, 32767.0) as i16;
            writer
                .write_sample(s16)
                .map_err(|e| format!("WAV write: {e}"))?;
        }
        writer
            .finalize()
            .map_err(|e| format!("WAV finalize: {e}"))?;

        // Write sidecar for future cache checks.
        if cacheable {
            self.write_cache_meta(output_path, text, voice_name);
        }

        eprintln!("[tts] wrote {output_path}");
        Ok(serde_json::json!({ "output": output_path }))
    }

    // -----------------------------------------------------------------------
    // Cache helpers
    // -----------------------------------------------------------------------

    /// Check if the WAV + sidecar on disk match the given text+voice.
    /// Misses when either file is absent or the sidecar is unreadable.
    fn is_cache_hit(&self, output_path: &str, text: &str, voice: &str) -> bool {
        let meta_path = format!("{output_path}.meta");
        if !Path::new(output_path).exists() || !Path::new(&meta_path).exists() {
            return false;
        }
        match std::fs::read_to_string(&meta_path) {
            Ok(contents) => contents == self.cache_key(text, voice),
            Err(_) => false,
        }
    }

    /// Write the sidecar `.meta` file next to the WAV (best-effort).
    fn write_cache_meta(&self, output_path: &str, text: &str, voice: &str) {
        let meta_path = format!("{output_path}.meta");
        let _ = std::fs::write(&meta_path, self.cache_key(text, voice));
    }

    /// Build the cache key from process generation + text + voice.
    /// The generation token makes keys from previous process runs stale.
    fn cache_key(&self, text: &str, voice: &str) -> String {
        format!("{}\0{}\0{}", self.cache_generation, text, voice)
    }
}
|
||||
|
||||
async fn synth_text_to_mix_frames(
|
||||
tts: &Arc<KokoroTts>,
|
||||
text: &str,
|
||||
voice: Voice,
|
||||
) -> Result<Vec<Vec<f32>>, String> {
|
||||
let (samples, duration) = tts
|
||||
.synth(text, voice)
|
||||
.await
|
||||
.map_err(|e| format!("synthesis failed: {e:?}"))?;
|
||||
eprintln!(
|
||||
"[tts] synthesized chunk ({} chars, {} samples) in {duration:?}",
|
||||
text.chars().count(),
|
||||
samples.len()
|
||||
);
|
||||
pcm_to_mix_frames(&samples, TTS_OUTPUT_RATE)
|
||||
}
|
||||
|
||||
/// Background task: synthesize the remaining prompt chunks one at a time and
/// push the resulting frames down `stream_tx`.
///
/// Cancellation (`cancel_rx` flipping to true) is checked before each chunk,
/// after each synthesis completes, and once more between iterations, so at
/// most one in-flight chunk of work is wasted after a cancel. Ends by sending
/// `Finished` — unless synthesis failed (`Failed` is sent instead) or the
/// receiver was dropped mid-stream.
async fn stream_live_prompt_chunks(
    tts: Arc<KokoroTts>,
    voice: Voice,
    chunks: Vec<String>,
    stream_tx: mpsc::Sender<TtsStreamMessage>,
    mut cancel_rx: watch::Receiver<bool>,
) {
    for chunk in chunks {
        // Bail out before starting an expensive synth if already cancelled.
        if *cancel_rx.borrow() {
            break;
        }

        match synth_text_to_mix_frames(&tts, &chunk, voice).await {
            Ok(frames) => {
                // A cancel may have arrived while synthesizing — drop the frames.
                if *cancel_rx.borrow() {
                    break;
                }
                // Receiver gone: nobody is listening, stop without Finished.
                if stream_tx.send(TtsStreamMessage::Frames(frames)).await.is_err() {
                    return;
                }
            }
            Err(error) => {
                let _ = stream_tx.send(TtsStreamMessage::Failed(error)).await;
                return;
            }
        }

        // Final check; borrow_and_update also clears the watch "changed" flag.
        if cancel_rx.has_changed().unwrap_or(false) && *cancel_rx.borrow_and_update() {
            break;
        }
    }

    let _ = stream_tx.send(TtsStreamMessage::Finished).await;
}
|
||||
|
||||
fn chunk_text(text: &str) -> Vec<String> {
|
||||
let mut chunks = Vec::new();
|
||||
let mut current = String::new();
|
||||
|
||||
for ch in text.chars() {
|
||||
current.push(ch);
|
||||
|
||||
let len = current.chars().count();
|
||||
let hard_split = len >= MAX_CHUNK_CHARS && (ch.is_whitespace() || is_soft_boundary(ch));
|
||||
let natural_split = len >= MIN_CHUNK_CHARS && is_sentence_boundary(ch);
|
||||
|
||||
if natural_split || hard_split {
|
||||
push_chunk(&mut chunks, &mut current);
|
||||
}
|
||||
}
|
||||
|
||||
push_chunk(&mut chunks, &mut current);
|
||||
|
||||
if chunks.len() >= 2 {
|
||||
let last_len = chunks.last().unwrap().chars().count();
|
||||
if last_len < (MIN_CHUNK_CHARS / 2) {
|
||||
let tail = chunks.pop().unwrap();
|
||||
if let Some(prev) = chunks.last_mut() {
|
||||
prev.push(' ');
|
||||
prev.push_str(tail.trim());
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
chunks
|
||||
}
|
||||
|
||||
/// Move the trimmed contents of `current` into `chunks` (skipping blanks)
/// and reset `current` to empty.
fn push_chunk(chunks: &mut Vec<String>, current: &mut String) {
    let piece = current.trim().to_string();
    current.clear();
    if !piece.is_empty() {
        chunks.push(piece);
    }
}
|
||||
|
||||
/// True for characters that may end a sentence-sized chunk.
fn is_sentence_boundary(ch: char) -> bool {
    const BOUNDARIES: [char; 6] = ['.', '!', '?', '\n', ';', ':'];
    BOUNDARIES.contains(&ch)
}
|
||||
|
||||
/// True for characters acceptable as a forced (hard-limit) split point.
fn is_soft_boundary(ch: char) -> bool {
    const SOFT: [char; 6] = [',', ';', ':', ')', ']', '\n'];
    SOFT.contains(&ch)
}
|
||||
|
||||
/// Map voice name string to Kokoro Voice enum variant.
///
/// Each variant carries a numeric parameter, fixed at 1.0 here — presumably a
/// voice weight/speed factor; confirm against the kokoro_tts crate docs.
/// Unknown names log a warning and fall back to af_bella.
fn select_voice(name: &str) -> Voice {
    match name {
        "af_bella" => Voice::AfBella(1.0),
        "af_heart" => Voice::AfHeart(1.0),
        "af_jessica" => Voice::AfJessica(1.0),
        "af_nicole" => Voice::AfNicole(1.0),
        "af_nova" => Voice::AfNova(1.0),
        "af_sarah" => Voice::AfSarah(1.0),
        "af_sky" => Voice::AfSky(1.0),
        "af_river" => Voice::AfRiver(1.0),
        "af_alloy" => Voice::AfAlloy(1.0),
        "af_aoede" => Voice::AfAoede(1.0),
        "af_kore" => Voice::AfKore(1.0),
        "am_adam" => Voice::AmAdam(1.0),
        "am_echo" => Voice::AmEcho(1.0),
        "am_eric" => Voice::AmEric(1.0),
        "am_fenrir" => Voice::AmFenrir(1.0),
        "am_liam" => Voice::AmLiam(1.0),
        "am_michael" => Voice::AmMichael(1.0),
        "am_onyx" => Voice::AmOnyx(1.0),
        "am_puck" => Voice::AmPuck(1.0),
        "bf_alice" => Voice::BfAlice(1.0),
        "bf_emma" => Voice::BfEmma(1.0),
        "bf_isabella" => Voice::BfIsabella(1.0),
        "bf_lily" => Voice::BfLily(1.0),
        "bm_daniel" => Voice::BmDaniel(1.0),
        "bm_fable" => Voice::BmFable(1.0),
        "bm_george" => Voice::BmGeorge(1.0),
        "bm_lewis" => Voice::BmLewis(1.0),
        _ => {
            eprintln!("[tts] unknown voice '{name}', falling back to af_bella");
            Voice::AfBella(1.0)
        }
    }
}
|
||||
140
rust/crates/proxy-engine/src/voicemail.rs
Normal file
140
rust/crates/proxy-engine/src/voicemail.rs
Normal file
@@ -0,0 +1,140 @@
|
||||
//! Voicemail session — answer → play greeting → beep → record → done.
|
||||
|
||||
use crate::audio_player::{play_beep, play_wav_file};
|
||||
use crate::ipc::{emit_event, OutTx};
|
||||
use crate::recorder::Recorder;
|
||||
use std::net::SocketAddr;
|
||||
use std::sync::Arc;
|
||||
use tokio::net::UdpSocket;
|
||||
|
||||
/// Run a voicemail session on an RTP port.
///
/// 1. Plays the greeting WAV file to the caller (if configured)
/// 2. Plays a beep tone
/// 3. Records the caller's message until BYE or max duration
///
/// Recording runs inline via `record_from_socket`, reading RTP straight off
/// `rtp_socket`. Greeting/beep failures are reported as `voicemail_error`
/// events but do not abort the session; a recorder-creation failure does.
/// Emits `voicemail_started` on entry and `recording_done` at the end.
///
/// - `rtp_socket`: socket used both to send prompts and to receive audio
/// - `provider_media`: remote RTP address for greeting/beep playback
/// - `codec_pt`: RTP payload type for playback and recording
/// - `greeting_wav`: optional path to the greeting file; skipped when None
/// - `max_recording_ms`: recording cap, passed through to the recorder
pub async fn run_voicemail_session(
    rtp_socket: Arc<UdpSocket>,
    provider_media: SocketAddr,
    codec_pt: u8,
    voicebox_id: Option<String>,
    greeting_wav: Option<String>,
    recording_path: String,
    max_recording_ms: u64,
    call_id: String,
    caller_number: String,
    out_tx: OutTx,
) {
    // One random SSRC for the whole outbound prompt stream (greeting + beep).
    let ssrc: u32 = rand::random();

    emit_event(
        &out_tx,
        "voicemail_started",
        serde_json::json!({
            "call_id": call_id,
            "voicebox_id": voicebox_id,
            "caller_number": caller_number,
        }),
    );

    // Step 1: Play greeting.
    // next_seq/next_ts carry the RTP sequence/timestamp forward so the beep
    // continues the same stream without a discontinuity.
    let mut next_seq: u16 = 0;
    let mut next_ts: u32 = 0;

    if let Some(wav_path) = &greeting_wav {
        match play_wav_file(wav_path, rtp_socket.clone(), provider_media, codec_pt, ssrc).await {
            Ok(frames) => {
                // One packet per frame; timestamp advances by the codec's
                // per-frame clock increment.
                next_seq = frames as u16;
                next_ts = frames * crate::rtp::rtp_clock_increment(codec_pt);
            }
            Err(e) => {
                // Non-fatal: continue to the beep even if the greeting failed.
                emit_event(
                    &out_tx,
                    "voicemail_error",
                    serde_json::json!({ "call_id": call_id, "error": format!("greeting: {e}") }),
                );
            }
        }
    }

    // Step 2: Play beep (1kHz, 500ms).
    match play_beep(
        rtp_socket.clone(),
        provider_media,
        codec_pt,
        ssrc,
        next_seq,
        next_ts,
        1000,
        500,
    )
    .await
    {
        Ok((_seq, _ts)) => {}
        Err(e) => {
            // Non-fatal: still proceed to recording.
            emit_event(
                &out_tx,
                "voicemail_error",
                serde_json::json!({ "call_id": call_id, "error": format!("beep: {e}") }),
            );
        }
    }

    // Step 3: Record incoming audio. Failure to create the recorder aborts
    // the session (nothing to record into).
    let recorder = match Recorder::new(&recording_path, codec_pt, Some(max_recording_ms)) {
        Ok(r) => r,
        Err(e) => {
            emit_event(
                &out_tx,
                "voicemail_error",
                serde_json::json!({ "call_id": call_id, "error": format!("recorder: {e}") }),
            );
            return;
        }
    };

    // Receive RTP and feed to recorder (blocks until BYE/max duration).
    let result = record_from_socket(rtp_socket, recorder, max_recording_ms).await;

    // Step 4: Done — emit recording result.
    emit_event(
        &out_tx,
        "recording_done",
        serde_json::json!({
            "call_id": call_id,
            "voicebox_id": voicebox_id,
            "file_path": result.file_path,
            "duration_ms": result.duration_ms,
            "caller_number": caller_number,
        }),
    );
}
|
||||
|
||||
/// Read RTP packets from the socket and feed them to the recorder.
|
||||
/// Returns when the socket errors out (BYE closes the call/socket)
|
||||
/// or max duration is reached.
|
||||
async fn record_from_socket(
|
||||
socket: Arc<UdpSocket>,
|
||||
mut recorder: Recorder,
|
||||
max_ms: u64,
|
||||
) -> crate::recorder::RecordingResult {
|
||||
let mut buf = vec![0u8; 65535];
|
||||
let deadline = tokio::time::Instant::now() + tokio::time::Duration::from_millis(max_ms + 2000);
|
||||
|
||||
loop {
|
||||
let timeout = tokio::time::timeout_at(deadline, socket.recv_from(&mut buf));
|
||||
match timeout.await {
|
||||
Ok(Ok((n, _addr))) => {
|
||||
if !recorder.process_rtp(&buf[..n]) {
|
||||
break; // Max duration reached.
|
||||
}
|
||||
}
|
||||
Ok(Err(_)) => break, // Socket error (closed).
|
||||
Err(_) => break, // Timeout (max duration + grace).
|
||||
}
|
||||
}
|
||||
|
||||
recorder.stop()
|
||||
}
|
||||
328
rust/crates/proxy-engine/src/webrtc_engine.rs
Normal file
328
rust/crates/proxy-engine/src/webrtc_engine.rs
Normal file
@@ -0,0 +1,328 @@
|
||||
//! WebRTC engine — manages browser PeerConnections.
|
||||
//!
|
||||
//! Audio bridging is now channel-based:
|
||||
//! - Browser Opus audio → on_track → mixer inbound channel
|
||||
//! - Mixer outbound channel → Opus RTP → TrackLocalStaticRTP → browser
|
||||
//!
|
||||
//! The mixer handles all transcoding. The WebRTC engine just shuttles raw Opus.
|
||||
|
||||
use crate::ipc::{emit_event, OutTx};
|
||||
use crate::mixer::RtpPacket;
|
||||
use codec_lib::PT_OPUS;
|
||||
use std::collections::HashMap;
|
||||
use std::sync::Arc;
|
||||
use tokio::sync::{mpsc, Mutex};
|
||||
use webrtc::api::media_engine::MediaEngine;
|
||||
use webrtc::api::APIBuilder;
|
||||
use webrtc::ice_transport::ice_candidate::RTCIceCandidateInit;
|
||||
use webrtc::peer_connection::configuration::RTCConfiguration;
|
||||
use webrtc::peer_connection::peer_connection_state::RTCPeerConnectionState;
|
||||
use webrtc::peer_connection::sdp::session_description::RTCSessionDescription;
|
||||
use webrtc::peer_connection::RTCPeerConnection;
|
||||
use webrtc::rtp_transceiver::rtp_codec::RTCRtpCodecCapability;
|
||||
use webrtc::track::track_local::track_local_static_rtp::TrackLocalStaticRTP;
|
||||
use webrtc::track::track_local::{TrackLocal, TrackLocalWriter};
|
||||
|
||||
/// A managed WebRTC session.
struct WebRtcSession {
    /// The browser-facing PeerConnection.
    pc: Arc<RTCPeerConnection>,
    /// Outbound Opus RTP track (audio written toward the browser).
    local_track: Arc<TrackLocalStaticRTP>,
    /// ID of the call this session is attached to, if any.
    call_id: Option<String>,
    /// Channel sender for forwarding browser Opus audio to the mixer.
    /// Set when the session is linked to a call via link_to_mixer().
    mixer_tx: Arc<Mutex<Option<mpsc::Sender<RtpPacket>>>>,
}
|
||||
|
||||
/// Manages all WebRTC sessions.
pub struct WebRtcEngine {
    /// Active sessions keyed by session ID.
    sessions: HashMap<String, WebRtcSession>,
    /// IPC channel used to emit engine events (e.g. ICE candidates).
    out_tx: OutTx,
}
|
||||
|
||||
impl WebRtcEngine {
    /// Create an engine with no sessions; events are emitted on `out_tx`.
    pub fn new(out_tx: OutTx) -> Self {
        Self {
            sessions: HashMap::new(),
            out_tx,
        }
    }

    /// Handle a WebRTC offer from a browser — create PeerConnection, return SDP answer.
    ///
    /// Side effects: registers callbacks that emit `webrtc_ice_candidate`,
    /// `webrtc_state`, and `webrtc_track` events on `out_tx`, and stores the
    /// new session under `session_id` (replacing any previous entry).
    ///
    /// Errors are returned as human-readable strings describing the failing step.
    pub async fn handle_offer(
        &mut self,
        session_id: &str,
        offer_sdp: &str,
    ) -> Result<String, String> {
        // A fresh MediaEngine per connection, with webrtc-rs default codecs
        // (these include Opus, the only codec this engine shuttles).
        let mut media_engine = MediaEngine::default();
        media_engine
            .register_default_codecs()
            .map_err(|e| format!("register codecs: {e}"))?;

        let api = APIBuilder::new().with_media_engine(media_engine).build();

        // No STUN/TURN configured: host candidates only.
        let config = RTCConfiguration {
            ice_servers: vec![],
            ..Default::default()
        };

        let pc = api
            .new_peer_connection(config)
            .await
            .map_err(|e| format!("create peer connection: {e}"))?;
        let pc = Arc::new(pc);

        // Local audio track for sending audio to browser (Opus, 48 kHz mono).
        let local_track = Arc::new(TrackLocalStaticRTP::new(
            RTCRtpCodecCapability {
                mime_type: "audio/opus".to_string(),
                clock_rate: 48000,
                channels: 1,
                ..Default::default()
            },
            "audio".to_string(),
            "siprouter".to_string(),
        ));

        // The RTP sender handle is not needed; writes go through local_track.
        let _sender = pc
            .add_track(local_track.clone() as Arc<dyn TrackLocal + Send + Sync>)
            .await
            .map_err(|e| format!("add track: {e}"))?;

        // Shared mixer channel sender (populated when linked to a call).
        let mixer_tx: Arc<Mutex<Option<mpsc::Sender<RtpPacket>>>> = Arc::new(Mutex::new(None));

        // ICE candidate handler — trickle each local candidate up as an event.
        let out_tx_ice = self.out_tx.clone();
        let sid_ice = session_id.to_string();
        pc.on_ice_candidate(Box::new(move |candidate| {
            let out_tx = out_tx_ice.clone();
            let sid = sid_ice.clone();
            Box::pin(async move {
                // `candidate` is None at end-of-candidates; nothing to emit then.
                if let Some(c) = candidate {
                    if let Ok(json) = c.to_json() {
                        emit_event(
                            &out_tx,
                            "webrtc_ice_candidate",
                            serde_json::json!({
                                "session_id": sid,
                                "candidate": json.candidate,
                                "sdp_mid": json.sdp_mid,
                                "sdp_mline_index": json.sdp_mline_index,
                            }),
                        );
                    }
                }
            })
        }));

        // Connection state handler — surface every transition as an event.
        let out_tx_state = self.out_tx.clone();
        let sid_state = session_id.to_string();
        pc.on_peer_connection_state_change(Box::new(move |state| {
            let out_tx = out_tx_state.clone();
            let sid = sid_state.clone();
            Box::pin(async move {
                let state_str = match state {
                    RTCPeerConnectionState::Connected => "connected",
                    RTCPeerConnectionState::Disconnected => "disconnected",
                    RTCPeerConnectionState::Failed => "failed",
                    RTCPeerConnectionState::Closed => "closed",
                    RTCPeerConnectionState::New => "new",
                    RTCPeerConnectionState::Connecting => "connecting",
                    _ => "unknown",
                };
                emit_event(
                    &out_tx,
                    "webrtc_state",
                    serde_json::json!({ "session_id": sid, "state": state_str }),
                );
            })
        }));

        // Track handler — receives Opus audio from the browser.
        // Forwards raw Opus payload to the mixer channel (when linked).
        let out_tx_track = self.out_tx.clone();
        let sid_track = session_id.to_string();
        let mixer_tx_for_track = mixer_tx.clone();
        pc.on_track(Box::new(move |track, _receiver, _transceiver| {
            let out_tx = out_tx_track.clone();
            let sid = sid_track.clone();
            let mixer_tx = mixer_tx_for_track.clone();
            Box::pin(async move {
                let codec_info = track.codec();
                emit_event(
                    &out_tx,
                    "webrtc_track",
                    serde_json::json!({
                        "session_id": sid,
                        "kind": track.kind().to_string(),
                        "codec": codec_info.capability.mime_type,
                    }),
                );

                // Spawn browser→mixer forwarding task; it runs until the
                // track ends and idles (drops packets) until mixer_tx is set.
                tokio::spawn(browser_to_mixer_loop(track, mixer_tx, out_tx, sid));
            })
        }));

        // Set remote offer. Handlers must already be installed at this point
        // so no early ICE/track callbacks are missed.
        let offer = RTCSessionDescription::offer(offer_sdp.to_string())
            .map_err(|e| format!("parse offer: {e}"))?;
        pc.set_remote_description(offer)
            .await
            .map_err(|e| format!("set remote description: {e}"))?;

        // Create answer.
        let answer = pc
            .create_answer(None)
            .await
            .map_err(|e| format!("create answer: {e}"))?;
        // Clone the SDP before the answer is consumed by set_local_description.
        let answer_sdp = answer.sdp.clone();
        pc.set_local_description(answer)
            .await
            .map_err(|e| format!("set local description: {e}"))?;

        self.sessions.insert(
            session_id.to_string(),
            WebRtcSession {
                pc,
                local_track,
                call_id: None,
                mixer_tx,
            },
        );

        Ok(answer_sdp)
    }

    /// Link a WebRTC session to a call's mixer via channels.
    /// - `inbound_tx`: browser audio goes TO the mixer through this channel
    /// - `outbound_rx`: mixed audio comes FROM the mixer through this channel
    ///
    /// Returns `false` when `session_id` is unknown, `true` on success.
    pub async fn link_to_mixer(
        &mut self,
        session_id: &str,
        call_id: &str,
        inbound_tx: mpsc::Sender<RtpPacket>,
        outbound_rx: mpsc::Receiver<Vec<u8>>,
    ) -> bool {
        let session = match self.sessions.get_mut(session_id) {
            Some(s) => s,
            None => return false,
        };

        session.call_id = Some(call_id.to_string());

        // Set the mixer sender so the on_track loop starts forwarding.
        // Scoped so the lock is released before spawning the outbound task.
        {
            let mut tx = session.mixer_tx.lock().await;
            *tx = Some(inbound_tx);
        }

        // Spawn mixer→browser outbound task; it ends when outbound_rx closes.
        let local_track = session.local_track.clone();
        tokio::spawn(mixer_to_browser_loop(outbound_rx, local_track));

        true
    }

    /// Apply a trickled remote ICE candidate to the session's PeerConnection.
    ///
    /// Errors when the session is unknown or the candidate is rejected.
    pub async fn add_ice_candidate(
        &self,
        session_id: &str,
        candidate: &str,
        sdp_mid: Option<&str>,
        sdp_mline_index: Option<u16>,
    ) -> Result<(), String> {
        let session = self
            .sessions
            .get(session_id)
            .ok_or_else(|| format!("session {session_id} not found"))?;

        let init = RTCIceCandidateInit {
            candidate: candidate.to_string(),
            sdp_mid: sdp_mid.map(|s| s.to_string()),
            sdp_mline_index,
            ..Default::default()
        };

        session
            .pc
            .add_ice_candidate(init)
            .await
            .map_err(|e| format!("add ICE: {e}"))
    }

    /// Remove the session and close its PeerConnection.
    /// Closing an unknown session id is a no-op (Ok).
    pub async fn close_session(&mut self, session_id: &str) -> Result<(), String> {
        if let Some(session) = self.sessions.remove(session_id) {
            session
                .pc
                .close()
                .await
                .map_err(|e| format!("close: {e}"))?;
        }
        Ok(())
    }
}
|
||||
|
||||
/// Browser → Mixer audio forwarding loop.
|
||||
/// Reads Opus RTP from the browser track, sends raw Opus payload to the mixer channel.
|
||||
async fn browser_to_mixer_loop(
|
||||
track: Arc<webrtc::track::track_remote::TrackRemote>,
|
||||
mixer_tx: Arc<Mutex<Option<mpsc::Sender<RtpPacket>>>>,
|
||||
out_tx: OutTx,
|
||||
session_id: String,
|
||||
) {
|
||||
let mut buf = vec![0u8; 1500];
|
||||
let mut count = 0u64;
|
||||
|
||||
loop {
|
||||
match track.read(&mut buf).await {
|
||||
Ok((rtp_packet, _attributes)) => {
|
||||
count += 1;
|
||||
|
||||
let payload = &rtp_packet.payload;
|
||||
if payload.is_empty() {
|
||||
continue;
|
||||
}
|
||||
|
||||
// Send raw Opus payload to mixer (if linked).
|
||||
let tx = mixer_tx.lock().await;
|
||||
if let Some(ref tx) = *tx {
|
||||
let _ = tx
|
||||
.send(RtpPacket {
|
||||
payload: payload.to_vec(),
|
||||
payload_type: PT_OPUS,
|
||||
marker: rtp_packet.header.marker,
|
||||
seq: rtp_packet.header.sequence_number,
|
||||
timestamp: rtp_packet.header.timestamp,
|
||||
})
|
||||
.await;
|
||||
}
|
||||
drop(tx);
|
||||
|
||||
if count == 1 || count == 50 || count % 500 == 0 {
|
||||
emit_event(
|
||||
&out_tx,
|
||||
"webrtc_audio_rx",
|
||||
serde_json::json!({
|
||||
"session_id": session_id,
|
||||
"direction": "browser_to_mixer",
|
||||
"packet_count": count,
|
||||
}),
|
||||
);
|
||||
}
|
||||
}
|
||||
Err(_) => break, // Track ended.
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/// Mixer → Browser audio forwarding loop.
|
||||
/// Reads Opus-encoded RTP packets from the mixer and writes to the WebRTC track.
|
||||
async fn mixer_to_browser_loop(
|
||||
mut outbound_rx: mpsc::Receiver<Vec<u8>>,
|
||||
local_track: Arc<TrackLocalStaticRTP>,
|
||||
) {
|
||||
while let Some(rtp_data) = outbound_rx.recv().await {
|
||||
let _ = local_track.write(&rtp_data).await;
|
||||
}
|
||||
}
|
||||
8
rust/crates/sip-proto/Cargo.toml
Normal file
8
rust/crates/sip-proto/Cargo.toml
Normal file
@@ -0,0 +1,8 @@
|
||||
[package]
|
||||
name = "sip-proto"
|
||||
version = "0.1.0"
|
||||
edition = "2021"
|
||||
|
||||
[dependencies]
|
||||
md-5 = "0.10"
|
||||
rand = "0.8"
|
||||
393
rust/crates/sip-proto/src/dialog.rs
Normal file
393
rust/crates/sip-proto/src/dialog.rs
Normal file
@@ -0,0 +1,393 @@
|
||||
//! SIP dialog state machine (RFC 3261 §12).
|
||||
//!
|
||||
//! Tracks local/remote tags, CSeq counters, route set, and remote target.
|
||||
//! Provides methods to build in-dialog requests (BYE, re-INVITE, ACK, CANCEL).
|
||||
//!
|
||||
//! Ported from ts/sip/dialog.ts.
|
||||
|
||||
use crate::helpers::{generate_branch, generate_tag};
|
||||
use crate::message::SipMessage;
|
||||
|
||||
/// Dialog lifecycle state per RFC 3261 §12.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum DialogState {
    /// Created by a provisional response (or a received INVITE); not yet confirmed.
    Early,
    /// A 2xx final response has been seen/sent.
    Confirmed,
    /// Ended by a non-2xx final response or an explicit terminate().
    Terminated,
}
|
||||
|
||||
/// SIP dialog state per RFC 3261 §12.
///
/// Identifies the dialog (Call-ID + local/remote tags) and carries everything
/// needed to build in-dialog requests: CSeq counters, route set, and the
/// remote target URI.
#[derive(Debug, Clone)]
pub struct SipDialog {
    /// Call-ID shared by all messages in the dialog.
    pub call_id: String,
    /// Our tag (From tag on the UAC side, To tag on the UAS side).
    pub local_tag: String,
    /// Peer's tag; `None` until learned from a response (UAC) — set at
    /// construction on the UAS side.
    pub remote_tag: Option<String>,
    /// URI used in our From header of in-dialog requests.
    pub local_uri: String,
    /// URI used in the To header of in-dialog requests.
    pub remote_uri: String,
    /// CSeq number of the last request we sent (incremented by create_request).
    pub local_cseq: u32,
    /// CSeq number of the last request received from the peer.
    pub remote_cseq: u32,
    /// Route set learned from Record-Route headers (already ordered for sending).
    pub route_set: Vec<String>,
    /// Peer's Contact URI — where in-dialog requests are addressed.
    pub remote_target: String,
    /// Current dialog state (Early / Confirmed / Terminated).
    pub state: DialogState,
    /// Our address advertised in Via/Contact headers.
    pub local_host: String,
    /// Our port advertised in Via/Contact headers.
    pub local_port: u16,
}
|
||||
|
||||
impl SipDialog {
    /// Create a dialog from an INVITE we are sending (UAC side).
    /// The dialog enters Early state; call `process_response()` when responses arrive.
    pub fn from_uac_invite(invite: &SipMessage, local_host: &str, local_port: u16) -> Self {
        let from = invite.get_header("From").unwrap_or("");
        let to = invite.get_header("To").unwrap_or("");

        // Seed the local CSeq from the INVITE's "CSeq: <num> <method>" header;
        // fall back to 1 when absent/unparseable.
        let local_cseq = invite
            .get_header("CSeq")
            .and_then(|c| c.split_whitespace().next())
            .and_then(|s| s.parse().ok())
            .unwrap_or(1);

        Self {
            call_id: invite.call_id().to_string(),
            // Reuse the INVITE's From tag; generate one if it had none.
            local_tag: SipMessage::extract_tag(from)
                .map(|s| s.to_string())
                .unwrap_or_else(generate_tag),
            remote_tag: None,
            local_uri: SipMessage::extract_uri(from).unwrap_or("").to_string(),
            remote_uri: SipMessage::extract_uri(to).unwrap_or("").to_string(),
            local_cseq,
            remote_cseq: 0,
            route_set: Vec::new(),
            // Until a Contact is learned, target the INVITE's Request-URI
            // (falling back to the To URI).
            remote_target: invite
                .request_uri()
                .or_else(|| SipMessage::extract_uri(to))
                .unwrap_or("")
                .to_string(),
            state: DialogState::Early,
            local_host: local_host.to_string(),
            local_port,
        }
    }

    /// Create a dialog from an INVITE we received (UAS side).
    ///
    /// `local_tag` is the To tag we will place in our responses. The remote
    /// target is the caller's Contact (falling back to the From URI).
    pub fn from_uas_invite(
        invite: &SipMessage,
        local_tag: &str,
        local_host: &str,
        local_port: u16,
    ) -> Self {
        let from = invite.get_header("From").unwrap_or("");
        let to = invite.get_header("To").unwrap_or("");
        let contact = invite.get_header("Contact");

        let remote_target = contact
            .and_then(SipMessage::extract_uri)
            .or_else(|| SipMessage::extract_uri(from))
            .unwrap_or("")
            .to_string();

        Self {
            call_id: invite.call_id().to_string(),
            local_tag: local_tag.to_string(),
            // On the UAS side the remote tag is the caller's From tag,
            // known immediately.
            remote_tag: SipMessage::extract_tag(from).map(|s| s.to_string()),
            local_uri: SipMessage::extract_uri(to).unwrap_or("").to_string(),
            remote_uri: SipMessage::extract_uri(from).unwrap_or("").to_string(),
            local_cseq: 0,
            remote_cseq: 0,
            route_set: Vec::new(),
            remote_target,
            state: DialogState::Early,
            local_host: local_host.to_string(),
            local_port,
        }
    }

    /// Update dialog state from a received response: learn the remote tag,
    /// refresh the remote target from Contact, capture the route set from
    /// Record-Route while Early, and transition to Confirmed/Terminated on
    /// final responses.
    pub fn process_response(&mut self, response: &SipMessage) {
        let to = response.get_header("To").unwrap_or("");
        let tag = SipMessage::extract_tag(to).map(|s| s.to_string());
        let code = response.status_code().unwrap_or(0);

        // Always update remoteTag from 2xx (RFC 3261 §12.1.2); for non-2xx,
        // only adopt a tag if we do not have one yet (early dialog).
        if let Some(ref t) = tag {
            if code >= 200 && code < 300 {
                self.remote_tag = Some(t.clone());
            } else if self.remote_tag.is_none() {
                self.remote_tag = Some(t.clone());
            }
        }

        // Update remote target from Contact.
        if let Some(contact) = response.get_header("Contact") {
            if let Some(uri) = SipMessage::extract_uri(contact) {
                self.remote_target = uri.to_string();
            }
        }

        // Record-Route → route set (in reverse for UAC). Only captured while
        // Early; the route set is fixed once the dialog is established.
        if self.state == DialogState::Early {
            let rr: Vec<String> = response
                .headers
                .iter()
                .filter(|(n, _)| n.to_ascii_lowercase() == "record-route")
                .map(|(_, v)| v.clone())
                .collect();
            if !rr.is_empty() {
                let mut reversed = rr;
                reversed.reverse();
                self.route_set = reversed;
            }
        }

        // Final responses settle the dialog state; provisional (1xx) leave it Early.
        if code >= 200 && code < 300 {
            self.state = DialogState::Confirmed;
        } else if code >= 300 {
            self.state = DialogState::Terminated;
        }
    }

    /// Build an in-dialog request (BYE, re-INVITE, INFO, ...).
    /// Automatically increments the local CSeq.
    ///
    /// Header layout: Via / From / To / Call-ID / CSeq / Max-Forwards, then
    /// one Route per route-set entry, Contact, any `extra_headers`,
    /// Content-Type (only when a body is present), and Content-Length last.
    pub fn create_request(
        &mut self,
        method: &str,
        body: Option<&str>,
        content_type: Option<&str>,
        extra_headers: Option<Vec<(String, String)>>,
    ) -> SipMessage {
        self.local_cseq += 1;
        // Fresh branch: each new transaction needs its own Via branch.
        let branch = generate_branch();

        // ";tag=<remote>" suffix for To — empty while the tag is unknown.
        let remote_tag_str = self
            .remote_tag
            .as_ref()
            .map(|t| format!(";tag={t}"))
            .unwrap_or_default();

        let mut headers = vec![
            (
                "Via".to_string(),
                format!(
                    "SIP/2.0/UDP {}:{};branch={branch};rport",
                    self.local_host, self.local_port
                ),
            ),
            (
                "From".to_string(),
                format!("<{}>;tag={}", self.local_uri, self.local_tag),
            ),
            (
                "To".to_string(),
                format!("<{}>{remote_tag_str}", self.remote_uri),
            ),
            ("Call-ID".to_string(), self.call_id.clone()),
            ("CSeq".to_string(), format!("{} {method}", self.local_cseq)),
            ("Max-Forwards".to_string(), "70".to_string()),
        ];

        for route in &self.route_set {
            headers.push(("Route".to_string(), route.clone()));
        }

        headers.push((
            "Contact".to_string(),
            format!("<sip:{}:{}>", self.local_host, self.local_port),
        ));

        if let Some(extra) = extra_headers {
            headers.extend(extra);
        }

        let body_str = body.unwrap_or("");
        if !body_str.is_empty() {
            if let Some(ct) = content_type {
                headers.push(("Content-Type".to_string(), ct.to_string()));
            }
        }
        headers.push(("Content-Length".to_string(), body_str.len().to_string()));

        let ruri = self.resolve_ruri();
        SipMessage::new(
            format!("{method} {ruri} SIP/2.0"),
            headers,
            body_str.to_string(),
        )
    }

    /// Build an ACK for a 2xx response to INVITE (RFC 3261 §13.2.2.4).
    ///
    /// Uses the CURRENT local CSeq number with method ACK (the 2xx-ACK
    /// matches the INVITE's CSeq number) and a fresh Via branch, since the
    /// ACK for a 2xx is its own transaction.
    pub fn create_ack(&self) -> SipMessage {
        let branch = generate_branch();
        let remote_tag_str = self
            .remote_tag
            .as_ref()
            .map(|t| format!(";tag={t}"))
            .unwrap_or_default();

        let mut headers = vec![
            (
                "Via".to_string(),
                format!(
                    "SIP/2.0/UDP {}:{};branch={branch};rport",
                    self.local_host, self.local_port
                ),
            ),
            (
                "From".to_string(),
                format!("<{}>;tag={}", self.local_uri, self.local_tag),
            ),
            (
                "To".to_string(),
                format!("<{}>{remote_tag_str}", self.remote_uri),
            ),
            ("Call-ID".to_string(), self.call_id.clone()),
            ("CSeq".to_string(), format!("{} ACK", self.local_cseq)),
            ("Max-Forwards".to_string(), "70".to_string()),
        ];

        for route in &self.route_set {
            headers.push(("Route".to_string(), route.clone()));
        }

        headers.push(("Content-Length".to_string(), "0".to_string()));

        let ruri = self.resolve_ruri();
        SipMessage::new(format!("ACK {ruri} SIP/2.0"), headers, String::new())
    }

    /// Build a CANCEL for the original INVITE (same branch, CSeq).
    ///
    /// Per RFC 3261 §9.1 the CANCEL must copy the INVITE's Via (same branch),
    /// From, To, Call-ID, and CSeq number — hence the headers are lifted from
    /// `original_invite` rather than rebuilt.
    pub fn create_cancel(&self, original_invite: &SipMessage) -> SipMessage {
        let via = original_invite.get_header("Via").unwrap_or("").to_string();
        let from = original_invite.get_header("From").unwrap_or("").to_string();
        let to = original_invite.get_header("To").unwrap_or("").to_string();

        let headers = vec![
            ("Via".to_string(), via),
            ("From".to_string(), from),
            ("To".to_string(), to),
            ("Call-ID".to_string(), self.call_id.clone()),
            ("CSeq".to_string(), format!("{} CANCEL", self.local_cseq)),
            ("Max-Forwards".to_string(), "70".to_string()),
            ("Content-Length".to_string(), "0".to_string()),
        ];

        // CANCEL targets the same Request-URI as the INVITE it cancels.
        let ruri = original_invite
            .request_uri()
            .unwrap_or(&self.remote_target)
            .to_string();

        SipMessage::new(format!("CANCEL {ruri} SIP/2.0"), headers, String::new())
    }

    /// Transition the dialog to terminated state.
    pub fn terminate(&mut self) {
        self.state = DialogState::Terminated;
    }

    /// Resolve Request-URI from route set or remote target.
    ///
    /// Loose routing (top route has `;lr`): R-URI is the remote target and
    /// Route headers carry the route set. Strict routing: R-URI is the top
    /// route itself.
    ///
    /// NOTE(review): for strict routing, RFC 3261 §12.2.1.1 also calls for
    /// rewriting the Route set (drop the top route, append the remote target);
    /// create_request currently sends the route set unchanged — confirm
    /// whether strict-routing peers are actually in scope.
    fn resolve_ruri(&self) -> &str {
        if !self.route_set.is_empty() {
            if let Some(top_route) = SipMessage::extract_uri(&self.route_set[0]) {
                if top_route.contains(";lr") {
                    return &self.remote_target; // loose routing
                }
                return top_route; // strict routing
            }
        }
        &self.remote_target
    }
}
|
||||
|
||||
#[cfg(test)]
mod tests {
    use super::*;
    use crate::message::RequestOptions;

    /// Build a canonical outbound INVITE used as the fixture for every test:
    /// caller@proxy → callee@host, From tag "from-tag", CSeq 1.
    fn make_invite() -> SipMessage {
        SipMessage::create_request(
            "INVITE",
            "sip:callee@host",
            RequestOptions {
                via_host: "192.168.1.1".to_string(),
                via_port: 5070,
                via_transport: None,
                via_branch: Some("z9hG4bK-test".to_string()),
                from_uri: "sip:caller@proxy".to_string(),
                from_display_name: None,
                from_tag: Some("from-tag".to_string()),
                to_uri: "sip:callee@host".to_string(),
                to_display_name: None,
                to_tag: None,
                call_id: Some("test-dialog-call".to_string()),
                cseq: Some(1),
                contact: Some("<sip:caller@192.168.1.1:5070>".to_string()),
                max_forwards: None,
                body: None,
                content_type: None,
                extra_headers: None,
            },
        )
    }

    /// Early → Confirmed on 2xx, adopting the remote tag and Contact target.
    #[test]
    fn uac_dialog_lifecycle() {
        let invite = make_invite();
        let mut dialog = SipDialog::from_uac_invite(&invite, "192.168.1.1", 5070);

        assert_eq!(dialog.state, DialogState::Early);
        assert_eq!(dialog.call_id, "test-dialog-call");
        assert_eq!(dialog.local_tag, "from-tag");
        assert!(dialog.remote_tag.is_none());

        // Simulate 200 OK
        let response = SipMessage::create_response(
            200,
            "OK",
            &invite,
            Some(crate::message::ResponseOptions {
                to_tag: Some("remote-tag".to_string()),
                contact: Some("<sip:callee@10.0.0.1:5060>".to_string()),
                ..Default::default()
            }),
        );

        dialog.process_response(&response);
        assert_eq!(dialog.state, DialogState::Confirmed);
        assert_eq!(dialog.remote_tag.as_deref(), Some("remote-tag"));
        assert_eq!(dialog.remote_target, "sip:callee@10.0.0.1:5060");
    }

    /// BYE increments CSeq and carries the remote tag in To.
    #[test]
    fn create_bye() {
        let invite = make_invite();
        let mut dialog = SipDialog::from_uac_invite(&invite, "192.168.1.1", 5070);
        dialog.remote_tag = Some("remote-tag".to_string());
        dialog.state = DialogState::Confirmed;

        let bye = dialog.create_request("BYE", None, None, None);
        assert_eq!(bye.method(), Some("BYE"));
        assert_eq!(bye.call_id(), "test-dialog-call");
        assert_eq!(dialog.local_cseq, 2);
        let to = bye.get_header("To").unwrap();
        assert!(to.contains("tag=remote-tag"));
    }

    /// ACK reuses the current CSeq number with method ACK.
    #[test]
    fn create_ack() {
        let invite = make_invite();
        let mut dialog = SipDialog::from_uac_invite(&invite, "192.168.1.1", 5070);
        dialog.remote_tag = Some("remote-tag".to_string());

        let ack = dialog.create_ack();
        assert_eq!(ack.method(), Some("ACK"));
        assert!(ack.get_header("CSeq").unwrap().contains("ACK"));
    }

    /// CANCEL targets the INVITE's Request-URI with method CANCEL in CSeq.
    #[test]
    fn create_cancel() {
        let invite = make_invite();
        let dialog = SipDialog::from_uac_invite(&invite, "192.168.1.1", 5070);

        let cancel = dialog.create_cancel(&invite);
        assert_eq!(cancel.method(), Some("CANCEL"));
        assert!(cancel.get_header("CSeq").unwrap().contains("CANCEL"));
        assert!(cancel.start_line.contains("sip:callee@host"));
    }
}
|
||||
339
rust/crates/sip-proto/src/helpers.rs
Normal file
339
rust/crates/sip-proto/src/helpers.rs
Normal file
@@ -0,0 +1,339 @@
|
||||
//! SIP helper utilities — ID generation, codec registry, SDP builder,
|
||||
//! Digest authentication, SDP parser, and MWI body builder.
|
||||
|
||||
use md5::{Digest, Md5};
|
||||
use rand::Rng;
|
||||
|
||||
// ---- ID generators ---------------------------------------------------------
|
||||
|
||||
/// Generate a random SIP Call-ID (32 hex chars).
|
||||
pub fn generate_call_id(domain: Option<&str>) -> String {
|
||||
let id = random_hex(16);
|
||||
match domain {
|
||||
Some(d) => format!("{id}@{d}"),
|
||||
None => id,
|
||||
}
|
||||
}
|
||||
|
||||
/// Generate a random SIP From/To tag (16 hex chars).
|
||||
pub fn generate_tag() -> String {
|
||||
random_hex(8)
|
||||
}
|
||||
|
||||
/// Generate an RFC 3261 compliant Via branch (starts with `z9hG4bK` magic cookie).
|
||||
pub fn generate_branch() -> String {
|
||||
format!("z9hG4bK-{}", random_hex(8))
|
||||
}
|
||||
|
||||
fn random_hex(bytes: usize) -> String {
|
||||
let mut rng = rand::thread_rng();
|
||||
(0..bytes)
|
||||
.map(|_| format!("{:02x}", rng.gen::<u8>()))
|
||||
.collect()
|
||||
}
|
||||
|
||||
// ---- Codec registry --------------------------------------------------------
|
||||
|
||||
/// Look up the rtpmap name for a static RTP payload type.
///
/// Returns `"unknown"` for payload types outside the table; callers use
/// that sentinel to skip emitting an `a=rtpmap` line.
pub fn codec_name(pt: u8) -> &'static str {
    // Static payload-type registry used by this proxy.
    const TABLE: &[(u8, &str)] = &[
        (0, "PCMU/8000"),
        (3, "GSM/8000"),
        (4, "G723/8000"),
        (8, "PCMA/8000"),
        (9, "G722/8000"),
        (18, "G729/8000"),
        (101, "telephone-event/8000"),
    ];
    TABLE
        .iter()
        .find(|(code, _)| *code == pt)
        .map(|(_, name)| *name)
        .unwrap_or("unknown")
}
|
||||
|
||||
// ---- SDP builder -----------------------------------------------------------
|
||||
|
||||
/// Options for building an SDP body.
pub struct SdpOptions<'a> {
    /// Address placed in the o= and c= lines.
    pub ip: &'a str,
    /// Audio media port for the m= line.
    pub port: u16,
    /// RTP payload types for the m= line, in preference order.
    pub payload_types: &'a [u8],
    /// o= session id; a random number is generated when None.
    pub session_id: Option<&'a str>,
    /// s= line value; "-" when None.
    pub session_name: Option<&'a str>,
    /// Media direction attribute ("sendrecv"/"sendonly"/…); "sendrecv" when None.
    pub direction: Option<&'a str>,
    /// Extra a= attribute lines, given WITHOUT the "a=" prefix.
    pub attributes: &'a [&'a str],
}

impl<'a> Default for SdpOptions<'a> {
    /// Defaults: wildcard address, port 0, and a G.722/PCMU/PCMA +
    /// telephone-event offer with no extra attributes.
    fn default() -> Self {
        Self {
            ip: "0.0.0.0",
            port: 0,
            payload_types: &[9, 0, 8, 101],
            session_id: None,
            session_name: None,
            direction: None,
            attributes: &[],
        }
    }
}
|
||||
|
||||
/// Build a minimal SDP body suitable for SIP INVITE offers/answers.
|
||||
pub fn build_sdp(opts: &SdpOptions) -> String {
|
||||
let session_id = opts
|
||||
.session_id
|
||||
.map(|s| s.to_string())
|
||||
.unwrap_or_else(|| format!("{}", rand::thread_rng().gen_range(0..1_000_000_000u64)));
|
||||
let session_name = opts.session_name.unwrap_or("-");
|
||||
let direction = opts.direction.unwrap_or("sendrecv");
|
||||
let pts: Vec<String> = opts.payload_types.iter().map(|pt| pt.to_string()).collect();
|
||||
|
||||
let mut lines = vec![
|
||||
"v=0".to_string(),
|
||||
format!("o=- {session_id} {session_id} IN IP4 {}", opts.ip),
|
||||
format!("s={session_name}"),
|
||||
format!("c=IN IP4 {}", opts.ip),
|
||||
"t=0 0".to_string(),
|
||||
format!("m=audio {} RTP/AVP {}", opts.port, pts.join(" ")),
|
||||
];
|
||||
|
||||
for &pt in opts.payload_types {
|
||||
let name = codec_name(pt);
|
||||
if name != "unknown" {
|
||||
lines.push(format!("a=rtpmap:{pt} {name}"));
|
||||
}
|
||||
if pt == 101 {
|
||||
lines.push("a=fmtp:101 0-16".to_string());
|
||||
}
|
||||
}
|
||||
|
||||
lines.push(format!("a={direction}"));
|
||||
for attr in opts.attributes {
|
||||
lines.push(format!("a={attr}"));
|
||||
}
|
||||
lines.push(String::new()); // trailing CRLF
|
||||
|
||||
lines.join("\r\n")
|
||||
}
|
||||
|
||||
// ---- SIP Digest authentication (RFC 2617) ----------------------------------
|
||||
|
||||
/// Parsed fields from a Proxy-Authenticate or WWW-Authenticate header.
#[derive(Debug, Clone)]
pub struct DigestChallenge {
    /// Protection realm (mandatory).
    pub realm: String,
    /// Server nonce (mandatory).
    pub nonce: String,
    /// Digest algorithm, e.g. "MD5"; None when the server omitted it.
    pub algorithm: Option<String>,
    /// Opaque token to be echoed back, if present.
    pub opaque: Option<String>,
    /// Quality-of-protection options, if present.
    pub qop: Option<String>,
}

/// Parse a `Proxy-Authenticate` or `WWW-Authenticate` header value.
///
/// Returns `None` when the scheme is not Digest or when `realm`/`nonce`
/// are missing. Key lookup is case-insensitive and accepts quoted or
/// unquoted parameter values.
///
/// Fix vs. previous revision: key matching used a plain substring search,
/// so looking for `nonce=` could match inside `cnonce="…"` and return the
/// wrong value. Keys are now required to start at a parameter boundary
/// (start of string, or after a comma/whitespace).
pub fn parse_digest_challenge(header: &str) -> Option<DigestChallenge> {
    let lower = header.to_ascii_lowercase();
    if !lower.starts_with("digest ") {
        return None;
    }
    let params = &header[7..];
    let params_lower = &lower[7..];

    let get = |key: &str| -> Option<String> {
        let pat = format!("{key}=");
        let mut search_from = 0;
        while let Some(rel) = params_lower[search_from..].find(&pat) {
            let pos = search_from + rel;
            // The key must begin a parameter, not sit inside a longer
            // token (e.g. "nonce=" inside "cnonce=").
            let at_boundary = pos == 0
                || matches!(params.as_bytes()[pos - 1], b',' | b' ' | b'\t');
            if at_boundary {
                let after = params[pos + pat.len()..].trim_start();
                // Quoted value: take everything up to the closing quote.
                if let Some(rest) = after.strip_prefix('"') {
                    let end = rest.find('"')?;
                    return Some(rest[..end].to_string());
                }
                // Unquoted value: runs to the next comma or whitespace.
                let end = after
                    .find(|c: char| c == ',' || c.is_whitespace())
                    .unwrap_or(after.len());
                return Some(after[..end].to_string());
            }
            search_from = pos + pat.len();
        }
        None
    };

    let realm = get("realm")?;
    let nonce = get("nonce")?;

    Some(DigestChallenge {
        realm,
        nonce,
        algorithm: get("algorithm"),
        opaque: get("opaque"),
        qop: get("qop"),
    })
}
|
||||
|
||||
fn md5_hex(s: &str) -> String {
|
||||
let mut hasher = Md5::new();
|
||||
hasher.update(s.as_bytes());
|
||||
format!("{:x}", hasher.finalize())
|
||||
}
|
||||
|
||||
/// Compute a SIP Digest Authorization header value.
|
||||
pub fn compute_digest_auth(
|
||||
username: &str,
|
||||
password: &str,
|
||||
realm: &str,
|
||||
nonce: &str,
|
||||
method: &str,
|
||||
uri: &str,
|
||||
algorithm: Option<&str>,
|
||||
opaque: Option<&str>,
|
||||
) -> String {
|
||||
let ha1 = md5_hex(&format!("{username}:{realm}:{password}"));
|
||||
let ha2 = md5_hex(&format!("{method}:{uri}"));
|
||||
let response = md5_hex(&format!("{ha1}:{nonce}:{ha2}"));
|
||||
let alg = algorithm.unwrap_or("MD5");
|
||||
|
||||
let mut header = format!(
|
||||
"Digest username=\"{username}\", realm=\"{realm}\", \
|
||||
nonce=\"{nonce}\", uri=\"{uri}\", response=\"{response}\", \
|
||||
algorithm={alg}"
|
||||
);
|
||||
if let Some(op) = opaque {
|
||||
header.push_str(&format!(", opaque=\"{op}\""));
|
||||
}
|
||||
header
|
||||
}
|
||||
|
||||
// ---- SDP parser ------------------------------------------------------------
|
||||
|
||||
use crate::Endpoint;
|
||||
|
||||
/// Parse the audio media port, connection address, and preferred codec from an SDP body.
|
||||
pub fn parse_sdp_endpoint(sdp: &str) -> Option<Endpoint> {
|
||||
let mut addr: Option<&str> = None;
|
||||
let mut port: Option<u16> = None;
|
||||
let mut codec_pt: Option<u8> = None;
|
||||
|
||||
let normalized = sdp.replace("\r\n", "\n");
|
||||
for raw in normalized.split('\n') {
|
||||
let line = raw.trim();
|
||||
if let Some(rest) = line.strip_prefix("c=IN IP4 ") {
|
||||
addr = Some(rest.trim());
|
||||
} else if let Some(rest) = line.strip_prefix("m=audio ") {
|
||||
// m=audio <port> RTP/AVP <pt1> [<pt2> ...]
|
||||
let parts: Vec<&str> = rest.split_whitespace().collect();
|
||||
if !parts.is_empty() {
|
||||
port = parts[0].parse().ok();
|
||||
}
|
||||
// parts[1] is "RTP/AVP" or similar, parts[2..] are payload types.
|
||||
// The first PT is the preferred codec.
|
||||
if parts.len() > 2 {
|
||||
codec_pt = parts[2].parse::<u8>().ok();
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
match (addr, port) {
|
||||
(Some(a), Some(p)) => Some(Endpoint {
|
||||
address: a.to_string(),
|
||||
port: p,
|
||||
codec_pt,
|
||||
}),
|
||||
_ => None,
|
||||
}
|
||||
}
|
||||
|
||||
// ---- MWI (RFC 3842) --------------------------------------------------------
|
||||
|
||||
/// Result of building an MWI NOTIFY payload (RFC 3842 message-summary):
/// the body text, its MIME type, and the Event/Subscription-State headers
/// that must accompany it.
pub struct MwiResult {
    pub body: String,
    pub content_type: &'static str,
    pub extra_headers: Vec<(String, String)>,
}

/// Build the body and extra headers for an MWI NOTIFY (RFC 3842).
///
/// `Messages-Waiting` is "yes" exactly when there is at least one new message.
pub fn build_mwi_body(new_messages: u32, old_messages: u32, account_uri: &str) -> MwiResult {
    let waiting = match new_messages {
        0 => "no",
        _ => "yes",
    };

    let mut body = String::new();
    body.push_str(&format!("Messages-Waiting: {waiting}\r\n"));
    body.push_str(&format!("Message-Account: {account_uri}\r\n"));
    body.push_str(&format!("Voice-Message: {new_messages}/{old_messages}\r\n"));

    let extra_headers = vec![
        ("Event".to_string(), "message-summary".to_string()),
        (
            "Subscription-State".to_string(),
            "terminated;reason=noresource".to_string(),
        ),
    ];

    MwiResult {
        body,
        content_type: "application/simple-message-summary",
        extra_headers,
    }
}
|
||||
|
||||
#[cfg(test)]
mod tests {
    use super::*;

    /// Branch IDs must carry the RFC 3261 magic cookie plus random suffix.
    #[test]
    fn test_generate_branch_has_magic_cookie() {
        let branch = generate_branch();
        assert!(branch.starts_with("z9hG4bK-"));
        assert!(branch.len() > 8);
    }

    /// Spot-check the static payload-type registry, including the sentinel.
    #[test]
    fn test_codec_name() {
        assert_eq!(codec_name(0), "PCMU/8000");
        assert_eq!(codec_name(9), "G722/8000");
        assert_eq!(codec_name(101), "telephone-event/8000");
        assert_eq!(codec_name(255), "unknown");
    }

    /// The generated SDP carries the m-line, c-line, rtpmaps, DTMF fmtp,
    /// and the default sendrecv direction.
    #[test]
    fn test_build_sdp() {
        let sdp = build_sdp(&SdpOptions {
            ip: "192.168.1.1",
            port: 20000,
            payload_types: &[9, 0, 101],
            ..Default::default()
        });
        assert!(sdp.contains("m=audio 20000 RTP/AVP 9 0 101"));
        assert!(sdp.contains("c=IN IP4 192.168.1.1"));
        assert!(sdp.contains("a=rtpmap:9 G722/8000"));
        assert!(sdp.contains("a=fmtp:101 0-16"));
        assert!(sdp.contains("a=sendrecv"));
    }

    /// Quoted and unquoted parameters are both parsed from a challenge.
    #[test]
    fn test_parse_digest_challenge() {
        let header = r#"Digest realm="asterisk", nonce="abc123", algorithm=MD5, opaque="xyz""#;
        let ch = parse_digest_challenge(header).unwrap();
        assert_eq!(ch.realm, "asterisk");
        assert_eq!(ch.nonce, "abc123");
        assert_eq!(ch.algorithm.as_deref(), Some("MD5"));
        assert_eq!(ch.opaque.as_deref(), Some("xyz"));
    }

    /// The Authorization value carries the expected quoted fields.
    #[test]
    fn test_compute_digest_auth() {
        let auth = compute_digest_auth(
            "user", "pass", "realm", "nonce", "REGISTER", "sip:host", None, None,
        );
        assert!(auth.starts_with("Digest "));
        assert!(auth.contains("username=\"user\""));
        assert!(auth.contains("realm=\"realm\""));
        assert!(auth.contains("response=\""));
    }

    /// Address and port are extracted from c= and m=audio lines.
    #[test]
    fn test_parse_sdp_endpoint() {
        let sdp = "v=0\r\nc=IN IP4 10.0.0.1\r\nm=audio 5060 RTP/AVP 0\r\n";
        let ep = parse_sdp_endpoint(sdp).unwrap();
        assert_eq!(ep.address, "10.0.0.1");
        assert_eq!(ep.port, 5060);
    }

    /// MWI body reflects waiting state, counts, and the RFC 3842 MIME type.
    #[test]
    fn test_build_mwi_body() {
        let mwi = build_mwi_body(3, 5, "sip:user@host");
        assert!(mwi.body.contains("Messages-Waiting: yes"));
        assert!(mwi.body.contains("Voice-Message: 3/5"));
        assert_eq!(mwi.content_type, "application/simple-message-summary");
    }
}
|
||||
19
rust/crates/sip-proto/src/lib.rs
Normal file
19
rust/crates/sip-proto/src/lib.rs
Normal file
@@ -0,0 +1,19 @@
|
||||
//! SIP protocol library for the proxy engine.
|
||||
//!
|
||||
//! Provides SIP message parsing/serialization, dialog state management,
|
||||
//! SDP handling, Digest authentication, and URI rewriting.
|
||||
//! Ported from the TypeScript `ts/sip/` library.
|
||||
|
||||
pub mod dialog;
|
||||
pub mod helpers;
|
||||
pub mod message;
|
||||
pub mod rewrite;
|
||||
|
||||
/// Network endpoint (address + port + optional negotiated codec).
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct Endpoint {
    /// Host address, e.g. as read from an SDP `c=` line.
    pub address: String,
    /// UDP port, e.g. as read from an SDP `m=audio` line.
    pub port: u16,
    /// First payload type from the SDP `m=audio` line (the preferred codec).
    pub codec_pt: Option<u8>,
}
|
||||
617
rust/crates/sip-proto/src/message.rs
Normal file
617
rust/crates/sip-proto/src/message.rs
Normal file
@@ -0,0 +1,617 @@
|
||||
//! SIP message parsing, serialization, inspection, mutation, and factory methods.
|
||||
//!
|
||||
//! Ported from ts/sip/message.ts.
|
||||
|
||||
use crate::helpers::{generate_branch, generate_call_id, generate_tag};
|
||||
|
||||
/// A parsed SIP message (request or response).
#[derive(Debug, Clone)]
pub struct SipMessage {
    /// Raw start line, e.g. "INVITE sip:user@host SIP/2.0" or "SIP/2.0 200 OK".
    pub start_line: String,
    /// Ordered (name, value) pairs; duplicate names (e.g. multiple Via
    /// headers) are preserved in arrival order.
    pub headers: Vec<(String, String)>,
    /// Message body; empty string when the message has no body.
    pub body: String,
}
|
||||
|
||||
impl SipMessage {
    /// Construct a message directly from its parts (no validation).
    pub fn new(start_line: String, headers: Vec<(String, String)>, body: String) -> Self {
        Self {
            start_line,
            headers,
            body,
        }
    }

    // ---- Parsing -----------------------------------------------------------

    /// Parse a raw buffer into a SipMessage. Returns None for invalid data.
    pub fn parse(buf: &[u8]) -> Option<Self> {
        if buf.is_empty() {
            return None;
        }
        // First byte must be ASCII A-z.
        // NOTE(review): the 0x41..=0x7a range also admits the punctuation
        // between 'Z' and 'a' ([\]^_`); this is only a cheap pre-filter —
        // is_sip_first_line() below does the real validation.
        if buf[0] < 0x41 || buf[0] > 0x7a {
            return None;
        }

        let text = std::str::from_utf8(buf).ok()?;

        // Split head from body on the first blank line; accept both CRLF
        // and bare-LF framing. No separator means "no body".
        let (head, body) = if let Some(sep) = text.find("\r\n\r\n") {
            (&text[..sep], &text[sep + 4..])
        } else if let Some(sep) = text.find("\n\n") {
            (&text[..sep], &text[sep + 2..])
        } else {
            (text, "")
        };

        let normalized = head.replace("\r\n", "\n");
        let lines: Vec<&str> = normalized.split('\n').collect();
        if lines.is_empty() || lines[0].is_empty() {
            return None;
        }

        let start_line = lines[0];
        // Validate: must be a SIP request or response start line.
        if !is_sip_first_line(start_line) {
            return None;
        }

        // Header lines: "Name: value". Lines without a colon are skipped.
        // NOTE(review): RFC-style folded continuation lines are not unfolded
        // here — confirm upstream peers never fold headers.
        let mut headers = Vec::new();
        for &line in &lines[1..] {
            let trimmed = line.trim();
            if trimmed.is_empty() {
                continue;
            }
            if let Some(colon) = line.find(':') {
                let name = line[..colon].trim().to_string();
                let value = line[colon + 1..].trim().to_string();
                headers.push((name, value));
            }
        }

        Some(SipMessage {
            start_line: start_line.to_string(),
            headers,
            body: body.to_string(),
        })
    }

    // ---- Serialization -----------------------------------------------------

    /// Serialize the message to a byte buffer suitable for UDP transmission.
    /// Head lines are CRLF-terminated; the body is appended verbatim.
    pub fn serialize(&self) -> Vec<u8> {
        let mut head = self.start_line.clone();
        for (name, value) in &self.headers {
            head.push_str("\r\n");
            head.push_str(name);
            head.push_str(": ");
            head.push_str(value);
        }
        head.push_str("\r\n\r\n");

        let mut buf = head.into_bytes();
        if !self.body.is_empty() {
            buf.extend_from_slice(self.body.as_bytes());
        }
        buf
    }

    // ---- Inspectors --------------------------------------------------------

    /// True for requests — any start line that does not begin with "SIP/".
    pub fn is_request(&self) -> bool {
        !self.start_line.starts_with("SIP/")
    }

    /// True for responses — start line begins with "SIP/".
    pub fn is_response(&self) -> bool {
        self.start_line.starts_with("SIP/")
    }

    /// Request method (INVITE, REGISTER, ...) or None for responses.
    pub fn method(&self) -> Option<&str> {
        if !self.is_request() {
            return None;
        }
        self.start_line.split_whitespace().next()
    }

    /// Response status code or None for requests.
    pub fn status_code(&self) -> Option<u16> {
        if !self.is_response() {
            return None;
        }
        self.start_line
            .split_whitespace()
            .nth(1)
            .and_then(|s| s.parse().ok())
    }

    /// Call-ID header value; falls back to the literal "noid" when absent.
    pub fn call_id(&self) -> &str {
        self.get_header("Call-ID").unwrap_or("noid")
    }

    /// Method from the CSeq header (e.g. "INVITE").
    pub fn cseq_method(&self) -> Option<&str> {
        let cseq = self.get_header("CSeq")?;
        cseq.split_whitespace().nth(1)
    }

    /// True for INVITE, SUBSCRIBE, REFER, NOTIFY, UPDATE.
    pub fn is_dialog_establishing(&self) -> bool {
        matches!(
            self.method(),
            Some("INVITE" | "SUBSCRIBE" | "REFER" | "NOTIFY" | "UPDATE")
        )
    }

    /// True when the body carries an SDP payload.
    /// Requires both a non-empty body and a Content-Type of application/sdp.
    pub fn has_sdp_body(&self) -> bool {
        if self.body.is_empty() {
            return false;
        }
        let ct = self.get_header("Content-Type").unwrap_or("");
        ct.to_ascii_lowercase().starts_with("application/sdp")
    }

    // ---- Header accessors --------------------------------------------------

    /// Get the first header value matching `name` (case-insensitive).
    pub fn get_header(&self, name: &str) -> Option<&str> {
        let nl = name.to_ascii_lowercase();
        for (n, v) in &self.headers {
            if n.to_ascii_lowercase() == nl {
                return Some(v.as_str());
            }
        }
        None
    }

    /// Overwrites the first header with the given name, or appends it.
    /// Only the first match is rewritten; later duplicates are untouched.
    pub fn set_header(&mut self, name: &str, value: &str) -> &mut Self {
        let nl = name.to_ascii_lowercase();
        for h in &mut self.headers {
            if h.0.to_ascii_lowercase() == nl {
                h.1 = value.to_string();
                return self;
            }
        }
        self.headers.push((name.to_string(), value.to_string()));
        self
    }

    /// Inserts a header at the top of the header list.
    pub fn prepend_header(&mut self, name: &str, value: &str) -> &mut Self {
        self.headers
            .insert(0, (name.to_string(), value.to_string()));
        self
    }

    /// Removes all headers with the given name (case-insensitive).
    pub fn remove_header(&mut self, name: &str) -> &mut Self {
        let nl = name.to_ascii_lowercase();
        self.headers.retain(|(n, _)| n.to_ascii_lowercase() != nl);
        self
    }

    /// Recalculates Content-Length to match the current body.
    pub fn update_content_length(&mut self) -> &mut Self {
        let len = self.body.len();
        self.set_header("Content-Length", &len.to_string())
    }

    // ---- Start-line mutation -----------------------------------------------

    /// Replace the Request-URI (second token) of a request start line.
    /// No-op on responses or malformed start lines.
    pub fn set_request_uri(&mut self, uri: &str) -> &mut Self {
        if !self.is_request() {
            return self;
        }
        let parts: Vec<&str> = self.start_line.splitn(3, ' ').collect();
        if parts.len() >= 3 {
            self.start_line = format!("{} {} {}", parts[0], uri, parts[2]);
        }
        self
    }

    /// Returns the Request-URI (second token) of a request start line.
    pub fn request_uri(&self) -> Option<&str> {
        if !self.is_request() {
            return None;
        }
        self.start_line.split_whitespace().nth(1)
    }

    // ---- Factory methods ---------------------------------------------------

    /// Build a new SIP request.
    ///
    /// Defaults (when the corresponding option is None): UDP transport,
    /// generated branch/tag/Call-ID, CSeq 1, Max-Forwards 70. Content-Type
    /// is emitted only when a non-empty body is present; Content-Length is
    /// always appended last.
    pub fn create_request(method: &str, request_uri: &str, opts: RequestOptions) -> Self {
        let branch = opts.via_branch.unwrap_or_else(|| generate_branch());
        let transport = opts.via_transport.unwrap_or_else(|| "UDP".to_string());
        let from_tag = opts.from_tag.unwrap_or_else(|| generate_tag());
        let call_id = opts.call_id.unwrap_or_else(|| generate_call_id(None));
        let cseq = opts.cseq.unwrap_or(1);
        let max_forwards = opts.max_forwards.unwrap_or(70);

        // Optional display names render as a quoted prefix: "Name" <uri>.
        let from_display = opts
            .from_display_name
            .map(|d| format!("\"{d}\" "))
            .unwrap_or_default();
        let to_display = opts
            .to_display_name
            .map(|d| format!("\"{d}\" "))
            .unwrap_or_default();
        let to_tag_str = opts.to_tag.map(|t| format!(";tag={t}")).unwrap_or_default();

        let mut headers = vec![
            (
                "Via".to_string(),
                format!(
                    "SIP/2.0/{transport} {}:{};branch={branch};rport",
                    opts.via_host, opts.via_port
                ),
            ),
            (
                "From".to_string(),
                format!("{from_display}<{}>;tag={from_tag}", opts.from_uri),
            ),
            (
                "To".to_string(),
                format!("{to_display}<{}>{to_tag_str}", opts.to_uri),
            ),
            ("Call-ID".to_string(), call_id),
            ("CSeq".to_string(), format!("{cseq} {method}")),
            ("Max-Forwards".to_string(), max_forwards.to_string()),
        ];

        if let Some(contact) = &opts.contact {
            headers.push(("Contact".to_string(), contact.clone()));
        }

        if let Some(extra) = opts.extra_headers {
            headers.extend(extra);
        }

        let body = opts.body.unwrap_or_default();
        if !body.is_empty() {
            if let Some(ct) = &opts.content_type {
                headers.push(("Content-Type".to_string(), ct.clone()));
            }
        }
        headers.push(("Content-Length".to_string(), body.len().to_string()));

        SipMessage {
            start_line: format!("{method} {request_uri} SIP/2.0"),
            headers,
            body,
        }
    }

    /// Build a SIP response to an incoming request.
    /// Copies Via, From, To, Call-ID, and CSeq from the original request.
    pub fn create_response(
        status_code: u16,
        reason_phrase: &str,
        request: &SipMessage,
        opts: Option<ResponseOptions>,
    ) -> Self {
        let opts = opts.unwrap_or_default();
        let mut headers: Vec<(String, String)> = Vec::new();

        // Copy all Via headers (order matters).
        for (n, v) in &request.headers {
            if n.to_ascii_lowercase() == "via" {
                headers.push(("Via".to_string(), v.clone()));
            }
        }

        // From — copied verbatim.
        if let Some(from) = request.get_header("From") {
            headers.push(("From".to_string(), from.to_string()));
        }

        // To — add tag if provided and not already present.
        let mut to = request.get_header("To").unwrap_or("").to_string();
        if let Some(tag) = &opts.to_tag {
            if !to.contains("tag=") {
                to.push_str(&format!(";tag={tag}"));
            }
        }
        headers.push(("To".to_string(), to));

        headers.push(("Call-ID".to_string(), request.call_id().to_string()));

        if let Some(cseq) = request.get_header("CSeq") {
            headers.push(("CSeq".to_string(), cseq.to_string()));
        }

        if let Some(contact) = &opts.contact {
            headers.push(("Contact".to_string(), contact.clone()));
        }

        if let Some(extra) = opts.extra_headers {
            headers.extend(extra);
        }

        // Same body/Content-Type/Content-Length policy as create_request.
        let body = opts.body.unwrap_or_default();
        if !body.is_empty() {
            if let Some(ct) = &opts.content_type {
                headers.push(("Content-Type".to_string(), ct.clone()));
            }
        }
        headers.push(("Content-Length".to_string(), body.len().to_string()));

        SipMessage {
            start_line: format!("SIP/2.0 {status_code} {reason_phrase}"),
            headers,
            body,
        }
    }

    /// Extract the tag from a From or To header value.
    /// Returns None when no ";tag=" parameter is present.
    pub fn extract_tag(header_value: &str) -> Option<&str> {
        let idx = header_value.find(";tag=")?;
        let rest = &header_value[idx + 5..];
        // Tag ends at whitespace, ';', or '>' — or at end of string.
        let end = rest
            .find(|c: char| c.is_whitespace() || c == ';' || c == '>')
            .unwrap_or(rest.len());
        Some(&rest[..end])
    }

    /// Extract the URI from an addr-spec or name-addr (From/To/Contact).
    /// Angle-bracketed form takes precedence; otherwise the bare value is
    /// trimmed and cut at the first ';' or '>'.
    pub fn extract_uri(header_value: &str) -> Option<&str> {
        if let Some(start) = header_value.find('<') {
            let end = header_value[start..].find('>')?;
            Some(&header_value[start + 1..start + end])
        } else {
            let trimmed = header_value.trim();
            let end = trimmed
                .find(|c: char| c == ';' || c == '>')
                .unwrap_or(trimmed.len());
            let result = &trimmed[..end];
            if result.is_empty() {
                None
            } else {
                Some(result)
            }
        }
    }

    /// Extract the user part from a SIP/TEL URI or header value.
    /// Strips a case-insensitive sip:/sips:/tel: scheme, then cuts at the
    /// first '@', ';', '?', or '>' delimiter.
    pub fn extract_uri_user(uri_or_header_value: &str) -> Option<&str> {
        let raw = Self::extract_uri(uri_or_header_value).unwrap_or(uri_or_header_value);
        let raw = raw.trim();
        if raw.is_empty() {
            return None;
        }

        // Check the 5-byte "sips:" prefix before the 4-byte ones.
        let user_part = if raw
            .get(..5)
            .is_some_and(|prefix| prefix.eq_ignore_ascii_case("sips:"))
        {
            &raw[5..]
        } else if raw.get(..4).is_some_and(|prefix| {
            prefix.eq_ignore_ascii_case("sip:") || prefix.eq_ignore_ascii_case("tel:")
        }) {
            &raw[4..]
        } else {
            raw
        };

        let end = user_part
            .find(|c: char| matches!(c, '@' | ';' | '?' | '>'))
            .unwrap_or(user_part.len());
        let result = &user_part[..end];
        if result.is_empty() {
            None
        } else {
            Some(result)
        }
    }
}
|
||||
|
||||
/// Options for `SipMessage::create_request`.
/// `None` fields fall back to the defaults documented on `create_request`
/// (generated branch/tag/Call-ID, UDP transport, CSeq 1, Max-Forwards 70).
pub struct RequestOptions {
    /// Host placed in the Via header.
    pub via_host: String,
    /// Port placed in the Via header.
    pub via_port: u16,
    /// Via transport; defaults to "UDP".
    pub via_transport: Option<String>,
    /// Via branch parameter; generated when None.
    pub via_branch: Option<String>,
    /// URI for the From header.
    pub from_uri: String,
    /// Optional quoted display name for From.
    pub from_display_name: Option<String>,
    /// From tag; generated when None.
    pub from_tag: Option<String>,
    /// URI for the To header.
    pub to_uri: String,
    /// Optional quoted display name for To.
    pub to_display_name: Option<String>,
    /// To tag; omitted when None (initial out-of-dialog request).
    pub to_tag: Option<String>,
    /// Call-ID; generated when None.
    pub call_id: Option<String>,
    /// CSeq number; defaults to 1.
    pub cseq: Option<u32>,
    /// Contact header value, emitted verbatim when present.
    pub contact: Option<String>,
    /// Max-Forwards; defaults to 70.
    pub max_forwards: Option<u16>,
    /// Message body; empty when None.
    pub body: Option<String>,
    /// Content-Type; only emitted when the body is non-empty.
    pub content_type: Option<String>,
    /// Additional headers appended after the standard set.
    pub extra_headers: Option<Vec<(String, String)>>,
}
|
||||
|
||||
/// Options for `SipMessage::create_response`.
#[derive(Default)]
pub struct ResponseOptions {
    /// Tag appended to the copied To header (only if it has none yet).
    pub to_tag: Option<String>,
    /// Contact header value, emitted verbatim when present.
    pub contact: Option<String>,
    /// Response body; empty when None.
    pub body: Option<String>,
    /// Content-Type; only emitted when the body is non-empty.
    pub content_type: Option<String>,
    /// Additional headers appended after the copied set.
    pub extra_headers: Option<Vec<(String, String)>>,
}
|
||||
|
||||
/// Check if a string matches the SIP first-line pattern.
///
/// Accepts either:
/// - a response line `SIP/X.Y SP STATUS [SP REASON]` where STATUS is a
///   non-empty run of ASCII digits, or
/// - a request line `METHOD SP URI SP SIP/X.Y` where METHOD is non-empty,
///   all-uppercase ASCII.
fn is_sip_first_line(line: &str) -> bool {
    // Request: METHOD SP URI SP SIP/X.Y
    // Response: SIP/X.Y SP STATUS SP REASON
    if line.starts_with("SIP/") {
        // Response: SIP/2.0 200 OK
        let parts: Vec<&str> = line.splitn(3, ' ').collect();
        if parts.len() >= 2 {
            // Bug fix: `all()` is vacuously true on an empty token, so a
            // malformed line like "SIP/2.0  OK" (double space → empty status)
            // used to be accepted. Require a non-empty digit run.
            return !parts[1].is_empty() && parts[1].chars().all(|c| c.is_ascii_digit());
        }
    } else {
        // Request: INVITE sip:user@host SIP/2.0
        let parts: Vec<&str> = line.splitn(3, ' ').collect();
        if parts.len() >= 3 {
            // Bug fix: likewise require a non-empty method token so a
            // leading-space line such as " x SIP/2.0" is rejected.
            return !parts[0].is_empty()
                && parts[0].chars().all(|c| c.is_ascii_uppercase())
                && parts[2].starts_with("SIP/");
        }
    }
    false
}
|
||||
|
||||
// Unit tests for SipMessage parsing, serialization, mutation, and factories.
#[cfg(test)]
mod tests {
    use super::*;

    // Minimal valid INVITE used as a shared fixture throughout.
    const INVITE_RAW: &str = "INVITE sip:user@host SIP/2.0\r\n\
        Via: SIP/2.0/UDP 192.168.1.1:5060;branch=z9hG4bK-test\r\n\
        From: <sip:caller@host>;tag=abc\r\n\
        To: <sip:user@host>\r\n\
        Call-ID: test-call-id\r\n\
        CSeq: 1 INVITE\r\n\
        Content-Length: 0\r\n\r\n";

    #[test]
    fn parse_invite() {
        let msg = SipMessage::parse(INVITE_RAW.as_bytes()).unwrap();
        assert!(msg.is_request());
        assert!(!msg.is_response());
        assert_eq!(msg.method(), Some("INVITE"));
        assert_eq!(msg.call_id(), "test-call-id");
        assert_eq!(msg.cseq_method(), Some("INVITE"));
        assert!(msg.is_dialog_establishing());
        assert_eq!(msg.request_uri(), Some("sip:user@host"));
    }

    #[test]
    fn parse_response() {
        let raw = "SIP/2.0 200 OK\r\n\
            Via: SIP/2.0/UDP 192.168.1.1:5060;branch=z9hG4bK-test\r\n\
            From: <sip:caller@host>;tag=abc\r\n\
            To: <sip:user@host>;tag=def\r\n\
            Call-ID: test-call-id\r\n\
            CSeq: 1 INVITE\r\n\
            Content-Length: 0\r\n\r\n";
        let msg = SipMessage::parse(raw.as_bytes()).unwrap();
        assert!(msg.is_response());
        assert_eq!(msg.status_code(), Some(200));
        // cseq_method works for responses too — needed for transaction matching.
        assert_eq!(msg.cseq_method(), Some("INVITE"));
    }

    #[test]
    fn serialize_roundtrip() {
        // parse → serialize → parse must preserve headers and identity fields.
        let msg = SipMessage::parse(INVITE_RAW.as_bytes()).unwrap();
        let serialized = msg.serialize();
        let reparsed = SipMessage::parse(&serialized).unwrap();
        assert_eq!(reparsed.call_id(), "test-call-id");
        assert_eq!(reparsed.method(), Some("INVITE"));
        assert_eq!(reparsed.headers.len(), msg.headers.len());
    }

    #[test]
    fn header_mutation() {
        let mut msg = SipMessage::parse(INVITE_RAW.as_bytes()).unwrap();
        msg.set_header("X-Custom", "value1");
        assert_eq!(msg.get_header("X-Custom"), Some("value1"));
        // set_header overwrites rather than duplicating.
        msg.set_header("X-Custom", "value2");
        assert_eq!(msg.get_header("X-Custom"), Some("value2"));
        msg.prepend_header("X-First", "first");
        assert_eq!(msg.headers[0].0, "X-First");
        msg.remove_header("X-Custom");
        assert_eq!(msg.get_header("X-Custom"), None);
    }

    #[test]
    fn set_request_uri() {
        let mut msg = SipMessage::parse(INVITE_RAW.as_bytes()).unwrap();
        msg.set_request_uri("sip:new@host");
        assert_eq!(msg.request_uri(), Some("sip:new@host"));
        assert!(msg.start_line.starts_with("INVITE sip:new@host SIP/2.0"));
    }

    #[test]
    fn extract_tag_and_uri() {
        assert_eq!(
            SipMessage::extract_tag("<sip:user@host>;tag=abc123"),
            Some("abc123")
        );
        assert_eq!(SipMessage::extract_tag("<sip:user@host>"), None);
        assert_eq!(
            SipMessage::extract_uri("<sip:user@host>"),
            Some("sip:user@host")
        );
        assert_eq!(
            SipMessage::extract_uri("\"Name\" <sip:user@host>;tag=abc"),
            Some("sip:user@host")
        );
        // User part may contain spaces (display-style numbers).
        assert_eq!(
            SipMessage::extract_uri_user("\"Name\" <sip:+49 421 219694@host>;tag=abc"),
            Some("+49 421 219694")
        );
        assert_eq!(
            SipMessage::extract_uri_user("sip:0049421219694@voip.easybell.de"),
            Some("0049421219694")
        );
        // tel: URIs cut at the parameter delimiter.
        assert_eq!(
            SipMessage::extract_uri_user("tel:+49421219694;phone-context=example.com"),
            Some("+49421219694")
        );
        // Scheme matching is case-insensitive.
        assert_eq!(SipMessage::extract_uri_user("SIP:user@host"), Some("user"));
    }

    #[test]
    fn create_request_and_response() {
        let invite = SipMessage::create_request(
            "INVITE",
            "sip:user@host",
            RequestOptions {
                via_host: "192.168.1.1".to_string(),
                via_port: 5070,
                via_transport: None,
                via_branch: None,
                from_uri: "sip:caller@proxy".to_string(),
                from_display_name: None,
                from_tag: Some("mytag".to_string()),
                to_uri: "sip:user@host".to_string(),
                to_display_name: None,
                to_tag: None,
                call_id: Some("test-123".to_string()),
                cseq: Some(1),
                contact: Some("<sip:caller@192.168.1.1:5070>".to_string()),
                max_forwards: None,
                body: None,
                content_type: None,
                extra_headers: None,
            },
        );
        assert_eq!(invite.method(), Some("INVITE"));
        assert_eq!(invite.call_id(), "test-123");
        assert!(invite
            .get_header("Via")
            .unwrap()
            .contains("192.168.1.1:5070"));

        let response = SipMessage::create_response(
            200,
            "OK",
            &invite,
            Some(ResponseOptions {
                to_tag: Some("remotetag".to_string()),
                ..Default::default()
            }),
        );
        assert!(response.is_response());
        assert_eq!(response.status_code(), Some(200));
        // The provided to_tag must be appended to the tagless To header.
        let to = response.get_header("To").unwrap();
        assert!(to.contains("tag=remotetag"));
    }

    #[test]
    fn has_sdp_body() {
        let mut msg = SipMessage::parse(INVITE_RAW.as_bytes()).unwrap();
        assert!(!msg.has_sdp_body());
        // Both a body and the SDP Content-Type are required.
        msg.body = "v=0\r\no=- 1 1 IN IP4 0.0.0.0\r\n".to_string();
        msg.set_header("Content-Type", "application/sdp");
        assert!(msg.has_sdp_body());
    }
}
|
||||
134
rust/crates/sip-proto/src/rewrite.rs
Normal file
134
rust/crates/sip-proto/src/rewrite.rs
Normal file
@@ -0,0 +1,134 @@
|
||||
//! SIP URI and SDP body rewriting helpers.
|
||||
//!
|
||||
//! Ported from ts/sip/rewrite.ts.
|
||||
|
||||
use crate::Endpoint;
|
||||
|
||||
/// Replaces the host:port in every `sip:` / `sips:` URI found in `value`.
///
/// Scans byte-by-byte, copying non-URI text through unchanged; for each URI
/// the scheme and user part are preserved and the host[:port] portion is
/// replaced by `host:port`. A port is always emitted even when the original
/// URI had none.
///
/// NOTE(review): only all-lower ("sip:") and all-upper ("SIP:") schemes are
/// recognized — mixed case like "Sip:" passes through unrewritten. Confirm
/// peers never send mixed-case schemes.
pub fn rewrite_sip_uri(value: &str, host: &str, port: u16) -> String {
    let mut result = String::with_capacity(value.len());
    let mut i = 0;
    let bytes = value.as_bytes();

    while i < bytes.len() {
        // Look for "sip:" or "sips:"
        let scheme_len = if i + 4 <= bytes.len()
            && (bytes[i..].starts_with(b"sip:") || bytes[i..].starts_with(b"SIP:"))
        {
            4
        } else if i + 5 <= bytes.len()
            && (bytes[i..].starts_with(b"sips:") || bytes[i..].starts_with(b"SIPS:"))
        {
            5
        } else {
            // Not at a scheme: copy one whole character (UTF-8 safe) and retry.
            result.push(value[i..].chars().next().unwrap());
            i += value[i..].chars().next().unwrap().len_utf8();
            continue;
        };

        let scheme = &value[i..i + scheme_len];
        let rest = &value[i + scheme_len..];

        // Check for userpart (contains '@')
        let (userpart, host_start) = if let Some(at) = rest.find('@') {
            // Make sure @ comes before any delimiters
            // (an '@' belonging to a LATER URI in the same string must not
            // be mistaken for this URI's user part).
            let delim = rest.find(|c: char| c == '>' || c == ';' || c == ',' || c.is_whitespace());
            if delim.is_none() || at < delim.unwrap() {
                (&rest[..=at], at + 1)
            } else {
                ("", 0)
            }
        } else {
            ("", 0)
        };

        // Find the end of the host:port portion
        let host_rest = &rest[host_start..];
        let end = host_rest
            .find(|c: char| c == '>' || c == ';' || c == ',' || c.is_whitespace())
            .unwrap_or(host_rest.len());

        // Emit scheme + original user part + the replacement host:port,
        // then skip past the original host:port in the input.
        result.push_str(scheme);
        result.push_str(userpart);
        result.push_str(&format!("{host}:{port}"));
        i += scheme_len + host_start + end;
    }

    result
}
|
||||
|
||||
/// Rewrites the connection address (`c=`) and audio media port (`m=audio`)
|
||||
/// in an SDP body. Returns the rewritten body together with the original
|
||||
/// endpoint that was replaced (if any).
|
||||
pub fn rewrite_sdp(body: &str, ip: &str, port: u16) -> (String, Option<Endpoint>) {
|
||||
let mut orig_addr: Option<String> = None;
|
||||
let mut orig_port: Option<u16> = None;
|
||||
|
||||
let lines: Vec<String> = body
|
||||
.replace("\r\n", "\n")
|
||||
.split('\n')
|
||||
.map(|line| {
|
||||
if let Some(rest) = line.strip_prefix("c=IN IP4 ") {
|
||||
orig_addr = Some(rest.trim().to_string());
|
||||
format!("c=IN IP4 {ip}")
|
||||
} else if line.starts_with("m=audio ") {
|
||||
let parts: Vec<&str> = line.split(' ').collect();
|
||||
if parts.len() >= 2 {
|
||||
orig_port = parts[1].parse().ok();
|
||||
let mut rebuilt = parts[0].to_string();
|
||||
rebuilt.push(' ');
|
||||
rebuilt.push_str(&port.to_string());
|
||||
for part in &parts[2..] {
|
||||
rebuilt.push(' ');
|
||||
rebuilt.push_str(part);
|
||||
}
|
||||
return rebuilt;
|
||||
}
|
||||
line.to_string()
|
||||
} else {
|
||||
line.to_string()
|
||||
}
|
||||
})
|
||||
.collect();
|
||||
|
||||
let original = match (orig_addr, orig_port) {
|
||||
(Some(a), Some(p)) => Some(Endpoint {
|
||||
address: a,
|
||||
port: p,
|
||||
codec_pt: None,
|
||||
}),
|
||||
_ => None,
|
||||
};
|
||||
|
||||
(lines.join("\r\n"), original)
|
||||
}
|
||||
|
||||
// Unit tests for URI and SDP rewriting.
#[cfg(test)]
mod tests {
    use super::*;

    #[test]
    fn test_rewrite_sip_uri() {
        // Host and port inside angle brackets are replaced; user part kept.
        let input = "<sip:user@10.0.0.1:5060>";
        let result = rewrite_sip_uri(input, "192.168.1.1", 5070);
        assert_eq!(result, "<sip:user@192.168.1.1:5070>");
    }

    #[test]
    fn test_rewrite_sip_uri_no_port() {
        // A port is added even when the original URI had none.
        let input = "sip:user@10.0.0.1";
        let result = rewrite_sip_uri(input, "192.168.1.1", 5070);
        assert_eq!(result, "sip:user@192.168.1.1:5070");
    }

    #[test]
    fn test_rewrite_sdp() {
        let sdp = "v=0\r\nc=IN IP4 10.0.0.1\r\nm=audio 5060 RTP/AVP 0 9\r\na=sendrecv\r\n";
        let (rewritten, orig) = rewrite_sdp(sdp, "192.168.1.1", 20000);
        assert!(rewritten.contains("c=IN IP4 192.168.1.1"));
        // Payload-type list after the port must be preserved verbatim.
        assert!(rewritten.contains("m=audio 20000 RTP/AVP 0 9"));
        let ep = orig.unwrap();
        assert_eq!(ep.address, "10.0.0.1");
        assert_eq!(ep.port, 5060);
    }
}
|
||||
@@ -1,18 +0,0 @@
|
||||
[package]
|
||||
name = "tts-engine"
|
||||
version = "0.1.0"
|
||||
edition = "2021"
|
||||
|
||||
[[bin]]
|
||||
name = "tts-engine"
|
||||
path = "src/main.rs"
|
||||
|
||||
[dependencies]
|
||||
kokoro-tts = { version = "0.3", default-features = false }
|
||||
# Pin to rc.11 matching kokoro-tts's expectation; enable vendored TLS to avoid system libssl-dev.
|
||||
ort = { version = "=2.0.0-rc.11", default-features = false, features = [
|
||||
"std", "download-binaries", "copy-dylibs", "ndarray",
|
||||
"tls-native-vendored"
|
||||
] }
|
||||
tokio = { version = "1", features = ["rt-multi-thread", "macros"] }
|
||||
hound = "3.5"
|
||||
@@ -1,149 +0,0 @@
|
||||
/// TTS engine CLI — synthesizes text to a WAV file using Kokoro neural TTS.
|
||||
///
|
||||
/// Usage:
|
||||
/// echo "Hello world" | tts-engine --model kokoro-v1.0.onnx --voices voices.bin --output out.wav
|
||||
/// tts-engine --model kokoro-v1.0.onnx --voices voices.bin --output out.wav --text "Hello world"
|
||||
///
|
||||
/// Outputs 24kHz 16-bit mono WAV.
|
||||
|
||||
use kokoro_tts::{KokoroTts, Voice};
|
||||
use std::io::{self, Read};
|
||||
|
||||
/// Parse CLI flags from `std::env::args()`.
///
/// Returns `(model, voices, output, voice, text)` where `voice` defaults to
/// "af_bella" and `text` is None when --text was not given (caller then reads
/// stdin). Errors when any of --model/--voices/--output is missing.
/// Unknown flags are silently ignored.
fn parse_args() -> Result<(String, String, String, String, Option<String>), String> {
    let args: Vec<String> = std::env::args().collect();
    let mut model = String::new();
    let mut voices = String::new();
    let mut output = String::new();
    let mut text: Option<String> = None;
    let mut voice_name: Option<String> = None;

    // Walk flag/value pairs; a flag at end-of-args yields an empty value,
    // which the required-flag checks below then report as missing.
    let mut i = 1;
    while i < args.len() {
        match args[i].as_str() {
            "--model" => { i += 1; model = args.get(i).cloned().unwrap_or_default(); }
            "--voices" => { i += 1; voices = args.get(i).cloned().unwrap_or_default(); }
            "--output" | "--output_file" => { i += 1; output = args.get(i).cloned().unwrap_or_default(); }
            "--text" => { i += 1; text = args.get(i).cloned(); }
            "--voice" => { i += 1; voice_name = args.get(i).cloned(); }
            _ => {}
        }
        i += 1;
    }

    if model.is_empty() { return Err("--model required".into()); }
    if voices.is_empty() { return Err("--voices required".into()); }
    if output.is_empty() { return Err("--output required".into()); }

    let voice_str = voice_name.unwrap_or_else(|| "af_bella".into());

    Ok((model, voices, output, voice_str, text))
}
|
||||
|
||||
/// Map a voice name string (e.g. "af_bella") to the kokoro-tts `Voice`
/// variant, always at full weight (1.0). Unknown names fall back to
/// af_bella with a warning on stderr.
fn select_voice(name: &str) -> Voice {
    match name {
        "af_bella" => Voice::AfBella(1.0),
        "af_heart" => Voice::AfHeart(1.0),
        "af_jessica" => Voice::AfJessica(1.0),
        "af_nicole" => Voice::AfNicole(1.0),
        "af_nova" => Voice::AfNova(1.0),
        "af_sarah" => Voice::AfSarah(1.0),
        "af_sky" => Voice::AfSky(1.0),
        "af_river" => Voice::AfRiver(1.0),
        "af_alloy" => Voice::AfAlloy(1.0),
        "af_aoede" => Voice::AfAoede(1.0),
        "af_kore" => Voice::AfKore(1.0),
        "am_adam" => Voice::AmAdam(1.0),
        "am_echo" => Voice::AmEcho(1.0),
        "am_eric" => Voice::AmEric(1.0),
        "am_fenrir" => Voice::AmFenrir(1.0),
        "am_liam" => Voice::AmLiam(1.0),
        "am_michael" => Voice::AmMichael(1.0),
        "am_onyx" => Voice::AmOnyx(1.0),
        "am_puck" => Voice::AmPuck(1.0),
        "bf_alice" => Voice::BfAlice(1.0),
        "bf_emma" => Voice::BfEmma(1.0),
        "bf_isabella" => Voice::BfIsabella(1.0),
        "bf_lily" => Voice::BfLily(1.0),
        "bm_daniel" => Voice::BmDaniel(1.0),
        "bm_fable" => Voice::BmFable(1.0),
        "bm_george" => Voice::BmGeorge(1.0),
        "bm_lewis" => Voice::BmLewis(1.0),
        _ => {
            eprintln!("[tts-engine] unknown voice '{}', falling back to af_bella", name);
            Voice::AfBella(1.0)
        }
    }
}
|
||||
|
||||
/// CLI entry point: parse args, read text (flag or stdin), load the Kokoro
/// model, synthesize, and write a 24kHz 16-bit mono WAV to the output path.
/// All diagnostics go to stderr; any failure exits with status 1.
#[tokio::main]
async fn main() {
    let (model_path, voices_path, output_path, voice_name, text_arg) = match parse_args() {
        Ok(v) => v,
        Err(e) => {
            eprintln!("Error: {}", e);
            eprintln!("Usage: tts-engine --model <model.onnx> --voices <voices.bin> --output <output.wav> [--text <text>] [--voice <voice_name>]");
            std::process::exit(1);
        }
    };

    // Get text from --text arg or stdin.
    let text = match text_arg {
        Some(t) => t,
        None => {
            let mut buf = String::new();
            io::stdin().read_to_string(&mut buf).expect("failed to read stdin");
            buf.trim().to_string()
        }
    };

    if text.is_empty() {
        eprintln!("[tts-engine] no text provided");
        std::process::exit(1);
    }

    eprintln!("[tts-engine] loading model: {}", model_path);
    let tts = match KokoroTts::new(&model_path, &voices_path).await {
        Ok(t) => t,
        Err(e) => {
            eprintln!("[tts-engine] failed to load model: {:?}", e);
            std::process::exit(1);
        }
    };

    let voice = select_voice(&voice_name);
    eprintln!("[tts-engine] synthesizing with voice '{}': \"{}\"", voice_name, text);

    let (samples, duration) = match tts.synth(&text, voice).await {
        Ok(r) => r,
        Err(e) => {
            eprintln!("[tts-engine] synthesis failed: {:?}", e);
            std::process::exit(1);
        }
    };

    eprintln!("[tts-engine] synthesized {} samples in {:?}", samples.len(), duration);

    // Write WAV: 24kHz, 16-bit, mono (same format announcement.ts expects).
    let spec = hound::WavSpec {
        channels: 1,
        sample_rate: 24000,
        bits_per_sample: 16,
        sample_format: hound::SampleFormat::Int,
    };

    let mut writer = match hound::WavWriter::create(&output_path, spec) {
        Ok(w) => w,
        Err(e) => {
            eprintln!("[tts-engine] failed to create WAV: {}", e);
            std::process::exit(1);
        }
    };

    // Convert f32 samples to i16 with clamping before the cast to avoid
    // overflow on values outside [-1.0, 1.0].
    for &sample in &samples {
        let s16 = (sample * 32767.0).round().clamp(-32768.0, 32767.0) as i16;
        writer.write_sample(s16).unwrap();
    }
    writer.finalize().unwrap();

    eprintln!("[tts-engine] wrote {}", output_path);
}
|
||||
1
rust/vendor/kokoro-tts/.cargo-ok
vendored
Normal file
1
rust/vendor/kokoro-tts/.cargo-ok
vendored
Normal file
@@ -0,0 +1 @@
|
||||
{"v":1}
|
||||
7
rust/vendor/kokoro-tts/.cargo_vcs_info.json
vendored
Normal file
7
rust/vendor/kokoro-tts/.cargo_vcs_info.json
vendored
Normal file
@@ -0,0 +1,7 @@
|
||||
{
|
||||
"git": {
|
||||
"sha1": "dfa3eda5e8c3f23f8b4c5d504acaebd6e7a45020",
|
||||
"dirty": true
|
||||
},
|
||||
"path_in_vcs": ""
|
||||
}
|
||||
35
rust/vendor/kokoro-tts/.github/workflows/rust.yml
vendored
Normal file
35
rust/vendor/kokoro-tts/.github/workflows/rust.yml
vendored
Normal file
@@ -0,0 +1,35 @@
|
||||
name: Rust
|
||||
|
||||
on:
|
||||
push:
|
||||
branches: [ "master" ]
|
||||
pull_request:
|
||||
branches: [ "master" ]
|
||||
|
||||
env:
|
||||
CARGO_TERM_COLOR: always
|
||||
|
||||
jobs:
|
||||
build:
|
||||
strategy:
|
||||
matrix:
|
||||
os: [ubuntu-latest, windows-latest, macos-latest]
|
||||
runs-on: ${{ matrix.os }}
|
||||
|
||||
steps:
|
||||
- uses: actions/checkout@v4
|
||||
|
||||
# Ubuntu 专属依赖安装
|
||||
- name: Setup Ubuntu dependencies
|
||||
if: matrix.os == 'ubuntu-latest'
|
||||
run: |
|
||||
sudo apt-get update
|
||||
sudo apt install libasound2-dev
|
||||
|
||||
# 构建项目
|
||||
- name: Build
|
||||
run: cargo build -vv
|
||||
|
||||
# 运行测试
|
||||
- name: Run tests
|
||||
run: cargo test --workspace -vv
|
||||
5
rust/vendor/kokoro-tts/.gitignore
vendored
Normal file
5
rust/vendor/kokoro-tts/.gitignore
vendored
Normal file
@@ -0,0 +1,5 @@
|
||||
*.bin
|
||||
*.onnx
|
||||
Cargo.lock
|
||||
/target
|
||||
.idea
|
||||
116
rust/vendor/kokoro-tts/Cargo.toml
vendored
Normal file
116
rust/vendor/kokoro-tts/Cargo.toml
vendored
Normal file
@@ -0,0 +1,116 @@
|
||||
# THIS FILE IS AUTOMATICALLY GENERATED BY CARGO
|
||||
#
|
||||
# When uploading crates to the registry Cargo will automatically
|
||||
# "normalize" Cargo.toml files for maximal compatibility
|
||||
# with all versions of Cargo and also rewrite `path` dependencies
|
||||
# to registry (e.g., crates.io) dependencies.
|
||||
#
|
||||
# If you are reading this file be aware that the original Cargo.toml
|
||||
# will likely look very different (and much more reasonable).
|
||||
# See Cargo.toml.orig for the original contents.
|
||||
|
||||
[package]
|
||||
edition = "2024"
|
||||
name = "kokoro-tts"
|
||||
version = "0.3.2"
|
||||
build = "build.rs"
|
||||
autolib = false
|
||||
autobins = false
|
||||
autoexamples = false
|
||||
autotests = false
|
||||
autobenches = false
|
||||
description = "用于Rust的轻量级AI离线语音合成器(Kokoro TTS),可轻松交叉编译到移动端"
|
||||
readme = "README.md"
|
||||
keywords = [
|
||||
"TTS",
|
||||
"Offline",
|
||||
"Lite",
|
||||
"AI",
|
||||
"Synthesizer",
|
||||
]
|
||||
license = "Apache-2.0"
|
||||
repository = "https://github.com/mzdk100/kokoro.git"
|
||||
|
||||
[features]
|
||||
use-cmudict = ["cmudict-fast"]
|
||||
|
||||
[lib]
|
||||
name = "kokoro_tts"
|
||||
path = "src/lib.rs"
|
||||
|
||||
[[example]]
|
||||
name = "synth_directly_v10"
|
||||
path = "examples/synth_directly_v10.rs"
|
||||
|
||||
[[example]]
|
||||
name = "synth_directly_v11"
|
||||
path = "examples/synth_directly_v11.rs"
|
||||
|
||||
[[example]]
|
||||
name = "synth_stream"
|
||||
path = "examples/synth_stream.rs"
|
||||
|
||||
[dependencies.bincode]
|
||||
version = "2.0"
|
||||
|
||||
[dependencies.chinese-number]
|
||||
version = "0.7.8"
|
||||
features = [
|
||||
"number-to-chinese",
|
||||
"chinese-to-number",
|
||||
]
|
||||
default-features = false
|
||||
|
||||
[dependencies.cmudict-fast]
|
||||
version = "0.8.0"
|
||||
optional = true
|
||||
|
||||
[dependencies.futures]
|
||||
version = "0.3.31"
|
||||
|
||||
[dependencies.jieba-rs]
|
||||
version = "0.8.1"
|
||||
|
||||
[dependencies.log]
|
||||
version = "0.4.29"
|
||||
|
||||
[dependencies.ndarray]
|
||||
version = "0.17.2"
|
||||
|
||||
[dependencies.ort]
|
||||
version = "2.0.0-rc.11"
|
||||
|
||||
[dependencies.pin-project]
|
||||
version = "1.1.10"
|
||||
|
||||
[dependencies.pinyin]
|
||||
version = "0.11.0"
|
||||
|
||||
[dependencies.rand]
|
||||
version = "0.10.0-rc.7"
|
||||
|
||||
[dependencies.regex]
|
||||
version = "1.12.2"
|
||||
|
||||
[dependencies.tokio]
|
||||
version = "1.49.0"
|
||||
features = [
|
||||
"fs",
|
||||
"rt-multi-thread",
|
||||
"time",
|
||||
"sync",
|
||||
]
|
||||
|
||||
[dev-dependencies.anyhow]
|
||||
version = "1.0.100"
|
||||
|
||||
[dev-dependencies.tokio]
|
||||
version = "1.49.0"
|
||||
features = ["macros"]
|
||||
|
||||
[dev-dependencies.voxudio]
|
||||
version = "0.5.7"
|
||||
features = ["device"]
|
||||
|
||||
[build-dependencies.cc]
|
||||
version = "1.2.53"
|
||||
35
rust/vendor/kokoro-tts/Cargo.toml.orig
generated
vendored
Normal file
35
rust/vendor/kokoro-tts/Cargo.toml.orig
generated
vendored
Normal file
@@ -0,0 +1,35 @@
|
||||
[package]
|
||||
name = "kokoro-tts"
|
||||
description = "用于Rust的轻量级AI离线语音合成器(Kokoro TTS),可轻松交叉编译到移动端"
|
||||
version = "0.3.2"
|
||||
edition = "2024"
|
||||
keywords = ["TTS", "Offline", "Lite", "AI", "Synthesizer"]
|
||||
license = "Apache-2.0"
|
||||
repository = "https://github.com/mzdk100/kokoro.git"
|
||||
readme = "README.md"
|
||||
|
||||
[features]
|
||||
use-cmudict = ["cmudict-fast"]
|
||||
|
||||
[dependencies]
|
||||
bincode = "2.0"
|
||||
chinese-number = { version = "0.7.8",default-features = false,features = ["number-to-chinese", "chinese-to-number"] }
|
||||
cmudict-fast = { version = "0.8.0", optional = true }
|
||||
futures = "0.3.31"
|
||||
jieba-rs = "0.8.1"
|
||||
log = "0.4.29"
|
||||
ndarray = "0.17.2"
|
||||
ort = "2.0.0-rc.11"
|
||||
pin-project = "1.1.10"
|
||||
pinyin = "0.11.0"
|
||||
rand="0.10.0-rc.7"
|
||||
regex = "1.12.2"
|
||||
tokio = { version = "1.49.0",features = ["fs", "rt-multi-thread","time", "sync"] }
|
||||
|
||||
[dev-dependencies]
|
||||
anyhow = "1.0.100"
|
||||
tokio = {version = "1.49.0",features = ["macros"]}
|
||||
voxudio = { version = "0.5.7",features = ["device"] }
|
||||
|
||||
[build-dependencies]
|
||||
cc = "1.2.53"
|
||||
201
rust/vendor/kokoro-tts/LICENSE
vendored
Normal file
201
rust/vendor/kokoro-tts/LICENSE
vendored
Normal file
@@ -0,0 +1,201 @@
|
||||
Apache License
|
||||
Version 2.0, January 2004
|
||||
http://www.apache.org/licenses/
|
||||
|
||||
TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION
|
||||
|
||||
1. Definitions.
|
||||
|
||||
"License" shall mean the terms and conditions for use, reproduction,
|
||||
and distribution as defined by Sections 1 through 9 of this document.
|
||||
|
||||
"Licensor" shall mean the copyright owner or entity authorized by
|
||||
the copyright owner that is granting the License.
|
||||
|
||||
"Legal Entity" shall mean the union of the acting entity and all
|
||||
other entities that control, are controlled by, or are under common
|
||||
control with that entity. For the purposes of this definition,
|
||||
"control" means (i) the power, direct or indirect, to cause the
|
||||
direction or management of such entity, whether by contract or
|
||||
otherwise, or (ii) ownership of fifty percent (50%) or more of the
|
||||
outstanding shares, or (iii) beneficial ownership of such entity.
|
||||
|
||||
"You" (or "Your") shall mean an individual or Legal Entity
|
||||
exercising permissions granted by this License.
|
||||
|
||||
"Source" form shall mean the preferred form for making modifications,
|
||||
including but not limited to software source code, documentation
|
||||
source, and configuration files.
|
||||
|
||||
"Object" form shall mean any form resulting from mechanical
|
||||
transformation or translation of a Source form, including but
|
||||
not limited to compiled object code, generated documentation,
|
||||
and conversions to other media types.
|
||||
|
||||
"Work" shall mean the work of authorship, whether in Source or
|
||||
Object form, made available under the License, as indicated by a
|
||||
copyright notice that is included in or attached to the work
|
||||
(an example is provided in the Appendix below).
|
||||
|
||||
"Derivative Works" shall mean any work, whether in Source or Object
|
||||
form, that is based on (or derived from) the Work and for which the
|
||||
editorial revisions, annotations, elaborations, or other modifications
|
||||
represent, as a whole, an original work of authorship. For the purposes
|
||||
of this License, Derivative Works shall not include works that remain
|
||||
separable from, or merely link (or bind by name) to the interfaces of,
|
||||
the Work and Derivative Works thereof.
|
||||
|
||||
"Contribution" shall mean any work of authorship, including
|
||||
the original version of the Work and any modifications or additions
|
||||
to that Work or Derivative Works thereof, that is intentionally
|
||||
submitted to Licensor for inclusion in the Work by the copyright owner
|
||||
or by an individual or Legal Entity authorized to submit on behalf of
|
||||
the copyright owner. For the purposes of this definition, "submitted"
|
||||
means any form of electronic, verbal, or written communication sent
|
||||
to the Licensor or its representatives, including but not limited to
|
||||
communication on electronic mailing lists, source code control systems,
|
||||
and issue tracking systems that are managed by, or on behalf of, the
|
||||
Licensor for the purpose of discussing and improving the Work, but
|
||||
excluding communication that is conspicuously marked or otherwise
|
||||
designated in writing by the copyright owner as "Not a Contribution."
|
||||
|
||||
"Contributor" shall mean Licensor and any individual or Legal Entity
|
||||
on behalf of whom a Contribution has been received by Licensor and
|
||||
subsequently incorporated within the Work.
|
||||
|
||||
2. Grant of Copyright License. Subject to the terms and conditions of
|
||||
this License, each Contributor hereby grants to You a perpetual,
|
||||
worldwide, non-exclusive, no-charge, royalty-free, irrevocable
|
||||
copyright license to reproduce, prepare Derivative Works of,
|
||||
publicly display, publicly perform, sublicense, and distribute the
|
||||
Work and such Derivative Works in Source or Object form.
|
||||
|
||||
3. Grant of Patent License. Subject to the terms and conditions of
|
||||
this License, each Contributor hereby grants to You a perpetual,
|
||||
worldwide, non-exclusive, no-charge, royalty-free, irrevocable
|
||||
(except as stated in this section) patent license to make, have made,
|
||||
use, offer to sell, sell, import, and otherwise transfer the Work,
|
||||
where such license applies only to those patent claims licensable
|
||||
by such Contributor that are necessarily infringed by their
|
||||
Contribution(s) alone or by combination of their Contribution(s)
|
||||
with the Work to which such Contribution(s) was submitted. If You
|
||||
institute patent litigation against any entity (including a
|
||||
cross-claim or counterclaim in a lawsuit) alleging that the Work
|
||||
or a Contribution incorporated within the Work constitutes direct
|
||||
or contributory patent infringement, then any patent licenses
|
||||
granted to You under this License for that Work shall terminate
|
||||
as of the date such litigation is filed.
|
||||
|
||||
4. Redistribution. You may reproduce and distribute copies of the
|
||||
Work or Derivative Works thereof in any medium, with or without
|
||||
modifications, and in Source or Object form, provided that You
|
||||
meet the following conditions:
|
||||
|
||||
(a) You must give any other recipients of the Work or
|
||||
Derivative Works a copy of this License; and
|
||||
|
||||
(b) You must cause any modified files to carry prominent notices
|
||||
stating that You changed the files; and
|
||||
|
||||
(c) You must retain, in the Source form of any Derivative Works
|
||||
that You distribute, all copyright, patent, trademark, and
|
||||
attribution notices from the Source form of the Work,
|
||||
excluding those notices that do not pertain to any part of
|
||||
the Derivative Works; and
|
||||
|
||||
(d) If the Work includes a "NOTICE" text file as part of its
|
||||
distribution, then any Derivative Works that You distribute must
|
||||
include a readable copy of the attribution notices contained
|
||||
within such NOTICE file, excluding those notices that do not
|
||||
pertain to any part of the Derivative Works, in at least one
|
||||
of the following places: within a NOTICE text file distributed
|
||||
as part of the Derivative Works; within the Source form or
|
||||
documentation, if provided along with the Derivative Works; or,
|
||||
within a display generated by the Derivative Works, if and
|
||||
wherever such third-party notices normally appear. The contents
|
||||
of the NOTICE file are for informational purposes only and
|
||||
do not modify the License. You may add Your own attribution
|
||||
notices within Derivative Works that You distribute, alongside
|
||||
or as an addendum to the NOTICE text from the Work, provided
|
||||
that such additional attribution notices cannot be construed
|
||||
as modifying the License.
|
||||
|
||||
You may add Your own copyright statement to Your modifications and
|
||||
may provide additional or different license terms and conditions
|
||||
for use, reproduction, or distribution of Your modifications, or
|
||||
for any such Derivative Works as a whole, provided Your use,
|
||||
reproduction, and distribution of the Work otherwise complies with
|
||||
the conditions stated in this License.
|
||||
|
||||
5. Submission of Contributions. Unless You explicitly state otherwise,
|
||||
any Contribution intentionally submitted for inclusion in the Work
|
||||
by You to the Licensor shall be under the terms and conditions of
|
||||
this License, without any additional terms or conditions.
|
||||
Notwithstanding the above, nothing herein shall supersede or modify
|
||||
the terms of any separate license agreement you may have executed
|
||||
with Licensor regarding such Contributions.
|
||||
|
||||
6. Trademarks. This License does not grant permission to use the trade
|
||||
names, trademarks, service marks, or product names of the Licensor,
|
||||
except as required for reasonable and customary use in describing the
|
||||
origin of the Work and reproducing the content of the NOTICE file.
|
||||
|
||||
7. Disclaimer of Warranty. Unless required by applicable law or
|
||||
agreed to in writing, Licensor provides the Work (and each
|
||||
Contributor provides its Contributions) on an "AS IS" BASIS,
|
||||
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
|
||||
implied, including, without limitation, any warranties or conditions
|
||||
of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A
|
||||
PARTICULAR PURPOSE. You are solely responsible for determining the
|
||||
appropriateness of using or redistributing the Work and assume any
|
||||
risks associated with Your exercise of permissions under this License.
|
||||
|
||||
8. Limitation of Liability. In no event and under no legal theory,
|
||||
whether in tort (including negligence), contract, or otherwise,
|
||||
unless required by applicable law (such as deliberate and grossly
|
||||
negligent acts) or agreed to in writing, shall any Contributor be
|
||||
liable to You for damages, including any direct, indirect, special,
|
||||
incidental, or consequential damages of any character arising as a
|
||||
result of this License or out of the use or inability to use the
|
||||
Work (including but not limited to damages for loss of goodwill,
|
||||
work stoppage, computer failure or malfunction, or any and all
|
||||
other commercial damages or losses), even if such Contributor
|
||||
has been advised of the possibility of such damages.
|
||||
|
||||
9. Accepting Warranty or Additional Liability. While redistributing
|
||||
the Work or Derivative Works thereof, You may choose to offer,
|
||||
and charge a fee for, acceptance of support, warranty, indemnity,
|
||||
or other liability obligations and/or rights consistent with this
|
||||
License. However, in accepting such obligations, You may act only
|
||||
on Your own behalf and on Your sole responsibility, not on behalf
|
||||
of any other Contributor, and only if You agree to indemnify,
|
||||
defend, and hold each Contributor harmless for any liability
|
||||
incurred by, or claims asserted against, such Contributor by reason
|
||||
of your accepting any such warranty or additional liability.
|
||||
|
||||
END OF TERMS AND CONDITIONS
|
||||
|
||||
APPENDIX: How to apply the Apache License to your work.
|
||||
|
||||
To apply the Apache License to your work, attach the following
|
||||
boilerplate notice, with the fields enclosed by brackets "[]"
|
||||
replaced with your own identifying information. (Don't include
|
||||
the brackets!) The text should be enclosed in the appropriate
|
||||
comment syntax for the file format. We also recommend that a
|
||||
file or class name and description of purpose be included on the
|
||||
same "printed page" as the copyright notice for easier
|
||||
identification within third-party archives.
|
||||
|
||||
Copyright [yyyy] [name of copyright owner]
|
||||
|
||||
Licensed under the Apache License, Version 2.0 (the "License");
|
||||
you may not use this file except in compliance with the License.
|
||||
You may obtain a copy of the License at
|
||||
|
||||
http://www.apache.org/licenses/LICENSE-2.0
|
||||
|
||||
Unless required by applicable law or agreed to in writing, software
|
||||
distributed under the License is distributed on an "AS IS" BASIS,
|
||||
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
See the License for the specific language governing permissions and
|
||||
limitations under the License.
|
||||
59
rust/vendor/kokoro-tts/README.md
vendored
Normal file
59
rust/vendor/kokoro-tts/README.md
vendored
Normal file
@@ -0,0 +1,59 @@
|
||||
# Kokoro TTS的rust推理实现
|
||||
|
||||
[Kokoro](https://github.com/hexgrad/kokoro)
|
||||
|
||||
> **Kokoro**是具有8200万参数的开放式TTS型号。
|
||||
> 尽管具有轻巧的体系结构,但它的质量与大型型号相当,同时更快,更具成本效益。使用Apache许可的权重,可以将Kokoro部署从生产环境到个人项目的任何地方。
|
||||
|
||||
|
||||
## 概述
|
||||
|
||||
本项目包含幾个示例脚本,展示了如何使用Kokoro库进行语音合成。这些示例展示了如何直接合成语音和通过流式合成来处理更长的文本。
|
||||
|
||||
## 前置条件
|
||||
|
||||
- Rust编程语言
|
||||
- Tokio异步运行时
|
||||
- Rodio音频处理和播放的库(可选)
|
||||
- 下载模型资源,在這裡可以找到[1.0模型](https://github.com/mzdk100/kokoro/releases/tag/V1.0)和[1.1模型](https://github.com/mzdk100/kokoro/releases/tag/V1.1)
|
||||
|
||||
## 特点
|
||||
- 跨平台,可以轻松在Windows、Mac OS上构建,也可以轻松交叉编译到安卓和iOS。
|
||||
- 离线推理,不依赖网络。
|
||||
- 足够轻量级,有不同尺寸的模型可以选择(最小的模型仅88M)。
|
||||
- 发音人多样化,跨越多国语言。
|
||||
|
||||
## 使用方法
|
||||
|
||||
1. 运行示例,克隆或下载本项目到本地。在项目根目录下运行:
|
||||
```shell
|
||||
cargo run --example synth_directly_v10
|
||||
cargo run --example synth_directly_v11
|
||||
```
|
||||
2. 集成到自己的项目中:
|
||||
```shell
|
||||
cargo add kokoro-tts
|
||||
```
|
||||
3. Linux依赖项
|
||||
```shell
|
||||
sudo apt install libasound2-dev
|
||||
```
|
||||
参考[examples](examples)文件夹中的示例代码进行开发。
|
||||
|
||||
|
||||
## 许可证
|
||||
|
||||
本项目采用Apache-2.0许可证。请查看项目中的LICENSE文件了解更多信息。
|
||||
|
||||
## 注意
|
||||
|
||||
- 请确保在运行示例之前已经正确加载了模型和语音数据。
|
||||
- 示例中的语音合成参数(如语音名称、文本内容、速度等)仅作为示例,实际使用时请根据需要进行调整。
|
||||
|
||||
## 贡献
|
||||
|
||||
如果您有任何改进意见或想要贡献代码,请随时提交Pull Request或创建Issue。
|
||||
|
||||
## 免责声明
|
||||
|
||||
本项目中的示例代码仅用于演示目的。在使用本项目中的代码时,请确保遵守相关法律法规和社会主义核心价值观。开发者不对因使用本项目中的代码而导致的任何后果负责。
|
||||
5
rust/vendor/kokoro-tts/build.rs
vendored
Normal file
5
rust/vendor/kokoro-tts/build.rs
vendored
Normal file
@@ -0,0 +1,5 @@
|
||||
fn main() {
|
||||
const SRC: &str = "src/transcription/en_ipa.c";
|
||||
cc::Build::new().file(SRC).compile("es");
|
||||
println!("cargo:rerun-if-changed={}", SRC);
|
||||
}
|
||||
135010
rust/vendor/kokoro-tts/dict/cmudict.dict
vendored
Normal file
135010
rust/vendor/kokoro-tts/dict/cmudict.dict
vendored
Normal file
File diff suppressed because it is too large
Load Diff
BIN
rust/vendor/kokoro-tts/dict/espeak.dict
vendored
Normal file
BIN
rust/vendor/kokoro-tts/dict/espeak.dict
vendored
Normal file
Binary file not shown.
411980
rust/vendor/kokoro-tts/dict/pinyin.dict
vendored
Normal file
411980
rust/vendor/kokoro-tts/dict/pinyin.dict
vendored
Normal file
File diff suppressed because it is too large
Load Diff
21
rust/vendor/kokoro-tts/examples/synth_directly_v10.rs
vendored
Normal file
21
rust/vendor/kokoro-tts/examples/synth_directly_v10.rs
vendored
Normal file
@@ -0,0 +1,21 @@
|
||||
use {
|
||||
kokoro_tts::{KokoroTts, Voice},
|
||||
voxudio::AudioPlayer,
|
||||
};
|
||||
|
||||
#[tokio::main]
|
||||
async fn main() -> anyhow::Result<()> {
|
||||
let tts = KokoroTts::new("kokoro-v1.0.int8.onnx", "voices.bin").await?;
|
||||
let (audio, took) = tts
|
||||
.synth(
|
||||
"Hello, world!你好,我们是一群追逐梦想的人。我正在使用qq。",
|
||||
Voice::ZfXiaoxiao(1.2),
|
||||
)
|
||||
.await?;
|
||||
println!("Synth took: {:?}", took);
|
||||
let mut player = AudioPlayer::new()?;
|
||||
player.play()?;
|
||||
player.write::<24000>(&audio, 1).await?;
|
||||
|
||||
Ok(())
|
||||
}
|
||||
21
rust/vendor/kokoro-tts/examples/synth_directly_v11.rs
vendored
Normal file
21
rust/vendor/kokoro-tts/examples/synth_directly_v11.rs
vendored
Normal file
@@ -0,0 +1,21 @@
|
||||
use {
|
||||
kokoro_tts::{KokoroTts, Voice},
|
||||
voxudio::AudioPlayer,
|
||||
};
|
||||
|
||||
#[tokio::main]
|
||||
async fn main() -> anyhow::Result<()> {
|
||||
let tts = KokoroTts::new("kokoro-v1.1-zh.onnx", "voices-v1.1-zh.bin").await?;
|
||||
let (audio, took) = tts
|
||||
.synth(
|
||||
"Hello, world!你好,我们是一群追逐梦想的人。我正在使用qq。",
|
||||
Voice::Zm045(1),
|
||||
)
|
||||
.await?;
|
||||
println!("Synth took: {:?}", took);
|
||||
let mut player = AudioPlayer::new()?;
|
||||
player.play()?;
|
||||
player.write::<24000>(&audio, 1).await?;
|
||||
|
||||
Ok(())
|
||||
}
|
||||
51
rust/vendor/kokoro-tts/examples/synth_stream.rs
vendored
Normal file
51
rust/vendor/kokoro-tts/examples/synth_stream.rs
vendored
Normal file
@@ -0,0 +1,51 @@
|
||||
use {
|
||||
futures::StreamExt,
|
||||
kokoro_tts::{KokoroTts, Voice},
|
||||
voxudio::AudioPlayer,
|
||||
};
|
||||
|
||||
#[tokio::main]
|
||||
async fn main() -> anyhow::Result<()> {
|
||||
let tts = KokoroTts::new("kokoro-v1.1-zh.onnx", "voices-v1.1-zh.bin").await?;
|
||||
let (mut sink, mut stream) = tts.stream(Voice::Zm098(1));
|
||||
sink.synth("hello world.").await?;
|
||||
sink.synth("你好,我们是一群追逐梦想的人。").await?;
|
||||
sink.set_voice(Voice::Zf032(2));
|
||||
sink.synth("我正在使用qq。").await?;
|
||||
sink.set_voice(Voice::Zf090(3));
|
||||
sink.synth("今天天气如何?").await?;
|
||||
sink.set_voice(Voice::Zm045(1));
|
||||
sink.synth("你在使用Rust编程语言吗?").await?;
|
||||
sink.set_voice(Voice::Zf039(1));
|
||||
sink.synth(
|
||||
"你轻轻地走过那
|
||||
在风雨花丛中
|
||||
每一点一滴带走
|
||||
是我醒来的梦
|
||||
是在那天空上
|
||||
最美丽的云朵
|
||||
在那彩虹 最温柔的风",
|
||||
)
|
||||
.await?;
|
||||
sink.set_voice(Voice::Zf088(1));
|
||||
sink.synth(
|
||||
"你静静看着我们
|
||||
最不舍的面容
|
||||
像流星划过夜空
|
||||
转瞬即逝的梦
|
||||
是最深情的脸 在这一瞬间
|
||||
在遥远天边
|
||||
",
|
||||
)
|
||||
.await?;
|
||||
drop(sink);
|
||||
|
||||
let mut player = AudioPlayer::new()?;
|
||||
player.play()?;
|
||||
while let Some((audio, took)) = stream.next().await {
|
||||
player.write::<24000>(&audio, 1).await?;
|
||||
println!("Synth took: {:?}", took);
|
||||
}
|
||||
|
||||
Ok(())
|
||||
}
|
||||
514
rust/vendor/kokoro-tts/g2p.py
vendored
Normal file
514
rust/vendor/kokoro-tts/g2p.py
vendored
Normal file
@@ -0,0 +1,514 @@
|
||||
import re
|
||||
from typing import List, Optional, Tuple
|
||||
from jieba import posseg, cut_for_search
|
||||
from pypinyin import lazy_pinyin, load_phrases_dict, Style
|
||||
from dataclasses import dataclass
|
||||
|
||||
@dataclass
|
||||
class MToken:
|
||||
tag: str
|
||||
whitespace: str
|
||||
phonemes: Optional[str] = None
|
||||
|
||||
ZH_MAP = {"b":"ㄅ","p":"ㄆ","m":"ㄇ","f":"ㄈ","d":"ㄉ","t":"ㄊ","n":"ㄋ","l":"ㄌ","g":"ㄍ","k":"ㄎ","h":"ㄏ","j":"ㄐ","q":"ㄑ","x":"ㄒ","zh":"ㄓ","ch":"ㄔ","sh":"ㄕ","r":"ㄖ","z":"ㄗ","c":"ㄘ","s":"ㄙ","a":"ㄚ","o":"ㄛ","e":"ㄜ","ie":"ㄝ","ai":"ㄞ","ei":"ㄟ","ao":"ㄠ","ou":"ㄡ","an":"ㄢ","en":"ㄣ","ang":"ㄤ","eng":"ㄥ","er":"ㄦ","i":"ㄧ","u":"ㄨ","v":"ㄩ","ii":"ㄭ","iii":"十","ve":"月","ia":"压","ian":"言","iang":"阳","iao":"要","in":"阴","ing":"应","iong":"用","iou":"又","ong":"中","ua":"穵","uai":"外","uan":"万","uang":"王","uei":"为","uen":"文","ueng":"瓮","uo":"我","van":"元","vn":"云"}
|
||||
for p in ';:,.!?/—…"()“” 12345R':
|
||||
assert p not in ZH_MAP, p
|
||||
ZH_MAP[p] = p
|
||||
|
||||
unk = '❓'
|
||||
punc = frozenset(';:,.!?—…"()“”')
|
||||
phrases_dict = {
|
||||
'开户行': [['ka1i'], ['hu4'], ['hang2']],
|
||||
'发卡行': [['fa4'], ['ka3'], ['hang2']],
|
||||
'放款行': [['fa4ng'], ['kua3n'], ['hang2']],
|
||||
'茧行': [['jia3n'], ['hang2']],
|
||||
'行号': [['hang2'], ['ha4o']],
|
||||
'各地': [['ge4'], ['di4']],
|
||||
'借还款': [['jie4'], ['hua2n'], ['kua3n']],
|
||||
'时间为': [['shi2'], ['jia1n'], ['we2i']],
|
||||
'为准': [['we2i'], ['zhu3n']],
|
||||
'色差': [['se4'], ['cha1']],
|
||||
'嗲': [['dia3']],
|
||||
'呗': [['bei5']],
|
||||
'不': [['bu4']],
|
||||
'咗': [['zuo5']],
|
||||
'嘞': [['lei5']],
|
||||
'掺和': [['chan1'], ['huo5']]
|
||||
}
|
||||
must_erhua = {
|
||||
"小院儿", "胡同儿", "范儿", "老汉儿", "撒欢儿", "寻老礼儿", "妥妥儿", "媳妇儿"
|
||||
}
|
||||
must_not_neural_tone_words = {
|
||||
'男子', '女子', '分子', '原子', '量子', '莲子', '石子', '瓜子', '电子', '人人', '虎虎',
|
||||
'幺幺', '干嘛', '学子', '哈哈', '数数', '袅袅', '局地', '以下', '娃哈哈', '花花草草', '留得',
|
||||
'耕地', '想想', '熙熙', '攘攘', '卵子', '死死', '冉冉', '恳恳', '佼佼', '吵吵', '打打',
|
||||
'考考', '整整', '莘莘', '落地', '算子', '家家户户', '青青'
|
||||
}
|
||||
must_neural_tone_words = {
|
||||
'麻烦', '麻利', '鸳鸯', '高粱', '骨头', '骆驼', '马虎', '首饰', '馒头', '馄饨', '风筝',
|
||||
'难为', '队伍', '阔气', '闺女', '门道', '锄头', '铺盖', '铃铛', '铁匠', '钥匙', '里脊',
|
||||
'里头', '部分', '那么', '道士', '造化', '迷糊', '连累', '这么', '这个', '运气', '过去',
|
||||
'软和', '转悠', '踏实', '跳蚤', '跟头', '趔趄', '财主', '豆腐', '讲究', '记性', '记号',
|
||||
'认识', '规矩', '见识', '裁缝', '补丁', '衣裳', '衣服', '衙门', '街坊', '行李', '行当',
|
||||
'蛤蟆', '蘑菇', '薄荷', '葫芦', '葡萄', '萝卜', '荸荠', '苗条', '苗头', '苍蝇', '芝麻',
|
||||
'舒服', '舒坦', '舌头', '自在', '膏药', '脾气', '脑袋', '脊梁', '能耐', '胳膊', '胭脂',
|
||||
'胡萝', '胡琴', '胡同', '聪明', '耽误', '耽搁', '耷拉', '耳朵', '老爷', '老实', '老婆',
|
||||
'戏弄', '将军', '翻腾', '罗嗦', '罐头', '编辑', '结实', '红火', '累赘', '糨糊', '糊涂',
|
||||
'精神', '粮食', '簸箕', '篱笆', '算计', '算盘', '答应', '笤帚', '笑语', '笑话', '窟窿',
|
||||
'窝囊', '窗户', '稳当', '稀罕', '称呼', '秧歌', '秀气', '秀才', '福气', '祖宗', '砚台',
|
||||
'码头', '石榴', '石头', '石匠', '知识', '眼睛', '眯缝', '眨巴', '眉毛', '相声', '盘算',
|
||||
'白净', '痢疾', '痛快', '疟疾', '疙瘩', '疏忽', '畜生', '生意', '甘蔗', '琵琶', '琢磨',
|
||||
'琉璃', '玻璃', '玫瑰', '玄乎', '狐狸', '状元', '特务', '牲口', '牙碜', '牌楼', '爽快',
|
||||
'爱人', '热闹', '烧饼', '烟筒', '烂糊', '点心', '炊帚', '灯笼', '火候', '漂亮', '滑溜',
|
||||
'溜达', '温和', '清楚', '消息', '浪头', '活泼', '比方', '正经', '欺负', '模糊', '槟榔',
|
||||
'棺材', '棒槌', '棉花', '核桃', '栅栏', '柴火', '架势', '枕头', '枇杷', '机灵', '本事',
|
||||
'木头', '木匠', '朋友', '月饼', '月亮', '暖和', '明白', '时候', '新鲜', '故事', '收拾',
|
||||
'收成', '提防', '挖苦', '挑剔', '指甲', '指头', '拾掇', '拳头', '拨弄', '招牌', '招呼',
|
||||
'抬举', '护士', '折腾', '扫帚', '打量', '打算', '打扮', '打听', '打发', '扎实', '扁担',
|
||||
'戒指', '懒得', '意识', '意思', '悟性', '怪物', '思量', '怎么', '念头', '念叨', '别人',
|
||||
'快活', '忙活', '志气', '心思', '得罪', '张罗', '弟兄', '开通', '应酬', '庄稼', '干事',
|
||||
'帮手', '帐篷', '希罕', '师父', '师傅', '巴结', '巴掌', '差事', '工夫', '岁数', '屁股',
|
||||
'尾巴', '少爷', '小气', '小伙', '将就', '对头', '对付', '寡妇', '家伙', '客气', '实在',
|
||||
'官司', '学问', '字号', '嫁妆', '媳妇', '媒人', '婆家', '娘家', '委屈', '姑娘', '姐夫',
|
||||
'妯娌', '妥当', '妖精', '奴才', '女婿', '头发', '太阳', '大爷', '大方', '大意', '大夫',
|
||||
'多少', '多么', '外甥', '壮实', '地道', '地方', '在乎', '困难', '嘴巴', '嘱咐', '嘟囔',
|
||||
'嘀咕', '喜欢', '喇嘛', '喇叭', '商量', '唾沫', '哑巴', '哈欠', '哆嗦', '咳嗽', '和尚',
|
||||
'告诉', '告示', '含糊', '吓唬', '后头', '名字', '名堂', '合同', '吆喝', '叫唤', '口袋',
|
||||
'厚道', '厉害', '千斤', '包袱', '包涵', '匀称', '勤快', '动静', '动弹', '功夫', '力气',
|
||||
'前头', '刺猬', '刺激', '别扭', '利落', '利索', '利害', '分析', '出息', '凑合', '凉快',
|
||||
'冷战', '冤枉', '冒失', '养活', '关系', '先生', '兄弟', '便宜', '使唤', '佩服', '作坊',
|
||||
'体面', '位置', '似的', '伙计', '休息', '什么', '人家', '亲戚', '亲家', '交情', '云彩',
|
||||
'事情', '买卖', '主意', '丫头', '丧气', '两口', '东西', '东家', '世故', '不由', '下水',
|
||||
'下巴', '上头', '上司', '丈夫', '丈人', '一辈', '那个', '菩萨', '父亲', '母亲', '咕噜',
|
||||
'邋遢', '费用', '冤家', '甜头', '介绍', '荒唐', '大人', '泥鳅', '幸福', '熟悉', '计划',
|
||||
'扑腾', '蜡烛', '姥爷', '照顾', '喉咙', '吉他', '弄堂', '蚂蚱', '凤凰', '拖沓', '寒碜',
|
||||
'糟蹋', '倒腾', '报复', '逻辑', '盘缠', '喽啰', '牢骚', '咖喱', '扫把', '惦记'
|
||||
}
|
||||
not_erhua = {
|
||||
"虐儿", "为儿", "护儿", "瞒儿", "救儿", "替儿", "有儿", "一儿", "我儿", "俺儿", "妻儿",
|
||||
"拐儿", "聋儿", "乞儿", "患儿", "幼儿", "孤儿", "婴儿", "婴幼儿", "连体儿", "脑瘫儿",
|
||||
"流浪儿", "体弱儿", "混血儿", "蜜雪儿", "舫儿", "祖儿", "美儿", "应采儿", "可儿", "侄儿",
|
||||
"孙儿", "侄孙儿", "女儿", "男儿", "红孩儿", "花儿", "虫儿", "马儿", "鸟儿", "猪儿", "猫儿",
|
||||
"狗儿", "少儿"
|
||||
}
|
||||
BU = '不'
|
||||
YI = '一'
|
||||
X_ENG = frozenset(['x', 'eng'])
|
||||
|
||||
# g2p
|
||||
load_phrases_dict(phrases_dict)
|
||||
|
||||
def get_initials_finals(word: str) -> Tuple[List[str], List[str]]:
|
||||
"""
|
||||
Get word initial and final by pypinyin or g2pM
|
||||
"""
|
||||
initials = []
|
||||
finals = []
|
||||
orig_initials = lazy_pinyin(word, neutral_tone_with_five=True, style=Style.INITIALS)
|
||||
orig_finals = lazy_pinyin(word, neutral_tone_with_five=True, style=Style.FINALS_TONE3)
|
||||
print(orig_initials, orig_finals)
|
||||
# after pypinyin==0.44.0, '嗯' need to be n2, cause the initial and final consonants cannot be empty at the same time
|
||||
en_index = [index for index, c in enumerate(word) if c == "嗯"]
|
||||
for i in en_index:
|
||||
orig_finals[i] = "n2"
|
||||
|
||||
for c, v in zip(orig_initials, orig_finals):
|
||||
if re.match(r'i\d', v):
|
||||
if c in ['z', 'c', 's']:
|
||||
# zi, ci, si
|
||||
v = re.sub('i', 'ii', v)
|
||||
elif c in ['zh', 'ch', 'sh', 'r']:
|
||||
# zhi, chi, shi
|
||||
v = re.sub('i', 'iii', v)
|
||||
initials.append(c)
|
||||
finals.append(v)
|
||||
|
||||
return initials, finals
|
||||
|
||||
def merge_erhua(initials: List[str], finals: List[str], word: str, pos: str) -> Tuple[List[str], List[str]]:
|
||||
"""
|
||||
Do erhub.
|
||||
"""
|
||||
# fix er1
|
||||
for i, phn in enumerate(finals):
|
||||
if i == len(finals) - 1 and word[i] == "儿" and phn == 'er1':
|
||||
finals[i] = 'er2'
|
||||
|
||||
# 发音
|
||||
if word not in must_erhua and (word in not_erhua or pos in {"a", "j", "nr"}):
|
||||
return initials, finals
|
||||
|
||||
# "……" 等情况直接返回
|
||||
if len(finals) != len(word):
|
||||
return initials, finals
|
||||
|
||||
assert len(finals) == len(word)
|
||||
|
||||
# 不发音
|
||||
new_initials = []
|
||||
new_finals = []
|
||||
for i, phn in enumerate(finals):
|
||||
if i == len(finals) - 1 and word[i] == "儿" and phn in {"er2", "er5"} and word[-2:] not in not_erhua and new_finals:
|
||||
new_finals[-1] = new_finals[-1][:-1] + "R" + new_finals[-1][-1]
|
||||
else:
|
||||
new_initials.append(initials[i])
|
||||
new_finals.append(phn)
|
||||
|
||||
return new_initials, new_finals
|
||||
|
||||
def merge_bu(seg: List[Tuple[str, str]]) -> List[Tuple[str, str]]:
    """Merge "不" with the word that follows it.

    jieba sometimes emits "不" as a standalone token, which breaks the
    downstream sandhi rules; gluing it onto the next Chinese token avoids
    that. A "不" kept apart (sentence-final, or followed by an English/
    punctuation token) is emitted unchanged.
    """
    merged: List[Tuple[str, str]] = []
    for idx, (word, pos) in enumerate(seg):
        if pos not in X_ENG:
            prev_word = seg[idx - 1][0] if idx > 0 else None
            if prev_word == BU:
                word = prev_word + word
        nxt_pos = seg[idx + 1][1] if idx + 1 < len(seg) else None
        # Drop a lone "不" only when a mergeable Chinese token follows.
        if word != BU or nxt_pos is None or nxt_pos in X_ENG:
            merged.append((word, pos))
    return merged
|
||||
|
||||
def merge_yi(seg: List[Tuple[str, str]]) -> List[Tuple[str, str]]:
    """Merge "一" with its neighbours.

    Pass 1: fold a "一" sandwiched between two identical verbs into one
    token, e.g. [('听','v'), ('一','m'), ('听','v')] -> [('听一听','v')].
    Pass 2: glue any remaining lone "一" onto the following Chinese token
    (a lone "一" from jieba may otherwise trigger wrong sandhi).
    """
    first_pass: List[Tuple[str, str]] = []
    consume_next = False
    for idx, (word, pos) in enumerate(seg):
        if consume_next:
            consume_next = False
            continue
        sandwiched = (
            0 < idx < len(seg) - 1
            and word == YI
            and seg[idx - 1][0] == seg[idx + 1][0]
            and seg[idx - 1][1] == "v"
            and seg[idx + 1][1] not in X_ENG
        )
        if sandwiched:
            prev_word, prev_pos = first_pass[-1]
            first_pass[-1] = (prev_word + YI + seg[idx + 1][0], prev_pos)
            consume_next = True
        else:
            first_pass.append((word, pos))

    result: List[Tuple[str, str]] = []
    for word, pos in first_pass:
        if result and result[-1][0] == YI and pos not in X_ENG:
            result[-1] = (result[-1][0] + word, result[-1][1])
        else:
            result.append((word, pos))
    return result
|
||||
|
||||
def merge_reduplication(seg: List[Tuple[str, str]]) -> List[Tuple[str, str]]:
    """Collapse adjacent identical Chinese tokens into one,
    e.g. [('谢谢','v'), ('谢谢','v')] -> [['谢谢谢谢','v']]."""
    result = []
    for word, pos in seg:
        repeats_previous = bool(result) and word == result[-1][0] and pos not in X_ENG
        if repeats_previous:
            result[-1][0] += word
        else:
            result.append([word, pos])
    return result
|
||||
|
||||
def is_reduplication(word: str) -> bool:
    """Return True when ``word`` is a two-character reduplication, e.g. "奶奶"."""
    if len(word) != 2:
        return False
    first, second = word
    return first == second
|
||||
|
||||
def merge_continuous_three_tones(seg: List[Tuple[str, str]]) -> List[Tuple[str, str]]:
    """Merge two consecutive words whose syllables are ALL tone-3.

    Keeping them as one token lets the third-tone sandhi rule fire across
    the word boundary. Reduplicated words are deliberately left unmerged
    (they must go through the neutral-tone sandhi instead), and merged
    units are capped at three characters.
    """
    finals_per_word = []
    for word, pos in seg:
        if pos in X_ENG:
            # English/punctuation tokens get a dummy final so indices line up.
            finals_per_word.append(['0'])
            continue
        word_finals = lazy_pinyin(word, neutral_tone_with_five=True, style=Style.FINALS_TONE3)
        # pypinyin >= 0.44.0 leaves '嗯' with an empty final (initial and
        # final may not both be empty), so patch it to "n2" by hand.
        for idx, ch in enumerate(word):
            if ch == "嗯":
                word_finals[idx] = "n2"
        finals_per_word.append(word_finals)

    assert len(finals_per_word) == len(seg)
    already_merged = [False] * len(seg)
    result = []
    for i, (word, pos) in enumerate(seg):
        mergeable = (
            i > 0
            and pos not in X_ENG
            and all_tone_three(finals_per_word[i - 1])
            and all_tone_three(finals_per_word[i])
            and not already_merged[i - 1]
        )
        if mergeable and not is_reduplication(seg[i - 1][0]) and len(seg[i - 1][0]) + len(word) <= 3:
            result[-1][0] += word
            already_merged[i] = True
        else:
            result.append([word, pos])

    return result
|
||||
|
||||
def merge_continuous_three_tones_2(seg: List[Tuple[str, str]]) -> List[Tuple[str, str]]:
    """Merge two consecutive words where the LAST syllable of the first and
    the FIRST syllable of the second are both tone-3.

    Looser companion to ``merge_continuous_three_tones``: only the boundary
    syllables need tone 3. Reduplications stay unmerged (they need the
    neutral-tone sandhi) and merged units are capped at three characters.
    """
    finals_per_word = []
    for word, pos in seg:
        if pos in X_ENG:
            # Dummy final keeps indices aligned for non-Chinese tokens.
            finals_per_word.append(['0'])
            continue
        word_finals = lazy_pinyin(word, neutral_tone_with_five=True, style=Style.FINALS_TONE3)
        # pypinyin >= 0.44.0 leaves '嗯' with an empty final; patch to "n2".
        for idx, ch in enumerate(word):
            if ch == "嗯":
                word_finals[idx] = "n2"
        finals_per_word.append(word_finals)

    assert len(finals_per_word) == len(seg)
    already_merged = [False] * len(seg)
    result = []
    for i, (word, pos) in enumerate(seg):
        boundary_third_tone = (
            i > 0
            and pos not in X_ENG
            and finals_per_word[i - 1][-1][-1] == "3"
            and finals_per_word[i][0][-1] == "3"
            and not already_merged[i - 1]
        )
        if boundary_third_tone and not is_reduplication(seg[i - 1][0]) and len(seg[i - 1][0]) + len(word) <= 3:
            result[-1][0] += word
            already_merged[i] = True
        else:
            result.append([word, pos])
    return result
|
||||
|
||||
def merge_er(seg: List[Tuple[str, str]]) -> List[Tuple[str, str]]:
    """Attach a standalone "儿" token to the preceding Chinese token so the
    erhua rules can see it as one word."""
    result = []
    for idx, (word, pos) in enumerate(seg):
        attach = idx > 0 and word == "儿" and result[-1][1] not in X_ENG
        if attach:
            result[-1][0] += word
        else:
            result.append([word, pos])
    return result
|
||||
|
||||
def pre_merge_for_modify(seg: List[Tuple[str, str]]) -> List[Tuple[str, str]]:
    """Fix jieba word-segmentation bad cases before tone sandhi.

    seg: [(word, pos), ...]

    Runs the merge passes in a fixed order; each pass consumes the output
    of the previous one.
    """
    pipeline = (
        merge_bu,
        merge_yi,
        merge_reduplication,
        merge_continuous_three_tones,
        merge_continuous_three_tones_2,
        merge_er,
    )
    for merge_pass in pipeline:
        seg = merge_pass(seg)
    return seg
|
||||
|
||||
def bu_sandhi(word: str, finals: List[str]) -> List[str]:
    """Tone sandhi for "不" (mutates and returns ``finals``).

    In an A-不-A pattern (e.g. 看不懂) the middle 不 is neutral (tone 5);
    otherwise any 不 directly before a tone-4 syllable is read bu2 (不怕).
    """
    if len(word) == 3 and word[1] == BU:
        # e.g. 看不懂 — the sandwiched 不 becomes neutral tone.
        finals[1] = finals[1][:-1] + "5"
        return finals
    for idx, char in enumerate(word):
        # 不 before tone 4 is read bu2, e.g. 不怕.
        if char == BU and idx + 1 < len(word) and finals[idx + 1][-1] == "4":
            finals[idx] = finals[idx][:-1] + "2"
    return finals
|
||||
|
||||
def yi_sandhi(word: str, finals: List[str]) -> List[str]:
    """Tone sandhi for "一" (mutates and returns ``finals``).

    Rules, in priority order:
      * inside a digit sequence (一零零, 二一零) the tone is left alone;
      * between reduplicated characters (看一看) it is neutral (tone 5);
      * after 第 (第一) it keeps tone 1;
      * before tone 4/5 it becomes yi2 (一段), before other tones yi4 (一天),
        except when followed by punctuation, where tone 1 is kept.
    """
    rest_is_numeric = all(ch.isnumeric() for ch in word if ch != YI)
    if YI in word and rest_is_numeric:
        return finals
    if len(word) == 3 and word[1] == YI and word[0] == word[-1]:
        finals[1] = finals[1][:-1] + "5"
    elif word.startswith("第一"):
        finals[1] = finals[1][:-1] + "1"
    else:
        for idx, char in enumerate(word):
            if char != YI or idx + 1 >= len(word):
                continue
            if finals[idx + 1][-1] in {'4', '5'}:
                # 一 before tone 4/5 becomes yi2, e.g. 一段.
                finals[idx] = finals[idx][:-1] + "2"
            elif word[idx + 1] not in punc:
                # 一 before other tones becomes yi4, e.g. 一天 —
                # but keeps tone 1 before punctuation.
                finals[idx] = finals[idx][:-1] + "4"
    return finals
|
||||
|
||||
def split_word(word: str) -> List[str]:
    """Split ``word`` into two sub-words using jieba's search-mode cut.

    The shortest sub-word found is used as the anchor: if it is a prefix of
    ``word`` the split is [anchor, remainder], otherwise [remainder, anchor].
    """
    candidates = sorted(cut_for_search(word), key=len)
    anchor = candidates[0]
    if word.find(anchor) == 0:
        # Anchor is a prefix of the word.
        return [anchor, word[len(anchor):]]
    # Anchor sits at the end of the word.
    return [word[:-len(anchor)], anchor]
|
||||
|
||||
# the meaning of jieba pos tag: https://blog.csdn.net/weixin_44174352/article/details/113731041
# e.g.
# word: "家里"
# pos: "s"
# finals: ['ia1', 'i3']
def neural_sandhi(word: str, pos: str, finals: List[str]) -> List[str]:
    """Neutral-tone (轻声) sandhi: rewrite qualifying syllables to tone 5.

    Rules cover reduplications, sentence-final particles, structural 的/地/得,
    aspect markers 了/着/过, suffixes 们/子, locatives 上/下, directionals
    来/去, the measure word 个, and dictionary-listed neutral-tone words.
    Mutates and returns ``finals``.
    """
    if word in must_not_neural_tone_words:
        return finals
    # Reduplication words for n./v./a., e.g. 奶奶, 试试, 旺旺.
    for j, item in enumerate(word):
        if j - 1 >= 0 and item == word[j - 1] and pos[0] in {"n", "v", "a"}:
            finals[j] = finals[j][:-1] + "5"
    ge_idx = word.find("个")
    if len(word) >= 1 and word[-1] in "吧呢啊呐噻嘛吖嗨呐哦哒滴哩哟喽啰耶喔诶":
        # Sentence-final particles are neutral.
        finals[-1] = finals[-1][:-1] + "5"
    elif len(word) >= 1 and word[-1] in "的地得":
        finals[-1] = finals[-1][:-1] + "5"
    # e.g. 走了, 看着, 去过
    elif len(word) == 1 and word in "了着过" and pos in {"ul", "uz", "ug"}:
        finals[-1] = finals[-1][:-1] + "5"
    elif len(word) > 1 and word[-1] in "们子" and pos in {"r", "n"}:
        finals[-1] = finals[-1][:-1] + "5"
    # e.g. 桌上, 地下
    elif len(word) > 1 and word[-1] in "上下" and pos in {"s", "l", "f"}:
        finals[-1] = finals[-1][:-1] + "5"
    # e.g. 上来, 下去
    elif len(word) > 1 and word[-1] in "来去" and word[-2] in "上下进出回过起开":
        finals[-1] = finals[-1][:-1] + "5"
    # 个 as a measure word.
    elif (ge_idx >= 1 and (word[ge_idx - 1].isnumeric() or word[ge_idx - 1] in "几有两半多各整每做是")) or word == '个':
        finals[ge_idx] = finals[ge_idx][:-1] + "5"
    else:
        if word in must_neural_tone_words or word[-2:] in must_neural_tone_words:
            finals[-1] = finals[-1][:-1] + "5"

    word_list = split_word(word)
    finals_list = [finals[:len(word_list[0])], finals[len(word_list[0]):]]
    # NOTE: the loop variable is named sub_word (the original shadowed the
    # `word` parameter here, which was harmless but misleading).
    for i, sub_word in enumerate(word_list):
        # Conventional neutral tone in Chinese.
        if sub_word in must_neural_tone_words or sub_word[-2:] in must_neural_tone_words:
            finals_list[i][-1] = finals_list[i][-1][:-1] + "5"
    return sum(finals_list, [])
|
||||
|
||||
def all_tone_three(finals: List[str]) -> bool:
    """True when every final carries tone 3 (vacuously True for an empty list)."""
    for final in finals:
        if final[-1] != "3":
            return False
    return True
|
||||
|
||||
def three_sandhi(word: str, finals: List[str]) -> List[str]:
    """Third-tone sandhi: two adjacent tone-3 syllables -> first becomes tone 2.

    Handles 2-, 3-, and 4-character words; 3-character words are split with
    ``split_word`` to find the prosodic boundary, 4-character idioms are
    split 2+2. Mutates and returns ``finals``.
    """
    length = len(word)
    if length == 2 and all_tone_three(finals):
        finals[0] = finals[0][:-1] + "2"
    elif length == 3:
        word_list = split_word(word)
        if all_tone_three(finals):
            if len(word_list[0]) == 2:
                # Disyllabic + monosyllabic, e.g. 蒙古/包.
                finals[0] = finals[0][:-1] + "2"
                finals[1] = finals[1][:-1] + "2"
            elif len(word_list[0]) == 1:
                # Monosyllabic + disyllabic, e.g. 纸/老虎.
                finals[1] = finals[1][:-1] + "2"
        else:
            finals_list = [finals[:len(word_list[0])], finals[len(word_list[0]):]]
            if len(finals_list) == 2:
                for i, sub in enumerate(finals_list):
                    if all_tone_three(sub) and len(sub) == 2:
                        # e.g. 所有/人.
                        finals_list[i][0] = finals_list[i][0][:-1] + "2"
                    elif (i == 1 and not all_tone_three(sub)
                          and finals_list[i][0][-1] == "3"
                          and finals_list[0][-1][-1] == "3"):
                        # e.g. 好/喜欢 — sandhi across the split boundary.
                        finals_list[0][-1] = finals_list[0][-1][:-1] + "2"
                finals = sum(finals_list, [])
    elif length == 4:
        # Idioms: treat as two halves of length 2.
        halves = [finals[:2], finals[2:]]
        finals = []
        for half in halves:
            if all_tone_three(half):
                half[0] = half[0][:-1] + "2"
            finals += half

    return finals
|
||||
|
||||
def modified_tone(word: str, pos: str, finals: List[str]) -> List[str]:
    """Apply every tone-sandhi rule to one segmented word.

    word: the segmented word (分词)
    pos: its part-of-speech tag (词性)
    finals: toned finals, [final1, ..., finaln]

    Order matters: 不 and 一 sandhi first, then neutral tone, then the
    third-tone rule on the result.
    """
    for char_rule in (bu_sandhi, yi_sandhi):
        finals = char_rule(word, finals)
    finals = neural_sandhi(word, pos, finals)
    return three_sandhi(word, finals)
|
||||
|
||||
def g2p(text: str, with_erhua: bool = True) -> str:
    """Convert Chinese text to a phoneme string.

    Args:
        text: input text (Chinese, possibly mixed with Latin/punctuation).
        with_erhua: whether to apply erhua (儿化) merging.

    Returns:
        A string of phonemes, e.g. 'ㄋㄧ2ㄏㄠ3/ㄕ十4ㄐㄝ4'. Unknown phonemes
        map to ``unk``; non-Chinese runs become token whitespace.

    Note: the leftover debug ``print(phones)`` and the module-level
    ``initials``/``finals`` accumulator lists (only consumed by
    commented-out code) have been removed.
    """
    tokens = []
    seg_cut = posseg.lcut(text)
    # Fix word-segmentation bad cases before sandhi.
    seg_cut = pre_merge_for_modify(seg_cut)

    for word, pos in seg_cut:
        # Re-tag: Chinese-only tokens that jieba marked 'x' become 'X';
        # punctuation mis-tagged as non-'x' becomes 'x'.
        if pos == 'x' and '\u4E00' <= min(word) and max(word) <= '\u9FFF':
            pos = 'X'
        elif pos != 'x' and word in punc:
            pos = 'x'
        tk = MToken(tag=pos, whitespace='')
        if pos in X_ENG:
            # Non-Chinese run: punctuation becomes its own token, anything
            # else is appended to the previous token's whitespace.
            if not word.isspace():
                if pos == 'x' and word in punc:
                    tk.phonemes = word
                    tokens.append(tk)
                elif tokens:
                    tokens[-1].whitespace += word
            continue
        elif tokens and tokens[-1].tag not in X_ENG and not tokens[-1].whitespace:
            # Separate consecutive Chinese tokens with '/'.
            tokens[-1].whitespace = '/'

        # Grapheme-to-phoneme for this word.
        sub_initials, sub_finals = get_initials_finals(word)
        # Tone sandhi.
        sub_finals = modified_tone(word, pos, sub_finals)
        # Erhua.
        if with_erhua:
            sub_initials, sub_finals = merge_erhua(sub_initials, sub_finals, word, pos)

        phones = []
        for c, v in zip(sub_initials, sub_finals):
            # Post-process pypinyin output; i/ii/iii were discriminated earlier.
            if c:
                phones.append(c)
            if v and (v not in punc or v != c):
                phones.append(v)
        # Normalize the erhua "R" marker and split before each tone digit.
        phones = '_'.join(phones).replace('_eR', '_er').replace('R', '_R')
        phones = re.sub(r'(?=\d)', '_', phones).split('_')
        tk.phonemes = ''.join(ZH_MAP.get(p, unk) for p in phones)
        tokens.append(tk)

    return ''.join((unk if tk.phonemes is None else tk.phonemes) + tk.whitespace for tk in tokens)
|
||||
|
||||
if __name__ == "__main__":
    # Smoke tests for the merge passes and full g2p pipeline.
    # Guarded so importing this module has no side effects.
    print(g2p('时间为。Hello, world!你好,我们是一群追逐梦想的人。我正在使用qq。忽略卢驴'))
    seg = posseg.lcut('不好看', True)
    print(seg, merge_bu(seg))
    seg = merge_bu(posseg.lcut('听一听一个', True))
    print(seg, merge_yi(seg))
    seg = merge_bu(posseg.lcut('谢谢谢谢', True))
    print(seg, merge_reduplication(seg))
    seg = merge_bu(posseg.lcut('小美好', True))
    print(seg, merge_continuous_three_tones(seg))
    seg = merge_bu(posseg.lcut('风景好', True))
    print(seg, merge_continuous_three_tones_2(seg))
|
||||
3
rust/vendor/kokoro-tts/run.bat
vendored
Normal file
3
rust/vendor/kokoro-tts/run.bat
vendored
Normal file
@@ -0,0 +1,3 @@
|
||||
rem Add the MinGW-w64 toolchain to PATH (needed by the build on this machine).
set PATH=%PATH%;D:\msys64\mingw64\bin
rem Build and run the v1.1 direct-synthesis example.
cargo run --example synth_directly_v11
rem Keep the console window open so output can be read.
pause
|
||||
80
rust/vendor/kokoro-tts/src/error.rs
vendored
Normal file
80
rust/vendor/kokoro-tts/src/error.rs
vendored
Normal file
@@ -0,0 +1,80 @@
|
||||
use crate::G2PError;
|
||||
use bincode::error::DecodeError;
|
||||
use ndarray::ShapeError;
|
||||
use ort::Error as OrtError;
|
||||
use std::{
|
||||
error::Error,
|
||||
fmt::{Debug, Display, Formatter, Result as FmtResult},
|
||||
io::Error as IoError,
|
||||
time::SystemTimeError,
|
||||
};
|
||||
|
||||
/// Unified error type for the kokoro-tts crate.
///
/// Wraps failures from voice-pack decoding (`bincode`), grapheme-to-phoneme
/// conversion, I/O, the ONNX Runtime (`ort`), tensor shaping (`ndarray`),
/// clock reads, and voice lookup/validation.
#[derive(Debug)]
pub enum KokoroError {
    /// Voice-pack deserialization failed.
    Decode(DecodeError),
    /// Grapheme-to-phoneme conversion failed.
    G2P(G2PError),
    /// Underlying file / stream I/O error.
    Io(IoError),
    /// The shared model/voices storage was dropped while a request was pending.
    ModelReleased,
    /// ONNX Runtime error.
    Ort(OrtError),
    /// A message could not be delivered (channel send failure); payload is
    /// the formatted send error.
    Send(String),
    /// Tensor shape mismatch.
    Shape(ShapeError),
    /// Reading the system clock failed.
    SystemTime(SystemTimeError),
    /// No voice with the given name exists in the loaded voice pack.
    VoiceNotFound(String),
    /// The voice data did not match the expected pack version.
    VoiceVersionInvalid(String),
}

impl Display for KokoroError {
    /// Every message is prefixed with `"KokoroError: "` followed by the
    /// wrapped error's own `Display` output (or the variant name/payload).
    fn fmt(&self, f: &mut Formatter<'_>) -> FmtResult {
        write!(f, "KokoroError: ")?;
        match self {
            Self::Decode(e) => Display::fmt(e, f),
            Self::G2P(e) => Display::fmt(e, f),
            Self::Io(e) => Display::fmt(e, f),
            Self::Ort(e) => Display::fmt(e, f),
            Self::ModelReleased => write!(f, "ModelReleased"),
            Self::Send(e) => Display::fmt(e, f),
            Self::Shape(e) => Display::fmt(e, f),
            Self::SystemTime(e) => Display::fmt(e, f),
            Self::VoiceNotFound(name) => write!(f, "VoiceNotFound({})", name),
            Self::VoiceVersionInvalid(msg) => write!(f, "VoiceVersionInvalid({})", msg),
        }
    }
}

impl Error for KokoroError {}

// Blanket conversions so `?` works on the wrapped error types.

impl From<IoError> for KokoroError {
    fn from(value: IoError) -> Self {
        Self::Io(value)
    }
}

impl From<DecodeError> for KokoroError {
    fn from(value: DecodeError) -> Self {
        Self::Decode(value)
    }
}

impl From<OrtError> for KokoroError {
    fn from(value: OrtError) -> Self {
        Self::Ort(value)
    }
}

impl From<G2PError> for KokoroError {
    fn from(value: G2PError) -> Self {
        Self::G2P(value)
    }
}

impl From<ShapeError> for KokoroError {
    fn from(value: ShapeError) -> Self {
        Self::Shape(value)
    }
}

impl From<SystemTimeError> for KokoroError {
    fn from(value: SystemTimeError) -> Self {
        Self::SystemTime(value)
    }
}
|
||||
321
rust/vendor/kokoro-tts/src/g2p.rs
vendored
Normal file
321
rust/vendor/kokoro-tts/src/g2p.rs
vendored
Normal file
@@ -0,0 +1,321 @@
|
||||
/// 文本到国际音标的转换
|
||||
mod v10;
|
||||
mod v11;
|
||||
|
||||
use super::PinyinError;
|
||||
use chinese_number::{ChineseCase, ChineseCountMethod, ChineseVariant, NumberToChinese};
|
||||
#[cfg(feature = "use-cmudict")]
|
||||
use cmudict_fast::{Cmudict, Error as CmudictError};
|
||||
use pinyin::ToPinyin;
|
||||
use regex::{Captures, Error as RegexError, Regex};
|
||||
use std::{
|
||||
error::Error,
|
||||
fmt::{Display, Formatter, Result as FmtResult},
|
||||
};
|
||||
|
||||
/// Errors produced by the grapheme-to-phoneme layer.
///
/// Variants are feature-gated: the `use-cmudict` build wraps CMUdict errors,
/// the espeak-FFI build wraps C-string/UTF-8 conversion errors.
#[derive(Debug)]
pub enum G2PError {
    /// CMUdict could not be parsed/loaded (only with `use-cmudict`).
    #[cfg(feature = "use-cmudict")]
    CmudictError(CmudictError),
    /// A conversion produced no data.
    // NOTE(review): variant name is a typo for "EmptyData"; kept because
    // other modules (e.g. v10.rs) construct `G2PError::EnptyData` by name.
    EnptyData,
    /// An interior NUL byte made C-string construction fail (espeak build).
    #[cfg(not(feature = "use-cmudict"))]
    Nul(std::ffi::NulError),
    /// Pinyin-to-IPA conversion failed.
    Pinyin(PinyinError),
    /// A regular expression failed to compile.
    Regex(RegexError),
    /// espeak returned bytes that were not valid UTF-8 (espeak build).
    #[cfg(not(feature = "use-cmudict"))]
    Utf8(std::str::Utf8Error),
}

impl Display for G2PError {
    /// Messages are prefixed with `"G2PError: "` followed by the wrapped
    /// error's own `Display` output.
    fn fmt(&self, f: &mut Formatter<'_>) -> FmtResult {
        write!(f, "G2PError: ")?;
        match self {
            #[cfg(feature = "use-cmudict")]
            Self::CmudictError(e) => Display::fmt(e, f),
            Self::EnptyData => Display::fmt("EmptyData", f),
            #[cfg(not(feature = "use-cmudict"))]
            Self::Nul(e) => Display::fmt(e, f),
            Self::Pinyin(e) => Display::fmt(e, f),
            Self::Regex(e) => Display::fmt(e, f),
            #[cfg(not(feature = "use-cmudict"))]
            Self::Utf8(e) => Display::fmt(e, f),
        }
    }
}

impl Error for G2PError {}

// Conversions so `?` works on the wrapped error types.

impl From<PinyinError> for G2PError {
    fn from(value: PinyinError) -> Self {
        Self::Pinyin(value)
    }
}

impl From<RegexError> for G2PError {
    fn from(value: RegexError) -> Self {
        Self::Regex(value)
    }
}

#[cfg(feature = "use-cmudict")]
impl From<CmudictError> for G2PError {
    fn from(value: CmudictError) -> Self {
        Self::CmudictError(value)
    }
}

#[cfg(not(feature = "use-cmudict"))]
impl From<std::ffi::NulError> for G2PError {
    fn from(value: std::ffi::NulError) -> Self {
        Self::Nul(value)
    }
}

#[cfg(not(feature = "use-cmudict"))]
impl From<std::str::Utf8Error> for G2PError {
    fn from(value: std::str::Utf8Error) -> Self {
        Self::Utf8(value)
    }
}
|
||||
|
||||
/// Convert a Chinese word to IPA, character by character.
///
/// Characters without a pinyin reading (punctuation, Latin letters, …) are
/// passed through unchanged; everything else goes through `v10::py2ipa` on
/// the tone-number-suffixed pinyin.
fn word2ipa_zh(word: &str) -> Result<String, G2PError> {
    let iter = word.chars().map(|i| match i.to_pinyin() {
        None => Ok(i.to_string()),
        Some(p) => v10::py2ipa(p.with_tone_num_end()),
    });

    // Collect manually so the first conversion error short-circuits via `?`.
    let mut result = String::new();
    for i in iter {
        result.push_str(&i?);
    }
    Ok(result)
}
|
||||
|
||||
/// Convert an English word to IPA using the bundled CMU pronouncing
/// dictionary (`use-cmudict` build).
///
/// Short all-uppercase words (< 4 chars) are treated as initialisms and
/// spelled out letter by letter. Dictionary lookup tries the word as-is,
/// then uppercase, then lowercase; on a miss it falls back to letter
/// spelling. When a word has several pronunciations one is picked at
/// random.
#[cfg(feature = "use-cmudict")]
fn word2ipa_en(word: &str) -> Result<String, G2PError> {
    use super::{arpa_to_ipa, letters_to_ipa};
    use std::{
        io::{Error as IoError, ErrorKind},
        str::FromStr,
        sync::LazyLock,
    };

    // Lazily parse the embedded dictionary exactly once; errors are cloned
    // out by hand because CmudictError is not Clone.
    fn get_cmudict<'a>() -> Result<&'a Cmudict, CmudictError> {
        static CMUDICT: LazyLock<Result<Cmudict, CmudictError>> =
            LazyLock::new(|| Cmudict::from_str(include_str!("../dict/cmudict.dict")));
        CMUDICT.as_ref().map_err(|i| match i {
            CmudictError::IoErr(e) => CmudictError::IoErr(IoError::new(ErrorKind::Other, e)),
            CmudictError::InvalidLine(e) => CmudictError::InvalidLine(*e),
            CmudictError::RuleParseError(e) => CmudictError::RuleParseError(e.clone()),
        })
    }

    // Initialisms like "QQ" / "USA" are spelled out.
    if word.chars().count() < 4 && word.chars().all(|c| c.is_ascii_uppercase()) {
        return Ok(letters_to_ipa(word));
    }

    let dict = get_cmudict()?;
    let upper = word.to_ascii_uppercase();
    let lower = word.to_ascii_lowercase();
    let Some(rules) = dict
        .get(word)
        .or_else(|| dict.get(&upper))
        .or_else(|| dict.get(&lower))
    else {
        // Unknown word: fall back to letter-by-letter spelling.
        return Ok(letters_to_ipa(word));
    };
    if rules.is_empty() {
        return Ok(word.to_owned());
    }
    // Multiple pronunciations: choose one at random.
    let i = rand::random_range(0..rules.len());
    let result = rules[i]
        .pronunciation()
        .iter()
        .map(|i| arpa_to_ipa(&i.to_string()).unwrap_or_default())
        .collect::<String>();
    Ok(result)
}
||||
|
||||
/// Convert an English word to IPA via the linked espeak C functions
/// (default, non-`use-cmudict` build).
///
/// Short all-uppercase words (< 4 chars) are treated as initialisms and
/// spelled out; everything else is lowercased and handed to espeak's
/// `TextToPhonemes`, after a one-time `Initialize` with the embedded
/// dictionary blob.
#[cfg(not(feature = "use-cmudict"))]
fn word2ipa_en(word: &str) -> Result<String, G2PError> {
    use super::letters_to_ipa;
    use std::{
        ffi::{CStr, CString, c_char},
        sync::Once,
    };

    if word.chars().count() < 4 && word.chars().all(|c| c.is_ascii_uppercase()) {
        return Ok(letters_to_ipa(word));
    }

    unsafe extern "C" {
        fn TextToPhonemes(text: *const c_char) -> *const ::std::os::raw::c_char;
        fn Initialize(data_dictlist: *const c_char);
    }

    unsafe {
        // One-time espeak initialization with the embedded dictionary.
        // NOTE(review): DATA is passed as a raw pointer with no explicit
        // NUL terminator — presumably the blob format is self-delimiting;
        // confirm against the espeak wrapper's expectations.
        static INIT: Once = Once::new();
        INIT.call_once(|| {
            static DATA: &[u8] = include_bytes!("../dict/espeak.dict");
            Initialize(DATA.as_ptr() as _);
        });

        // NOTE(review): `into_raw` transfers ownership out of the CString
        // and it is never reclaimed with `CString::from_raw`, so this leaks
        // one allocation per call. Also unclear who owns the buffer returned
        // by `TextToPhonemes` — verify against the C side.
        let word = CString::new(word.to_lowercase())?.into_raw() as *const c_char;
        let res = TextToPhonemes(word);
        Ok(CStr::from_ptr(res).to_str()?.to_string())
    }
}
|
||||
|
||||
/// Replace full-width CJK punctuation with its ASCII (half-width)
/// counterpart; angle-quote pairs become curly double quotes. All other
/// characters are copied through unchanged.
///
/// (The previous version built an unused `Peekable` iterator and carried
/// comments about lookahead and trailing-space cleanup that the code never
/// performed; both have been removed.)
fn to_half_shape(text: &str) -> String {
    // Replacements never grow the byte length, so `text.len()` is enough.
    let mut result = String::with_capacity(text.len());
    for c in text.chars() {
        match c {
            // Quote-like brackets map to curly double quotes.
            '«' | '《' => result.push('“'),
            '»' | '》' => result.push('”'),
            '(' => result.push('('),
            ')' => result.push(')'),
            // Plain one-to-one punctuation replacements.
            '、' | ',' => result.push(','),
            '。' => result.push('.'),
            '!' => result.push('!'),
            ':' => result.push(':'),
            ';' => result.push(';'),
            '?' => result.push('?'),
            // Everything else passes through.
            _ => result.push(c),
        }
    }
    result
}
|
||||
|
||||
fn num_repr(text: &str) -> Result<String, G2PError> {
|
||||
let regex = Regex::new(r#"\d+(\.\d+)?"#)?;
|
||||
Ok(regex
|
||||
.replace(text, |caps: &Captures| {
|
||||
let text = &caps[0];
|
||||
if let Ok(num) = text.parse::<f64>() {
|
||||
num.to_chinese(
|
||||
ChineseVariant::Traditional,
|
||||
ChineseCase::Lower,
|
||||
ChineseCountMethod::Low,
|
||||
)
|
||||
.map_or(text.to_owned(), |i| i)
|
||||
} else if let Ok(num) = text.parse::<i64>() {
|
||||
num.to_chinese(
|
||||
ChineseVariant::Traditional,
|
||||
ChineseCase::Lower,
|
||||
ChineseCountMethod::Low,
|
||||
)
|
||||
.map_or(text.to_owned(), |i| i)
|
||||
} else {
|
||||
text.to_owned()
|
||||
}
|
||||
})
|
||||
.to_string())
|
||||
}
|
||||
|
||||
/// Convert mixed Chinese/English text to a phoneme string.
///
/// Numbers are first rewritten as Chinese numerals, then the text is
/// partitioned into runs of (1) CJK ideographs, (2) CJK punctuation and
/// (3) Latin-1 text, and each run is converted:
/// * Chinese runs go through `v11::g2p` (when `use_v11`) or jieba
///   segmentation + `word2ipa_zh`;
/// * CJK punctuation is half-width-normalized and attached to the
///   preceding output without a space;
/// * Latin runs are split into word/non-word chunks; word-like chunks go
///   through `word2ipa_en`, the rest is copied (collapsing doubled spaces).
pub fn g2p(text: &str, use_v11: bool) -> Result<String, G2PError> {
    let text = num_repr(text)?;
    // Capture group 1: CJK ideographs; group 2: CJK punctuation;
    // group 3: Latin-1 text.
    let sentence_pattern = Regex::new(
        r#"([\u4E00-\u9FFF]+)|([,。:·?、!《》()【】〖〗〔〕“”‘’〈〉…— ]+)|([\u0000-\u00FF]+)+"#,
    )?;
    let en_word_pattern = Regex::new("\\w+|\\W+")?;
    let jieba = jieba_rs::Jieba::new();
    let mut result = String::new();
    for i in sentence_pattern.captures_iter(&text) {
        match (i.get(1), i.get(2), i.get(3)) {
            // Chinese text run.
            (Some(text), _, _) => {
                let text = to_half_shape(text.as_str());
                if use_v11 {
                    // v1.1 phoneme set: ensure a single separating space.
                    if !result.is_empty() && !result.ends_with(' ') {
                        result.push(' ');
                    }
                    result.push_str(&v11::g2p(&text, true));
                    result.push(' ');
                } else {
                    // v1.0: jieba-segment, then convert word by word.
                    for i in jieba.cut(&text, true) {
                        result.push_str(&word2ipa_zh(i)?);
                        result.push(' ');
                    }
                }
            }
            // CJK punctuation run: attach directly to previous output.
            (_, Some(text), _) => {
                let text = to_half_shape(text.as_str());
                result = result.trim_end().to_string();
                result.push_str(&text);
                result.push(' ');
            }
            // Latin-1 run: per-chunk English conversion.
            (_, _, Some(text)) => {
                for i in en_word_pattern.captures_iter(text.as_str()) {
                    let c = (i[0]).chars().next().unwrap_or_default();
                    if c == '\''
                        || c == '_'
                        || c == '-'
                        || c.is_ascii_lowercase()
                        || c.is_ascii_uppercase()
                    {
                        let i = &i[0];
                        // Insert a space after sentence punctuation.
                        if result.trim_end().ends_with(['.', ',', '!', '?'])
                            && !result.ends_with(' ')
                        {
                            result.push(' ');
                        }
                        result.push_str(&word2ipa_en(i)?);
                    } else if c == ' ' && result.ends_with(' ') {
                        // Avoid doubled spaces at run boundaries.
                        result.push_str((i[0]).trim_start());
                    } else {
                        result.push_str(&i[0]);
                    }
                }
            }
            _ => (),
        };
    }

    Ok(result.trim().to_string())
}
|
||||
|
||||
#[cfg(test)]
mod tests {
    /// Spot-checks of the espeak-backed English conversion (default build).
    #[cfg(not(feature = "use-cmudict"))]
    #[test]
    fn test_word2ipa_en() -> Result<(), super::G2PError> {
        use super::word2ipa_en;

        // println!("{:?}", espeak_rs::text_to_phonemes("days", "en", None, true, false));
        assert_eq!("kjˌuːkjˈuː", word2ipa_en("qq")?);
        assert_eq!("həlˈəʊ", word2ipa_en("hello")?);
        assert_eq!("wˈɜːld", word2ipa_en("world")?);
        assert_eq!("ˈapəl", word2ipa_en("apple")?);
        assert_eq!("tʃˈɪldɹɛn", word2ipa_en("children")?);
        assert_eq!("ˈaʊə", word2ipa_en("hour")?);
        assert_eq!("dˈeɪz", word2ipa_en("days")?);

        Ok(())
    }

    /// Dictionary lookup must fall back across letter cases (cmudict build).
    #[cfg(feature = "use-cmudict")]
    #[test]
    fn test_word2ipa_en_is_case_insensitive_for_dictionary_words() -> Result<(), super::G2PError> {
        use super::word2ipa_en;

        assert_eq!(word2ipa_en("Welcome")?, word2ipa_en("welcome")?);

        Ok(())
    }

    /// End-to-end check of both phoneme-set versions.
    #[test]
    fn test_g2p() -> Result<(), super::G2PError> {
        use super::g2p;

        assert_eq!("ni↓xau↓ ʂɻ↘ʨje↘", g2p("你好世界", false)?);
        assert_eq!("ㄋㄧ2ㄏㄠ3/ㄕ十4ㄐㄝ4", g2p("你好世界", true)?);

        Ok(())
    }
}
|
||||
62
rust/vendor/kokoro-tts/src/g2p/v10.rs
vendored
Normal file
62
rust/vendor/kokoro-tts/src/g2p/v10.rs
vendored
Normal file
@@ -0,0 +1,62 @@
|
||||
use crate::{G2PError, pinyin_to_ipa};
|
||||
|
||||
/// Rewrite the tone-letter contours produced by `pinyin_to_ipa` into the
/// single-arrow tone marks used by the v1.0 phoneme set, and collapse the
/// syllabic-consonant combining mark into 'ɨ'.
///
/// Mapping: ˧˩˧ -> ↓ (3rd), ˧˥ -> ↗ (2nd), ˥˩ -> ↘ (4th), lone ˥ -> → (1st);
/// ɻ+U+0329 or ɱ+U+0329 -> ɨ. Longer contours are matched first so their
/// prefixes are not consumed by the shorter rules.
fn retone(p: &str) -> String {
    let chars: Vec<char> = p.chars().collect();
    let mut result = String::with_capacity(p.len());
    let mut i = 0;

    while i < chars.len() {
        match () {
            // Third tone (˧˩˧) handled first — it shares a prefix with the others.
            _ if i + 2 < chars.len()
                && chars[i] == '˧'
                && chars[i + 1] == '˩'
                && chars[i + 2] == '˧' =>
            {
                result.push('↓');
                i += 3;
            }
            // Second (rising) tone ˧˥.
            _ if i + 1 < chars.len() && chars[i] == '˧' && chars[i + 1] == '˥' => {
                result.push('↗');
                i += 2;
            }
            // Fourth (falling) tone ˥˩.
            _ if i + 1 < chars.len() && chars[i] == '˥' && chars[i + 1] == '˩' => {
                result.push('↘');
                i += 2;
            }
            // First (high level) tone ˥.
            _ if chars[i] == '˥' => {
                result.push('→');
                i += 1;
            }
            // Combining-character replacement (syllabic ɻ̩ and ɱ̩ -> ɨ):
            // fires when the next char is U+0329 and the current char is
            // U+027B (ɻ) or U+0271 (ɱ).
            _ if !(i + 1 >= chars.len() || chars[i+1] != '\u{0329}' || chars[i] != '\u{027B}' && chars[i] != '\u{0271}') =>
            {
                result.push('ɨ');
                i += 2;
            }
            // Anything else is copied through unchanged.
            _ => {
                result.push(chars[i]);
                i += 1;
            }
        }
    }

    // A leftover U+0329 would mean a syllabic consonant we failed to map.
    assert!(
        !result.contains('\u{0329}'),
        "Unexpected combining mark in: {}",
        result
    );
    result
}
|
||||
|
||||
/// Convert one tone-number-suffixed pinyin syllable to v1.0 IPA.
///
/// Takes the first candidate from `pinyin_to_ipa` and runs each symbol
/// through `retone`; returns `EnptyData` when the lookup yields nothing.
pub(super) fn py2ipa(py: &str) -> Result<String, G2PError> {
    pinyin_to_ipa(py)?
        .first()
        .map_or(Err(G2PError::EnptyData), |i| {
            Ok(i.iter().map(|i| retone(i)).collect::<String>())
        })
}
|
||||
1263
rust/vendor/kokoro-tts/src/g2p/v11.rs
vendored
Normal file
1263
rust/vendor/kokoro-tts/src/g2p/v11.rs
vendored
Normal file
File diff suppressed because it is too large
Load Diff
83
rust/vendor/kokoro-tts/src/lib.rs
vendored
Normal file
83
rust/vendor/kokoro-tts/src/lib.rs
vendored
Normal file
@@ -0,0 +1,83 @@
|
||||
mod error;
|
||||
mod g2p;
|
||||
mod stream;
|
||||
mod synthesizer;
|
||||
mod tokenizer;
|
||||
mod transcription;
|
||||
mod voice;
|
||||
|
||||
use {
|
||||
bincode::{config::standard, decode_from_slice},
|
||||
ort::{execution_providers::CUDAExecutionProvider, session::Session},
|
||||
std::{collections::HashMap, path::Path, sync::Arc, time::Duration},
|
||||
tokio::{fs::read, sync::Mutex},
|
||||
};
|
||||
pub use {error::*, g2p::*, stream::*, tokenizer::*, transcription::*, voice::*};
|
||||
|
||||
/// Kokoro text-to-speech engine: an ONNX session plus a named voice pack.
///
/// Both fields are reference-counted so streaming sessions can hold weak
/// references and detect when the engine has been dropped.
pub struct KokoroTts {
    /// The ONNX Runtime inference session, serialized behind a mutex.
    model: Arc<Mutex<Session>>,
    /// Voice name -> style-embedding tensors decoded from the voice pack.
    voices: Arc<HashMap<String, Vec<Vec<Vec<f32>>>>>,
}

impl KokoroTts {
    /// Load the model and voice pack from disk.
    ///
    /// The voice pack is bincode-decoded; the session is built with the
    /// CUDA execution provider (falling back per ort's provider rules).
    pub async fn new<P: AsRef<Path>>(model_path: P, voices_path: P) -> Result<Self, KokoroError> {
        let voices = read(voices_path).await?;
        let (voices, _) = decode_from_slice(&voices, standard())?;

        let model = Session::builder()?
            .with_execution_providers([CUDAExecutionProvider::default().build()])?
            .commit_from_file(model_path)?;
        Ok(Self {
            model: Arc::new(model.into()),
            voices,
        })
    }

    /// Same as [`Self::new`], but from in-memory model/voice-pack bytes.
    pub async fn new_from_bytes<B>(model: B, voices: B) -> Result<Self, KokoroError>
    where
        B: AsRef<[u8]>,
    {
        let (voices, _) = decode_from_slice(voices.as_ref(), standard())?;

        let model = Session::builder()?
            .with_execution_providers([CUDAExecutionProvider::default().build()])?
            .commit_from_memory(model.as_ref())?;
        Ok(Self {
            model: Arc::new(model.into()),
            voices,
        })
    }

    /// Synthesize `text` with `voice` in one shot.
    ///
    /// Returns the audio samples and how long inference took; fails with
    /// `VoiceNotFound` when the voice name is not in the loaded pack.
    pub async fn synth<S>(&self, text: S, voice: Voice) -> Result<(Vec<f32>, Duration), KokoroError>
    where
        S: AsRef<str>,
    {
        let name = voice.get_name();
        let pack = self
            .voices
            .get(name)
            .ok_or(KokoroError::VoiceNotFound(name.to_owned()))?;
        synthesizer::synth(Arc::downgrade(&self.model), text, pack, voice).await
    }

    /// Open a streaming synthesis session.
    ///
    /// Returns a sink for submitting text and a stream yielding audio
    /// chunks. The session holds only weak references, so requests fail
    /// with `ModelReleased` once the engine is dropped.
    pub fn stream<S>(&self, voice: Voice) -> (SynthSink<S>, SynthStream)
    where
        S: AsRef<str> + Send + 'static,
    {
        let voices = Arc::downgrade(&self.voices);
        let model = Arc::downgrade(&self.model);

        start_synth_session(voice, move |text, voice| {
            let voices = voices.clone();
            let model = model.clone();
            async move {
                let name = voice.get_name();
                let voices = voices.upgrade().ok_or(KokoroError::ModelReleased)?;
                let pack = voices
                    .get(name)
                    .ok_or(KokoroError::VoiceNotFound(name.to_owned()))?;
                synthesizer::synth(model, text, pack, voice).await
            }
        })
    }
}
|
||||
157
rust/vendor/kokoro-tts/src/stream.rs
vendored
Normal file
157
rust/vendor/kokoro-tts/src/stream.rs
vendored
Normal file
@@ -0,0 +1,157 @@
|
||||
use {
|
||||
crate::{KokoroError, Voice},
|
||||
futures::{Sink, SinkExt, Stream},
|
||||
pin_project::pin_project,
|
||||
std::{
|
||||
pin::Pin,
|
||||
task::{Context, Poll},
|
||||
time::Duration,
|
||||
},
|
||||
tokio::sync::mpsc::{UnboundedReceiver, UnboundedSender, unbounded_channel},
|
||||
};
|
||||
|
||||
/// One synthesis job sent from the sink to the worker task.
struct Request<S> {
    /// Voice (and speed) to synthesize with.
    voice: Voice,
    /// Text payload to synthesize.
    text: S,
}

/// One finished synthesis result delivered to the stream.
struct Response {
    /// Audio samples produced by the model.
    data: Vec<f32>,
    /// Wall-clock time the synthesis took.
    took: Duration,
}
|
||||
|
||||
/// Speech-synthesis output stream.
///
/// Used to consume longer texts synthesized in a streaming fashion. It
/// implements the `Stream` trait and yields `(samples, took)` pairs for
/// async iteration over the synthesized audio.
#[pin_project]
pub struct SynthStream {
    #[pin]
    rx: UnboundedReceiver<Response>,
}

impl Stream for SynthStream {
    type Item = (Vec<f32>, Duration);

    fn poll_next(self: Pin<&mut Self>, cx: &mut Context<'_>) -> Poll<Option<Self::Item>> {
        // Ends (yields None) when the worker task drops its sender.
        Pin::new(&mut self.project().rx)
            .poll_recv(cx)
            .map(|i| i.map(|Response { data, took }| (data, took)))
    }
}
|
||||
|
||||
/// Speech-synthesis request sender.
///
/// Used to submit synthesis requests. It implements the `Sink` trait for
/// asynchronously sending requests to the session's worker task.
#[pin_project]
pub struct SynthSink<S> {
    tx: UnboundedSender<Request<S>>,
    voice: Voice,
}

impl<S> SynthSink<S> {
    /// Set the voice used by subsequent `synth` calls.
    ///
    /// # Arguments
    ///
    /// * `voice` - the voice (including its speed setting) to use.
    ///
    /// # Example
    ///
    /// ```rust
    /// use kokoro_tts::{KokoroTts, Voice};
    ///
    /// #[tokio::main]
    /// async fn main() {
    ///     let Ok(tts) = KokoroTts::new("../kokoro-v1.0.int8.onnx", "../voices.bin").await else {
    ///         return;
    ///     };
    ///     // speed: 1.0
    ///     let (mut sink, _) = tts.stream::<&str>(Voice::ZfXiaoxiao(1.0));
    ///     // speed: 1.8
    ///     sink.set_voice(Voice::ZmYunxi(1.8));
    /// }
    /// ```
    ///
    pub fn set_voice(&mut self, voice: Voice) {
        self.voice = voice
    }

    /// Send a synthesis request for `text` using the currently set voice.
    ///
    /// # Arguments
    ///
    /// * `text` - the text to synthesize.
    ///
    /// # Returns
    ///
    /// `Ok(())` when the request was queued; a `KokoroError` if sending
    /// failed (the worker task is gone and the channel is closed).
    ///
    /// # Example
    ///
    /// ```rust
    /// use kokoro_tts::{KokoroTts, Voice};
    ///
    /// #[tokio::main]
    /// async fn main() {
    ///     let Ok(tts) = KokoroTts::new("../kokoro-v1.1-zh.onnx", "../voices-v1.1-zh.bin").await else {
    ///         return;
    ///     };
    ///     let (mut sink, _) = tts.stream(Voice::Zf003(2));
    ///     let _ = sink.synth("hello world.").await;
    /// }
    /// ```
    ///
    pub async fn synth(&mut self, text: S) -> Result<(), KokoroError> {
        self.send((self.voice, text)).await
    }
}
|
||||
|
||||
impl<S> Sink<(Voice, S)> for SynthSink<S> {
    type Error = KokoroError;

    // The underlying channel is unbounded, so the sink is always ready.
    fn poll_ready(self: Pin<&mut Self>, _cx: &mut Context<'_>) -> Poll<Result<(), Self::Error>> {
        Poll::Ready(Ok(()))
    }

    // Queue one (voice, text) request for the worker task.
    fn start_send(self: Pin<&mut Self>, (voice, text): (Voice, S)) -> Result<(), Self::Error> {
        self.tx
            .send(Request { voice, text })
            .map_err(|e| KokoroError::Send(e.to_string()))
    }

    // Sends complete synchronously on an unbounded channel; nothing to flush.
    fn poll_flush(self: Pin<&mut Self>, _cx: &mut Context<'_>) -> Poll<Result<(), Self::Error>> {
        Poll::Ready(Ok(()))
    }

    // Closing is a no-op; the channel closes when the sink is dropped.
    fn poll_close(self: Pin<&mut Self>, _cx: &mut Context<'_>) -> Poll<Result<(), Self::Error>> {
        Poll::Ready(Ok(()))
    }
}
|
||||
|
||||
/// Spawn the background task that services synthesis requests.
///
/// Requests arrive on an unbounded channel from the returned `SynthSink`;
/// each is handed to `synth_request_callback` and the resulting audio
/// chunk is forwarded to the returned `SynthStream`.
///
/// NOTE(review): if the callback (or the response send) fails, the spawned
/// task exits via `?` and the stream simply ends — the error itself is not
/// surfaced to the stream consumer. Confirm this is intended.
pub(super) fn start_synth_session<F, R, S>(
    voice: Voice,
    synth_request_callback: F,
) -> (SynthSink<S>, SynthStream)
where
    F: Fn(S, Voice) -> R + Send + 'static,
    R: Future<Output = Result<(Vec<f32>, Duration), KokoroError>> + Send,
    S: AsRef<str> + Send + 'static,
{
    // Request channel: sink -> worker; response channel: worker -> stream.
    let (tx, mut rx) = unbounded_channel::<Request<S>>();
    let (tx2, rx2) = unbounded_channel();
    tokio::spawn(async move {
        // Runs until every SynthSink clone is dropped (recv yields None).
        while let Some(req) = rx.recv().await {
            let (data, took) = synth_request_callback(req.text, req.voice).await?;
            tx2.send(Response { data, took })
                .map_err(|e| KokoroError::Send(e.to_string()))?;
        }

        Ok::<_, KokoroError>(())
    });

    (SynthSink { tx, voice }, SynthStream { rx: rx2 })
}
|
||||
123
rust/vendor/kokoro-tts/src/synthesizer.rs
vendored
Normal file
123
rust/vendor/kokoro-tts/src/synthesizer.rs
vendored
Normal file
@@ -0,0 +1,123 @@
|
||||
use {
|
||||
crate::{KokoroError, Voice, g2p, get_token_ids},
|
||||
ndarray::Array,
|
||||
ort::{
|
||||
inputs,
|
||||
session::{RunOptions, Session},
|
||||
value::TensorRef,
|
||||
},
|
||||
std::{
|
||||
cmp::min,
|
||||
sync::Weak,
|
||||
time::{Duration, SystemTime},
|
||||
},
|
||||
tokio::sync::Mutex,
|
||||
};
|
||||
|
||||
/// Run one v1.0 inference pass: the whole phoneme string in a single call.
///
/// `pack` is the per-voice style table indexed by token count; `speed` is
/// the playback-speed factor fed to the model. Returns the audio samples
/// and the wall time the model run took.
async fn synth_v10<P, S>(
    model: Weak<Mutex<Session>>,
    phonemes: S,
    pack: P,
    speed: f32,
) -> Result<(Vec<f32>, Duration), KokoroError>
where
    P: AsRef<Vec<Vec<Vec<f32>>>>,
    S: AsRef<str>,
{
    // Fails once the owning instance (and thus the session) is dropped.
    let model = model.upgrade().ok_or(KokoroError::ModelReleased)?;
    let phonemes = get_token_ids(phonemes.as_ref(), false);
    let phonemes = Array::from_shape_vec((1, phonemes.len()), phonemes)?;
    // Style vector selected by token count (len() of the (1, n) array is n).
    // NOTE(review): this falls back to an empty vec, while the v1.1 path
    // falls back to vec![0.; 256] — confirm the inconsistency is intended.
    let ref_s = pack.as_ref()[phonemes.len() - 1]
        .first()
        .cloned()
        .unwrap_or_default();

    let style = Array::from_shape_vec((1, ref_s.len()), ref_s)?;
    let speed = Array::from_vec(vec![speed]);
    let options = RunOptions::new()?;
    // Serialize access to the shared ONNX session.
    let mut model = model.lock().await;
    let t = SystemTime::now();
    let kokoro_output = model
        .run_async(
            inputs![
                "tokens" => TensorRef::from_array_view(&phonemes)?,
                "style" => TensorRef::from_array_view(&style)?,
                "speed" => TensorRef::from_array_view(&speed)?,
            ],
            &options,
        )?
        .await?;
    let elapsed = t.elapsed()?;
    let (_, audio) = kokoro_output["audio"].try_extract_tensor::<f32>()?;

    Ok((audio.to_owned(), elapsed))
}
|
||||
|
||||
async fn synth_v11<P, S>(
|
||||
model: Weak<Mutex<Session>>,
|
||||
phonemes: S,
|
||||
pack: P,
|
||||
speed: i32,
|
||||
) -> Result<(Vec<f32>, Duration), KokoroError>
|
||||
where
|
||||
P: AsRef<Vec<Vec<Vec<f32>>>>,
|
||||
S: AsRef<str>,
|
||||
{
|
||||
let model = model.upgrade().ok_or(KokoroError::ModelReleased)?;
|
||||
let mut phonemes = get_token_ids(phonemes.as_ref(), true);
|
||||
|
||||
let mut ret = Vec::new();
|
||||
let mut elapsed = Duration::ZERO;
|
||||
while let p = phonemes.drain(..min(pack.as_ref().len(), phonemes.len()))
|
||||
&& p.len() != 0
|
||||
{
|
||||
let phonemes = Array::from_shape_vec((1, p.len()), p.collect())?;
|
||||
let ref_s = pack.as_ref()[phonemes.len() - 1]
|
||||
.first()
|
||||
.cloned()
|
||||
.unwrap_or(vec![0.; 256]);
|
||||
|
||||
let style = Array::from_shape_vec((1, ref_s.len()), ref_s)?;
|
||||
let speed = Array::from_vec(vec![speed]);
|
||||
let options = RunOptions::new()?;
|
||||
let mut model = model.lock().await;
|
||||
let t = SystemTime::now();
|
||||
let kokoro_output = model
|
||||
.run_async(
|
||||
inputs![
|
||||
"input_ids" => TensorRef::from_array_view(&phonemes)?,
|
||||
"style" => TensorRef::from_array_view(&style)?,
|
||||
"speed" => TensorRef::from_array_view(&speed)?,
|
||||
],
|
||||
&options,
|
||||
)?
|
||||
.await?;
|
||||
elapsed = t.elapsed()?;
|
||||
let (_, audio) = kokoro_output["waveform"].try_extract_tensor::<f32>()?;
|
||||
let (_, _duration) = kokoro_output["duration"].try_extract_tensor::<i64>()?;
|
||||
// let _ = dbg!(duration.len());
|
||||
ret.extend_from_slice(audio);
|
||||
}
|
||||
|
||||
Ok((ret, elapsed))
|
||||
}
|
||||
|
||||
/// Dispatch synthesis to the model generation matching the chosen voice.
///
/// Runs grapheme-to-phoneme conversion first, then takes the v1.1 path
/// (chunked, integer speed) or the v1.0 path (single shot, float speed);
/// voices supporting neither produce `VoiceVersionInvalid`.
pub(super) async fn synth<P, S>(
    model: Weak<Mutex<Session>>,
    text: S,
    pack: P,
    voice: Voice,
) -> Result<(Vec<f32>, Duration), KokoroError>
where
    P: AsRef<Vec<Vec<Vec<f32>>>>,
    S: AsRef<str>,
{
    let phonemes = g2p(text.as_ref(), voice.is_v11_supported())?;
    // #[cfg(debug_assertions)]
    // println!("{}", phonemes);
    match voice {
        v if v.is_v11_supported() => synth_v11(model, phonemes, pack, v.get_speed_v11()?).await,
        v if v.is_v10_supported() => synth_v10(model, phonemes, pack, v.get_speed_v10()?).await,
        v => Err(KokoroError::VoiceVersionInvalid(v.get_name().to_owned())),
    }
}
|
||||
324
rust/vendor/kokoro-tts/src/tokenizer.rs
vendored
Normal file
324
rust/vendor/kokoro-tts/src/tokenizer.rs
vendored
Normal file
@@ -0,0 +1,324 @@
|
||||
use {
|
||||
log::warn,
|
||||
std::{collections::HashMap, sync::LazyLock},
|
||||
};
|
||||
static VOCAB_V10: LazyLock<HashMap<char, u8>> = LazyLock::new(|| {
|
||||
let mut map = HashMap::new();
|
||||
|
||||
map.insert(';', 1);
|
||||
map.insert(':', 2);
|
||||
map.insert(',', 3);
|
||||
map.insert('.', 4);
|
||||
map.insert('!', 5);
|
||||
map.insert('?', 6);
|
||||
map.insert('—', 9);
|
||||
map.insert('…', 10);
|
||||
map.insert('"', 11);
|
||||
map.insert('(', 12);
|
||||
map.insert(')', 13);
|
||||
map.insert('“', 14);
|
||||
map.insert('”', 15);
|
||||
map.insert(' ', 16);
|
||||
map.insert('\u{0303}', 17); // Unicode escape for combining tilde
|
||||
map.insert('ʣ', 18);
|
||||
map.insert('ʥ', 19);
|
||||
map.insert('ʦ', 20);
|
||||
map.insert('ʨ', 21);
|
||||
map.insert('ᵝ', 22);
|
||||
map.insert('\u{AB67}', 23); // Unicode escape
|
||||
map.insert('A', 24);
|
||||
map.insert('I', 25);
|
||||
map.insert('O', 31);
|
||||
map.insert('Q', 33);
|
||||
map.insert('S', 35);
|
||||
map.insert('T', 36);
|
||||
map.insert('W', 39);
|
||||
map.insert('Y', 41);
|
||||
map.insert('ᵊ', 42);
|
||||
map.insert('a', 43);
|
||||
map.insert('b', 44);
|
||||
map.insert('c', 45);
|
||||
map.insert('d', 46);
|
||||
map.insert('e', 47);
|
||||
map.insert('f', 48);
|
||||
map.insert('h', 50);
|
||||
map.insert('i', 51);
|
||||
map.insert('j', 52);
|
||||
map.insert('k', 53);
|
||||
map.insert('l', 54);
|
||||
map.insert('m', 55);
|
||||
map.insert('n', 56);
|
||||
map.insert('o', 57);
|
||||
map.insert('p', 58);
|
||||
map.insert('q', 59);
|
||||
map.insert('r', 60);
|
||||
map.insert('s', 61);
|
||||
map.insert('t', 62);
|
||||
map.insert('u', 63);
|
||||
map.insert('v', 64);
|
||||
map.insert('w', 65);
|
||||
map.insert('x', 66);
|
||||
map.insert('y', 67);
|
||||
map.insert('z', 68);
|
||||
map.insert('ɑ', 69);
|
||||
map.insert('ɐ', 70);
|
||||
map.insert('ɒ', 71);
|
||||
map.insert('æ', 72);
|
||||
map.insert('β', 75);
|
||||
map.insert('ɔ', 76);
|
||||
map.insert('ɕ', 77);
|
||||
map.insert('ç', 78);
|
||||
map.insert('ɖ', 80);
|
||||
map.insert('ð', 81);
|
||||
map.insert('ʤ', 82);
|
||||
map.insert('ə', 83);
|
||||
map.insert('ɚ', 85);
|
||||
map.insert('ɛ', 86);
|
||||
map.insert('ɜ', 87);
|
||||
map.insert('ɟ', 90);
|
||||
map.insert('ɡ', 92);
|
||||
map.insert('ɥ', 99);
|
||||
map.insert('ɨ', 101);
|
||||
map.insert('ɪ', 102);
|
||||
map.insert('ʝ', 103);
|
||||
map.insert('ɯ', 110);
|
||||
map.insert('ɰ', 111);
|
||||
map.insert('ŋ', 112);
|
||||
map.insert('ɳ', 113);
|
||||
map.insert('ɲ', 114);
|
||||
map.insert('ɴ', 115);
|
||||
map.insert('ø', 116);
|
||||
map.insert('ɸ', 118);
|
||||
map.insert('θ', 119);
|
||||
map.insert('œ', 120);
|
||||
map.insert('ɹ', 123);
|
||||
map.insert('ɾ', 125);
|
||||
map.insert('ɻ', 126);
|
||||
map.insert('ʁ', 128);
|
||||
map.insert('ɽ', 129);
|
||||
map.insert('ʂ', 130);
|
||||
map.insert('ʃ', 131);
|
||||
map.insert('ʈ', 132);
|
||||
map.insert('ʧ', 133);
|
||||
map.insert('ʊ', 135);
|
||||
map.insert('ʋ', 136);
|
||||
map.insert('ʌ', 138);
|
||||
map.insert('ɣ', 139);
|
||||
map.insert('ɤ', 140);
|
||||
map.insert('χ', 142);
|
||||
map.insert('ʎ', 143);
|
||||
map.insert('ʒ', 147);
|
||||
map.insert('ʔ', 148);
|
||||
map.insert('ˈ', 156);
|
||||
map.insert('ˌ', 157);
|
||||
map.insert('ː', 158);
|
||||
map.insert('ʰ', 162);
|
||||
map.insert('ʲ', 164);
|
||||
map.insert('↓', 169);
|
||||
map.insert('→', 171);
|
||||
map.insert('↗', 172);
|
||||
map.insert('↘', 173);
|
||||
map.insert('ᵻ', 177);
|
||||
map
|
||||
});
|
||||
|
||||
static VOCAB_V11: LazyLock<HashMap<char, u8>> = LazyLock::new(|| {
|
||||
let mut map = HashMap::new();
|
||||
|
||||
map.insert(';', 1);
|
||||
map.insert(':', 2);
|
||||
map.insert(',', 3);
|
||||
map.insert('.', 4);
|
||||
map.insert('!', 5);
|
||||
map.insert('?', 6);
|
||||
map.insert('/', 7);
|
||||
map.insert('—', 9);
|
||||
map.insert('…', 10);
|
||||
map.insert('"', 11);
|
||||
map.insert('(', 12);
|
||||
map.insert(')', 13);
|
||||
map.insert('“', 14);
|
||||
map.insert('”', 15);
|
||||
map.insert(' ', 16);
|
||||
map.insert('\u{0303}', 17); // Unicode escape for combining tilde
|
||||
map.insert('ʣ', 18);
|
||||
map.insert('ʥ', 19);
|
||||
map.insert('ʦ', 20);
|
||||
map.insert('ʨ', 21);
|
||||
map.insert('ᵝ', 22);
|
||||
map.insert('ㄓ', 23);
|
||||
map.insert('A', 24);
|
||||
map.insert('I', 25);
|
||||
map.insert('ㄅ', 30);
|
||||
map.insert('O', 31);
|
||||
map.insert('ㄆ', 32);
|
||||
map.insert('Q', 33);
|
||||
map.insert('R', 34);
|
||||
map.insert('S', 35);
|
||||
map.insert('T', 36);
|
||||
map.insert('ㄇ', 37);
|
||||
map.insert('ㄈ', 38);
|
||||
map.insert('W', 39);
|
||||
map.insert('ㄉ', 40);
|
||||
map.insert('Y', 41);
|
||||
map.insert('ᵊ', 42);
|
||||
map.insert('a', 43);
|
||||
map.insert('b', 44);
|
||||
map.insert('c', 45);
|
||||
map.insert('d', 46);
|
||||
map.insert('e', 47);
|
||||
map.insert('f', 48);
|
||||
map.insert('ㄊ', 49);
|
||||
map.insert('h', 50);
|
||||
map.insert('i', 51);
|
||||
map.insert('j', 52);
|
||||
map.insert('k', 53);
|
||||
map.insert('l', 54);
|
||||
map.insert('m', 55);
|
||||
map.insert('n', 56);
|
||||
map.insert('o', 57);
|
||||
map.insert('p', 58);
|
||||
map.insert('q', 59);
|
||||
map.insert('r', 60);
|
||||
map.insert('s', 61);
|
||||
map.insert('t', 62);
|
||||
map.insert('u', 63);
|
||||
map.insert('v', 64);
|
||||
map.insert('w', 65);
|
||||
map.insert('x', 66);
|
||||
map.insert('y', 67);
|
||||
map.insert('z', 68);
|
||||
map.insert('ɑ', 69);
|
||||
map.insert('ɐ', 70);
|
||||
map.insert('ɒ', 71);
|
||||
map.insert('æ', 72);
|
||||
map.insert('ㄋ', 73);
|
||||
map.insert('ㄌ', 74);
|
||||
map.insert('β', 75);
|
||||
map.insert('ɔ', 76);
|
||||
map.insert('ɕ', 77);
|
||||
map.insert('ç', 78);
|
||||
map.insert('ㄍ', 79);
|
||||
map.insert('ɖ', 80);
|
||||
map.insert('ð', 81);
|
||||
map.insert('ʤ', 82);
|
||||
map.insert('ə', 83);
|
||||
map.insert('ㄎ', 84);
|
||||
map.insert('ㄦ', 85);
|
||||
map.insert('ɛ', 86);
|
||||
map.insert('ɜ', 87);
|
||||
map.insert('ㄏ', 88);
|
||||
map.insert('ㄐ', 89);
|
||||
map.insert('ɟ', 90);
|
||||
map.insert('ㄑ', 91);
|
||||
map.insert('ɡ', 92);
|
||||
map.insert('ㄒ', 93);
|
||||
map.insert('ㄔ', 94);
|
||||
map.insert('ㄕ', 95);
|
||||
map.insert('ㄗ', 96);
|
||||
map.insert('ㄘ', 97);
|
||||
map.insert('ㄙ', 98);
|
||||
map.insert('月', 99);
|
||||
map.insert('ㄚ', 100);
|
||||
map.insert('ɨ', 101);
|
||||
map.insert('ɪ', 102);
|
||||
map.insert('ʝ', 103);
|
||||
map.insert('ㄛ', 104);
|
||||
map.insert('ㄝ', 105);
|
||||
map.insert('ㄞ', 106);
|
||||
map.insert('ㄟ', 107);
|
||||
map.insert('ㄠ', 108);
|
||||
map.insert('ㄡ', 109);
|
||||
map.insert('ɯ', 110);
|
||||
map.insert('ɰ', 111);
|
||||
map.insert('ŋ', 112);
|
||||
map.insert('ɳ', 113);
|
||||
map.insert('ɲ', 114);
|
||||
map.insert('ɴ', 115);
|
||||
map.insert('ø', 116);
|
||||
map.insert('ㄢ', 117);
|
||||
map.insert('ɸ', 118);
|
||||
map.insert('θ', 119);
|
||||
map.insert('œ', 120);
|
||||
map.insert('ㄣ', 121);
|
||||
map.insert('ㄤ', 122);
|
||||
map.insert('ɹ', 123);
|
||||
map.insert('ㄥ', 124);
|
||||
map.insert('ɾ', 125);
|
||||
map.insert('ㄖ', 126);
|
||||
map.insert('ㄧ', 127);
|
||||
map.insert('ʁ', 128);
|
||||
map.insert('ɽ', 129);
|
||||
map.insert('ʂ', 130);
|
||||
map.insert('ʃ', 131);
|
||||
map.insert('ʈ', 132);
|
||||
map.insert('ʧ', 133);
|
||||
map.insert('ㄨ', 134);
|
||||
map.insert('ʊ', 135);
|
||||
map.insert('ʋ', 136);
|
||||
map.insert('ㄩ', 137);
|
||||
map.insert('ʌ', 138);
|
||||
map.insert('ɣ', 139);
|
||||
map.insert('ㄜ', 140);
|
||||
map.insert('ㄭ', 141);
|
||||
map.insert('χ', 142);
|
||||
map.insert('ʎ', 143);
|
||||
map.insert('十', 144);
|
||||
map.insert('压', 145);
|
||||
map.insert('言', 146);
|
||||
map.insert('ʒ', 147);
|
||||
map.insert('ʔ', 148);
|
||||
map.insert('阳', 149);
|
||||
map.insert('要', 150);
|
||||
map.insert('阴', 151);
|
||||
map.insert('应', 152);
|
||||
map.insert('用', 153);
|
||||
map.insert('又', 154);
|
||||
map.insert('中', 155);
|
||||
map.insert('ˈ', 156);
|
||||
map.insert('ˌ', 157);
|
||||
map.insert('ː', 158);
|
||||
map.insert('穵', 159);
|
||||
map.insert('外', 160);
|
||||
map.insert('万', 161);
|
||||
map.insert('ʰ', 162);
|
||||
map.insert('王', 163);
|
||||
map.insert('ʲ', 164);
|
||||
map.insert('为', 165);
|
||||
map.insert('文', 166);
|
||||
map.insert('瓮', 167);
|
||||
map.insert('我', 168);
|
||||
map.insert('3', 169);
|
||||
map.insert('5', 170);
|
||||
map.insert('1', 171);
|
||||
map.insert('2', 172);
|
||||
map.insert('4', 173);
|
||||
map.insert('元', 175);
|
||||
map.insert('云', 176);
|
||||
map.insert('ᵻ', 177);
|
||||
map
|
||||
});
|
||||
|
||||
pub fn get_token_ids(phonemes: &str, v11: bool) -> Vec<i64> {
|
||||
let mut tokens = Vec::with_capacity(phonemes.len() + 2);
|
||||
tokens.push(0);
|
||||
|
||||
for i in phonemes.chars() {
|
||||
let v = if v11 {
|
||||
VOCAB_V11.get(&i).copied()
|
||||
} else {
|
||||
VOCAB_V10.get(&i).copied()
|
||||
};
|
||||
match v {
|
||||
Some(t) => {
|
||||
tokens.push(t as _);
|
||||
}
|
||||
_ => {
|
||||
warn!("Unknown phone {}, skipped.", i);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
tokens.push(0);
|
||||
tokens
|
||||
}
|
||||
4
rust/vendor/kokoro-tts/src/transcription.rs
vendored
Normal file
4
rust/vendor/kokoro-tts/src/transcription.rs
vendored
Normal file
@@ -0,0 +1,4 @@
|
||||
mod en;
|
||||
mod zh;
|
||||
|
||||
pub use {en::*, zh::*};
|
||||
147
rust/vendor/kokoro-tts/src/transcription/en.rs
vendored
Normal file
147
rust/vendor/kokoro-tts/src/transcription/en.rs
vendored
Normal file
@@ -0,0 +1,147 @@
|
||||
use regex::Regex;
|
||||
use std::{collections::HashMap, sync::LazyLock};
|
||||
|
||||
static LETTERS_IPA_MAP: LazyLock<HashMap<char, &'static str>> = LazyLock::new(|| {
|
||||
let mut map = HashMap::new();
|
||||
map.insert('a', "ɐ");
|
||||
map.insert('b', "bˈi");
|
||||
map.insert('c', "sˈi");
|
||||
map.insert('d', "dˈi");
|
||||
map.insert('e', "ˈi");
|
||||
map.insert('f', "ˈɛf");
|
||||
map.insert('g', "ʤˈi");
|
||||
map.insert('h', "ˈAʧ");
|
||||
map.insert('i', "ˈI");
|
||||
map.insert('j', "ʤˈA");
|
||||
map.insert('k', "kˈA");
|
||||
map.insert('l', "ˈɛl");
|
||||
map.insert('m', "ˈɛm");
|
||||
map.insert('n', "ˈɛn");
|
||||
map.insert('o', "ˈO");
|
||||
map.insert('p', "pˈi");
|
||||
map.insert('q', "kjˈu");
|
||||
map.insert('r', "ˈɑɹ");
|
||||
map.insert('s', "ˈɛs");
|
||||
map.insert('t', "tˈi");
|
||||
map.insert('u', "jˈu");
|
||||
map.insert('v', "vˈi");
|
||||
map.insert('w', "dˈʌbᵊlju");
|
||||
map.insert('x', "ˈɛks");
|
||||
map.insert('y', "wˈI");
|
||||
map.insert('z', "zˈi");
|
||||
map.insert('A', "ˈA");
|
||||
map.insert('B', "bˈi");
|
||||
map.insert('C', "sˈi");
|
||||
map.insert('D', "dˈi");
|
||||
map.insert('E', "ˈi");
|
||||
map.insert('F', "ˈɛf");
|
||||
map.insert('G', "ʤˈi");
|
||||
map.insert('H', "ˈAʧ");
|
||||
map.insert('I', "ˈI");
|
||||
map.insert('J', "ʤˈA");
|
||||
map.insert('K', "kˈA");
|
||||
map.insert('L', "ˈɛl");
|
||||
map.insert('M', "ˈɛm");
|
||||
map.insert('N', "ˈɛn");
|
||||
map.insert('O', "ˈO");
|
||||
map.insert('P', "pˈi");
|
||||
map.insert('Q', "kjˈu");
|
||||
map.insert('R', "ˈɑɹ");
|
||||
map.insert('S', "ˈɛs");
|
||||
map.insert('T', "tˈi");
|
||||
map.insert('U', "jˈu");
|
||||
map.insert('V', "vˈi");
|
||||
map.insert('W', "dˈʌbᵊlju");
|
||||
map.insert('X', "ˈɛks");
|
||||
map.insert('Y', "wˈI");
|
||||
map.insert('Z', "zˈi");
|
||||
map
|
||||
});
|
||||
static ARPA_IPA_MAP: LazyLock<HashMap<&'static str, &'static str>> = LazyLock::new(|| {
|
||||
let mut map = HashMap::new();
|
||||
map.insert("AA", "ɑ");
|
||||
map.insert("AE", "æ");
|
||||
map.insert("AH", "ə");
|
||||
map.insert("AO", "ɔ");
|
||||
map.insert("AW", "aʊ");
|
||||
map.insert("AY", "aɪ");
|
||||
map.insert("B", "b");
|
||||
map.insert("CH", "tʃ");
|
||||
map.insert("D", "d");
|
||||
map.insert("DH", "ð");
|
||||
map.insert("EH", "ɛ");
|
||||
map.insert("ER", "ɝ");
|
||||
map.insert("EY", "eɪ");
|
||||
map.insert("F", "f");
|
||||
map.insert("G", "ɡ");
|
||||
map.insert("HH", "h");
|
||||
map.insert("IH", "ɪ");
|
||||
map.insert("IY", "i");
|
||||
map.insert("JH", "dʒ");
|
||||
map.insert("K", "k");
|
||||
map.insert("L", "l");
|
||||
map.insert("M", "m");
|
||||
map.insert("N", "n");
|
||||
map.insert("NG", "ŋ");
|
||||
map.insert("OW", "oʊ");
|
||||
map.insert("OY", "ɔɪ");
|
||||
map.insert("P", "p");
|
||||
map.insert("R", "ɹ");
|
||||
map.insert("S", "s");
|
||||
map.insert("SH", "ʃ");
|
||||
map.insert("T", "t");
|
||||
map.insert("TH", "θ");
|
||||
map.insert("UH", "ʊ");
|
||||
map.insert("UW", "u");
|
||||
map.insert("V", "v");
|
||||
map.insert("W", "w");
|
||||
map.insert("Y", "j");
|
||||
map.insert("Z", "z");
|
||||
map.insert("ZH", "ʒ");
|
||||
map.insert("SIL", "");
|
||||
map
|
||||
});
|
||||
|
||||
/// 支持2025新增符号(如:吸气音ʘ)
|
||||
const SPECIAL_CASES: [(&str, &str); 3] = [("CLICK!", "ʘ"), ("TSK!", "ǀ"), ("TUT!", "ǁ")];
|
||||
|
||||
pub fn arpa_to_ipa(arpa: &str) -> Result<String, regex::Error> {
|
||||
let re = Regex::new(r"([A-Z!]+)(\d*)")?;
|
||||
|
||||
let Some(caps) = re.captures(arpa) else {
|
||||
return Ok(Default::default());
|
||||
};
|
||||
|
||||
// 处理特殊符号(2025新增)
|
||||
if let Some(sc) = SPECIAL_CASES.iter().find(|&&(s, _)| s == &caps[1]) {
|
||||
return Ok(sc.1.to_string());
|
||||
}
|
||||
|
||||
// 获取IPA映射
|
||||
let phoneme = ARPA_IPA_MAP
|
||||
.get(&caps[1])
|
||||
.map_or_else(|| letters_to_ipa(arpa), |i| i.to_string());
|
||||
|
||||
let mut result = String::with_capacity(arpa.len() * 2);
|
||||
// 添加重音标记(支持三级重音)
|
||||
result.push(match &caps[2] {
|
||||
"1" => 'ˈ',
|
||||
"2" => 'ˌ',
|
||||
"3" => '˧', // 2025新增中级重音
|
||||
_ => '\0',
|
||||
});
|
||||
|
||||
result.push_str(&phoneme);
|
||||
|
||||
Ok(result)
|
||||
}
|
||||
|
||||
pub fn letters_to_ipa(letters: &str) -> String {
|
||||
let mut res = String::with_capacity(letters.len());
|
||||
for i in letters.chars() {
|
||||
if let Some(p) = LETTERS_IPA_MAP.get(&i) {
|
||||
res.push_str(p);
|
||||
}
|
||||
}
|
||||
res
|
||||
}
|
||||
3597
rust/vendor/kokoro-tts/src/transcription/en_ipa.c
vendored
Normal file
3597
rust/vendor/kokoro-tts/src/transcription/en_ipa.c
vendored
Normal file
File diff suppressed because it is too large
Load Diff
364
rust/vendor/kokoro-tts/src/transcription/zh.rs
vendored
Normal file
364
rust/vendor/kokoro-tts/src/transcription/zh.rs
vendored
Normal file
@@ -0,0 +1,364 @@
|
||||
/// 汉语拼音到国际音标的转换
|
||||
/// 参考了python的misaki库的zh.py。
|
||||
use std::{collections::HashMap, error::Error, fmt, sync::LazyLock};
|
||||
|
||||
const VALID_FINALS: [&str; 37] = [
|
||||
"i", "u", "ü", "a", "ia", "ua", "o", "uo", "e", "ie", "üe", "ai", "uai", "ei", "uei", "ao",
|
||||
"iao", "ou", "iou", "an", "ian", "uan", "üan", "en", "in", "uen", "ün", "ang", "iang", "uang",
|
||||
"eng", "ing", "ueng", "ong", "iong", "er", "ê",
|
||||
];
|
||||
const INITIALS: [&str; 21] = [
|
||||
"zh", "ch", "sh", "b", "c", "d", "f", "g", "h", "j", "k", "l", "m", "n", "p", "q", "r", "s",
|
||||
"t", "x", "z",
|
||||
];
|
||||
|
||||
// 错误类型定义
|
||||
// Error type for pinyin-to-IPA conversion.
#[derive(Debug)]
pub enum PinyinError {
    // The final (rhyme) part of a syllable could not be mapped to IPA.
    FinalNotFound(String),
}

impl fmt::Display for PinyinError {
    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
        match self {
            PinyinError::FinalNotFound(tip) => write!(f, "Final not found: {}", tip),
        }
    }
}

impl Error for PinyinError {}
|
||||
|
||||
static INITIAL_MAPPING: LazyLock<HashMap<&'static str, Vec<Vec<&'static str>>>> =
|
||||
LazyLock::new(|| {
|
||||
let mut map = HashMap::new();
|
||||
|
||||
map.insert("b", vec![vec!["p"]]);
|
||||
map.insert("c", vec![vec!["ʦʰ"]]);
|
||||
map.insert("ch", vec![vec!["ꭧʰ"]]);
|
||||
map.insert("d", vec![vec!["t"]]);
|
||||
map.insert("f", vec![vec!["f"]]);
|
||||
map.insert("g", vec![vec!["k"]]);
|
||||
map.insert("h", vec![vec!["x"], vec!["h"]]);
|
||||
map.insert("j", vec![vec!["ʨ"]]);
|
||||
map.insert("k", vec![vec!["kʰ"]]);
|
||||
map.insert("l", vec![vec!["l"]]);
|
||||
map.insert("m", vec![vec!["m"]]);
|
||||
map.insert("n", vec![vec!["n"]]);
|
||||
map.insert("p", vec![vec!["pʰ"]]);
|
||||
map.insert("q", vec![vec!["ʨʰ"]]);
|
||||
map.insert("r", vec![vec!["ɻ"], vec!["ʐ"]]);
|
||||
map.insert("s", vec![vec!["s"]]);
|
||||
map.insert("sh", vec![vec!["ʂ"]]);
|
||||
map.insert("t", vec![vec!["tʰ"]]);
|
||||
map.insert("x", vec![vec!["ɕ"]]);
|
||||
map.insert("z", vec![vec!["ʦ"]]);
|
||||
map.insert("zh", vec![vec!["ꭧ"]]);
|
||||
map
|
||||
});
|
||||
|
||||
static SYLLABIC_CONSONANT_MAPPINGS: LazyLock<HashMap<&'static str, Vec<Vec<&'static str>>>> =
|
||||
LazyLock::new(|| {
|
||||
let mut map = HashMap::new();
|
||||
map.insert("hm", vec![vec!["h", "m0"]]);
|
||||
map.insert("hng", vec![vec!["h", "ŋ0"]]);
|
||||
map.insert("m", vec![vec!["m0"]]);
|
||||
map.insert("n", vec![vec!["n0"]]);
|
||||
map.insert("ng", vec![vec!["ŋ0"]]);
|
||||
map
|
||||
});
|
||||
|
||||
static INTERJECTION_MAPPINGS: LazyLock<HashMap<&'static str, Vec<Vec<&'static str>>>> =
|
||||
LazyLock::new(|| {
|
||||
let mut map = HashMap::new();
|
||||
map.insert("io", vec![vec!["j", "ɔ0"]]);
|
||||
map.insert("ê", vec![vec!["ɛ0"]]);
|
||||
map.insert("er", vec![vec!["ɚ0"], vec!["aɚ̯0"]]);
|
||||
map.insert("o", vec![vec!["ɔ0"]]);
|
||||
map
|
||||
});
|
||||
|
||||
/// Duanmu (2000, p. 37) and Lin (2007, p. 68f)
|
||||
/// Diphtongs from Duanmu (2007, p. 40): au, əu, əi, ai
|
||||
/// Diphthongs from Lin (2007, p. 68f): au̯, ou̯, ei̯, ai̯
|
||||
static FINAL_MAPPING: LazyLock<HashMap<&'static str, Vec<Vec<&'static str>>>> =
|
||||
LazyLock::new(|| {
|
||||
let mut map = HashMap::new();
|
||||
map.insert("a", vec![vec!["a0"]]);
|
||||
map.insert("ai", vec![vec!["ai0"]]);
|
||||
map.insert("an", vec![vec!["a0", "n"]]);
|
||||
map.insert("ang", vec![vec!["a0", "ŋ"]]);
|
||||
map.insert("ao", vec![vec!["au0"]]);
|
||||
map.insert("e", vec![vec!["ɤ0"]]);
|
||||
map.insert("ei", vec![vec!["ei0"]]);
|
||||
map.insert("en", vec![vec!["ə0", "n"]]);
|
||||
map.insert("eng", vec![vec!["ə0", "ŋ"]]);
|
||||
map.insert("i", vec![vec!["i0"]]);
|
||||
map.insert("ia", vec![vec!["j", "a0"]]);
|
||||
map.insert("ian", vec![vec!["j", "ɛ0", "n"]]);
|
||||
map.insert("iang", vec![vec!["j", "a0", "ŋ"]]);
|
||||
map.insert("iao", vec![vec!["j", "au0"]]);
|
||||
map.insert("ie", vec![vec!["j", "e0"]]);
|
||||
map.insert("in", vec![vec!["i0", "n"]]);
|
||||
map.insert("iou", vec![vec!["j", "ou0"]]);
|
||||
map.insert("ing", vec![vec!["i0", "ŋ"]]);
|
||||
map.insert("iong", vec![vec!["j", "ʊ0", "ŋ"]]);
|
||||
map.insert("ong", vec![vec!["ʊ0", "ŋ"]]);
|
||||
map.insert("ou", vec![vec!["ou0"]]);
|
||||
map.insert("u", vec![vec!["u0"]]);
|
||||
map.insert("uei", vec![vec!["w", "ei0"]]);
|
||||
map.insert("ua", vec![vec!["w", "a0"]]);
|
||||
map.insert("uai", vec![vec!["w", "ai0"]]);
|
||||
map.insert("uan", vec![vec!["w", "a0", "n"]]);
|
||||
map.insert("uen", vec![vec!["w", "ə0", "n"]]);
|
||||
map.insert("uang", vec![vec!["w", "a0", "ŋ"]]);
|
||||
map.insert("ueng", vec![vec!["w", "ə0", "ŋ"]]);
|
||||
map.insert("ui", vec![vec!["w", "ei0"]]);
|
||||
map.insert("un", vec![vec!["w", "ə0", "n"]]);
|
||||
map.insert("uo", vec![vec!["w", "o0"]]);
|
||||
map.insert("o", vec![vec!["w", "o0"]]); // 注意:这里'o'的映射可能与预期不符,根据注释可能需要特殊处理
|
||||
map.insert("ü", vec![vec!["y0"]]);
|
||||
map.insert("üe", vec![vec!["ɥ", "e0"]]);
|
||||
map.insert("üan", vec![vec!["ɥ", "ɛ0", "n"]]);
|
||||
map.insert("ün", vec![vec!["y0", "n"]]);
|
||||
map
|
||||
});
|
||||
|
||||
static FINAL_MAPPING_AFTER_ZH_CH_SH_R: LazyLock<HashMap<&'static str, Vec<Vec<&'static str>>>> =
|
||||
LazyLock::new(|| {
|
||||
let mut map = HashMap::new();
|
||||
map.insert("i", vec![vec!["ɻ0"], vec!["ʐ0"]]);
|
||||
map
|
||||
});
|
||||
|
||||
static FINAL_MAPPING_AFTER_Z_C_S: LazyLock<HashMap<&'static str, Vec<Vec<&'static str>>>> =
|
||||
LazyLock::new(|| {
|
||||
let mut map = HashMap::new();
|
||||
map.insert("i", vec![vec!["ɹ0"], vec!["z0"]]);
|
||||
map
|
||||
});
|
||||
|
||||
static TONE_MAPPING: LazyLock<HashMap<u8, &'static str>> = LazyLock::new(|| {
|
||||
let mut map = HashMap::new();
|
||||
map.insert(1u8, "˥");
|
||||
map.insert(2u8, "˧˥");
|
||||
map.insert(3u8, "˧˩˧");
|
||||
map.insert(4u8, "˥˩");
|
||||
map.insert(5u8, "");
|
||||
map
|
||||
});
|
||||
|
||||
/// Split a numeric tone suffix off a pinyin syllable.
///
/// `"ma3"` yields `("ma", 3)`; with no trailing digit the neutral tone 5
/// is assumed. (The digit is ASCII, so byte slicing off one byte is safe.)
pub(crate) fn split_tone(pinyin: &str) -> (&str, u8) {
    match pinyin.chars().last().and_then(|c| c.to_digit(10)) {
        Some(tone) => (&pinyin[..pinyin.len() - 1], tone as u8),
        None => (pinyin, 5),
    }
}
|
||||
|
||||
/// Restore the original `uen` final.
/// When `iou`, `uei`, `uen` follow an initial they are written `iu`, `ui`,
/// `un` — e.g. niu (牛), gui (归), lun (论). This undoes the `un` case.
fn convert_uen(s: &str) -> String {
    if let Some(stem) = s.strip_suffix('n') {
        if stem.ends_with(['u', 'ū', 'ú', 'ǔ', 'ù']) {
            return format!("{stem}en");
        }
    }
    s.to_string()
}
|
||||
|
||||
/// Restore the original `ü` final.
/// After the initials j, q, x the `ü` finals are written with plain `u`
/// (ju 居, qu 区, xu 虚, dropping the umlaut); after n and l they keep the
/// umlaut (nü 女, lü 吕). This puts the umlaut back for j/q/x syllables.
fn convert_uv(pinyin: &str) -> String {
    let cs: Vec<char> = pinyin.chars().collect();
    let (&initial, rest) = match cs.split_first() {
        Some(split) if matches!(split.0, 'j' | 'q' | 'x') => split,
        _ => return pinyin.to_string(),
    };
    let (&vowel, tail) = match rest.split_first() {
        Some(split) => split,
        None => return pinyin.to_string(),
    };
    // Map the written u (in any tone) back to its ü form.
    let restored = match vowel {
        'u' => 'ü',
        'ū' => 'ǖ',
        'ú' => 'ǘ',
        'ǔ' => 'ǚ',
        'ù' => 'ǜ',
        _ => return pinyin.to_string(),
    };
    let tail: String = tail.iter().collect();
    format!("{initial}{restored}{tail}")
}
|
||||
|
||||
/// Restore the original `iou` final.
/// When `iou`, `uei`, `uen` follow an initial they are written `iu`, `ui`,
/// `un` — e.g. niu (牛), gui (归), lun (论). This undoes the `iu` case.
fn convert_iou(pinyin: &str) -> String {
    let chars = pinyin.chars().collect::<Vec<_>>();

    match chars.as_slice() {
        // Handle the iu series: insert 'o' before the final u.
        [.., 'i', u @ ('u' | 'ū' | 'ú' | 'ǔ' | 'ù')] => {
            // Bug fix: slice off the final char's UTF-8 width, not one byte —
            // tone-marked vowels (ū/ú/ǔ/ù) are 2 bytes and `len() - 1` landed
            // mid-character, panicking on the byte-boundary check.
            format!("{}o{}", &pinyin[..pinyin.len() - u.len_utf8()], u)
        }

        // Everything else is unchanged.
        _ => pinyin.to_string(),
    }
}
|
||||
|
||||
/// Restore the original `uei` final.
/// When `iou`, `uei`, `uen` follow an initial they are written `iu`, `ui`,
/// `un` — e.g. niu (牛), gui (归), lun (论). This undoes the `ui` case.
fn convert_uei(pinyin: &str) -> String {
    let chars = pinyin.chars().collect::<Vec<_>>();

    match chars.as_slice() {
        // Handle the ui series: insert 'e' before the final i.
        [.., 'u', i @ ('i' | 'ī' | 'í' | 'ǐ' | 'ì')] => {
            // Bug fix: slice off the final char's UTF-8 width, not one byte —
            // tone-marked vowels (ī/í/ǐ/ì) are 2 bytes and `len() - 1` landed
            // mid-character, panicking on the byte-boundary check.
            format!("{}e{}", &pinyin[..pinyin.len() - i.len_utf8()], i)
        }

        // Everything else is unchanged.
        _ => pinyin.to_string(),
    }
}
|
||||
|
||||
/// Zero-initial conversion: recover the original final (rhyme).
/// i-row finals with no initial are written yi (衣), ya (呀), ye (耶), yao (腰),
/// you (忧), yan (烟), yin (因), yang (央), ying (英), yong (雍).
/// u-row finals with no initial are written wu (乌), wa (蛙), wo (窝), wai (歪),
/// wei (威), wan (弯), wen (温), wang (汪), weng (翁).
/// ü-row finals with no initial are written yu (迂), yue (约), yuan (冤),
/// yun (晕); the umlaut dots are dropped.
pub(crate) fn convert_zero_consonant(pinyin: &str) -> String {
    let mut buffer = String::with_capacity(pinyin.len() + 2);
    let chars: Vec<char> = pinyin.chars().collect();

    match chars.as_slice() {
        // y-row conversions
        ['y', 'u', rest @ ..] => {
            buffer.push('ü');
            buffer.extend(rest.iter());
        }
        ['y', u @ ('ū' | 'ú' | 'ǔ' | 'ù'), rest @ ..] => {
            buffer.push(match u {
                'ū' => 'ǖ', // ü, first tone
                'ú' => 'ǘ', // ü, second tone
                'ǔ' => 'ǚ', // ü, third tone
                'ù' => 'ǜ', // ü, fourth tone
                _ => unreachable!(),
            });
            buffer.extend(rest.iter());
        }
        ['y', i @ ('i' | 'ī' | 'í' | 'ǐ' | 'ì'), rest @ ..] => {
            buffer.push(*i);
            buffer.extend(rest.iter());
        }
        ['y', rest @ ..] => {
            buffer.push('i');
            buffer.extend(rest);
        }

        // w-row conversions
        ['w', u @ ('u' | 'ū' | 'ú' | 'ǔ' | 'ù'), rest @ ..] => {
            buffer.push(*u);
            buffer.extend(rest.iter());
        }
        ['w', rest @ ..] => {
            buffer.push('u');
            buffer.extend(rest);
        }

        // No conversion applies.
        _ => return pinyin.to_string(),
    }

    // Validity check: keep the rewrite only if it is a known final.
    // NOTE(review): VALID_FINALS lists only unmarked (toneless) finals, so
    // any tone-marked rewrite (from the 'ū'…'ù' branches above) fails this
    // check and falls back to the input unchanged — confirm that is intended.
    if VALID_FINALS.contains(&buffer.as_str()) {
        buffer
    } else {
        pinyin.to_string()
    }
}
|
||||
|
||||
pub(crate) fn split_initial(pinyin: &str) -> (&'static str, &str) {
|
||||
for &initial in &INITIALS {
|
||||
if let Some(stripped) = pinyin.strip_prefix(initial) {
|
||||
return (initial, stripped);
|
||||
}
|
||||
}
|
||||
("", pinyin)
|
||||
}
|
||||
|
||||
fn apply_tone(variants: &[Vec<&str>], tone: u8) -> Vec<Vec<String>> {
|
||||
let tone_str = TONE_MAPPING.get(&tone).unwrap_or(&"");
|
||||
variants
|
||||
.iter()
|
||||
.map(|v| v.iter().map(|s| s.replace("0", tone_str)).collect())
|
||||
.collect()
|
||||
}
|
||||
|
||||
/// Convert a toned pinyin syllable (e.g. "niú") to its IPA realisations.
///
/// Returns every (initial-variant × final-variant) combination as a list of
/// IPA segment lists, or `PinyinError::FinalNotFound` when the final is not
/// in any lookup table.
pub fn pinyin_to_ipa(pinyin: &str) -> Result<Vec<Vec<String>>, PinyinError> {
    // Normalisation pipeline: split off the tone, then undo the
    // orthographic abbreviations so the table lookups see canonical finals.
    let (pinyin, tone) = split_tone(pinyin);
    let pinyin = convert_zero_consonant(pinyin);
    let pinyin = convert_uv(&pinyin);
    let pinyin = convert_iou(&pinyin);
    let pinyin = convert_uei(&pinyin);
    let pinyin = convert_uen(&pinyin);

    // Handle special syllabic consonants and interjections: these map as
    // whole syllables and bypass the initial/final decomposition.
    if let Some(ipa) = SYLLABIC_CONSONANT_MAPPINGS.get(pinyin.as_str()) {
        return Ok(apply_tone(ipa, tone)
            .into_iter()
            .map(|i| i.into_iter().collect())
            .collect());
    }
    if let Some(ipa) = INTERJECTION_MAPPINGS.get(pinyin.as_str()) {
        return Ok(apply_tone(ipa, tone)
            .into_iter()
            .map(|i| i.into_iter().collect())
            .collect());
    }

    // Decompose into initial + final.
    let (initial_part, final_part) = split_initial(pinyin.as_str());

    // Look up the final's IPA. Retroflex (zh/ch/sh/r) and dental (z/c/s)
    // initials consult context-specific tables first, falling back to the
    // general table when those have no entry.
    let final_ipa = match initial_part {
        "zh" | "ch" | "sh" | "r" if FINAL_MAPPING_AFTER_ZH_CH_SH_R.contains_key(final_part) => {
            FINAL_MAPPING_AFTER_ZH_CH_SH_R.get(final_part)
        }
        "z" | "c" | "s" if FINAL_MAPPING_AFTER_Z_C_S.contains_key(final_part) => {
            FINAL_MAPPING_AFTER_Z_C_S.get(final_part)
        }
        _ => FINAL_MAPPING.get(final_part),
    }
    .ok_or(PinyinError::FinalNotFound(final_part.to_owned()))?;

    // Combine every initial variant with every toned final variant.
    // An initial with no mapping (including the empty initial) contributes
    // a single empty-string segment.
    let mut result = Vec::<Vec<String>>::new();
    let initials = INITIAL_MAPPING
        .get(initial_part)
        .map_or(vec![vec![Default::default()]], |i| {
            i.iter()
                .map(|i| i.iter().map(|i| i.to_string()).collect())
                .collect()
        });

    for i in initials.into_iter() {
        for j in apply_tone(final_ipa, tone).into_iter() {
            result.push(
                i.iter()
                    .chain(j.iter())
                    .map(|i| i.to_owned())
                    .collect::<Vec<_>>(),
            )
        }
    }

    Ok(result)
}
|
||||
673
rust/vendor/kokoro-tts/src/voice.rs
vendored
Normal file
673
rust/vendor/kokoro-tts/src/voice.rs
vendored
Normal file
@@ -0,0 +1,673 @@
|
||||
use crate::KokoroError;
|
||||
|
||||
//noinspection SpellCheckingInspection
|
||||
#[derive(Copy, Clone, Debug)]
|
||||
pub enum Voice {
|
||||
// v1.0
|
||||
ZmYunyang(f32),
|
||||
ZfXiaoni(f32),
|
||||
AfJessica(f32),
|
||||
BfLily(f32),
|
||||
ZfXiaobei(f32),
|
||||
ZmYunxia(f32),
|
||||
AfHeart(f32),
|
||||
BfEmma(f32),
|
||||
AmPuck(f32),
|
||||
BfAlice(f32),
|
||||
HfAlpha(f32),
|
||||
BfIsabella(f32),
|
||||
AfNova(f32),
|
||||
AmFenrir(f32),
|
||||
EmAlex(f32),
|
||||
ImNicola(f32),
|
||||
PmAlex(f32),
|
||||
AfAlloy(f32),
|
||||
ZmYunxi(f32),
|
||||
AfSarah(f32),
|
||||
JfNezumi(f32),
|
||||
BmDaniel(f32),
|
||||
JfTebukuro(f32),
|
||||
JfAlpha(f32),
|
||||
JmKumo(f32),
|
||||
EmSanta(f32),
|
||||
AmLiam(f32),
|
||||
AmSanta(f32),
|
||||
AmEric(f32),
|
||||
BmFable(f32),
|
||||
AfBella(f32),
|
||||
BmLewis(f32),
|
||||
PfDora(f32),
|
||||
AfNicole(f32),
|
||||
BmGeorge(f32),
|
||||
AmOnyx(f32),
|
||||
HmPsi(f32),
|
||||
HfBeta(f32),
|
||||
HmOmega(f32),
|
||||
ZfXiaoxiao(f32),
|
||||
FfSiwis(f32),
|
||||
EfDora(f32),
|
||||
AfAoede(f32),
|
||||
AmEcho(f32),
|
||||
AmMichael(f32),
|
||||
AfKore(f32),
|
||||
ZfXiaoyi(f32),
|
||||
JfGongitsune(f32),
|
||||
AmAdam(f32),
|
||||
IfSara(f32),
|
||||
AfSky(f32),
|
||||
PmSanta(f32),
|
||||
AfRiver(f32),
|
||||
ZmYunjian(f32),
|
||||
|
||||
// v1.1
|
||||
Zm029(i32),
|
||||
Zf048(i32),
|
||||
Zf008(i32),
|
||||
Zm014(i32),
|
||||
Zf003(i32),
|
||||
Zf047(i32),
|
||||
Zm080(i32),
|
||||
Zf094(i32),
|
||||
Zf046(i32),
|
||||
Zm054(i32),
|
||||
Zf001(i32),
|
||||
Zm062(i32),
|
||||
BfVale(i32),
|
||||
Zf044(i32),
|
||||
Zf005(i32),
|
||||
Zf028(i32),
|
||||
Zf059(i32),
|
||||
Zm030(i32),
|
||||
Zf074(i32),
|
||||
Zm009(i32),
|
||||
Zf004(i32),
|
||||
Zf021(i32),
|
||||
Zm095(i32),
|
||||
Zm041(i32),
|
||||
Zf087(i32),
|
||||
Zf039(i32),
|
||||
Zm031(i32),
|
||||
Zf007(i32),
|
||||
Zf038(i32),
|
||||
Zf092(i32),
|
||||
Zm056(i32),
|
||||
Zf099(i32),
|
||||
Zm010(i32),
|
||||
Zm069(i32),
|
||||
Zm016(i32),
|
||||
Zm068(i32),
|
||||
Zf083(i32),
|
||||
Zf093(i32),
|
||||
Zf006(i32),
|
||||
Zf026(i32),
|
||||
Zm053(i32),
|
||||
Zm064(i32),
|
||||
AfSol(i32),
|
||||
Zf042(i32),
|
||||
Zf084(i32),
|
||||
Zf073(i32),
|
||||
Zf067(i32),
|
||||
Zm025(i32),
|
||||
Zm020(i32),
|
||||
Zm050(i32),
|
||||
Zf070(i32),
|
||||
Zf002(i32),
|
||||
Zf032(i32),
|
||||
Zm091(i32),
|
||||
Zm066(i32),
|
||||
Zm089(i32),
|
||||
Zm034(i32),
|
||||
Zm100(i32),
|
||||
Zf086(i32),
|
||||
Zf040(i32),
|
||||
Zm011(i32),
|
||||
Zm098(i32),
|
||||
Zm015(i32),
|
||||
Zf051(i32),
|
||||
Zm065(i32),
|
||||
Zf076(i32),
|
||||
Zf036(i32),
|
||||
Zm033(i32),
|
||||
Zf018(i32),
|
||||
Zf017(i32),
|
||||
Zf049(i32),
|
||||
AfMaple(i32),
|
||||
Zm082(i32),
|
||||
Zm057(i32),
|
||||
Zf079(i32),
|
||||
Zf022(i32),
|
||||
Zm063(i32),
|
||||
Zf060(i32),
|
||||
Zf019(i32),
|
||||
Zm097(i32),
|
||||
Zm096(i32),
|
||||
Zf023(i32),
|
||||
Zf027(i32),
|
||||
Zf085(i32),
|
||||
Zf077(i32),
|
||||
Zm035(i32),
|
||||
Zf088(i32),
|
||||
Zf024(i32),
|
||||
Zf072(i32),
|
||||
Zm055(i32),
|
||||
Zm052(i32),
|
||||
Zf071(i32),
|
||||
Zm061(i32),
|
||||
Zf078(i32),
|
||||
Zm013(i32),
|
||||
Zm081(i32),
|
||||
Zm037(i32),
|
||||
Zf090(i32),
|
||||
Zf043(i32),
|
||||
Zm058(i32),
|
||||
Zm012(i32),
|
||||
Zm045(i32),
|
||||
Zf075(i32),
|
||||
}
|
||||
|
||||
impl Voice {
|
||||
//noinspection SpellCheckingInspection
|
||||
pub(super) fn get_name(&self) -> &str {
|
||||
match self {
|
||||
Self::ZmYunyang(_) => "zm_yunyang",
|
||||
Self::ZfXiaoni(_) => "zf_xiaoni",
|
||||
Self::AfJessica(_) => "af_jessica",
|
||||
Self::BfLily(_) => "bf_lily",
|
||||
Self::ZfXiaobei(_) => "zf_xiaobei",
|
||||
Self::ZmYunxia(_) => "zm_yunxia",
|
||||
Self::AfHeart(_) => "af_heart",
|
||||
Self::BfEmma(_) => "bf_emma",
|
||||
Self::AmPuck(_) => "am_puck",
|
||||
Self::BfAlice(_) => "bf_alice",
|
||||
Self::HfAlpha(_) => "hf_alpha",
|
||||
Self::BfIsabella(_) => "bf_isabella",
|
||||
Self::AfNova(_) => "af_nova",
|
||||
Self::AmFenrir(_) => "am_fenrir",
|
||||
Self::EmAlex(_) => "em_alex",
|
||||
Self::ImNicola(_) => "im_nicola",
|
||||
Self::PmAlex(_) => "pm_alex",
|
||||
Self::AfAlloy(_) => "af_alloy",
|
||||
Self::ZmYunxi(_) => "zm_yunxi",
|
||||
Self::AfSarah(_) => "af_sarah",
|
||||
Self::JfNezumi(_) => "jf_nezumi",
|
||||
Self::BmDaniel(_) => "bm_daniel",
|
||||
Self::JfTebukuro(_) => "jf_tebukuro",
|
||||
Self::JfAlpha(_) => "jf_alpha",
|
||||
Self::JmKumo(_) => "jm_kumo",
|
||||
Self::EmSanta(_) => "em_santa",
|
||||
Self::AmLiam(_) => "am_liam",
|
||||
Self::AmSanta(_) => "am_santa",
|
||||
Self::AmEric(_) => "am_eric",
|
||||
Self::BmFable(_) => "bm_fable",
|
||||
Self::AfBella(_) => "af_bella",
|
||||
Self::BmLewis(_) => "bm_lewis",
|
||||
Self::PfDora(_) => "pf_dora",
|
||||
Self::AfNicole(_) => "af_nicole",
|
||||
Self::BmGeorge(_) => "bm_george",
|
||||
Self::AmOnyx(_) => "am_onyx",
|
||||
Self::HmPsi(_) => "hm_psi",
|
||||
Self::HfBeta(_) => "hf_beta",
|
||||
Self::HmOmega(_) => "hm_omega",
|
||||
Self::ZfXiaoxiao(_) => "zf_xiaoxiao",
|
||||
Self::FfSiwis(_) => "ff_siwis",
|
||||
Self::EfDora(_) => "ef_dora",
|
||||
Self::AfAoede(_) => "af_aoede",
|
||||
Self::AmEcho(_) => "am_echo",
|
||||
Self::AmMichael(_) => "am_michael",
|
||||
Self::AfKore(_) => "af_kore",
|
||||
Self::ZfXiaoyi(_) => "zf_xiaoyi",
|
||||
Self::JfGongitsune(_) => "jf_gongitsune",
|
||||
Self::AmAdam(_) => "am_adam",
|
||||
Self::IfSara(_) => "if_sara",
|
||||
Self::AfSky(_) => "af_sky",
|
||||
Self::PmSanta(_) => "pm_santa",
|
||||
Self::AfRiver(_) => "af_river",
|
||||
Self::ZmYunjian(_) => "zm_yunjian",
|
||||
Self::Zm029(_) => "zm_029",
|
||||
Self::Zf048(_) => "zf_048",
|
||||
Self::Zf008(_) => "zf_008",
|
||||
Self::Zm014(_) => "zm_014",
|
||||
Self::Zf003(_) => "zf_003",
|
||||
Self::Zf047(_) => "zf_047",
|
||||
Self::Zm080(_) => "zm_080",
|
||||
Self::Zf094(_) => "zf_094",
|
||||
Self::Zf046(_) => "zf_046",
|
||||
Self::Zm054(_) => "zm_054",
|
||||
Self::Zf001(_) => "zf_001",
|
||||
Self::Zm062(_) => "zm_062",
|
||||
Self::BfVale(_) => "bf_vale",
|
||||
Self::Zf044(_) => "zf_044",
|
||||
Self::Zf005(_) => "zf_005",
|
||||
Self::Zf028(_) => "zf_028",
|
||||
Self::Zf059(_) => "zf_059",
|
||||
Self::Zm030(_) => "zm_030",
|
||||
Self::Zf074(_) => "zf_074",
|
||||
Self::Zm009(_) => "zm_009",
|
||||
Self::Zf004(_) => "zf_004",
|
||||
Self::Zf021(_) => "zf_021",
|
||||
Self::Zm095(_) => "zm_095",
|
||||
Self::Zm041(_) => "zm_041",
|
||||
Self::Zf087(_) => "zf_087",
|
||||
Self::Zf039(_) => "zf_039",
|
||||
Self::Zm031(_) => "zm_031",
|
||||
Self::Zf007(_) => "zf_007",
|
||||
Self::Zf038(_) => "zf_038",
|
||||
Self::Zf092(_) => "zf_092",
|
||||
Self::Zm056(_) => "zm_056",
|
||||
Self::Zf099(_) => "zf_099",
|
||||
Self::Zm010(_) => "zm_010",
|
||||
Self::Zm069(_) => "zm_069",
|
||||
Self::Zm016(_) => "zm_016",
|
||||
Self::Zm068(_) => "zm_068",
|
||||
Self::Zf083(_) => "zf_083",
|
||||
Self::Zf093(_) => "zf_093",
|
||||
Self::Zf006(_) => "zf_006",
|
||||
Self::Zf026(_) => "zf_026",
|
||||
Self::Zm053(_) => "zm_053",
|
||||
Self::Zm064(_) => "zm_064",
|
||||
Self::AfSol(_) => "af_sol",
|
||||
Self::Zf042(_) => "zf_042",
|
||||
Self::Zf084(_) => "zf_084",
|
||||
Self::Zf073(_) => "zf_073",
|
||||
Self::Zf067(_) => "zf_067",
|
||||
Self::Zm025(_) => "zm_025",
|
||||
Self::Zm020(_) => "zm_020",
|
||||
Self::Zm050(_) => "zm_050",
|
||||
Self::Zf070(_) => "zf_070",
|
||||
Self::Zf002(_) => "zf_002",
|
||||
Self::Zf032(_) => "zf_032",
|
||||
Self::Zm091(_) => "zm_091",
|
||||
Self::Zm066(_) => "zm_066",
|
||||
Self::Zm089(_) => "zm_089",
|
||||
Self::Zm034(_) => "zm_034",
|
||||
Self::Zm100(_) => "zm_100",
|
||||
Self::Zf086(_) => "zf_086",
|
||||
Self::Zf040(_) => "zf_040",
|
||||
Self::Zm011(_) => "zm_011",
|
||||
Self::Zm098(_) => "zm_098",
|
||||
Self::Zm015(_) => "zm_015",
|
||||
Self::Zf051(_) => "zf_051",
|
||||
Self::Zm065(_) => "zm_065",
|
||||
Self::Zf076(_) => "zf_076",
|
||||
Self::Zf036(_) => "zf_036",
|
||||
Self::Zm033(_) => "zm_033",
|
||||
Self::Zf018(_) => "zf_018",
|
||||
Self::Zf017(_) => "zf_017",
|
||||
Self::Zf049(_) => "zf_049",
|
||||
Self::AfMaple(_) => "af_maple",
|
||||
Self::Zm082(_) => "zm_082",
|
||||
Self::Zm057(_) => "zm_057",
|
||||
Self::Zf079(_) => "zf_079",
|
||||
Self::Zf022(_) => "zf_022",
|
||||
Self::Zm063(_) => "zm_063",
|
||||
Self::Zf060(_) => "zf_060",
|
||||
Self::Zf019(_) => "zf_019",
|
||||
Self::Zm097(_) => "zm_097",
|
||||
Self::Zm096(_) => "zm_096",
|
||||
Self::Zf023(_) => "zf_023",
|
||||
Self::Zf027(_) => "zf_027",
|
||||
Self::Zf085(_) => "zf_085",
|
||||
Self::Zf077(_) => "zf_077",
|
||||
Self::Zm035(_) => "zm_035",
|
||||
Self::Zf088(_) => "zf_088",
|
||||
Self::Zf024(_) => "zf_024",
|
||||
Self::Zf072(_) => "zf_072",
|
||||
Self::Zm055(_) => "zm_055",
|
||||
Self::Zm052(_) => "zm_052",
|
||||
Self::Zf071(_) => "zf_071",
|
||||
Self::Zm061(_) => "zm_061",
|
||||
Self::Zf078(_) => "zf_078",
|
||||
Self::Zm013(_) => "zm_013",
|
||||
Self::Zm081(_) => "zm_081",
|
||||
Self::Zm037(_) => "zm_037",
|
||||
Self::Zf090(_) => "zf_090",
|
||||
Self::Zf043(_) => "zf_043",
|
||||
Self::Zm058(_) => "zm_058",
|
||||
Self::Zm012(_) => "zm_012",
|
||||
Self::Zm045(_) => "zm_045",
|
||||
Self::Zf075(_) => "zf_075",
|
||||
}
|
||||
}
|
||||
|
||||
pub(super) fn is_v10_supported(&self) -> bool {
|
||||
matches!(
|
||||
self,
|
||||
Self::ZmYunyang(_)
|
||||
| Self::ZfXiaoni(_)
|
||||
| Self::AfJessica(_)
|
||||
| Self::BfLily(_)
|
||||
| Self::ZfXiaobei(_)
|
||||
| Self::ZmYunxia(_)
|
||||
| Self::AfHeart(_)
|
||||
| Self::BfEmma(_)
|
||||
| Self::AmPuck(_)
|
||||
| Self::BfAlice(_)
|
||||
| Self::HfAlpha(_)
|
||||
| Self::BfIsabella(_)
|
||||
| Self::AfNova(_)
|
||||
| Self::AmFenrir(_)
|
||||
| Self::EmAlex(_)
|
||||
| Self::ImNicola(_)
|
||||
| Self::PmAlex(_)
|
||||
| Self::AfAlloy(_)
|
||||
| Self::ZmYunxi(_)
|
||||
| Self::AfSarah(_)
|
||||
| Self::JfNezumi(_)
|
||||
| Self::BmDaniel(_)
|
||||
| Self::JfTebukuro(_)
|
||||
| Self::JfAlpha(_)
|
||||
| Self::JmKumo(_)
|
||||
| Self::EmSanta(_)
|
||||
| Self::AmLiam(_)
|
||||
| Self::AmSanta(_)
|
||||
| Self::AmEric(_)
|
||||
| Self::BmFable(_)
|
||||
| Self::AfBella(_)
|
||||
| Self::BmLewis(_)
|
||||
| Self::PfDora(_)
|
||||
| Self::AfNicole(_)
|
||||
| Self::BmGeorge(_)
|
||||
| Self::AmOnyx(_)
|
||||
| Self::HmPsi(_)
|
||||
| Self::HfBeta(_)
|
||||
| Self::HmOmega(_)
|
||||
| Self::ZfXiaoxiao(_)
|
||||
| Self::FfSiwis(_)
|
||||
| Self::EfDora(_)
|
||||
| Self::AfAoede(_)
|
||||
| Self::AmEcho(_)
|
||||
| Self::AmMichael(_)
|
||||
| Self::AfKore(_)
|
||||
| Self::ZfXiaoyi(_)
|
||||
| Self::JfGongitsune(_)
|
||||
| Self::AmAdam(_)
|
||||
| Self::IfSara(_)
|
||||
| Self::AfSky(_)
|
||||
| Self::PmSanta(_)
|
||||
| Self::AfRiver(_)
|
||||
| Self::ZmYunjian(_)
|
||||
)
|
||||
}
|
||||
|
||||
pub(super) fn is_v11_supported(&self) -> bool {
|
||||
matches!(
|
||||
self,
|
||||
Self::Zm029(_)
|
||||
| Self::Zf048(_)
|
||||
| Self::Zf008(_)
|
||||
| Self::Zm014(_)
|
||||
| Self::Zf003(_)
|
||||
| Self::Zf047(_)
|
||||
| Self::Zm080(_)
|
||||
| Self::Zf094(_)
|
||||
| Self::Zf046(_)
|
||||
| Self::Zm054(_)
|
||||
| Self::Zf001(_)
|
||||
| Self::Zm062(_)
|
||||
| Self::BfVale(_)
|
||||
| Self::Zf044(_)
|
||||
| Self::Zf005(_)
|
||||
| Self::Zf028(_)
|
||||
| Self::Zf059(_)
|
||||
| Self::Zm030(_)
|
||||
| Self::Zf074(_)
|
||||
| Self::Zm009(_)
|
||||
| Self::Zf004(_)
|
||||
| Self::Zf021(_)
|
||||
| Self::Zm095(_)
|
||||
| Self::Zm041(_)
|
||||
| Self::Zf087(_)
|
||||
| Self::Zf039(_)
|
||||
| Self::Zm031(_)
|
||||
| Self::Zf007(_)
|
||||
| Self::Zf038(_)
|
||||
| Self::Zf092(_)
|
||||
| Self::Zm056(_)
|
||||
| Self::Zf099(_)
|
||||
| Self::Zm010(_)
|
||||
| Self::Zm069(_)
|
||||
| Self::Zm016(_)
|
||||
| Self::Zm068(_)
|
||||
| Self::Zf083(_)
|
||||
| Self::Zf093(_)
|
||||
| Self::Zf006(_)
|
||||
| Self::Zf026(_)
|
||||
| Self::Zm053(_)
|
||||
| Self::Zm064(_)
|
||||
| Self::AfSol(_)
|
||||
| Self::Zf042(_)
|
||||
| Self::Zf084(_)
|
||||
| Self::Zf073(_)
|
||||
| Self::Zf067(_)
|
||||
| Self::Zm025(_)
|
||||
| Self::Zm020(_)
|
||||
| Self::Zm050(_)
|
||||
| Self::Zf070(_)
|
||||
| Self::Zf002(_)
|
||||
| Self::Zf032(_)
|
||||
| Self::Zm091(_)
|
||||
| Self::Zm066(_)
|
||||
| Self::Zm089(_)
|
||||
| Self::Zm034(_)
|
||||
| Self::Zm100(_)
|
||||
| Self::Zf086(_)
|
||||
| Self::Zf040(_)
|
||||
| Self::Zm011(_)
|
||||
| Self::Zm098(_)
|
||||
| Self::Zm015(_)
|
||||
| Self::Zf051(_)
|
||||
| Self::Zm065(_)
|
||||
| Self::Zf076(_)
|
||||
| Self::Zf036(_)
|
||||
| Self::Zm033(_)
|
||||
| Self::Zf018(_)
|
||||
| Self::Zf017(_)
|
||||
| Self::Zf049(_)
|
||||
| Self::AfMaple(_)
|
||||
| Self::Zm082(_)
|
||||
| Self::Zm057(_)
|
||||
| Self::Zf079(_)
|
||||
| Self::Zf022(_)
|
||||
| Self::Zm063(_)
|
||||
| Self::Zf060(_)
|
||||
| Self::Zf019(_)
|
||||
| Self::Zm097(_)
|
||||
| Self::Zm096(_)
|
||||
| Self::Zf023(_)
|
||||
| Self::Zf027(_)
|
||||
| Self::Zf085(_)
|
||||
| Self::Zf077(_)
|
||||
| Self::Zm035(_)
|
||||
| Self::Zf088(_)
|
||||
| Self::Zf024(_)
|
||||
| Self::Zf072(_)
|
||||
| Self::Zm055(_)
|
||||
| Self::Zm052(_)
|
||||
| Self::Zf071(_)
|
||||
| Self::Zm061(_)
|
||||
| Self::Zf078(_)
|
||||
| Self::Zm013(_)
|
||||
| Self::Zm081(_)
|
||||
| Self::Zm037(_)
|
||||
| Self::Zf090(_)
|
||||
| Self::Zf043(_)
|
||||
| Self::Zm058(_)
|
||||
| Self::Zm012(_)
|
||||
| Self::Zm045(_)
|
||||
| Self::Zf075(_)
|
||||
)
|
||||
}
|
||||
|
||||
pub(super) fn get_speed_v10(&self) -> Result<f32, KokoroError> {
|
||||
match self {
|
||||
Self::ZmYunyang(v)
|
||||
| Self::ZfXiaoni(v)
|
||||
| Self::AfJessica(v)
|
||||
| Self::BfLily(v)
|
||||
| Self::ZfXiaobei(v)
|
||||
| Self::ZmYunxia(v)
|
||||
| Self::AfHeart(v)
|
||||
| Self::BfEmma(v)
|
||||
| Self::AmPuck(v)
|
||||
| Self::BfAlice(v)
|
||||
| Self::HfAlpha(v)
|
||||
| Self::BfIsabella(v)
|
||||
| Self::AfNova(v)
|
||||
| Self::AmFenrir(v)
|
||||
| Self::EmAlex(v)
|
||||
| Self::ImNicola(v)
|
||||
| Self::PmAlex(v)
|
||||
| Self::AfAlloy(v)
|
||||
| Self::ZmYunxi(v)
|
||||
| Self::AfSarah(v)
|
||||
| Self::JfNezumi(v)
|
||||
| Self::BmDaniel(v)
|
||||
| Self::JfTebukuro(v)
|
||||
| Self::JfAlpha(v)
|
||||
| Self::JmKumo(v)
|
||||
| Self::EmSanta(v)
|
||||
| Self::AmLiam(v)
|
||||
| Self::AmSanta(v)
|
||||
| Self::AmEric(v)
|
||||
| Self::BmFable(v)
|
||||
| Self::AfBella(v)
|
||||
| Self::BmLewis(v)
|
||||
| Self::PfDora(v)
|
||||
| Self::AfNicole(v)
|
||||
| Self::BmGeorge(v)
|
||||
| Self::AmOnyx(v)
|
||||
| Self::HmPsi(v)
|
||||
| Self::HfBeta(v)
|
||||
| Self::HmOmega(v)
|
||||
| Self::ZfXiaoxiao(v)
|
||||
| Self::FfSiwis(v)
|
||||
| Self::EfDora(v)
|
||||
| Self::AfAoede(v)
|
||||
| Self::AmEcho(v)
|
||||
| Self::AmMichael(v)
|
||||
| Self::AfKore(v)
|
||||
| Self::ZfXiaoyi(v)
|
||||
| Self::JfGongitsune(v)
|
||||
| Self::AmAdam(v)
|
||||
| Self::IfSara(v)
|
||||
| Self::AfSky(v)
|
||||
| Self::PmSanta(v)
|
||||
| Self::AfRiver(v)
|
||||
| Self::ZmYunjian(v) => Ok(*v),
|
||||
_ => Err(KokoroError::VoiceVersionInvalid(
|
||||
"Expect version 1.0".to_owned(),
|
||||
)),
|
||||
}
|
||||
}
|
||||
|
||||
pub(super) fn get_speed_v11(&self) -> Result<i32, KokoroError> {
|
||||
match self {
|
||||
Self::Zm029(v)
|
||||
| Self::Zf048(v)
|
||||
| Self::Zf008(v)
|
||||
| Self::Zm014(v)
|
||||
| Self::Zf003(v)
|
||||
| Self::Zf047(v)
|
||||
| Self::Zm080(v)
|
||||
| Self::Zf094(v)
|
||||
| Self::Zf046(v)
|
||||
| Self::Zm054(v)
|
||||
| Self::Zf001(v)
|
||||
| Self::Zm062(v)
|
||||
| Self::BfVale(v)
|
||||
| Self::Zf044(v)
|
||||
| Self::Zf005(v)
|
||||
| Self::Zf028(v)
|
||||
| Self::Zf059(v)
|
||||
| Self::Zm030(v)
|
||||
| Self::Zf074(v)
|
||||
| Self::Zm009(v)
|
||||
| Self::Zf004(v)
|
||||
| Self::Zf021(v)
|
||||
| Self::Zm095(v)
|
||||
| Self::Zm041(v)
|
||||
| Self::Zf087(v)
|
||||
| Self::Zf039(v)
|
||||
| Self::Zm031(v)
|
||||
| Self::Zf007(v)
|
||||
| Self::Zf038(v)
|
||||
| Self::Zf092(v)
|
||||
| Self::Zm056(v)
|
||||
| Self::Zf099(v)
|
||||
| Self::Zm010(v)
|
||||
| Self::Zm069(v)
|
||||
| Self::Zm016(v)
|
||||
| Self::Zm068(v)
|
||||
| Self::Zf083(v)
|
||||
| Self::Zf093(v)
|
||||
| Self::Zf006(v)
|
||||
| Self::Zf026(v)
|
||||
| Self::Zm053(v)
|
||||
| Self::Zm064(v)
|
||||
| Self::AfSol(v)
|
||||
| Self::Zf042(v)
|
||||
| Self::Zf084(v)
|
||||
| Self::Zf073(v)
|
||||
| Self::Zf067(v)
|
||||
| Self::Zm025(v)
|
||||
| Self::Zm020(v)
|
||||
| Self::Zm050(v)
|
||||
| Self::Zf070(v)
|
||||
| Self::Zf002(v)
|
||||
| Self::Zf032(v)
|
||||
| Self::Zm091(v)
|
||||
| Self::Zm066(v)
|
||||
| Self::Zm089(v)
|
||||
| Self::Zm034(v)
|
||||
| Self::Zm100(v)
|
||||
| Self::Zf086(v)
|
||||
| Self::Zf040(v)
|
||||
| Self::Zm011(v)
|
||||
| Self::Zm098(v)
|
||||
| Self::Zm015(v)
|
||||
| Self::Zf051(v)
|
||||
| Self::Zm065(v)
|
||||
| Self::Zf076(v)
|
||||
| Self::Zf036(v)
|
||||
| Self::Zm033(v)
|
||||
| Self::Zf018(v)
|
||||
| Self::Zf017(v)
|
||||
| Self::Zf049(v)
|
||||
| Self::AfMaple(v)
|
||||
| Self::Zm082(v)
|
||||
| Self::Zm057(v)
|
||||
| Self::Zf079(v)
|
||||
| Self::Zf022(v)
|
||||
| Self::Zm063(v)
|
||||
| Self::Zf060(v)
|
||||
| Self::Zf019(v)
|
||||
| Self::Zm097(v)
|
||||
| Self::Zm096(v)
|
||||
| Self::Zf023(v)
|
||||
| Self::Zf027(v)
|
||||
| Self::Zf085(v)
|
||||
| Self::Zf077(v)
|
||||
| Self::Zm035(v)
|
||||
| Self::Zf088(v)
|
||||
| Self::Zf024(v)
|
||||
| Self::Zf072(v)
|
||||
| Self::Zm055(v)
|
||||
| Self::Zm052(v)
|
||||
| Self::Zf071(v)
|
||||
| Self::Zm061(v)
|
||||
| Self::Zf078(v)
|
||||
| Self::Zm013(v)
|
||||
| Self::Zm081(v)
|
||||
| Self::Zm037(v)
|
||||
| Self::Zf090(v)
|
||||
| Self::Zf043(v)
|
||||
| Self::Zm058(v)
|
||||
| Self::Zm012(v)
|
||||
| Self::Zm045(v)
|
||||
| Self::Zf075(v) => Ok(*v),
|
||||
_ => Err(KokoroError::VoiceVersionInvalid(
|
||||
"Expect version 1.1".to_owned(),
|
||||
)),
|
||||
}
|
||||
}
|
||||
}
|
||||
@@ -3,6 +3,6 @@
|
||||
*/
|
||||
export const commitinfo = {
|
||||
name: 'siprouter',
|
||||
version: '1.10.0',
|
||||
version: '1.25.2',
|
||||
description: 'undefined'
|
||||
}
|
||||
|
||||
@@ -1,332 +0,0 @@
|
||||
/**
|
||||
* TTS announcement module — pre-generates audio announcements using espeak-ng
|
||||
* and caches them as encoded RTP packets for playback during call setup.
|
||||
*
|
||||
* On startup, generates the announcement WAV via espeak-ng (formant-based TTS
|
||||
* with highly accurate pronunciation), encodes each 20ms frame to G.722 (for
|
||||
* SIP) and Opus (for WebRTC) via the Rust transcoder, and caches the packets.
|
||||
*
|
||||
* Falls back to the Rust tts-engine (Kokoro neural TTS) if espeak-ng is not
|
||||
* installed, and disables announcements if neither is available.
|
||||
*/
|
||||
|
||||
import { execSync } from 'node:child_process';
|
||||
import fs from 'node:fs';
|
||||
import path from 'node:path';
|
||||
import { Buffer } from 'node:buffer';
|
||||
import { buildRtpHeader, rtpClockIncrement } from './call/leg.ts';
|
||||
import { encodePcm, isCodecReady } from './opusbridge.ts';
|
||||
|
||||
// ---------------------------------------------------------------------------
|
||||
// Types
|
||||
// ---------------------------------------------------------------------------
|
||||
|
||||
/** A pre-encoded announcement ready for RTP playback. */
|
||||
export interface IAnnouncementCache {
|
||||
/** G.722 encoded frames (each is a 20ms frame payload, no RTP header). */
|
||||
g722Frames: Buffer[];
|
||||
/** Opus encoded frames for WebRTC playback. */
|
||||
opusFrames: Buffer[];
|
||||
/** Total duration in milliseconds. */
|
||||
durationMs: number;
|
||||
}
|
||||
|
||||
// ---------------------------------------------------------------------------
|
||||
// State
|
||||
// ---------------------------------------------------------------------------
|
||||
|
||||
let cachedAnnouncement: IAnnouncementCache | null = null;
|
||||
|
||||
const TTS_DIR = path.join(process.cwd(), '.nogit', 'tts');
|
||||
const ANNOUNCEMENT_TEXT = "Hello. I'm connecting your call now.";
|
||||
const CACHE_WAV = path.join(TTS_DIR, 'announcement.wav');
|
||||
|
||||
// Kokoro fallback constants.
|
||||
const KOKORO_MODEL = 'kokoro-v1.0.onnx';
|
||||
const KOKORO_VOICES = 'voices.bin';
|
||||
const KOKORO_VOICE = 'af_bella';
|
||||
|
||||
// ---------------------------------------------------------------------------
|
||||
// Initialization
|
||||
// ---------------------------------------------------------------------------
|
||||
|
||||
/**
|
||||
* Check if espeak-ng is available on the system.
|
||||
*/
|
||||
function isEspeakAvailable(): boolean {
|
||||
try {
|
||||
execSync('which espeak-ng', { stdio: 'pipe' });
|
||||
return true;
|
||||
} catch {
|
||||
return false;
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Generate announcement WAV via espeak-ng (primary engine).
|
||||
* Returns true on success.
|
||||
*/
|
||||
function generateViaEspeak(wavPath: string, text: string, log: (msg: string) => void): boolean {
  log('[tts] generating announcement audio via espeak-ng...');
  try {
    // NOTE(review): wavPath/text are interpolated straight into a shell
    // command — a quote or $() in either would escape the quoting. Safe for
    // the module-level constant used today, but confirm no untrusted input
    // can reach this parameter (execFileSync with an args array would avoid
    // the shell entirely).
    execSync(
      `espeak-ng -v en-us -s 150 -w "${wavPath}" "${text}"`,
      { timeout: 10000, stdio: 'pipe' },
    );
    log('[tts] espeak-ng WAV generated');
    return true;
  } catch (e: any) {
    // Failure (missing binary, timeout, bad args) is non-fatal here: the
    // caller falls back to the Kokoro engine.
    log(`[tts] espeak-ng failed: ${e.message}`);
    return false;
  }
}
|
||||
|
||||
/**
|
||||
* Generate announcement WAV via Kokoro TTS (fallback engine).
|
||||
* Returns true on success.
|
||||
*/
|
||||
function generateViaKokoro(wavPath: string, text: string, log: (msg: string) => void): boolean {
  const modelPath = path.join(TTS_DIR, KOKORO_MODEL);
  const voicesPath = path.join(TTS_DIR, KOKORO_VOICES);

  // Both the ONNX model and the voices blob must already exist on disk;
  // this function does not download them.
  if (!fs.existsSync(modelPath) || !fs.existsSync(voicesPath)) {
    log('[tts] Kokoro model/voices not found — Kokoro fallback unavailable');
    return false;
  }

  // Locate the Rust tts-engine binary: packaged dist first, then local
  // cargo release/debug build outputs.
  const root = process.cwd();
  const ttsBinPaths = [
    path.join(root, 'dist_rust', 'tts-engine'),
    path.join(root, 'rust', 'target', 'release', 'tts-engine'),
    path.join(root, 'rust', 'target', 'debug', 'tts-engine'),
  ];
  const ttsBin = ttsBinPaths.find((p) => fs.existsSync(p));
  if (!ttsBin) {
    log('[tts] tts-engine binary not found — Kokoro fallback unavailable');
    return false;
  }

  log('[tts] generating announcement audio via Kokoro TTS (fallback)...');
  try {
    // NOTE(review): wavPath/text are interpolated into a shell command —
    // same quoting-escape concern as the espeak path; confirm no untrusted
    // input can reach here. The 2-minute timeout accommodates slow CPU
    // inference.
    execSync(
      `"${ttsBin}" --model "${modelPath}" --voices "${voicesPath}" --voice "${KOKORO_VOICE}" --output "${wavPath}" --text "${text}"`,
      { timeout: 120000, stdio: 'pipe' },
    );
    log('[tts] Kokoro WAV generated');
    return true;
  } catch (e: any) {
    // Non-fatal: the caller disables announcements when every engine fails.
    log(`[tts] Kokoro failed: ${e.message}`);
    return false;
  }
}
|
||||
|
||||
/**
|
||||
* Read a WAV file and detect its sample rate from the fmt chunk.
|
||||
* Returns { pcm, sampleRate } or null on failure.
|
||||
*/
|
||||
function readWavWithRate(wavPath: string): { pcm: Buffer; sampleRate: number } | null {
|
||||
const wav = fs.readFileSync(wavPath);
|
||||
if (wav.length < 44) return null;
|
||||
if (wav.toString('ascii', 0, 4) !== 'RIFF') return null;
|
||||
if (wav.toString('ascii', 8, 12) !== 'WAVE') return null;
|
||||
|
||||
let sampleRate = 22050; // default
|
||||
let offset = 12;
|
||||
let pcm: Buffer | null = null;
|
||||
|
||||
while (offset < wav.length - 8) {
|
||||
const chunkId = wav.toString('ascii', offset, offset + 4);
|
||||
const chunkSize = wav.readUInt32LE(offset + 4);
|
||||
if (chunkId === 'fmt ') {
|
||||
sampleRate = wav.readUInt32LE(offset + 12);
|
||||
}
|
||||
if (chunkId === 'data') {
|
||||
pcm = wav.subarray(offset + 8, offset + 8 + chunkSize);
|
||||
}
|
||||
offset += 8 + chunkSize;
|
||||
if (offset % 2 !== 0) offset++;
|
||||
}
|
||||
|
||||
if (!pcm) return null;
|
||||
return { pcm, sampleRate };
|
||||
}
|
||||
|
||||
/**
|
||||
* Pre-generate the announcement audio and encode to G.722 + Opus frames.
|
||||
* Must be called after the codec bridge is initialized.
|
||||
*
|
||||
* Engine priority: espeak-ng → Kokoro → disabled.
|
||||
*/
|
||||
export async function initAnnouncement(log: (msg: string) => void): Promise<boolean> {
  // Ensure the cache directory exists before any engine writes into it.
  fs.mkdirSync(TTS_DIR, { recursive: true });

  try {
    // Generate WAV if not cached from a previous run.
    if (!fs.existsSync(CACHE_WAV)) {
      let generated = false;

      // Try espeak-ng first (fast, formant-based).
      if (isEspeakAvailable()) {
        generated = generateViaEspeak(CACHE_WAV, ANNOUNCEMENT_TEXT, log);
      } else {
        log('[tts] espeak-ng not installed — trying Kokoro fallback');
      }

      // Fall back to Kokoro (neural, slower).
      if (!generated) {
        generated = generateViaKokoro(CACHE_WAV, ANNOUNCEMENT_TEXT, log);
      }

      // Neither engine produced audio: announcements stay disabled.
      if (!generated) {
        log('[tts] no TTS engine available — announcements disabled');
        return false;
      }
    }

    // Read WAV and extract raw PCM + sample rate.
    const result = readWavWithRate(CACHE_WAV);
    if (!result) {
      log('[tts] failed to parse WAV file');
      return false;
    }

    const { pcm, sampleRate } = result;

    // Wait for codec bridge to be ready — returning false lets the caller
    // retry later rather than encoding into a dead bridge.
    if (!isCodecReady()) {
      log('[tts] codec bridge not ready — will retry');
      return false;
    }

    // Encode in 20ms chunks. The Rust encoder resamples to each codec's native rate.
    const FRAME_SAMPLES = Math.floor(sampleRate * 0.02);
    const FRAME_BYTES = FRAME_SAMPLES * 2; // 16-bit = 2 bytes per sample
    const totalFrames = Math.floor(pcm.length / FRAME_BYTES);

    const g722Frames: Buffer[] = [];
    const opusFrames: Buffer[] = [];

    log(`[tts] encoding ${totalFrames} frames (${FRAME_SAMPLES} samples/frame @ ${sampleRate}Hz)...`);
    for (let i = 0; i < totalFrames; i++) {
      const framePcm = pcm.subarray(i * FRAME_BYTES, (i + 1) * FRAME_BYTES);
      const pcmBuf = Buffer.from(framePcm);
      // Encode the same frame for both transports in parallel.
      const [g722, opus] = await Promise.all([
        encodePcm(pcmBuf, sampleRate, 9), // G.722 for SIP devices
        encodePcm(pcmBuf, sampleRate, 111), // Opus for WebRTC browsers
      ]);
      if (g722) g722Frames.push(g722);
      if (opus) opusFrames.push(opus);
      // Only the first few failures are logged, to avoid flooding.
      if (!g722 && !opus && i < 3) log(`[tts] frame ${i} encode failed`);
    }

    // Publish the encoded frames for the playback functions.
    cachedAnnouncement = {
      g722Frames,
      opusFrames,
      durationMs: totalFrames * 20,
    };

    log(`[tts] announcement cached: ${g722Frames.length} frames (${(totalFrames * 20 / 1000).toFixed(1)}s)`);
    return true;
  } catch (e: any) {
    // Any unexpected failure disables announcements for this attempt.
    log(`[tts] init error: ${e.message}`);
    return false;
  }
}
|
||||
|
||||
// ---------------------------------------------------------------------------
|
||||
// Playback
|
||||
// ---------------------------------------------------------------------------
|
||||
|
||||
/**
|
||||
* Play the pre-cached announcement to an RTP endpoint.
|
||||
*
|
||||
* @param sendPacket - function to send a raw RTP packet
|
||||
* @param ssrc - SSRC to use in RTP headers
|
||||
* @param onDone - called when the announcement finishes
|
||||
* @returns a cancel function, or null if no announcement is cached
|
||||
*/
|
||||
export function playAnnouncement(
|
||||
sendPacket: (pkt: Buffer) => void,
|
||||
ssrc: number,
|
||||
onDone?: () => void,
|
||||
): (() => void) | null {
|
||||
if (!cachedAnnouncement || cachedAnnouncement.g722Frames.length === 0) {
|
||||
onDone?.();
|
||||
return null;
|
||||
}
|
||||
|
||||
const frames = cachedAnnouncement.g722Frames;
|
||||
const PT = 9; // G.722
|
||||
let frameIdx = 0;
|
||||
let seq = Math.floor(Math.random() * 0xffff);
|
||||
let rtpTs = Math.floor(Math.random() * 0xffffffff);
|
||||
|
||||
const timer = setInterval(() => {
|
||||
if (frameIdx >= frames.length) {
|
||||
clearInterval(timer);
|
||||
onDone?.();
|
||||
return;
|
||||
}
|
||||
|
||||
const payload = frames[frameIdx];
|
||||
const hdr = buildRtpHeader(PT, seq & 0xffff, rtpTs >>> 0, ssrc >>> 0, frameIdx === 0);
|
||||
const pkt = Buffer.concat([hdr, payload]);
|
||||
sendPacket(pkt);
|
||||
|
||||
seq++;
|
||||
rtpTs += rtpClockIncrement(PT);
|
||||
frameIdx++;
|
||||
}, 20);
|
||||
|
||||
// Return cancel function.
|
||||
return () => clearInterval(timer);
|
||||
}
|
||||
|
||||
/**
|
||||
* Play pre-cached Opus announcement to a WebRTC PeerConnection sender.
|
||||
*
|
||||
* @param sendRtpPacket - function to send a raw RTP packet via sender.sendRtp()
|
||||
* @param ssrc - SSRC to use in RTP headers
|
||||
* @param onDone - called when announcement finishes
|
||||
* @returns cancel function, or null if no announcement cached
|
||||
*/
|
||||
export function playAnnouncementToWebRtc(
|
||||
sendRtpPacket: (pkt: Buffer) => void,
|
||||
ssrc: number,
|
||||
counters: { seq: number; ts: number },
|
||||
onDone?: () => void,
|
||||
): (() => void) | null {
|
||||
if (!cachedAnnouncement || cachedAnnouncement.opusFrames.length === 0) {
|
||||
onDone?.();
|
||||
return null;
|
||||
}
|
||||
|
||||
const frames = cachedAnnouncement.opusFrames;
|
||||
const PT = 111; // Opus
|
||||
let frameIdx = 0;
|
||||
|
||||
const timer = setInterval(() => {
|
||||
if (frameIdx >= frames.length) {
|
||||
clearInterval(timer);
|
||||
onDone?.();
|
||||
return;
|
||||
}
|
||||
|
||||
const payload = frames[frameIdx];
|
||||
const hdr = buildRtpHeader(PT, counters.seq & 0xffff, counters.ts >>> 0, ssrc >>> 0, frameIdx === 0);
|
||||
const pkt = Buffer.concat([hdr, payload]);
|
||||
sendRtpPacket(pkt);
|
||||
|
||||
counters.seq++;
|
||||
counters.ts += 960; // Opus at 48kHz: 960 samples per 20ms
|
||||
frameIdx++;
|
||||
}, 20);
|
||||
|
||||
return () => clearInterval(timer);
|
||||
}
|
||||
|
||||
/** Check if an announcement is cached and ready. */
|
||||
export function isAnnouncementReady(): boolean {
|
||||
return cachedAnnouncement !== null && cachedAnnouncement.g722Frames.length > 0;
|
||||
}
|
||||
|
||||
@@ -1,323 +0,0 @@
|
||||
/**
|
||||
* Audio recorder — captures RTP packets from a single direction,
|
||||
* decodes them to PCM, and writes a WAV file.
|
||||
*
|
||||
* Uses the Rust codec bridge to transcode incoming audio (G.722, Opus,
|
||||
* PCMU, PCMA) to PCMU, then decodes mu-law to 16-bit PCM in TypeScript.
|
||||
* Output: 8kHz 16-bit mono WAV (standard telephony quality).
|
||||
*
|
||||
* Supports:
|
||||
* - Max recording duration limit
|
||||
* - Silence detection (stop after N seconds of silence)
|
||||
* - Manual stop
|
||||
* - DTMF packets (PT 101) are automatically skipped
|
||||
*/
|
||||
|
||||
import { Buffer } from 'node:buffer';
|
||||
import fs from 'node:fs';
|
||||
import path from 'node:path';
|
||||
import { WavWriter } from './wav-writer.ts';
|
||||
import type { IWavWriterResult } from './wav-writer.ts';
|
||||
import { transcode, createSession, destroySession } from '../opusbridge.ts';
|
||||
|
||||
// ---------------------------------------------------------------------------
|
||||
// Types
|
||||
// ---------------------------------------------------------------------------
|
||||
|
||||
/** Configuration for an {@link AudioRecorder} instance. */
export interface IRecordingOptions {
  /** Output directory for WAV files. Created on start() if missing. */
  outputDir: string;
  /** Target sample rate for the WAV output (default 8000). */
  sampleRate?: number;
  /** Maximum recording duration in seconds. 0 = unlimited. Default 120. */
  maxDurationSec?: number;
  /** Stop after this many consecutive seconds of silence. 0 = disabled. Default 5. */
  silenceTimeoutSec?: number;
  /** Silence threshold: max PCM amplitude below this is "silent". Default 200. */
  silenceThreshold?: number;
  /** Logging function. */
  log: (msg: string) => void;
}
|
||||
|
||||
/** Summary returned when a recording stops (manually or automatically). */
export interface IRecordingResult {
  /** Full path to the WAV file. */
  filePath: string;
  /** Duration in milliseconds. 0 when the recorder never started. */
  durationMs: number;
  /** Sample rate of the WAV. */
  sampleRate: number;
  /** Size of the WAV file in bytes. */
  fileSize: number;
  /** Why the recording was stopped. */
  stopReason: TRecordingStopReason;
}

/**
 * Reason a recording ended:
 * 'manual' — stop() was called by the owner;
 * 'max-duration' — the maxDurationSec limit fired;
 * 'silence' — silenceTimeoutSec of continuous silence was detected;
 * 'cancelled' — cancel() was called and the file was deleted.
 */
export type TRecordingStopReason = 'manual' | 'max-duration' | 'silence' | 'cancelled';
|
||||
|
||||
// ---------------------------------------------------------------------------
|
||||
// Mu-law decode table (ITU-T G.711)
|
||||
// ---------------------------------------------------------------------------
|
||||
|
||||
/** Pre-computed mu-law → 16-bit linear PCM lookup table (256 entries). */
|
||||
const MULAW_DECODE: Int16Array = buildMulawDecodeTable();
|
||||
|
||||
function buildMulawDecodeTable(): Int16Array {
|
||||
const table = new Int16Array(256);
|
||||
for (let i = 0; i < 256; i++) {
|
||||
// Invert all bits per mu-law standard.
|
||||
let mu = ~i & 0xff;
|
||||
const sign = mu & 0x80;
|
||||
const exponent = (mu >> 4) & 0x07;
|
||||
const mantissa = mu & 0x0f;
|
||||
let magnitude = ((mantissa << 1) + 33) << (exponent + 2);
|
||||
magnitude -= 0x84; // Bias adjustment
|
||||
table[i] = sign ? -magnitude : magnitude;
|
||||
}
|
||||
return table;
|
||||
}
|
||||
|
||||
/** Decode a PCMU payload to 16-bit LE PCM. */
|
||||
function decodeMulaw(mulaw: Buffer): Buffer {
|
||||
const pcm = Buffer.alloc(mulaw.length * 2);
|
||||
for (let i = 0; i < mulaw.length; i++) {
|
||||
pcm.writeInt16LE(MULAW_DECODE[mulaw[i]], i * 2);
|
||||
}
|
||||
return pcm;
|
||||
}
|
||||
|
||||
// ---------------------------------------------------------------------------
|
||||
// AudioRecorder
|
||||
// ---------------------------------------------------------------------------
|
||||
|
||||
export class AudioRecorder {
|
||||
/** Current state. */
|
||||
state: 'idle' | 'recording' | 'stopped' = 'idle';
|
||||
|
||||
/** Called when recording stops automatically (silence or max duration). */
|
||||
onStopped: ((result: IRecordingResult) => void) | null = null;
|
||||
|
||||
private outputDir: string;
|
||||
private sampleRate: number;
|
||||
private maxDurationSec: number;
|
||||
private silenceTimeoutSec: number;
|
||||
private silenceThreshold: number;
|
||||
private log: (msg: string) => void;
|
||||
|
||||
private wavWriter: WavWriter | null = null;
|
||||
private filePath: string = '';
|
||||
private codecSessionId: string | null = null;
|
||||
private stopReason: TRecordingStopReason = 'manual';
|
||||
|
||||
// Silence detection.
|
||||
private consecutiveSilentFrames = 0;
|
||||
/** Number of 20ms frames that constitute silence timeout. */
|
||||
private silenceFrameThreshold = 0;
|
||||
|
||||
// Max duration timer.
|
||||
private maxDurationTimer: ReturnType<typeof setTimeout> | null = null;
|
||||
|
||||
// Processing queue to avoid concurrent transcodes.
|
||||
private processQueue: Promise<void> = Promise.resolve();
|
||||
|
||||
constructor(options: IRecordingOptions) {
|
||||
this.outputDir = options.outputDir;
|
||||
this.sampleRate = options.sampleRate ?? 8000;
|
||||
this.maxDurationSec = options.maxDurationSec ?? 120;
|
||||
this.silenceTimeoutSec = options.silenceTimeoutSec ?? 5;
|
||||
this.silenceThreshold = options.silenceThreshold ?? 200;
|
||||
this.log = options.log;
|
||||
}
|
||||
|
||||
/**
|
||||
* Start recording. Creates the output directory, WAV file, and codec session.
|
||||
* @param fileId - unique ID for the recording file name
|
||||
*/
|
||||
async start(fileId?: string): Promise<void> {
|
||||
if (this.state !== 'idle') return;
|
||||
|
||||
// Ensure output directory exists.
|
||||
if (!fs.existsSync(this.outputDir)) {
|
||||
fs.mkdirSync(this.outputDir, { recursive: true });
|
||||
}
|
||||
|
||||
// Generate file path.
|
||||
const id = fileId ?? `rec-${Date.now()}-${Math.random().toString(36).slice(2, 8)}`;
|
||||
this.filePath = path.join(this.outputDir, `${id}.wav`);
|
||||
|
||||
// Create a codec session for isolated decoding.
|
||||
this.codecSessionId = `recorder-${id}`;
|
||||
await createSession(this.codecSessionId);
|
||||
|
||||
// Open WAV writer.
|
||||
this.wavWriter = new WavWriter({
|
||||
filePath: this.filePath,
|
||||
sampleRate: this.sampleRate,
|
||||
});
|
||||
this.wavWriter.open();
|
||||
|
||||
// Silence detection threshold: frames in timeout period.
|
||||
this.silenceFrameThreshold = this.silenceTimeoutSec > 0
|
||||
? Math.ceil((this.silenceTimeoutSec * 1000) / 20)
|
||||
: 0;
|
||||
this.consecutiveSilentFrames = 0;
|
||||
|
||||
// Max duration timer.
|
||||
if (this.maxDurationSec > 0) {
|
||||
this.maxDurationTimer = setTimeout(() => {
|
||||
if (this.state === 'recording') {
|
||||
this.stopReason = 'max-duration';
|
||||
this.log(`[recorder] max duration reached (${this.maxDurationSec}s)`);
|
||||
this.stop().then((result) => this.onStopped?.(result));
|
||||
}
|
||||
}, this.maxDurationSec * 1000);
|
||||
}
|
||||
|
||||
this.state = 'recording';
|
||||
this.stopReason = 'manual';
|
||||
this.log(`[recorder] started → ${this.filePath}`);
|
||||
}
|
||||
|
||||
/**
|
||||
* Feed an RTP packet. Strips the 12-byte header, transcodes the payload
|
||||
* to PCMU via the Rust bridge, decodes to PCM, and writes to WAV.
|
||||
* Skips telephone-event (DTMF) and comfort noise packets.
|
||||
*/
|
||||
processRtp(data: Buffer): void {
|
||||
if (this.state !== 'recording') return;
|
||||
if (data.length < 13) return; // too short
|
||||
|
||||
const pt = data[1] & 0x7f;
|
||||
|
||||
// Skip DTMF (telephone-event) and comfort noise.
|
||||
if (pt === 101 || pt === 13) return;
|
||||
|
||||
const payload = data.subarray(12);
|
||||
if (payload.length === 0) return;
|
||||
|
||||
// Queue processing to avoid concurrent transcodes corrupting codec state.
|
||||
this.processQueue = this.processQueue.then(() => this.decodeAndWrite(payload, pt));
|
||||
}
|
||||
|
||||
/** Decode a single RTP payload to PCM and write to WAV. */
|
||||
private async decodeAndWrite(payload: Buffer, pt: number): Promise<void> {
|
||||
if (this.state !== 'recording' || !this.wavWriter) return;
|
||||
|
||||
let pcm: Buffer;
|
||||
|
||||
if (pt === 0) {
|
||||
// PCMU: decode directly in TypeScript (no Rust round-trip needed).
|
||||
pcm = decodeMulaw(payload);
|
||||
} else {
|
||||
// All other codecs: transcode to PCMU via Rust, then decode mu-law.
|
||||
const mulaw = await transcode(payload, pt, 0, this.codecSessionId ?? undefined);
|
||||
if (!mulaw) return;
|
||||
pcm = decodeMulaw(mulaw);
|
||||
}
|
||||
|
||||
// Silence detection.
|
||||
if (this.silenceFrameThreshold > 0) {
|
||||
if (isSilent(pcm, this.silenceThreshold)) {
|
||||
this.consecutiveSilentFrames++;
|
||||
if (this.consecutiveSilentFrames >= this.silenceFrameThreshold) {
|
||||
this.stopReason = 'silence';
|
||||
this.log(`[recorder] silence detected (${this.silenceTimeoutSec}s)`);
|
||||
this.stop().then((result) => this.onStopped?.(result));
|
||||
return;
|
||||
}
|
||||
} else {
|
||||
this.consecutiveSilentFrames = 0;
|
||||
}
|
||||
}
|
||||
|
||||
this.wavWriter.write(pcm);
|
||||
}
|
||||
|
||||
/**
|
||||
* Stop recording and finalize the WAV file.
|
||||
*/
|
||||
async stop(): Promise<IRecordingResult> {
|
||||
if (this.state === 'stopped' || this.state === 'idle') {
|
||||
return {
|
||||
filePath: this.filePath,
|
||||
durationMs: 0,
|
||||
sampleRate: this.sampleRate,
|
||||
fileSize: 0,
|
||||
stopReason: this.stopReason,
|
||||
};
|
||||
}
|
||||
|
||||
this.state = 'stopped';
|
||||
|
||||
// Wait for pending decode operations to finish.
|
||||
await this.processQueue;
|
||||
|
||||
// Clear timers.
|
||||
if (this.maxDurationTimer) {
|
||||
clearTimeout(this.maxDurationTimer);
|
||||
this.maxDurationTimer = null;
|
||||
}
|
||||
|
||||
// Finalize WAV.
|
||||
let wavResult: IWavWriterResult | null = null;
|
||||
if (this.wavWriter) {
|
||||
wavResult = this.wavWriter.close();
|
||||
this.wavWriter = null;
|
||||
}
|
||||
|
||||
// Destroy codec session.
|
||||
if (this.codecSessionId) {
|
||||
await destroySession(this.codecSessionId);
|
||||
this.codecSessionId = null;
|
||||
}
|
||||
|
||||
const result: IRecordingResult = {
|
||||
filePath: this.filePath,
|
||||
durationMs: wavResult?.durationMs ?? 0,
|
||||
sampleRate: this.sampleRate,
|
||||
fileSize: wavResult?.fileSize ?? 0,
|
||||
stopReason: this.stopReason,
|
||||
};
|
||||
|
||||
this.log(`[recorder] stopped (${result.stopReason}): ${result.durationMs}ms → ${this.filePath}`);
|
||||
return result;
|
||||
}
|
||||
|
||||
/** Cancel recording — stops and deletes the WAV file. */
|
||||
async cancel(): Promise<void> {
|
||||
this.stopReason = 'cancelled';
|
||||
await this.stop();
|
||||
|
||||
// Delete the incomplete file.
|
||||
try {
|
||||
if (fs.existsSync(this.filePath)) {
|
||||
fs.unlinkSync(this.filePath);
|
||||
this.log(`[recorder] cancelled — deleted ${this.filePath}`);
|
||||
}
|
||||
} catch { /* best effort */ }
|
||||
}
|
||||
|
||||
/** Clean up all resources. */
|
||||
destroy(): void {
|
||||
if (this.state === 'recording') {
|
||||
this.cancel();
|
||||
}
|
||||
this.onStopped = null;
|
||||
}
|
||||
}
|
||||
|
||||
// ---------------------------------------------------------------------------
|
||||
// Helpers
|
||||
// ---------------------------------------------------------------------------
|
||||
|
||||
/** Check if a PCM buffer is "silent" (max amplitude below threshold). */
|
||||
function isSilent(pcm: Buffer, threshold: number): boolean {
|
||||
let maxAmp = 0;
|
||||
for (let i = 0; i < pcm.length - 1; i += 2) {
|
||||
const sample = pcm.readInt16LE(i);
|
||||
const abs = sample < 0 ? -sample : sample;
|
||||
if (abs > maxAmp) maxAmp = abs;
|
||||
// Early exit: already above threshold.
|
||||
if (maxAmp >= threshold) return false;
|
||||
}
|
||||
return true;
|
||||
}
|
||||
File diff suppressed because it is too large
Load Diff
265
ts/call/call.ts
265
ts/call/call.ts
@@ -1,265 +0,0 @@
|
||||
/**
|
||||
* Call — the hub entity in the hub model.
|
||||
*
|
||||
* A Call owns N legs and bridges their media. For 2-party calls, RTP packets
|
||||
* from leg A are forwarded to leg B and vice versa. For N>2 party calls,
|
||||
* packets from each leg are forwarded to all other legs (fan-out).
|
||||
*
|
||||
* Transcoding is applied per-leg when codecs differ.
|
||||
*/
|
||||
|
||||
import { Buffer } from 'node:buffer';
|
||||
import type { ILeg } from './leg.ts';
|
||||
import type { TCallState, TCallDirection, ICallStatus } from './types.ts';
|
||||
import { RtpPortPool } from './rtp-port-pool.ts';
|
||||
import type { SipLeg } from './sip-leg.ts';
|
||||
|
||||
export class Call {
  readonly id: string;
  state: TCallState = 'setting-up';
  direction: TCallDirection;
  readonly createdAt: number;

  callerNumber: string | null = null;
  calleeNumber: string | null = null;
  providerUsed: string | null = null;

  /** All legs in this call. */
  private legs = new Map<string, ILeg>();

  /** Codec payload type for the "native" audio in the call (usually the first SIP leg's codec). */
  // NOTE(review): declared but never read or written anywhere in this class — confirm it is still needed.
  private nativeCodec: number | null = null;

  /** Port pool reference for cleanup. */
  private portPool: RtpPortPool;
  private log: (msg: string) => void;
  // Invoked whenever the call's observable state may have changed (dashboards etc.).
  private onChange: ((call: Call) => void) | null = null;

  constructor(options: {
    id: string;
    direction: TCallDirection;
    portPool: RtpPortPool;
    log: (msg: string) => void;
    onChange?: (call: Call) => void;
  }) {
    this.id = options.id;
    this.direction = options.direction;
    this.createdAt = Date.now();
    this.portPool = options.portPool;
    this.log = options.log;
    this.onChange = options.onChange ?? null;
  }

  // -------------------------------------------------------------------------
  // Leg management
  // -------------------------------------------------------------------------

  /** Add a leg to this call and wire up media forwarding. */
  addLeg(leg: ILeg): void {
    this.legs.set(leg.id, leg);

    // Wire up RTP forwarding: when this leg receives a packet, forward to all other legs.
    leg.onRtpReceived = (data: Buffer) => {
      this.forwardRtp(leg.id, data);
    };

    this.log(`[call:${this.id}] added leg ${leg.id} (${leg.type}), total=${this.legs.size}`);
    this.updateState();
  }

  /** Remove a leg from this call, tear it down, and release its port. */
  removeLeg(legId: string): void {
    const leg = this.legs.get(legId);
    if (!leg) return;

    // Detach media first so no packets are forwarded mid-teardown.
    leg.onRtpReceived = null;
    leg.teardown();
    if (leg.rtpPort) {
      this.portPool.release(leg.rtpPort);
    }
    this.legs.delete(legId);

    this.log(`[call:${this.id}] removed leg ${legId}, total=${this.legs.size}`);
    this.updateState();
  }

  /** Look up a leg by its internal ID, or null if absent. */
  getLeg(legId: string): ILeg | null {
    return this.legs.get(legId) ?? null;
  }

  /** Snapshot of all legs currently in the call. */
  getLegs(): ILeg[] {
    return [...this.legs.values()];
  }

  /** First leg with the given type ('sip-device', 'sip-provider', 'system', ...), or null. */
  getLegByType(type: string): ILeg | null {
    for (const leg of this.legs.values()) {
      if (leg.type === type) return leg;
    }
    return null;
  }

  /** Find the leg that owns a given SIP Call-ID (for signaling routing), or null. */
  getLegBySipCallId(sipCallId: string): ILeg | null {
    for (const leg of this.legs.values()) {
      if (leg.sipCallId === sipCallId) return leg;
    }
    return null;
  }

  /** Number of legs currently attached. */
  get legCount(): number {
    return this.legs.size;
  }

  // -------------------------------------------------------------------------
  // Media forwarding (the hub)
  // -------------------------------------------------------------------------

  /** Fan out an RTP packet from one leg to every other connected leg. */
  private forwardRtp(fromLegId: string, data: Buffer): void {
    for (const [id, leg] of this.legs) {
      if (id === fromLegId) continue;
      if (leg.state !== 'connected') continue;

      // For WebRTC legs, sendRtp calls forwardToBrowser which handles transcoding internally.
      // For SIP legs, forward the raw packet (same codec path) or let the leg handle it.
      // The Call hub does NOT transcode — that's the leg's responsibility.
      leg.sendRtp(data);
    }
  }

  // -------------------------------------------------------------------------
  // State management
  // -------------------------------------------------------------------------

  /**
   * Recompute the aggregate call state from the individual leg states and
   * notify the onChange observer. Terminal states are sticky: once the call
   * is terminating/terminated this is a no-op.
   */
  private updateState(): void {
    if (this.state === 'terminated' || this.state === 'terminating') return;

    const legs = [...this.legs.values()];
    if (legs.length === 0) {
      this.state = 'terminated';
    } else if (legs.every((l) => l.state === 'terminated')) {
      this.state = 'terminated';
    } else if (legs.some((l) => l.state === 'connected') && legs.filter((l) => l.state !== 'terminated').length >= 2) {
      // If a system leg is connected, report voicemail/ivr state for the dashboard.
      const systemLeg = legs.find((l) => l.type === 'system');
      if (systemLeg) {
        // Keep voicemail/ivr state if already set; otherwise set connected.
        if (this.state !== 'voicemail' && this.state !== 'ivr') {
          this.state = 'connected';
        }
      } else {
        this.state = 'connected';
      }
    } else if (legs.some((l) => l.state === 'ringing')) {
      this.state = 'ringing';
    } else {
      this.state = 'setting-up';
    }

    this.onChange?.(this);
  }

  /** Notify the call that a leg's state has changed. */
  notifyLegStateChange(_leg: ILeg): void {
    this.updateState();
  }

  // -------------------------------------------------------------------------
  // Hangup
  // -------------------------------------------------------------------------

  /** Tear down all legs and terminate the call. Idempotent. */
  hangup(): void {
    if (this.state === 'terminated' || this.state === 'terminating') return;
    this.state = 'terminating';
    this.log(`[call:${this.id}] hanging up (${this.legs.size} legs)`);

    for (const [id, leg] of this.legs) {
      // Send BYE/CANCEL for SIP legs (system legs have no SIP signaling).
      if (leg.type === 'sip-device' || leg.type === 'sip-provider') {
        (leg as SipLeg).sendHangup();
      }
      leg.teardown();
      if (leg.rtpPort) {
        this.portPool.release(leg.rtpPort);
      }
    }
    this.legs.clear();

    this.state = 'terminated';
    this.onChange?.(this);
  }

  /**
   * Handle a BYE from one leg — tear down the other legs.
   * Called by CallManager when a SipLeg receives a BYE.
   */
  handleLegTerminated(terminatedLegId: string): void {
    const terminatedLeg = this.legs.get(terminatedLegId);
    if (!terminatedLeg) return;

    // Remove the terminated leg.
    // NOTE(review): unlike removeLeg()/hangup(), teardown() is NOT called on
    // the terminated leg here — presumably its resources are closed while
    // processing the remote BYE; confirm the RTP socket is not leaked.
    terminatedLeg.onRtpReceived = null;
    if (terminatedLeg.rtpPort) {
      this.portPool.release(terminatedLeg.rtpPort);
    }
    this.legs.delete(terminatedLegId);

    // If this is a 2-party call, hang up the other leg too.
    if (this.legs.size <= 1) {
      for (const [id, leg] of this.legs) {
        // Send BYE/CANCEL for SIP legs (system legs just get torn down).
        if (leg.type === 'sip-device' || leg.type === 'sip-provider') {
          (leg as SipLeg).sendHangup();
        }
        leg.teardown();
        if (leg.rtpPort) {
          this.portPool.release(leg.rtpPort);
        }
      }
      this.legs.clear();
      this.state = 'terminated';
      this.log(`[call:${this.id}] terminated`);
      this.onChange?.(this);
    } else {
      // 3+ party call: the remaining legs keep talking.
      this.log(`[call:${this.id}] leg ${terminatedLegId} removed, ${this.legs.size} remaining`);
      this.updateState();
    }
  }

  // -------------------------------------------------------------------------
  // Transfer
  // -------------------------------------------------------------------------

  /**
   * Detach a leg from this call (without tearing it down).
   * The leg can then be added to another call.
   */
  detachLeg(legId: string): ILeg | null {
    const leg = this.legs.get(legId);
    if (!leg) return null;

    // Unhook media forwarding but keep the leg (and its port) alive.
    leg.onRtpReceived = null;
    this.legs.delete(legId);

    this.log(`[call:${this.id}] detached leg ${legId}`);
    this.updateState();
    return leg;
  }

  // -------------------------------------------------------------------------
  // Status
  // -------------------------------------------------------------------------

  /** Build a serializable snapshot of the call for dashboards/APIs. */
  getStatus(): ICallStatus {
    return {
      id: this.id,
      state: this.state,
      direction: this.direction,
      callerNumber: this.callerNumber,
      calleeNumber: this.calleeNumber,
      providerUsed: this.providerUsed,
      createdAt: this.createdAt,
      duration: Math.floor((Date.now() - this.createdAt) / 1000),
      legs: [...this.legs.values()].map((l) => l.getStatus()),
    };
  }
}
|
||||
@@ -1,272 +0,0 @@
|
||||
/**
|
||||
* DTMF detection — parses RFC 2833 telephone-event RTP packets
|
||||
* and SIP INFO (application/dtmf-relay) messages.
|
||||
*
|
||||
* Designed to be attached to any leg or RTP stream. The detector
|
||||
* deduplicates repeated telephone-event packets (same digit is sent
|
||||
* multiple times with increasing duration) and fires a callback
|
||||
* once per detected digit.
|
||||
*/
|
||||
|
||||
import { Buffer } from 'node:buffer';
|
||||
import type { SipMessage } from '../sip/index.ts';
|
||||
|
||||
// ---------------------------------------------------------------------------
|
||||
// Types
|
||||
// ---------------------------------------------------------------------------
|
||||
|
||||
/** A single detected DTMF digit. */
export interface IDtmfDigit {
  /** The digit character: '0'-'9', '*', '#', 'A'-'D'. */
  digit: string;
  /** Duration in milliseconds. */
  durationMs: number;
  /** Detection source. */
  source: 'rfc2833' | 'sip-info';
  /** Wall-clock timestamp when the digit was detected. */
  timestamp: number;
}

/** Callback fired once per detected DTMF digit. */
export type TDtmfCallback = (digit: IDtmfDigit) => void;

// ---------------------------------------------------------------------------
// Constants
// ---------------------------------------------------------------------------

/** RFC 2833 event ID → character mapping (event IDs 0-15). */
const EVENT_CHARS: string[] = [
  '0', '1', '2', '3', '4', '5', '6', '7', '8', '9',
  '*', '#', 'A', 'B', 'C', 'D',
];

/** Safety timeout: report digit if no End packet arrives within this many ms. */
const SAFETY_TIMEOUT_MS = 200;
|
||||
|
||||
// ---------------------------------------------------------------------------
|
||||
// DtmfDetector
|
||||
// ---------------------------------------------------------------------------
|
||||
|
||||
/**
 * Detects DTMF digits from RFC 2833 RTP packets and SIP INFO messages.
 *
 * Usage:
 * ```
 * const detector = new DtmfDetector(log);
 * detector.onDigit = (d) => console.log('DTMF:', d.digit);
 * // Feed every RTP packet (detector checks PT internally):
 * detector.processRtp(rtpPacket);
 * // Or feed a SIP INFO message:
 * detector.processSipInfo(sipMsg);
 * ```
 */
export class DtmfDetector {
  /** Callback fired once per detected digit. */
  onDigit: TDtmfCallback | null = null;

  /** Negotiated telephone-event payload type (default 101). */
  private telephoneEventPt: number;

  /** Clock rate for duration calculation (default 8000 Hz). */
  private clockRate: number;

  // -- Deduplication state for RFC 2833 --
  /** Event ID of the digit currently being received. */
  private currentEventId: number | null = null;
  /** RTP timestamp of the first packet for the current event. */
  private currentEventTs: number | null = null;
  /** Whether the current event has already been reported. */
  private currentEventReported = false;
  /** Latest duration value seen (in clock ticks). */
  private currentEventDuration = 0;
  /** Latest volume value seen (dBm0, 0 = loudest). */
  // NOTE(review): volume is parsed and stored but never surfaced in IDtmfDigit.
  private currentEventVolume = 0;
  /** Safety timer: fires if no End packet arrives. */
  private safetyTimer: ReturnType<typeof setTimeout> | null = null;

  private log: (msg: string) => void;

  constructor(
    log: (msg: string) => void,
    telephoneEventPt = 101,
    clockRate = 8000,
  ) {
    this.log = log;
    this.telephoneEventPt = telephoneEventPt;
    this.clockRate = clockRate;
  }

  // -------------------------------------------------------------------------
  // RFC 2833 RTP processing
  // -------------------------------------------------------------------------

  /**
   * Feed an RTP packet. Checks PT; ignores non-DTMF packets.
   * Expects the full RTP packet (12-byte header + payload).
   * Fires onDigit exactly once per keypress: on the End-bit packet, or via
   * the safety timer / next event start if the End packet is lost.
   */
  processRtp(data: Buffer): void {
    if (data.length < 16) return; // 12-byte header + 4-byte telephone-event payload minimum

    const pt = data[1] & 0x7f;
    if (pt !== this.telephoneEventPt) return;

    // Parse RTP header fields we need.
    const marker = (data[1] & 0x80) !== 0;
    const rtpTimestamp = data.readUInt32BE(4);

    // Parse telephone-event payload (4 bytes starting at offset 12).
    const eventId = data[12];
    const endBit = (data[13] & 0x80) !== 0;
    const volume = data[13] & 0x3f;
    const duration = data.readUInt16BE(14);

    // Validate event ID.
    if (eventId >= EVENT_CHARS.length) return;

    // Detect new event: marker bit, different event ID, or different RTP timestamp.
    // (All retransmissions of the same keypress share one RTP timestamp.)
    const isNewEvent =
      marker ||
      eventId !== this.currentEventId ||
      rtpTimestamp !== this.currentEventTs;

    if (isNewEvent) {
      // If there was an unreported previous event, report it now (fallback).
      this.reportPendingEvent();

      // Start tracking the new event.
      this.currentEventId = eventId;
      this.currentEventTs = rtpTimestamp;
      this.currentEventReported = false;
      this.currentEventDuration = duration;
      this.currentEventVolume = volume;

      // Start safety timer.
      this.clearSafetyTimer();
      this.safetyTimer = setTimeout(() => {
        this.reportPendingEvent();
      }, SAFETY_TIMEOUT_MS);
    }

    // Update duration (it increases with each retransmission).
    if (duration > this.currentEventDuration) {
      this.currentEventDuration = duration;
    }

    // Report on End bit (first time only).
    if (endBit && !this.currentEventReported) {
      this.currentEventReported = true;
      this.clearSafetyTimer();

      const digit = EVENT_CHARS[eventId];
      // Duration field counts clock ticks (8000 Hz by default) → convert to ms.
      const durationMs = (this.currentEventDuration / this.clockRate) * 1000;

      this.log(`[dtmf] RFC 2833 digit '${digit}' (${Math.round(durationMs)}ms)`);
      this.onDigit?.({
        digit,
        durationMs,
        source: 'rfc2833',
        timestamp: Date.now(),
      });
    }
  }

  /** Report a pending (unreported) event — called by safety timer or on new event start. */
  private reportPendingEvent(): void {
    if (
      this.currentEventId !== null &&
      !this.currentEventReported &&
      this.currentEventId < EVENT_CHARS.length
    ) {
      this.currentEventReported = true;
      this.clearSafetyTimer();

      const digit = EVENT_CHARS[this.currentEventId];
      const durationMs = (this.currentEventDuration / this.clockRate) * 1000;

      this.log(`[dtmf] RFC 2833 digit '${digit}' (${Math.round(durationMs)}ms, safety timeout)`);
      this.onDigit?.({
        digit,
        durationMs,
        source: 'rfc2833',
        timestamp: Date.now(),
      });
    }
  }

  /** Cancel the pending safety timer, if any. */
  private clearSafetyTimer(): void {
    if (this.safetyTimer) {
      clearTimeout(this.safetyTimer);
      this.safetyTimer = null;
    }
  }

  // -------------------------------------------------------------------------
  // SIP INFO processing
  // -------------------------------------------------------------------------

  /**
   * Parse a SIP INFO message carrying DTMF.
   * Supports Content-Type: application/dtmf-relay (Signal=X / Duration=Y)
   * and bare application/dtmf (digit character in the body).
   */
  processSipInfo(msg: SipMessage): void {
    const ct = (msg.getHeader('Content-Type') || '').toLowerCase();
    if (!ct.includes('application/dtmf-relay') && !ct.includes('application/dtmf')) return;

    const body = msg.body || '';

    // dtmf-relay must be checked first: 'application/dtmf-relay' also
    // contains the substring 'application/dtmf'.
    if (ct.includes('application/dtmf-relay')) {
      // Format: "Signal= 5\r\nDuration= 160\r\n"
      const signalMatch = body.match(/Signal\s*=\s*(\S+)/i);
      const durationMatch = body.match(/Duration\s*=\s*(\d+)/i);
      if (!signalMatch) return;

      const signal = signalMatch[1];
      const durationTicks = durationMatch ? parseInt(durationMatch[1], 10) : 160;

      // Validate digit.
      if (signal.length !== 1 || !/[0-9*#A-Da-d]/.test(signal)) return;
      const digit = signal.toUpperCase();
      // NOTE(review): dtmf-relay Duration is conventionally already in
      // milliseconds; dividing by clockRate here treats it as clock ticks
      // (160 → 20ms at 8kHz). Confirm against the devices in use.
      const durationMs = (durationTicks / this.clockRate) * 1000;

      this.log(`[dtmf] SIP INFO digit '${digit}' (${Math.round(durationMs)}ms)`);
      this.onDigit?.({
        digit,
        durationMs,
        source: 'sip-info',
        timestamp: Date.now(),
      });
    } else if (ct.includes('application/dtmf')) {
      // Simple format: just the digit character in the body.
      const digit = body.trim().toUpperCase();
      if (digit.length !== 1 || !/[0-9*#A-D]/.test(digit)) return;

      this.log(`[dtmf] SIP INFO digit '${digit}' (application/dtmf)`);
      this.onDigit?.({
        digit,
        durationMs: 250, // default duration
        source: 'sip-info',
        timestamp: Date.now(),
      });
    }
  }

  // -------------------------------------------------------------------------
  // Lifecycle
  // -------------------------------------------------------------------------

  /** Reset detection state (e.g., between calls). */
  reset(): void {
    this.currentEventId = null;
    this.currentEventTs = null;
    this.currentEventReported = false;
    this.currentEventDuration = 0;
    this.currentEventVolume = 0;
    this.clearSafetyTimer();
  }

  /** Clean up timers and references. */
  destroy(): void {
    this.clearSafetyTimer();
    this.onDigit = null;
  }
}
|
||||
@@ -1,12 +0,0 @@
|
||||
// Shared call-domain types.
export type { TCallState, TLegState, TLegType, TCallDirection, ICallStatus, ILegStatus, ICallHistoryEntry } from './types.ts';
// Leg abstraction plus shared RTP helpers.
export type { ILeg } from './leg.ts';
export { rtpClockIncrement, buildRtpHeader, codecDisplayName } from './leg.ts';
// RTP port allocation.
export { RtpPortPool } from './rtp-port-pool.ts';
export type { IRtpAllocation } from './rtp-port-pool.ts';
// Concrete leg implementations (SIP devices/providers and WebRTC browsers).
export { SipLeg } from './sip-leg.ts';
export type { ISipLegConfig } from './sip-leg.ts';
export { WebRtcLeg } from './webrtc-leg.ts';
export type { IWebRtcLegConfig } from './webrtc-leg.ts';
// Call hub and top-level manager.
export { Call } from './call.ts';
export { CallManager } from './call-manager.ts';
export type { ICallManagerConfig } from './call-manager.ts';
|
||||
104
ts/call/leg.ts
104
ts/call/leg.ts
@@ -1,104 +0,0 @@
|
||||
/**
|
||||
* ILeg interface — abstract connection from a Call hub to an endpoint.
|
||||
*
|
||||
* Concrete implementations: SipLeg (SIP devices + providers) and WebRtcLeg (browsers).
|
||||
* Shared RTP utilities (header building, clock rates) are also defined here.
|
||||
*/
|
||||
|
||||
import { Buffer } from 'node:buffer';
|
||||
import type dgram from 'node:dgram';
|
||||
import type { IEndpoint } from '../sip/index.ts';
|
||||
import type { TLegState, TLegType, ILegStatus } from './types.ts';
|
||||
import type { IRtpTranscoder } from '../codec.ts';
|
||||
import type { SipDialog } from '../sip/index.ts';
|
||||
import type { SipMessage } from '../sip/index.ts';
|
||||
|
||||
// ---------------------------------------------------------------------------
|
||||
// ILeg interface
|
||||
// ---------------------------------------------------------------------------
|
||||
|
||||
export interface ILeg {
  /** Unique leg identifier within the owning Call. */
  readonly id: string;
  /** Discriminates the concrete implementation (SIP device/provider vs WebRTC). */
  readonly type: TLegType;
  /** Current lifecycle state of this leg. */
  state: TLegState;

  /** The SIP Call-ID used by this leg (for CallManager routing). */
  readonly sipCallId: string;

  /** Where this leg sends/receives RTP (null when no media socket is bound). */
  readonly rtpPort: number | null;
  readonly rtpSock: dgram.Socket | null;
  /** Remote RTP endpoint, once learned (from SDP or the first inbound packet). */
  remoteMedia: IEndpoint | null;

  /** Negotiated codec payload type (e.g. 9 = G.722, 111 = Opus). */
  codec: number | null;

  /** Transcoder for converting to this leg's codec (set by Call when codecs differ). */
  transcoder: IRtpTranscoder | null;

  /** Packet counters. */
  pktSent: number;
  pktReceived: number;

  /** SIP dialog (SipLegs only, null for WebRtcLegs). */
  readonly dialog: SipDialog | null;

  /**
   * Send an RTP packet toward this leg's remote endpoint.
   * If a transcoder is set, the Call should transcode before calling this.
   */
  sendRtp(data: Buffer): void;

  /**
   * Callback set by the owning Call — invoked when this leg receives an RTP packet.
   * The Call uses this to forward to other legs.
   */
  onRtpReceived: ((data: Buffer) => void) | null;

  /**
   * Handle an incoming SIP message routed to this leg (SipLegs only).
   * Returns void — any response is sent by the implementation itself
   * rather than returned to the caller.
   */
  handleSipMessage(msg: SipMessage, rinfo: IEndpoint): void;

  /** Release all resources (sockets, peer connections, etc.). */
  teardown(): void;

  /** Status snapshot for the dashboard. */
  getStatus(): ILegStatus;
}
|
||||
|
||||
// ---------------------------------------------------------------------------
|
||||
// Shared RTP utilities
|
||||
// ---------------------------------------------------------------------------
|
||||
|
||||
/** RTP clock increment per 20ms frame for each codec. */
|
||||
export function rtpClockIncrement(pt: number): number {
|
||||
if (pt === 111) return 960; // Opus: 48000 Hz x 0.02s
|
||||
if (pt === 9) return 160; // G.722: 8000 Hz x 0.02s (SDP clock rate quirk)
|
||||
return 160; // PCMU/PCMA: 8000 Hz x 0.02s
|
||||
}
|
||||
|
||||
/** Build a fresh RTP header with correct PT, timestamp, seq, SSRC. */
|
||||
export function buildRtpHeader(pt: number, seq: number, ts: number, ssrc: number, marker: boolean): Buffer {
|
||||
const hdr = Buffer.alloc(12);
|
||||
hdr[0] = 0x80; // V=2
|
||||
hdr[1] = (marker ? 0x80 : 0) | (pt & 0x7f);
|
||||
hdr.writeUInt16BE(seq & 0xffff, 2);
|
||||
hdr.writeUInt32BE(ts >>> 0, 4);
|
||||
hdr.writeUInt32BE(ssrc >>> 0, 8);
|
||||
return hdr;
|
||||
}
|
||||
|
||||
/** Codec name for status display. */
|
||||
export function codecDisplayName(pt: number | null): string | null {
|
||||
if (pt === null) return null;
|
||||
switch (pt) {
|
||||
case 0: return 'PCMU';
|
||||
case 8: return 'PCMA';
|
||||
case 9: return 'G.722';
|
||||
case 111: return 'Opus';
|
||||
case 101: return 'telephone-event';
|
||||
default: return `PT${pt}`;
|
||||
}
|
||||
}
|
||||
@@ -1,404 +0,0 @@
|
||||
/**
|
||||
* PromptCache — manages multiple named audio prompts for IVR and voicemail.
|
||||
*
|
||||
* Each prompt is pre-encoded as both G.722 frames (for SIP legs) and Opus
|
||||
* frames (for WebRTC legs), ready for 20ms RTP playback.
|
||||
*
|
||||
* Supports three sources:
|
||||
* 1. TTS generation via espeak-ng (primary) or Kokoro (fallback)
|
||||
* 2. Loading from a pre-existing WAV file
|
||||
* 3. Programmatic tone generation (beep, etc.)
|
||||
*
|
||||
* The existing announcement.ts system continues to work independently;
|
||||
* this module provides generalized prompt management for IVR/voicemail.
|
||||
*/
|
||||
|
||||
import { execFileSync, execSync } from 'node:child_process';
|
||||
import fs from 'node:fs';
|
||||
import path from 'node:path';
|
||||
import { Buffer } from 'node:buffer';
|
||||
import { buildRtpHeader, rtpClockIncrement } from './leg.ts';
|
||||
import { encodePcm, isCodecReady } from '../opusbridge.ts';
|
||||
|
||||
// ---------------------------------------------------------------------------
|
||||
// Types
|
||||
// ---------------------------------------------------------------------------
|
||||
|
||||
/** A pre-encoded prompt ready for RTP playback (20ms frames, no RTP headers). */
export interface ICachedPrompt {
  /** Unique prompt identifier. */
  id: string;
  /** G.722 encoded frames (20ms each, no RTP header) — used for SIP legs. */
  g722Frames: Buffer[];
  /** Opus encoded frames (20ms each, no RTP header) — used for WebRTC legs. */
  opusFrames: Buffer[];
  /** Total duration in milliseconds (frame count x 20ms). */
  durationMs: number;
}
|
||||
|
||||
// ---------------------------------------------------------------------------
|
||||
// TTS helpers
|
||||
// ---------------------------------------------------------------------------
|
||||
|
||||
const TTS_DIR = path.join(process.cwd(), '.nogit', 'tts');
|
||||
|
||||
/** Check if espeak-ng is available. */
|
||||
function isEspeakAvailable(): boolean {
|
||||
try {
|
||||
execSync('which espeak-ng', { stdio: 'pipe' });
|
||||
return true;
|
||||
} catch {
|
||||
return false;
|
||||
}
|
||||
}
|
||||
|
||||
/** Generate WAV via espeak-ng. */
|
||||
function generateViaEspeak(wavPath: string, text: string): boolean {
|
||||
try {
|
||||
execSync(
|
||||
`espeak-ng -v en-us -s 150 -w "${wavPath}" "${text}"`,
|
||||
{ timeout: 10000, stdio: 'pipe' },
|
||||
);
|
||||
return true;
|
||||
} catch {
|
||||
return false;
|
||||
}
|
||||
}
|
||||
|
||||
/** Generate WAV via Kokoro TTS. */
|
||||
function generateViaKokoro(wavPath: string, text: string, voice: string): boolean {
|
||||
const modelPath = path.join(TTS_DIR, 'kokoro-v1.0.onnx');
|
||||
const voicesPath = path.join(TTS_DIR, 'voices.bin');
|
||||
if (!fs.existsSync(modelPath) || !fs.existsSync(voicesPath)) return false;
|
||||
|
||||
const root = process.cwd();
|
||||
const ttsBin = [
|
||||
path.join(root, 'dist_rust', 'tts-engine'),
|
||||
path.join(root, 'rust', 'target', 'release', 'tts-engine'),
|
||||
path.join(root, 'rust', 'target', 'debug', 'tts-engine'),
|
||||
].find((p) => fs.existsSync(p));
|
||||
if (!ttsBin) return false;
|
||||
|
||||
try {
|
||||
execSync(
|
||||
`"${ttsBin}" --model "${modelPath}" --voices "${voicesPath}" --voice "${voice}" --output "${wavPath}" --text "${text}"`,
|
||||
{ timeout: 120000, stdio: 'pipe' },
|
||||
);
|
||||
return true;
|
||||
} catch {
|
||||
return false;
|
||||
}
|
||||
}
|
||||
|
||||
/** Read a WAV file and return raw PCM + sample rate. */
|
||||
function readWavWithRate(wavPath: string): { pcm: Buffer; sampleRate: number } | null {
|
||||
const wav = fs.readFileSync(wavPath);
|
||||
if (wav.length < 44) return null;
|
||||
if (wav.toString('ascii', 0, 4) !== 'RIFF') return null;
|
||||
if (wav.toString('ascii', 8, 12) !== 'WAVE') return null;
|
||||
|
||||
let sampleRate = 22050;
|
||||
let pcm: Buffer | null = null;
|
||||
let offset = 12;
|
||||
|
||||
while (offset < wav.length - 8) {
|
||||
const chunkId = wav.toString('ascii', offset, offset + 4);
|
||||
const chunkSize = wav.readUInt32LE(offset + 4);
|
||||
if (chunkId === 'fmt ') {
|
||||
sampleRate = wav.readUInt32LE(offset + 12);
|
||||
}
|
||||
if (chunkId === 'data') {
|
||||
pcm = wav.subarray(offset + 8, offset + 8 + chunkSize);
|
||||
}
|
||||
offset += 8 + chunkSize;
|
||||
if (offset % 2 !== 0) offset++;
|
||||
}
|
||||
|
||||
return pcm ? { pcm, sampleRate } : null;
|
||||
}
|
||||
|
||||
/** Encode raw PCM frames to G.722 + Opus. */
|
||||
async function encodePcmFrames(
|
||||
pcm: Buffer,
|
||||
sampleRate: number,
|
||||
log: (msg: string) => void,
|
||||
): Promise<{ g722Frames: Buffer[]; opusFrames: Buffer[] } | null> {
|
||||
if (!isCodecReady()) return null;
|
||||
|
||||
const frameSamples = Math.floor(sampleRate * 0.02); // 20ms
|
||||
const frameBytes = frameSamples * 2; // 16-bit
|
||||
const totalFrames = Math.floor(pcm.length / frameBytes);
|
||||
|
||||
const g722Frames: Buffer[] = [];
|
||||
const opusFrames: Buffer[] = [];
|
||||
|
||||
for (let i = 0; i < totalFrames; i++) {
|
||||
const framePcm = Buffer.from(pcm.subarray(i * frameBytes, (i + 1) * frameBytes));
|
||||
const [g722, opus] = await Promise.all([
|
||||
encodePcm(framePcm, sampleRate, 9), // G.722
|
||||
encodePcm(framePcm, sampleRate, 111), // Opus
|
||||
]);
|
||||
if (g722) g722Frames.push(g722);
|
||||
if (opus) opusFrames.push(opus);
|
||||
}
|
||||
|
||||
return { g722Frames, opusFrames };
|
||||
}
|
||||
|
||||
// ---------------------------------------------------------------------------
|
||||
// PromptCache
|
||||
// ---------------------------------------------------------------------------
|
||||
|
||||
/**
 * In-memory cache of named audio prompts, each pre-encoded as both G.722 and
 * Opus 20ms frames. Prompts come from TTS (espeak-ng, Kokoro fallback), a WAV
 * file on disk, or programmatic tone generation.
 */
export class PromptCache {
  // Prompt ID -> pre-encoded frames.
  private prompts = new Map<string, ICachedPrompt>();
  private log: (msg: string) => void;
  // Lazily-probed espeak availability; null = not yet checked.
  private espeakAvailable: boolean | null = null;

  constructor(log: (msg: string) => void) {
    this.log = log;
  }

  // -------------------------------------------------------------------------
  // Public API
  // -------------------------------------------------------------------------

  /** Get a cached prompt by ID. */
  get(id: string): ICachedPrompt | null {
    return this.prompts.get(id) ?? null;
  }

  /** Check if a prompt is cached. */
  has(id: string): boolean {
    return this.prompts.has(id);
  }

  /** List all cached prompt IDs. */
  listIds(): string[] {
    return [...this.prompts.keys()];
  }

  /**
   * Generate a TTS prompt and cache it.
   * Uses espeak-ng (primary) or Kokoro (fallback). A previously generated WAV
   * on disk is reused without re-running TTS.
   */
  async generatePrompt(id: string, text: string, voice = 'af_bella'): Promise<ICachedPrompt | null> {
    fs.mkdirSync(TTS_DIR, { recursive: true });
    const wavPath = path.join(TTS_DIR, `prompt-${id}.wav`);

    // Check espeak availability once.
    if (this.espeakAvailable === null) {
      this.espeakAvailable = isEspeakAvailable();
    }

    // Generate WAV (skipped when a cached file already exists).
    let generated = false;
    if (!fs.existsSync(wavPath)) {
      if (this.espeakAvailable) {
        generated = generateViaEspeak(wavPath, text);
      }
      if (!generated) {
        generated = generateViaKokoro(wavPath, text, voice);
      }
      if (!generated) {
        this.log(`[prompt-cache] failed to generate TTS for "${id}"`);
        return null;
      }
      this.log(`[prompt-cache] generated WAV for "${id}"`);
    }

    return this.loadWavPrompt(id, wavPath);
  }

  /**
   * Load a WAV file as a prompt and cache it.
   * Returns null when the file is missing, unparseable, or encoding fails.
   */
  async loadWavPrompt(id: string, wavPath: string): Promise<ICachedPrompt | null> {
    if (!fs.existsSync(wavPath)) {
      this.log(`[prompt-cache] WAV not found: ${wavPath}`);
      return null;
    }

    const result = readWavWithRate(wavPath);
    if (!result) {
      this.log(`[prompt-cache] failed to parse WAV: ${wavPath}`);
      return null;
    }

    const encoded = await encodePcmFrames(result.pcm, result.sampleRate, this.log);
    if (!encoded) {
      this.log(`[prompt-cache] encoding failed for "${id}" (codec bridge not ready?)`);
      return null;
    }

    // Duration derived from the G.722 frame count (20ms per frame).
    const durationMs = encoded.g722Frames.length * 20;
    const prompt: ICachedPrompt = {
      id,
      g722Frames: encoded.g722Frames,
      opusFrames: encoded.opusFrames,
      durationMs,
    };

    this.prompts.set(id, prompt);
    this.log(`[prompt-cache] cached "${id}": ${encoded.g722Frames.length} frames (${(durationMs / 1000).toFixed(1)}s)`);
    return prompt;
  }

  /**
   * Generate a beep tone prompt (sine wave).
   * @param id - prompt ID
   * @param freqHz - tone frequency (default 1000 Hz)
   * @param durationMs - tone duration (default 500ms)
   * @param amplitude - 16-bit amplitude (default 8000)
   */
  async generateBeep(
    id: string,
    freqHz = 1000,
    durationMs = 500,
    amplitude = 8000,
  ): Promise<ICachedPrompt | null> {
    // Generate at 16kHz for decent quality.
    const sampleRate = 16000;
    const totalSamples = Math.floor((sampleRate * durationMs) / 1000);
    const pcm = Buffer.alloc(totalSamples * 2);

    for (let i = 0; i < totalSamples; i++) {
      const t = i / sampleRate;
      // Apply a short fade-in/fade-out to avoid click artifacts.
      const fadeLen = Math.floor(sampleRate * 0.01); // 10ms fade
      let envelope = 1.0;
      if (i < fadeLen) envelope = i / fadeLen;
      else if (i > totalSamples - fadeLen) envelope = (totalSamples - i) / fadeLen;

      // Clamp to the signed 16-bit range before writing.
      const sample = Math.round(Math.sin(2 * Math.PI * freqHz * t) * amplitude * envelope);
      pcm.writeInt16LE(Math.max(-32768, Math.min(32767, sample)), i * 2);
    }

    const encoded = await encodePcmFrames(pcm, sampleRate, this.log);
    if (!encoded) {
      this.log(`[prompt-cache] beep encoding failed for "${id}"`);
      return null;
    }

    const actualDuration = encoded.g722Frames.length * 20;
    const prompt: ICachedPrompt = {
      id,
      g722Frames: encoded.g722Frames,
      opusFrames: encoded.opusFrames,
      durationMs: actualDuration,
    };

    this.prompts.set(id, prompt);
    this.log(`[prompt-cache] beep "${id}" cached: ${actualDuration}ms @ ${freqHz}Hz`);
    return prompt;
  }

  /**
   * Remove a prompt from the cache.
   */
  remove(id: string): void {
    this.prompts.delete(id);
  }

  /**
   * Clear all cached prompts.
   */
  clear(): void {
    this.prompts.clear();
  }
}
|
||||
|
||||
// ---------------------------------------------------------------------------
|
||||
// Standalone playback helpers (for use by SystemLeg)
|
||||
// ---------------------------------------------------------------------------
|
||||
|
||||
/**
|
||||
* Play a cached prompt's G.722 frames as RTP packets at 20ms intervals.
|
||||
*
|
||||
* @param prompt - the cached prompt to play
|
||||
* @param sendPacket - function to send a raw RTP packet (12-byte header + payload)
|
||||
* @param ssrc - SSRC for RTP headers
|
||||
* @param onDone - called when playback finishes
|
||||
* @returns cancel function, or null if prompt has no G.722 frames
|
||||
*/
|
||||
export function playPromptG722(
|
||||
prompt: ICachedPrompt,
|
||||
sendPacket: (pkt: Buffer) => void,
|
||||
ssrc: number,
|
||||
onDone?: () => void,
|
||||
): (() => void) | null {
|
||||
if (prompt.g722Frames.length === 0) {
|
||||
onDone?.();
|
||||
return null;
|
||||
}
|
||||
|
||||
const frames = prompt.g722Frames;
|
||||
const PT = 9;
|
||||
let frameIdx = 0;
|
||||
let seq = Math.floor(Math.random() * 0xffff);
|
||||
let rtpTs = Math.floor(Math.random() * 0xffffffff);
|
||||
|
||||
const timer = setInterval(() => {
|
||||
if (frameIdx >= frames.length) {
|
||||
clearInterval(timer);
|
||||
onDone?.();
|
||||
return;
|
||||
}
|
||||
|
||||
const payload = frames[frameIdx];
|
||||
const hdr = buildRtpHeader(PT, seq & 0xffff, rtpTs >>> 0, ssrc >>> 0, frameIdx === 0);
|
||||
const pkt = Buffer.concat([hdr, payload]);
|
||||
sendPacket(pkt);
|
||||
|
||||
seq++;
|
||||
rtpTs += rtpClockIncrement(PT);
|
||||
frameIdx++;
|
||||
}, 20);
|
||||
|
||||
return () => clearInterval(timer);
|
||||
}
|
||||
|
||||
/**
|
||||
* Play a cached prompt's Opus frames as RTP packets at 20ms intervals.
|
||||
*
|
||||
* @param prompt - the cached prompt to play
|
||||
* @param sendPacket - function to send a raw RTP packet
|
||||
* @param ssrc - SSRC for RTP headers
|
||||
* @param counters - shared seq/ts counters (mutated in place for seamless transitions)
|
||||
* @param onDone - called when playback finishes
|
||||
* @returns cancel function, or null if prompt has no Opus frames
|
||||
*/
|
||||
export function playPromptOpus(
|
||||
prompt: ICachedPrompt,
|
||||
sendPacket: (pkt: Buffer) => void,
|
||||
ssrc: number,
|
||||
counters: { seq: number; ts: number },
|
||||
onDone?: () => void,
|
||||
): (() => void) | null {
|
||||
if (prompt.opusFrames.length === 0) {
|
||||
onDone?.();
|
||||
return null;
|
||||
}
|
||||
|
||||
const frames = prompt.opusFrames;
|
||||
const PT = 111;
|
||||
let frameIdx = 0;
|
||||
|
||||
const timer = setInterval(() => {
|
||||
if (frameIdx >= frames.length) {
|
||||
clearInterval(timer);
|
||||
onDone?.();
|
||||
return;
|
||||
}
|
||||
|
||||
const payload = frames[frameIdx];
|
||||
const hdr = buildRtpHeader(PT, counters.seq & 0xffff, counters.ts >>> 0, ssrc >>> 0, frameIdx === 0);
|
||||
const pkt = Buffer.concat([hdr, payload]);
|
||||
sendPacket(pkt);
|
||||
|
||||
counters.seq++;
|
||||
counters.ts += 960; // Opus 48kHz: 960 samples per 20ms
|
||||
frameIdx++;
|
||||
}, 20);
|
||||
|
||||
return () => clearInterval(timer);
|
||||
}
|
||||
@@ -1,71 +0,0 @@
|
||||
/**
|
||||
* Unified RTP port pool — replaces the three separate allocators
|
||||
* in sipproxy.ts, calloriginator.ts, and webrtcbridge.ts.
|
||||
*
|
||||
* Allocates even-numbered UDP ports from a configured range.
|
||||
* Each allocation binds a dgram socket and returns it ready to use.
|
||||
*/
|
||||
|
||||
import dgram from 'node:dgram';
|
||||
|
||||
/** A bound RTP socket together with the even UDP port it occupies. */
export interface IRtpAllocation {
  /** The allocated (even) UDP port number. */
  port: number;
  /** The dgram socket already bound to `port`. */
  sock: dgram.Socket;
}
|
||||
|
||||
export class RtpPortPool {
|
||||
private min: number;
|
||||
private max: number;
|
||||
private allocated = new Map<number, dgram.Socket>();
|
||||
private log: (msg: string) => void;
|
||||
|
||||
constructor(min: number, max: number, log: (msg: string) => void) {
|
||||
this.min = min % 2 === 0 ? min : min + 1; // ensure even start
|
||||
this.max = max;
|
||||
this.log = log;
|
||||
}
|
||||
|
||||
/**
|
||||
* Allocate an even-numbered port and bind a UDP socket to it.
|
||||
* Returns null if the pool is exhausted.
|
||||
*/
|
||||
allocate(): IRtpAllocation | null {
|
||||
for (let port = this.min; port < this.max; port += 2) {
|
||||
if (this.allocated.has(port)) continue;
|
||||
|
||||
const sock = dgram.createSocket('udp4');
|
||||
try {
|
||||
sock.bind(port, '0.0.0.0');
|
||||
} catch {
|
||||
try { sock.close(); } catch { /* ignore */ }
|
||||
continue;
|
||||
}
|
||||
this.allocated.set(port, sock);
|
||||
this.log(`[rtp-pool] allocated port ${port} (${this.allocated.size} in use)`);
|
||||
return { port, sock };
|
||||
}
|
||||
this.log('[rtp-pool] WARN: port pool exhausted');
|
||||
return null;
|
||||
}
|
||||
|
||||
/**
|
||||
* Release a port back to the pool and close its socket.
|
||||
*/
|
||||
release(port: number): void {
|
||||
const sock = this.allocated.get(port);
|
||||
if (!sock) return;
|
||||
try { sock.close(); } catch { /* ignore */ }
|
||||
this.allocated.delete(port);
|
||||
this.log(`[rtp-pool] released port ${port} (${this.allocated.size} in use)`);
|
||||
}
|
||||
|
||||
/** Number of currently allocated ports. */
|
||||
get size(): number {
|
||||
return this.allocated.size;
|
||||
}
|
||||
|
||||
/** Total capacity (number of even ports in range). */
|
||||
get capacity(): number {
|
||||
return Math.floor((this.max - this.min) / 2);
|
||||
}
|
||||
}
|
||||
@@ -1,633 +0,0 @@
|
||||
/**
|
||||
* SipLeg — a SIP connection from the Call hub to a device or provider.
|
||||
*
|
||||
* Wraps a SipDialog and an RTP socket. Handles:
|
||||
* - INVITE/ACK/BYE/CANCEL lifecycle
|
||||
* - SDP rewriting (LAN IP for devices, public IP for providers)
|
||||
* - Digest auth for provider legs (407/401)
|
||||
* - Early-media silence for providers with quirks
|
||||
* - Record-Route insertion for dialog-establishing requests
|
||||
*/
|
||||
|
||||
import dgram from 'node:dgram';
|
||||
import { Buffer } from 'node:buffer';
|
||||
import {
|
||||
SipMessage,
|
||||
SipDialog,
|
||||
buildSdp,
|
||||
parseSdpEndpoint,
|
||||
rewriteSdp,
|
||||
rewriteSipUri,
|
||||
parseDigestChallenge,
|
||||
computeDigestAuth,
|
||||
generateTag,
|
||||
} from '../sip/index.ts';
|
||||
import type { IEndpoint } from '../sip/index.ts';
|
||||
import type { IProviderConfig, IQuirks } from '../config.ts';
|
||||
import type { TLegState, TLegType, ILegStatus } from './types.ts';
|
||||
import type { ILeg } from './leg.ts';
|
||||
import { codecDisplayName } from './leg.ts';
|
||||
import type { IRtpTranscoder } from '../codec.ts';
|
||||
|
||||
// ---------------------------------------------------------------------------
|
||||
// SipLeg config
|
||||
// ---------------------------------------------------------------------------
|
||||
|
||||
/** Construction-time configuration for a SipLeg. */
export interface ISipLegConfig {
  /** Whether this leg faces a device (LAN) or a provider (WAN). */
  role: 'device' | 'provider';

  /** Proxy LAN IP (for SDP rewriting toward devices). */
  lanIp: string;
  /** Proxy LAN port (for Via, Contact, Record-Route). */
  lanPort: number;

  /** Public IP (for SDP rewriting toward providers); null when not yet known. */
  getPublicIp: () => string | null;

  /** Send a SIP message via the main UDP socket. */
  sendSip: (buf: Buffer, dest: IEndpoint) => void;
  /** Logging function. */
  log: (msg: string) => void;

  /** Provider config (for provider legs: auth, codecs, quirks, outbound proxy). */
  provider?: IProviderConfig;

  /** The endpoint to send SIP messages to (device address or provider outbound proxy). */
  sipTarget: IEndpoint;

  /** RTP port and socket (pre-allocated from the pool). */
  rtpPort: number;
  rtpSock: dgram.Socket;

  /** Payload types to offer in SDP; defaults applied by the leg when omitted. */
  payloadTypes?: number[];

  /** Registered AOR (for From header in provider leg). */
  getRegisteredAor?: () => string | null;
  /** SIP password (for digest auth). */
  getSipPassword?: () => string | null;
}
|
||||
|
||||
// ---------------------------------------------------------------------------
|
||||
// SipLeg
|
||||
// ---------------------------------------------------------------------------
|
||||
|
||||
export class SipLeg implements ILeg {
|
||||
readonly id: string;
|
||||
readonly type: TLegType;
|
||||
state: TLegState = 'inviting';
|
||||
readonly config: ISipLegConfig;
|
||||
|
||||
/** The SIP dialog for this leg. */
|
||||
dialog: SipDialog | null = null;
|
||||
|
||||
/** Original INVITE (needed for CANCEL). */
|
||||
invite: SipMessage | null = null;
|
||||
|
||||
/** Original unauthenticated INVITE (for re-ACKing retransmitted 407s). */
|
||||
private origInvite: SipMessage | null = null;
|
||||
|
||||
/** Whether we've attempted digest auth on this leg. */
|
||||
private authAttempted = false;
|
||||
|
||||
/** RTP socket and port. */
|
||||
readonly rtpPort: number;
|
||||
readonly rtpSock: dgram.Socket;
|
||||
|
||||
/** Remote media endpoint (learned from SDP). */
|
||||
remoteMedia: IEndpoint | null = null;
|
||||
|
||||
/** Negotiated codec. */
|
||||
codec: number | null = null;
|
||||
|
||||
/** Transcoder (set by Call when codecs differ between legs). */
|
||||
transcoder: IRtpTranscoder | null = null;
|
||||
|
||||
/** Stable SSRC for this leg (used for silence + forwarded audio). */
|
||||
readonly ssrc: number = (Math.random() * 0xffffffff) >>> 0;
|
||||
|
||||
/** Packet counters. */
|
||||
pktSent = 0;
|
||||
pktReceived = 0;
|
||||
|
||||
/** Callback set by Call to receive RTP. */
|
||||
onRtpReceived: ((data: Buffer) => void) | null = null;
|
||||
|
||||
/** Silence stream timer (for provider quirks). */
|
||||
private silenceTimer: ReturnType<typeof setInterval> | null = null;
|
||||
|
||||
/** Callbacks for lifecycle events. */
|
||||
onStateChange: ((leg: SipLeg) => void) | null = null;
|
||||
onConnected: ((leg: SipLeg) => void) | null = null;
|
||||
onTerminated: ((leg: SipLeg) => void) | null = null;
|
||||
|
||||
/** Callback for SIP INFO messages (used for DTMF relay). */
|
||||
onInfoReceived: ((msg: SipMessage) => void) | null = null;
|
||||
|
||||
/**
 * Wire up the leg: derive the leg type from the configured role, adopt the
 * pre-allocated RTP socket, and install its receive/error handlers.
 */
constructor(id: string, config: ISipLegConfig) {
  this.id = id;
  this.type = config.role === 'device' ? 'sip-device' : 'sip-provider';
  this.config = config;
  this.rtpPort = config.rtpPort;
  this.rtpSock = config.rtpSock;

  // Set up RTP receive handler.
  this.rtpSock.on('message', (data: Buffer, rinfo: dgram.RemoteInfo) => {
    this.pktReceived++;

    // Learn remote media endpoint from first packet if not yet known
    // (latching — handles endpoints behind NAT whose SDP address is wrong).
    if (!this.remoteMedia) {
      this.remoteMedia = { address: rinfo.address, port: rinfo.port };
      this.config.log(`[sip-leg:${this.id}] learned remote media: ${rinfo.address}:${rinfo.port}`);
    }

    // Forward to the Call hub.
    if (this.onRtpReceived) {
      this.onRtpReceived(data);
    }
  });

  // Log (rather than crash on) socket-level RTP errors.
  this.rtpSock.on('error', (e: Error) => {
    this.config.log(`[sip-leg:${this.id}] rtp error: ${e.message}`);
  });
}
|
||||
|
||||
/** The SIP Call-ID of this leg's dialog ('no-dialog' before one exists). */
get sipCallId(): string {
  const dialogCallId = this.dialog?.callId;
  // '||' (not '??') intentionally treats an empty Call-ID as missing.
  return dialogCallId || 'no-dialog';
}
|
||||
|
||||
// -------------------------------------------------------------------------
|
||||
// Outbound INVITE (B2BUA mode — create a new dialog)
|
||||
// -------------------------------------------------------------------------
|
||||
|
||||
/**
|
||||
* Send an INVITE to establish this leg.
|
||||
* Creates a new SipDialog (UAC side).
|
||||
*/
|
||||
sendInvite(options: {
|
||||
fromUri: string;
|
||||
toUri: string;
|
||||
callId: string;
|
||||
fromTag?: string;
|
||||
fromDisplayName?: string;
|
||||
cseq?: number;
|
||||
extraHeaders?: [string, string][];
|
||||
}): void {
|
||||
const ip = this.type === 'sip-provider'
|
||||
? (this.config.getPublicIp() || this.config.lanIp)
|
||||
: this.config.lanIp;
|
||||
const pts = this.config.payloadTypes || [9, 0, 8, 101];
|
||||
|
||||
const sdp = buildSdp({ ip, port: this.rtpPort, payloadTypes: pts });
|
||||
|
||||
const invite = SipMessage.createRequest('INVITE', options.toUri, {
|
||||
via: { host: ip, port: this.config.lanPort },
|
||||
from: { uri: options.fromUri, displayName: options.fromDisplayName, tag: options.fromTag },
|
||||
to: { uri: options.toUri },
|
||||
callId: options.callId,
|
||||
cseq: options.cseq,
|
||||
contact: `<sip:${ip}:${this.config.lanPort}>`,
|
||||
body: sdp,
|
||||
contentType: 'application/sdp',
|
||||
extraHeaders: options.extraHeaders,
|
||||
});
|
||||
|
||||
this.invite = invite;
|
||||
this.dialog = SipDialog.fromUacInvite(invite, ip, this.config.lanPort);
|
||||
this.state = 'inviting';
|
||||
|
||||
this.config.log(`[sip-leg:${this.id}] INVITE -> ${this.config.sipTarget.address}:${this.config.sipTarget.port}`);
|
||||
this.config.sendSip(invite.serialize(), this.config.sipTarget);
|
||||
}
|
||||
|
||||
// -------------------------------------------------------------------------
|
||||
// Passthrough mode — forward a SIP message with rewriting
|
||||
// -------------------------------------------------------------------------
|
||||
|
||||
/**
|
||||
* Accept an incoming INVITE as a UAS (for passthrough inbound calls).
|
||||
* Creates a SipDialog on the UAS side.
|
||||
*/
|
||||
acceptIncoming(invite: SipMessage): void {
|
||||
const localTag = generateTag();
|
||||
this.dialog = SipDialog.fromUasInvite(invite, localTag, this.config.lanIp, this.config.lanPort);
|
||||
this.invite = invite;
|
||||
this.state = 'inviting';
|
||||
|
||||
// Learn remote media from SDP.
|
||||
if (invite.hasSdpBody) {
|
||||
const ep = parseSdpEndpoint(invite.body);
|
||||
if (ep) {
|
||||
this.remoteMedia = ep;
|
||||
this.config.log(`[sip-leg:${this.id}] media from SDP: ${ep.address}:${ep.port}`);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/**
 * Forward a SIP message through this leg with SDP rewriting.
 * Used for passthrough calls where the proxy relays messages.
 *
 * Rewrite order matters: SDP body first (also learns the remote media
 * endpoint), then Record-Route, Contact, and finally the Request-URI,
 * before serializing to the destination.
 */
forwardMessage(msg: SipMessage, dest: IEndpoint): void {
  // Providers get our public IP; LAN devices get the LAN IP.
  const rewriteIp = this.type === 'sip-provider'
    ? (this.config.getPublicIp() || this.config.lanIp)
    : this.config.lanIp;

  // Rewrite SDP if present (and capture the original media endpoint).
  if (msg.hasSdpBody) {
    const { body, original } = rewriteSdp(msg.body, rewriteIp, this.rtpPort);
    msg.body = body;
    msg.updateContentLength(); // body changed — Content-Length must follow
    if (original) {
      this.remoteMedia = original;
      this.config.log(`[sip-leg:${this.id}] media from SDP rewrite: ${original.address}:${original.port}`);
    }
  }

  // Record-Route for dialog-establishing requests, so in-dialog messages
  // keep flowing through the proxy.
  if (msg.isRequest && msg.isDialogEstablishing) {
    msg.prependHeader('Record-Route', `<sip:${this.config.lanIp}:${this.config.lanPort};lr>`);
  }

  // Rewrite Contact so the provider addresses us, not the internal device.
  if (this.type === 'sip-provider') {
    const contact = msg.getHeader('Contact');
    if (contact) {
      const nc = rewriteSipUri(contact, rewriteIp, this.config.lanPort);
      if (nc !== contact) msg.setHeader('Contact', nc);
    }
  }

  // Rewrite Request-URI for inbound messages going to device.
  if (this.type === 'sip-device' && msg.isRequest) {
    msg.setRequestUri(rewriteSipUri(msg.requestUri!, dest.address, dest.port));
  }

  this.config.sendSip(msg.serialize(), dest);
}
|
||||
|
||||
// -------------------------------------------------------------------------
|
||||
// SIP message handling (routed by CallManager)
|
||||
// -------------------------------------------------------------------------
|
||||
|
||||
handleSipMessage(msg: SipMessage, rinfo: IEndpoint): void {
|
||||
if (msg.isResponse) {
|
||||
this.handleResponse(msg, rinfo);
|
||||
} else {
|
||||
this.handleRequest(msg, rinfo);
|
||||
}
|
||||
}
|
||||
|
||||
private handleResponse(msg: SipMessage, _rinfo: IEndpoint): void {
|
||||
const code = msg.statusCode ?? 0;
|
||||
const method = msg.cseqMethod?.toUpperCase();
|
||||
|
||||
this.config.log(`[sip-leg:${this.id}] <- ${code} (${method})`);
|
||||
|
||||
if (method === 'INVITE') {
|
||||
this.handleInviteResponse(msg, code);
|
||||
}
|
||||
// BYE/CANCEL responses don't need action beyond logging.
|
||||
}
|
||||
|
||||
/**
 * INVITE response state machine for this leg.
 *
 * Flow: re-ACK stale 4xx retransmits → 407 auth challenge → dialog update →
 * 180/183 (ringing) / 2xx (connected) / 3xx+ (terminated).
 *
 * @param msg  - the response message
 * @param code - its status code (already extracted by handleResponse)
 */
private handleInviteResponse(msg: SipMessage, code: number): void {
  // Handle retransmitted 407 for the original unauthenticated INVITE.
  // After we re-sent INVITE with credentials (higher local CSeq), a 4xx
  // carrying the OLD CSeq must only be re-ACKed, never processed again.
  if (this.authAttempted && this.dialog) {
    const responseCSeqNum = parseInt((msg.getHeader('CSeq') || '').split(/\s+/)[0], 10);
    if (responseCSeqNum < this.dialog.localCSeq && code >= 400) {
      if (this.origInvite) {
        const ack = buildNon2xxAck(this.origInvite, msg);
        this.config.sendSip(ack.serialize(), this.config.sipTarget);
      }
      return;
    }
  }

  // Handle 407 Proxy Authentication Required (provider legs only).
  if (code === 407 && this.type === 'sip-provider') {
    this.handleAuthChallenge(msg);
    return;
  }

  // Update dialog state (remote tag, route set, etc. — see SipDialog).
  if (this.dialog) {
    this.dialog.processResponse(msg);
  }

  if (code === 180 || code === 183) {
    this.state = 'ringing';
    this.onStateChange?.(this);
  } else if (code >= 200 && code < 300) {
    // ACK the 200 OK. Done BEFORE the retransmit check so 200
    // retransmissions are always re-ACKed.
    if (this.dialog) {
      const ack = this.dialog.createAck();
      this.config.sendSip(ack.serialize(), this.config.sipTarget);
      this.config.log(`[sip-leg:${this.id}] ACK sent`);
    }

    // If already connected (200 retransmit), just re-ACK.
    if (this.state === 'connected') {
      this.config.log(`[sip-leg:${this.id}] re-ACK (200 retransmit)`);
      return;
    }

    // Learn media endpoint from SDP in the final answer.
    if (msg.hasSdpBody) {
      const ep = parseSdpEndpoint(msg.body);
      if (ep) {
        this.remoteMedia = ep;
        this.config.log(`[sip-leg:${this.id}] media = ${ep.address}:${ep.port}`);
      }
    }

    this.state = 'connected';
    this.config.log(`[sip-leg:${this.id}] CONNECTED`);

    // Start silence for provider legs with early media quirks.
    if (this.type === 'sip-provider') {
      this.startSilence();
    }

    // Prime the RTP path (NAT hole punch toward the learned media endpoint).
    if (this.remoteMedia) {
      this.primeRtp(this.remoteMedia);
    }

    this.onConnected?.(this);
    this.onStateChange?.(this);
  } else if (code >= 300) {
    // Any final non-2xx: tear the leg down and notify listeners.
    this.config.log(`[sip-leg:${this.id}] rejected ${code}`);
    this.state = 'terminated';
    if (this.dialog) this.dialog.terminate();
    this.onTerminated?.(this);
    this.onStateChange?.(this);
  }
}
|
||||
|
||||
/**
 * Respond to a 407 Proxy Authentication Required.
 *
 * Single-shot: if a 407 arrives after we already retried with credentials,
 * the credentials were rejected and the leg is terminated. Otherwise:
 * parse the digest challenge, compute Proxy-Authorization, ACK the 407,
 * and re-send the INVITE (same Call-ID and From tag, new CSeq).
 */
private handleAuthChallenge(msg: SipMessage): void {
  if (this.authAttempted) {
    this.config.log(`[sip-leg:${this.id}] 407 after auth attempt — credentials rejected`);
    this.state = 'terminated';
    if (this.dialog) this.dialog.terminate();
    this.onTerminated?.(this);
    return;
  }
  this.authAttempted = true;

  const challenge = msg.getHeader('Proxy-Authenticate');
  if (!challenge) {
    this.config.log(`[sip-leg:${this.id}] 407 but no Proxy-Authenticate`);
    this.state = 'terminated';
    if (this.dialog) this.dialog.terminate();
    this.onTerminated?.(this);
    return;
  }

  const parsed = parseDigestChallenge(challenge);
  if (!parsed) {
    this.config.log(`[sip-leg:${this.id}] could not parse digest challenge`);
    this.state = 'terminated';
    if (this.dialog) this.dialog.terminate();
    this.onTerminated?.(this);
    return;
  }

  // Credentials come from the registration layer, if configured.
  const password = this.config.getSipPassword?.();
  const aor = this.config.getRegisteredAor?.();
  if (!password || !aor) {
    this.config.log(`[sip-leg:${this.id}] 407 but no password or AOR`);
    this.state = 'terminated';
    if (this.dialog) this.dialog.terminate();
    this.onTerminated?.(this);
    return;
  }

  // Digest username = user part of the AOR (scheme and host stripped).
  const username = aor.replace(/^sips?:/, '').split('@')[0];
  const destUri = this.invite?.requestUri || '';

  const authValue = computeDigestAuth({
    username,
    password,
    realm: parsed.realm,
    nonce: parsed.nonce,
    method: 'INVITE',
    uri: destUri,
    algorithm: parsed.algorithm,
    opaque: parsed.opaque,
  });

  // ACK the 407 (non-2xx ACK: same transaction as the original INVITE).
  if (this.invite) {
    const ack407 = buildNon2xxAck(this.invite, msg);
    this.config.sendSip(ack407.serialize(), this.config.sipTarget);
    this.config.log(`[sip-leg:${this.id}] ACK-407 sent`);
  }

  // Keep original INVITE for re-ACKing retransmitted 407s.
  this.origInvite = this.invite;

  // Resend INVITE with auth, same From tag, incremented CSeq.
  const ip = this.config.getPublicIp() || this.config.lanIp;
  const fromTag = this.dialog!.localTag;
  const pts = this.config.payloadTypes || [9, 0, 8, 101];

  const sdp = buildSdp({ ip, port: this.rtpPort, payloadTypes: pts });

  // NOTE(review): CSeq is hard-coded to 2 here — assumes the original
  // INVITE always used CSeq 1. Confirm against the INVITE construction site.
  const inviteAuth = SipMessage.createRequest('INVITE', destUri, {
    via: { host: ip, port: this.config.lanPort },
    from: { uri: aor, tag: fromTag },
    to: { uri: destUri },
    callId: this.dialog!.callId,
    cseq: 2,
    contact: `<sip:${ip}:${this.config.lanPort}>`,
    body: sdp,
    contentType: 'application/sdp',
    extraHeaders: [['Proxy-Authorization', authValue]],
  });

  this.invite = inviteAuth;
  this.dialog!.localCSeq = 2;

  this.config.log(`[sip-leg:${this.id}] resending INVITE with auth`);
  this.config.sendSip(inviteAuth.serialize(), this.config.sipTarget);
}
|
||||
|
||||
private handleRequest(msg: SipMessage, rinfo: IEndpoint): void {
|
||||
const method = msg.method;
|
||||
this.config.log(`[sip-leg:${this.id}] <- ${method} from ${rinfo.address}:${rinfo.port}`);
|
||||
|
||||
if (method === 'BYE') {
|
||||
// Send 200 OK to the BYE.
|
||||
const ok = SipMessage.createResponse(200, 'OK', msg);
|
||||
this.config.sendSip(ok.serialize(), { address: rinfo.address, port: rinfo.port });
|
||||
|
||||
this.state = 'terminated';
|
||||
if (this.dialog) this.dialog.terminate();
|
||||
this.onTerminated?.(this);
|
||||
this.onStateChange?.(this);
|
||||
}
|
||||
if (method === 'INFO') {
|
||||
// Respond 200 OK to the INFO request.
|
||||
const ok = SipMessage.createResponse(200, 'OK', msg);
|
||||
this.config.sendSip(ok.serialize(), { address: rinfo.address, port: rinfo.port });
|
||||
|
||||
// Forward to DTMF handler (if attached).
|
||||
this.onInfoReceived?.(msg);
|
||||
}
|
||||
// Other in-dialog requests (re-INVITE, etc.) can be handled here in the future.
|
||||
}
|
||||
|
||||
// -------------------------------------------------------------------------
|
||||
// Send BYE / CANCEL
|
||||
// -------------------------------------------------------------------------
|
||||
|
||||
/** Send BYE (if confirmed) or CANCEL (if early) to tear down this leg. */
|
||||
sendHangup(): void {
|
||||
if (!this.dialog) return;
|
||||
|
||||
if (this.dialog.state === 'confirmed') {
|
||||
const bye = this.dialog.createRequest('BYE');
|
||||
this.config.sendSip(bye.serialize(), this.config.sipTarget);
|
||||
this.config.log(`[sip-leg:${this.id}] BYE sent`);
|
||||
} else if (this.dialog.state === 'early' && this.invite) {
|
||||
const cancel = this.dialog.createCancel(this.invite);
|
||||
this.config.sendSip(cancel.serialize(), this.config.sipTarget);
|
||||
this.config.log(`[sip-leg:${this.id}] CANCEL sent`);
|
||||
}
|
||||
|
||||
this.state = 'terminating';
|
||||
this.dialog.terminate();
|
||||
}
|
||||
|
||||
// -------------------------------------------------------------------------
|
||||
// RTP
|
||||
// -------------------------------------------------------------------------
|
||||
|
||||
sendRtp(data: Buffer): void {
|
||||
if (!this.remoteMedia) return;
|
||||
this.rtpSock.send(data, this.remoteMedia.port, this.remoteMedia.address);
|
||||
this.pktSent++;
|
||||
}
|
||||
|
||||
/** Send a 1-byte UDP packet to punch NAT hole. */
|
||||
private primeRtp(peer: IEndpoint): void {
|
||||
try {
|
||||
this.rtpSock.send(Buffer.alloc(1), peer.port, peer.address);
|
||||
this.config.log(`[sip-leg:${this.id}] RTP primed -> ${peer.address}:${peer.port}`);
|
||||
} catch (e: any) {
|
||||
this.config.log(`[sip-leg:${this.id}] prime error: ${e.message}`);
|
||||
}
|
||||
}
|
||||
|
||||
// -------------------------------------------------------------------------
|
||||
// Silence stream (provider quirks)
|
||||
// -------------------------------------------------------------------------
|
||||
|
||||
/**
 * Start streaming RTP silence toward the provider (provider-quirk workaround,
 * enabled via `provider.quirks.earlyMediaSilence`).
 *
 * Sends one packet every 20 ms with a 160-byte payload of the codec's
 * silence byte, stopping as soon as real inbound RTP is seen
 * (pktReceived > 0) or after a quirk-configured max packet count.
 * Idempotent: a second call while the timer is active is a no-op.
 */
private startSilence(): void {
  if (this.silenceTimer) return;
  const quirks = this.config.provider?.quirks;
  if (!quirks?.earlyMediaSilence) return;
  if (!this.remoteMedia) return;

  const PT = quirks.silencePayloadType ?? 9;     // payload type (default G.722)
  const MAX = quirks.silenceMaxPackets ?? 250;   // hard cap (~5 s at 20 ms/pkt)
  const PAYLOAD = 160;                           // samples per 20 ms frame
  // Random initial sequence number / timestamp, per RTP convention.
  let seq = Math.floor(Math.random() * 0xffff);
  let rtpTs = Math.floor(Math.random() * 0xffffffff);
  let count = 0;

  // Use proper silence byte for the codec (0x00 is NOT silence for most codecs).
  const silenceByte = silenceByteForPT(PT);

  this.silenceTimer = setInterval(() => {
    // Stop once real media arrives or the cap is reached.
    if (this.pktReceived > 0 || count >= MAX) {
      clearInterval(this.silenceTimer!);
      this.silenceTimer = null;
      this.config.log(`[sip-leg:${this.id}] silence stop after ${count} pkts`);
      return;
    }
    // Whole buffer pre-filled with the silence byte; the first 12 bytes are
    // then overwritten with the RTP header.
    const pkt = Buffer.alloc(12 + PAYLOAD, silenceByte);
    // RTP header (first 12 bytes).
    pkt[0] = 0x80;                          // version 2, no padding/ext/CSRC
    pkt[1] = PT;
    pkt.writeUInt16BE(seq & 0xffff, 2);     // & 0xffff handles seq wraparound
    pkt.writeUInt32BE(rtpTs >>> 0, 4);      // >>> 0 handles 32-bit ts wraparound
    pkt.writeUInt32BE(this.ssrc >>> 0, 8); // stable SSRC
    this.rtpSock.send(pkt, this.remoteMedia!.port, this.remoteMedia!.address);
    seq++;
    rtpTs += PAYLOAD;
    count++;
  }, 20);

  this.config.log(`[sip-leg:${this.id}] silence start -> ${this.remoteMedia.address}:${this.remoteMedia.port} (ssrc=${this.ssrc})`);
}
|
||||
|
||||
// -------------------------------------------------------------------------
|
||||
// Lifecycle
|
||||
// -------------------------------------------------------------------------
|
||||
|
||||
teardown(): void {
|
||||
if (this.silenceTimer) {
|
||||
clearInterval(this.silenceTimer);
|
||||
this.silenceTimer = null;
|
||||
}
|
||||
this.state = 'terminated';
|
||||
if (this.dialog) this.dialog.terminate();
|
||||
// Note: RTP socket is NOT closed here — the RtpPortPool manages that.
|
||||
}
|
||||
|
||||
/** Build a status snapshot of this leg (consumed by the dashboard). */
getStatus(): ILegStatus {
  const snapshot: ILegStatus = {
    id: this.id,
    type: this.type,
    state: this.state,
    remoteMedia: this.remoteMedia,
    rtpPort: this.rtpPort,
    pktSent: this.pktSent,
    pktReceived: this.pktReceived,
    codec: codecDisplayName(this.codec),
    transcoding: this.transcoder !== null,
  };
  return snapshot;
}
|
||||
}
|
||||
|
||||
// ---------------------------------------------------------------------------
|
||||
// Helper: proper silence byte per codec
|
||||
// ---------------------------------------------------------------------------
|
||||
|
||||
/** Return the byte value representing digital silence for a given RTP payload type. */
|
||||
function silenceByteForPT(pt: number): number {
|
||||
switch (pt) {
|
||||
case 0: return 0xFF; // PCMU: μ-law silence (zero amplitude)
|
||||
case 8: return 0xD5; // PCMA: A-law silence (zero amplitude)
|
||||
case 9: return 0xD5; // G.722: sub-band silence (zero amplitude)
|
||||
default: return 0xFF; // safe default
|
||||
}
|
||||
}
|
||||
|
||||
// ---------------------------------------------------------------------------
|
||||
// Helper: ACK for non-2xx (same transaction)
|
||||
// ---------------------------------------------------------------------------
|
||||
|
||||
function buildNon2xxAck(originalInvite: SipMessage, response: SipMessage): SipMessage {
|
||||
const via = originalInvite.getHeader('Via') || '';
|
||||
const from = originalInvite.getHeader('From') || '';
|
||||
const toFromResponse = response.getHeader('To') || '';
|
||||
const callId = originalInvite.callId;
|
||||
const cseqNum = parseInt((originalInvite.getHeader('CSeq') || '1').split(/\s+/)[0], 10);
|
||||
|
||||
return new SipMessage(
|
||||
`ACK ${originalInvite.requestUri} SIP/2.0`,
|
||||
[
|
||||
['Via', via],
|
||||
['From', from],
|
||||
['To', toFromResponse],
|
||||
['Call-ID', callId],
|
||||
['CSeq', `${cseqNum} ACK`],
|
||||
['Max-Forwards', '70'],
|
||||
['Content-Length', '0'],
|
||||
],
|
||||
'',
|
||||
);
|
||||
}
|
||||
@@ -1,336 +0,0 @@
|
||||
/**
|
||||
* SystemLeg — virtual ILeg for IVR menus and voicemail.
|
||||
*
|
||||
* Plugs into the Call hub exactly like SipLeg or WebRtcLeg:
|
||||
* - Receives caller audio via sendRtp() (called by Call.forwardRtp)
|
||||
* - Plays prompts by firing onRtpReceived (picked up by Call.forwardRtp → caller's leg)
|
||||
* - Detects DTMF from caller's audio (RFC 2833 telephone-event)
|
||||
* - Records caller's audio to WAV files (for voicemail)
|
||||
*
|
||||
* No UDP socket or SIP dialog needed — purely virtual.
|
||||
*/
|
||||
|
||||
import { Buffer } from 'node:buffer';
|
||||
import type dgram from 'node:dgram';
|
||||
import type { IEndpoint } from '../sip/index.ts';
|
||||
import type { SipMessage } from '../sip/index.ts';
|
||||
import type { SipDialog } from '../sip/index.ts';
|
||||
import type { IRtpTranscoder } from '../codec.ts';
|
||||
import type { ILeg } from './leg.ts';
|
||||
import type { TLegState, TLegType, ILegStatus } from './types.ts';
|
||||
import { DtmfDetector } from './dtmf-detector.ts';
|
||||
import type { IDtmfDigit } from './dtmf-detector.ts';
|
||||
import { AudioRecorder } from './audio-recorder.ts';
|
||||
import type { IRecordingResult } from './audio-recorder.ts';
|
||||
import { PromptCache, playPromptG722, playPromptOpus } from './prompt-cache.ts';
|
||||
import type { ICachedPrompt } from './prompt-cache.ts';
|
||||
import { buildRtpHeader, rtpClockIncrement } from './leg.ts';
|
||||
|
||||
// ---------------------------------------------------------------------------
|
||||
// Types
|
||||
// ---------------------------------------------------------------------------
|
||||
|
||||
export type TSystemLegMode = 'ivr' | 'voicemail-greeting' | 'voicemail-recording' | 'idle';
|
||||
|
||||
export interface ISystemLegConfig {
|
||||
/** Logging function. */
|
||||
log: (msg: string) => void;
|
||||
/** The prompt cache for TTS playback. */
|
||||
promptCache: PromptCache;
|
||||
/**
|
||||
* Codec payload type used by the caller's leg.
|
||||
* Determines whether G.722 (9) or Opus (111) frames are played.
|
||||
* Default: 9 (G.722, typical for SIP callers).
|
||||
*/
|
||||
callerCodecPt?: number;
|
||||
/** Called when a DTMF digit is detected. */
|
||||
onDtmfDigit?: (digit: IDtmfDigit) => void;
|
||||
/** Called when a voicemail recording is complete. */
|
||||
onRecordingComplete?: (result: IRecordingResult) => void;
|
||||
/** Called when the SystemLeg wants to signal an IVR action. */
|
||||
onAction?: (action: string, data?: any) => void;
|
||||
}
|
||||
|
||||
// ---------------------------------------------------------------------------
|
||||
// SystemLeg
|
||||
// ---------------------------------------------------------------------------
|
||||
|
||||
export class SystemLeg implements ILeg {
  readonly id: string;
  readonly type: TLegType = 'system';
  state: TLegState = 'connected'; // Immediately "connected" — no setup phase.

  /** Current operating mode. */
  mode: TSystemLegMode = 'idle';

  // --- ILeg required fields (virtual — no real network resources) ---
  readonly sipCallId: string;
  readonly rtpPort: number | null = null;
  readonly rtpSock: dgram.Socket | null = null;
  remoteMedia: IEndpoint | null = null;
  codec: number | null = null;
  transcoder: IRtpTranscoder | null = null;
  pktSent = 0;      // prompt packets injected into the hub
  pktReceived = 0;  // caller packets delivered via sendRtp()
  readonly dialog: SipDialog | null = null;

  /**
   * Set by Call.addLeg() — firing this injects audio into the Call hub,
   * which forwards it to the caller's leg.
   */
  onRtpReceived: ((data: Buffer) => void) | null = null;

  // --- Internal components ---
  private dtmfDetector: DtmfDetector;
  private recorder: AudioRecorder | null = null;
  private promptCache: PromptCache;
  // Cancel callback for the in-flight prompt; null when nothing is playing.
  private promptCancel: (() => void) | null = null;
  private callerCodecPt: number;
  private log: (msg: string) => void;
  readonly config: ISystemLegConfig;

  /** Stable SSRC for all prompt playback (random, stays constant for the leg's lifetime). */
  private ssrc: number;

  /** Sequence/timestamp counters for Opus prompt playback (shared for seamless transitions). */
  private opusCounters = { seq: 0, ts: 0 };

  /**
   * @param id     - unique leg identifier (also used in log prefixes)
   * @param config - logging, prompt cache, codec selection, and event callbacks
   */
  constructor(id: string, config: ISystemLegConfig) {
    this.id = id;
    this.sipCallId = `system-${id}`; // Virtual Call-ID — not a real SIP dialog.
    this.config = config;
    this.log = config.log;
    this.promptCache = config.promptCache;
    this.callerCodecPt = config.callerCodecPt ?? 9; // Default G.722

    // Random SSRC and initial RTP counters, per RTP convention.
    this.ssrc = (Math.random() * 0xffffffff) >>> 0;
    this.opusCounters.seq = Math.floor(Math.random() * 0xffff);
    this.opusCounters.ts = Math.floor(Math.random() * 0xffffffff);

    // Initialize DTMF detector.
    this.dtmfDetector = new DtmfDetector(this.log);
    this.dtmfDetector.onDigit = (digit) => {
      this.log(`[system-leg:${this.id}] DTMF '${digit.digit}' (${digit.source})`);
      this.config.onDtmfDigit?.(digit);
    };
  }

  // -------------------------------------------------------------------------
  // ILeg: sendRtp — receives caller's audio from the Call hub
  // -------------------------------------------------------------------------

  /**
   * Called by the Call hub (via forwardRtp) to deliver the caller's audio
   * to this leg. We use this for DTMF detection and recording.
   */
  sendRtp(data: Buffer): void {
    this.pktReceived++;

    // Feed DTMF detector (it checks PT internally, ignores non-101 packets).
    this.dtmfDetector.processRtp(data);

    // Feed recorder if active.
    if (this.mode === 'voicemail-recording' && this.recorder) {
      this.recorder.processRtp(data);
    }
  }

  // -------------------------------------------------------------------------
  // ILeg: handleSipMessage — handles SIP INFO for DTMF
  // -------------------------------------------------------------------------

  /**
   * Handle a SIP message routed to this leg. Only SIP INFO (DTMF) is relevant.
   */
  handleSipMessage(msg: SipMessage, _rinfo: IEndpoint): void {
    if (msg.method === 'INFO') {
      this.dtmfDetector.processSipInfo(msg);
    }
  }

  // -------------------------------------------------------------------------
  // Prompt playback
  // -------------------------------------------------------------------------

  /**
   * Play a cached prompt by ID.
   * The audio is injected into the Call hub via onRtpReceived.
   * Cancels any prompt that is already playing.
   *
   * @param promptId - ID of the prompt in the PromptCache
   * @param onDone - called when playback completes (not on cancel);
   *                 also called immediately if the prompt is missing
   * @returns true if playback started, false if prompt not found
   */
  playPrompt(promptId: string, onDone?: () => void): boolean {
    const prompt = this.promptCache.get(promptId);
    if (!prompt) {
      this.log(`[system-leg:${this.id}] prompt "${promptId}" not found`);
      onDone?.();
      return false;
    }

    // Cancel any in-progress playback.
    this.cancelPrompt();

    this.log(`[system-leg:${this.id}] playing prompt "${promptId}" (${prompt.durationMs}ms)`);

    // Select G.722 or Opus frames based on caller codec.
    if (this.callerCodecPt === 111) {
      // WebRTC caller: play Opus frames.
      this.promptCancel = playPromptOpus(
        prompt,
        (pkt) => this.injectPacket(pkt),
        this.ssrc,
        this.opusCounters,
        () => {
          this.promptCancel = null;
          onDone?.();
        },
      );
    } else {
      // SIP caller: play G.722 frames (works for all SIP codecs since the
      // SipLeg's RTP socket sends whatever we give it — the provider's
      // media endpoint accepts the codec negotiated in the SDP).
      this.promptCancel = playPromptG722(
        prompt,
        (pkt) => this.injectPacket(pkt),
        this.ssrc,
        () => {
          this.promptCancel = null;
          onDone?.();
        },
      );
    }

    return this.promptCancel !== null;
  }

  /**
   * Play a sequence of prompts, one after another.
   * Missing prompts are skipped; onDone fires after the last one.
   */
  playPromptSequence(promptIds: string[], onDone?: () => void): void {
    let index = 0;
    const playNext = () => {
      if (index >= promptIds.length) {
        onDone?.();
        return;
      }
      const id = promptIds[index++];
      if (!this.playPrompt(id, playNext)) {
        // Prompt not found — skip and play next.
        playNext();
      }
    };
    playNext();
  }

  /** Cancel any in-progress prompt playback. */
  cancelPrompt(): void {
    if (this.promptCancel) {
      this.promptCancel();
      this.promptCancel = null;
    }
  }

  /** Whether a prompt is currently playing. */
  get isPlaying(): boolean {
    return this.promptCancel !== null;
  }

  /**
   * Inject an RTP packet into the Call hub.
   * This simulates "receiving" audio on this leg — the hub
   * will forward it to the caller's leg.
   */
  private injectPacket(pkt: Buffer): void {
    this.pktSent++;
    this.onRtpReceived?.(pkt);
  }

  // -------------------------------------------------------------------------
  // Recording
  // -------------------------------------------------------------------------

  /**
   * Start recording the caller's audio.
   * Any previous recording is stopped (and finalized) first.
   * Switches the leg into 'voicemail-recording' mode.
   *
   * @param outputDir - directory to write the WAV file
   * @param fileId - unique ID for the file name
   */
  async startRecording(outputDir: string, fileId?: string): Promise<void> {
    if (this.recorder) {
      await this.recorder.stop();
    }

    this.recorder = new AudioRecorder({
      outputDir,
      log: this.log,
      maxDurationSec: 120,    // hard cap on recording length
      silenceTimeoutSec: 5,   // auto-stop after sustained silence
    });

    // Auto-stop (max duration / silence) also notifies the owner.
    this.recorder.onStopped = (result) => {
      this.log(`[system-leg:${this.id}] recording auto-stopped (${result.stopReason})`);
      this.config.onRecordingComplete?.(result);
    };

    this.mode = 'voicemail-recording';
    await this.recorder.start(fileId);
  }

  /**
   * Stop recording and finalize the WAV file.
   * @returns the recording result, or null if nothing was recording
   */
  async stopRecording(): Promise<IRecordingResult | null> {
    if (!this.recorder) return null;

    const result = await this.recorder.stop();
    this.recorder = null;
    return result;
  }

  /** Cancel recording — stops and deletes the file. */
  async cancelRecording(): Promise<void> {
    if (this.recorder) {
      await this.recorder.cancel();
      this.recorder = null;
    }
  }

  // -------------------------------------------------------------------------
  // Lifecycle
  // -------------------------------------------------------------------------

  /** Release all resources. */
  teardown(): void {
    this.cancelPrompt();

    // Stop recording gracefully (fire-and-forget; the stop promise still
    // delivers the result to onRecordingComplete).
    if (this.recorder && this.recorder.state === 'recording') {
      this.recorder.stop().then((result) => {
        this.config.onRecordingComplete?.(result);
      });
      this.recorder = null;
    }

    this.dtmfDetector.destroy();
    this.state = 'terminated';
    this.mode = 'idle';
    this.onRtpReceived = null;

    this.log(`[system-leg:${this.id}] torn down`);
  }

  /** Status snapshot for the dashboard. */
  getStatus(): ILegStatus {
    return {
      id: this.id,
      type: this.type,
      state: this.state,
      remoteMedia: null,
      rtpPort: null,
      pktSent: this.pktSent,
      pktReceived: this.pktReceived,
      codec: this.callerCodecPt === 111 ? 'Opus' : 'G.722',
      transcoding: false,
    };
  }
}
|
||||
@@ -1,70 +0,0 @@
|
||||
/**
|
||||
* Hub model type definitions — Call, Leg, and status types.
|
||||
*/
|
||||
|
||||
import type { IEndpoint } from '../sip/index.ts';
|
||||
|
||||
// ---------------------------------------------------------------------------
|
||||
// State types
|
||||
// ---------------------------------------------------------------------------
|
||||
|
||||
/** Lifecycle state of a Call (the hub object spanning all its legs). */
export type TCallState =
  | 'setting-up'
  | 'ringing'
  | 'connected'
  | 'on-hold'
  | 'voicemail'
  | 'ivr'
  | 'transferring'
  | 'terminating'
  | 'terminated';

/** Lifecycle state of a single leg within a call. */
export type TLegState =
  | 'inviting'
  | 'ringing'
  | 'connected'
  | 'on-hold'
  | 'terminating'
  | 'terminated';

/** Kind of endpoint a leg connects to. */
export type TLegType = 'sip-device' | 'sip-provider' | 'webrtc' | 'system';

/** Direction of a call relative to this system. */
export type TCallDirection = 'inbound' | 'outbound' | 'internal';

// ---------------------------------------------------------------------------
// Status interfaces (for frontend dashboard)
// ---------------------------------------------------------------------------

/** Per-leg status snapshot. */
export interface ILegStatus {
  id: string;
  type: TLegType;
  state: TLegState;
  /** Remote RTP endpoint, or null when not (yet) known. */
  remoteMedia: IEndpoint | null;
  /** Local RTP port; null for virtual legs that have no socket. */
  rtpPort: number | null;
  pktSent: number;
  pktReceived: number;
  /** Human-readable codec name, or null when unknown. */
  codec: string | null;
  /** Whether audio on this leg is being transcoded. */
  transcoding: boolean;
}

/** Per-call status snapshot, including all legs. */
export interface ICallStatus {
  id: string;
  state: TCallState;
  direction: TCallDirection;
  callerNumber: string | null;
  calleeNumber: string | null;
  providerUsed: string | null;
  // Timestamp of call creation (presumably epoch ms — confirm against producer).
  createdAt: number;
  duration: number;
  legs: ILegStatus[];
}

/** Completed-call record for the call history list. */
export interface ICallHistoryEntry {
  id: string;
  direction: TCallDirection;
  callerNumber: string | null;
  calleeNumber: string | null;
  providerUsed: string | null;
  // Timestamp of call start (presumably epoch ms — confirm against producer).
  startedAt: number;
  duration: number;
}
|
||||
@@ -1,163 +0,0 @@
|
||||
/**
|
||||
* Streaming WAV file writer — opens a file, writes a placeholder header,
|
||||
* appends raw PCM data in chunks, and finalizes (patches sizes) on close.
|
||||
*
|
||||
* Produces standard RIFF/WAVE format compatible with the WAV parser
|
||||
* in announcement.ts (extractPcmFromWav).
|
||||
*/
|
||||
|
||||
import fs from 'node:fs';
|
||||
import { Buffer } from 'node:buffer';
|
||||
|
||||
// ---------------------------------------------------------------------------
|
||||
// Types
|
||||
// ---------------------------------------------------------------------------
|
||||
|
||||
export interface IWavWriterOptions {
|
||||
/** Full path to the output WAV file. */
|
||||
filePath: string;
|
||||
/** Sample rate in Hz (e.g. 16000). */
|
||||
sampleRate: number;
|
||||
/** Number of channels (default 1 = mono). */
|
||||
channels?: number;
|
||||
/** Bits per sample (default 16). */
|
||||
bitsPerSample?: number;
|
||||
}
|
||||
|
||||
export interface IWavWriterResult {
|
||||
/** Full path to the WAV file. */
|
||||
filePath: string;
|
||||
/** Total duration in milliseconds. */
|
||||
durationMs: number;
|
||||
/** Sample rate of the output file. */
|
||||
sampleRate: number;
|
||||
/** Total number of audio samples written. */
|
||||
totalSamples: number;
|
||||
/** File size in bytes. */
|
||||
fileSize: number;
|
||||
}
|
||||
|
||||
// ---------------------------------------------------------------------------
|
||||
// WAV header constants
|
||||
// ---------------------------------------------------------------------------
|
||||
|
||||
/** Standard WAV header size: RIFF(12) + fmt(24) + data-header(8) = 44 bytes. */
|
||||
const HEADER_SIZE = 44;
|
||||
|
||||
// ---------------------------------------------------------------------------
|
||||
// WavWriter
|
||||
// ---------------------------------------------------------------------------
|
||||
|
||||
export class WavWriter {
|
||||
private fd: number | null = null;
|
||||
private totalDataBytes = 0;
|
||||
private closed = false;
|
||||
|
||||
private filePath: string;
|
||||
private sampleRate: number;
|
||||
private channels: number;
|
||||
private bitsPerSample: number;
|
||||
|
||||
constructor(options: IWavWriterOptions) {
|
||||
this.filePath = options.filePath;
|
||||
this.sampleRate = options.sampleRate;
|
||||
this.channels = options.channels ?? 1;
|
||||
this.bitsPerSample = options.bitsPerSample ?? 16;
|
||||
}
|
||||
|
||||
/** Open the file and write a placeholder 44-byte WAV header. */
|
||||
open(): void {
|
||||
if (this.fd !== null) throw new Error('WavWriter already open');
|
||||
|
||||
this.fd = fs.openSync(this.filePath, 'w');
|
||||
this.totalDataBytes = 0;
|
||||
this.closed = false;
|
||||
|
||||
// Write 44 bytes of zeros as placeholder — patched in close().
|
||||
const placeholder = Buffer.alloc(HEADER_SIZE);
|
||||
fs.writeSync(this.fd, placeholder, 0, HEADER_SIZE, 0);
|
||||
}
|
||||
|
||||
/** Append raw 16-bit little-endian PCM samples. */
|
||||
write(pcm: Buffer): void {
|
||||
if (this.fd === null || this.closed) return;
|
||||
if (pcm.length === 0) return;
|
||||
|
||||
fs.writeSync(this.fd, pcm, 0, pcm.length);
|
||||
this.totalDataBytes += pcm.length;
|
||||
}
|
||||
|
||||
/**
 * Finalize the WAV file: patch the RIFF/data chunk sizes into the 44-byte
 * header at offset 0, close the descriptor, and report what was written.
 * Calling again after close (or before open) returns zeroed metadata.
 */
close(): IWavWriterResult {
  if (this.closed || this.fd === null) {
    // Already finalized (or never opened) — nothing to patch.
    return {
      filePath: this.filePath,
      durationMs: 0,
      sampleRate: this.sampleRate,
      totalSamples: 0,
      fileSize: HEADER_SIZE,
    };
  }

  this.closed = true;

  const bytesPerSample = this.bitsPerSample / 8;
  const blockAlign = this.channels * bytesPerSample;
  const byteRate = this.sampleRate * blockAlign;
  const fileSize = HEADER_SIZE + this.totalDataBytes;

  // Assemble the canonical 44-byte RIFF/WAVE header at fixed offsets.
  const header = Buffer.alloc(HEADER_SIZE);
  header.write('RIFF', 0);
  header.writeUInt32LE(fileSize - 8, 4);            // ChunkSize = fileSize - 8
  header.write('WAVE', 8);
  header.write('fmt ', 12);
  header.writeUInt32LE(16, 16);                     // Subchunk1Size (PCM = 16)
  header.writeUInt16LE(1, 20);                      // AudioFormat (1 = PCM)
  header.writeUInt16LE(this.channels, 22);
  header.writeUInt32LE(this.sampleRate, 24);
  header.writeUInt32LE(byteRate, 28);
  header.writeUInt16LE(blockAlign, 32);
  header.writeUInt16LE(this.bitsPerSample, 34);
  header.write('data', 36);
  header.writeUInt32LE(this.totalDataBytes, 40);

  // Overwrite the placeholder header at file offset 0, then close.
  fs.writeSync(this.fd, header, 0, HEADER_SIZE, 0);
  fs.closeSync(this.fd);
  this.fd = null;

  const totalSamples = Math.floor(this.totalDataBytes / (bytesPerSample * this.channels));

  return {
    filePath: this.filePath,
    durationMs: Math.round((totalSamples / this.sampleRate) * 1000),
    sampleRate: this.sampleRate,
    totalSamples,
    fileSize,
  };
}
|
||||
|
||||
/** Recording length so far, in milliseconds, derived from bytes accepted. */
get durationMs(): number {
  const frameBytes = (this.bitsPerSample / 8) * this.channels;
  const frames = Math.floor(this.totalDataBytes / frameBytes);
  return (frames / this.sampleRate) * 1000;
}
|
||||
|
||||
/** True while the descriptor is valid and close() has not yet run. */
get isOpen(): boolean {
  return !this.closed && this.fd !== null;
}
|
||||
}
|
||||
@@ -1,417 +0,0 @@
|
||||
/**
|
||||
* WebRtcLeg — a WebRTC connection from the Call hub to a browser client.
|
||||
*
|
||||
* Wraps a werift RTCPeerConnection and handles:
|
||||
* - WebRTC offer/answer/ICE negotiation
|
||||
* - Opus <-> G.722/PCMU/PCMA transcoding via Rust IPC
|
||||
* - RTP header rebuilding with correct PT, timestamp, SSRC
|
||||
*/
|
||||
|
||||
import dgram from 'node:dgram';
|
||||
import { Buffer } from 'node:buffer';
|
||||
import { WebSocket } from 'ws';
|
||||
import type { IEndpoint } from '../sip/index.ts';
|
||||
import type { TLegState, ILegStatus } from './types.ts';
|
||||
import type { ILeg } from './leg.ts';
|
||||
import { rtpClockIncrement, buildRtpHeader, codecDisplayName } from './leg.ts';
|
||||
import { createTranscoder, OPUS_PT } from '../codec.ts';
|
||||
import type { IRtpTranscoder } from '../codec.ts';
|
||||
import { createSession, destroySession } from '../opusbridge.ts';
|
||||
import type { SipDialog } from '../sip/index.ts';
|
||||
import type { SipMessage } from '../sip/index.ts';
|
||||
|
||||
// ---------------------------------------------------------------------------
|
||||
// WebRtcLeg config
|
||||
// ---------------------------------------------------------------------------
|
||||
|
||||
export interface IWebRtcLegConfig {
|
||||
/** The browser's WebSocket connection. */
|
||||
ws: WebSocket;
|
||||
/** The browser's session ID. */
|
||||
sessionId: string;
|
||||
/** RTP port and socket (pre-allocated from the pool). */
|
||||
rtpPort: number;
|
||||
rtpSock: dgram.Socket;
|
||||
/** Logging function. */
|
||||
log: (msg: string) => void;
|
||||
}
|
||||
|
||||
// ---------------------------------------------------------------------------
|
||||
// WebRtcLeg
|
||||
// ---------------------------------------------------------------------------
|
||||
|
||||
export class WebRtcLeg implements ILeg {
  readonly id: string;
  readonly type = 'webrtc' as const;
  state: TLegState = 'inviting';
  readonly sessionId: string;

  /** The werift RTCPeerConnection instance (`any` because werift is loaded via dynamic import). */
  private pc: any = null;

  /** RTP socket for bridging to SIP (pre-allocated from the port pool, per config). */
  readonly rtpSock: dgram.Socket;
  readonly rtpPort: number;

  /** Remote media endpoint (the other side of the bridge, set by Call). */
  remoteMedia: IEndpoint | null = null;

  /** Negotiated WebRTC codec payload type (parsed from the answer SDP in handleOffer). */
  codec: number | null = null;

  /** Transcoders for WebRTC <-> SIP conversion. */
  transcoder: IRtpTranscoder | null = null; // used by Call for fan-out
  private toSipTranscoder: IRtpTranscoder | null = null;
  private fromSipTranscoder: IRtpTranscoder | null = null;

  /** RTP counters for outgoing (to SIP) direction. */
  private toSipSeq = 0;
  private toSipTs = 0;
  private toSipSsrc = (Math.random() * 0xffffffff) >>> 0;

  /** RTP counters for incoming (from SIP) direction.
   * Initialized to random values so announcements and provider audio share
   * a continuous sequence — prevents the browser jitter buffer from discarding
   * packets after the announcement→provider transition. */
  readonly fromSipCounters = {
    seq: Math.floor(Math.random() * 0xffff),
    ts: Math.floor(Math.random() * 0xffffffff),
  };
  // NOTE(review): may be overwritten with the SSRC parsed from the answer SDP
  // (or the sender object) in handleOffer.
  fromSipSsrc = (Math.random() * 0xffffffff) >>> 0;

  /** Packet counters. pktReceived is never incremented in this class — TODO confirm intended. */
  pktSent = 0;
  pktReceived = 0;

  /** Callback set by Call. Not invoked anywhere in this class. */
  onRtpReceived: ((data: Buffer) => void) | null = null;

  /** Callback to send transcoded RTP to the provider via the SipLeg's socket.
   * Set by CallManager when the bridge is established. If null, falls back to own rtpSock. */
  onSendToProvider: ((data: Buffer, dest: IEndpoint) => void) | null = null;

  /** Lifecycle callbacks, fired from the PeerConnection state-change handler. */
  onConnected: ((leg: WebRtcLeg) => void) | null = null;
  onTerminated: ((leg: WebRtcLeg) => void) | null = null;

  /** Cancel handle for an in-progress announcement (cleared when provider audio arrives). */
  announcementCancel: (() => void) | null = null;

  private ws: WebSocket;
  private config: IWebRtcLegConfig;
  /** ICE candidates received before the PeerConnection exists; drained after the answer. */
  private pendingIceCandidates: any[] = [];

  // SipDialog is not applicable for WebRTC legs.
  readonly dialog: SipDialog | null = null;
  readonly sipCallId: string;

  constructor(id: string, config: IWebRtcLegConfig) {
    this.id = id;
    this.sessionId = config.sessionId;
    this.ws = config.ws;
    this.rtpSock = config.rtpSock;
    this.rtpPort = config.rtpPort;
    this.config = config;
    this.sipCallId = `webrtc-${id}`;

    // Log RTP arriving on this socket (symmetric RTP from provider).
    // Audio forwarding is handled by the Call hub: SipLeg → forwardRtp → WebRtcLeg.sendRtp.
    // We do NOT transcode here to avoid double-processing (the SipLeg also receives these packets).
    let sipRxCount = 0;
    this.rtpSock.on('message', (data: Buffer) => {
      sipRxCount++;
      // Log packet 1, packet 50, then every 500th — keeps log volume bounded.
      if (sipRxCount === 1 || sipRxCount === 50 || sipRxCount % 500 === 0) {
        this.config.log(`[webrtc-leg:${this.id}] SIP->browser rtp #${sipRxCount} (${data.length}b) [symmetric, ignored]`);
      }
    });
  }

  // -------------------------------------------------------------------------
  // WebRTC offer/answer
  // -------------------------------------------------------------------------

  /**
   * Handle a WebRTC offer from the browser. Creates the PeerConnection,
   * sets remote offer, creates answer, and sends it back.
   * On failure: notifies the browser with 'webrtc-error' and terminates the leg.
   */
  async handleOffer(offerSdp: string): Promise<void> {
    this.config.log(`[webrtc-leg:${this.id}] received offer`);

    try {
      // Dynamic import keeps werift out of the startup path.
      const werift = await import('werift');

      this.pc = new werift.RTCPeerConnection({ iceServers: [] });

      // Add sendrecv transceiver before setRemoteDescription.
      this.pc.addTransceiver('audio', { direction: 'sendrecv' });

      // Handle incoming audio from browser.
      this.pc.ontrack = (event: any) => {
        const track = event.track;
        this.config.log(`[webrtc-leg:${this.id}] got track: ${track.kind}`);

        let rxCount = 0;
        track.onReceiveRtp.subscribe((rtp: any) => {
          // Drop browser audio until the SIP side's media endpoint is known.
          if (!this.remoteMedia) return;
          rxCount++;
          if (rxCount === 1 || rxCount === 50 || rxCount % 500 === 0) {
            this.config.log(`[webrtc-leg:${this.id}] browser->SIP rtp #${rxCount}`);
          }

          this.forwardToSip(rtp, rxCount);
        });
      };

      // ICE candidate handling: relay our candidates to the browser over the signaling WS.
      this.pc.onicecandidate = (candidate: any) => {
        if (candidate) {
          const json = candidate.toJSON?.() || candidate;
          this.wsSend({ type: 'webrtc-ice', sessionId: this.sessionId, candidate: json });
        }
      };

      // Map PeerConnection state to leg lifecycle (connected / terminated callbacks).
      this.pc.onconnectionstatechange = () => {
        this.config.log(`[webrtc-leg:${this.id}] connection state: ${this.pc.connectionState}`);
        if (this.pc.connectionState === 'connected') {
          this.state = 'connected';
          this.onConnected?.(this);
        } else if (this.pc.connectionState === 'failed' || this.pc.connectionState === 'closed') {
          this.state = 'terminated';
          this.onTerminated?.(this);
        }
      };

      // Guarded because the property may not exist on every werift version — TODO confirm.
      if (this.pc.oniceconnectionstatechange !== undefined) {
        this.pc.oniceconnectionstatechange = () => {
          this.config.log(`[webrtc-leg:${this.id}] ICE state: ${this.pc.iceConnectionState}`);
        };
      }

      // Set remote offer and create answer.
      await this.pc.setRemoteDescription({ type: 'offer', sdp: offerSdp });
      const answer = await this.pc.createAnswer();
      await this.pc.setLocalDescription(answer);

      const sdp: string = this.pc.localDescription!.sdp;

      // Detect negotiated codec: first payload type on the m=audio line.
      const mAudio = sdp.match(/m=audio\s+\d+\s+\S+\s+(\d+)/);
      if (mAudio) {
        this.codec = parseInt(mAudio[1], 10);
        this.config.log(`[webrtc-leg:${this.id}] negotiated audio PT=${this.codec}`);
      }

      // Extract sender SSRC from SDP.
      const ssrcMatch = sdp.match(/a=ssrc:(\d+)\s/);
      if (ssrcMatch) {
        this.fromSipSsrc = parseInt(ssrcMatch[1], 10);
      }
      // Also try from sender object (private field access — werift-version dependent).
      const senders = this.pc.getSenders();
      if (senders[0]) {
        const senderSsrc = (senders[0] as any).ssrc ?? (senders[0] as any)._ssrc;
        if (senderSsrc) this.fromSipSsrc = senderSsrc;
      }

      // Send answer to browser.
      this.wsSend({ type: 'webrtc-answer', sessionId: this.sessionId, sdp });
      this.config.log(`[webrtc-leg:${this.id}] sent answer, rtp port=${this.rtpPort}`);

      // Process buffered ICE candidates that arrived before the PC existed.
      for (const c of this.pendingIceCandidates) {
        try { await this.pc.addIceCandidate(c); } catch { /* ignore */ }
      }
      this.pendingIceCandidates = [];

    } catch (err: any) {
      this.config.log(`[webrtc-leg:${this.id}] offer error: ${err.message}`);
      this.wsSend({ type: 'webrtc-error', sessionId: this.sessionId, error: err.message });
      this.state = 'terminated';
      this.onTerminated?.(this);
    }
  }

  /** Add an ICE candidate from the browser; buffered if the PC doesn't exist yet. */
  async addIceCandidate(candidate: any): Promise<void> {
    if (!this.pc) {
      this.pendingIceCandidates.push(candidate);
      return;
    }
    try {
      if (candidate) await this.pc.addIceCandidate(candidate);
    } catch (err: any) {
      this.config.log(`[webrtc-leg:${this.id}] ICE error: ${err.message}`);
    }
  }

  // -------------------------------------------------------------------------
  // Transcoding setup
  // -------------------------------------------------------------------------

  /** Codec session ID for isolated Rust codec state (unique per leg). */
  private codecSessionId = `webrtc-${Date.now()}-${Math.random().toString(36).slice(2, 8)}`;

  /**
   * Set up transcoders for bridging between WebRTC and SIP codecs.
   * Called by the Call when the remote media endpoint is known.
   * Creates an isolated Rust codec session so concurrent calls don't
   * corrupt each other's stateful codec state (Opus/G.722 ADPCM).
   */
  async setupTranscoders(sipPT: number): Promise<void> {
    // Fall back to Opus if the negotiated PT hasn't been detected yet.
    const webrtcPT = this.codec ?? OPUS_PT;
    // Create isolated codec session for this leg.
    await createSession(this.codecSessionId);
    this.toSipTranscoder = createTranscoder(webrtcPT, sipPT, this.codecSessionId, 'to_sip');
    this.fromSipTranscoder = createTranscoder(sipPT, webrtcPT, this.codecSessionId, 'to_browser');
    const mode = this.toSipTranscoder ? `transcoding PT ${webrtcPT}<->${sipPT}` : `pass-through PT ${webrtcPT}`;
    this.config.log(`[webrtc-leg:${this.id}] ${mode} (session: ${this.codecSessionId})`);
  }

  // -------------------------------------------------------------------------
  // RTP forwarding
  // -------------------------------------------------------------------------

  /** Forward RTP from SIP side to browser via WebRTC.
   * NOTE(review): the `count` parameter is currently unused in this method. */
  private forwardToBrowser(data: Buffer, count: number): void {
    const sender = this.pc?.getSenders()[0];
    if (!sender) return;

    // 12 = fixed RTP header size; only packets with an actual payload are transcoded.
    if (this.fromSipTranscoder && data.length > 12) {
      const payload = Buffer.from(data.subarray(12));
      // Stop announcement if still playing — provider audio takes over.
      if (this.announcementCancel) {
        this.announcementCancel();
        this.announcementCancel = null;
      }
      // Capture seq/ts BEFORE async transcode to preserve ordering.
      const toPT = this.fromSipTranscoder.toPT;
      const seq = this.fromSipCounters.seq++;
      const ts = this.fromSipCounters.ts;
      this.fromSipCounters.ts += rtpClockIncrement(toPT);
      const result = this.fromSipTranscoder.payload(payload);
      const sendTranscoded = (transcoded: Buffer) => {
        if (transcoded.length === 0) return; // transcoding failed
        try {
          // Rebuild the RTP header with the browser-side PT/seq/ts/SSRC.
          const hdr = buildRtpHeader(toPT, seq, ts, this.fromSipSsrc, false);
          const out = Buffer.concat([hdr, transcoded]);
          const r = sender.sendRtp(out);
          if (r instanceof Promise) r.catch(() => {});
        } catch { /* ignore */ }
      };
      if (result instanceof Promise) result.then(sendTranscoded).catch(() => {});
      else sendTranscoded(result);
    } else if (!this.fromSipTranscoder) {
      // No transcoder — either same codec or not set up yet.
      // Only forward if we don't expect transcoding.
      if (this.codec === null) {
        try { sender.sendRtp(data); } catch { /* ignore */ }
      }
    }
  }

  /** Forward RTP from browser to SIP side.
   * NOTE(review): the `count` parameter is currently unused in this method. */
  private forwardToSip(rtp: any, count: number): void {
    if (!this.remoteMedia) return;

    if (this.toSipTranscoder) {
      const payload: Buffer = rtp.payload;
      if (!payload || payload.length === 0) return;
      // Capture seq/ts BEFORE async transcode to preserve ordering.
      const toPT = this.toSipTranscoder.toPT;
      const seq = this.toSipSeq++;
      const ts = this.toSipTs;
      this.toSipTs += rtpClockIncrement(toPT);
      const result = this.toSipTranscoder.payload(payload);
      const sendTranscoded = (transcoded: Buffer) => {
        if (transcoded.length === 0) return; // transcoding failed
        const hdr = buildRtpHeader(toPT, seq, ts, this.toSipSsrc, false);
        const out = Buffer.concat([hdr, transcoded]);
        // Prefer the SipLeg's socket (symmetric RTP); fall back to our own.
        if (this.onSendToProvider) {
          this.onSendToProvider(out, this.remoteMedia!);
        } else {
          this.rtpSock.send(out, this.remoteMedia!.port, this.remoteMedia!.address);
        }
        this.pktSent++;
      };
      if (result instanceof Promise) result.then(sendTranscoded).catch(() => {});
      else sendTranscoded(result);
    } else if (this.codec === null) {
      // Same codec (no transcoding needed) — pass through.
      const raw = rtp.serialize();
      if (this.onSendToProvider) {
        this.onSendToProvider(raw, this.remoteMedia);
      } else {
        this.rtpSock.send(raw, this.remoteMedia.port, this.remoteMedia.address);
      }
      this.pktSent++;
    }
    // If codec is set but transcoder is null, drop the packet — transcoder not ready yet.
    // This prevents raw Opus from being sent to a G.722 endpoint.
  }

  /**
   * Send RTP to the browser via WebRTC (used by Call hub for fan-out).
   * This transcodes and sends through the PeerConnection, NOT to a UDP address.
   */
  sendRtp(data: Buffer): void {
    this.forwardToBrowser(data, this.pktSent);
    this.pktSent++;
  }

  /**
   * Send a pre-encoded RTP packet directly to the browser via PeerConnection.
   * Used for announcements — the packet must already be in the correct codec (Opus).
   */
  sendDirectToBrowser(pkt: Buffer): void {
    const sender = this.pc?.getSenders()[0];
    if (!sender) return;
    try {
      const r = sender.sendRtp(pkt);
      if (r instanceof Promise) r.catch(() => {});
    } catch { /* ignore */ }
  }

  /** No-op: WebRTC legs don't process SIP messages. */
  handleSipMessage(_msg: SipMessage, _rinfo: IEndpoint): void {
    // WebRTC legs don't handle SIP messages.
  }

  // -------------------------------------------------------------------------
  // Lifecycle
  // -------------------------------------------------------------------------

  /** Tear down the PeerConnection and the per-leg Rust codec session. Idempotent. */
  teardown(): void {
    this.state = 'terminated';
    try { this.pc?.close(); } catch { /* ignore */ }
    this.pc = null;
    // Destroy the isolated Rust codec session for this leg.
    destroySession(this.codecSessionId).catch(() => {});
    // Note: RTP socket is NOT closed here — the RtpPortPool manages that.
  }

  /** Snapshot of this leg's state for status reporting / the UI. */
  getStatus(): ILegStatus {
    return {
      id: this.id,
      type: this.type,
      state: this.state,
      remoteMedia: this.remoteMedia,
      rtpPort: this.rtpPort,
      pktSent: this.pktSent,
      pktReceived: this.pktReceived,
      codec: codecDisplayName(this.codec),
      transcoding: this.toSipTranscoder !== null || this.fromSipTranscoder !== null,
    };
  }

  // -------------------------------------------------------------------------
  // Helpers
  // -------------------------------------------------------------------------

  /** Best-effort JSON send over the signaling WebSocket; drops silently if closed. */
  private wsSend(data: unknown): void {
    try {
      if (this.ws.readyState === WebSocket.OPEN) {
        this.ws.send(JSON.stringify(data));
      }
    } catch { /* ignore */ }
  }
}
|
||||
40
ts/codec.ts
40
ts/codec.ts
@@ -1,40 +0,0 @@
|
||||
/**
|
||||
* Audio codec translation layer for bridging between WebRTC and SIP.
|
||||
*
|
||||
* All actual codec work (Opus, G.722, PCMU, PCMA) is done in Rust via
|
||||
* the smartrust bridge. This module provides the RTP-level transcoding
|
||||
* interface used by the webrtcbridge.
|
||||
*/
|
||||
|
||||
import { Buffer } from 'node:buffer';
|
||||
import { transcode, isCodecReady } from './opusbridge.ts';
|
||||
|
||||
/** Opus dynamic payload type (standard WebRTC assignment). */
export const OPUS_PT = 111;

/**
 * An RTP payload transcoder between two payload types.
 * The actual codec work happens in Rust over IPC, so conversion is async.
 */
export interface IRtpTranscoder {
  /** Transcode an RTP payload. Always async (Rust IPC). */
  payload(data: Buffer): Promise<Buffer>;
  /** Source RTP payload type. */
  fromPT: number;
  /** Destination RTP payload type. */
  toPT: number;
}
|
||||
|
||||
/**
|
||||
* Create a transcoder that converts RTP payloads between two codecs.
|
||||
* Returns null if the codecs are the same or the Rust bridge isn't ready.
|
||||
*
|
||||
* @param sessionId - optional Rust codec session for isolated state per call
|
||||
*/
|
||||
export function createTranscoder(fromPT: number, toPT: number, sessionId?: string, direction?: string): IRtpTranscoder | null {
|
||||
if (fromPT === toPT) return null;
|
||||
if (!isCodecReady()) return null;
|
||||
|
||||
return {
|
||||
fromPT,
|
||||
toPT,
|
||||
async payload(data: Buffer): Promise<Buffer> {
|
||||
const result = await transcode(data, fromPT, toPT, sessionId, direction);
|
||||
return result || Buffer.alloc(0); // return empty on failure — never pass raw codec bytes
|
||||
},
|
||||
};
|
||||
}
|
||||
248
ts/config.ts
248
ts/config.ts
@@ -8,7 +8,16 @@
|
||||
|
||||
import fs from 'node:fs';
|
||||
import path from 'node:path';
|
||||
import type { IEndpoint } from './sip/index.ts';
|
||||
import type { IVoiceboxConfig } from './voicebox.js';
|
||||
|
||||
// ---------------------------------------------------------------------------
|
||||
// Shared types (previously in ts/sip/types.ts, now inlined)
|
||||
// ---------------------------------------------------------------------------
|
||||
|
||||
/** A network endpoint: address plus port. */
export interface IEndpoint {
  /** Host address. */
  address: string;
  /** Port number. */
  port: number;
}
|
||||
|
||||
// ---------------------------------------------------------------------------
|
||||
// Config interfaces
|
||||
@@ -39,6 +48,24 @@ export interface IDeviceConfig {
|
||||
extension: string;
|
||||
}
|
||||
|
||||
/** How an incoming-number entry matches: a single number, a numeric range, or a regex. */
export type TIncomingNumberMode = 'single' | 'range' | 'regex';

/** Configuration for a provider-assigned incoming number (DID) or number block. */
export interface IIncomingNumberConfig {
  /** Unique ID for this entry. */
  id: string;
  /** Human-readable label (defaults to the ID when omitted). */
  label: string;
  /** Provider this number belongs to. */
  providerId?: string;
  /** Matching mode for this entry. */
  mode: TIncomingNumberMode;
  /** Country code, e.g. '+49'; not applied in regex mode. */
  countryCode?: string;
  areaCode?: string;
  localNumber?: string;
  /** Upper bound of the number range (mode 'range'). */
  rangeEnd?: string;
  /** Regex pattern (mode 'regex'). */
  pattern?: string;

  // Legacy persisted fields kept for migration compatibility.
  number?: string;
  rangeStart?: string;
}
|
||||
|
||||
// ---------------------------------------------------------------------------
|
||||
// Match/Action routing model
|
||||
// ---------------------------------------------------------------------------
|
||||
@@ -53,8 +80,11 @@ export interface ISipRouteMatch {
|
||||
direction: 'inbound' | 'outbound';
|
||||
|
||||
/**
|
||||
* Match the dialed/called number (To/Request-URI for inbound DID, dialed digits for outbound).
|
||||
* Supports: exact string, prefix with trailing '*' (e.g. "+4930*"), or regex ("/^\\+49/").
|
||||
* Match the normalized called number.
|
||||
*
|
||||
* Inbound: matches the provider-delivered DID / Request-URI user part.
|
||||
* Outbound: matches the normalized dialed digits.
|
||||
* Supports: exact string, numeric range `start..end`, prefix with trailing '*' (e.g. "+4930*"), or regex ("/^\\+49/").
|
||||
*/
|
||||
numberPattern?: string;
|
||||
|
||||
@@ -80,13 +110,13 @@ export interface ISipRouteAction {
|
||||
|
||||
// --- Inbound actions (IVR / voicemail) ---
|
||||
|
||||
/** Route directly to a voicemail box (skip ringing devices). */
|
||||
/** Voicemail fallback for matched inbound routes. */
|
||||
voicemailBox?: string;
|
||||
|
||||
/** Route to an IVR menu by menu ID (skip ringing devices). */
|
||||
ivrMenuId?: string;
|
||||
|
||||
/** Override no-answer timeout (seconds) before routing to voicemail. */
|
||||
/** Reserved for future no-answer handling. */
|
||||
noAnswerTimeout?: number;
|
||||
|
||||
// --- Outbound actions (provider selection) ---
|
||||
@@ -152,24 +182,13 @@ export interface IContact {
|
||||
// Voicebox configuration
|
||||
// ---------------------------------------------------------------------------
|
||||
|
||||
/** Per-box voicemail configuration: greeting source, timeouts, and storage limits. */
export interface IVoiceboxConfig {
  /** Unique ID — typically matches device ID or extension. */
  id: string;
  /** Whether this voicebox is active. */
  enabled: boolean;
  /** Custom TTS greeting text. */
  greetingText?: string;
  /** TTS voice ID (default 'af_bella'). */
  greetingVoice?: string;
  /** Path to uploaded WAV greeting (overrides TTS). */
  greetingWavPath?: string;
  /** Seconds to wait before routing to voicemail (default 25). */
  noAnswerTimeoutSec?: number;
  /** Maximum recording duration in seconds (default 120). */
  maxRecordingSec?: number;
  /** Maximum stored messages per box (default 50). */
  maxMessages?: number;
}
|
||||
// Canonical definition lives in voicebox.ts (imported at the top of this
|
||||
// file) — re-exported here so consumers can import everything from a
|
||||
// single config module without pulling in the voicebox implementation.
|
||||
// This used to be a duplicated interface and caused
|
||||
// "number | undefined is not assignable to number" type errors when
|
||||
// passing config.voiceboxes into VoiceboxManager.init().
|
||||
export type { IVoiceboxConfig };
|
||||
|
||||
// ---------------------------------------------------------------------------
|
||||
// IVR configuration
|
||||
@@ -233,6 +252,7 @@ export interface IAppConfig {
|
||||
proxy: IProxyConfig;
|
||||
providers: IProviderConfig[];
|
||||
devices: IDeviceConfig[];
|
||||
incomingNumbers?: IIncomingNumberConfig[];
|
||||
routing: IRoutingConfig;
|
||||
contacts: IContact[];
|
||||
voiceboxes?: IVoiceboxConfig[];
|
||||
@@ -287,6 +307,14 @@ export function loadConfig(): IAppConfig {
|
||||
d.extension ??= '100';
|
||||
}
|
||||
|
||||
cfg.incomingNumbers ??= [];
|
||||
for (const incoming of cfg.incomingNumbers) {
|
||||
if (!incoming.id) incoming.id = `incoming-${Date.now()}`;
|
||||
incoming.label ??= incoming.id;
|
||||
incoming.mode ??= incoming.pattern ? 'regex' : incoming.rangeStart || incoming.rangeEnd ? 'range' : 'single';
|
||||
incoming.countryCode ??= incoming.mode === 'regex' ? undefined : '+49';
|
||||
}
|
||||
|
||||
cfg.routing ??= { routes: [] };
|
||||
cfg.routing.routes ??= [];
|
||||
|
||||
@@ -319,175 +347,5 @@ export function loadConfig(): IAppConfig {
|
||||
return cfg;
|
||||
}
|
||||
|
||||
// ---------------------------------------------------------------------------
|
||||
// Pattern matching
|
||||
// ---------------------------------------------------------------------------
|
||||
|
||||
/**
|
||||
* Test a value against a pattern string.
|
||||
* - undefined/empty pattern: matches everything (wildcard)
|
||||
* - Prefix: "pattern*" matches values starting with "pattern"
|
||||
* - Regex: "/pattern/" or "/pattern/i" compiles as RegExp
|
||||
* - Otherwise: exact match
|
||||
*/
|
||||
export function matchesPattern(pattern: string | undefined, value: string): boolean {
|
||||
if (!pattern) return true;
|
||||
|
||||
// Prefix match: "+49*"
|
||||
if (pattern.endsWith('*')) {
|
||||
return value.startsWith(pattern.slice(0, -1));
|
||||
}
|
||||
|
||||
// Regex match: "/^\\+49/" or "/pattern/i"
|
||||
if (pattern.startsWith('/')) {
|
||||
const lastSlash = pattern.lastIndexOf('/');
|
||||
if (lastSlash > 0) {
|
||||
const re = new RegExp(pattern.slice(1, lastSlash), pattern.slice(lastSlash + 1));
|
||||
return re.test(value);
|
||||
}
|
||||
}
|
||||
|
||||
// Exact match.
|
||||
return value === pattern;
|
||||
}
|
||||
|
||||
// ---------------------------------------------------------------------------
|
||||
// Route resolution
|
||||
// ---------------------------------------------------------------------------
|
||||
|
||||
/** Result of resolving an outbound route. */
export interface IOutboundRouteResult {
  /** The provider chosen to carry the call. */
  provider: IProviderConfig;
  /** Dialed number after stripPrefix/prependPrefix transformations. */
  transformedNumber: string;
}

/** Result of resolving an inbound route. */
export interface IInboundRouteResult {
  /** Device IDs to ring (empty = all devices). */
  deviceIds: string[];
  /** Whether browser (WebRTC) clients should ring as well. */
  ringBrowsers: boolean;
  /** If set, route directly to this voicemail box (skip ringing). */
  voicemailBox?: string;
  /** If set, route to this IVR menu (skip ringing). */
  ivrMenuId?: string;
  /** Override for no-answer timeout in seconds. */
  noAnswerTimeout?: number;
}
|
||||
|
||||
/**
|
||||
* Resolve which provider to use for an outbound call, and transform the number.
|
||||
*
|
||||
* @param cfg - app config
|
||||
* @param dialedNumber - the number being dialed
|
||||
* @param sourceDeviceId - optional device originating the call
|
||||
* @param isProviderRegistered - callback to check if a provider is currently registered
|
||||
*/
|
||||
export function resolveOutboundRoute(
|
||||
cfg: IAppConfig,
|
||||
dialedNumber: string,
|
||||
sourceDeviceId?: string,
|
||||
isProviderRegistered?: (providerId: string) => boolean,
|
||||
): IOutboundRouteResult | null {
|
||||
const routes = cfg.routing.routes
|
||||
.filter((r) => r.enabled && r.match.direction === 'outbound')
|
||||
.sort((a, b) => b.priority - a.priority);
|
||||
|
||||
for (const route of routes) {
|
||||
const m = route.match;
|
||||
|
||||
if (!matchesPattern(m.numberPattern, dialedNumber)) continue;
|
||||
if (m.sourceDevice && m.sourceDevice !== sourceDeviceId) continue;
|
||||
|
||||
// Find a registered provider (primary + failovers).
|
||||
const candidates = [route.action.provider, ...(route.action.failoverProviders || [])].filter(Boolean) as string[];
|
||||
for (const pid of candidates) {
|
||||
const provider = getProvider(cfg, pid);
|
||||
if (!provider) continue;
|
||||
if (isProviderRegistered && !isProviderRegistered(pid)) continue;
|
||||
|
||||
// Apply number transformation.
|
||||
let num = dialedNumber;
|
||||
if (route.action.stripPrefix && num.startsWith(route.action.stripPrefix)) {
|
||||
num = num.slice(route.action.stripPrefix.length);
|
||||
}
|
||||
if (route.action.prependPrefix) {
|
||||
num = route.action.prependPrefix + num;
|
||||
}
|
||||
|
||||
return { provider, transformedNumber: num };
|
||||
}
|
||||
|
||||
// Route matched but no provider is available — continue to next route.
|
||||
}
|
||||
|
||||
// Fallback: first available provider.
|
||||
const fallback = cfg.providers[0];
|
||||
return fallback ? { provider: fallback, transformedNumber: dialedNumber } : null;
|
||||
}
|
||||
|
||||
/**
|
||||
* Resolve which devices/browsers to ring for an inbound call.
|
||||
*
|
||||
* @param cfg - app config
|
||||
* @param providerId - the provider the call is coming from
|
||||
* @param calledNumber - the DID / called number (from Request-URI)
|
||||
* @param callerNumber - the caller ID (from From header)
|
||||
*/
|
||||
export function resolveInboundRoute(
|
||||
cfg: IAppConfig,
|
||||
providerId: string,
|
||||
calledNumber: string,
|
||||
callerNumber: string,
|
||||
): IInboundRouteResult {
|
||||
const routes = cfg.routing.routes
|
||||
.filter((r) => r.enabled && r.match.direction === 'inbound')
|
||||
.sort((a, b) => b.priority - a.priority);
|
||||
|
||||
for (const route of routes) {
|
||||
const m = route.match;
|
||||
|
||||
if (m.sourceProvider && m.sourceProvider !== providerId) continue;
|
||||
if (!matchesPattern(m.numberPattern, calledNumber)) continue;
|
||||
if (!matchesPattern(m.callerPattern, callerNumber)) continue;
|
||||
|
||||
return {
|
||||
deviceIds: route.action.targets || [],
|
||||
ringBrowsers: route.action.ringBrowsers ?? false,
|
||||
voicemailBox: route.action.voicemailBox,
|
||||
ivrMenuId: route.action.ivrMenuId,
|
||||
noAnswerTimeout: route.action.noAnswerTimeout,
|
||||
};
|
||||
}
|
||||
|
||||
// Fallback: ring all devices + browsers.
|
||||
return { deviceIds: [], ringBrowsers: true };
|
||||
}
|
||||
|
||||
// ---------------------------------------------------------------------------
|
||||
// Lookup helpers
|
||||
// ---------------------------------------------------------------------------
|
||||
|
||||
export function getProvider(cfg: IAppConfig, id: string): IProviderConfig | null {
|
||||
return cfg.providers.find((p) => p.id === id) ?? null;
|
||||
}
|
||||
|
||||
export function getDevice(cfg: IAppConfig, id: string): IDeviceConfig | null {
|
||||
return cfg.devices.find((d) => d.id === id) ?? null;
|
||||
}
|
||||
|
||||
/**
|
||||
* @deprecated Use resolveOutboundRoute() instead. Kept for backward compat.
|
||||
*/
|
||||
export function getProviderForOutbound(cfg: IAppConfig): IProviderConfig | null {
|
||||
const result = resolveOutboundRoute(cfg, '');
|
||||
return result?.provider ?? null;
|
||||
}
|
||||
|
||||
/**
|
||||
* @deprecated Use resolveInboundRoute() instead. Kept for backward compat.
|
||||
*/
|
||||
export function getDevicesForInbound(cfg: IAppConfig, providerId: string): IDeviceConfig[] {
|
||||
const result = resolveInboundRoute(cfg, providerId, '', '');
|
||||
if (!result.deviceIds.length) return cfg.devices;
|
||||
return result.deviceIds.map((id) => getDevice(cfg, id)).filter(Boolean) as IDeviceConfig[];
|
||||
}
|
||||
// Route resolution, pattern matching, and provider/device lookup
|
||||
// are now handled entirely by the Rust proxy-engine.
|
||||
|
||||
126
ts/frontend.ts
126
ts/frontend.ts
@@ -11,12 +11,39 @@ import path from 'node:path';
|
||||
import http from 'node:http';
|
||||
import https from 'node:https';
|
||||
import { WebSocketServer, WebSocket } from 'ws';
|
||||
import type { CallManager } from './call/index.ts';
|
||||
import { handleWebRtcSignaling } from './webrtcbridge.ts';
|
||||
import type { VoiceboxManager } from './voicebox.ts';
|
||||
|
||||
const CONFIG_PATH = path.join(process.cwd(), '.nogit', 'config.json');
|
||||
|
||||
/**
 * Callbacks and services shared by every web-UI HTTP request handler.
 */
interface IHandleRequestContext {
  /** Returns the current system status snapshot (shape defined by the caller). */
  getStatus: () => unknown;
  /** Logging sink for request-level messages. */
  log: (msg: string) => void;
  /** Starts an outbound call; returns the new call's id, or null on failure. */
  onStartCall: (number: string, deviceId?: string, providerId?: string) => { id: string } | null;
  /** Hangs up a call by id; returns whether a matching call was terminated. */
  onHangupCall: (callId: string) => boolean;
  /** Invoked after config.json has been rewritten; may be async and is awaited. */
  onConfigSaved?: () => void | Promise<void>;
  /** Optional voicemail subsystem, when enabled. */
  voiceboxManager?: VoiceboxManager;
}
|
||||
|
||||
/**
 * Options for initWebUi(): the request-handler context plus the listen
 * port and optional WebRTC signaling hooks.
 */
interface IWebUiOptions extends IHandleRequestContext {
  /** TCP port the web UI listens on (bound to all interfaces). */
  port: number;
  /** Invoked with a browser's SDP offer for a new WebRTC session. */
  onWebRtcOffer?: (sessionId: string, sdp: string, ws: WebSocket) => Promise<void>;
  /** Invoked with a trickle ICE candidate for an existing session. */
  onWebRtcIce?: (sessionId: string, candidate: unknown) => Promise<void>;
  /** Invoked when the browser hangs up and the session should be torn down. */
  onWebRtcClose?: (sessionId: string) => Promise<void>;
  /** Invoked when the browser accepts an incoming call. */
  onWebRtcAccept?: (callId: string, sessionId: string) => void;
}
|
||||
|
||||
/**
 * Loosely-typed message received over the web-UI WebSocket for `webrtc-*`
 * signaling. All fields are optional because the payload originates from
 * the browser and is only partially validated before dispatch.
 */
interface IWebRtcSocketMessage {
  /** Message discriminator, e.g. 'webrtc-offer', 'webrtc-ice', 'webrtc-accept'. */
  type?: string;
  /** WebRTC signaling session id. */
  sessionId?: string;
  /** Call id (used by 'webrtc-accept'). */
  callId?: string;
  /** SDP offer/answer payload. */
  sdp?: string;
  /** ICE candidate payload (opaque to this layer). */
  candidate?: unknown;
  /** Browser-reported user agent string. */
  userAgent?: string;
  /** Remote IP injected server-side before forwarding to the signaling handler. */
  _remoteIp?: string | null;
  [key: string]: unknown;
}
|
||||
|
||||
// ---------------------------------------------------------------------------
|
||||
// WebSocket broadcast
|
||||
// ---------------------------------------------------------------------------
|
||||
@@ -79,14 +106,9 @@ function loadStaticFiles(): void {
|
||||
async function handleRequest(
|
||||
req: http.IncomingMessage,
|
||||
res: http.ServerResponse,
|
||||
getStatus: () => unknown,
|
||||
log: (msg: string) => void,
|
||||
onStartCall: (number: string, deviceId?: string, providerId?: string) => { id: string } | null,
|
||||
onHangupCall: (callId: string) => boolean,
|
||||
onConfigSaved?: () => void,
|
||||
callManager?: CallManager,
|
||||
voiceboxManager?: VoiceboxManager,
|
||||
context: IHandleRequestContext,
|
||||
): Promise<void> {
|
||||
const { getStatus, log, onStartCall, onHangupCall, onConfigSaved, voiceboxManager } = context;
|
||||
const url = new URL(req.url || '/', `http://${req.headers.host || 'localhost'}`);
|
||||
const method = req.method || 'GET';
|
||||
|
||||
@@ -125,14 +147,19 @@ async function handleRequest(
|
||||
}
|
||||
}
|
||||
|
||||
// API: add leg to call.
|
||||
// API: add a SIP device to a call (mid-call INVITE to desk phone).
|
||||
if (url.pathname.startsWith('/api/call/') && url.pathname.endsWith('/addleg') && method === 'POST') {
|
||||
try {
|
||||
const callId = url.pathname.split('/')[3];
|
||||
const body = await readJsonBody(req);
|
||||
if (!body?.deviceId) return sendJson(res, { ok: false, error: 'missing deviceId' }, 400);
|
||||
const ok = callManager?.addDeviceToCall(callId, body.deviceId) ?? false;
|
||||
return sendJson(res, { ok });
|
||||
const { addDeviceLeg } = await import('./proxybridge.ts');
|
||||
const legId = await addDeviceLeg(callId, body.deviceId);
|
||||
if (legId) {
|
||||
return sendJson(res, { ok: true, legId });
|
||||
} else {
|
||||
return sendJson(res, { ok: false, error: 'device not registered or call not found' }, 404);
|
||||
}
|
||||
} catch (e: any) {
|
||||
return sendJson(res, { ok: false, error: e.message }, 400);
|
||||
}
|
||||
@@ -144,8 +171,9 @@ async function handleRequest(
|
||||
const callId = url.pathname.split('/')[3];
|
||||
const body = await readJsonBody(req);
|
||||
if (!body?.number) return sendJson(res, { ok: false, error: 'missing number' }, 400);
|
||||
const ok = callManager?.addExternalToCall(callId, body.number, body.providerId) ?? false;
|
||||
return sendJson(res, { ok });
|
||||
const { addLeg: addLegFn } = await import('./proxybridge.ts');
|
||||
const legId = await addLegFn(callId, body.number, body.providerId);
|
||||
return sendJson(res, { ok: !!legId, legId });
|
||||
} catch (e: any) {
|
||||
return sendJson(res, { ok: false, error: e.message }, 400);
|
||||
}
|
||||
@@ -157,22 +185,22 @@ async function handleRequest(
|
||||
const callId = url.pathname.split('/')[3];
|
||||
const body = await readJsonBody(req);
|
||||
if (!body?.legId) return sendJson(res, { ok: false, error: 'missing legId' }, 400);
|
||||
const ok = callManager?.removeLegFromCall(callId, body.legId) ?? false;
|
||||
const { removeLeg: removeLegFn } = await import('./proxybridge.ts');
|
||||
const ok = await removeLegFn(callId, body.legId);
|
||||
return sendJson(res, { ok });
|
||||
} catch (e: any) {
|
||||
return sendJson(res, { ok: false, error: e.message }, 400);
|
||||
}
|
||||
}
|
||||
|
||||
// API: transfer leg.
|
||||
// API: transfer leg (not yet implemented).
|
||||
if (url.pathname === '/api/transfer' && method === 'POST') {
|
||||
try {
|
||||
const body = await readJsonBody(req);
|
||||
if (!body?.sourceCallId || !body?.legId || !body?.targetCallId) {
|
||||
return sendJson(res, { ok: false, error: 'missing sourceCallId, legId, or targetCallId' }, 400);
|
||||
}
|
||||
const ok = callManager?.transferLeg(body.sourceCallId, body.legId, body.targetCallId) ?? false;
|
||||
return sendJson(res, { ok });
|
||||
return sendJson(res, { ok: false, error: 'not yet implemented' }, 501);
|
||||
} catch (e: any) {
|
||||
return sendJson(res, { ok: false, error: e.message }, 400);
|
||||
}
|
||||
@@ -238,6 +266,7 @@ async function handleRequest(
|
||||
if (existing && ud.displayName !== undefined) existing.displayName = ud.displayName;
|
||||
}
|
||||
}
|
||||
if (updates.incomingNumbers !== undefined) cfg.incomingNumbers = updates.incomingNumbers;
|
||||
if (updates.routing) {
|
||||
if (updates.routing.routes) {
|
||||
cfg.routing.routes = updates.routing.routes;
|
||||
@@ -249,7 +278,7 @@ async function handleRequest(
|
||||
|
||||
fs.writeFileSync(CONFIG_PATH, JSON.stringify(cfg, null, 2) + '\n');
|
||||
log('[config] updated config.json');
|
||||
onConfigSaved?.();
|
||||
await onConfigSaved?.();
|
||||
return sendJson(res, { ok: true });
|
||||
} catch (e: any) {
|
||||
return sendJson(res, { ok: false, error: e.message }, 400);
|
||||
@@ -330,15 +359,21 @@ async function handleRequest(
|
||||
// ---------------------------------------------------------------------------
|
||||
|
||||
export function initWebUi(
|
||||
getStatus: () => unknown,
|
||||
log: (msg: string) => void,
|
||||
onStartCall: (number: string, deviceId?: string, providerId?: string) => { id: string } | null,
|
||||
onHangupCall: (callId: string) => boolean,
|
||||
onConfigSaved?: () => void,
|
||||
callManager?: CallManager,
|
||||
voiceboxManager?: VoiceboxManager,
|
||||
options: IWebUiOptions,
|
||||
): void {
|
||||
const WEB_PORT = 3060;
|
||||
const {
|
||||
port,
|
||||
getStatus,
|
||||
log,
|
||||
onStartCall,
|
||||
onHangupCall,
|
||||
onConfigSaved,
|
||||
voiceboxManager,
|
||||
onWebRtcOffer,
|
||||
onWebRtcIce,
|
||||
onWebRtcClose,
|
||||
onWebRtcAccept,
|
||||
} = options;
|
||||
|
||||
loadStaticFiles();
|
||||
|
||||
@@ -352,12 +387,12 @@ export function initWebUi(
|
||||
const cert = fs.readFileSync(certPath, 'utf8');
|
||||
const key = fs.readFileSync(keyPath, 'utf8');
|
||||
server = https.createServer({ cert, key }, (req, res) =>
|
||||
handleRequest(req, res, getStatus, log, onStartCall, onHangupCall, onConfigSaved, callManager, voiceboxManager).catch(() => { res.writeHead(500); res.end(); }),
|
||||
handleRequest(req, res, { getStatus, log, onStartCall, onHangupCall, onConfigSaved, voiceboxManager }).catch(() => { res.writeHead(500); res.end(); }),
|
||||
);
|
||||
useTls = true;
|
||||
} catch {
|
||||
server = http.createServer((req, res) =>
|
||||
handleRequest(req, res, getStatus, log, onStartCall, onHangupCall, onConfigSaved, callManager, voiceboxManager).catch(() => { res.writeHead(500); res.end(); }),
|
||||
handleRequest(req, res, { getStatus, log, onStartCall, onHangupCall, onConfigSaved, voiceboxManager }).catch(() => { res.writeHead(500); res.end(); }),
|
||||
);
|
||||
}
|
||||
|
||||
@@ -371,21 +406,30 @@ export function initWebUi(
|
||||
|
||||
socket.on('message', (raw) => {
|
||||
try {
|
||||
const msg = JSON.parse(raw.toString());
|
||||
if (msg.type === 'webrtc-accept' && msg.callId) {
|
||||
log(`[webrtc] browser accepted call ${msg.callId} session=${msg.sessionId || 'none'}`);
|
||||
const ok = callManager?.acceptBrowserCall(msg.callId, msg.sessionId) ?? false;
|
||||
log(`[webrtc] acceptBrowserCall result: ${ok}`);
|
||||
} else if (msg.type === 'webrtc-offer' && msg.sessionId) {
|
||||
callManager?.handleWebRtcOffer(msg.sessionId, msg.sdp, socket as any).catch((e: any) =>
|
||||
log(`[webrtc] offer error: ${e.message}`));
|
||||
const msg = JSON.parse(raw.toString()) as IWebRtcSocketMessage;
|
||||
if (msg.type === 'webrtc-offer' && msg.sessionId) {
|
||||
// Forward to Rust proxy-engine for WebRTC handling.
|
||||
if (onWebRtcOffer && typeof msg.sdp === 'string') {
|
||||
log(`[webrtc-ws] offer msg keys: ${Object.keys(msg).join(',')}, sdp type: ${typeof msg.sdp}, sdp len: ${msg.sdp?.length || 0}`);
|
||||
onWebRtcOffer(msg.sessionId, msg.sdp, socket).catch((e: any) =>
|
||||
log(`[webrtc] offer error: ${e.message}`));
|
||||
}
|
||||
} else if (msg.type === 'webrtc-ice' && msg.sessionId) {
|
||||
callManager?.handleWebRtcIce(msg.sessionId, msg.candidate).catch(() => {});
|
||||
if (onWebRtcIce) {
|
||||
onWebRtcIce(msg.sessionId, msg.candidate).catch(() => {});
|
||||
}
|
||||
} else if (msg.type === 'webrtc-hangup' && msg.sessionId) {
|
||||
callManager?.handleWebRtcHangup(msg.sessionId);
|
||||
if (onWebRtcClose) {
|
||||
onWebRtcClose(msg.sessionId).catch(() => {});
|
||||
}
|
||||
} else if (msg.type === 'webrtc-accept' && msg.callId) {
|
||||
log(`[webrtc] accept: call=${msg.callId} session=${msg.sessionId || 'none'}`);
|
||||
if (onWebRtcAccept && msg.sessionId) {
|
||||
onWebRtcAccept(msg.callId, msg.sessionId);
|
||||
}
|
||||
} else if (msg.type?.startsWith('webrtc-')) {
|
||||
msg._remoteIp = remoteIp;
|
||||
handleWebRtcSignaling(socket as any, msg);
|
||||
handleWebRtcSignaling(socket, msg);
|
||||
}
|
||||
} catch { /* ignore */ }
|
||||
});
|
||||
@@ -394,8 +438,8 @@ export function initWebUi(
|
||||
socket.on('error', () => wsClients.delete(socket));
|
||||
});
|
||||
|
||||
server.listen(WEB_PORT, '0.0.0.0', () => {
|
||||
log(`web ui listening on ${useTls ? 'https' : 'http'}://0.0.0.0:${WEB_PORT}`);
|
||||
server.listen(port, '0.0.0.0', () => {
|
||||
log(`web ui listening on ${useTls ? 'https' : 'http'}://0.0.0.0:${port}`);
|
||||
});
|
||||
|
||||
setInterval(() => broadcastWs('status', getStatus()), 1000);
|
||||
|
||||
209
ts/ivr.ts
209
ts/ivr.ts
@@ -1,209 +0,0 @@
|
||||
/**
|
||||
* IVR engine — state machine that navigates callers through menus
|
||||
* based on DTMF digit input.
|
||||
*
|
||||
* The IvrEngine is instantiated per-call and drives a SystemLeg:
|
||||
* - Plays menu prompts via the SystemLeg's prompt playback
|
||||
* - Receives DTMF digits and resolves them to actions
|
||||
* - Fires an onAction callback for the CallManager to execute
|
||||
* (route to extension, voicemail, transfer, etc.)
|
||||
*/
|
||||
|
||||
import type { IIvrConfig, IIvrMenu, TIvrAction } from './config.ts';
|
||||
import type { SystemLeg } from './call/system-leg.ts';
|
||||
|
||||
// ---------------------------------------------------------------------------
|
||||
// IVR Engine
|
||||
// ---------------------------------------------------------------------------
|
||||
|
||||
/**
 * Per-call IVR state machine.
 *
 * Drives a SystemLeg: plays menu prompts, collects DTMF digits, and maps
 * them to configured actions. Navigation-local actions (submenu, repeat,
 * play-message) are executed here; everything else is delegated to the
 * owner via the onAction callback.
 */
export class IvrEngine {
  // IVR configuration: the menu tree plus the entry menu id.
  private config: IIvrConfig;
  // Media leg used for prompt playback (playPrompt / cancelPrompt).
  private systemLeg: SystemLeg;
  // Callback for actions this engine does not handle itself
  // (route-extension, route-voicemail, transfer, hangup, ...).
  private onAction: (action: TIvrAction) => void;
  // Logging sink.
  private log: (msg: string) => void;

  /** The currently active menu (null before start() and after destroy()). */
  private currentMenu: IIvrMenu | null = null;

  /** How many times the current menu has timed out (for the retry limit). */
  private retryCount = 0;

  /** Timer for digit input timeout; armed after a prompt finishes playing. */
  private digitTimeout: ReturnType<typeof setTimeout> | null = null;

  /**
   * Whether the engine is waiting for a digit (prompt finished playing).
   * NOTE(review): this flag is written in several places but never read —
   * handleDigit() does not consult it; confirm whether it can be removed.
   */
  private waitingForDigit = false;

  /** Whether the engine has been destroyed (all entry points become no-ops). */
  private destroyed = false;

  /**
   * @param config - IVR menus and entry point.
   * @param systemLeg - Leg used to play prompts to the caller.
   * @param onAction - Invoked for actions the engine delegates upward.
   * @param log - Message logger.
   */
  constructor(
    config: IIvrConfig,
    systemLeg: SystemLeg,
    onAction: (action: TIvrAction) => void,
    log: (msg: string) => void,
  ) {
    this.config = config;
    this.systemLeg = systemLeg;
    this.onAction = onAction;
    this.log = log;
  }

  // -------------------------------------------------------------------------
  // Public API
  // -------------------------------------------------------------------------

  /**
   * Start the IVR — navigates to the entry menu and plays its prompt.
   * Emits a 'hangup' action when the configured entry menu does not exist.
   */
  start(): void {
    const entryMenu = this.getMenu(this.config.entryMenuId);
    if (!entryMenu) {
      this.log(`[ivr] entry menu "${this.config.entryMenuId}" not found — hanging up`);
      this.onAction({ type: 'hangup' });
      return;
    }

    this.navigateToMenu(entryMenu);
  }

  /**
   * Handle a DTMF digit from the caller.
   *
   * Cancels any in-flight prompt (barge-in) and the digit timeout, then
   * resolves the digit against the current menu's entries. Unknown digits
   * trigger the menu's invalidAction.
   */
  handleDigit(digit: string): void {
    if (this.destroyed || !this.currentMenu) return;

    // Clear the timeout — caller pressed something.
    this.clearDigitTimeout();

    // Cancel any playing prompt (caller interrupted it).
    this.systemLeg.cancelPrompt();
    this.waitingForDigit = false;

    this.log(`[ivr] digit '${digit}' in menu "${this.currentMenu.id}"`);

    // Look up the digit in the current menu.
    const entry = this.currentMenu.entries.find((e) => e.digit === digit);
    if (entry) {
      this.executeAction(entry.action);
    } else {
      this.log(`[ivr] invalid digit '${digit}' in menu "${this.currentMenu.id}"`);
      this.executeAction(this.currentMenu.invalidAction);
    }
  }

  /**
   * Clean up timers and state. After this call every entry point is a no-op.
   */
  destroy(): void {
    this.destroyed = true;
    this.clearDigitTimeout();
    this.currentMenu = null;
  }

  // -------------------------------------------------------------------------
  // Internal
  // -------------------------------------------------------------------------

  /** Navigate to a menu: play its prompt, then wait for digit. */
  private navigateToMenu(menu: IIvrMenu): void {
    if (this.destroyed) return;

    this.currentMenu = menu;
    this.waitingForDigit = false;
    this.clearDigitTimeout();

    // Prompt ids follow the "ivr-menu-<menuId>" naming convention.
    const promptId = `ivr-menu-${menu.id}`;
    this.log(`[ivr] playing menu "${menu.id}" prompt`);

    this.systemLeg.playPrompt(promptId, () => {
      if (this.destroyed) return;
      // Prompt finished — start digit timeout.
      this.waitingForDigit = true;
      this.startDigitTimeout();
    });
  }

  /** Start the timeout timer for digit input (menu timeoutSec, default 5s). */
  private startDigitTimeout(): void {
    const timeoutSec = this.currentMenu?.timeoutSec ?? 5;

    this.digitTimeout = setTimeout(() => {
      if (this.destroyed || !this.currentMenu) return;
      this.log(`[ivr] digit timeout in menu "${this.currentMenu.id}"`);
      this.handleTimeout();
    }, timeoutSec * 1000);
  }

  /**
   * Handle timeout (no digit pressed): replay the menu until maxRetries
   * (default 3) is reached, then fire the menu's timeoutAction.
   */
  private handleTimeout(): void {
    if (!this.currentMenu) return;

    this.retryCount++;
    const maxRetries = this.currentMenu.maxRetries ?? 3;

    if (this.retryCount >= maxRetries) {
      this.log(`[ivr] max retries (${maxRetries}) reached in menu "${this.currentMenu.id}"`);
      this.executeAction(this.currentMenu.timeoutAction);
    } else {
      this.log(`[ivr] retry ${this.retryCount}/${maxRetries} in menu "${this.currentMenu.id}"`);
      // Replay the current menu.
      this.navigateToMenu(this.currentMenu);
    }
  }

  /**
   * Execute an IVR action. Submenu, repeat, and play-message are handled
   * locally; every other action type is forwarded via onAction.
   */
  private executeAction(action: TIvrAction): void {
    if (this.destroyed) return;

    switch (action.type) {
      case 'submenu': {
        const submenu = this.getMenu(action.menuId);
        if (submenu) {
          // Entering a new menu resets the timeout retry counter.
          this.retryCount = 0;
          this.navigateToMenu(submenu);
        } else {
          this.log(`[ivr] submenu "${action.menuId}" not found — hanging up`);
          this.onAction({ type: 'hangup' });
        }
        break;
      }

      case 'repeat': {
        if (this.currentMenu) {
          this.navigateToMenu(this.currentMenu);
        }
        break;
      }

      case 'play-message': {
        // Play a message prompt, then return to the current menu.
        this.systemLeg.playPrompt(action.promptId, () => {
          if (this.destroyed || !this.currentMenu) return;
          this.navigateToMenu(this.currentMenu);
        });
        break;
      }

      default:
        // All other actions (route-extension, route-voicemail, transfer, hangup)
        // are handled by the CallManager via the onAction callback.
        this.log(`[ivr] action: ${action.type}`);
        this.onAction(action);
        break;
    }
  }

  /** Look up a menu by ID. */
  private getMenu(menuId: string): IIvrMenu | null {
    return this.config.menus.find((m) => m.id === menuId) ?? null;
  }

  /** Clear the digit timeout timer. */
  private clearDigitTimeout(): void {
    if (this.digitTimeout) {
      clearTimeout(this.digitTimeout);
      this.digitTimeout = null;
    }
  }
}
|
||||
Some files were not shown because too many files have changed in this diff Show More
Reference in New Issue
Block a user