Compare commits
53 Commits
| Author | SHA1 | Date | |
|---|---|---|---|
| c18f2f7ca1 | |||
| 3e2fee16c1 | |||
| 04e706715f | |||
| 980a1500f5 | |||
| 33b4ae5dd0 | |||
| d2c18a4ebb | |||
| 3c010a3b1b | |||
| 88768f0586 | |||
| 0d82a626b5 | |||
| 30d056f376 | |||
| 89ae12318e | |||
| feb3514de4 | |||
| adfc4726fd | |||
| 06c86d7e81 | |||
| cff70ab179 | |||
| 51f7560730 | |||
| 5a280c5c41 | |||
| 59d8c2557c | |||
| cfadd7a2b6 | |||
| 80f710f6d8 | |||
| 9ea57cd659 | |||
| c40c726dc3 | |||
| 37ba7501fa | |||
| 24924a1aea | |||
| 7ed76a9488 | |||
| a9fdfe5733 | |||
| 6fcdf4291a | |||
| 81441e7853 | |||
| 21ffc1d017 | |||
| 2f16c5efae | |||
| 254d7f3633 | |||
| 67537664df | |||
| 54129dcdae | |||
| 8c6556dae3 | |||
| 291beb1da4 | |||
| 79147f1e40 | |||
| c3a63a4092 | |||
| 7c4756402e | |||
| b6950e11d2 | |||
| e4935fbf21 | |||
| f543ff1568 | |||
| c63a759689 | |||
| a02146633b | |||
| f78639dd19 | |||
| 2aca5f1510 | |||
| 73b28f5f57 | |||
| 10ad432a4c | |||
| 66112091a2 | |||
| c9ae747c95 | |||
| 45f9b9c15c | |||
| 7d59361352 | |||
| 6a130db7c7 | |||
| 93f671f1f9 |
@@ -0,0 +1,16 @@
|
||||
node_modules/
|
||||
.nogit/
|
||||
nogit/
|
||||
.git/
|
||||
.playwright-mcp/
|
||||
.vscode/
|
||||
test/
|
||||
dist_rust/
|
||||
dist_ts_web/
|
||||
rust/target/
|
||||
sip_trace.log
|
||||
sip_trace_*.log
|
||||
proxy.out
|
||||
proxy_v2.out
|
||||
*.pid
|
||||
.server.pid
|
||||
@@ -0,0 +1,32 @@
|
||||
name: Docker (tags)
|
||||
|
||||
on:
|
||||
push:
|
||||
tags:
|
||||
- '*'
|
||||
|
||||
env:
|
||||
IMAGE: code.foss.global/host.today/ht-docker-node:dbase_dind
|
||||
NPMCI_LOGIN_DOCKER_GITEA: ${{ github.server_url }}|${{ gitea.repository_owner }}|${{ secrets.GITEA_TOKEN }}
|
||||
NPMCI_LOGIN_DOCKER_DOCKERREGISTRY: ${{ secrets.NPMCI_LOGIN_DOCKER_DOCKERREGISTRY }}
|
||||
|
||||
jobs:
|
||||
release:
|
||||
if: github.event_name == 'push' && startsWith(github.ref, 'refs/tags/')
|
||||
runs-on: ubuntu-latest
|
||||
container:
|
||||
image: ${{ env.IMAGE }}
|
||||
|
||||
steps:
|
||||
- uses: actions/checkout@v3
|
||||
|
||||
- name: Prepare
|
||||
run: |
|
||||
pnpm install -g pnpm
|
||||
pnpm install -g @git.zone/tsdocker
|
||||
|
||||
- name: Release
|
||||
run: |
|
||||
tsdocker login
|
||||
tsdocker build
|
||||
tsdocker push
|
||||
@@ -8,5 +8,30 @@
|
||||
"production": true
|
||||
}
|
||||
]
|
||||
},
|
||||
"@git.zone/tsrust": {
|
||||
"targets": ["linux_amd64", "linux_arm64"]
|
||||
},
|
||||
"@git.zone/tsdocker": {
|
||||
"registries": ["code.foss.global"],
|
||||
"registryRepoMap": {
|
||||
"code.foss.global": "serve.zone/siprouter",
|
||||
"dockerregistry.lossless.digital": "serve.zone/siprouter"
|
||||
},
|
||||
"platforms": ["linux/amd64", "linux/arm64"]
|
||||
},
|
||||
"@git.zone/cli": {
|
||||
"release": {
|
||||
"targets": {
|
||||
"git": {
|
||||
"enabled": true,
|
||||
"remote": "origin"
|
||||
},
|
||||
"docker": {
|
||||
"enabled": true,
|
||||
"engine": "tsdocker"
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
@@ -1,41 +1,103 @@
|
||||
# Project Notes
|
||||
|
||||
## Architecture: Hub Model (Call as Centerpiece)
|
||||
## Architecture: Hub Model in Rust (Call as Centerpiece)
|
||||
|
||||
All call logic lives in `ts/call/`. The Call is the central entity with N legs.
|
||||
The call hub lives in the Rust proxy-engine (`rust/crates/proxy-engine/`). TypeScript is the **control plane only** — it configures the engine, sends high-level commands (`hangup`, `make_call`, `webrtc_offer`, etc.), and receives events (`incoming_call`, `call_answered`, `device_registered`, `webrtc_audio_rx`, …). No raw SIP/RTP ever touches TypeScript.
|
||||
|
||||
### Key Files
|
||||
- `ts/call/call-manager.ts` — singleton registry, factory methods, SIP routing
|
||||
- `ts/call/call.ts` — the hub: owns legs, media forwarding
|
||||
- `ts/call/sip-leg.ts` — SIP device/provider connection (wraps SipDialog)
|
||||
- `ts/call/webrtc-leg.ts` — browser WebRTC connection (wraps werift PeerConnection)
|
||||
- `ts/call/rtp-port-pool.ts` — unified RTP port pool
|
||||
- `ts/sipproxy.ts` — thin bootstrap wiring everything together
|
||||
- `ts/webrtcbridge.ts` — browser device registration (signaling only)
|
||||
The `Call` is still the central entity: it owns N legs and a central mixer task that provides mix-minus audio to all participants. Legs can be `SipProvider`, `SipDevice`, `WebRtc`, or `Tool` (recording/transcription observer).
|
||||
|
||||
### WebRTC Browser Call Flow (Critical)
|
||||
### Key Rust files (`rust/crates/proxy-engine/src/`)
|
||||
|
||||
The browser call flow has a specific signaling order that MUST be followed:
|
||||
- `call_manager.rs` — singleton registry, call factory methods, SIP routing (inbound/outbound/passthrough), B2BUA state machine, inbound route resolution
|
||||
- `call.rs` — the `Call` hub + `LegInfo` struct, owns legs and the mixer task
|
||||
- `sip_leg.rs` — full SIP dialog management for B2BUA legs (INVITE, 407 auth retry, BYE, CANCEL, early media)
|
||||
- `rtp.rs` — RTP port pool (uses `Weak<UdpSocket>` so calls auto-release ports on drop) + RTP header helpers
|
||||
- `mixer.rs` — 20 ms-tick mix-minus engine (48 kHz f32 internal, per-leg transcoding via `codec-lib`, per-leg denoising)
|
||||
- `jitter_buffer.rs` — per-leg reordering/packet-loss compensation
|
||||
- `leg_io.rs` — spawns inbound/outbound RTP I/O tasks per SIP leg
|
||||
- `webrtc_engine.rs` — browser WebRTC sessions (werift-rs based), ICE/DTLS/SRTP
|
||||
- `provider.rs` — SIP trunk registrations, public-IP detection via Via `received=`
|
||||
- `registrar.rs` — accepts REGISTER from SIP phones, tracks contacts (push-based device status)
|
||||
- `config.rs` — `AppConfig` deserialized from TS, route resolvers (`resolve_outbound_route`, `resolve_inbound_route`)
|
||||
- `main.rs` — IPC command dispatcher (`handle_command`), event emitter, top-level SIP packet router
|
||||
- `sip_transport.rs` — owning wrapper around the main SIP UDP socket
|
||||
- `voicemail.rs` / `recorder.rs` / `audio_player.rs` / `tts.rs` — media subsystems
|
||||
- `tool_leg.rs` — per-source observer audio for recording/transcription tools
|
||||
- `ipc.rs` — event-emission helper used throughout
|
||||
|
||||
1. `POST /api/call` with browser deviceId → CallManager creates Call, saves pending state, notifies browser via `webrtc-incoming`
|
||||
2. Browser sends `webrtc-offer` (with its own `sessionId`) → CallManager creates a **standalone** WebRtcLeg (NOT attached to any call yet)
|
||||
3. Browser sends `webrtc-accept` (with `callId` + `sessionId`) → CallManager links the standalone WebRtcLeg to the Call, then starts the SIP provider leg
|
||||
### Key TS files (control plane)
|
||||
|
||||
**The WebRtcLeg CANNOT be created at call creation time** because the browser's session ID is unknown until the `webrtc-offer` arrives.
|
||||
- `ts/sipproxy.ts` — entrypoint, wires the proxy engine bridge + web UI + WebRTC signaling
|
||||
- `ts/proxybridge.ts` — `@push.rocks/smartrust` bridge to the Rust binary, typed `TProxyCommands` map
|
||||
- `ts/config.ts` — JSON config loader (`IAppConfig`, `IProviderConfig`, etc.), sent to Rust via `configure`
|
||||
- `ts/voicebox.ts` — voicemail metadata persistence (WAV files live in `.nogit/voicemail/{boxId}/`)
|
||||
- `ts/webrtcbridge.ts` — browser WebSocket signaling, browser device registry (`deviceIdToWs`)
|
||||
- `ts/call/prompt-cache.ts` — the only remaining file under `ts/call/` (IVR prompt caching)
|
||||
|
||||
### WebRTC Audio Return Channel (Critical)
|
||||
### Rust SIP protocol library
|
||||
|
||||
The SIP→browser audio path works through the Call hub:
|
||||
`rust/crates/sip-proto/` is a zero-dependency SIP data library (parse/build/mutate/serialize messages, dialog management, SDP helpers, digest auth). Do not add transport or timer logic there — it's purely data-level.
|
||||
|
||||
1. Provider sends RTP to SipLeg's socket
|
||||
2. SipLeg's `onRtpReceived` fires → Call hub's `forwardRtp`
|
||||
3. Call hub calls `webrtcLeg.sendRtp(data)` → which calls `forwardToBrowser()`
|
||||
4. `forwardToBrowser` transcodes (G.722→Opus) and sends via `sender.sendRtp()` (WebRTC PeerConnection)
|
||||
## Event-push architecture for device status
|
||||
|
||||
**`WebRtcLeg.sendRtp()` MUST feed into `forwardToBrowser()`** (the WebRTC PeerConnection path), NOT send to a UDP address. This was a bug that caused one-way audio.
|
||||
Device status flows **via push events**, not pull-based IPC queries:
|
||||
|
||||
The browser→SIP direction works independently: `ontrack.onReceiveRtp` → `forwardToSip()` → transcodes → sends directly to provider's media endpoint via UDP.
|
||||
1. Rust emits `device_registered` when a phone REGISTERs
|
||||
2. TS `sipproxy.ts` maintains a `deviceStatuses` Map, updated from the event
|
||||
3. Map snapshot goes into the WebSocket `status` broadcast
|
||||
4. Web UI (`ts_web/elements/sipproxy-devices.ts`) reads it from the push stream
|
||||
|
||||
### SIP Protocol Library
|
||||
There used to be a `get_status` pull IPC for this, but it was never called from TS and has been removed. If a new dashboard ever needs a pull-based snapshot, the push Map is the right source to read from.
|
||||
|
||||
`ts/sip/` is a zero-dependency SIP protocol library. Do not add transport or timer logic there — it's purely data-level (parse/build/mutate/serialize).
|
||||
## Inbound routing (wired in Commit 4 of the cleanup PR)
|
||||
|
||||
Inbound route resolution goes through `config.resolve_inbound_route(provider_id, called_number, caller_number)` inside `create_inbound_call` (call_manager.rs). The result carries a `ring_browsers` flag that propagates to the `incoming_call` event; `ts/sipproxy.ts` gates the `webrtc-incoming` browser fan-out behind that flag.
|
||||
|
||||
**Known limitations / TODOs** (documented in code at `create_inbound_call`):
|
||||
- Multi-target inbound fork is not yet implemented — only the first registered device from `route.device_ids` is rung.
|
||||
- `ring_browsers` is **informational only**: browsers see a toast but do not race the SIP device to answer. True first-to-answer-wins requires a multi-leg fork + per-leg CANCEL, which is not built yet.
|
||||
- `voicemail_box`, `ivr_menu_id`, `no_answer_timeout` are resolved but not yet honored downstream.
|
||||
|
||||
## WebRTC Browser Call Flow (Critical)
|
||||
|
||||
The browser call signaling order is strict:
|
||||
|
||||
1. Browser initiates an outbound call via a TS API (e.g. `POST /api/call`) — TS creates a pending call in the Rust engine via `make_call` and notifies the browser with a `webrtc-incoming` push.
|
||||
2. Browser sends `webrtc-offer` (with its own `sessionId`) → Rust `handle_webrtc_offer` creates a **standalone** WebRTC session (NOT attached to any call yet).
|
||||
3. Browser sends `webrtc_link` (with `callId` + `sessionId`) → Rust links the standalone session to the Call and wires the WebRTC leg through the mixer.
|
||||
|
||||
**The WebRTC leg cannot be fully attached at call-creation time** because the browser's session ID is unknown until the `webrtc-offer` arrives.
|
||||
|
||||
### WebRTC audio return channel (Critical)
|
||||
|
||||
The SIP→browser audio path goes through the mixer, not a direct RTP relay:
|
||||
|
||||
1. Provider sends RTP → received on the provider leg's UDP socket (`leg_io::spawn_sip_inbound`)
|
||||
2. Packet flows through `jitter_buffer` → mixer's inbound mpsc channel
|
||||
3. Mixer decodes/resamples/denoises, computes mix-minus per leg
|
||||
4. WebRTC leg receives its mix-minus frame, encodes to Opus, and pushes via the WebRTC engine's peer connection sender
|
||||
|
||||
Browser→SIP works symmetrically: `ontrack.onReceiveRtp` → WebRTC leg's outbound mpsc → mixer → other legs' inbound channels.
|
||||
|
||||
## SDP/Record-Route NAT (fixed in Commit 3 of the cleanup PR)
|
||||
|
||||
The proxy tracks a `public_ip: Option<String>` on every `LegInfo` (populated wherever a provider leg is constructed). When `route_passthrough_message` rewrites the SDP `c=` line or emits a `Record-Route`, it picks `advertise_ip` based on the destination leg's kind:
|
||||
|
||||
- `SipProvider` → `other.public_ip.unwrap_or(lan_ip)` (provider reaches us via public IP)
|
||||
- `SipDevice` / `WebRtc` / `Tool` / `Media` → `lan_ip` (everything else is LAN or proxy-internal)
|
||||
|
||||
This fixed a real NAT-traversal bug where the proxy advertised its RFC1918 LAN IP to the provider in SDP, causing one-way or no audio for device-originated inbound traffic behind NAT.
|
||||
|
||||
## Build & development
|
||||
|
||||
- **Build:** `pnpm run buildRust` (never `cargo build` directly — tsrust cross-compiles for both `x86_64-unknown-linux-gnu` and `aarch64-unknown-linux-gnu`)
|
||||
- **Cross-compile setup:** the aarch64 target requires `gcc-aarch64-linux-gnu` + `libstdc++6-arm64-cross` (Debian/Ubuntu). See `rust/.cargo/config.toml` for the linker wiring. A committed symlink at `rust/.cargo/crosslibs/aarch64/libstdc++.so` → `/usr/aarch64-linux-gnu/lib/libstdc++.so.6` avoids needing the `libstdc++-13-dev-arm64-cross` package.
|
||||
- **Bundle web UI:** `pnpm run bundle` (esbuild, output: `dist_ts_web/bundle.js`)
|
||||
- **Full build:** `pnpm run build` (= `buildRust && bundle`)
|
||||
- **Start server:** `pnpm run start` (runs `tsx ts/sipproxy.ts`)
|
||||
|
||||
## Persistent files
|
||||
|
||||
- `.nogit/config.json` — app config (providers, devices, routes, voiceboxes, IVR menus)
|
||||
- `.nogit/voicemail/{boxId}/` — voicemail WAV files + `messages.json` index
|
||||
- `.nogit/prompts/` — cached TTS prompts for IVR menus
|
||||
|
||||
+74
@@ -0,0 +1,74 @@
|
||||
# gitzone dockerfile_service
|
||||
## STAGE 1 // BUILD
|
||||
FROM code.foss.global/host.today/ht-docker-node:lts AS build
|
||||
|
||||
# System build tools that the Rust dep tree needs beyond the base image:
|
||||
# - cmake : used by the `cmake` crate (transitive via ort_sys / a webrtc
|
||||
# sub-crate) to build a C/C++ library from source when a
|
||||
# prebuilt-binary download path doesn't apply.
|
||||
# - pkg-config : used by audiopus_sys and other *-sys crates to locate libs
|
||||
# on the native target (safe no-op if they vendor their own).
|
||||
# These are normally pre-installed on dev machines but not in ht-docker-node:lts.
|
||||
RUN apt-get update && apt-get install -y --no-install-recommends \
|
||||
cmake \
|
||||
pkg-config \
|
||||
&& rm -rf /var/lib/apt/lists/*
|
||||
|
||||
# buildx sets TARGETARCH automatically for each platform it's building:
|
||||
# linux/amd64 -> TARGETARCH=amd64
|
||||
# linux/arm64 -> TARGETARCH=arm64
|
||||
# We use it to tell tsrust to build ONLY the current container's arch. This
|
||||
# overrides the `@git.zone/tsrust.targets` list in .smartconfig.json, which is
|
||||
# right for local dev / CI (where you want both binaries) but wrong for per-
|
||||
# platform Docker stages (each stage would then also try to cross-compile to
|
||||
# the OTHER arch — which fails in the arm64 stage because no reverse cross-
|
||||
# toolchain is installed).
|
||||
#
|
||||
# With --target set, tsrust builds a single target natively within whichever
|
||||
# platform this stage is running under (native on amd64, QEMU-emulated on arm64).
|
||||
ARG TARGETARCH
|
||||
|
||||
COPY ./ /app
|
||||
WORKDIR /app
|
||||
RUN pnpm config set store-dir .pnpm-store
|
||||
RUN rm -rf node_modules && pnpm install
|
||||
|
||||
# tsrust --target takes precedence over .smartconfig.json's targets array.
|
||||
# Writes dist_rust/proxy-engine_linux_amd64 or dist_rust/proxy-engine_linux_arm64.
|
||||
# The TS layer (ts/proxybridge.ts buildLocalPaths) picks the right one at runtime
|
||||
# via process.arch.
|
||||
RUN pnpm exec tsrust --target linux_${TARGETARCH}
|
||||
|
||||
# Web bundle (esbuild — pure JS, uses the platform's native esbuild binary
|
||||
# installed by pnpm above, so no cross-bundling concerns).
|
||||
RUN pnpm run bundle
|
||||
|
||||
# Drop pnpm store to keep the image smaller. node_modules stays because the
|
||||
# runtime entrypoint is tsx and siprouter has no separate dist_ts/ to run from.
|
||||
RUN rm -rf .pnpm-store
|
||||
|
||||
## STAGE 2 // PRODUCTION
|
||||
FROM code.foss.global/host.today/ht-docker-node:alpine-node AS production
|
||||
|
||||
# gcompat + libstdc++ let the glibc-linked proxy-engine binary run on Alpine.
|
||||
RUN apk add --no-cache gcompat libstdc++
|
||||
|
||||
WORKDIR /app
|
||||
COPY --from=build /app /app
|
||||
|
||||
ENV SIPROUTER_MODE=OCI_CONTAINER
|
||||
ENV NODE_ENV=production
|
||||
|
||||
LABEL org.opencontainers.image.title="siprouter" \
|
||||
org.opencontainers.image.description="SIP proxy with Rust data plane and WebRTC bridge" \
|
||||
org.opencontainers.image.source="https://code.foss.global/serve.zone/siprouter"
|
||||
|
||||
# 5070 SIP signaling (UDP+TCP)
|
||||
# 5061 SIP-TLS (optional, UDP+TCP)
|
||||
# 3060 Web UI / WebSocket (HTTP or HTTPS, auto-detected from .nogit/cert.pem)
|
||||
# 20000-20200/udp RTP media range (must match config.proxy.rtpPortRange)
|
||||
EXPOSE 5070/udp 5070/tcp 5061/udp 5061/tcp 3060/tcp 20000-20200/udp
|
||||
|
||||
# exec replaces sh as PID 1 with tsx, so SIGINT/SIGTERM reach Node and
|
||||
# ts/sipproxy.ts' shutdown handler (which calls shutdownProxyEngine) runs cleanly.
|
||||
CMD ["sh", "-c", "exec ./node_modules/.bin/tsx ts/sipproxy.ts"]
|
||||
+198
-1
@@ -1,5 +1,202 @@
|
||||
# Changelog
|
||||
|
||||
## Pending
|
||||
|
||||
|
||||
## 2026-05-21 - 1.27.0
|
||||
|
||||
### Features
|
||||
|
||||
- persist siprouter config and media through SmartData and SmartBucket (storage)
|
||||
- store runtime config, voicemail metadata, fax jobs, and fax inbox metadata in SmartData
|
||||
- store voicemail audio, custom greetings, and fax payloads in SmartBucket while keeping local cache paths for Rust media access
|
||||
- migrate legacy local voicemail and fax metadata/media into SmartData and SmartBucket on startup
|
||||
- enable gitzone Docker release publishing through the configured tsdocker target
|
||||
|
||||
## 2026-04-20 - 1.26.0 - feat(fax)
|
||||
add fax routing, job tracking, inbox management, and T.38/UDPTL media support
|
||||
|
||||
- adds outbound fax origination through the proxy engine with provider codec validation and a new send_fax command
|
||||
- introduces fax box configuration, inbox storage, and dashboard/API endpoints for listing, downloading, and deleting received fax messages
|
||||
- tracks fax lifecycle events and persisted fax jobs in the runtime layer
|
||||
- extends SIP SDP parsing and rewriting to support non-audio media, including T.38 over UDPTL
|
||||
- records leg media protocol details and bridge state to distinguish RTP, WebRTC, internal, and fax media paths
|
||||
|
||||
## 2026-04-14 - 1.25.2 - fix(proxy-engine)
|
||||
improve inbound SIP routing diagnostics and enrich leg media state reporting
|
||||
|
||||
- Extract inbound called numbers from DID-related SIP headers when the request URI contains a provider account username.
|
||||
- Emit detailed sip_unhandled diagnostics for inbound route misses, missing devices, and RTP allocation failures.
|
||||
- Include codec, RTP port, remote media, and metadata in leg state change events and preserve those fields in runtime status/history views.
|
||||
- Match hostname-based providers against resolved inbound source IPs to accept provider traffic sent from resolved addresses.
|
||||
- Invalidate cached TTS WAV metadata across engine restarts and vendor the kokoro-tts crate via a local patch.
|
||||
|
||||
## 2026-04-14 - 1.25.1 - fix(proxy-engine)
|
||||
respect explicit inbound route targets and store voicemail in the configured mailbox
|
||||
|
||||
- Prevent inbound routes with an explicit empty target list from ringing arbitrary registered devices by distinguishing omitted targets from empty targets.
|
||||
- Route unrouted or no-target inbound calls to voicemail with a generated unrouted greeting instead of falling back to random devices.
|
||||
- Pass voicemail box identifiers through proxy events and runtime handling so recordings are saved and indexed under the correct mailbox instead of always using default.
|
||||
|
||||
## 2026-04-14 - 1.25.0 - feat(proxy-engine)
|
||||
add live TTS streaming interactions and incoming number range support
|
||||
|
||||
- add a new start_tts_interaction command and bridge API to begin IVR or leg interactions before full TTS rendering completes
|
||||
- stream synthesized TTS chunks into the mixer with cancellation handling so prompts can stop cleanly on digit match, leg removal, or shutdown
|
||||
- extract PCM-to-mixer frame conversion for reusable live prompt processing
|
||||
- extend routing pattern matching to support numeric number ranges like start..end, including + prefixed values
|
||||
- add incomingNumbers config typing and frontend config update support for single, range, and regex number modes
|
||||
|
||||
## 2026-04-14 - 1.24.0 - feat(routing)
|
||||
require explicit inbound DID routes and normalize SIP identities for provider-based number matching
|
||||
|
||||
- Inbound route resolution now returns no match unless a configured inbound route explicitly matches the provider and called number.
|
||||
- Normalized routing identities were added for SIP/TEL URIs so inbound DIDs and outbound dialed numbers match consistently across provider-specific formats.
|
||||
- Call handling and incoming call events now use normalized numbers, improving routing accuracy for shared trunk providers.
|
||||
- Route configuration docs and the web route editor were updated to support explicit inbound DID ownership, voicemail fallback, and IVR selection.
|
||||
- Mixer RTP handling was enhanced to better support variable packet durations, timestamp-based gap fill, and non-blocking output drop reporting.
|
||||
|
||||
## 2026-04-14 - 1.23.0 - feat(runtime)
|
||||
refactor runtime state and proxy event handling for typed WebRTC linking and shared status models
|
||||
|
||||
- extract proxy event handling into dedicated runtime modules for status tracking and WebRTC session-to-call linking
|
||||
- introduce shared typed proxy event and status interfaces used by both backend and web UI
|
||||
- update web UI server initialization to use structured options and await async config save hooks
|
||||
- simplify browser signaling by routing WebRTC offer/ICE handling through frontend-to-Rust integration
|
||||
- align device status rendering with the new address/port fields in dashboard views
|
||||
|
||||
## 2026-04-12 - 1.22.0 - feat(proxy-engine)
|
||||
add on-demand TTS caching for voicemail and IVR prompts
|
||||
|
||||
- Route inbound calls directly to configured IVR menus and track them with a dedicated IVR call state
|
||||
- Generate voicemail greetings and IVR menu prompts inside the Rust proxy engine on demand instead of precomputing prompts in TypeScript
|
||||
- Add cacheable TTS output with sidecar metadata and enable Kokoro CMUdict support for improved prompt generation
|
||||
- Extend proxy configuration to include voiceboxes and IVR menus, and update documentation to reflect Kokoro-only prompt generation
|
||||
|
||||
## 2026-04-11 - 1.21.0 - feat(providers)
|
||||
replace provider creation modal with a guided multi-step setup flow
|
||||
|
||||
- Adds a stepper-based provider creation flow with provider type selection, connection, credentials, advanced settings, and review steps.
|
||||
- Applies built-in templates for Sipgate and O2/Alice from the selected provider type instead of separate add actions.
|
||||
- Adds a final review step with generated provider ID preview and duplicate ID collision handling before saving.
|
||||
|
||||
## 2026-04-11 - 1.20.5 - fix(readme)
|
||||
improve architecture and call flow documentation with Mermaid diagrams
|
||||
|
||||
- Replace ASCII architecture and audio pipeline diagrams with Mermaid diagrams for better readability
|
||||
- Document the WebRTC browser call setup sequence, including offer handling and session-to-call linking
|
||||
|
||||
## 2026-04-11 - 1.20.4 - fix(deps)
|
||||
bump @design.estate/dees-catalog to ^3.71.1
|
||||
|
||||
- Updates the @design.estate/dees-catalog dependency from ^3.70.0 to ^3.71.1 in package.json.
|
||||
|
||||
## 2026-04-11 - 1.20.3 - fix(ts-config,proxybridge,voicebox)
|
||||
align voicebox config types and add missing proxy bridge command definitions
|
||||
|
||||
- Reuses the canonical IVoiceboxConfig type from voicebox.ts in config.ts to eliminate duplicated type definitions and optionality mismatches.
|
||||
- Makes voicemail timing and limits optional in voicebox config so defaults can be applied consistently during initialization.
|
||||
- Adds VoiceboxManager.addMessage and updates recording handling to use it directly for persisted voicemail metadata.
|
||||
- Extends proxy bridge command typings with add_leg, remove_leg, and WebRTC signaling commands, and tightens sendCommand typing.
|
||||
|
||||
## 2026-04-11 - 1.20.2 - fix(proxy-engine)
|
||||
fix inbound route browser ringing and provider-facing SDP advertisement while preventing RTP port exhaustion
|
||||
|
||||
- Honor inbound routing `ringBrowsers` when emitting incoming call events so browser toast notifications can be suppressed per route.
|
||||
- Rewrite SDP and Record-Route with the destination leg's routable address: `public_ip` for provider legs and the LAN IP for device and internal legs.
|
||||
- Store provider leg public IP metadata on legs to support correct per-destination SIP message rewriting.
|
||||
- Change the RTP port pool to track sockets with `Weak<UdpSocket>` so ports are reclaimed automatically after calls end, avoiding leaked allocations and eventual 503 failures on new calls.
|
||||
- Remove unused dashboard/status, DTMF, relay, and transport helper code paths as part of engine cleanup.
|
||||
|
||||
## 2026-04-11 - 1.20.1 - fix(docker)
|
||||
install required native build tools for Rust dependencies in the build image
|
||||
|
||||
- Add cmake and pkg-config to the Docker build stage so Rust native dependencies can compile successfully in the container
|
||||
- Document why these tools are needed for transitive Rust crates that build or detect native libraries
|
||||
|
||||
## 2026-04-11 - 1.20.0 - feat(docker)
|
||||
add multi-arch Docker build and tagged release pipeline
|
||||
|
||||
- Add a production Dockerfile for building and running the SIP router with the Rust proxy engine and web bundle
|
||||
- Configure tsdocker and tsrust for linux/amd64 and linux/arm64 image builds and registry mapping
|
||||
- Add a tag-triggered Gitea workflow to build and push Docker images
|
||||
- Update runtime binary resolution to load architecture-specific Rust artifacts in Docker and CI environments
|
||||
- Add Docker-related package scripts, dependency updates, and ignore rules for container builds
|
||||
|
||||
## 2026-04-11 - 1.19.2 - fix(web-ui)
|
||||
normalize lucide icon names across SIP proxy views
|
||||
|
||||
- Updates icon identifiers to the expected PascalCase lucide format in app navigation, calls, IVR, overview, providers, and voicemail views.
|
||||
- Fixes UI icon rendering for stats cards and action menus such as transfer, delete, status, and call direction indicators.
|
||||
|
||||
## 2026-04-10 - 1.19.1 - fix(readme)
|
||||
refresh documentation for jitter buffering, voicemail, and WebSocket signaling details
|
||||
|
||||
- Add adaptive jitter buffer and packet loss concealment details to the audio pipeline documentation
|
||||
- Document voicemail unheard count and heard-state API endpoints
|
||||
- Update WebSocket event and browser signaling examples to reflect current message types
|
||||
|
||||
## 2026-04-10 - 1.19.0 - feat(proxy-engine,codec-lib)
|
||||
add adaptive RTP jitter buffering with Opus packet loss concealment and stable 20ms resampling
|
||||
|
||||
- introduces a per-leg adaptive jitter buffer in the mixer to reorder RTP packets, gate initial playout, and deliver one frame per 20ms tick
|
||||
- adds Opus PLC support to synthesize missing audio frames when packets are lost, with fade-based fallback handling for non-Opus codecs
|
||||
- updates i16 and f32 resamplers to use canonical 20ms chunks so cached resamplers preserve filter state and avoid variable-size cache thrashing
|
||||
|
||||
## 2026-04-10 - 1.18.0 - feat(readme)
|
||||
expand documentation for voicemail, IVR, audio engine, and API capabilities
|
||||
|
||||
- Updates the feature overview to document voicemail, IVR menus, call recording, enhanced TTS, and the 48kHz float audio engine
|
||||
- Refreshes the architecture section to describe the TypeScript control plane, Rust proxy-engine data plane, and JSON-over-stdio IPC
|
||||
- Clarifies REST API and WebSocket coverage with voicemail endpoints, incoming call events, and refined endpoint descriptions
|
||||
|
||||
## 2026-04-10 - 1.17.2 - fix(proxy-engine)
|
||||
use negotiated SDP payload types when wiring SIP legs and enable default nnnoiseless features for telephony denoising
|
||||
|
||||
- Select the negotiated codec payload type from SDP answers instead of always using the first offered codec
|
||||
- Preserve the device leg's preferred payload type from its own INVITE SDP when attaching it to the mixer
|
||||
- Enable default nnnoiseless features in codec-lib and proxy-engine dependencies
|
||||
|
||||
## 2026-04-10 - 1.17.1 - fix(proxy-engine,codec-lib,sip-proto,ts)
|
||||
preserve negotiated media details and improve RTP audio handling across call legs
|
||||
|
||||
- Use native Opus float encode/decode to avoid unnecessary i16 quantization in the f32 audio path.
|
||||
- Parse full RTP headers including extensions and sequence numbers, then sort inbound packets before decoding to keep codec state stable for out-of-order audio.
|
||||
- Capture negotiated codec payload types from SDP offers and answers and include codec, RTP port, remote media, and metadata in leg_added events.
|
||||
- Emit leg_state_changed and leg_removed events more consistently so the dashboard reflects leg lifecycle updates accurately.
|
||||
|
||||
## 2026-04-10 - 1.17.0 - feat(proxy-engine)
|
||||
upgrade the internal audio bus to 48kHz f32 with per-leg denoising and improve SIP leg routing
|
||||
|
||||
- switch mixer, prompt playback, and tool leg audio handling from 16kHz i16 to 48kHz f32 for higher-quality internal processing
|
||||
- add f32 decode/encode and resampling support plus standalone RNNoise denoiser creation in codec-lib
|
||||
- apply per-leg inbound noise suppression in the mixer before mix-minus generation
|
||||
- fix passthrough call routing by matching the actual leg from the signaling source address when Call-IDs are shared
|
||||
- correct dialed number extraction from bare SIP request URIs by parsing the user part directly
|
||||
|
||||
## 2026-04-10 - 1.16.0 - feat(proxy-engine)
|
||||
integrate Kokoro TTS generation into proxy-engine and simplify TypeScript prompt handling to use cached WAV files
|
||||
|
||||
- adds a generate_tts command to proxy-engine with lazy-loaded Kokoro model support and WAV output generation
|
||||
- removes standalone opus-codec and tts-engine workspace binaries by consolidating TTS generation into proxy-engine
|
||||
- updates announcement and prompt cache flows to generate and cache WAV files on disk instead of pre-encoding RTP frames in TypeScript
|
||||
|
||||
## 2026-04-10 - 1.15.0 - feat(proxy-engine)
|
||||
add device leg, leg transfer, and leg replacement call controls
|
||||
|
||||
- adds proxy-engine commands and call manager support for inviting a registered SIP device into an active call
|
||||
- supports transferring an existing leg between calls while preserving the active connection and updating mixer routing
|
||||
- supports replacing a call leg by removing the current leg and dialing a new outbound destination
|
||||
- wires the frontend add-leg API and TypeScript bridge to the new device leg and leg control commands
|
||||
|
||||
## 2026-04-10 - 1.14.0 - feat(proxy-engine)
|
||||
add multiparty call mixing with dynamic SIP and WebRTC leg management
|
||||
|
||||
- replace passthrough call handling with a mixer-backed call model that tracks multiple legs and exposes leg status in call state output
|
||||
- add mixer and leg I/O infrastructure to bridge SIP RTP and WebRTC audio through channel-based mix-minus processing
|
||||
- introduce add_leg and remove_leg proxy commands and wire frontend bridge APIs to manage external call legs
|
||||
- emit leg lifecycle events for observability and mark unimplemented device-leg and transfer HTTP endpoints with 501 responses
|
||||
|
||||
## 2026-04-10 - 1.13.0 - feat(proxy-engine,webrtc)
|
||||
add B2BUA SIP leg handling and WebRTC call bridging for outbound calls
|
||||
|
||||
@@ -153,4 +350,4 @@ Initial SIP-aware proxy for Grandstream HT801 ↔ easybell connectivity.
|
||||
- Added SDP rewriting and per-call RTP relay sockets
|
||||
- Added NAT priming and G.722 silence streaming after `200 OK` so easybell detects inbound media promptly
|
||||
- Inserted `Record-Route` so in-dialog ACK/BYE/re-INVITE continue through the proxy
|
||||
- Included captured device setting snapshots and setup documentation for diagnosing registration issues
|
||||
- Included captured device setting snapshots and setup documentation for diagnosing registration issues
|
||||
|
||||
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
+27
-9
@@ -1,25 +1,43 @@
|
||||
{
|
||||
"name": "siprouter",
|
||||
"version": "1.13.0",
|
||||
"version": "1.27.0",
|
||||
"private": true,
|
||||
"type": "module",
|
||||
"scripts": {
|
||||
"bundle": "node node_modules/.pnpm/esbuild@0.27.7/node_modules/esbuild/bin/esbuild ts_web/index.ts --bundle --format=esm --outfile=dist_ts_web/bundle.js --platform=browser --target=es2022 --minify",
|
||||
"bundle": "esbuild ts_web/index.ts --bundle --format=esm --outfile=dist_ts_web/bundle.js --platform=browser --target=es2022 --minify",
|
||||
"buildRust": "tsrust",
|
||||
"build": "pnpm run buildRust && pnpm run bundle",
|
||||
"build:docker": "tsdocker build --verbose",
|
||||
"release:docker": "tsdocker push --verbose",
|
||||
"start": "tsx ts/sipproxy.ts",
|
||||
"restartBackground": "pnpm run buildRust && pnpm run bundle; test -f .server.pid && kill $(cat .server.pid) 2>/dev/null; sleep 1; rm -f sip_trace.log proxy.out && nohup tsx ts/sipproxy.ts > proxy.out 2>&1 & echo $! > .server.pid; sleep 2; cat proxy.out"
|
||||
},
|
||||
"dependencies": {
|
||||
"@design.estate/dees-catalog": "^3.70.0",
|
||||
"@design.estate/dees-catalog": "^3.81.0",
|
||||
"@design.estate/dees-element": "^2.2.4",
|
||||
"@push.rocks/smartrust": "^1.3.2",
|
||||
"@push.rocks/smartstate": "^2.3.0",
|
||||
"@push.rocks/smartbucket": "^4.6.1",
|
||||
"@push.rocks/smartdata": "^7.1.7",
|
||||
"@push.rocks/smartrust": "^1.4.0",
|
||||
"@push.rocks/smartstate": "^2.3.1",
|
||||
"tsx": "^4.21.0",
|
||||
"ws": "^8.20.0"
|
||||
},
|
||||
"devDependencies": {
|
||||
"@git.zone/tsbundle": "^2.10.0",
|
||||
"@git.zone/tsrust": "^1.3.2",
|
||||
"@git.zone/tswatch": "^3.3.2",
|
||||
"@types/ws": "^8.18.1"
|
||||
"@git.zone/tsbundle": "^2.10.1",
|
||||
"@git.zone/tsdocker": "^2.2.5",
|
||||
"@git.zone/tsrust": "^1.3.3",
|
||||
"@git.zone/tswatch": "^3.3.3",
|
||||
"@types/node": "^25.8.0",
|
||||
"@types/ws": "^8.18.1",
|
||||
"esbuild": "^0.27.7"
|
||||
},
|
||||
"pnpm": {
|
||||
"ignoredBuiltDependencies": [
|
||||
"@design.estate/dees-catalog"
|
||||
],
|
||||
"onlyBuiltDependencies": [
|
||||
"esbuild",
|
||||
"mongodb-memory-server"
|
||||
]
|
||||
}
|
||||
}
|
||||
|
||||
Generated
+1880
-1089
File diff suppressed because it is too large
Load Diff
@@ -0,0 +1,5 @@
|
||||
allowBuilds:
|
||||
esbuild: true
|
||||
mongodb-memory-server: true
|
||||
ignoredBuiltDependencies:
|
||||
- '@design.estate/dees-catalog'
|
||||
@@ -1,298 +1,240 @@
|
||||
# @serve.zone/siprouter
|
||||
# siprouter
|
||||
|
||||
A production-grade **SIP B2BUA + WebRTC bridge** built with TypeScript and Rust. Routes calls between SIP providers, SIP hardware devices, and browser softphones — with real-time codec transcoding, ML noise suppression, neural TTS announcements, and a slick web dashboard.
|
||||
siprouter is a TypeScript control plane plus Rust media/data plane for SIP routing, SIP device registration, SIP trunk calls, browser WebRTC softphones, voicemail/fax storage, and a live operations dashboard. It is intentionally split so TypeScript owns configuration, REST/WebSocket APIs, and UI glue while the Rust `proxy-engine` owns SIP, RTP, WebRTC media, codecs, mixing, jitter handling, fax transport, and real-time call state.
|
||||
|
||||
## Issue Reporting and Security
|
||||
|
||||
For reporting bugs, issues, or security vulnerabilities, please visit [community.foss.global/](https://community.foss.global/). This is the central community hub for all issue reporting. Developers who sign and comply with our contribution agreement and go through identification can also get a [code.foss.global/](https://code.foss.global/) account to submit Pull Requests directly.
|
||||
|
||||
---
|
||||
## Current Capabilities
|
||||
|
||||
## 🔥 What It Does
|
||||
- SIP B2BUA behavior for SIP providers and LAN SIP devices, including dialog state, provider registration, digest auth retry, SDP negotiation, BYE/CANCEL handling, and routing decisions in Rust.
|
||||
- Browser WebRTC softphone signaling through the TypeScript dashboard, with WebRTC media sessions implemented in the Rust engine.
|
||||
- A call hub model: each call owns multiple legs and a mix-minus mixer that runs on a 20 ms tick with a 48 kHz f32 internal bus.
|
||||
- Codec handling for Opus, G.722, PCMU, and PCMA through `codec-lib`, including per-leg transcoding, resampling, packet loss concealment, and jitter buffering.
|
||||
- Device registration push events from Rust into TypeScript, then into the dashboard status stream.
|
||||
- Route configuration for inbound and outbound calls with provider/device matching, number patterns, failover provider fields, browser notification flags, voicemail, fax, and IVR-related action fields.
|
||||
- Voicemail metadata and WAV storage through `VoiceboxManager` under `.nogit/voicemail/{boxId}/`.
|
||||
- Fax box and fax job metadata storage under `.nogit/fax/`, backed by Rust fax handling that includes audio fax and T.38-related code paths.
|
||||
- Web dashboard and REST API on the configured `webUiPort`, served over HTTPS if `.nogit/cert.pem` and `.nogit/key.pem` exist, otherwise HTTP.
|
||||
|
||||
siprouter sits between your SIP trunk providers and your endpoints — hardware phones, ATAs, browser softphones — and handles **everything** in between:
|
||||
## Important Accuracy Notes
|
||||
|
||||
- 📞 **SIP B2BUA** — Terminates and re-originates calls with full RFC 3261 dialog state management
|
||||
- 🌐 **WebRTC Bridge** — Browser-based softphone with bidirectional audio to the SIP network
|
||||
- 🎛️ **Multi-Provider Trunking** — Register with multiple SIP providers simultaneously (sipgate, easybell, o2, etc.)
|
||||
- 🔊 **Rust Codec Engine** — Real-time Opus ↔ G.722 ↔ PCMU ↔ PCMA transcoding in native Rust
|
||||
- 🤖 **ML Noise Suppression** — RNNoise denoiser with per-direction state (to SIP / to browser)
|
||||
- 🗣️ **Neural TTS** — Kokoro-powered "connecting your call" announcements, pre-encoded for instant playback
|
||||
- 🔀 **Hub Model Calls** — N-leg calls with dynamic add/remove, transfer, and RTP fan-out
|
||||
- 🖥️ **Web Dashboard** — Real-time SPA with live call monitoring, browser phone, contact management, provider config
|
||||
- TypeScript does not handle raw SIP or RTP. It sends high-level commands to the Rust engine over `@push.rocks/smartrust` and receives high-level events back.
|
||||
- Browser WebRTC calls use a strict two-stage link flow. The browser first creates a standalone WebRTC session with `webrtc-offer`; only after `webrtc-accept`/linking can Rust attach that session to the call mixer.
|
||||
- Inbound route resolution is wired in Rust, but multi-target inbound forking is not implemented yet. Only the first registered target device from an inbound route is rung.
|
||||
- `ringBrowsers` currently controls browser notifications; it does not implement first-answer-wins call racing against SIP devices.
|
||||
- `voicemailBox`, `ivrMenuId`, and `noAnswerTimeout` are part of the resolved inbound route data, but per the project notes, downstream handling of those fields is not complete yet.
|
||||
- The `/api/transfer` HTTP endpoint currently returns `501 not yet implemented`.
|
||||
|
||||
---
|
||||
## Architecture
|
||||
|
||||
## 🏗️ Architecture
|
||||
|
||||
```
|
||||
┌─────────────────────────────────────┐
|
||||
│ Browser Softphone │
|
||||
│ (WebRTC via WebSocket signaling) │
|
||||
└──────────────┬──────────────────────┘
|
||||
│ Opus/WebRTC
|
||||
▼
|
||||
┌──────────────────────────────────────┐
|
||||
│ siprouter │
|
||||
│ │
|
||||
│ ┌──────────┐ ┌──────────────────┐ │
|
||||
│ │ Call Hub │ │ Rust Transcoder │ │
|
||||
│ │ N legs │──│ Opus/G.722/PCM │ │
|
||||
│ │ fan-out │ │ + RNNoise │ │
|
||||
│ └────┬─────┘ └──────────────────┘ │
|
||||
│ │ │
|
||||
│ ┌────┴─────┐ ┌──────────────────┐ │
|
||||
│ │ SIP Stack│ │ Kokoro TTS │ │
|
||||
│ │ Dialog SM│ │ (ONNX Runtime) │ │
|
||||
│ └────┬─────┘ └──────────────────┘ │
|
||||
│ │ │
|
||||
│ ┌────┴──────────────────────────┐ │
|
||||
│ │ Local Registrar + Provider │ │
|
||||
│ │ Registration Engine │ │
|
||||
│ └───────────────────────────────┘ │
|
||||
└──────────┬──────────────┬────────────┘
|
||||
│ │
|
||||
┌──────┴──────┐ ┌─────┴──────┐
|
||||
│ SIP Devices │ │ SIP Trunk │
|
||||
│ (HT801, etc)│ │ Providers │
|
||||
└─────────────┘ └────────────┘
|
||||
```text
|
||||
Browser dashboard and softphone
|
||||
|
|
||||
| HTTP + WebSocket signaling
|
||||
v
|
||||
TypeScript control plane
|
||||
ts/sipproxy.ts
|
||||
ts/frontend.ts
|
||||
ts/webrtcbridge.ts
|
||||
ts/config.ts
|
||||
ts/proxybridge.ts
|
||||
|
|
||||
| JSON-over-stdio via @push.rocks/smartrust
|
||||
v
|
||||
Rust proxy-engine
|
||||
SIP transport and dialog state
|
||||
Call manager and call hub
|
||||
RTP port pool and RTP I/O
|
||||
48 kHz f32 mix-minus mixer
|
||||
WebRTC sessions
|
||||
Fax, voicemail, TTS, recorder, tool legs
|
||||
|
|
||||
| SIP/RTP/UDPTL/WebRTC media
|
||||
v
|
||||
SIP providers, SIP devices, and browser clients
|
||||
```
|
||||
|
||||
### The Hub Model
|
||||
## Key Files
|
||||
|
||||
Every call is a **hub** with N legs. Each leg is either a `SipLeg` (hardware device or provider) or a `WebRtcLeg` (browser). RTP flows through the hub — each leg's received audio is forwarded to all other legs, with codec transcoding handled transparently by the Rust engine.
|
||||
| Path | Role |
|
||||
| --- | --- |
|
||||
| `ts/sipproxy.ts` | Process entry point. Loads config, starts web UI, starts Rust, wires event handlers, and handles shutdown. |
|
||||
| `ts/config.ts` | `.nogit/config.json` schema, defaults, and validation. |
|
||||
| `ts/proxybridge.ts` | Typed command bridge to the Rust `proxy-engine` binary. |
|
||||
| `ts/frontend.ts` | HTTP API, static dashboard serving, status WebSocket, and WebRTC message routing. |
|
||||
| `ts/webrtcbridge.ts` | Browser device registration and WebSocket-to-device mapping. |
|
||||
| `ts/voicebox.ts` | Voicemail box config, WAV metadata, unheard counts, and message CRUD. |
|
||||
| `ts/faxbox.ts` | Fax inbox metadata and TIFF file tracking. |
|
||||
| `ts/faxjobs.ts` | Outbound/inbound fax job state persistence. |
|
||||
| `rust/crates/proxy-engine/src/call_manager.rs` | Central call registry, SIP routing, B2BUA state, route resolution, fax metadata, and call orchestration. |
|
||||
| `rust/crates/proxy-engine/src/mixer.rs` | 20 ms mix-minus engine with 48 kHz f32 processing, codec boundaries, jitter, PLC, DTMF, and tool-leg audio. |
|
||||
| `rust/crates/proxy-engine/src/webrtc_engine.rs` | Browser WebRTC sessions. |
|
||||
| `rust/crates/proxy-engine/src/fax_engine.rs` | Fax transfer engine using `spandsp` and `udptl`. |
|
||||
| `rust/crates/sip-proto/` | Zero-dependency SIP data library for parsing, serializing, dialogs, SDP helpers, digest auth, and URI rewriting. |
|
||||
| `ts_web/` | Lit/dees-element dashboard views and WebRTC browser client state. |
|
||||
|
||||
---
|
||||
## Configuration
|
||||
|
||||
## 🚀 Getting Started
|
||||
|
||||
### Prerequisites
|
||||
|
||||
- **Node.js** ≥ 20 with `tsx` globally available
|
||||
- **pnpm** for package management
|
||||
- **Rust** toolchain (for building the codec engine and TTS)
|
||||
|
||||
### Install & Build
|
||||
|
||||
```bash
|
||||
# Clone and install
|
||||
pnpm install
|
||||
|
||||
# Build the Rust binaries (opus-codec + tts-engine)
|
||||
pnpm run buildRust
|
||||
|
||||
# Bundle the web frontend
|
||||
pnpm run bundle
|
||||
```
|
||||
|
||||
### Configuration
|
||||
|
||||
Create `.nogit/config.json` with your setup:
|
||||
Create `.nogit/config.json` in the repository root before starting the service.
|
||||
|
||||
```jsonc
|
||||
{
|
||||
"proxy": {
|
||||
"lanIp": "192.168.1.100", // Your server's LAN IP
|
||||
"lanPort": 5070, // SIP signaling port
|
||||
"rtpPortRange": [20000, 20200],// RTP relay port pool (even ports)
|
||||
"webUiPort": 3060 // Dashboard port
|
||||
"lanIp": "192.168.1.100",
|
||||
"lanPort": 5070,
|
||||
"publicIpSeed": null,
|
||||
"rtpPortRange": { "min": 20000, "max": 20200 },
|
||||
"webUiPort": 3060
|
||||
},
|
||||
"providers": [
|
||||
{
|
||||
"id": "my-trunk",
|
||||
"name": "My SIP Provider",
|
||||
"host": "sip.provider.com",
|
||||
"port": 5060,
|
||||
"username": "user",
|
||||
"password": "pass",
|
||||
"codecs": ["G.722", "PCMA", "PCMU"],
|
||||
"registerExpiry": 3600
|
||||
"id": "main-trunk",
|
||||
"displayName": "Main SIP trunk",
|
||||
"domain": "sip.example.net",
|
||||
"outboundProxy": { "address": "sip.example.net", "port": 5060 },
|
||||
"username": "trunk-user",
|
||||
"password": "trunk-password",
|
||||
"registerIntervalSec": 300,
|
||||
"codecs": [9, 0, 8, 101],
|
||||
"quirks": { "earlyMediaSilence": false }
|
||||
}
|
||||
],
|
||||
"devices": [
|
||||
{
|
||||
"id": "desk-phone",
|
||||
"name": "Desk Phone",
|
||||
"type": "sip"
|
||||
"displayName": "Desk Phone",
|
||||
"expectedAddress": "192.168.1.50",
|
||||
"extension": "100"
|
||||
}
|
||||
],
|
||||
"routing": {
|
||||
"inbound": {
|
||||
"default": { "target": "all-devices", "ringBrowser": true }
|
||||
"routes": [
|
||||
{
|
||||
"id": "outbound-default",
|
||||
"name": "Outbound via main trunk",
|
||||
"priority": 100,
|
||||
"enabled": true,
|
||||
"match": { "direction": "outbound" },
|
||||
"action": { "provider": "main-trunk" }
|
||||
},
|
||||
{
|
||||
"id": "inbound-main",
|
||||
"name": "Inbound main number",
|
||||
"priority": 200,
|
||||
"enabled": true,
|
||||
"match": {
|
||||
"direction": "inbound",
|
||||
"sourceProvider": "main-trunk",
|
||||
"numberPattern": "+49421219694"
|
||||
},
|
||||
"action": {
|
||||
"targets": ["desk-phone"],
|
||||
"ringBrowsers": true,
|
||||
"voicemailBox": "main"
|
||||
}
|
||||
}
|
||||
]
|
||||
},
|
||||
"contacts": [],
|
||||
"voiceboxes": [
|
||||
{
|
||||
"id": "main",
|
||||
"enabled": true,
|
||||
"greetingText": "Please leave a message after the tone.",
|
||||
"greetingVoice": "af_bella",
|
||||
"noAnswerTimeoutSec": 25,
|
||||
"maxRecordingSec": 120,
|
||||
"maxMessages": 50
|
||||
}
|
||||
],
|
||||
"faxboxes": [],
|
||||
"ivr": {
|
||||
"enabled": false,
|
||||
"entryMenuId": "main-menu",
|
||||
"menus": []
|
||||
}
|
||||
}
|
||||
```
|
||||
|
||||
### TTS Setup (Optional)
|
||||
## Persistent Files
|
||||
|
||||
For neural "connecting your call" announcements, download the Kokoro TTS model:
|
||||
| Path | Purpose |
|
||||
| --- | --- |
|
||||
| `.nogit/config.json` | Main app config. |
|
||||
| `.nogit/cert.pem` and `.nogit/key.pem` | Optional HTTPS certificate for the dashboard. |
|
||||
| `.nogit/voicemail/{boxId}/` | Voicemail WAV files and `messages.json`. |
|
||||
| `.nogit/fax/inboxes/{boxId}/` | Fax inbox files and metadata. |
|
||||
| `.nogit/fax/jobs.json` | Fax job state. |
|
||||
| `.nogit/prompts/` | Cached prompt/TTS assets used by call flows. |
|
||||
| `sip_trace.log` | Runtime log written by `ts/sipproxy.ts`. |
|
||||
|
||||
## HTTP and WebSocket API
|
||||
|
||||
| Endpoint | Purpose |
|
||||
| --- | --- |
|
||||
| `GET /api/status` | Full status snapshot for providers, devices, calls, and dashboard state. |
|
||||
| `POST /api/call` | Originate an outbound call. |
|
||||
| `POST /api/hangup` | Hang up a call. |
|
||||
| `POST /api/fax` | Start an outbound fax. |
|
||||
| `GET /api/fax/jobs` | List fax jobs. |
|
||||
| `GET /api/fax/inboxes/:boxId` | List fax inbox messages. |
|
||||
| `GET /api/fax/inboxes/:boxId/:messageId/file` | Stream a fax TIFF. |
|
||||
| `DELETE /api/fax/inboxes/:boxId/:messageId` | Delete a fax message. |
|
||||
| `POST /api/call/:id/addleg` | Add a registered SIP device leg to an active call. |
|
||||
| `POST /api/call/:id/addexternal` | Add an external dial-out leg to an active call. |
|
||||
| `POST /api/call/:id/removeleg` | Remove a leg from a call. |
|
||||
| `POST /api/transfer` | Present but returns `501 not yet implemented`. |
|
||||
| `GET /api/config` | Read sanitized config. |
|
||||
| `POST /api/config` | Update config and trigger runtime reload where possible. |
|
||||
| `GET /api/voicemail/:boxId` | List voicemail messages. |
|
||||
| `GET /api/voicemail/:boxId/unheard` | Get unheard voicemail count. |
|
||||
| `GET /api/voicemail/:boxId/:messageId/audio` | Stream voicemail WAV audio. |
|
||||
| `POST /api/voicemail/:boxId/:messageId/heard` | Mark voicemail as heard. |
|
||||
| `DELETE /api/voicemail/:boxId/:messageId` | Delete voicemail metadata and WAV file. |
|
||||
| `WS /ws` | Status updates, logs, WebRTC signaling, and browser phone events. |
|
||||
|
||||
## Build and Run
|
||||
|
||||
```bash
|
||||
mkdir -p .nogit/tts
|
||||
# Download the full-quality model (310MB) + voices (27MB)
|
||||
curl -L -o .nogit/tts/kokoro-v1.0.onnx \
|
||||
https://github.com/mzdk100/kokoro/releases/download/V1.0/kokoro-v1.0.onnx
|
||||
curl -L -o .nogit/tts/voices.bin \
|
||||
https://github.com/mzdk100/kokoro/releases/download/V1.0/voices.bin
|
||||
```
|
||||
|
||||
If the model files aren't present, the announcement feature is simply disabled — everything else works fine.
|
||||
|
||||
### Run
|
||||
|
||||
```bash
|
||||
pnpm start
|
||||
```
|
||||
|
||||
The SIP proxy starts on the configured port and the web dashboard is available at `http://<your-ip>:3060`.
|
||||
|
||||
### HTTPS (Optional)
|
||||
|
||||
Place `cert.pem` and `key.pem` in `.nogit/` for TLS on the dashboard.
|
||||
|
||||
---
|
||||
|
||||
## 📂 Project Structure
|
||||
|
||||
```
|
||||
siprouter/
|
||||
├── ts/ # TypeScript source
|
||||
│ ├── sipproxy.ts # Main entry — bootstraps everything
|
||||
│ ├── config.ts # Config loader & validation
|
||||
│ ├── registrar.ts # Local SIP registrar for devices
|
||||
│ ├── providerstate.ts # Per-provider upstream registration engine
|
||||
│ ├── frontend.ts # Web dashboard HTTP/WS server + REST API
|
||||
│ ├── webrtcbridge.ts # WebRTC signaling layer
|
||||
│ ├── opusbridge.ts # Rust IPC bridge (smartrust)
|
||||
│ ├── codec.ts # High-level RTP transcoding interface
|
||||
│ ├── announcement.ts # Neural TTS announcement generator
|
||||
│ ├── sip/ # Zero-dependency SIP protocol library
|
||||
│ │ ├── message.ts # SIP message parser/builder/mutator
|
||||
│ │ ├── dialog.ts # RFC 3261 dialog state machine
|
||||
│ │ ├── helpers.ts # SDP builder, digest auth, codec registry
|
||||
│ │ └── rewrite.ts # SIP URI + SDP body rewriting
|
||||
│ └── call/ # Hub-model call management
|
||||
│ ├── call-manager.ts # Central registry, factory, routing
|
||||
│ ├── call.ts # Call hub — owns N legs, media fan-out
|
||||
│ ├── sip-leg.ts # SIP device/provider connection
|
||||
│ ├── webrtc-leg.ts # Browser WebRTC connection
|
||||
│ └── rtp-port-pool.ts # UDP port allocation
|
||||
├── ts_web/ # Web frontend (Lit-based SPA)
|
||||
│ ├── elements/ # Web components (dashboard, phone, etc.)
|
||||
│ └── state/ # App state, WebRTC client, notifications
|
||||
├── rust/ # Rust workspace
|
||||
│ └── crates/
|
||||
│ ├── opus-codec/ # Real-time audio transcoder (Opus/G.722/PCM)
|
||||
│ └── tts-engine/ # Kokoro neural TTS CLI
|
||||
├── html/ # Static HTML shell
|
||||
├── .nogit/ # Secrets, config, models (gitignored)
|
||||
└── dist_rust/ # Compiled Rust binaries (gitignored)
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
## 🎧 Codec Engine (Rust)
|
||||
|
||||
The `opus-codec` binary handles all real-time audio processing via a JSON-over-stdio IPC protocol:
|
||||
|
||||
| Codec | Payload Type | Sample Rate | Use Case |
|
||||
|-------|-------------|-------------|----------|
|
||||
| **Opus** | 111 | 48 kHz | WebRTC browsers |
|
||||
| **G.722** | 9 | 16 kHz | HD SIP devices |
|
||||
| **PCMU** (G.711 µ-law) | 0 | 8 kHz | Legacy SIP |
|
||||
| **PCMA** (G.711 A-law) | 8 | 8 kHz | Legacy SIP |
|
||||
|
||||
**Features:**
|
||||
- Per-call isolated codec sessions (no cross-call state corruption)
|
||||
- FFT-based sample rate conversion via `rubato`
|
||||
- **RNNoise ML noise suppression** with per-direction state — denoises audio flowing to SIP separately from audio flowing to the browser
|
||||
- Raw PCM encoding for TTS frame processing
|
||||
|
||||
---
|
||||
|
||||
## 🗣️ Neural TTS (Rust)
|
||||
|
||||
The `tts-engine` binary uses [Kokoro TTS](https://github.com/mzdk100/kokoro) (82M parameter neural model) to synthesize announcements at startup:
|
||||
|
||||
- **24 kHz, 16-bit mono** output
|
||||
- **25+ voice presets** — American/British, male/female (e.g., `af_bella`, `am_adam`, `bf_emma`, `bm_george`)
|
||||
- **~800ms** synthesis time for a 3-second announcement
|
||||
- Pre-encoded to G.722 + Opus for zero-latency RTP playback during call setup
|
||||
|
||||
---
|
||||
|
||||
## 🌐 Web Dashboard & REST API
|
||||
|
||||
### Dashboard Views
|
||||
|
||||
| View | Description |
|
||||
|------|-------------|
|
||||
| **Overview** | Stats tiles — uptime, providers, devices, active calls |
|
||||
| **Calls** | Active calls with leg details, codec info, packet counters. Add/remove legs, transfer, hangup |
|
||||
| **Phone** | Browser softphone — mic/speaker selection, audio meters, dial pad, incoming call popup |
|
||||
| **Contacts** | Contact management with click-to-call |
|
||||
| **Providers** | SIP trunk config with registration status |
|
||||
| **Log** | Live streaming log viewer |
|
||||
|
||||
### REST API
|
||||
|
||||
| Endpoint | Method | Description |
|
||||
|----------|--------|-------------|
|
||||
| `/api/status` | GET | Full system status (providers, devices, calls) |
|
||||
| `/api/call` | POST | Originate a call |
|
||||
| `/api/hangup` | POST | Hang up a call |
|
||||
| `/api/call/:id/addleg` | POST | Add a leg to an active call |
|
||||
| `/api/call/:id/addexternal` | POST | Add an external participant |
|
||||
| `/api/call/:id/removeleg` | POST | Remove a leg from a call |
|
||||
| `/api/transfer` | POST | Transfer a call |
|
||||
| `/api/config` | GET/POST | Read or update configuration (hot-reload) |
|
||||
|
||||
### WebSocket Events
|
||||
|
||||
Connect to `/ws` for real-time push:
|
||||
|
||||
```jsonc
|
||||
{ "type": "status", "data": { ... } } // Full status snapshot (1s interval)
|
||||
{ "type": "log", "data": { "message": "..." } } // Log lines in real-time
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
## 🔌 Ports
|
||||
|
||||
| Port | Protocol | Purpose |
|
||||
|------|----------|---------|
|
||||
| 5070 (configurable) | UDP | SIP signaling |
|
||||
| 20000–20200 (configurable) | UDP | RTP relay (even ports, per-call allocation) |
|
||||
| 3060 (configurable) | TCP | Web dashboard + WebSocket + REST API |
|
||||
|
||||
---
|
||||
|
||||
## 🛠️ Development
|
||||
|
||||
```bash
|
||||
# Start in dev mode
|
||||
pnpm start
|
||||
|
||||
# Build Rust crates
|
||||
pnpm install
|
||||
pnpm run buildRust
|
||||
|
||||
# Bundle web frontend
|
||||
pnpm run bundle
|
||||
|
||||
# Restart background server (build + bundle + restart)
|
||||
pnpm run restartBackground
|
||||
pnpm start
|
||||
```
|
||||
|
||||
### Key Design Decisions
|
||||
Full build:
|
||||
|
||||
- **Hub Model** — Calls are N-leg hubs, not point-to-point. This enables multi-party, dynamic leg manipulation, and transfer without tearing down the call.
|
||||
- **Zero-dependency SIP library** — `ts/sip/` is a pure data-level SIP stack (parse/build/mutate/serialize). No transport or timer logic — those live in the application layer.
|
||||
- **Rust for the hot path** — Codec transcoding and noise suppression run in native Rust for real-time performance. TypeScript handles signaling and orchestration.
|
||||
- **Per-session codec isolation** — Each call gets its own Opus/G.722 encoder/decoder state in the Rust process, preventing stateful codec prediction from leaking between concurrent calls.
|
||||
```bash
|
||||
pnpm build
|
||||
```
|
||||
|
||||
---
|
||||
Docker build scripts are also present:
|
||||
|
||||
```bash
|
||||
pnpm run build:docker
|
||||
pnpm run release:docker
|
||||
```
|
||||
|
||||
`pnpm run buildRust` uses `tsrust`. Per the project notes, do not replace that with a direct `cargo build` when validating the packaged Rust output. The configured build path cross-compiles the Rust engine for Linux amd64 and arm64 targets.
|
||||
|
||||
## Project Map
|
||||
|
||||
```text
|
||||
siprouter/
|
||||
├── ts/ # TypeScript control plane
|
||||
├── ts_web/ # Browser dashboard
|
||||
├── rust/
|
||||
│ └── crates/
|
||||
│ ├── codec-lib/ # Codec and transcoding helpers
|
||||
│ ├── proxy-engine/ # Rust SIP/RTP/WebRTC/fax engine
|
||||
│ └── sip-proto/ # SIP message/dialog/SDP library
|
||||
├── html/ # Dashboard HTML shell
|
||||
├── dist_rust/ # Built Rust binaries
|
||||
├── dist_ts_web/ # Bundled web UI
|
||||
└── .nogit/ # Local config, secrets, voicemail, fax, prompts
|
||||
```
|
||||
|
||||
## License and Legal Information
|
||||
|
||||
|
||||
@@ -0,0 +1,33 @@
|
||||
# Cross-compile configuration for the proxy-engine crate.
|
||||
#
|
||||
# tsrust builds for both x86_64-unknown-linux-gnu and aarch64-unknown-linux-gnu
|
||||
# from an x86_64 host. Without this config, cargo invokes the host `cc` to
|
||||
# link aarch64 objects and fails with
|
||||
# rust-lld: error: <obj.o> is incompatible with elf64-x86-64
|
||||
#
|
||||
# Required Debian/Ubuntu packages for the aarch64 target to work:
|
||||
# sudo apt install gcc-aarch64-linux-gnu g++-aarch64-linux-gnu \
|
||||
# libc6-dev-arm64-cross libstdc++6-arm64-cross
|
||||
#
|
||||
# The `libstdc++.so` dev symlink (needed by the -lstdc++ flag that the
|
||||
# kokoro-tts/ort build scripts emit) is provided by this repo at
|
||||
# ./crosslibs/aarch64/libstdc++.so, pointing at the versioned shared
|
||||
# library installed by `libstdc++6-arm64-cross`. This avoids requiring
|
||||
# the `libstdc++-13-dev-arm64-cross` package, which is not always
|
||||
# installed alongside the runtime.
|
||||
|
||||
[target.aarch64-unknown-linux-gnu]
|
||||
linker = "aarch64-linux-gnu-gcc"
|
||||
rustflags = ["-C", "link-arg=-L.cargo/crosslibs/aarch64"]
|
||||
|
||||
# Tell cc-rs-based build scripts (ring, zstd-sys, audiopus_sys, ort-sys) to
|
||||
# use the aarch64 cross toolchain when compiling C sources for the aarch64
|
||||
# target. Without these, they'd default to the host `cc` and produce x86_64
|
||||
# objects that the aarch64 linker then rejects.
|
||||
[env]
|
||||
CC_aarch64_unknown_linux_gnu = "aarch64-linux-gnu-gcc"
|
||||
CXX_aarch64_unknown_linux_gnu = "aarch64-linux-gnu-g++"
|
||||
AR_aarch64_unknown_linux_gnu = "aarch64-linux-gnu-ar"
|
||||
PKG_CONFIG_ALLOW_CROSS = "1"
|
||||
PKG_CONFIG_SYSROOT_DIR_aarch64_unknown_linux_gnu = "/"
|
||||
PKG_CONFIG_LIBDIR_aarch64_unknown_linux_gnu = "/usr/lib/aarch64-linux-gnu/pkgconfig:/usr/share/pkgconfig"
|
||||
@@ -0,0 +1 @@
|
||||
/usr/aarch64-linux-gnu/lib/libstdc++.so.6
|
||||
Generated
+394
-49
@@ -165,7 +165,7 @@ dependencies = [
|
||||
"nom",
|
||||
"num-traits",
|
||||
"rusticata-macros",
|
||||
"thiserror",
|
||||
"thiserror 1.0.69",
|
||||
"time",
|
||||
]
|
||||
|
||||
@@ -181,7 +181,7 @@ dependencies = [
|
||||
"nom",
|
||||
"num-traits",
|
||||
"rusticata-macros",
|
||||
"thiserror",
|
||||
"thiserror 1.0.69",
|
||||
"time",
|
||||
]
|
||||
|
||||
@@ -237,6 +237,17 @@ version = "1.1.2"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "1505bd5d3d116872e7271a6d4e16d81d0c8570876c8de68093a09ac269d8aac0"
|
||||
|
||||
[[package]]
|
||||
name = "atty"
|
||||
version = "0.2.14"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "d9b39be18770d11421cdb1b9947a45dd3f37e93092cbf377614828a319d5fee8"
|
||||
dependencies = [
|
||||
"hermit-abi",
|
||||
"libc",
|
||||
"winapi",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "audiopus"
|
||||
version = "0.3.0-rc.0"
|
||||
@@ -316,6 +327,26 @@ dependencies = [
|
||||
"virtue",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "bindgen"
|
||||
version = "0.72.1"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "993776b509cfb49c750f11b8f07a46fa23e0a1386ffc01fb1e7d343efc387895"
|
||||
dependencies = [
|
||||
"bitflags 2.11.0",
|
||||
"cexpr",
|
||||
"clang-sys",
|
||||
"itertools",
|
||||
"log",
|
||||
"prettyplease",
|
||||
"proc-macro2",
|
||||
"quote",
|
||||
"regex",
|
||||
"rustc-hash",
|
||||
"shlex",
|
||||
"syn 2.0.117",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "bitflags"
|
||||
version = "1.3.2"
|
||||
@@ -418,6 +449,15 @@ dependencies = [
|
||||
"smallvec",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "cexpr"
|
||||
version = "0.6.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "6fac387a98bb7c37292057cffc56d62ecb629900026402633ae9160df93a8766"
|
||||
dependencies = [
|
||||
"nom",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "cfg-if"
|
||||
version = "0.1.10"
|
||||
@@ -487,6 +527,42 @@ dependencies = [
|
||||
"inout",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "clang-sys"
|
||||
version = "1.8.1"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "0b023947811758c97c59bf9d1c188fd619ad4718dcaa767947df1cadb14f39f4"
|
||||
dependencies = [
|
||||
"glob",
|
||||
"libc",
|
||||
"libloading",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "clap"
|
||||
version = "3.2.25"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "4ea181bf566f71cb9a5d17a59e1871af638180a18fb0035c92ae62b705207123"
|
||||
dependencies = [
|
||||
"atty",
|
||||
"bitflags 1.3.2",
|
||||
"clap_lex",
|
||||
"indexmap 1.9.3",
|
||||
"once_cell",
|
||||
"strsim",
|
||||
"termcolor",
|
||||
"textwrap",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "clap_lex"
|
||||
version = "0.2.4"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "2850f2f5a82cbf437dd5af4d49848fbdfc27c157c3d010345776f952765261c5"
|
||||
dependencies = [
|
||||
"os_str_bytes",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "cmake"
|
||||
version = "0.1.58"
|
||||
@@ -496,6 +572,15 @@ dependencies = [
|
||||
"cc",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "cmudict-fast"
|
||||
version = "0.8.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "2c9f73004e928ed46c3e7fd7406d2b12c8674153295f08af084b49860276dc02"
|
||||
dependencies = [
|
||||
"thiserror 1.0.69",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "codec-lib"
|
||||
version = "0.1.0"
|
||||
@@ -700,6 +785,125 @@ version = "0.3.8"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "06d2e3287df1c007e74221c49ca10a95d557349e54b3a75dc2fb14712c751f04"
|
||||
|
||||
[[package]]
|
||||
name = "dasp"
|
||||
version = "0.11.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "7381b67da416b639690ac77c73b86a7b5e64a29e31d1f75fb3b1102301ef355a"
|
||||
dependencies = [
|
||||
"dasp_envelope",
|
||||
"dasp_frame",
|
||||
"dasp_interpolate",
|
||||
"dasp_peak",
|
||||
"dasp_ring_buffer",
|
||||
"dasp_rms",
|
||||
"dasp_sample",
|
||||
"dasp_signal",
|
||||
"dasp_slice",
|
||||
"dasp_window",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "dasp_envelope"
|
||||
version = "0.11.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "8ec617ce7016f101a87fe85ed44180839744265fae73bb4aa43e7ece1b7668b6"
|
||||
dependencies = [
|
||||
"dasp_frame",
|
||||
"dasp_peak",
|
||||
"dasp_ring_buffer",
|
||||
"dasp_rms",
|
||||
"dasp_sample",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "dasp_frame"
|
||||
version = "0.11.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "b2a3937f5fe2135702897535c8d4a5553f8b116f76c1529088797f2eee7c5cd6"
|
||||
dependencies = [
|
||||
"dasp_sample",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "dasp_interpolate"
|
||||
version = "0.11.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "7fc975a6563bb7ca7ec0a6c784ead49983a21c24835b0bc96eea11ee407c7486"
|
||||
dependencies = [
|
||||
"dasp_frame",
|
||||
"dasp_ring_buffer",
|
||||
"dasp_sample",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "dasp_peak"
|
||||
version = "0.11.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "5cf88559d79c21f3d8523d91250c397f9a15b5fc72fbb3f87fdb0a37b79915bf"
|
||||
dependencies = [
|
||||
"dasp_frame",
|
||||
"dasp_sample",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "dasp_ring_buffer"
|
||||
version = "0.11.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "07d79e19b89618a543c4adec9c5a347fe378a19041699b3278e616e387511ea1"
|
||||
|
||||
[[package]]
|
||||
name = "dasp_rms"
|
||||
version = "0.11.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "a6c5dcb30b7e5014486e2822537ea2beae50b19722ffe2ed7549ab03774575aa"
|
||||
dependencies = [
|
||||
"dasp_frame",
|
||||
"dasp_ring_buffer",
|
||||
"dasp_sample",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "dasp_sample"
|
||||
version = "0.11.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "0c87e182de0887fd5361989c677c4e8f5000cd9491d6d563161a8f3a5519fc7f"
|
||||
|
||||
[[package]]
|
||||
name = "dasp_signal"
|
||||
version = "0.11.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "aa1ab7d01689c6ed4eae3d38fe1cea08cba761573fbd2d592528d55b421077e7"
|
||||
dependencies = [
|
||||
"dasp_envelope",
|
||||
"dasp_frame",
|
||||
"dasp_interpolate",
|
||||
"dasp_peak",
|
||||
"dasp_ring_buffer",
|
||||
"dasp_rms",
|
||||
"dasp_sample",
|
||||
"dasp_window",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "dasp_slice"
|
||||
version = "0.11.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "4e1c7335d58e7baedafa516cb361360ff38d6f4d3f9d9d5ee2a2fc8e27178fa1"
|
||||
dependencies = [
|
||||
"dasp_frame",
|
||||
"dasp_sample",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "dasp_window"
|
||||
version = "0.11.1"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "99ded7b88821d2ce4e8b842c9f1c86ac911891ab89443cc1de750cae764c5076"
|
||||
dependencies = [
|
||||
"dasp_sample",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "data-encoding"
|
||||
version = "2.10.0"
|
||||
@@ -857,6 +1061,12 @@ dependencies = [
|
||||
"signature",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "either"
|
||||
version = "1.15.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "48c757948c5ede0e46177b7add2e67155f70e33c07fea8284df6576da70b3719"
|
||||
|
||||
[[package]]
|
||||
name = "elliptic-curve"
|
||||
version = "0.12.3"
|
||||
@@ -1203,6 +1413,12 @@ dependencies = [
|
||||
"polyval",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "glob"
|
||||
version = "0.3.3"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "0cc23270f6e1808e30a928bdc84dea0b9b4136a8bc82338574f23baf47bbd280"
|
||||
|
||||
[[package]]
|
||||
name = "group"
|
||||
version = "0.12.1"
|
||||
@@ -1214,6 +1430,12 @@ dependencies = [
|
||||
"subtle",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "hashbrown"
|
||||
version = "0.12.3"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "8a9ee70c43aaf417c914396645a0fa852624801b24ebb7ae78fe8272889ac888"
|
||||
|
||||
[[package]]
|
||||
name = "hashbrown"
|
||||
version = "0.15.5"
|
||||
@@ -1246,6 +1468,15 @@ version = "0.5.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "2304e00983f87ffb38b55b444b5e3b60a884b5d30c0fca7d82fe33449bbe55ea"
|
||||
|
||||
[[package]]
|
||||
name = "hermit-abi"
|
||||
version = "0.1.19"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "62b467343b94ba476dcb2500d242dadbb39557df889310ac77c5d99100aaac33"
|
||||
dependencies = [
|
||||
"libc",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "hex"
|
||||
version = "0.4.3"
|
||||
@@ -1446,6 +1677,16 @@ dependencies = [
|
||||
"zstd",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "indexmap"
|
||||
version = "1.9.3"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "bd070e393353796e801d209ad339e89596eb4c8d430d18ede6a1cced8fafbd99"
|
||||
dependencies = [
|
||||
"autocfg",
|
||||
"hashbrown 0.12.3",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "indexmap"
|
||||
version = "2.14.0"
|
||||
@@ -1479,7 +1720,7 @@ dependencies = [
|
||||
"rand 0.8.5",
|
||||
"rtcp",
|
||||
"rtp",
|
||||
"thiserror",
|
||||
"thiserror 1.0.69",
|
||||
"tokio",
|
||||
"waitgroup",
|
||||
"webrtc-srtp",
|
||||
@@ -1492,6 +1733,15 @@ version = "2.12.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "d98f6fed1fde3f8c21bc40a1abb88dd75e67924f9cffc3ef95607bad8017f8e2"
|
||||
|
||||
[[package]]
|
||||
name = "itertools"
|
||||
version = "0.13.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "413ee7dfc52ee1a4949ceeb7dbc8a33f2d6c088194d9f922fb8318faf1f01186"
|
||||
dependencies = [
|
||||
"either",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "itoa"
|
||||
version = "1.0.18"
|
||||
@@ -1544,12 +1794,11 @@ dependencies = [
|
||||
[[package]]
|
||||
name = "kokoro-tts"
|
||||
version = "0.3.2"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "68e5d46e20a28fa5fd313d9ffcf4bbcf41570e64841d3944c832eef6b98d208b"
|
||||
dependencies = [
|
||||
"bincode 2.0.1",
|
||||
"cc",
|
||||
"chinese-number",
|
||||
"cmudict-fast",
|
||||
"futures",
|
||||
"jieba-rs",
|
||||
"log",
|
||||
@@ -1604,6 +1853,16 @@ dependencies = [
|
||||
"rle-decode-fast",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "libloading"
|
||||
version = "0.8.9"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "d7c4b02199fee7c5d21a5ae7d8cfa79a6ef5bb2fc834d6e9058e89c825efdc55"
|
||||
dependencies = [
|
||||
"cfg-if 1.0.4",
|
||||
"windows-link",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "linux-raw-sys"
|
||||
version = "0.12.1"
|
||||
@@ -1739,7 +1998,13 @@ version = "0.5.2"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "805d5964d1e7a0006a7fdced7dae75084d66d18b35f1dfe81bd76929b1f8da0c"
|
||||
dependencies = [
|
||||
"anyhow",
|
||||
"clap",
|
||||
"dasp",
|
||||
"dasp_interpolate",
|
||||
"dasp_ring_buffer",
|
||||
"easyfft",
|
||||
"hound",
|
||||
"once_cell",
|
||||
]
|
||||
|
||||
@@ -1881,16 +2146,6 @@ dependencies = [
|
||||
"vcpkg",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "opus-codec"
|
||||
version = "0.2.0"
|
||||
dependencies = [
|
||||
"base64 0.22.1",
|
||||
"codec-lib",
|
||||
"serde",
|
||||
"serde_json",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "ort"
|
||||
version = "2.0.0-rc.11"
|
||||
@@ -1915,6 +2170,12 @@ dependencies = [
|
||||
"ureq",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "os_str_bytes"
|
||||
version = "6.6.1"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "e2355d85b9a3786f481747ced0e0ff2ba35213a1f9bd406ed906554d7af805a1"
|
||||
|
||||
[[package]]
|
||||
name = "p256"
|
||||
version = "0.11.1"
|
||||
@@ -2188,12 +2449,17 @@ dependencies = [
|
||||
"base64 0.22.1",
|
||||
"codec-lib",
|
||||
"hound",
|
||||
"kokoro-tts",
|
||||
"nnnoiseless",
|
||||
"ort",
|
||||
"rand 0.8.5",
|
||||
"regex-lite",
|
||||
"serde",
|
||||
"serde_json",
|
||||
"sip-proto",
|
||||
"spandsp",
|
||||
"tokio",
|
||||
"udptl",
|
||||
"webrtc",
|
||||
]
|
||||
|
||||
@@ -2391,7 +2657,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "6423493804221c276d27f3cc383cd5cbe1a1f10f210909fd4951b579b01293cd"
|
||||
dependencies = [
|
||||
"bytes",
|
||||
"thiserror",
|
||||
"thiserror 1.0.69",
|
||||
"webrtc-util",
|
||||
]
|
||||
|
||||
@@ -2401,7 +2667,7 @@ version = "0.1.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "ce5248489db464de29835170cd1f6e19933146b0016789effc59cb53d9f13844"
|
||||
dependencies = [
|
||||
"thiserror",
|
||||
"thiserror 1.0.69",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
@@ -2413,7 +2679,7 @@ dependencies = [
|
||||
"bytes",
|
||||
"rand 0.8.5",
|
||||
"serde",
|
||||
"thiserror",
|
||||
"thiserror 1.0.69",
|
||||
"webrtc-util",
|
||||
]
|
||||
|
||||
@@ -2424,7 +2690,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "7bb90df8268abfe08452ef2dae9e867a54edfdaa71b3127ef47d8b031f77ac73"
|
||||
dependencies = [
|
||||
"smallvec",
|
||||
"thiserror",
|
||||
"thiserror 1.0.69",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
@@ -2551,7 +2817,7 @@ checksum = "4d22a5ef407871893fd72b4562ee15e4742269b173959db4b8df6f538c414e13"
|
||||
dependencies = [
|
||||
"rand 0.8.5",
|
||||
"substring",
|
||||
"thiserror",
|
||||
"thiserror 1.0.69",
|
||||
"url",
|
||||
]
|
||||
|
||||
@@ -2764,6 +3030,28 @@ dependencies = [
|
||||
"winapi",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "spandsp"
|
||||
version = "0.1.5"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "b5f076b6e56f1a1062d6950dcd1c6c1df281ae2828db271929c50c191ec8c79e"
|
||||
dependencies = [
|
||||
"bitflags 2.11.0",
|
||||
"spandsp-sys",
|
||||
"thiserror 2.0.18",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "spandsp-sys"
|
||||
version = "0.1.5"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "c05ab99051230293dded61ba3cd32f06eb15b437a8135be21f560f72bab713db"
|
||||
dependencies = [
|
||||
"bindgen",
|
||||
"cc",
|
||||
"pkg-config",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "spin"
|
||||
version = "0.5.2"
|
||||
@@ -2811,7 +3099,7 @@ dependencies = [
|
||||
"rand 0.8.5",
|
||||
"ring",
|
||||
"subtle",
|
||||
"thiserror",
|
||||
"thiserror 1.0.69",
|
||||
"tokio",
|
||||
"url",
|
||||
"webrtc-util",
|
||||
@@ -2890,13 +3178,37 @@ dependencies = [
|
||||
"windows-sys",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "termcolor"
|
||||
version = "1.4.1"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "06794f8f6c5c898b3275aebefa6b8a1cb24cd2c6c79397ab15774837a0bc5755"
|
||||
dependencies = [
|
||||
"winapi-util",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "textwrap"
|
||||
version = "0.16.2"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "c13547615a44dc9c452a8a534638acdf07120d4b6847c8178705da06306a3057"
|
||||
|
||||
[[package]]
|
||||
name = "thiserror"
|
||||
version = "1.0.69"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "b6aaf5339b578ea85b50e080feb250a3e8ae8cfcdff9a461c9ec2904bc923f52"
|
||||
dependencies = [
|
||||
"thiserror-impl",
|
||||
"thiserror-impl 1.0.69",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "thiserror"
|
||||
version = "2.0.18"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "4288b5bcbc7920c07a1149a35cf9590a2aa808e0bc1eafaade0b80947865fbc4"
|
||||
dependencies = [
|
||||
"thiserror-impl 2.0.18",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
@@ -2910,6 +3222,17 @@ dependencies = [
|
||||
"syn 2.0.117",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "thiserror-impl"
|
||||
version = "2.0.18"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "ebc4ee7f67670e9b64d05fa4253e753e016c6c95ff35b89b7941d6b856dec1d5"
|
||||
dependencies = [
|
||||
"proc-macro2",
|
||||
"quote",
|
||||
"syn 2.0.117",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "time"
|
||||
version = "0.3.47"
|
||||
@@ -2986,9 +3309,21 @@ source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "63e71662fa4b2a2c3a26f570f037eb95bb1f85397f3cd8076caed2f026a6d100"
|
||||
dependencies = [
|
||||
"pin-project-lite",
|
||||
"tracing-attributes",
|
||||
"tracing-core",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "tracing-attributes"
|
||||
version = "0.1.31"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "7490cfa5ec963746568740651ac6781f701c9c5ea257c58e057f3ba8cf69e8da"
|
||||
dependencies = [
|
||||
"proc-macro2",
|
||||
"quote",
|
||||
"syn 2.0.117",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "tracing-core"
|
||||
version = "0.1.36"
|
||||
@@ -3008,16 +3343,6 @@ dependencies = [
|
||||
"strength_reduce",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "tts-engine"
|
||||
version = "0.1.0"
|
||||
dependencies = [
|
||||
"hound",
|
||||
"kokoro-tts",
|
||||
"ort",
|
||||
"tokio",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "turn"
|
||||
version = "0.6.1"
|
||||
@@ -3032,7 +3357,7 @@ dependencies = [
|
||||
"rand 0.8.5",
|
||||
"ring",
|
||||
"stun",
|
||||
"thiserror",
|
||||
"thiserror 1.0.69",
|
||||
"tokio",
|
||||
"webrtc-util",
|
||||
]
|
||||
@@ -3043,6 +3368,17 @@ version = "1.19.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "562d481066bde0658276a35467c4af00bdc6ee726305698a55b86e61d7ad82bb"
|
||||
|
||||
[[package]]
|
||||
name = "udptl"
|
||||
version = "0.1.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "b255ad0ff36582a8a453c42a2bcc16c72d00f0ab16a14a4a7aeacb55ccb2a351"
|
||||
dependencies = [
|
||||
"thiserror 2.0.18",
|
||||
"tokio",
|
||||
"tracing",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "unicode-ident"
|
||||
version = "1.0.24"
|
||||
@@ -3261,7 +3597,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "bb0e353e6a2fbdc176932bbaab493762eb1255a7900fe0fea1a2f96c296cc909"
|
||||
dependencies = [
|
||||
"anyhow",
|
||||
"indexmap",
|
||||
"indexmap 2.14.0",
|
||||
"wasm-encoder",
|
||||
"wasmparser",
|
||||
]
|
||||
@@ -3274,7 +3610,7 @@ checksum = "47b807c72e1bac69382b3a6fb3dbe8ea4c0ed87ff5629b8685ae6b9a611028fe"
|
||||
dependencies = [
|
||||
"bitflags 2.11.0",
|
||||
"hashbrown 0.15.5",
|
||||
"indexmap",
|
||||
"indexmap 2.14.0",
|
||||
"semver",
|
||||
]
|
||||
|
||||
@@ -3334,7 +3670,7 @@ dependencies = [
|
||||
"sha2",
|
||||
"smol_str",
|
||||
"stun",
|
||||
"thiserror",
|
||||
"thiserror 1.0.69",
|
||||
"time",
|
||||
"tokio",
|
||||
"turn",
|
||||
@@ -3359,7 +3695,7 @@ dependencies = [
|
||||
"bytes",
|
||||
"derive_builder",
|
||||
"log",
|
||||
"thiserror",
|
||||
"thiserror 1.0.69",
|
||||
"tokio",
|
||||
"webrtc-sctp",
|
||||
"webrtc-util",
|
||||
@@ -3397,7 +3733,7 @@ dependencies = [
|
||||
"sha2",
|
||||
"signature",
|
||||
"subtle",
|
||||
"thiserror",
|
||||
"thiserror 1.0.69",
|
||||
"tokio",
|
||||
"webpki",
|
||||
"webrtc-util",
|
||||
@@ -3419,7 +3755,7 @@ dependencies = [
|
||||
"serde",
|
||||
"serde_json",
|
||||
"stun",
|
||||
"thiserror",
|
||||
"thiserror 1.0.69",
|
||||
"tokio",
|
||||
"turn",
|
||||
"url",
|
||||
@@ -3437,7 +3773,7 @@ checksum = "f08dfd7a6e3987e255c4dbe710dde5d94d0f0574f8a21afa95d171376c143106"
|
||||
dependencies = [
|
||||
"log",
|
||||
"socket2 0.4.10",
|
||||
"thiserror",
|
||||
"thiserror 1.0.69",
|
||||
"tokio",
|
||||
"webrtc-util",
|
||||
]
|
||||
@@ -3452,7 +3788,7 @@ dependencies = [
|
||||
"bytes",
|
||||
"rand 0.8.5",
|
||||
"rtp",
|
||||
"thiserror",
|
||||
"thiserror 1.0.69",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
@@ -3467,7 +3803,7 @@ dependencies = [
|
||||
"crc",
|
||||
"log",
|
||||
"rand 0.8.5",
|
||||
"thiserror",
|
||||
"thiserror 1.0.69",
|
||||
"tokio",
|
||||
"webrtc-util",
|
||||
]
|
||||
@@ -3490,7 +3826,7 @@ dependencies = [
|
||||
"rtp",
|
||||
"sha1",
|
||||
"subtle",
|
||||
"thiserror",
|
||||
"thiserror 1.0.69",
|
||||
"tokio",
|
||||
"webrtc-util",
|
||||
]
|
||||
@@ -3511,7 +3847,7 @@ dependencies = [
|
||||
"log",
|
||||
"nix",
|
||||
"rand 0.8.5",
|
||||
"thiserror",
|
||||
"thiserror 1.0.69",
|
||||
"tokio",
|
||||
"winapi",
|
||||
]
|
||||
@@ -3532,6 +3868,15 @@ version = "0.4.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "ac3b87c63620426dd9b991e5ce0329eff545bccbbb34f3be09ff6fb6ab51b7b6"
|
||||
|
||||
[[package]]
|
||||
name = "winapi-util"
|
||||
version = "0.1.11"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "c2a7b1c03c876122aa43f3020e6c3c3ee5c05081c9a00739faf7503aeba10d22"
|
||||
dependencies = [
|
||||
"windows-sys",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "winapi-x86_64-pc-windows-gnu"
|
||||
version = "0.4.0"
|
||||
@@ -3581,7 +3926,7 @@ checksum = "b7c566e0f4b284dd6561c786d9cb0142da491f46a9fbed79ea69cdad5db17f21"
|
||||
dependencies = [
|
||||
"anyhow",
|
||||
"heck",
|
||||
"indexmap",
|
||||
"indexmap 2.14.0",
|
||||
"prettyplease",
|
||||
"syn 2.0.117",
|
||||
"wasm-metadata",
|
||||
@@ -3612,7 +3957,7 @@ checksum = "9d66ea20e9553b30172b5e831994e35fbde2d165325bec84fc43dbf6f4eb9cb2"
|
||||
dependencies = [
|
||||
"anyhow",
|
||||
"bitflags 2.11.0",
|
||||
"indexmap",
|
||||
"indexmap 2.14.0",
|
||||
"log",
|
||||
"serde",
|
||||
"serde_derive",
|
||||
@@ -3631,7 +3976,7 @@ checksum = "ecc8ac4bc1dc3381b7f59c34f00b67e18f910c2c0f50015669dde7def656a736"
|
||||
dependencies = [
|
||||
"anyhow",
|
||||
"id-arena",
|
||||
"indexmap",
|
||||
"indexmap 2.14.0",
|
||||
"log",
|
||||
"semver",
|
||||
"serde",
|
||||
@@ -3673,7 +4018,7 @@ dependencies = [
|
||||
"nom",
|
||||
"oid-registry 0.4.0",
|
||||
"rusticata-macros",
|
||||
"thiserror",
|
||||
"thiserror 1.0.69",
|
||||
"time",
|
||||
]
|
||||
|
||||
@@ -3692,7 +4037,7 @@ dependencies = [
|
||||
"oid-registry 0.6.1",
|
||||
"ring",
|
||||
"rusticata-macros",
|
||||
"thiserror",
|
||||
"thiserror 1.0.69",
|
||||
"time",
|
||||
]
|
||||
|
||||
|
||||
+3
-2
@@ -1,8 +1,6 @@
|
||||
[workspace]
|
||||
members = [
|
||||
"crates/codec-lib",
|
||||
"crates/opus-codec",
|
||||
"crates/tts-engine",
|
||||
"crates/sip-proto",
|
||||
"crates/proxy-engine",
|
||||
]
|
||||
@@ -11,3 +9,6 @@ resolver = "2"
|
||||
[profile.release]
|
||||
opt-level = 3
|
||||
lto = true
|
||||
|
||||
[patch.crates-io]
|
||||
kokoro-tts = { path = "vendor/kokoro-tts" }
|
||||
|
||||
@@ -7,4 +7,4 @@ edition = "2021"
|
||||
audiopus = "0.3.0-rc.0"
|
||||
ezk-g722 = "0.1"
|
||||
rubato = "0.14"
|
||||
nnnoiseless = { version = "0.5", default-features = false }
|
||||
nnnoiseless = "0.5"
|
||||
|
||||
@@ -1,7 +1,7 @@
|
||||
//! Audio codec library for the SIP router.
|
||||
//!
|
||||
//! Handles Opus ↔ G.722 ↔ PCMU/PCMA transcoding with ML noise suppression.
|
||||
//! Used by both the standalone `opus-codec` CLI and the `proxy-engine` binary.
|
||||
//! Used by the `proxy-engine` binary for all audio transcoding.
|
||||
|
||||
use audiopus::coder::{Decoder as OpusDecoder, Encoder as OpusEncoder};
|
||||
use audiopus::packet::Packet as OpusPacket;
|
||||
@@ -104,6 +104,8 @@ pub struct TranscodeState {
|
||||
g722_dec: libg722::decoder::Decoder,
|
||||
/// Cached FFT resamplers keyed by (from_rate, to_rate, chunk_size).
|
||||
resamplers: HashMap<(u32, u32, usize), FftFixedIn<f64>>,
|
||||
/// Cached f32 FFT resamplers keyed by (from_rate, to_rate, chunk_size).
|
||||
resamplers_f32: HashMap<(u32, u32, usize), FftFixedIn<f32>>,
|
||||
/// ML noise suppression for the SIP-bound direction.
|
||||
denoiser_to_sip: Box<DenoiseState<'static>>,
|
||||
/// ML noise suppression for the browser-bound direction.
|
||||
@@ -113,9 +115,8 @@ pub struct TranscodeState {
|
||||
impl TranscodeState {
|
||||
/// Create a new transcoding session with fresh codec state.
|
||||
pub fn new() -> Result<Self, String> {
|
||||
let mut opus_enc =
|
||||
OpusEncoder::new(SampleRate::Hz48000, Channels::Mono, Application::Voip)
|
||||
.map_err(|e| format!("opus encoder: {e}"))?;
|
||||
let mut opus_enc = OpusEncoder::new(SampleRate::Hz48000, Channels::Mono, Application::Voip)
|
||||
.map_err(|e| format!("opus encoder: {e}"))?;
|
||||
opus_enc
|
||||
.set_complexity(5)
|
||||
.map_err(|e| format!("opus set_complexity: {e}"))?;
|
||||
@@ -133,14 +134,17 @@ impl TranscodeState {
|
||||
g722_enc,
|
||||
g722_dec,
|
||||
resamplers: HashMap::new(),
|
||||
resamplers_f32: HashMap::new(),
|
||||
denoiser_to_sip: DenoiseState::new(),
|
||||
denoiser_to_browser: DenoiseState::new(),
|
||||
})
|
||||
}
|
||||
|
||||
/// High-quality sample rate conversion using rubato FFT resampler.
|
||||
/// Resamplers are cached by (from_rate, to_rate, chunk_size) and reused,
|
||||
/// maintaining proper inter-frame state for continuous audio streams.
|
||||
///
|
||||
/// To maintain continuous filter state, the resampler always processes at a
|
||||
/// canonical chunk size (20ms at the source rate). This prevents cache
|
||||
/// thrashing from variable input sizes and preserves inter-frame filter state.
|
||||
pub fn resample(
|
||||
&mut self,
|
||||
pcm: &[i16],
|
||||
@@ -151,28 +155,56 @@ impl TranscodeState {
|
||||
return Ok(pcm.to_vec());
|
||||
}
|
||||
|
||||
let chunk = pcm.len();
|
||||
let key = (from_rate, to_rate, chunk);
|
||||
let canonical_chunk = (from_rate as usize) / 50; // 20ms
|
||||
let key = (from_rate, to_rate, canonical_chunk);
|
||||
|
||||
if !self.resamplers.contains_key(&key) {
|
||||
let r =
|
||||
FftFixedIn::<f64>::new(from_rate as usize, to_rate as usize, chunk, 1, 1)
|
||||
FftFixedIn::<f64>::new(from_rate as usize, to_rate as usize, canonical_chunk, 1, 1)
|
||||
.map_err(|e| format!("resampler {from_rate}->{to_rate}: {e}"))?;
|
||||
self.resamplers.insert(key, r);
|
||||
}
|
||||
let resampler = self.resamplers.get_mut(&key).unwrap();
|
||||
|
||||
let float_in: Vec<f64> = pcm.iter().map(|&s| s as f64 / 32768.0).collect();
|
||||
let input = vec![float_in];
|
||||
let mut output = Vec::with_capacity(
|
||||
(pcm.len() as f64 * to_rate as f64 / from_rate as f64).ceil() as usize + 16,
|
||||
);
|
||||
|
||||
let result = resampler
|
||||
.process(&input, None)
|
||||
.map_err(|e| format!("resample {from_rate}->{to_rate}: {e}"))?;
|
||||
let mut offset = 0;
|
||||
while offset < pcm.len() {
|
||||
let remaining = pcm.len() - offset;
|
||||
let copy_len = remaining.min(canonical_chunk);
|
||||
let mut chunk = vec![0.0f64; canonical_chunk];
|
||||
for i in 0..copy_len {
|
||||
chunk[i] = pcm[offset + i] as f64 / 32768.0;
|
||||
}
|
||||
|
||||
Ok(result[0]
|
||||
.iter()
|
||||
.map(|&s| (s * 32767.0).round().clamp(-32768.0, 32767.0) as i16)
|
||||
.collect())
|
||||
let input = vec![chunk];
|
||||
let result = resampler
|
||||
.process(&input, None)
|
||||
.map_err(|e| format!("resample {from_rate}->{to_rate}: {e}"))?;
|
||||
|
||||
if remaining < canonical_chunk {
|
||||
let expected =
|
||||
(copy_len as f64 * to_rate as f64 / from_rate as f64).round() as usize;
|
||||
let take = expected.min(result[0].len());
|
||||
output.extend(
|
||||
result[0][..take]
|
||||
.iter()
|
||||
.map(|&s| (s * 32767.0).round().clamp(-32768.0, 32767.0) as i16),
|
||||
);
|
||||
} else {
|
||||
output.extend(
|
||||
result[0]
|
||||
.iter()
|
||||
.map(|&s| (s * 32767.0).round().clamp(-32768.0, 32767.0) as i16),
|
||||
);
|
||||
}
|
||||
|
||||
offset += canonical_chunk;
|
||||
}
|
||||
|
||||
Ok(output)
|
||||
}
|
||||
|
||||
/// Apply RNNoise ML noise suppression to 48kHz PCM audio.
|
||||
@@ -246,8 +278,7 @@ impl TranscodeState {
|
||||
match pt {
|
||||
PT_OPUS => {
|
||||
let mut pcm = vec![0i16; 5760]; // up to 120ms at 48kHz
|
||||
let packet =
|
||||
OpusPacket::try_from(data).map_err(|e| format!("opus packet: {e}"))?;
|
||||
let packet = OpusPacket::try_from(data).map_err(|e| format!("opus packet: {e}"))?;
|
||||
let out =
|
||||
MutSignals::try_from(&mut pcm[..]).map_err(|e| format!("opus signals: {e}"))?;
|
||||
let n: usize = self
|
||||
@@ -293,6 +324,165 @@ impl TranscodeState {
|
||||
_ => Err(format!("unsupported target PT {pt}")),
|
||||
}
|
||||
}
|
||||
|
||||
// ---- f32 API for high-quality internal bus ----------------------------
|
||||
|
||||
/// Decode an encoded audio payload to f32 PCM samples in [-1.0, 1.0].
|
||||
/// Returns (samples, sample_rate).
|
||||
///
|
||||
/// For Opus, uses native float decode (no i16 quantization).
|
||||
/// For G.722/G.711, decodes to i16 then converts (codec is natively i16).
|
||||
pub fn decode_to_f32(&mut self, data: &[u8], pt: u8) -> Result<(Vec<f32>, u32), String> {
|
||||
match pt {
|
||||
PT_OPUS => {
|
||||
let mut pcm = vec![0.0f32; 5760]; // up to 120ms at 48kHz
|
||||
let packet = OpusPacket::try_from(data).map_err(|e| format!("opus packet: {e}"))?;
|
||||
let out =
|
||||
MutSignals::try_from(&mut pcm[..]).map_err(|e| format!("opus signals: {e}"))?;
|
||||
let n: usize = self
|
||||
.opus_dec
|
||||
.decode_float(Some(packet), out, false)
|
||||
.map_err(|e| format!("opus decode_float: {e}"))?
|
||||
.into();
|
||||
pcm.truncate(n);
|
||||
Ok((pcm, 48000))
|
||||
}
|
||||
_ => {
|
||||
// G.722, PCMU, PCMA: natively i16 codecs — decode then convert.
|
||||
let (pcm_i16, rate) = self.decode_to_pcm(data, pt)?;
|
||||
let pcm_f32 = pcm_i16.iter().map(|&s| s as f32 / 32768.0).collect();
|
||||
Ok((pcm_f32, rate))
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/// Opus packet loss concealment — synthesize one frame to fill a gap.
|
||||
/// Returns f32 PCM at 48kHz. `frame_size` should be 960 for 20ms.
|
||||
pub fn opus_plc(&mut self, frame_size: usize) -> Result<Vec<f32>, String> {
|
||||
let mut pcm = vec![0.0f32; frame_size];
|
||||
let out =
|
||||
MutSignals::try_from(&mut pcm[..]).map_err(|e| format!("opus plc signals: {e}"))?;
|
||||
let n: usize = self
|
||||
.opus_dec
|
||||
.decode_float(None::<OpusPacket<'_>>, out, false)
|
||||
.map_err(|e| format!("opus plc: {e}"))?
|
||||
.into();
|
||||
pcm.truncate(n);
|
||||
Ok(pcm)
|
||||
}
|
||||
|
||||
/// Encode f32 PCM samples ([-1.0, 1.0]) to an audio codec.
|
||||
///
|
||||
/// For Opus, uses native float encode (no i16 quantization).
|
||||
/// For G.722/G.711, converts to i16 then encodes (codec is natively i16).
|
||||
pub fn encode_from_f32(&mut self, pcm: &[f32], pt: u8) -> Result<Vec<u8>, String> {
|
||||
match pt {
|
||||
PT_OPUS => {
|
||||
let mut buf = vec![0u8; 4000];
|
||||
let n: usize = self
|
||||
.opus_enc
|
||||
.encode_float(pcm, &mut buf)
|
||||
.map_err(|e| format!("opus encode_float: {e}"))?
|
||||
.into();
|
||||
buf.truncate(n);
|
||||
Ok(buf)
|
||||
}
|
||||
_ => {
|
||||
// G.722, PCMU, PCMA: natively i16 codecs.
|
||||
let pcm_i16: Vec<i16> = pcm
|
||||
.iter()
|
||||
.map(|&s| (s * 32767.0).round().clamp(-32768.0, 32767.0) as i16)
|
||||
.collect();
|
||||
self.encode_from_pcm(&pcm_i16, pt)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/// High-quality sample rate conversion for f32 PCM using rubato FFT resampler.
|
||||
///
|
||||
/// To maintain continuous filter state, the resampler always processes at a
|
||||
/// canonical chunk size (20ms at the source rate). This prevents cache
|
||||
/// thrashing from variable input sizes and preserves inter-frame filter state.
|
||||
pub fn resample_f32(
|
||||
&mut self,
|
||||
pcm: &[f32],
|
||||
from_rate: u32,
|
||||
to_rate: u32,
|
||||
) -> Result<Vec<f32>, String> {
|
||||
if from_rate == to_rate || pcm.is_empty() {
|
||||
return Ok(pcm.to_vec());
|
||||
}
|
||||
|
||||
let canonical_chunk = (from_rate as usize) / 50; // 20ms
|
||||
let key = (from_rate, to_rate, canonical_chunk);
|
||||
|
||||
if !self.resamplers_f32.contains_key(&key) {
|
||||
let r =
|
||||
FftFixedIn::<f32>::new(from_rate as usize, to_rate as usize, canonical_chunk, 1, 1)
|
||||
.map_err(|e| format!("resampler f32 {from_rate}->{to_rate}: {e}"))?;
|
||||
self.resamplers_f32.insert(key, r);
|
||||
}
|
||||
let resampler = self.resamplers_f32.get_mut(&key).unwrap();
|
||||
|
||||
let mut output = Vec::with_capacity(
|
||||
(pcm.len() as f64 * to_rate as f64 / from_rate as f64).ceil() as usize + 16,
|
||||
);
|
||||
|
||||
let mut offset = 0;
|
||||
while offset < pcm.len() {
|
||||
let remaining = pcm.len() - offset;
|
||||
let mut chunk = vec![0.0f32; canonical_chunk];
|
||||
let copy_len = remaining.min(canonical_chunk);
|
||||
chunk[..copy_len].copy_from_slice(&pcm[offset..offset + copy_len]);
|
||||
|
||||
let input = vec![chunk];
|
||||
let result = resampler
|
||||
.process(&input, None)
|
||||
.map_err(|e| format!("resample f32 {from_rate}->{to_rate}: {e}"))?;
|
||||
|
||||
if remaining < canonical_chunk {
|
||||
let expected =
|
||||
(copy_len as f64 * to_rate as f64 / from_rate as f64).round() as usize;
|
||||
output.extend_from_slice(&result[0][..expected.min(result[0].len())]);
|
||||
} else {
|
||||
output.extend_from_slice(&result[0]);
|
||||
}
|
||||
|
||||
offset += canonical_chunk;
|
||||
}
|
||||
|
||||
Ok(output)
|
||||
}
|
||||
|
||||
/// Apply RNNoise ML noise suppression to 48kHz f32 PCM audio.
|
||||
/// Processes in 480-sample (10ms) frames. State persists across calls.
|
||||
/// Operates natively in f32 — no i16 conversion overhead.
|
||||
pub fn denoise_f32(denoiser: &mut DenoiseState, pcm: &[f32]) -> Vec<f32> {
|
||||
let frame_size = DenoiseState::FRAME_SIZE; // 480
|
||||
let total = pcm.len();
|
||||
let whole = (total / frame_size) * frame_size;
|
||||
let mut output = Vec::with_capacity(total);
|
||||
let mut out_buf = [0.0f32; 480];
|
||||
|
||||
// nnnoiseless expects f32 samples scaled as i16 range (-32768..32767).
|
||||
for offset in (0..whole).step_by(frame_size) {
|
||||
let input: Vec<f32> = pcm[offset..offset + frame_size]
|
||||
.iter()
|
||||
.map(|&s| s * 32768.0)
|
||||
.collect();
|
||||
denoiser.process_frame(&mut out_buf, &input);
|
||||
output.extend(out_buf.iter().map(|&s| s / 32768.0));
|
||||
}
|
||||
if whole < total {
|
||||
output.extend_from_slice(&pcm[whole..]);
|
||||
}
|
||||
output
|
||||
}
|
||||
}
|
||||
|
||||
/// Create a new standalone denoiser for per-leg inbound processing.
///
/// Each call returns an independent, boxed `DenoiseState` built with
/// `DenoiseState::new()`; the caller owns it and its state.
|
||||
pub fn new_denoiser() -> Box<DenoiseState<'static>> {
|
||||
DenoiseState::new()
|
||||
}
|
||||
|
||||
#[cfg(test)]
|
||||
@@ -305,8 +495,10 @@ mod tests {
|
||||
let encoded = mulaw_encode(sample);
|
||||
let decoded = mulaw_decode(encoded);
|
||||
// µ-law is lossy; verify the decoded value is close.
|
||||
assert!((sample as i32 - decoded as i32).abs() < 1000,
|
||||
"µ-law roundtrip failed for {sample}: got {decoded}");
|
||||
assert!(
|
||||
(sample as i32 - decoded as i32).abs() < 1000,
|
||||
"µ-law roundtrip failed for {sample}: got {decoded}"
|
||||
);
|
||||
}
|
||||
}
|
||||
|
||||
@@ -315,8 +507,10 @@ mod tests {
|
||||
for sample in [-32768i16, -1000, -1, 0, 1, 1000, 32767] {
|
||||
let encoded = alaw_encode(sample);
|
||||
let decoded = alaw_decode(encoded);
|
||||
assert!((sample as i32 - decoded as i32).abs() < 1000,
|
||||
"A-law roundtrip failed for {sample}: got {decoded}");
|
||||
assert!(
|
||||
(sample as i32 - decoded as i32).abs() < 1000,
|
||||
"A-law roundtrip failed for {sample}: got {decoded}"
|
||||
);
|
||||
}
|
||||
}
|
||||
|
||||
@@ -340,7 +534,9 @@ mod tests {
|
||||
fn pcmu_to_pcma_roundtrip() {
|
||||
let mut st = TranscodeState::new().unwrap();
|
||||
// 160 bytes = 20ms of PCMU at 8kHz
|
||||
let pcmu_data: Vec<u8> = (0..160).map(|i| mulaw_encode((i as i16 * 200) - 16000)).collect();
|
||||
let pcmu_data: Vec<u8> = (0..160)
|
||||
.map(|i| mulaw_encode((i as i16 * 200) - 16000))
|
||||
.collect();
|
||||
let pcma = st.transcode(&pcmu_data, PT_PCMU, PT_PCMA, None).unwrap();
|
||||
assert_eq!(pcma.len(), 160); // Same frame size
|
||||
let back = st.transcode(&pcma, PT_PCMA, PT_PCMU, None).unwrap();
|
||||
|
||||
@@ -1,14 +0,0 @@
|
||||
[package]
|
||||
name = "opus-codec"
|
||||
version = "0.2.0"
|
||||
edition = "2021"
|
||||
|
||||
[[bin]]
|
||||
name = "opus-codec"
|
||||
path = "src/main.rs"
|
||||
|
||||
[dependencies]
|
||||
codec-lib = { path = "../codec-lib" }
|
||||
serde = { version = "1", features = ["derive"] }
|
||||
serde_json = "1"
|
||||
base64 = "0.22"
|
||||
@@ -1,286 +0,0 @@
|
||||
/// Audio transcoding bridge for smartrust.
|
||||
///
|
||||
/// Thin CLI wrapper around `codec-lib`. Handles Opus ↔ G.722 ↔ PCMU transcoding.
|
||||
///
|
||||
/// Protocol:
|
||||
/// -> {"id":"1","method":"init","params":{}}
|
||||
/// <- {"id":"1","success":true,"result":{}}
|
||||
/// -> {"id":"2","method":"create_session","params":{"session_id":"call-abc"}}
|
||||
/// <- {"id":"2","success":true,"result":{}}
|
||||
/// -> {"id":"3","method":"transcode","params":{"session_id":"call-abc","data_b64":"...","from_pt":111,"to_pt":9}}
|
||||
/// <- {"id":"3","success":true,"result":{"data_b64":"..."}}
|
||||
/// -> {"id":"4","method":"destroy_session","params":{"session_id":"call-abc"}}
|
||||
/// <- {"id":"4","success":true,"result":{}}
|
||||
|
||||
use base64::engine::general_purpose::STANDARD as B64;
|
||||
use base64::Engine as _;
|
||||
use codec_lib::{codec_sample_rate, TranscodeState};
|
||||
use serde::Deserialize;
|
||||
use std::collections::HashMap;
|
||||
use std::io::{self, BufRead, Write};
|
||||
|
||||
/// One request line of the JSON protocol read from stdin.
#[derive(Deserialize)]
|
||||
struct Request {
|
||||
/// Caller-chosen request id; echoed back in the matching response.
id: String,
|
||||
/// Command name that `main` dispatches on (e.g. `init`, `transcode`).
method: String,
|
||||
/// Command arguments; falls back to the `Value` default when the key is absent.
#[serde(default)]
|
||||
params: serde_json::Value,
|
||||
}
|
||||
|
||||
fn respond(
|
||||
out: &mut impl Write,
|
||||
id: &str,
|
||||
success: bool,
|
||||
result: Option<serde_json::Value>,
|
||||
error: Option<&str>,
|
||||
) {
|
||||
let mut resp = serde_json::json!({ "id": id, "success": success });
|
||||
if let Some(r) = result {
|
||||
resp["result"] = r;
|
||||
}
|
||||
if let Some(e) = error {
|
||||
resp["error"] = serde_json::Value::String(e.to_string());
|
||||
}
|
||||
let _ = writeln!(out, "{}", resp);
|
||||
let _ = out.flush();
|
||||
}
|
||||
|
||||
/// Resolve a session: if session_id is provided, look it up in the sessions map;
|
||||
/// otherwise fall back to the default state (backward compat with `init`).
|
||||
fn get_session<'a>(
|
||||
sessions: &'a mut HashMap<String, TranscodeState>,
|
||||
default: &'a mut Option<TranscodeState>,
|
||||
params: &serde_json::Value,
|
||||
) -> Option<&'a mut TranscodeState> {
|
||||
if let Some(sid) = params.get("session_id").and_then(|v| v.as_str()) {
|
||||
sessions.get_mut(sid)
|
||||
} else {
|
||||
default.as_mut()
|
||||
}
|
||||
}
|
||||
|
||||
fn main() {
|
||||
let stdin = io::stdin();
|
||||
let stdout = io::stdout();
|
||||
let mut out = io::BufWriter::new(stdout.lock());
|
||||
|
||||
let _ = writeln!(out, r#"{{"event":"ready","data":{{}}}}"#);
|
||||
let _ = out.flush();
|
||||
|
||||
let mut default_state: Option<TranscodeState> = None;
|
||||
let mut sessions: HashMap<String, TranscodeState> = HashMap::new();
|
||||
|
||||
for line in stdin.lock().lines() {
|
||||
let line = match line {
|
||||
Ok(l) if !l.trim().is_empty() => l,
|
||||
Ok(_) => continue,
|
||||
Err(_) => break,
|
||||
};
|
||||
|
||||
let req: Request = match serde_json::from_str(&line) {
|
||||
Ok(r) => r,
|
||||
Err(e) => {
|
||||
respond(&mut out, "", false, None, Some(&format!("parse: {e}")));
|
||||
continue;
|
||||
}
|
||||
};
|
||||
|
||||
match req.method.as_str() {
|
||||
"init" => match TranscodeState::new() {
|
||||
Ok(s) => {
|
||||
default_state = Some(s);
|
||||
respond(&mut out, &req.id, true, Some(serde_json::json!({})), None);
|
||||
}
|
||||
Err(e) => respond(&mut out, &req.id, false, None, Some(&e)),
|
||||
},
|
||||
|
||||
"create_session" => {
|
||||
let session_id = match req.params.get("session_id").and_then(|v| v.as_str()) {
|
||||
Some(s) => s.to_string(),
|
||||
None => {
|
||||
respond(&mut out, &req.id, false, None, Some("missing session_id"));
|
||||
continue;
|
||||
}
|
||||
};
|
||||
if sessions.contains_key(&session_id) {
|
||||
respond(&mut out, &req.id, true, Some(serde_json::json!({})), None);
|
||||
continue;
|
||||
}
|
||||
match TranscodeState::new() {
|
||||
Ok(s) => {
|
||||
sessions.insert(session_id, s);
|
||||
respond(&mut out, &req.id, true, Some(serde_json::json!({})), None);
|
||||
}
|
||||
Err(e) => respond(&mut out, &req.id, false, None, Some(&e)),
|
||||
}
|
||||
}
|
||||
|
||||
"destroy_session" => {
|
||||
let session_id = match req.params.get("session_id").and_then(|v| v.as_str()) {
|
||||
Some(s) => s,
|
||||
None => {
|
||||
respond(&mut out, &req.id, false, None, Some("missing session_id"));
|
||||
continue;
|
||||
}
|
||||
};
|
||||
sessions.remove(session_id);
|
||||
respond(&mut out, &req.id, true, Some(serde_json::json!({})), None);
|
||||
}
|
||||
|
||||
"transcode" => {
|
||||
let st = match get_session(&mut sessions, &mut default_state, &req.params) {
|
||||
Some(s) => s,
|
||||
None => {
|
||||
respond(
|
||||
&mut out,
|
||||
&req.id,
|
||||
false,
|
||||
None,
|
||||
Some("not initialized (no session or default state)"),
|
||||
);
|
||||
continue;
|
||||
}
|
||||
};
|
||||
let data_b64 = match req.params.get("data_b64").and_then(|v| v.as_str()) {
|
||||
Some(s) => s,
|
||||
None => {
|
||||
respond(&mut out, &req.id, false, None, Some("missing data_b64"));
|
||||
continue;
|
||||
}
|
||||
};
|
||||
let from_pt =
|
||||
req.params.get("from_pt").and_then(|v| v.as_u64()).unwrap_or(0) as u8;
|
||||
let to_pt = req.params.get("to_pt").and_then(|v| v.as_u64()).unwrap_or(0) as u8;
|
||||
let direction = req.params.get("direction").and_then(|v| v.as_str());
|
||||
|
||||
let data = match B64.decode(data_b64) {
|
||||
Ok(b) => b,
|
||||
Err(e) => {
|
||||
respond(
|
||||
&mut out,
|
||||
&req.id,
|
||||
false,
|
||||
None,
|
||||
Some(&format!("b64: {e}")),
|
||||
);
|
||||
continue;
|
||||
}
|
||||
};
|
||||
|
||||
match st.transcode(&data, from_pt, to_pt, direction) {
|
||||
Ok(result) => {
|
||||
respond(
|
||||
&mut out,
|
||||
&req.id,
|
||||
true,
|
||||
Some(serde_json::json!({ "data_b64": B64.encode(&result) })),
|
||||
None,
|
||||
);
|
||||
}
|
||||
Err(e) => respond(&mut out, &req.id, false, None, Some(&e)),
|
||||
}
|
||||
}
|
||||
|
||||
"encode_pcm" => {
|
||||
let st = match get_session(&mut sessions, &mut default_state, &req.params) {
|
||||
Some(s) => s,
|
||||
None => {
|
||||
respond(
|
||||
&mut out,
|
||||
&req.id,
|
||||
false,
|
||||
None,
|
||||
Some("not initialized (no session or default state)"),
|
||||
);
|
||||
continue;
|
||||
}
|
||||
};
|
||||
let data_b64 = match req.params.get("data_b64").and_then(|v| v.as_str()) {
|
||||
Some(s) => s,
|
||||
None => {
|
||||
respond(&mut out, &req.id, false, None, Some("missing data_b64"));
|
||||
continue;
|
||||
}
|
||||
};
|
||||
let sample_rate = req
|
||||
.params
|
||||
.get("sample_rate")
|
||||
.and_then(|v| v.as_u64())
|
||||
.unwrap_or(22050) as u32;
|
||||
let to_pt = req.params.get("to_pt").and_then(|v| v.as_u64()).unwrap_or(9) as u8;
|
||||
|
||||
let data = match B64.decode(data_b64) {
|
||||
Ok(b) => b,
|
||||
Err(e) => {
|
||||
respond(
|
||||
&mut out,
|
||||
&req.id,
|
||||
false,
|
||||
None,
|
||||
Some(&format!("b64: {e}")),
|
||||
);
|
||||
continue;
|
||||
}
|
||||
};
|
||||
|
||||
if data.len() % 2 != 0 {
|
||||
respond(
|
||||
&mut out,
|
||||
&req.id,
|
||||
false,
|
||||
None,
|
||||
Some("PCM data has odd byte count (expected 16-bit LE samples)"),
|
||||
);
|
||||
continue;
|
||||
}
|
||||
|
||||
let pcm: Vec<i16> = data
|
||||
.chunks_exact(2)
|
||||
.map(|c| i16::from_le_bytes([c[0], c[1]]))
|
||||
.collect();
|
||||
|
||||
let target_rate = codec_sample_rate(to_pt);
|
||||
let resampled = match st.resample(&pcm, sample_rate, target_rate) {
|
||||
Ok(r) => r,
|
||||
Err(e) => {
|
||||
respond(&mut out, &req.id, false, None, Some(&e));
|
||||
continue;
|
||||
}
|
||||
};
|
||||
|
||||
match st.encode_from_pcm(&resampled, to_pt) {
|
||||
Ok(encoded) => {
|
||||
respond(
|
||||
&mut out,
|
||||
&req.id,
|
||||
true,
|
||||
Some(serde_json::json!({ "data_b64": B64.encode(&encoded) })),
|
||||
None,
|
||||
);
|
||||
}
|
||||
Err(e) => {
|
||||
respond(&mut out, &req.id, false, None, Some(&e));
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
"encode" | "decode" => {
|
||||
respond(
|
||||
&mut out,
|
||||
&req.id,
|
||||
false,
|
||||
None,
|
||||
Some("use 'transcode' command instead"),
|
||||
);
|
||||
}
|
||||
|
||||
_ => respond(
|
||||
&mut out,
|
||||
&req.id,
|
||||
false,
|
||||
None,
|
||||
Some(&format!("unknown: {}", req.method)),
|
||||
),
|
||||
}
|
||||
}
|
||||
}
|
||||
@@ -10,6 +10,7 @@ path = "src/main.rs"
|
||||
[dependencies]
|
||||
codec-lib = { path = "../codec-lib" }
|
||||
sip-proto = { path = "../sip-proto" }
|
||||
nnnoiseless = "0.5"
|
||||
tokio = { version = "1", features = ["full"] }
|
||||
serde = { version = "1", features = ["derive"] }
|
||||
serde_json = "1"
|
||||
@@ -18,3 +19,10 @@ regex-lite = "0.1"
|
||||
webrtc = "0.8"
|
||||
rand = "0.8"
|
||||
hound = "3.5"
|
||||
spandsp = "0.1.5"
|
||||
udptl = "0.1.0"
|
||||
kokoro-tts = { version = "0.3", default-features = false, features = ["use-cmudict"] }
|
||||
ort = { version = "=2.0.0-rc.11", default-features = false, features = [
|
||||
"std", "download-binaries", "copy-dylibs", "ndarray",
|
||||
"tls-native-vendored"
|
||||
] }
|
||||
|
||||
@@ -1,4 +1,5 @@
|
||||
//! Audio player — reads a WAV file and streams it as RTP packets.
|
||||
//! Also provides prompt preparation for the leg interaction system.
|
||||
|
||||
use crate::rtp::{build_rtp_header, rtp_clock_increment};
|
||||
use codec_lib::{codec_sample_rate, TranscodeState};
|
||||
@@ -8,6 +9,11 @@ use std::sync::Arc;
|
||||
use tokio::net::UdpSocket;
|
||||
use tokio::time::{self, Duration};
|
||||
|
||||
/// Mixing sample rate used by the mixer (must stay in sync with mixer::MIX_RATE).
|
||||
const MIX_RATE: u32 = 48000;
|
||||
/// Samples per 20ms frame at the mixing rate.
|
||||
const MIX_FRAME_SIZE: usize = 960;
|
||||
|
||||
/// Play a WAV file as RTP to a destination.
|
||||
/// Returns when playback is complete.
|
||||
pub async fn play_wav_file(
|
||||
@@ -30,10 +36,7 @@ pub async fn play_wav_file(
|
||||
|
||||
// Read all samples as i16.
|
||||
let samples: Vec<i16> = if spec.bits_per_sample == 16 {
|
||||
reader
|
||||
.samples::<i16>()
|
||||
.filter_map(|s| s.ok())
|
||||
.collect()
|
||||
reader.samples::<i16>().filter_map(|s| s.ok()).collect()
|
||||
} else if spec.bits_per_sample == 32 && spec.sample_format == hound::SampleFormat::Float {
|
||||
reader
|
||||
.samples::<f32>()
|
||||
@@ -171,3 +174,70 @@ pub async fn play_beep(
|
||||
|
||||
Ok((seq, ts))
|
||||
}
|
||||
|
||||
/// Load a WAV file and split it into 20ms f32 PCM frames at 48kHz.
|
||||
/// Used by the leg interaction system to prepare prompt audio for the mixer.
|
||||
pub fn load_prompt_pcm_frames(wav_path: &str) -> Result<Vec<Vec<f32>>, String> {
|
||||
let path = Path::new(wav_path);
|
||||
if !path.exists() {
|
||||
return Err(format!("WAV file not found: {wav_path}"));
|
||||
}
|
||||
|
||||
let mut reader =
|
||||
hound::WavReader::open(path).map_err(|e| format!("open WAV {wav_path}: {e}"))?;
|
||||
let spec = reader.spec();
|
||||
let wav_rate = spec.sample_rate;
|
||||
|
||||
// Read all samples as f32 in [-1.0, 1.0].
|
||||
let samples: Vec<f32> = if spec.bits_per_sample == 16 {
|
||||
reader
|
||||
.samples::<i16>()
|
||||
.filter_map(|s| s.ok())
|
||||
.map(|s| s as f32 / 32768.0)
|
||||
.collect()
|
||||
} else if spec.bits_per_sample == 32 && spec.sample_format == hound::SampleFormat::Float {
|
||||
reader.samples::<f32>().filter_map(|s| s.ok()).collect()
|
||||
} else {
|
||||
return Err(format!(
|
||||
"unsupported WAV format: {}bit {:?}",
|
||||
spec.bits_per_sample, spec.sample_format
|
||||
));
|
||||
};
|
||||
|
||||
if samples.is_empty() {
|
||||
return Ok(vec![]);
|
||||
}
|
||||
|
||||
pcm_to_mix_frames(&samples, wav_rate)
|
||||
}
|
||||
|
||||
/// Convert PCM samples at an arbitrary rate into 48kHz 20ms mixer frames.
|
||||
pub fn pcm_to_mix_frames(samples: &[f32], sample_rate: u32) -> Result<Vec<Vec<f32>>, String> {
|
||||
if samples.is_empty() {
|
||||
return Ok(vec![]);
|
||||
}
|
||||
|
||||
// Resample to MIX_RATE (48kHz) if needed.
|
||||
let resampled = if sample_rate != MIX_RATE {
|
||||
let mut transcoder = TranscodeState::new().map_err(|e| format!("codec init: {e}"))?;
|
||||
transcoder
|
||||
.resample_f32(samples, sample_rate, MIX_RATE)
|
||||
.map_err(|e| format!("resample: {e}"))?
|
||||
} else {
|
||||
samples.to_vec()
|
||||
};
|
||||
|
||||
// Split into MIX_FRAME_SIZE (960) sample frames.
|
||||
let mut frames = Vec::new();
|
||||
let mut offset = 0;
|
||||
while offset < resampled.len() {
|
||||
let end = (offset + MIX_FRAME_SIZE).min(resampled.len());
|
||||
let mut frame = resampled[offset..end].to_vec();
|
||||
// Pad short final frame with silence.
|
||||
frame.resize(MIX_FRAME_SIZE, 0.0);
|
||||
frames.push(frame);
|
||||
offset += MIX_FRAME_SIZE;
|
||||
}
|
||||
|
||||
Ok(frames)
|
||||
}
|
||||
|
||||
@@ -1,12 +1,20 @@
|
||||
//! Call hub — owns legs and bridges media.
|
||||
//! Call hub — owns N legs and a mixer task.
|
||||
//!
|
||||
//! Each Call has a unique ID and tracks its state, direction, and associated
|
||||
//! SIP Call-IDs for message routing.
|
||||
//! Every call has a central mixer that provides mix-minus audio to all
|
||||
//! participants. Legs can be added and removed dynamically mid-call.
|
||||
|
||||
use crate::mixer::{MixerCommand, RtpPacket};
|
||||
use crate::sip_leg::SipLeg;
|
||||
use sip_proto::message::SipMessage;
|
||||
use std::collections::HashMap;
|
||||
use std::net::SocketAddr;
|
||||
use std::sync::Arc;
|
||||
use std::time::Instant;
|
||||
use tokio::net::UdpSocket;
|
||||
use tokio::sync::{mpsc, watch};
|
||||
use tokio::task::JoinHandle;
|
||||
|
||||
pub type LegId = String;
|
||||
|
||||
/// Call state machine.
|
||||
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
|
||||
@@ -16,11 +24,14 @@ pub enum CallState {
|
||||
Connected,
|
||||
Voicemail,
|
||||
Ivr,
|
||||
Terminating,
|
||||
Terminated,
|
||||
}
|
||||
|
||||
impl CallState {
|
||||
/// Wire-format string for events/dashboards. Not currently emitted —
|
||||
/// call state changes flow as typed events (`call_answered`, etc.) —
|
||||
/// but kept for future status-snapshot work.
|
||||
#[allow(dead_code)]
|
||||
pub fn as_str(&self) -> &'static str {
|
||||
match self {
|
||||
Self::SettingUp => "setting-up",
|
||||
@@ -28,7 +39,6 @@ impl CallState {
|
||||
Self::Connected => "connected",
|
||||
Self::Voicemail => "voicemail",
|
||||
Self::Ivr => "ivr",
|
||||
Self::Terminating => "terminating",
|
||||
Self::Terminated => "terminated",
|
||||
}
|
||||
}
|
||||
@@ -41,6 +51,8 @@ pub enum CallDirection {
|
||||
}
|
||||
|
||||
impl CallDirection {
|
||||
/// Wire-format string. See CallState::as_str.
|
||||
#[allow(dead_code)]
|
||||
pub fn as_str(&self) -> &'static str {
|
||||
match self {
|
||||
Self::Inbound => "inbound",
|
||||
@@ -49,55 +61,236 @@ impl CallDirection {
|
||||
}
|
||||
}
|
||||
|
||||
/// A passthrough call — both sides share the same SIP Call-ID.
|
||||
/// The proxy rewrites SDP/Contact/Request-URI and relays RTP.
|
||||
pub struct PassthroughCall {
|
||||
/// What sort of endpoint a call leg represents.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum LegKind {
    SipProvider,
    SipDevice,
    WebRtc,
    /// Voicemail playback, IVR prompt playback, recording — not yet wired up
    /// as a distinct leg kind (those paths currently use the mixer's role
    /// system instead). Kept behind allow so adding a real media leg later
    /// doesn't require re-introducing the variant.
    #[allow(dead_code)]
    Media,
    /// Observer leg for recording, transcription, etc.
    Tool,
}

impl LegKind {
    /// Stable lowercase label for this leg kind.
    pub fn as_str(&self) -> &'static str {
        match *self {
            LegKind::SipProvider => "sip-provider",
            LegKind::SipDevice => "sip-device",
            LegKind::WebRtc => "webrtc",
            LegKind::Media => "media",
            LegKind::Tool => "tool",
        }
    }
}
|
||||
|
||||
/// Lifecycle state of a single leg.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum LegState {
    Inviting,
    Ringing,
    Connected,
    Terminated,
}

impl LegState {
    /// Stable lowercase label for this leg state.
    pub fn as_str(&self) -> &'static str {
        match *self {
            LegState::Inviting => "inviting",
            LegState::Ringing => "ringing",
            LegState::Connected => "connected",
            LegState::Terminated => "terminated",
        }
    }
}
|
||||
|
||||
/// Information about a single leg in a call.
///
/// Groups the leg's identity and state, its SIP/WebRTC handles, its RTP
/// transport details and free-form metadata.
|
||||
pub struct LegInfo {
|
||||
/// Identifier of this leg (legs are stored in the call keyed by leg ID).
pub id: LegId,
|
||||
/// What kind of endpoint this leg is.
pub kind: LegKind,
|
||||
/// Current lifecycle state of the leg.
pub state: LegState,
|
||||
/// Codec payload type for this leg — presumably the RTP payload type
/// number; confirm against the code that creates legs.
pub codec_pt: u8,
|
||||
/// Media transport currently negotiated for this leg.
|
||||
///
|
||||
/// `rtp` covers classic SIP audio media, `t38-udptl` covers T.38 fax,
|
||||
/// `webrtc` is used for browser legs, and `internal` for proxy-local media/tool paths.
|
||||
pub media_protocol: &'static str,
|
||||
/// Whether this leg is currently wired into an active media bridge.
|
||||
pub media_io_active: bool,
|
||||
|
||||
/// For SIP legs: the SIP dialog manager (handles 407 auth, BYE, etc).
|
||||
pub sip_leg: Option<SipLeg>,
|
||||
/// For SIP legs: the SIP Call-ID for message routing.
|
||||
pub sip_call_id: Option<String>,
|
||||
/// For WebRTC legs: the session ID in WebRtcEngine.
|
||||
///
|
||||
/// Populated at leg creation but not yet consumed by the hub —
|
||||
/// WebRTC session lookup currently goes through the session registry
|
||||
/// directly. Kept for introspection/debugging.
|
||||
#[allow(dead_code)]
|
||||
pub webrtc_session_id: Option<String>,
|
||||
/// The RTP socket allocated for this leg.
|
||||
pub rtp_socket: Option<Arc<UdpSocket>>,
|
||||
/// The RTP port number.
|
||||
pub rtp_port: u16,
|
||||
/// Public IP to advertise in SDP/Record-Route when THIS leg is the
|
||||
/// destination of a rewrite. Populated only for provider legs; `None`
|
||||
/// for LAN SIP devices, WebRTC browsers, media, and tool legs (which
|
||||
/// are reachable via `lan_ip`). See `route_passthrough_message` for
|
||||
/// the per-destination advertise-IP logic.
|
||||
pub public_ip: Option<String>,
|
||||
/// The remote media endpoint (learned from SDP or address learning).
|
||||
pub remote_media: Option<SocketAddr>,
|
||||
/// SIP signaling address (provider or device).
|
||||
pub signaling_addr: Option<SocketAddr>,
|
||||
|
||||
/// Flexible key-value metadata (consent state, tool config, etc.).
|
||||
/// Persisted into call history on call end.
|
||||
pub metadata: HashMap<String, serde_json::Value>,
|
||||
}
|
||||
|
||||
/// State of an in-dialog SIP request that is being bridged from one leg's
/// dialog to another leg's dialog and is still awaiting completion.
// NOTE(review): field meanings below are inferred from names and types only —
// confirm against the code that creates and consumes this struct.
#[derive(Clone)]
|
||||
pub struct PendingDialogBridge {
|
||||
/// Leg the request presumably arrived on.
pub source_leg_id: LegId,
|
||||
/// Leg the request is presumably forwarded to.
pub target_leg_id: LegId,
|
||||
/// SIP message associated with the source leg.
pub source_request: SipMessage,
|
||||
/// SIP message associated with the target leg.
pub target_request: SipMessage,
|
||||
/// SIP method name of the bridged transaction.
pub method: String,
|
||||
}
|
||||
|
||||
/// A multiparty call with N legs and a central mixer.
|
||||
pub struct Call {
|
||||
// Duplicated from the HashMap key in CallManager. Kept for future
|
||||
// status-snapshot work.
|
||||
#[allow(dead_code)]
|
||||
pub id: String,
|
||||
pub sip_call_id: String,
|
||||
pub state: CallState,
|
||||
// Populated at call creation but not currently consumed — dashboard
|
||||
// pull snapshots are gone (push events only).
|
||||
#[allow(dead_code)]
|
||||
pub direction: CallDirection,
|
||||
pub created_at: Instant,
|
||||
|
||||
// Call metadata.
|
||||
// Metadata.
|
||||
pub caller_number: Option<String>,
|
||||
pub callee_number: Option<String>,
|
||||
#[allow(dead_code)]
|
||||
pub provider_id: String,
|
||||
|
||||
// Provider side.
|
||||
pub provider_addr: SocketAddr,
|
||||
pub provider_media: Option<SocketAddr>,
|
||||
/// Original INVITE from the device (for device-originated outbound calls).
|
||||
/// Used to construct proper 180/200/error responses back to the device.
|
||||
pub device_invite: Option<SipMessage>,
|
||||
|
||||
// Device side.
|
||||
pub device_addr: SocketAddr,
|
||||
pub device_media: Option<SocketAddr>,
|
||||
/// Pending in-dialog B2BUA transaction bridged across two different SIP dialogs.
|
||||
pub pending_dialog_bridge: Option<PendingDialogBridge>,
|
||||
|
||||
// RTP relay.
|
||||
pub rtp_port: u16,
|
||||
pub rtp_socket: Arc<UdpSocket>,
|
||||
/// All legs in this call, keyed by leg ID.
|
||||
pub legs: HashMap<LegId, LegInfo>,
|
||||
|
||||
// Packet counters.
|
||||
pub pkt_from_device: u64,
|
||||
pub pkt_from_provider: u64,
|
||||
/// Channel to send commands to the mixer task.
|
||||
pub mixer_cmd_tx: mpsc::Sender<MixerCommand>,
|
||||
|
||||
/// Active passthrough media bridge mode, if any.
|
||||
pub media_bridge_mode: Option<String>,
|
||||
|
||||
/// Cancellation handles for non-mixer passthrough media tasks.
|
||||
media_bridge_cancel_txs: Vec<watch::Sender<bool>>,
|
||||
|
||||
/// Handle to the mixer task (aborted on call teardown).
|
||||
mixer_task: Option<JoinHandle<()>>,
|
||||
}
|
||||
|
||||
impl PassthroughCall {
|
||||
impl Call {
|
||||
pub fn new(
|
||||
id: String,
|
||||
direction: CallDirection,
|
||||
provider_id: String,
|
||||
mixer_cmd_tx: mpsc::Sender<MixerCommand>,
|
||||
mixer_task: JoinHandle<()>,
|
||||
) -> Self {
|
||||
Self {
|
||||
id,
|
||||
state: CallState::SettingUp,
|
||||
direction,
|
||||
created_at: Instant::now(),
|
||||
caller_number: None,
|
||||
callee_number: None,
|
||||
provider_id,
|
||||
device_invite: None,
|
||||
pending_dialog_bridge: None,
|
||||
legs: HashMap::new(),
|
||||
mixer_cmd_tx,
|
||||
media_bridge_mode: None,
|
||||
media_bridge_cancel_txs: Vec::new(),
|
||||
mixer_task: Some(mixer_task),
|
||||
}
|
||||
}
|
||||
|
||||
/// Add a leg to the mixer. Sends the AddLeg command with channel endpoints.
|
||||
pub async fn add_leg_to_mixer(
|
||||
&self,
|
||||
leg_id: &str,
|
||||
codec_pt: u8,
|
||||
inbound_rx: mpsc::Receiver<RtpPacket>,
|
||||
outbound_tx: mpsc::Sender<Vec<u8>>,
|
||||
) {
|
||||
let _ = self
|
||||
.mixer_cmd_tx
|
||||
.send(MixerCommand::AddLeg {
|
||||
leg_id: leg_id.to_string(),
|
||||
codec_pt,
|
||||
inbound_rx,
|
||||
outbound_tx,
|
||||
})
|
||||
.await;
|
||||
}
|
||||
|
||||
/// Remove a leg from the mixer.
|
||||
pub async fn remove_leg_from_mixer(&self, leg_id: &str) {
|
||||
let _ = self
|
||||
.mixer_cmd_tx
|
||||
.send(MixerCommand::RemoveLeg {
|
||||
leg_id: leg_id.to_string(),
|
||||
})
|
||||
.await;
|
||||
}
|
||||
|
||||
pub fn duration_secs(&self) -> u64 {
|
||||
self.created_at.elapsed().as_secs()
|
||||
}
|
||||
|
||||
pub fn to_status_json(&self) -> serde_json::Value {
|
||||
serde_json::json!({
|
||||
"id": self.id,
|
||||
"state": self.state.as_str(),
|
||||
"direction": self.direction.as_str(),
|
||||
"callerNumber": self.caller_number,
|
||||
"calleeNumber": self.callee_number,
|
||||
"providerUsed": self.provider_id,
|
||||
"createdAt": self.created_at.elapsed().as_millis(),
|
||||
"duration": self.duration_secs(),
|
||||
"rtpPort": self.rtp_port,
|
||||
"pktFromDevice": self.pkt_from_device,
|
||||
"pktFromProvider": self.pkt_from_provider,
|
||||
})
|
||||
pub fn clear_media_bridge(&mut self) {
|
||||
for cancel_tx in self.media_bridge_cancel_txs.drain(..) {
|
||||
let _ = cancel_tx.send(true);
|
||||
}
|
||||
self.media_bridge_mode = None;
|
||||
}
|
||||
|
||||
pub fn install_media_bridge(
|
||||
&mut self,
|
||||
mode: &str,
|
||||
cancel_txs: Vec<watch::Sender<bool>>,
|
||||
) {
|
||||
self.clear_media_bridge();
|
||||
self.media_bridge_mode = Some(mode.to_string());
|
||||
self.media_bridge_cancel_txs = cancel_txs;
|
||||
}
|
||||
|
||||
pub fn note_mixer_bridge(&mut self, mode: &str) {
|
||||
self.clear_media_bridge();
|
||||
self.media_bridge_mode = Some(mode.to_string());
|
||||
}
|
||||
|
||||
/// Shut down the mixer and abort its task.
|
||||
pub async fn shutdown_mixer(&mut self) {
|
||||
self.clear_media_bridge();
|
||||
let _ = self.mixer_cmd_tx.send(MixerCommand::Shutdown).await;
|
||||
if let Some(handle) = self.mixer_task.take() {
|
||||
handle.abort();
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
File diff suppressed because it is too large
Load Diff
@@ -4,6 +4,7 @@
|
||||
//! proxy engine via the `configure` command. These types mirror the TS interfaces.
|
||||
|
||||
use serde::Deserialize;
|
||||
use sip_proto::message::SipMessage;
|
||||
use std::net::SocketAddr;
|
||||
|
||||
/// Network endpoint.
|
||||
@@ -30,6 +31,11 @@ impl Endpoint {
|
||||
}
|
||||
|
||||
/// Provider quirks for codec/protocol workarounds.
|
||||
//
|
||||
// Deserialized from provider config for TS parity. Early-media silence
|
||||
// injection and related workarounds are not yet ported to the Rust engine,
|
||||
// so every field is populated by serde but not yet consumed.
|
||||
#[allow(dead_code)]
|
||||
#[derive(Debug, Clone, Deserialize)]
|
||||
pub struct Quirks {
|
||||
#[serde(rename = "earlyMediaSilence")]
|
||||
@@ -44,6 +50,9 @@ pub struct Quirks {
|
||||
#[derive(Debug, Clone, Deserialize)]
|
||||
pub struct ProviderConfig {
|
||||
pub id: String,
|
||||
// UI label — populated by serde for parity with the TS config, not
|
||||
// consumed at runtime.
|
||||
#[allow(dead_code)]
|
||||
#[serde(rename = "displayName")]
|
||||
pub display_name: String,
|
||||
pub domain: String,
|
||||
@@ -54,6 +63,8 @@ pub struct ProviderConfig {
|
||||
#[serde(rename = "registerIntervalSec")]
|
||||
pub register_interval_sec: u32,
|
||||
pub codecs: Vec<u8>,
|
||||
// Workaround knobs populated by serde but not yet acted upon — see Quirks.
|
||||
#[allow(dead_code)]
|
||||
pub quirks: Quirks,
|
||||
}
|
||||
|
||||
@@ -84,12 +95,18 @@ pub struct RouteMatch {
|
||||
|
||||
/// Route action.
|
||||
#[derive(Debug, Clone, Deserialize)]
|
||||
// Several fields (voicemail_box, ivr_menu_id, no_answer_timeout) are read
|
||||
// by resolve_inbound_route but not yet honored downstream — see the
|
||||
// multi-target TODO in CallManager::create_inbound_call.
|
||||
#[allow(dead_code)]
|
||||
pub struct RouteAction {
|
||||
pub targets: Option<Vec<String>>,
|
||||
#[serde(rename = "ringBrowsers")]
|
||||
pub ring_browsers: Option<bool>,
|
||||
#[serde(rename = "voicemailBox")]
|
||||
pub voicemail_box: Option<String>,
|
||||
#[serde(rename = "faxBox")]
|
||||
pub fax_box: Option<String>,
|
||||
#[serde(rename = "ivrMenuId")]
|
||||
pub ivr_menu_id: Option<String>,
|
||||
#[serde(rename = "noAnswerTimeout")]
|
||||
@@ -106,7 +123,11 @@ pub struct RouteAction {
|
||||
/// A routing rule.
|
||||
#[derive(Debug, Clone, Deserialize)]
|
||||
pub struct Route {
|
||||
// `id` and `name` are UI identifiers, populated by serde but not
|
||||
// consumed by the resolvers.
|
||||
#[allow(dead_code)]
|
||||
pub id: String,
|
||||
#[allow(dead_code)]
|
||||
pub name: String,
|
||||
pub priority: i32,
|
||||
pub enabled: bool,
|
||||
@@ -141,6 +162,12 @@ pub struct AppConfig {
|
||||
pub providers: Vec<ProviderConfig>,
|
||||
pub devices: Vec<DeviceConfig>,
|
||||
pub routing: RoutingConfig,
|
||||
#[serde(default)]
|
||||
pub faxboxes: Vec<FaxBoxConfig>,
|
||||
#[serde(default)]
|
||||
pub voiceboxes: Vec<VoiceboxConfig>,
|
||||
#[serde(default)]
|
||||
pub ivr: Option<IvrConfig>,
|
||||
}
|
||||
|
||||
#[derive(Debug, Clone, Deserialize)]
|
||||
@@ -148,12 +175,200 @@ pub struct RoutingConfig {
|
||||
pub routes: Vec<Route>,
|
||||
}
|
||||
|
||||
// ---------------------------------------------------------------------------
|
||||
// Voicebox config
|
||||
// ---------------------------------------------------------------------------
|
||||
|
||||
/// Configuration of one voicebox, deserialized from the app config's
/// `voiceboxes` list. JSON keys are camelCase (see the serde renames).
// `allow(dead_code)`: not every field is read by the engine.
#[allow(dead_code)]
|
||||
#[derive(Debug, Clone, Deserialize)]
|
||||
pub struct VoiceboxConfig {
|
||||
/// Identifier of this voicebox.
pub id: String,
|
||||
/// Enabled flag; defaults to `false` when omitted.
#[serde(default)]
|
||||
pub enabled: bool,
|
||||
/// Optional greeting text (JSON key `greetingText`).
#[serde(rename = "greetingText")]
|
||||
pub greeting_text: Option<String>,
|
||||
/// Optional greeting voice name (JSON key `greetingVoice`).
#[serde(rename = "greetingVoice")]
|
||||
pub greeting_voice: Option<String>,
|
||||
/// Optional path to a greeting WAV file (JSON key `greetingWavPath`).
#[serde(rename = "greetingWavPath")]
|
||||
pub greeting_wav_path: Option<String>,
|
||||
/// Optional recording length limit in seconds (JSON key `maxRecordingSec`).
#[serde(rename = "maxRecordingSec")]
|
||||
pub max_recording_sec: Option<u32>,
|
||||
}
|
||||
|
||||
/// Configuration of one fax box, deserialized from the app config's
/// `faxboxes` list.
// `allow(dead_code)`: not every field is read by the engine.
#[allow(dead_code)]
|
||||
#[derive(Debug, Clone, Deserialize)]
|
||||
pub struct FaxBoxConfig {
|
||||
/// Identifier of this fax box.
pub id: String,
|
||||
/// Enabled flag; defaults to `false` when omitted.
#[serde(default)]
|
||||
pub enabled: bool,
|
||||
/// Optional message-count limit (JSON key `maxMessages`).
#[serde(rename = "maxMessages")]
|
||||
pub max_messages: Option<u32>,
|
||||
}
|
||||
|
||||
// ---------------------------------------------------------------------------
|
||||
// IVR config
|
||||
// ---------------------------------------------------------------------------
|
||||
|
||||
/// Top-level IVR configuration, deserialized from the app config's optional
/// `ivr` entry.
// `allow(dead_code)`: not every field is read by the engine.
#[allow(dead_code)]
|
||||
#[derive(Debug, Clone, Deserialize)]
|
||||
pub struct IvrConfig {
|
||||
/// Enabled flag (required; no serde default).
pub enabled: bool,
|
||||
/// All configured menus.
pub menus: Vec<IvrMenuConfig>,
|
||||
/// Id of the entry menu (JSON key `entryMenuId`) — presumably refers to
/// one of `menus`; this struct does not validate that.
#[serde(rename = "entryMenuId")]
|
||||
pub entry_menu_id: String,
|
||||
}
|
||||
|
||||
/// One IVR menu: its prompt and its list of entries.
#[derive(Debug, Clone, Deserialize)]
|
||||
pub struct IvrMenuConfig {
|
||||
/// Identifier of this menu.
pub id: String,
|
||||
/// Prompt text for this menu (JSON key `promptText`).
#[serde(rename = "promptText")]
|
||||
pub prompt_text: String,
|
||||
/// Optional prompt voice name (JSON key `promptVoice`).
#[serde(rename = "promptVoice")]
|
||||
pub prompt_voice: Option<String>,
|
||||
/// Entries of this menu.
pub entries: Vec<IvrMenuEntry>,
|
||||
/// Optional timeout in seconds (JSON key `timeoutSec`).
#[serde(rename = "timeoutSec")]
|
||||
pub timeout_sec: Option<u32>,
|
||||
}
|
||||
|
||||
/// One entry of an IVR menu.
// `allow(dead_code)`: not every field is read by the engine.
#[allow(dead_code)]
|
||||
#[derive(Debug, Clone, Deserialize)]
|
||||
pub struct IvrMenuEntry {
|
||||
/// Digit associated with this entry, kept as a string.
pub digit: String,
|
||||
/// Action name; the set of valid values is not defined in this struct.
pub action: String,
|
||||
/// Optional target for the action — meaning presumably depends on `action`.
pub target: Option<String>,
|
||||
}
|
||||
|
||||
// ---------------------------------------------------------------------------
|
||||
// Pattern matching (ported from ts/config.ts)
|
||||
// ---------------------------------------------------------------------------
|
||||
|
||||
/// Extract the URI user part and normalize phone-like identities for routing.
|
||||
///
|
||||
/// This keeps inbound route matching stable across provider-specific URI shapes,
|
||||
/// e.g. `sip:+49 421 219694@trunk.example` and `sip:0049421219694@trunk.example`
|
||||
/// both normalize to `+49421219694`.
|
||||
pub fn normalize_routing_identity(value: &str) -> String {
|
||||
let extracted = SipMessage::extract_uri_user(value).unwrap_or(value).trim();
|
||||
if extracted.is_empty() {
|
||||
return String::new();
|
||||
}
|
||||
|
||||
let mut digits = String::new();
|
||||
let mut saw_plus = false;
|
||||
|
||||
for (idx, ch) in extracted.chars().enumerate() {
|
||||
if ch.is_ascii_digit() {
|
||||
digits.push(ch);
|
||||
continue;
|
||||
}
|
||||
|
||||
if ch == '+' && idx == 0 {
|
||||
saw_plus = true;
|
||||
continue;
|
||||
}
|
||||
|
||||
if matches!(ch, ' ' | '\t' | '-' | '.' | '/' | '(' | ')') {
|
||||
continue;
|
||||
}
|
||||
|
||||
return extracted.to_string();
|
||||
}
|
||||
|
||||
if digits.is_empty() {
|
||||
return extracted.to_string();
|
||||
}
|
||||
if saw_plus {
|
||||
return format!("+{digits}");
|
||||
}
|
||||
if digits.starts_with("00") && digits.len() > 2 {
|
||||
return format!("+{}", &digits[2..]);
|
||||
}
|
||||
|
||||
digits
|
||||
}
|
||||
|
||||
/// Returns `true` when `value` consists solely of ASCII digits and `+`
/// characters and contains at least six digits.
fn looks_like_phone_identity(value: &str) -> bool {
    let mut digit_count = 0usize;
    for c in value.chars() {
        if c.is_ascii_digit() {
            digit_count += 1;
        } else if c != '+' {
            return false;
        }
    }
    digit_count >= 6
}
|
||||
|
||||
/// Pick the best inbound called-number identity from common SIP headers.
|
||||
///
|
||||
/// Some providers deliver the DID in `To` / `P-Called-Party-ID` while the
|
||||
/// request URI contains an account username. Prefer a phone-like identity when
|
||||
/// present; otherwise fall back to the request URI user part.
|
||||
pub fn extract_inbound_called_number(msg: &SipMessage) -> String {
|
||||
let request_uri = normalize_routing_identity(msg.request_uri().unwrap_or(""));
|
||||
if looks_like_phone_identity(&request_uri) {
|
||||
return request_uri;
|
||||
}
|
||||
|
||||
for header_name in [
|
||||
"P-Called-Party-ID",
|
||||
"X-Called-Party-ID",
|
||||
"Diversion",
|
||||
"History-Info",
|
||||
"To",
|
||||
] {
|
||||
let candidate = normalize_routing_identity(msg.get_header(header_name).unwrap_or(""));
|
||||
if looks_like_phone_identity(&candidate) {
|
||||
return candidate;
|
||||
}
|
||||
}
|
||||
|
||||
request_uri
|
||||
}
|
||||
|
||||
/// Split one numeric range operand into `(has_leading_plus, digits)`.
///
/// Surrounding whitespace is ignored. Returns `None` unless the remainder,
/// after an optional single leading `+`, is a non-empty run of ASCII digits.
fn parse_numeric_range_value(value: &str) -> Option<(bool, &str)> {
    let candidate = value.trim();
    let (plus, body) = match candidate.strip_prefix('+') {
        Some(rest) => (true, rest),
        None => (false, candidate),
    };

    let all_digits = !body.is_empty() && body.bytes().all(|b| b.is_ascii_digit());
    if all_digits {
        Some((plus, body))
    } else {
        None
    }
}
|
||||
|
||||
fn matches_numeric_range_pattern(pattern: &str, value: &str) -> bool {
|
||||
let Some((start, end)) = pattern.split_once("..") else {
|
||||
return false;
|
||||
};
|
||||
|
||||
let Some((start_plus, start_digits)) = parse_numeric_range_value(start) else {
|
||||
return false;
|
||||
};
|
||||
let Some((end_plus, end_digits)) = parse_numeric_range_value(end) else {
|
||||
return false;
|
||||
};
|
||||
let Some((value_plus, value_digits)) = parse_numeric_range_value(value) else {
|
||||
return false;
|
||||
};
|
||||
|
||||
if start_plus != end_plus || value_plus != start_plus {
|
||||
return false;
|
||||
}
|
||||
if start_digits.len() != end_digits.len() || value_digits.len() != start_digits.len() {
|
||||
return false;
|
||||
}
|
||||
if start_digits > end_digits {
|
||||
return false;
|
||||
}
|
||||
|
||||
value_digits >= start_digits && value_digits <= end_digits
|
||||
}
|
||||
|
||||
/// Test a value against a pattern string.
|
||||
/// - None/empty: matches everything (wildcard)
|
||||
/// - `start..end`: numeric range match
|
||||
/// - Trailing '*': prefix match
|
||||
/// - Starts with '/': regex match
|
||||
/// - Otherwise: exact match
|
||||
@@ -169,6 +384,10 @@ pub fn matches_pattern(pattern: Option<&str>, value: &str) -> bool {
|
||||
return value.starts_with(&pattern[..pattern.len() - 1]);
|
||||
}
|
||||
|
||||
if matches_numeric_range_pattern(pattern, value) {
|
||||
return true;
|
||||
}
|
||||
|
||||
// Regex match: "/^\\+49/" or "/pattern/i"
|
||||
if pattern.starts_with('/') {
|
||||
if let Some(last_slash) = pattern[1..].rfind('/') {
|
||||
@@ -192,14 +411,25 @@ pub fn matches_pattern(pattern: Option<&str>, value: &str) -> bool {
|
||||
/// Result of resolving an outbound route.
|
||||
pub struct OutboundRouteResult {
|
||||
pub provider: ProviderConfig,
|
||||
// TODO: prefix rewriting is unfinished — this is computed but the
|
||||
// caller ignores it and uses the raw dialed number.
|
||||
#[allow(dead_code)]
|
||||
pub transformed_number: String,
|
||||
}
|
||||
|
||||
/// Outcome of inbound route resolution.
//
// create_inbound_call consumes `device_ids`, `ring_all_devices` and
// `ring_browsers`. The remaining fields (voicemail_box, ivr_menu_id,
// no_answer_timeout) are resolved but not yet acted upon — see the
// multi-target TODO.
#[allow(dead_code)]
pub struct InboundRouteResult {
    /// Devices explicitly targeted by the matched route.
    pub device_ids: Vec<String>,
    /// Whether every device should ring rather than only `device_ids`.
    pub ring_all_devices: bool,
    /// Whether browser clients should ring as well.
    pub ring_browsers: bool,
    /// Voicemail box configured on the matched route, if any.
    pub voicemail_box: Option<String>,
    /// Fax box configured on the matched route, if any.
    pub fax_box: Option<String>,
    /// IVR menu configured on the matched route, if any.
    pub ivr_menu_id: Option<String>,
    /// No-answer timeout configured on the matched route, if any.
    pub no_answer_timeout: Option<u32>,
}
|
||||
@@ -280,7 +510,7 @@ impl AppConfig {
|
||||
provider_id: &str,
|
||||
called_number: &str,
|
||||
caller_number: &str,
|
||||
) -> InboundRouteResult {
|
||||
) -> Option<InboundRouteResult> {
|
||||
let mut routes: Vec<&Route> = self
|
||||
.routing
|
||||
.routes
|
||||
@@ -304,22 +534,190 @@ impl AppConfig {
|
||||
continue;
|
||||
}
|
||||
|
||||
return InboundRouteResult {
|
||||
device_ids: route.action.targets.clone().unwrap_or_default(),
|
||||
let explicit_targets = route.action.targets.clone();
|
||||
return Some(InboundRouteResult {
|
||||
device_ids: explicit_targets.clone().unwrap_or_default(),
|
||||
ring_all_devices: explicit_targets.is_none(),
|
||||
ring_browsers: route.action.ring_browsers.unwrap_or(false),
|
||||
voicemail_box: route.action.voicemail_box.clone(),
|
||||
fax_box: route.action.fax_box.clone(),
|
||||
ivr_menu_id: route.action.ivr_menu_id.clone(),
|
||||
no_answer_timeout: route.action.no_answer_timeout,
|
||||
};
|
||||
});
|
||||
}
|
||||
|
||||
// Fallback: ring all devices + browsers.
|
||||
InboundRouteResult {
|
||||
device_ids: vec![],
|
||||
ring_browsers: true,
|
||||
voicemail_box: None,
|
||||
ivr_menu_id: None,
|
||||
no_answer_timeout: None,
|
||||
}
|
||||
None
|
||||
}
|
||||
}
|
||||
|
||||
#[cfg(test)]
mod tests {
    use super::*;

    const PROVIDER_ID: &str = "provider-a";
    const CALLER: &str = "+491701234567";

    /// Minimal config with one provider and one device around `routes`.
    fn test_app_config(routes: Vec<Route>) -> AppConfig {
        let proxy = ProxyConfig {
            lan_ip: "127.0.0.1".to_string(),
            lan_port: 5070,
            public_ip_seed: None,
            rtp_port_range: RtpPortRange {
                min: 20_000,
                max: 20_100,
            },
        };
        let provider = ProviderConfig {
            id: PROVIDER_ID.to_string(),
            display_name: "Provider A".to_string(),
            domain: "example.com".to_string(),
            outbound_proxy: Endpoint {
                address: "example.com".to_string(),
                port: 5060,
            },
            username: "user".to_string(),
            password: "pass".to_string(),
            register_interval_sec: 300,
            codecs: vec![9],
            quirks: Quirks {
                early_media_silence: false,
                silence_payload_type: None,
                silence_max_packets: None,
            },
        };
        let desk = DeviceConfig {
            id: "desk".to_string(),
            display_name: "Desk".to_string(),
            expected_address: "127.0.0.1".to_string(),
            extension: "100".to_string(),
        };

        AppConfig {
            proxy,
            providers: vec![provider],
            devices: vec![desk],
            routing: RoutingConfig { routes },
            faxboxes: vec![],
            voiceboxes: vec![],
            ivr: None,
        }
    }

    /// Inbound match criteria for one DID arriving via the test provider.
    fn inbound_match(did: &str) -> RouteMatch {
        RouteMatch {
            direction: "inbound".to_string(),
            number_pattern: Some(did.to_string()),
            caller_pattern: None,
            source_provider: Some(PROVIDER_ID.to_string()),
            source_device: None,
        }
    }

    /// Inbound route action; every field not passed in is left unset.
    fn inbound_action(
        targets: Option<Vec<String>>,
        ring_browsers: bool,
        voicemail_box: Option<&str>,
        no_answer_timeout: Option<u32>,
    ) -> RouteAction {
        RouteAction {
            targets,
            ring_browsers: Some(ring_browsers),
            voicemail_box: voicemail_box.map(str::to_string),
            fax_box: None,
            ivr_menu_id: None,
            no_answer_timeout,
            provider: None,
            failover_providers: None,
            strip_prefix: None,
            prepend_prefix: None,
        }
    }

    #[test]
    fn normalize_routing_identity_extracts_uri_user_and_phone_number() {
        let cases = [
            ("sip:0049 421 219694@voip.easybell.de", "+49421219694"),
            ("<tel:+49 (421) 219694>", "+49421219694"),
            ("sip:100@pbx.local", "100"),
            ("sip:alice@pbx.local", "alice"),
        ];
        for (input, expected) in cases {
            assert_eq!(normalize_routing_identity(input), expected);
        }
    }

    #[test]
    fn resolve_inbound_route_requires_explicit_match() {
        let cfg = test_app_config(vec![]);
        let resolved = cfg.resolve_inbound_route(PROVIDER_ID, "+49421219694", CALLER);
        assert!(resolved.is_none());
    }

    #[test]
    fn resolve_inbound_route_matches_per_number_on_shared_provider() {
        let main_route = Route {
            id: "main".to_string(),
            name: "Main DID".to_string(),
            priority: 200,
            enabled: true,
            match_criteria: inbound_match("+49421219694"),
            action: inbound_action(Some(vec!["desk".to_string()]), true, None, None),
        };
        let support_route = Route {
            id: "support".to_string(),
            name: "Support DID".to_string(),
            priority: 100,
            enabled: true,
            match_criteria: inbound_match("+49421219695"),
            action: inbound_action(None, false, Some("support-box"), Some(20)),
        };
        let cfg = test_app_config(vec![main_route, support_route]);

        let main = cfg
            .resolve_inbound_route(PROVIDER_ID, "+49421219694", CALLER)
            .expect("main DID should match");
        assert_eq!(main.device_ids, vec!["desk".to_string()]);
        assert!(main.ring_browsers);

        let support = cfg
            .resolve_inbound_route(PROVIDER_ID, "+49421219695", CALLER)
            .expect("support DID should match");
        assert_eq!(support.voicemail_box.as_deref(), Some("support-box"));
        assert_eq!(support.no_answer_timeout, Some(20));
        assert!(!support.ring_browsers);
    }

    #[test]
    fn extract_inbound_called_number_prefers_did_headers_over_username_ruri() {
        let raw = b"INVITE sip:2830573e1@proxy.example SIP/2.0\r\nTo: <sip:+4942116767548@proxy.example>\r\nFrom: <sip:+491701234567@provider.example>;tag=abc\r\nCall-ID: test-1\r\nCSeq: 1 INVITE\r\nContent-Length: 0\r\n\r\n";
        let msg = SipMessage::parse(raw).expect("invite should parse");
        assert_eq!(extract_inbound_called_number(&msg), "+4942116767548");
    }

    #[test]
    fn extract_inbound_called_number_keeps_phone_ruri_when_already_present() {
        let raw = b"INVITE sip:042116767548@proxy.example SIP/2.0\r\nTo: <sip:2830573e1@proxy.example>\r\nFrom: <sip:+491701234567@provider.example>;tag=abc\r\nCall-ID: test-2\r\nCSeq: 1 INVITE\r\nContent-Length: 0\r\n\r\n";
        let msg = SipMessage::parse(raw).expect("invite should parse");
        assert_eq!(extract_inbound_called_number(&msg), "042116767548");
    }

    #[test]
    fn matches_pattern_supports_numeric_ranges() {
        let national = Some("042116767546..042116767548");
        let international = Some("+4942116767546..+4942116767548");

        assert!(matches_pattern(national, "042116767547"));
        assert!(!matches_pattern(national, "042116767549"));
        assert!(matches_pattern(international, "+4942116767547"));
        assert!(!matches_pattern(international, "042116767547"));
    }
}
|
||||
|
||||
@@ -1,200 +0,0 @@
|
||||
//! DTMF detection — parses RFC 2833 telephone-event RTP packets.
|
||||
//!
|
||||
//! Deduplicates repeated packets (same digit sent multiple times with
|
||||
//! increasing duration) and fires once per detected digit.
|
||||
//!
|
||||
//! Ported from ts/call/dtmf-detector.ts.
|
||||
|
||||
use crate::ipc::{emit_event, OutTx};
|
||||
|
||||
/// RFC 2833 event ID → character mapping.
|
||||
const EVENT_CHARS: &[char] = &[
|
||||
'0', '1', '2', '3', '4', '5', '6', '7', '8', '9', '*', '#', 'A', 'B', 'C', 'D',
|
||||
];
|
||||
|
||||
/// Safety timeout: report digit if no End packet arrives within this many ms.
|
||||
const SAFETY_TIMEOUT_MS: u64 = 200;
|
||||
|
||||
/// DTMF detector for a single RTP stream.
|
||||
pub struct DtmfDetector {
|
||||
/// Negotiated telephone-event payload type (default 101).
|
||||
telephone_event_pt: u8,
|
||||
/// Clock rate for duration calculation (default 8000 Hz).
|
||||
clock_rate: u32,
|
||||
/// Call ID for event emission.
|
||||
call_id: String,
|
||||
|
||||
// Deduplication state.
|
||||
current_event_id: Option<u8>,
|
||||
current_event_ts: Option<u32>,
|
||||
current_event_reported: bool,
|
||||
current_event_duration: u16,
|
||||
|
||||
out_tx: OutTx,
|
||||
}
|
||||
|
||||
impl DtmfDetector {
|
||||
pub fn new(call_id: String, out_tx: OutTx) -> Self {
|
||||
Self {
|
||||
telephone_event_pt: 101,
|
||||
clock_rate: 8000,
|
||||
call_id,
|
||||
current_event_id: None,
|
||||
current_event_ts: None,
|
||||
current_event_reported: false,
|
||||
current_event_duration: 0,
|
||||
out_tx,
|
||||
}
|
||||
}
|
||||
|
||||
/// Feed an RTP packet. Checks PT; ignores non-DTMF packets.
|
||||
/// Returns Some(digit_char) if a digit was detected.
|
||||
pub fn process_rtp(&mut self, data: &[u8]) -> Option<char> {
|
||||
if data.len() < 16 {
|
||||
return None; // 12-byte header + 4-byte telephone-event minimum
|
||||
}
|
||||
|
||||
let pt = data[1] & 0x7F;
|
||||
if pt != self.telephone_event_pt {
|
||||
return None;
|
||||
}
|
||||
|
||||
let marker = (data[1] & 0x80) != 0;
|
||||
let rtp_timestamp = u32::from_be_bytes([data[4], data[5], data[6], data[7]]);
|
||||
|
||||
// Parse telephone-event payload.
|
||||
let event_id = data[12];
|
||||
let end_bit = (data[13] & 0x80) != 0;
|
||||
let duration = u16::from_be_bytes([data[14], data[15]]);
|
||||
|
||||
if event_id as usize >= EVENT_CHARS.len() {
|
||||
return None;
|
||||
}
|
||||
|
||||
// Detect new event.
|
||||
let is_new = marker
|
||||
|| self.current_event_id != Some(event_id)
|
||||
|| self.current_event_ts != Some(rtp_timestamp);
|
||||
|
||||
if is_new {
|
||||
// Report pending unreported event.
|
||||
let pending = self.report_pending();
|
||||
|
||||
self.current_event_id = Some(event_id);
|
||||
self.current_event_ts = Some(rtp_timestamp);
|
||||
self.current_event_reported = false;
|
||||
self.current_event_duration = duration;
|
||||
|
||||
if pending.is_some() {
|
||||
return pending;
|
||||
}
|
||||
}
|
||||
|
||||
if duration > self.current_event_duration {
|
||||
self.current_event_duration = duration;
|
||||
}
|
||||
|
||||
// Report on End bit (first time only).
|
||||
if end_bit && !self.current_event_reported {
|
||||
self.current_event_reported = true;
|
||||
let digit = EVENT_CHARS[event_id as usize];
|
||||
let duration_ms = (self.current_event_duration as f64 / self.clock_rate as f64) * 1000.0;
|
||||
|
||||
emit_event(
|
||||
&self.out_tx,
|
||||
"dtmf_digit",
|
||||
serde_json::json!({
|
||||
"call_id": self.call_id,
|
||||
"digit": digit.to_string(),
|
||||
"duration_ms": duration_ms.round() as u32,
|
||||
"source": "rfc2833",
|
||||
}),
|
||||
);
|
||||
|
||||
return Some(digit);
|
||||
}
|
||||
|
||||
None
|
||||
}
|
||||
|
||||
/// Report a pending unreported event.
|
||||
fn report_pending(&mut self) -> Option<char> {
|
||||
if let Some(event_id) = self.current_event_id {
|
||||
if !self.current_event_reported && (event_id as usize) < EVENT_CHARS.len() {
|
||||
self.current_event_reported = true;
|
||||
let digit = EVENT_CHARS[event_id as usize];
|
||||
let duration_ms =
|
||||
(self.current_event_duration as f64 / self.clock_rate as f64) * 1000.0;
|
||||
|
||||
emit_event(
|
||||
&self.out_tx,
|
||||
"dtmf_digit",
|
||||
serde_json::json!({
|
||||
"call_id": self.call_id,
|
||||
"digit": digit.to_string(),
|
||||
"duration_ms": duration_ms.round() as u32,
|
||||
"source": "rfc2833",
|
||||
}),
|
||||
);
|
||||
|
||||
return Some(digit);
|
||||
}
|
||||
}
|
||||
None
|
||||
}
|
||||
|
||||
/// Process a SIP INFO message body for DTMF.
|
||||
pub fn process_sip_info(&mut self, content_type: &str, body: &str) -> Option<char> {
|
||||
let ct = content_type.to_ascii_lowercase();
|
||||
|
||||
if ct.contains("application/dtmf-relay") {
|
||||
// Format: "Signal= 5\r\nDuration= 160\r\n"
|
||||
let signal = body
|
||||
.lines()
|
||||
.find(|l| l.to_ascii_lowercase().starts_with("signal"))
|
||||
.and_then(|l| l.split('=').nth(1))
|
||||
.map(|s| s.trim().to_string())?;
|
||||
|
||||
if signal.len() != 1 {
|
||||
return None;
|
||||
}
|
||||
let digit = signal.chars().next()?.to_ascii_uppercase();
|
||||
if !"0123456789*#ABCD".contains(digit) {
|
||||
return None;
|
||||
}
|
||||
|
||||
emit_event(
|
||||
&self.out_tx,
|
||||
"dtmf_digit",
|
||||
serde_json::json!({
|
||||
"call_id": self.call_id,
|
||||
"digit": digit.to_string(),
|
||||
"source": "sip-info",
|
||||
}),
|
||||
);
|
||||
|
||||
return Some(digit);
|
||||
}
|
||||
|
||||
if ct.contains("application/dtmf") {
|
||||
let digit = body.trim().chars().next()?.to_ascii_uppercase();
|
||||
if !"0123456789*#ABCD".contains(digit) {
|
||||
return None;
|
||||
}
|
||||
|
||||
emit_event(
|
||||
&self.out_tx,
|
||||
"dtmf_digit",
|
||||
serde_json::json!({
|
||||
"call_id": self.call_id,
|
||||
"digit": digit.to_string(),
|
||||
"source": "sip-info",
|
||||
}),
|
||||
);
|
||||
|
||||
return Some(digit);
|
||||
}
|
||||
|
||||
None
|
||||
}
|
||||
}
|
||||
File diff suppressed because it is too large
Load Diff
@@ -19,7 +19,13 @@ pub struct Command {
|
||||
}
|
||||
|
||||
/// Send a response to a command.
|
||||
pub fn respond(tx: &OutTx, id: &str, success: bool, result: Option<serde_json::Value>, error: Option<&str>) {
|
||||
pub fn respond(
|
||||
tx: &OutTx,
|
||||
id: &str,
|
||||
success: bool,
|
||||
result: Option<serde_json::Value>,
|
||||
error: Option<&str>,
|
||||
) {
|
||||
let mut resp = serde_json::json!({ "id": id, "success": success });
|
||||
if let Some(r) = result {
|
||||
resp["result"] = r;
|
||||
|
||||
@@ -0,0 +1,188 @@
|
||||
//! Per-leg adaptive jitter buffer for the audio mixer.
|
||||
//!
|
||||
//! Sits between inbound RTP packet reception and the mixer's decode step.
|
||||
//! Reorders packets by sequence number and delivers exactly one frame per
|
||||
//! 20ms mixer tick, smoothing out network jitter. When a packet is missing,
|
||||
//! the mixer can invoke codec PLC to conceal the gap.
|
||||
|
||||
use crate::mixer::RtpPacket;
|
||||
use std::collections::BTreeMap;
|
||||
|
||||
/// Per-leg jitter buffer. Collects RTP packets keyed by sequence number,
|
||||
/// delivers one frame per 20ms tick in sequence order.
|
||||
///
|
||||
/// Adaptive target depth: starts at 3 frames (60ms), adjusts between
|
||||
/// 2–6 frames based on observed jitter.
|
||||
pub struct JitterBuffer {
|
||||
/// Packets waiting for playout, keyed by seq number.
|
||||
buffer: BTreeMap<u16, RtpPacket>,
|
||||
/// Next expected sequence number for playout.
|
||||
next_seq: Option<u16>,
|
||||
/// Target buffer depth in frames (adaptive).
|
||||
target_depth: u32,
|
||||
/// Current fill level high-water mark (for adaptation).
|
||||
max_fill_seen: u32,
|
||||
/// Ticks since last adaptation adjustment.
|
||||
adapt_counter: u32,
|
||||
/// Consecutive ticks where buffer was empty (for ramp-up).
|
||||
empty_streak: u32,
|
||||
/// Consecutive ticks where buffer had excess (for ramp-down).
|
||||
excess_streak: u32,
|
||||
/// Whether we've started playout (initial fill complete).
|
||||
playing: bool,
|
||||
/// Number of frames consumed since start (for stats).
|
||||
frames_consumed: u64,
|
||||
/// Number of frames lost (gap in sequence).
|
||||
frames_lost: u64,
|
||||
}
|
||||
|
||||
/// What the mixer gets back each tick.
|
||||
pub enum JitterResult {
|
||||
/// A packet is available for decoding.
|
||||
Packet(RtpPacket),
|
||||
/// Packet was expected but missing — invoke PLC.
|
||||
Missing,
|
||||
/// Buffer is in initial fill phase — output silence.
|
||||
Filling,
|
||||
}
|
||||
|
||||
impl JitterBuffer {
|
||||
pub fn new() -> Self {
|
||||
Self {
|
||||
buffer: BTreeMap::new(),
|
||||
next_seq: None,
|
||||
target_depth: 3, // 60ms initial target
|
||||
max_fill_seen: 0,
|
||||
adapt_counter: 0,
|
||||
empty_streak: 0,
|
||||
excess_streak: 0,
|
||||
playing: false,
|
||||
frames_consumed: 0,
|
||||
frames_lost: 0,
|
||||
}
|
||||
}
|
||||
|
||||
/// Push a received RTP packet into the buffer.
|
||||
pub fn push(&mut self, pkt: RtpPacket) {
|
||||
// Ignore duplicates.
|
||||
if self.buffer.contains_key(&pkt.seq) {
|
||||
return;
|
||||
}
|
||||
|
||||
// Detect large forward seq jump (hold/resume, SSRC change).
|
||||
if let Some(next) = self.next_seq {
|
||||
let jump = pkt.seq.wrapping_sub(next);
|
||||
if jump > 1000 && jump < 0x8000 {
|
||||
// Massive forward jump — reset buffer.
|
||||
self.reset();
|
||||
self.next_seq = Some(pkt.seq);
|
||||
}
|
||||
}
|
||||
|
||||
if self.next_seq.is_none() {
|
||||
self.next_seq = Some(pkt.seq);
|
||||
}
|
||||
|
||||
self.buffer.insert(pkt.seq, pkt);
|
||||
}
|
||||
|
||||
/// Consume one frame for the current 20ms tick.
|
||||
/// Called once per mixer tick per leg.
|
||||
pub fn consume(&mut self) -> JitterResult {
|
||||
// Track fill level for adaptation.
|
||||
let fill = self.buffer.len() as u32;
|
||||
if fill > self.max_fill_seen {
|
||||
self.max_fill_seen = fill;
|
||||
}
|
||||
|
||||
// Initial fill phase: wait until we have target_depth packets.
|
||||
if !self.playing {
|
||||
if fill >= self.target_depth {
|
||||
self.playing = true;
|
||||
} else {
|
||||
return JitterResult::Filling;
|
||||
}
|
||||
}
|
||||
|
||||
let seq = match self.next_seq {
|
||||
Some(s) => s,
|
||||
None => return JitterResult::Filling,
|
||||
};
|
||||
|
||||
// Advance next_seq (wrapping u16).
|
||||
self.next_seq = Some(seq.wrapping_add(1));
|
||||
|
||||
// Try to pull the expected sequence number.
|
||||
if let Some(pkt) = self.buffer.remove(&seq) {
|
||||
self.frames_consumed += 1;
|
||||
self.empty_streak = 0;
|
||||
|
||||
// Adaptive: if buffer is consistently deep, we can tighten.
|
||||
if fill > self.target_depth + 2 {
|
||||
self.excess_streak += 1;
|
||||
} else {
|
||||
self.excess_streak = 0;
|
||||
}
|
||||
|
||||
JitterResult::Packet(pkt)
|
||||
} else {
|
||||
// Packet missing — PLC needed.
|
||||
self.frames_lost += 1;
|
||||
self.empty_streak += 1;
|
||||
self.excess_streak = 0;
|
||||
|
||||
JitterResult::Missing
|
||||
}
|
||||
}
|
||||
|
||||
/// Run adaptation logic. Call every tick; internally gates to ~1s intervals.
|
||||
pub fn adapt(&mut self) {
|
||||
self.adapt_counter += 1;
|
||||
if self.adapt_counter < 50 {
|
||||
return;
|
||||
}
|
||||
self.adapt_counter = 0;
|
||||
|
||||
// If we had many empty ticks, increase depth.
|
||||
if self.empty_streak > 3 && self.target_depth < 6 {
|
||||
self.target_depth += 1;
|
||||
}
|
||||
// If buffer consistently overfull, decrease depth.
|
||||
else if self.excess_streak > 25 && self.target_depth > 2 {
|
||||
self.target_depth -= 1;
|
||||
}
|
||||
|
||||
self.max_fill_seen = 0;
|
||||
}
|
||||
|
||||
/// Discard packets that are too old (seq far behind next_seq).
|
||||
/// Prevents unbounded memory growth from reordered/late packets.
|
||||
pub fn prune_stale(&mut self) {
|
||||
if let Some(next) = self.next_seq {
|
||||
// Remove anything more than 100 frames behind playout point.
|
||||
// Use wrapping arithmetic: if (next - seq) > 100, it's stale.
|
||||
let stale: Vec<u16> = self
|
||||
.buffer
|
||||
.keys()
|
||||
.filter(|&&seq| {
|
||||
let age = next.wrapping_sub(seq);
|
||||
age > 100 && age < 0x8000 // < 0x8000 means it's actually behind, not ahead
|
||||
})
|
||||
.copied()
|
||||
.collect();
|
||||
for seq in stale {
|
||||
self.buffer.remove(&seq);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/// Reset the buffer (e.g., after re-INVITE / hold-resume).
|
||||
pub fn reset(&mut self) {
|
||||
self.buffer.clear();
|
||||
self.next_seq = None;
|
||||
self.playing = false;
|
||||
self.empty_streak = 0;
|
||||
self.excess_streak = 0;
|
||||
self.adapt_counter = 0;
|
||||
}
|
||||
}
|
||||
@@ -0,0 +1,164 @@
|
||||
//! Leg I/O task spawners.
|
||||
//!
|
||||
//! Each SIP leg gets two tasks:
|
||||
//! - Inbound: recv_from on RTP socket → strip header → send RtpPacket to mixer channel
|
||||
//! - Outbound: recv encoded RTP from mixer channel → send_to remote media endpoint
|
||||
//!
|
||||
//! WebRTC leg I/O is handled inside webrtc_engine.rs (on_track + track.write).
|
||||
|
||||
use crate::mixer::RtpPacket;
|
||||
use std::net::SocketAddr;
|
||||
use std::sync::Arc;
|
||||
use tokio::net::UdpSocket;
|
||||
use tokio::sync::{mpsc, watch};
|
||||
|
||||
/// Channel pair for connecting a leg to the mixer.
|
||||
pub struct LegChannels {
|
||||
/// Mixer receives decoded packets from this leg.
|
||||
pub inbound_tx: mpsc::Sender<RtpPacket>,
|
||||
pub inbound_rx: mpsc::Receiver<RtpPacket>,
|
||||
/// Mixer sends encoded RTP to this leg.
|
||||
pub outbound_tx: mpsc::Sender<Vec<u8>>,
|
||||
pub outbound_rx: mpsc::Receiver<Vec<u8>>,
|
||||
}
|
||||
|
||||
/// Create a channel pair for a leg.
|
||||
pub fn create_leg_channels() -> LegChannels {
|
||||
let (inbound_tx, inbound_rx) = mpsc::channel::<RtpPacket>(64);
|
||||
let (outbound_tx, outbound_rx) = mpsc::channel::<Vec<u8>>(8);
|
||||
LegChannels {
|
||||
inbound_tx,
|
||||
inbound_rx,
|
||||
outbound_tx,
|
||||
outbound_rx,
|
||||
}
|
||||
}
|
||||
|
||||
/// Spawn the inbound I/O task for a SIP leg.
|
||||
/// Reads RTP from the socket, parses the variable-length header (RFC 3550),
|
||||
/// and sends the payload to the mixer.
|
||||
/// Returns the JoinHandle (exits when the inbound_tx channel is dropped).
|
||||
pub fn spawn_sip_inbound(
|
||||
rtp_socket: Arc<UdpSocket>,
|
||||
inbound_tx: mpsc::Sender<RtpPacket>,
|
||||
) -> tokio::task::JoinHandle<()> {
|
||||
tokio::spawn(async move {
|
||||
let mut buf = vec![0u8; 1500];
|
||||
loop {
|
||||
match rtp_socket.recv_from(&mut buf).await {
|
||||
Ok((n, _from)) => {
|
||||
if n < 12 {
|
||||
continue; // Too small for RTP header.
|
||||
}
|
||||
let pt = buf[1] & 0x7F;
|
||||
let marker = (buf[1] & 0x80) != 0;
|
||||
let seq = u16::from_be_bytes([buf[2], buf[3]]);
|
||||
let timestamp = u32::from_be_bytes([buf[4], buf[5], buf[6], buf[7]]);
|
||||
|
||||
// RFC 3550: header length = 12 + (CC * 4) + optional extension.
|
||||
let cc = (buf[0] & 0x0F) as usize;
|
||||
let has_extension = (buf[0] & 0x10) != 0;
|
||||
let mut offset = 12 + cc * 4;
|
||||
if has_extension {
|
||||
if offset + 4 > n {
|
||||
continue; // Malformed: extension header truncated.
|
||||
}
|
||||
let ext_len =
|
||||
u16::from_be_bytes([buf[offset + 2], buf[offset + 3]]) as usize;
|
||||
offset += 4 + ext_len * 4;
|
||||
}
|
||||
if offset >= n {
|
||||
continue; // No payload after header.
|
||||
}
|
||||
|
||||
let payload = buf[offset..n].to_vec();
|
||||
if payload.is_empty() {
|
||||
continue;
|
||||
}
|
||||
if inbound_tx
|
||||
.send(RtpPacket {
|
||||
payload,
|
||||
payload_type: pt,
|
||||
marker,
|
||||
seq,
|
||||
timestamp,
|
||||
})
|
||||
.await
|
||||
.is_err()
|
||||
{
|
||||
break; // Channel closed — leg removed.
|
||||
}
|
||||
}
|
||||
Err(_) => break, // Socket error.
|
||||
}
|
||||
}
|
||||
})
|
||||
}
|
||||
|
||||
/// Spawn the outbound I/O task for a SIP leg.
|
||||
/// Reads encoded RTP packets from the mixer and sends them to the remote media endpoint.
|
||||
/// Returns the JoinHandle (exits when the outbound_rx channel is closed).
|
||||
pub fn spawn_sip_outbound(
|
||||
rtp_socket: Arc<UdpSocket>,
|
||||
remote_media: SocketAddr,
|
||||
mut outbound_rx: mpsc::Receiver<Vec<u8>>,
|
||||
) -> tokio::task::JoinHandle<()> {
|
||||
tokio::spawn(async move {
|
||||
while let Some(rtp_data) = outbound_rx.recv().await {
|
||||
let _ = rtp_socket.send_to(&rtp_data, remote_media).await;
|
||||
}
|
||||
})
|
||||
}
|
||||
|
||||
/// Spawn a raw UDP inbound task for non-RTP passthrough media such as T.38 UDPTL.
|
||||
pub fn spawn_raw_udp_inbound(
|
||||
media_socket: Arc<UdpSocket>,
|
||||
inbound_tx: mpsc::Sender<Vec<u8>>,
|
||||
mut cancel_rx: watch::Receiver<bool>,
|
||||
) -> tokio::task::JoinHandle<()> {
|
||||
tokio::spawn(async move {
|
||||
let mut buf = vec![0u8; 2048];
|
||||
loop {
|
||||
tokio::select! {
|
||||
_ = cancel_rx.changed() => break,
|
||||
recv = media_socket.recv_from(&mut buf) => {
|
||||
match recv {
|
||||
Ok((n, _from)) => {
|
||||
if n == 0 {
|
||||
continue;
|
||||
}
|
||||
if inbound_tx.send(buf[..n].to_vec()).await.is_err() {
|
||||
break;
|
||||
}
|
||||
}
|
||||
Err(_) => break,
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
})
|
||||
}
|
||||
|
||||
/// Spawn a raw UDP outbound task for non-RTP passthrough media such as T.38 UDPTL.
|
||||
pub fn spawn_raw_udp_outbound(
|
||||
media_socket: Arc<UdpSocket>,
|
||||
remote_media: SocketAddr,
|
||||
mut outbound_rx: mpsc::Receiver<Vec<u8>>,
|
||||
mut cancel_rx: watch::Receiver<bool>,
|
||||
) -> tokio::task::JoinHandle<()> {
|
||||
tokio::spawn(async move {
|
||||
loop {
|
||||
tokio::select! {
|
||||
_ = cancel_rx.changed() => break,
|
||||
pkt = outbound_rx.recv() => {
|
||||
match pkt {
|
||||
Some(packet) => {
|
||||
let _ = media_socket.send_to(&packet, remote_media).await;
|
||||
}
|
||||
None => break,
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
})
|
||||
}
|
||||
+1232
-118
File diff suppressed because it is too large
Load Diff
@@ -0,0 +1,908 @@
|
||||
//! Audio mixer — mix-minus engine for multiparty calls.
|
||||
//!
|
||||
//! Each Call spawns one mixer task. Legs communicate with the mixer via
|
||||
//! tokio mpsc channels — no shared mutable state, no lock contention.
|
||||
//!
|
||||
//! Internal bus format: 48kHz f32 PCM (960 samples per 20ms frame).
|
||||
//! All encoding/decoding happens at leg boundaries. Per-leg inbound denoising at 48kHz.
|
||||
//!
|
||||
//! The mixer runs a 20ms tick loop:
|
||||
//! 1. Drain inbound channels, reorder RTP, decode variable-duration packets to 48kHz,
|
||||
//! and queue them in per-leg PCM buffers
|
||||
//! 2. Compute total mix (sum of all **participant** legs' f32 PCM as f64)
|
||||
//! 3. For each participant leg: mix-minus = total - own, resample to leg codec rate, encode, send
|
||||
//! 4. For each isolated leg: play prompt frame or silence, check DTMF
|
||||
//! 5. For each tool leg: send per-source unmerged audio batch
|
||||
//! 6. Forward DTMF between participant legs only
|
||||
|
||||
use crate::ipc::{emit_event, OutTx};
|
||||
use crate::jitter_buffer::{JitterBuffer, JitterResult};
|
||||
use crate::rtp::{build_rtp_header, rtp_clock_increment, rtp_clock_rate};
|
||||
use crate::tts::TtsStreamMessage;
|
||||
use codec_lib::{codec_sample_rate, new_denoiser, TranscodeState};
|
||||
use nnnoiseless::DenoiseState;
|
||||
use std::collections::{HashMap, VecDeque};
|
||||
use tokio::sync::{mpsc, oneshot, watch};
|
||||
use tokio::task::JoinHandle;
|
||||
use tokio::time::{self, Duration, MissedTickBehavior};
|
||||
|
||||
/// Mixing sample rate — 48kHz. Opus is native, G.722 needs 3× upsample, G.711 needs 6× upsample.
|
||||
/// All processing (denoising, mixing) happens at this rate in f32 for maximum quality.
|
||||
const MIX_RATE: u32 = 48000;
|
||||
/// Samples per 20ms frame at the mixing rate.
|
||||
const MIX_FRAME_SIZE: usize = 960; // 48000 * 0.020
|
||||
/// Safety cap for how much timestamp-derived gap fill we synthesize at once.
|
||||
const MAX_GAP_FILL_SAMPLES: usize = MIX_FRAME_SIZE * 6; // 120ms
|
||||
/// Bound how many decode / concealment steps a leg can consume in one tick.
|
||||
const MAX_PACKET_STEPS_PER_TICK: usize = 24;
|
||||
/// Report the first output drop immediately, then every N drops.
|
||||
const DROP_REPORT_INTERVAL: u64 = 50;
|
||||
|
||||
/// Payload of one RTP packet received from a leg, together with the header
/// fields the mixer needs. The RTP header itself is not included.
pub struct RtpPacket {
    /// Payload bytes following the RTP header.
    pub payload: Vec<u8>,
    /// RTP payload type.
    pub payload_type: u8,
    /// RTP marker bit (first packet of a DTMF event, etc.).
    pub marker: bool,
    /// RTP sequence number, used for reordering.
    pub seq: u16,
    /// RTP timestamp taken from the original packet header.
    pub timestamp: u32,
}
|
||||
|
||||
// ---------------------------------------------------------------------------
|
||||
// Leg roles
|
||||
// ---------------------------------------------------------------------------
|
||||
|
||||
/// What role a leg currently plays in the mixer.
///
/// A leg starts as `Participant`, becomes `Isolated` while an interaction is
/// running, and is switched back to `Participant` when the interaction completes.
enum LegRole {
    /// Normal participant: contributes to mix, receives mix-minus.
    Participant,
    /// Temporarily isolated for IVR/consent interaction; carries the interaction state.
    Isolated(IsolationState),
}
|
||||
|
||||
/// State of one running interaction (prompt playback + DTMF collection) on an isolated leg.
struct IsolationState {
    /// PCM frames at MIX_RATE (960 samples each, 48kHz f32) queued for playback.
    prompt_frames: VecDeque<Vec<f32>>,
    /// Live TTS frames arrive here while playback is already in progress.
    /// Set to `None` once the stream has finished, failed, or disconnected.
    prompt_stream_rx: Option<mpsc::Receiver<TtsStreamMessage>>,
    /// Cancels the background TTS producer when the interaction ends early.
    /// Taken (and signalled with `true`) at most once.
    prompt_cancel_tx: Option<watch::Sender<bool>>,
    /// Whether the live prompt stream has ended.
    prompt_stream_finished: bool,
    /// Digits that complete the interaction (e.g., ['1', '2']).
    expected_digits: Vec<char>,
    /// Ticks remaining before timeout (decremented each tick after prompt ends).
    /// One tick is 20ms.
    timeout_ticks_remaining: u32,
    /// Whether we've finished playing the prompt.
    prompt_done: bool,
    /// Channel to send the result back to the command handler.
    /// Taken when the result is delivered so it can only be sent once.
    result_tx: Option<oneshot::Sender<InteractionResult>>,
}
|
||||
|
||||
/// Result of a leg interaction (consent prompt, IVR, etc.).
///
/// Delivered through the `result_tx` oneshot supplied with
/// `MixerCommand::StartInteraction`.
pub enum InteractionResult {
    /// The participant pressed one of the expected digits.
    Digit(char),
    /// No digit was received within the timeout.
    Timeout,
    /// The leg was removed or the call tore down before completion.
    /// Also sent when the target leg is unknown or a newer interaction replaces this one.
    Cancelled,
}
|
||||
|
||||
// ---------------------------------------------------------------------------
|
||||
// Tool legs
|
||||
// ---------------------------------------------------------------------------
|
||||
|
||||
/// Type of tool leg.
///
/// Stored with the tool leg slot; the mixer itself does not branch on it.
#[derive(Debug, Clone, Copy)]
pub enum ToolType {
    /// Tool leg used for recording.
    Recording,
    /// Tool leg used for transcription.
    Transcription,
}
|
||||
|
||||
/// Per-source audio delivered to a tool leg each mixer tick.
pub struct ToolAudioBatch {
    /// One entry per participant leg for this tick; isolated legs are not included.
    pub sources: Vec<ToolAudioSource>,
}
|
||||
|
||||
/// One participant's 20ms audio frame.
pub struct ToolAudioSource {
    /// Identifier of the participant leg the frame came from.
    pub leg_id: String,
    /// PCM at 48kHz f32, MIX_FRAME_SIZE (960) samples.
    pub pcm_48k: Vec<f32>,
}
|
||||
|
||||
/// Internal storage for a tool leg inside the mixer.
struct ToolLegSlot {
    /// Kind of tool; stored but currently not read by the mixer.
    #[allow(dead_code)]
    tool_type: ToolType,
    /// Channel on which the per-tick audio batches are delivered (non-blocking sends).
    audio_tx: mpsc::Sender<ToolAudioBatch>,
    /// Number of batches that could not be delivered (channel full or closed).
    dropped_batches: u64,
}
|
||||
|
||||
// ---------------------------------------------------------------------------
|
||||
// Commands
|
||||
// ---------------------------------------------------------------------------
|
||||
|
||||
/// Commands sent to the mixer task via a control channel.
///
/// Commands are drained without blocking at the start of every 20ms tick.
pub enum MixerCommand {
    /// Add a new participant leg to the mix.
    AddLeg {
        /// Identifier the leg is stored under.
        leg_id: String,
        /// RTP payload type of the leg's codec.
        codec_pt: u8,
        /// Inbound packets from the leg's I/O task.
        inbound_rx: mpsc::Receiver<RtpPacket>,
        /// Outbound, fully built RTP packets (header + payload) for the leg's I/O task.
        outbound_tx: mpsc::Sender<Vec<u8>>,
    },
    /// Remove a leg from the mix (channels are dropped, I/O tasks exit).
    /// A running interaction on the leg is reported as `Cancelled`.
    RemoveLeg { leg_id: String },
    /// Shut down the mixer. Running interactions are reported as `Cancelled`.
    Shutdown,

    /// Isolate a leg and start an interaction (consent prompt, IVR).
    /// The leg is removed from the mix and hears the prompt instead.
    /// DTMF from the leg is checked against expected_digits.
    StartInteraction {
        /// Leg to isolate; if unknown, the interaction is cancelled immediately.
        leg_id: String,
        /// PCM frames at MIX_RATE (48kHz f32), each 960 samples.
        prompt_pcm_frames: Vec<Vec<f32>>,
        /// Optional live prompt stream. Frames are appended as they are synthesized.
        prompt_stream_rx: Option<mpsc::Receiver<TtsStreamMessage>>,
        /// Optional cancellation handle for the live prompt stream.
        prompt_cancel_tx: Option<watch::Sender<bool>>,
        /// Digits that complete the interaction.
        expected_digits: Vec<char>,
        /// Timeout in milliseconds, counted in 20ms ticks once the prompt has finished.
        timeout_ms: u32,
        /// Receives the final `InteractionResult`.
        result_tx: oneshot::Sender<InteractionResult>,
    },

    /// Add a tool leg that receives per-source unmerged audio.
    AddToolLeg {
        /// Identifier the tool leg is stored under.
        leg_id: String,
        /// Kind of tool attached.
        tool_type: ToolType,
        /// Channel on which the per-tick batches are delivered.
        audio_tx: mpsc::Sender<ToolAudioBatch>,
    },
    /// Remove a tool leg (drops the channel, background task finalizes).
    RemoveToolLeg { leg_id: String },
}
|
||||
|
||||
// ---------------------------------------------------------------------------
|
||||
// Mixer internals
|
||||
// ---------------------------------------------------------------------------
|
||||
|
||||
/// Internal per-leg state inside the mixer.
struct MixerLegSlot {
    /// RTP payload type of the leg's codec; selects clock rate, resampling and encoding.
    codec_pt: u8,
    /// Per-leg codec state used for decoding, resampling, encoding and Opus PLC.
    transcoder: TranscodeState,
    /// Per-leg inbound denoiser (48kHz, 480-sample frames).
    /// Applied to every codec except Opus.
    denoiser: Box<DenoiseState<'static>>,
    /// Inbound packets from the leg's I/O task.
    inbound_rx: mpsc::Receiver<RtpPacket>,
    /// Outbound RTP packets (header + payload) towards the leg's I/O task.
    outbound_tx: mpsc::Sender<Vec<u8>>,
    /// Decoded PCM waiting for playout. Variable-duration RTP packets are
    /// decoded into this FIFO; the mixer consumes exactly one 20ms frame per tick.
    pcm_buffer: VecDeque<f32>,
    /// Last decoded+denoised PCM frame at MIX_RATE (960 samples, 48kHz f32).
    /// Also serves as the template for fade-out concealment.
    last_pcm_frame: Vec<f32>,
    /// Next RTP timestamp expected from the inbound stream.
    /// `None` until the first packet is decoded and after a long silence.
    expected_rtp_timestamp: Option<u32>,
    /// Best-effort estimate of packet duration in RTP clock units.
    estimated_packet_ts: u32,
    /// Number of consecutive ticks with no inbound packet.
    silent_ticks: u32,
    /// Per-leg jitter buffer for packet reordering and timing.
    jitter: JitterBuffer,
    // RTP output state.
    /// Sequence number of the next outbound packet (shared by audio and DTMF).
    rtp_seq: u16,
    /// Timestamp of the next outbound audio packet.
    rtp_ts: u32,
    /// SSRC of the outbound stream.
    rtp_ssrc: u32,
    /// Dropped outbound frames for this leg (queue full / closed).
    outbound_drops: u64,
    /// Current role of this leg in the mixer.
    role: LegRole,
}
|
||||
|
||||
fn mix_samples_to_rtp_ts(codec_pt: u8, mix_samples: usize) -> u32 {
|
||||
let clock_rate = rtp_clock_rate(codec_pt).max(1) as u64;
|
||||
(((mix_samples as u64 * clock_rate) + (MIX_RATE as u64 / 2)) / MIX_RATE as u64) as u32
|
||||
}
|
||||
|
||||
fn rtp_ts_to_mix_samples(codec_pt: u8, rtp_ts: u32) -> usize {
|
||||
let clock_rate = rtp_clock_rate(codec_pt).max(1) as u64;
|
||||
(((rtp_ts as u64 * MIX_RATE as u64) + (clock_rate / 2)) / clock_rate) as usize
|
||||
}
|
||||
|
||||
/// Returns true when a wrapping timestamp difference represents a forward jump:
/// non-zero and in the lower half of the 32-bit range.
fn is_forward_rtp_delta(delta: u32) -> bool {
    (1..0x8000_0000u32).contains(&delta)
}
|
||||
|
||||
fn should_emit_drop_event(total_drops: u64) -> bool {
|
||||
total_drops == 1 || total_drops % DROP_REPORT_INTERVAL == 0
|
||||
}
|
||||
|
||||
fn emit_output_drop_event(
|
||||
out_tx: &OutTx,
|
||||
call_id: &str,
|
||||
leg_id: Option<&str>,
|
||||
tool_leg_id: Option<&str>,
|
||||
stream: &str,
|
||||
reason: &str,
|
||||
total_drops: u64,
|
||||
) {
|
||||
if !should_emit_drop_event(total_drops) {
|
||||
return;
|
||||
}
|
||||
|
||||
emit_event(
|
||||
out_tx,
|
||||
"mixer_output_drop",
|
||||
serde_json::json!({
|
||||
"call_id": call_id,
|
||||
"leg_id": leg_id,
|
||||
"tool_leg_id": tool_leg_id,
|
||||
"stream": stream,
|
||||
"reason": reason,
|
||||
"total_drops": total_drops,
|
||||
}),
|
||||
);
|
||||
}
|
||||
|
||||
fn fade_concealment_from_last_frame(slot: &mut MixerLegSlot, samples: usize, decay: f32) {
|
||||
let mut template = if slot.last_pcm_frame.is_empty() {
|
||||
vec![0.0f32; MIX_FRAME_SIZE]
|
||||
} else {
|
||||
slot.last_pcm_frame.clone()
|
||||
};
|
||||
|
||||
let mut remaining = samples;
|
||||
while remaining > 0 {
|
||||
for sample in &mut template {
|
||||
*sample *= decay;
|
||||
}
|
||||
let take = remaining.min(template.len());
|
||||
slot.pcm_buffer.extend(template.iter().take(take).copied());
|
||||
remaining -= take;
|
||||
}
|
||||
}
|
||||
|
||||
fn append_packet_loss_concealment(slot: &mut MixerLegSlot, samples: usize) {
|
||||
let mut remaining = samples.max(1);
|
||||
while remaining > 0 {
|
||||
let chunk = remaining.min(MIX_FRAME_SIZE);
|
||||
if slot.codec_pt == codec_lib::PT_OPUS {
|
||||
match slot.transcoder.opus_plc(chunk) {
|
||||
Ok(mut pcm) => {
|
||||
pcm.resize(chunk, 0.0);
|
||||
slot.pcm_buffer.extend(pcm);
|
||||
}
|
||||
Err(_) => fade_concealment_from_last_frame(slot, chunk, 0.8),
|
||||
}
|
||||
} else {
|
||||
fade_concealment_from_last_frame(slot, chunk, 0.85);
|
||||
}
|
||||
remaining -= chunk;
|
||||
}
|
||||
}
|
||||
|
||||
fn decode_packet_to_mix_pcm(slot: &mut MixerLegSlot, pkt: &RtpPacket) -> Option<Vec<f32>> {
|
||||
let (pcm, rate) = slot
|
||||
.transcoder
|
||||
.decode_to_f32(&pkt.payload, pkt.payload_type)
|
||||
.ok()?;
|
||||
|
||||
let pcm_48k = if rate == MIX_RATE {
|
||||
pcm
|
||||
} else {
|
||||
slot.transcoder
|
||||
.resample_f32(&pcm, rate, MIX_RATE)
|
||||
.unwrap_or_else(|_| vec![0.0f32; MIX_FRAME_SIZE])
|
||||
};
|
||||
|
||||
let processed = if slot.codec_pt != codec_lib::PT_OPUS {
|
||||
TranscodeState::denoise_f32(&mut slot.denoiser, &pcm_48k)
|
||||
} else {
|
||||
pcm_48k
|
||||
};
|
||||
|
||||
Some(processed)
|
||||
}
|
||||
|
||||
fn queue_inbound_packet(slot: &mut MixerLegSlot, pkt: RtpPacket) {
|
||||
if let Some(pcm_48k) = decode_packet_to_mix_pcm(slot, &pkt) {
|
||||
if pcm_48k.is_empty() {
|
||||
return;
|
||||
}
|
||||
|
||||
if let Some(expected_ts) = slot.expected_rtp_timestamp {
|
||||
let gap_ts = pkt.timestamp.wrapping_sub(expected_ts);
|
||||
if is_forward_rtp_delta(gap_ts) {
|
||||
let gap_samples = rtp_ts_to_mix_samples(slot.codec_pt, gap_ts);
|
||||
if gap_samples <= MAX_GAP_FILL_SAMPLES {
|
||||
append_packet_loss_concealment(slot, gap_samples);
|
||||
} else {
|
||||
slot.pcm_buffer.clear();
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
let packet_ts = mix_samples_to_rtp_ts(slot.codec_pt, pcm_48k.len());
|
||||
if packet_ts > 0 {
|
||||
slot.estimated_packet_ts = packet_ts;
|
||||
slot.expected_rtp_timestamp = Some(pkt.timestamp.wrapping_add(packet_ts));
|
||||
}
|
||||
slot.pcm_buffer.extend(pcm_48k);
|
||||
}
|
||||
}
|
||||
|
||||
fn fill_leg_playout_buffer(slot: &mut MixerLegSlot) {
|
||||
let mut steps = 0usize;
|
||||
while slot.pcm_buffer.len() < MIX_FRAME_SIZE && steps < MAX_PACKET_STEPS_PER_TICK {
|
||||
steps += 1;
|
||||
match slot.jitter.consume() {
|
||||
JitterResult::Packet(pkt) => queue_inbound_packet(slot, pkt),
|
||||
JitterResult::Missing => {
|
||||
let conceal_ts = slot
|
||||
.estimated_packet_ts
|
||||
.max(rtp_clock_increment(slot.codec_pt));
|
||||
let conceal_samples =
|
||||
rtp_ts_to_mix_samples(slot.codec_pt, conceal_ts).clamp(1, MAX_GAP_FILL_SAMPLES);
|
||||
append_packet_loss_concealment(slot, conceal_samples);
|
||||
if let Some(expected_ts) = slot.expected_rtp_timestamp {
|
||||
slot.expected_rtp_timestamp = Some(expected_ts.wrapping_add(conceal_ts));
|
||||
}
|
||||
}
|
||||
JitterResult::Filling => break,
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
fn take_mix_frame(slot: &mut MixerLegSlot) -> Vec<f32> {
|
||||
let mut frame = Vec::with_capacity(MIX_FRAME_SIZE);
|
||||
while frame.len() < MIX_FRAME_SIZE {
|
||||
if let Some(sample) = slot.pcm_buffer.pop_front() {
|
||||
frame.push(sample);
|
||||
} else {
|
||||
frame.push(0.0);
|
||||
}
|
||||
}
|
||||
frame
|
||||
}
|
||||
|
||||
/// Soft limiter for a single sample.
///
/// Samples with magnitude up to the knee (0.85) pass through unchanged. Above
/// the knee the excess is compressed so the output magnitude approaches, and is
/// capped at, 1.0. The sign of the input is preserved.
fn soft_limit_sample(sample: f32) -> f32 {
    const KNEE: f32 = 0.85;

    let magnitude = sample.abs();
    if magnitude <= KNEE {
        return sample;
    }

    let over = magnitude - KNEE;
    let squeezed = over / (1.0 + (over / (1.0 - KNEE)));
    let limited = (KNEE + squeezed).min(1.0);
    sample.signum() * limited
}
|
||||
|
||||
fn try_send_leg_output(
|
||||
out_tx: &OutTx,
|
||||
call_id: &str,
|
||||
leg_id: &str,
|
||||
slot: &mut MixerLegSlot,
|
||||
rtp: Vec<u8>,
|
||||
stream: &str,
|
||||
) {
|
||||
let reason = match slot.outbound_tx.try_send(rtp) {
|
||||
Ok(()) => return,
|
||||
Err(mpsc::error::TrySendError::Full(_)) => "full",
|
||||
Err(mpsc::error::TrySendError::Closed(_)) => "closed",
|
||||
};
|
||||
|
||||
slot.outbound_drops += 1;
|
||||
emit_output_drop_event(
|
||||
out_tx,
|
||||
call_id,
|
||||
Some(leg_id),
|
||||
None,
|
||||
stream,
|
||||
reason,
|
||||
slot.outbound_drops,
|
||||
);
|
||||
}
|
||||
|
||||
fn try_send_tool_output(
|
||||
out_tx: &OutTx,
|
||||
call_id: &str,
|
||||
tool_leg_id: &str,
|
||||
tool: &mut ToolLegSlot,
|
||||
batch: ToolAudioBatch,
|
||||
) {
|
||||
let reason = match tool.audio_tx.try_send(batch) {
|
||||
Ok(()) => return,
|
||||
Err(mpsc::error::TrySendError::Full(_)) => "full",
|
||||
Err(mpsc::error::TrySendError::Closed(_)) => "closed",
|
||||
};
|
||||
|
||||
tool.dropped_batches += 1;
|
||||
emit_output_drop_event(
|
||||
out_tx,
|
||||
call_id,
|
||||
None,
|
||||
Some(tool_leg_id),
|
||||
"tool-batch",
|
||||
reason,
|
||||
tool.dropped_batches,
|
||||
);
|
||||
}
|
||||
|
||||
fn cancel_prompt_producer(state: &mut IsolationState) {
|
||||
if let Some(cancel_tx) = state.prompt_cancel_tx.take() {
|
||||
let _ = cancel_tx.send(true);
|
||||
}
|
||||
}
|
||||
|
||||
fn cancel_isolated_interaction(state: &mut IsolationState) {
|
||||
cancel_prompt_producer(state);
|
||||
if let Some(tx) = state.result_tx.take() {
|
||||
let _ = tx.send(InteractionResult::Cancelled);
|
||||
}
|
||||
}
|
||||
|
||||
fn drain_prompt_stream(
|
||||
out_tx: &OutTx,
|
||||
call_id: &str,
|
||||
leg_id: &str,
|
||||
state: &mut IsolationState,
|
||||
) {
|
||||
loop {
|
||||
let Some(mut stream_rx) = state.prompt_stream_rx.take() else {
|
||||
return;
|
||||
};
|
||||
|
||||
match stream_rx.try_recv() {
|
||||
Ok(TtsStreamMessage::Frames(frames)) => {
|
||||
state.prompt_frames.extend(frames);
|
||||
state.prompt_stream_rx = Some(stream_rx);
|
||||
}
|
||||
Ok(TtsStreamMessage::Finished) => {
|
||||
state.prompt_stream_finished = true;
|
||||
return;
|
||||
}
|
||||
Ok(TtsStreamMessage::Failed(error)) => {
|
||||
emit_event(
|
||||
out_tx,
|
||||
"mixer_error",
|
||||
serde_json::json!({
|
||||
"call_id": call_id,
|
||||
"leg_id": leg_id,
|
||||
"error": format!("tts stream failed: {error}"),
|
||||
}),
|
||||
);
|
||||
state.prompt_stream_finished = true;
|
||||
return;
|
||||
}
|
||||
Err(mpsc::error::TryRecvError::Empty) => {
|
||||
state.prompt_stream_rx = Some(stream_rx);
|
||||
return;
|
||||
}
|
||||
Err(mpsc::error::TryRecvError::Disconnected) => {
|
||||
state.prompt_stream_finished = true;
|
||||
return;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/// Spawn the mixer task for a call. Returns the command sender and task handle.
|
||||
pub fn spawn_mixer(call_id: String, out_tx: OutTx) -> (mpsc::Sender<MixerCommand>, JoinHandle<()>) {
|
||||
let (cmd_tx, cmd_rx) = mpsc::channel::<MixerCommand>(32);
|
||||
|
||||
let handle = tokio::spawn(async move {
|
||||
mixer_loop(call_id, cmd_rx, out_tx).await;
|
||||
});
|
||||
|
||||
(cmd_tx, handle)
|
||||
}
|
||||
|
||||
/// The 20ms mixing loop.
|
||||
async fn mixer_loop(call_id: String, mut cmd_rx: mpsc::Receiver<MixerCommand>, out_tx: OutTx) {
|
||||
let mut legs: HashMap<String, MixerLegSlot> = HashMap::new();
|
||||
let mut tool_legs: HashMap<String, ToolLegSlot> = HashMap::new();
|
||||
let mut interval = time::interval(Duration::from_millis(20));
|
||||
interval.set_missed_tick_behavior(MissedTickBehavior::Skip);
|
||||
|
||||
loop {
|
||||
interval.tick().await;
|
||||
|
||||
// ── 1. Process control commands (non-blocking). ─────────────
|
||||
loop {
|
||||
match cmd_rx.try_recv() {
|
||||
Ok(MixerCommand::AddLeg {
|
||||
leg_id,
|
||||
codec_pt,
|
||||
inbound_rx,
|
||||
outbound_tx,
|
||||
}) => {
|
||||
let transcoder = match TranscodeState::new() {
|
||||
Ok(t) => t,
|
||||
Err(e) => {
|
||||
emit_event(
|
||||
&out_tx,
|
||||
"mixer_error",
|
||||
serde_json::json!({
|
||||
"call_id": call_id,
|
||||
"leg_id": leg_id,
|
||||
"error": format!("codec init: {e}"),
|
||||
}),
|
||||
);
|
||||
continue;
|
||||
}
|
||||
};
|
||||
legs.insert(
|
||||
leg_id,
|
||||
MixerLegSlot {
|
||||
codec_pt,
|
||||
transcoder,
|
||||
denoiser: new_denoiser(),
|
||||
inbound_rx,
|
||||
outbound_tx,
|
||||
pcm_buffer: VecDeque::new(),
|
||||
last_pcm_frame: vec![0.0f32; MIX_FRAME_SIZE],
|
||||
expected_rtp_timestamp: None,
|
||||
estimated_packet_ts: rtp_clock_increment(codec_pt),
|
||||
silent_ticks: 0,
|
||||
rtp_seq: 0,
|
||||
rtp_ts: 0,
|
||||
rtp_ssrc: rand::random(),
|
||||
outbound_drops: 0,
|
||||
role: LegRole::Participant,
|
||||
jitter: JitterBuffer::new(),
|
||||
},
|
||||
);
|
||||
}
|
||||
Ok(MixerCommand::RemoveLeg { leg_id }) => {
|
||||
// If the leg is isolated, send Cancelled before dropping.
|
||||
if let Some(slot) = legs.get_mut(&leg_id) {
|
||||
if let LegRole::Isolated(ref mut state) = slot.role {
|
||||
cancel_isolated_interaction(state);
|
||||
}
|
||||
}
|
||||
legs.remove(&leg_id);
|
||||
// Channels drop → I/O tasks exit cleanly.
|
||||
}
|
||||
Ok(MixerCommand::Shutdown) => {
|
||||
// Cancel all outstanding interactions before shutting down.
|
||||
for slot in legs.values_mut() {
|
||||
if let LegRole::Isolated(ref mut state) = slot.role {
|
||||
cancel_isolated_interaction(state);
|
||||
}
|
||||
}
|
||||
return;
|
||||
}
|
||||
Ok(MixerCommand::StartInteraction {
|
||||
leg_id,
|
||||
prompt_pcm_frames,
|
||||
prompt_stream_rx,
|
||||
prompt_cancel_tx,
|
||||
expected_digits,
|
||||
timeout_ms,
|
||||
result_tx,
|
||||
}) => {
|
||||
if let Some(slot) = legs.get_mut(&leg_id) {
|
||||
// Cancel any existing interaction first.
|
||||
if let LegRole::Isolated(ref mut old_state) = slot.role {
|
||||
cancel_isolated_interaction(old_state);
|
||||
}
|
||||
let timeout_ticks = timeout_ms / 20;
|
||||
slot.role = LegRole::Isolated(IsolationState {
|
||||
prompt_frames: VecDeque::from(prompt_pcm_frames),
|
||||
prompt_stream_rx,
|
||||
prompt_cancel_tx,
|
||||
prompt_stream_finished: false,
|
||||
expected_digits,
|
||||
timeout_ticks_remaining: timeout_ticks,
|
||||
prompt_done: false,
|
||||
result_tx: Some(result_tx),
|
||||
});
|
||||
} else {
|
||||
// Leg not found — immediately cancel.
|
||||
if let Some(cancel_tx) = prompt_cancel_tx {
|
||||
let _ = cancel_tx.send(true);
|
||||
}
|
||||
let _ = result_tx.send(InteractionResult::Cancelled);
|
||||
}
|
||||
}
|
||||
Ok(MixerCommand::AddToolLeg {
|
||||
leg_id,
|
||||
tool_type,
|
||||
audio_tx,
|
||||
}) => {
|
||||
tool_legs.insert(
|
||||
leg_id,
|
||||
ToolLegSlot {
|
||||
tool_type,
|
||||
audio_tx,
|
||||
dropped_batches: 0,
|
||||
},
|
||||
);
|
||||
}
|
||||
Ok(MixerCommand::RemoveToolLeg { leg_id }) => {
|
||||
tool_legs.remove(&leg_id);
|
||||
// Dropping the ToolLegSlot drops audio_tx → background task sees channel close.
|
||||
}
|
||||
Err(mpsc::error::TryRecvError::Empty) => break,
|
||||
Err(mpsc::error::TryRecvError::Disconnected) => return,
|
||||
}
|
||||
}
|
||||
|
||||
if legs.is_empty() && tool_legs.is_empty() {
|
||||
continue;
|
||||
}
|
||||
|
||||
// ── 2. Drain inbound packets, decode to 48kHz f32 PCM. ────
|
||||
// DTMF (PT 101) packets are collected separately.
|
||||
// Audio packets are sorted by sequence number and decoded
|
||||
// in order to maintain codec state (critical for G.722 ADPCM).
|
||||
let leg_ids: Vec<String> = legs.keys().cloned().collect();
|
||||
let mut dtmf_forward: Vec<(String, RtpPacket)> = Vec::new();
|
||||
|
||||
for lid in &leg_ids {
|
||||
let slot = legs.get_mut(lid).unwrap();
|
||||
|
||||
// Step 2a: Drain all pending packets into the jitter buffer.
|
||||
let mut got_audio = false;
|
||||
loop {
|
||||
match slot.inbound_rx.try_recv() {
|
||||
Ok(pkt) => {
|
||||
if pkt.payload_type == 101 {
|
||||
dtmf_forward.push((lid.clone(), pkt));
|
||||
} else {
|
||||
got_audio = true;
|
||||
slot.jitter.push(pkt);
|
||||
}
|
||||
}
|
||||
Err(_) => break,
|
||||
}
|
||||
}
|
||||
|
||||
// Step 2b: Decode enough RTP to cover one 20ms playout frame.
|
||||
// Variable-duration packets (10ms, 20ms, 60ms, ...) accumulate in
|
||||
// the per-leg PCM FIFO; we pop exactly one 20ms frame below.
|
||||
fill_leg_playout_buffer(slot);
|
||||
slot.last_pcm_frame = take_mix_frame(slot);
|
||||
|
||||
// Run jitter adaptation + prune stale packets.
|
||||
slot.jitter.adapt();
|
||||
slot.jitter.prune_stale();
|
||||
|
||||
// Silent ticks: based on actual network reception, not jitter buffer state.
|
||||
if got_audio || dtmf_forward.iter().any(|(src, _)| src == lid) {
|
||||
slot.silent_ticks = 0;
|
||||
} else {
|
||||
slot.silent_ticks += 1;
|
||||
}
|
||||
if slot.silent_ticks > 150 {
|
||||
slot.last_pcm_frame = vec![0.0f32; MIX_FRAME_SIZE];
|
||||
slot.pcm_buffer.clear();
|
||||
slot.expected_rtp_timestamp = None;
|
||||
slot.estimated_packet_ts = rtp_clock_increment(slot.codec_pt);
|
||||
}
|
||||
}
|
||||
|
||||
// ── 3. Compute total mix from PARTICIPANT legs only. ────────
|
||||
// Accumulate as f64 to prevent precision loss when summing f32.
|
||||
let mut total_mix = vec![0.0f64; MIX_FRAME_SIZE];
|
||||
for slot in legs.values() {
|
||||
if matches!(slot.role, LegRole::Participant) {
|
||||
for (i, &s) in slot.last_pcm_frame.iter().enumerate().take(MIX_FRAME_SIZE) {
|
||||
total_mix[i] += s as f64;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// ── 4. Per-leg output. ──────────────────────────────────────
|
||||
// Collect interaction completions to apply after the loop
|
||||
// (can't mutate role while iterating mutably for encode).
|
||||
let mut completed_interactions: Vec<(String, InteractionResult)> = Vec::new();
|
||||
|
||||
for (lid, slot) in legs.iter_mut() {
|
||||
match &mut slot.role {
|
||||
LegRole::Participant => {
|
||||
// Mix-minus: total minus this leg's own contribution.
|
||||
// Apply a light soft limiter instead of hard clipping the sum.
|
||||
let mut mix_minus = Vec::with_capacity(MIX_FRAME_SIZE);
|
||||
for i in 0..MIX_FRAME_SIZE {
|
||||
let sample = (total_mix[i] - slot.last_pcm_frame[i] as f64) as f32;
|
||||
mix_minus.push(soft_limit_sample(sample));
|
||||
}
|
||||
|
||||
// Resample from 48kHz to the leg's codec native rate.
|
||||
let target_rate = codec_sample_rate(slot.codec_pt);
|
||||
let resampled = if target_rate == MIX_RATE {
|
||||
mix_minus
|
||||
} else {
|
||||
slot.transcoder
|
||||
.resample_f32(&mix_minus, MIX_RATE, target_rate)
|
||||
.unwrap_or_default()
|
||||
};
|
||||
|
||||
// Encode to the leg's codec (f32 → i16 → codec inside encode_from_f32).
|
||||
let encoded = match slot.transcoder.encode_from_f32(&resampled, slot.codec_pt) {
|
||||
Ok(e) if !e.is_empty() => e,
|
||||
_ => continue,
|
||||
};
|
||||
|
||||
// Build RTP packet with header.
|
||||
let header =
|
||||
build_rtp_header(slot.codec_pt, slot.rtp_seq, slot.rtp_ts, slot.rtp_ssrc);
|
||||
let mut rtp = header.to_vec();
|
||||
rtp.extend_from_slice(&encoded);
|
||||
|
||||
slot.rtp_seq = slot.rtp_seq.wrapping_add(1);
|
||||
slot.rtp_ts = slot.rtp_ts.wrapping_add(rtp_clock_increment(slot.codec_pt));
|
||||
|
||||
try_send_leg_output(&out_tx, &call_id, lid, slot, rtp, "participant-audio");
|
||||
}
|
||||
LegRole::Isolated(state) => {
|
||||
drain_prompt_stream(&out_tx, &call_id, lid, state);
|
||||
|
||||
// Check for DTMF digit from this leg.
|
||||
let mut matched_digit: Option<char> = None;
|
||||
for (src_lid, dtmf_pkt) in &dtmf_forward {
|
||||
if src_lid == lid && dtmf_pkt.payload.len() >= 4 {
|
||||
let event_id = dtmf_pkt.payload[0];
|
||||
let end_bit = (dtmf_pkt.payload[1] & 0x80) != 0;
|
||||
if end_bit {
|
||||
const EVENT_CHARS: &[char] = &[
|
||||
'0', '1', '2', '3', '4', '5', '6', '7', '8', '9', '*', '#',
|
||||
'A', 'B', 'C', 'D',
|
||||
];
|
||||
if let Some(&ch) = EVENT_CHARS.get(event_id as usize) {
|
||||
if state.expected_digits.contains(&ch) {
|
||||
matched_digit = Some(ch);
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
if let Some(digit) = matched_digit {
|
||||
// Interaction complete — digit matched.
|
||||
completed_interactions.push((lid.clone(), InteractionResult::Digit(digit)));
|
||||
} else {
|
||||
// Play prompt frame, wait for live TTS, or move to timeout once the
|
||||
// prompt stream has fully drained.
|
||||
let pcm_frame = if let Some(frame) = state.prompt_frames.pop_front() {
|
||||
frame
|
||||
} else if !state.prompt_stream_finished {
|
||||
vec![0.0f32; MIX_FRAME_SIZE]
|
||||
} else {
|
||||
state.prompt_done = true;
|
||||
vec![0.0f32; MIX_FRAME_SIZE]
|
||||
};
|
||||
|
||||
// Encode prompt frame to the leg's codec.
|
||||
let target_rate = codec_sample_rate(slot.codec_pt);
|
||||
let resampled = if target_rate == MIX_RATE {
|
||||
pcm_frame
|
||||
} else {
|
||||
slot.transcoder
|
||||
.resample_f32(&pcm_frame, MIX_RATE, target_rate)
|
||||
.unwrap_or_default()
|
||||
};
|
||||
|
||||
let mut prompt_rtp: Option<Vec<u8>> = None;
|
||||
if let Ok(encoded) =
|
||||
slot.transcoder.encode_from_f32(&resampled, slot.codec_pt)
|
||||
{
|
||||
if !encoded.is_empty() {
|
||||
let header = build_rtp_header(
|
||||
slot.codec_pt,
|
||||
slot.rtp_seq,
|
||||
slot.rtp_ts,
|
||||
slot.rtp_ssrc,
|
||||
);
|
||||
let mut rtp = header.to_vec();
|
||||
rtp.extend_from_slice(&encoded);
|
||||
slot.rtp_seq = slot.rtp_seq.wrapping_add(1);
|
||||
slot.rtp_ts =
|
||||
slot.rtp_ts.wrapping_add(rtp_clock_increment(slot.codec_pt));
|
||||
prompt_rtp = Some(rtp);
|
||||
}
|
||||
}
|
||||
|
||||
// Check timeout (only after prompt finishes).
|
||||
if state.prompt_done {
|
||||
if state.timeout_ticks_remaining == 0 {
|
||||
completed_interactions
|
||||
.push((lid.clone(), InteractionResult::Timeout));
|
||||
} else {
|
||||
state.timeout_ticks_remaining -= 1;
|
||||
}
|
||||
}
|
||||
|
||||
if let Some(rtp) = prompt_rtp {
|
||||
try_send_leg_output(
|
||||
&out_tx,
|
||||
&call_id,
|
||||
lid,
|
||||
slot,
|
||||
rtp,
|
||||
"isolated-prompt",
|
||||
);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Apply completed interactions — revert legs to Participant.
|
||||
for (lid, result) in completed_interactions {
|
||||
if let Some(slot) = legs.get_mut(&lid) {
|
||||
if let LegRole::Isolated(ref mut state) = slot.role {
|
||||
cancel_prompt_producer(state);
|
||||
if let Some(tx) = state.result_tx.take() {
|
||||
let _ = tx.send(result);
|
||||
}
|
||||
}
|
||||
slot.role = LegRole::Participant;
|
||||
}
|
||||
}
|
||||
|
||||
// ── 5. Distribute per-source audio to tool legs. ────────────
|
||||
if !tool_legs.is_empty() {
|
||||
// Collect participant PCM frames (computed in step 2).
|
||||
let sources: Vec<ToolAudioSource> = legs
|
||||
.iter()
|
||||
.filter(|(_, s)| matches!(s.role, LegRole::Participant))
|
||||
.map(|(lid, s)| ToolAudioSource {
|
||||
leg_id: lid.clone(),
|
||||
pcm_48k: s.last_pcm_frame.clone(),
|
||||
})
|
||||
.collect();
|
||||
|
||||
for (tool_leg_id, tool) in tool_legs.iter_mut() {
|
||||
let batch = ToolAudioBatch {
|
||||
sources: sources
|
||||
.iter()
|
||||
.map(|s| ToolAudioSource {
|
||||
leg_id: s.leg_id.clone(),
|
||||
pcm_48k: s.pcm_48k.clone(),
|
||||
})
|
||||
.collect(),
|
||||
};
|
||||
try_send_tool_output(&out_tx, &call_id, tool_leg_id, tool, batch);
|
||||
}
|
||||
}
|
||||
|
||||
// ── 6. Forward DTMF packets between participant legs only. ──
|
||||
for (source_lid, dtmf_pkt) in &dtmf_forward {
|
||||
// Skip if the source is an isolated leg (its DTMF was handled in step 4).
|
||||
if let Some(src_slot) = legs.get(source_lid) {
|
||||
if matches!(src_slot.role, LegRole::Isolated(_)) {
|
||||
continue;
|
||||
}
|
||||
}
|
||||
for (target_lid, target_slot) in legs.iter_mut() {
|
||||
if target_lid == source_lid {
|
||||
continue; // Don't echo DTMF back to sender.
|
||||
}
|
||||
// Don't forward to isolated legs.
|
||||
if matches!(target_slot.role, LegRole::Isolated(_)) {
|
||||
continue;
|
||||
}
|
||||
let mut header = build_rtp_header(
|
||||
101,
|
||||
target_slot.rtp_seq,
|
||||
target_slot.rtp_ts,
|
||||
target_slot.rtp_ssrc,
|
||||
);
|
||||
if dtmf_pkt.marker {
|
||||
header[1] |= 0x80; // Set marker bit.
|
||||
}
|
||||
let mut rtp_out = header.to_vec();
|
||||
rtp_out.extend_from_slice(&dtmf_pkt.payload);
|
||||
target_slot.rtp_seq = target_slot.rtp_seq.wrapping_add(1);
|
||||
// Don't increment rtp_ts for DTMF — it shares timestamp context with audio.
|
||||
try_send_leg_output(&out_tx, &call_id, target_lid, target_slot, rtp_out, "dtmf");
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
@@ -267,11 +267,7 @@ impl ProviderManager {
|
||||
|
||||
/// Try to handle a SIP response as a provider registration response.
|
||||
/// Returns true if consumed.
|
||||
pub async fn handle_response(
|
||||
&self,
|
||||
msg: &SipMessage,
|
||||
socket: &UdpSocket,
|
||||
) -> bool {
|
||||
pub async fn handle_response(&self, msg: &SipMessage, socket: &UdpSocket) -> bool {
|
||||
for ps_arc in &self.providers {
|
||||
let mut ps = ps_arc.lock().await;
|
||||
let was_registered = ps.is_registered;
|
||||
@@ -317,19 +313,39 @@ impl ProviderManager {
|
||||
if ps.config.outbound_proxy.address == addr.ip().to_string() {
|
||||
return Some(ps_arc.clone());
|
||||
}
|
||||
|
||||
// Hostname-based providers (e.g. sipgate.de) often deliver inbound
|
||||
// INVITEs from resolved IPs rather than the literal configured host.
|
||||
// Resolve the proxy host and accept any matching IP/port variant.
|
||||
use std::net::ToSocketAddrs;
|
||||
if let Ok(resolved) = format!(
|
||||
"{}:{}",
|
||||
ps.config.outbound_proxy.address, ps.config.outbound_proxy.port
|
||||
)
|
||||
.to_socket_addrs()
|
||||
{
|
||||
for resolved_addr in resolved {
|
||||
if resolved_addr == *addr || resolved_addr.ip() == addr.ip() {
|
||||
return Some(ps_arc.clone());
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
None
|
||||
}
|
||||
|
||||
/// Check if a provider is currently registered.
|
||||
pub async fn is_registered(&self, provider_id: &str) -> bool {
|
||||
/// Find a provider by its config ID (e.g. "easybell").
|
||||
pub async fn find_by_provider_id(
|
||||
&self,
|
||||
provider_id: &str,
|
||||
) -> Option<Arc<Mutex<ProviderState>>> {
|
||||
for ps_arc in &self.providers {
|
||||
let ps = ps_arc.lock().await;
|
||||
if ps.config.id == provider_id {
|
||||
return ps.is_registered;
|
||||
return Some(ps_arc.clone());
|
||||
}
|
||||
}
|
||||
false
|
||||
None
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
@@ -25,8 +25,7 @@ impl Recorder {
|
||||
) -> Result<Self, String> {
|
||||
// Ensure parent directory exists.
|
||||
if let Some(parent) = Path::new(file_path).parent() {
|
||||
std::fs::create_dir_all(parent)
|
||||
.map_err(|e| format!("create dir: {e}"))?;
|
||||
std::fs::create_dir_all(parent).map_err(|e| format!("create dir: {e}"))?;
|
||||
}
|
||||
|
||||
let sample_rate = 8000u32; // Record at 8kHz (standard telephony)
|
||||
@@ -55,6 +54,59 @@ impl Recorder {
|
||||
})
|
||||
}
|
||||
|
||||
/// Create a recorder that writes raw PCM at a given sample rate.
|
||||
/// Used by tool legs that already have decoded PCM (no RTP processing needed).
|
||||
pub fn new_pcm(
|
||||
file_path: &str,
|
||||
sample_rate: u32,
|
||||
max_duration_ms: Option<u64>,
|
||||
) -> Result<Self, String> {
|
||||
if let Some(parent) = Path::new(file_path).parent() {
|
||||
std::fs::create_dir_all(parent).map_err(|e| format!("create dir: {e}"))?;
|
||||
}
|
||||
|
||||
let spec = hound::WavSpec {
|
||||
channels: 1,
|
||||
sample_rate,
|
||||
bits_per_sample: 16,
|
||||
sample_format: hound::SampleFormat::Int,
|
||||
};
|
||||
|
||||
let writer = hound::WavWriter::create(file_path, spec)
|
||||
.map_err(|e| format!("create WAV {file_path}: {e}"))?;
|
||||
|
||||
// source_pt is unused for PCM recording; set to 0.
|
||||
let transcoder = TranscodeState::new().map_err(|e| format!("codec init: {e}"))?;
|
||||
let max_samples = max_duration_ms.map(|ms| (sample_rate as u64 * ms) / 1000);
|
||||
|
||||
Ok(Self {
|
||||
writer,
|
||||
transcoder,
|
||||
source_pt: 0,
|
||||
total_samples: 0,
|
||||
sample_rate,
|
||||
max_samples,
|
||||
file_path: file_path.to_string(),
|
||||
})
|
||||
}
|
||||
|
||||
/// Write raw PCM samples directly (no RTP decoding).
|
||||
/// Returns true if recording should continue, false if max duration reached.
|
||||
pub fn write_pcm(&mut self, samples: &[i16]) -> bool {
|
||||
for &sample in samples {
|
||||
if self.writer.write_sample(sample).is_err() {
|
||||
return false;
|
||||
}
|
||||
self.total_samples += 1;
|
||||
if let Some(max) = self.max_samples {
|
||||
if self.total_samples >= max {
|
||||
return false;
|
||||
}
|
||||
}
|
||||
}
|
||||
true
|
||||
}
|
||||
|
||||
/// Process an incoming RTP packet (full packet with header).
|
||||
/// Returns true if recording should continue, false if max duration reached.
|
||||
pub fn process_rtp(&mut self, data: &[u8]) -> bool {
|
||||
@@ -128,5 +180,8 @@ impl Recorder {
|
||||
pub struct RecordingResult {
|
||||
pub file_path: String,
|
||||
pub duration_ms: u64,
|
||||
// Running-sample total kept for parity with the TS recorder; not yet
|
||||
// surfaced through any event or dashboard field.
|
||||
#[allow(dead_code)]
|
||||
pub total_samples: u64,
|
||||
}
|
||||
|
||||
@@ -19,11 +19,19 @@ const MAX_EXPIRES: u32 = 300;
|
||||
#[derive(Debug, Clone)]
|
||||
pub struct RegisteredDevice {
|
||||
pub device_id: String,
|
||||
// These fields are populated at REGISTER time for logging/debugging but are
|
||||
// not read back — device identity flows via the `device_registered` push
|
||||
// event, not via struct queries. Kept behind allow(dead_code) because
|
||||
// removing them would churn handle_register for no runtime benefit.
|
||||
#[allow(dead_code)]
|
||||
pub display_name: String,
|
||||
#[allow(dead_code)]
|
||||
pub extension: String,
|
||||
pub contact_addr: SocketAddr,
|
||||
#[allow(dead_code)]
|
||||
pub registered_at: Instant,
|
||||
pub expires_at: Instant,
|
||||
#[allow(dead_code)]
|
||||
pub aor: String,
|
||||
}
|
||||
|
||||
@@ -52,18 +60,17 @@ impl Registrar {
|
||||
|
||||
/// Try to handle a SIP REGISTER from a device.
|
||||
/// Returns Some(response_bytes) if handled, None if not a known device.
|
||||
pub fn handle_register(
|
||||
&mut self,
|
||||
msg: &SipMessage,
|
||||
from_addr: SocketAddr,
|
||||
) -> Option<Vec<u8>> {
|
||||
pub fn handle_register(&mut self, msg: &SipMessage, from_addr: SocketAddr) -> Option<Vec<u8>> {
|
||||
if msg.method() != Some("REGISTER") {
|
||||
return None;
|
||||
}
|
||||
|
||||
// Find the device by matching the source IP against expectedAddress.
|
||||
let from_ip = from_addr.ip().to_string();
|
||||
let device = self.devices.iter().find(|d| d.expected_address == from_ip)?;
|
||||
let device = self
|
||||
.devices
|
||||
.iter()
|
||||
.find(|d| d.expected_address == from_ip)?;
|
||||
|
||||
let from_header = msg.get_header("From").unwrap_or("");
|
||||
let aor = SipMessage::extract_uri(from_header)
|
||||
@@ -71,9 +78,7 @@ impl Registrar {
|
||||
.unwrap_or_else(|| format!("sip:{}@{}", device.extension, from_ip));
|
||||
|
||||
let expires_header = msg.get_header("Expires");
|
||||
let requested: u32 = expires_header
|
||||
.and_then(|s| s.parse().ok())
|
||||
.unwrap_or(3600);
|
||||
let requested: u32 = expires_header.and_then(|s| s.parse().ok()).unwrap_or(3600);
|
||||
let expires = requested.min(MAX_EXPIRES);
|
||||
|
||||
let entry = RegisteredDevice {
|
||||
@@ -114,10 +119,7 @@ impl Registrar {
|
||||
Some(ResponseOptions {
|
||||
to_tag: Some(generate_tag()),
|
||||
contact: Some(contact),
|
||||
extra_headers: Some(vec![(
|
||||
"Expires".to_string(),
|
||||
expires.to_string(),
|
||||
)]),
|
||||
extra_headers: Some(vec![("Expires".to_string(), expires.to_string())]),
|
||||
..Default::default()
|
||||
}),
|
||||
);
|
||||
@@ -134,38 +136,11 @@ impl Registrar {
|
||||
Some(entry.contact_addr)
|
||||
}
|
||||
|
||||
/// Check if a source address belongs to a known device.
|
||||
pub fn is_known_device_address(&self, addr: &str) -> bool {
|
||||
self.devices.iter().any(|d| d.expected_address == addr)
|
||||
}
|
||||
|
||||
/// Find a registered device by its source IP address.
|
||||
pub fn find_by_address(&self, addr: &SocketAddr) -> Option<&RegisteredDevice> {
|
||||
let ip = addr.ip().to_string();
|
||||
self.registered.values().find(|e| {
|
||||
e.contact_addr.ip().to_string() == ip && Instant::now() <= e.expires_at
|
||||
})
|
||||
}
|
||||
|
||||
/// Get all device statuses for the dashboard.
|
||||
pub fn get_all_statuses(&self) -> Vec<serde_json::Value> {
|
||||
let now = Instant::now();
|
||||
let mut result = Vec::new();
|
||||
|
||||
for dc in &self.devices {
|
||||
let reg = self.registered.get(&dc.id);
|
||||
let connected = reg.map(|r| now <= r.expires_at).unwrap_or(false);
|
||||
result.push(serde_json::json!({
|
||||
"id": dc.id,
|
||||
"displayName": dc.display_name,
|
||||
"address": reg.filter(|_| connected).map(|r| r.contact_addr.ip().to_string()),
|
||||
"port": reg.filter(|_| connected).map(|r| r.contact_addr.port()),
|
||||
"aor": reg.map(|r| r.aor.as_str()).unwrap_or(""),
|
||||
"connected": connected,
|
||||
"isBrowser": false,
|
||||
}));
|
||||
}
|
||||
|
||||
result
|
||||
self.registered
|
||||
.values()
|
||||
.find(|e| e.contact_addr.ip().to_string() == ip && Instant::now() <= e.expires_at)
|
||||
}
|
||||
}
|
||||
|
||||
@@ -1,17 +1,19 @@
|
||||
//! RTP port pool and media forwarding.
|
||||
//! RTP port pool for media sockets.
|
||||
//!
|
||||
//! Manages a pool of even-numbered UDP ports for RTP media.
|
||||
//! Each port gets a bound tokio UdpSocket. Supports:
|
||||
//! - Direct forwarding (SIP-to-SIP, no transcoding)
|
||||
//! - Transcoding forwarding (via codec-lib, e.g. G.722 ↔ Opus)
|
||||
//! - Silence generation
|
||||
//! - NAT priming
|
||||
//! Manages a pool of even-numbered UDP ports for RTP media. `allocate()`
|
||||
//! hands back an `Arc<UdpSocket>` to the caller (stored on the owning
|
||||
//! `LegInfo`), while the pool itself keeps only a `Weak<UdpSocket>`. When
|
||||
//! the call terminates and `LegInfo` is dropped, the strong refcount
|
||||
//! reaches zero, the socket is closed, and `allocate()` prunes the dead
|
||||
//! weak ref the next time it scans that slot — so the port automatically
|
||||
//! becomes available for reuse without any explicit `release()` plumbing.
|
||||
//!
|
||||
//! Ported from ts/call/rtp-port-pool.ts + sip-leg.ts RTP handling.
|
||||
//! This fixes the previous leak where the pool held `Arc<UdpSocket>` and
|
||||
//! `release()` was never called, eventually exhausting the port range and
|
||||
//! causing "503 Service Unavailable" on new calls.
|
||||
|
||||
use std::collections::HashMap;
|
||||
use std::net::SocketAddr;
|
||||
use std::sync::Arc;
|
||||
use std::sync::{Arc, Weak};
|
||||
use tokio::net::UdpSocket;
|
||||
|
||||
/// A single RTP port allocation.
|
||||
@@ -24,7 +26,7 @@ pub struct RtpAllocation {
|
||||
pub struct RtpPortPool {
|
||||
min: u16,
|
||||
max: u16,
|
||||
allocated: HashMap<u16, Arc<UdpSocket>>,
|
||||
allocated: HashMap<u16, Weak<UdpSocket>>,
|
||||
}
|
||||
|
||||
impl RtpPortPool {
|
||||
@@ -41,11 +43,19 @@ impl RtpPortPool {
|
||||
pub async fn allocate(&mut self) -> Option<RtpAllocation> {
|
||||
let mut port = self.min;
|
||||
while port < self.max {
|
||||
// Prune a dead weak ref at this slot: if the last strong Arc
|
||||
// (held by the owning LegInfo) was dropped when the call ended,
|
||||
// the socket is already closed and the slot is free again.
|
||||
if let Some(weak) = self.allocated.get(&port) {
|
||||
if weak.strong_count() == 0 {
|
||||
self.allocated.remove(&port);
|
||||
}
|
||||
}
|
||||
if !self.allocated.contains_key(&port) {
|
||||
match UdpSocket::bind(format!("0.0.0.0:{port}")).await {
|
||||
Ok(sock) => {
|
||||
let sock = Arc::new(sock);
|
||||
self.allocated.insert(port, sock.clone());
|
||||
self.allocated.insert(port, Arc::downgrade(&sock));
|
||||
return Some(RtpAllocation { port, socket: sock });
|
||||
}
|
||||
Err(_) => {
|
||||
@@ -57,83 +67,6 @@ impl RtpPortPool {
|
||||
}
|
||||
None // Pool exhausted.
|
||||
}
|
||||
|
||||
/// Release a port back to the pool.
|
||||
pub fn release(&mut self, port: u16) {
|
||||
self.allocated.remove(&port);
|
||||
// Socket is dropped when the last Arc reference goes away.
|
||||
}
|
||||
|
||||
pub fn size(&self) -> usize {
|
||||
self.allocated.len()
|
||||
}
|
||||
|
||||
pub fn capacity(&self) -> usize {
|
||||
((self.max - self.min) / 2) as usize
|
||||
}
|
||||
}
|
||||
|
||||
/// An active RTP relay between two endpoints.
|
||||
/// Receives on `local_socket` and forwards to `remote_addr`.
|
||||
pub struct RtpRelay {
|
||||
pub local_port: u16,
|
||||
pub local_socket: Arc<UdpSocket>,
|
||||
pub remote_addr: Option<SocketAddr>,
|
||||
/// If set, transcode packets using this codec session before forwarding.
|
||||
pub transcode: Option<TranscodeConfig>,
|
||||
/// Packets received counter.
|
||||
pub pkt_received: u64,
|
||||
/// Packets sent counter.
|
||||
pub pkt_sent: u64,
|
||||
}
|
||||
|
||||
pub struct TranscodeConfig {
|
||||
pub from_pt: u8,
|
||||
pub to_pt: u8,
|
||||
pub session_id: String,
|
||||
}
|
||||
|
||||
impl RtpRelay {
|
||||
pub fn new(port: u16, socket: Arc<UdpSocket>) -> Self {
|
||||
Self {
|
||||
local_port: port,
|
||||
local_socket: socket,
|
||||
remote_addr: None,
|
||||
transcode: None,
|
||||
pkt_received: 0,
|
||||
pkt_sent: 0,
|
||||
}
|
||||
}
|
||||
|
||||
pub fn set_remote(&mut self, addr: SocketAddr) {
|
||||
self.remote_addr = Some(addr);
|
||||
}
|
||||
}
|
||||
|
||||
/// Send a 1-byte NAT priming packet to open a pinhole.
|
||||
pub async fn prime_nat(socket: &UdpSocket, remote: SocketAddr) {
|
||||
let _ = socket.send_to(&[0u8], remote).await;
|
||||
}
|
||||
|
||||
/// Build an RTP silence frame for PCMU (payload type 0).
|
||||
pub fn silence_frame_pcmu() -> Vec<u8> {
|
||||
// 12-byte RTP header + 160 bytes of µ-law silence (0xFF)
|
||||
let mut frame = vec![0u8; 172];
|
||||
frame[0] = 0x80; // V=2
|
||||
frame[1] = 0; // PT=0 (PCMU)
|
||||
// seq, timestamp, ssrc left as 0 — caller should set these
|
||||
frame[12..].fill(0xFF); // µ-law silence
|
||||
frame
|
||||
}
|
||||
|
||||
/// Build an RTP silence frame for G.722 (payload type 9).
|
||||
pub fn silence_frame_g722() -> Vec<u8> {
|
||||
// 12-byte RTP header + 160 bytes of G.722 silence
|
||||
let mut frame = vec![0u8; 172];
|
||||
frame[0] = 0x80; // V=2
|
||||
frame[1] = 9; // PT=9 (G.722)
|
||||
// G.722 silence: all zeros is valid silence
|
||||
frame
|
||||
}
|
||||
|
||||
/// Build an RTP header with the given parameters.
|
||||
@@ -149,10 +82,15 @@ pub fn build_rtp_header(pt: u8, seq: u16, timestamp: u32, ssrc: u32) -> [u8; 12]
|
||||
|
||||
/// Get the RTP clock increment per 20ms frame for a payload type.
|
||||
pub fn rtp_clock_increment(pt: u8) -> u32 {
|
||||
rtp_clock_rate(pt) / 50
|
||||
}
|
||||
|
||||
/// Get the RTP clock rate for a payload type.
|
||||
pub fn rtp_clock_rate(pt: u8) -> u32 {
|
||||
match pt {
|
||||
9 => 160, // G.722: 8000 Hz clock rate (despite 16kHz audio) × 0.02s
|
||||
0 | 8 => 160, // PCMU/PCMA: 8000 × 0.02
|
||||
111 => 960, // Opus: 48000 × 0.02
|
||||
_ => 160,
|
||||
9 => 8000, // G.722 uses an 8kHz RTP clock despite 16kHz audio.
|
||||
0 | 8 => 8000, // PCMU/PCMA
|
||||
111 => 48000, // Opus
|
||||
_ => 8000,
|
||||
}
|
||||
}
|
||||
|
||||
@@ -16,7 +16,6 @@ use sip_proto::helpers::{
|
||||
};
|
||||
use sip_proto::message::{RequestOptions, SipMessage};
|
||||
use std::net::SocketAddr;
|
||||
use std::sync::Arc;
|
||||
use tokio::net::UdpSocket;
|
||||
|
||||
/// State of a SIP leg.
|
||||
@@ -40,6 +39,9 @@ pub struct SipLegConfig {
|
||||
/// SIP target endpoint (provider outbound proxy or device address).
|
||||
pub sip_target: SocketAddr,
|
||||
/// Provider credentials (for 407 auth).
|
||||
// username is carried for parity with the provider config but digest auth
|
||||
// rebuilds the username from the registered AOR, so this slot is never read.
|
||||
#[allow(dead_code)]
|
||||
pub username: Option<String>,
|
||||
pub password: Option<String>,
|
||||
pub registered_aor: Option<String>,
|
||||
@@ -51,6 +53,10 @@ pub struct SipLegConfig {
|
||||
|
||||
/// A SIP leg with full dialog management.
|
||||
pub struct SipLeg {
|
||||
// Leg identity is tracked via the enclosing LegInfo's key in the call's
|
||||
// leg map; SipLeg itself never reads this field back. Kept to preserve
|
||||
// the (id, config) constructor shape used by the call manager.
|
||||
#[allow(dead_code)]
|
||||
pub id: String,
|
||||
pub state: LegState,
|
||||
pub config: SipLegConfig,
|
||||
@@ -102,6 +108,24 @@ impl SipLeg {
|
||||
..Default::default()
|
||||
});
|
||||
|
||||
self.send_invite_with_sdp(from_uri, to_uri, sip_call_id, socket, sdp)
|
||||
.await;
|
||||
}
|
||||
|
||||
pub async fn send_invite_with_sdp(
|
||||
&mut self,
|
||||
from_uri: &str,
|
||||
to_uri: &str,
|
||||
sip_call_id: &str,
|
||||
socket: &UdpSocket,
|
||||
sdp: String,
|
||||
) {
|
||||
let ip = self
|
||||
.config
|
||||
.public_ip
|
||||
.as_deref()
|
||||
.unwrap_or(&self.config.lan_ip);
|
||||
|
||||
let invite = SipMessage::create_request(
|
||||
"INVITE",
|
||||
to_uri,
|
||||
@@ -122,17 +146,24 @@ impl SipLeg {
|
||||
max_forwards: Some(70),
|
||||
body: Some(sdp),
|
||||
content_type: Some("application/sdp".to_string()),
|
||||
extra_headers: Some(vec![
|
||||
("User-Agent".to_string(), "SipRouter/1.0".to_string()),
|
||||
]),
|
||||
extra_headers: Some(vec![(
|
||||
"User-Agent".to_string(),
|
||||
"SipRouter/1.0".to_string(),
|
||||
)]),
|
||||
},
|
||||
);
|
||||
|
||||
self.dialog = Some(SipDialog::from_uac_invite(&invite, ip, self.config.lan_port));
|
||||
self.dialog = Some(SipDialog::from_uac_invite(
|
||||
&invite,
|
||||
ip,
|
||||
self.config.lan_port,
|
||||
));
|
||||
self.invite = Some(invite.clone());
|
||||
self.state = LegState::Inviting;
|
||||
|
||||
let _ = socket.send_to(&invite.serialize(), self.config.sip_target).await;
|
||||
let _ = socket
|
||||
.send_to(&invite.serialize(), self.config.sip_target)
|
||||
.await;
|
||||
}
|
||||
|
||||
/// Handle an incoming SIP message routed to this leg.
|
||||
@@ -388,6 +419,10 @@ impl SipLeg {
|
||||
return SipLegAction::Send(ok.serialize());
|
||||
}
|
||||
|
||||
if method == "INVITE" || method == "UPDATE" {
|
||||
return SipLegAction::InDialogRequest(method.to_string());
|
||||
}
|
||||
|
||||
SipLegAction::None
|
||||
}
|
||||
|
||||
@@ -411,11 +446,6 @@ impl SipLeg {
|
||||
dialog.terminate();
|
||||
Some(msg.serialize())
|
||||
}
|
||||
|
||||
/// Get the SIP Call-ID for routing.
|
||||
pub fn sip_call_id(&self) -> Option<&str> {
|
||||
self.dialog.as_ref().map(|d| d.call_id.as_str())
|
||||
}
|
||||
}
|
||||
|
||||
/// Actions produced by the SipLeg message handler.
|
||||
@@ -428,6 +458,9 @@ pub enum SipLegAction {
|
||||
StateChange(LegState),
|
||||
/// Connected — send this ACK.
|
||||
ConnectedWithAck(Vec<u8>),
|
||||
/// Provider sent an in-dialog request (re-INVITE / UPDATE) that needs
|
||||
/// call-manager-specific handling.
|
||||
InDialogRequest(String),
|
||||
/// Terminated with a reason.
|
||||
Terminated(String),
|
||||
/// Send 200 OK and terminate.
|
||||
@@ -442,10 +475,7 @@ pub enum SipLegAction {
|
||||
/// Build an ACK for a non-2xx response (same transaction as the INVITE).
|
||||
fn build_non_2xx_ack(original_invite: &SipMessage, response: &SipMessage) -> SipMessage {
|
||||
let via = original_invite.get_header("Via").unwrap_or("").to_string();
|
||||
let from = original_invite
|
||||
.get_header("From")
|
||||
.unwrap_or("")
|
||||
.to_string();
|
||||
let from = original_invite.get_header("From").unwrap_or("").to_string();
|
||||
let to = response.get_header("To").unwrap_or("").to_string();
|
||||
let call_id = original_invite.call_id().to_string();
|
||||
let cseq_num: u32 = original_invite
|
||||
|
||||
@@ -27,27 +27,9 @@ impl SipTransport {
|
||||
self.socket.clone()
|
||||
}
|
||||
|
||||
/// Send a raw SIP message to a destination.
|
||||
pub async fn send_to(&self, data: &[u8], dest: SocketAddr) -> Result<usize, String> {
|
||||
self.socket
|
||||
.send_to(data, dest)
|
||||
.await
|
||||
.map_err(|e| format!("send to {dest}: {e}"))
|
||||
}
|
||||
|
||||
/// Send a raw SIP message to an address:port pair.
|
||||
pub async fn send_to_addr(&self, data: &[u8], addr: &str, port: u16) -> Result<usize, String> {
|
||||
let dest: SocketAddr = format!("{addr}:{port}")
|
||||
.parse()
|
||||
.map_err(|e| format!("bad address {addr}:{port}: {e}"))?;
|
||||
self.send_to(data, dest).await
|
||||
}
|
||||
|
||||
/// Spawn the UDP receive loop. Calls the handler for every received packet.
|
||||
pub fn spawn_receiver<F>(
|
||||
&self,
|
||||
handler: F,
|
||||
) where
|
||||
pub fn spawn_receiver<F>(&self, handler: F)
|
||||
where
|
||||
F: Fn(&[u8], SocketAddr) + Send + 'static,
|
||||
{
|
||||
let socket = self.socket.clone();
|
||||
|
||||
@@ -0,0 +1,144 @@
|
||||
//! Tool leg consumers — background tasks that process per-source unmerged audio.
|
||||
//!
|
||||
//! Tool legs are observer legs that receive individual audio streams from each
|
||||
//! participant in a call. The mixer pipes `ToolAudioBatch` every 20ms containing
|
||||
//! each participant's decoded PCM@48kHz f32 tagged with source leg ID.
|
||||
//!
|
||||
//! Consumers:
|
||||
//! - **Recording**: writes per-source WAV files for speaker-separated recording.
|
||||
//! - **Transcription**: stub for future Whisper integration (accumulates audio in Rust).
|
||||
|
||||
use crate::ipc::{emit_event, OutTx};
|
||||
use crate::mixer::ToolAudioBatch;
|
||||
use crate::recorder::Recorder;
|
||||
use std::collections::HashMap;
|
||||
use tokio::sync::mpsc;
|
||||
use tokio::task::JoinHandle;
|
||||
|
||||
// ---------------------------------------------------------------------------
|
||||
// Recording consumer
|
||||
// ---------------------------------------------------------------------------
|
||||
|
||||
/// Spawn a recording tool leg that writes per-source WAV files.
|
||||
///
|
||||
/// Returns the channel sender (for the mixer to send batches) and the task handle.
|
||||
/// When the channel is closed (tool leg removed), all WAV files are finalized
|
||||
/// and a `tool_recording_done` event is emitted.
|
||||
pub fn spawn_recording_tool(
|
||||
tool_leg_id: String,
|
||||
call_id: String,
|
||||
base_dir: String,
|
||||
out_tx: OutTx,
|
||||
) -> (mpsc::Sender<ToolAudioBatch>, JoinHandle<()>) {
|
||||
let (tx, mut rx) = mpsc::channel::<ToolAudioBatch>(64);
|
||||
|
||||
let handle = tokio::spawn(async move {
|
||||
let mut recorders: HashMap<String, Recorder> = HashMap::new();
|
||||
|
||||
while let Some(batch) = rx.recv().await {
|
||||
for source in &batch.sources {
|
||||
// Skip silence-only frames (near-zero = no audio activity).
|
||||
let has_audio = source.pcm_48k.iter().any(|&s| s.abs() > 1e-6);
|
||||
if !has_audio && !recorders.contains_key(&source.leg_id) {
|
||||
continue; // Don't create a file for silence-only sources.
|
||||
}
|
||||
|
||||
let recorder = recorders.entry(source.leg_id.clone()).or_insert_with(|| {
|
||||
let path = format!("{}/{}-{}.wav", base_dir, call_id, source.leg_id);
|
||||
Recorder::new_pcm(&path, 48000, None).unwrap_or_else(|e| {
|
||||
panic!("failed to create recorder for {}: {e}", source.leg_id);
|
||||
})
|
||||
});
|
||||
|
||||
// Convert f32 [-1.0, 1.0] to i16 for WAV writing.
|
||||
let pcm_i16: Vec<i16> = source
|
||||
.pcm_48k
|
||||
.iter()
|
||||
.map(|&s| (s * 32767.0).round().clamp(-32768.0, 32767.0) as i16)
|
||||
.collect();
|
||||
if !recorder.write_pcm(&pcm_i16) {
|
||||
// Max duration reached — stop recording this source.
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Channel closed — finalize all recordings.
|
||||
let mut files = Vec::new();
|
||||
for (leg_id, rec) in recorders {
|
||||
let result = rec.stop();
|
||||
files.push(serde_json::json!({
|
||||
"source_leg_id": leg_id,
|
||||
"file_path": result.file_path,
|
||||
"duration_ms": result.duration_ms,
|
||||
}));
|
||||
}
|
||||
|
||||
emit_event(
|
||||
&out_tx,
|
||||
"tool_recording_done",
|
||||
serde_json::json!({
|
||||
"call_id": call_id,
|
||||
"tool_leg_id": tool_leg_id,
|
||||
"files": files,
|
||||
}),
|
||||
);
|
||||
});
|
||||
|
||||
(tx, handle)
|
||||
}
|
||||
|
||||
// ---------------------------------------------------------------------------
|
||||
// Transcription consumer (stub — real plumbing, stub consumer)
|
||||
// ---------------------------------------------------------------------------
|
||||
|
||||
/// Spawn a transcription tool leg.
|
||||
///
|
||||
/// The plumbing is fully real: it receives per-source unmerged PCM@48kHz f32 from
|
||||
/// the mixer every 20ms. The consumer is a stub that accumulates audio and
|
||||
/// reports metadata on close. Future: will stream to a Whisper HTTP endpoint.
|
||||
pub fn spawn_transcription_tool(
|
||||
tool_leg_id: String,
|
||||
call_id: String,
|
||||
out_tx: OutTx,
|
||||
) -> (mpsc::Sender<ToolAudioBatch>, JoinHandle<()>) {
|
||||
let (tx, mut rx) = mpsc::channel::<ToolAudioBatch>(64);
|
||||
|
||||
let handle = tokio::spawn(async move {
|
||||
// Track per-source sample counts for duration reporting.
|
||||
let mut source_samples: HashMap<String, u64> = HashMap::new();
|
||||
|
||||
while let Some(batch) = rx.recv().await {
|
||||
for source in &batch.sources {
|
||||
*source_samples.entry(source.leg_id.clone()).or_insert(0) +=
|
||||
source.pcm_48k.len() as u64;
|
||||
|
||||
// TODO: Future — accumulate chunks and stream to Whisper endpoint.
|
||||
// For now, the audio is received and counted but not processed.
|
||||
}
|
||||
}
|
||||
|
||||
// Channel closed — report metadata.
|
||||
let sources: Vec<serde_json::Value> = source_samples
|
||||
.iter()
|
||||
.map(|(leg_id, samples)| {
|
||||
serde_json::json!({
|
||||
"source_leg_id": leg_id,
|
||||
"duration_ms": (samples * 1000) / 48000,
|
||||
})
|
||||
})
|
||||
.collect();
|
||||
|
||||
emit_event(
|
||||
&out_tx,
|
||||
"tool_transcription_done",
|
||||
serde_json::json!({
|
||||
"call_id": call_id,
|
||||
"tool_leg_id": tool_leg_id,
|
||||
"sources": sources,
|
||||
}),
|
||||
);
|
||||
});
|
||||
|
||||
(tx, handle)
|
||||
}
|
||||
@@ -0,0 +1,392 @@
|
||||
//! Text-to-speech engine — synthesizes text to WAV files using Kokoro neural TTS.
|
||||
//!
|
||||
//! The model is loaded lazily on first use. If the model/voices files are not
|
||||
//! present, the generate command returns an error and the caller skips the prompt.
|
||||
//!
|
||||
//! Caching is handled internally via a `.meta` sidecar file next to each WAV.
|
||||
//! When `cacheable` is true, the engine checks whether the existing WAV was
|
||||
//! generated from the same text+voice; if so it returns immediately (cache hit).
|
||||
//! Callers never need to check for cached files — that is entirely this module's
|
||||
//! responsibility.
|
||||
|
||||
use crate::audio_player::pcm_to_mix_frames;
|
||||
use kokoro_tts::{KokoroTts, Voice};
|
||||
use std::path::Path;
|
||||
use std::sync::Arc;
|
||||
use std::time::{SystemTime, UNIX_EPOCH};
|
||||
use tokio::sync::{mpsc, watch};
|
||||
|
||||
pub const DEFAULT_MODEL_PATH: &str = ".nogit/tts/kokoro-v1.0.onnx";
|
||||
pub const DEFAULT_VOICES_PATH: &str = ".nogit/tts/voices.bin";
|
||||
const TTS_OUTPUT_RATE: u32 = 24000;
|
||||
const MAX_CHUNK_CHARS: usize = 220;
|
||||
const MIN_CHUNK_CHARS: usize = 80;
|
||||
|
||||
pub enum TtsStreamMessage {
|
||||
Frames(Vec<Vec<f32>>),
|
||||
Finished,
|
||||
Failed(String),
|
||||
}
|
||||
|
||||
pub struct TtsLivePrompt {
|
||||
pub initial_frames: Vec<Vec<f32>>,
|
||||
pub stream_rx: mpsc::Receiver<TtsStreamMessage>,
|
||||
pub cancel_tx: watch::Sender<bool>,
|
||||
}
|
||||
|
||||
#[derive(Clone)]
|
||||
pub struct TtsPromptRequest {
|
||||
pub model_path: String,
|
||||
pub voices_path: String,
|
||||
pub voice_name: String,
|
||||
pub text: String,
|
||||
}
|
||||
|
||||
/// Wraps the Kokoro TTS engine with lazy model loading.
|
||||
pub struct TtsEngine {
|
||||
tts: Option<Arc<KokoroTts>>,
|
||||
/// Path that was used to load the current model (for cache invalidation).
|
||||
loaded_model_path: String,
|
||||
loaded_voices_path: String,
|
||||
/// On-disk TTS WAVs are cacheable only within a single engine lifetime.
|
||||
/// Every restart gets a new generation token, so prior process outputs are
|
||||
/// treated as stale and regenerated on first use.
|
||||
cache_generation: String,
|
||||
}
|
||||
|
||||
impl TtsEngine {
|
||||
pub fn new() -> Self {
|
||||
Self {
|
||||
tts: None,
|
||||
loaded_model_path: String::new(),
|
||||
loaded_voices_path: String::new(),
|
||||
cache_generation: SystemTime::now()
|
||||
.duration_since(UNIX_EPOCH)
|
||||
.map(|d| d.as_nanos().to_string())
|
||||
.unwrap_or_else(|_| "0".to_string()),
|
||||
}
|
||||
}
|
||||
|
||||
async fn ensure_loaded(
|
||||
&mut self,
|
||||
model_path: &str,
|
||||
voices_path: &str,
|
||||
) -> Result<Arc<KokoroTts>, String> {
|
||||
if !Path::new(model_path).exists() {
|
||||
return Err(format!("model not found: {model_path}"));
|
||||
}
|
||||
if !Path::new(voices_path).exists() {
|
||||
return Err(format!("voices not found: {voices_path}"));
|
||||
}
|
||||
|
||||
if self.tts.is_none()
|
||||
|| self.loaded_model_path != model_path
|
||||
|| self.loaded_voices_path != voices_path
|
||||
{
|
||||
eprintln!("[tts] loading model: {model_path}");
|
||||
let tts = Arc::new(
|
||||
KokoroTts::new(model_path, voices_path)
|
||||
.await
|
||||
.map_err(|e| format!("model load failed: {e:?}"))?,
|
||||
);
|
||||
self.tts = Some(tts);
|
||||
self.loaded_model_path = model_path.to_string();
|
||||
self.loaded_voices_path = voices_path.to_string();
|
||||
}
|
||||
|
||||
Ok(self.tts.as_ref().unwrap().clone())
|
||||
}
|
||||
|
||||
pub async fn start_live_prompt(
|
||||
&mut self,
|
||||
request: TtsPromptRequest,
|
||||
) -> Result<TtsLivePrompt, String> {
|
||||
if request.text.trim().is_empty() {
|
||||
return Err("empty text".into());
|
||||
}
|
||||
|
||||
let tts = self
|
||||
.ensure_loaded(&request.model_path, &request.voices_path)
|
||||
.await?;
|
||||
let voice = select_voice(&request.voice_name);
|
||||
let chunks = chunk_text(&request.text);
|
||||
if chunks.is_empty() {
|
||||
return Err("empty text".into());
|
||||
}
|
||||
|
||||
let initial_frames = synth_text_to_mix_frames(&tts, chunks[0].as_str(), voice).await?;
|
||||
let remaining_chunks: Vec<String> = chunks.into_iter().skip(1).collect();
|
||||
let (stream_tx, stream_rx) = mpsc::channel(8);
|
||||
let (cancel_tx, cancel_rx) = watch::channel(false);
|
||||
|
||||
tokio::spawn(async move {
|
||||
stream_live_prompt_chunks(tts, voice, remaining_chunks, stream_tx, cancel_rx).await;
|
||||
});
|
||||
|
||||
Ok(TtsLivePrompt {
|
||||
initial_frames,
|
||||
stream_rx,
|
||||
cancel_tx,
|
||||
})
|
||||
}
|
||||
|
||||
/// Generate a WAV file from text.
|
||||
///
|
||||
/// Params (from IPC JSON):
|
||||
/// - `model`: path to the ONNX model file
|
||||
/// - `voices`: path to the voices.bin file
|
||||
/// - `voice`: voice name (e.g. "af_bella")
|
||||
/// - `text`: text to synthesize
|
||||
/// - `output`: output WAV file path
|
||||
/// - `cacheable`: if true, skip synthesis when the output WAV already
|
||||
/// matches the same text+voice (checked via a `.meta` sidecar file)
|
||||
pub async fn generate(
|
||||
&mut self,
|
||||
params: &serde_json::Value,
|
||||
) -> Result<serde_json::Value, String> {
|
||||
let model_path = params
|
||||
.get("model")
|
||||
.and_then(|v| v.as_str())
|
||||
.ok_or("missing 'model' param")?;
|
||||
let voices_path = params
|
||||
.get("voices")
|
||||
.and_then(|v| v.as_str())
|
||||
.ok_or("missing 'voices' param")?;
|
||||
let voice_name = params
|
||||
.get("voice")
|
||||
.and_then(|v| v.as_str())
|
||||
.unwrap_or("af_bella");
|
||||
let text = params
|
||||
.get("text")
|
||||
.and_then(|v| v.as_str())
|
||||
.ok_or("missing 'text' param")?;
|
||||
let output_path = params
|
||||
.get("output")
|
||||
.and_then(|v| v.as_str())
|
||||
.ok_or("missing 'output' param")?;
|
||||
let cacheable = params
|
||||
.get("cacheable")
|
||||
.and_then(|v| v.as_bool())
|
||||
.unwrap_or(false);
|
||||
|
||||
if text.is_empty() {
|
||||
return Err("empty text".into());
|
||||
}
|
||||
|
||||
// Cache check: if cacheable and the sidecar matches, return immediately.
|
||||
if cacheable && self.is_cache_hit(output_path, text, voice_name) {
|
||||
eprintln!("[tts] cache hit: {output_path}");
|
||||
return Ok(serde_json::json!({ "output": output_path }));
|
||||
}
|
||||
|
||||
// Ensure parent directory exists.
|
||||
if let Some(parent) = Path::new(output_path).parent() {
|
||||
let _ = std::fs::create_dir_all(parent);
|
||||
}
|
||||
|
||||
let tts = self.ensure_loaded(model_path, voices_path).await?;
|
||||
let voice = select_voice(voice_name);
|
||||
|
||||
eprintln!("[tts] synthesizing WAV voice '{voice_name}' to {output_path}");
|
||||
let (samples, duration) = tts
|
||||
.synth(text, voice)
|
||||
.await
|
||||
.map_err(|e| format!("synthesis failed: {e:?}"))?;
|
||||
eprintln!(
|
||||
"[tts] synthesized {} samples in {duration:?}",
|
||||
samples.len()
|
||||
);
|
||||
|
||||
// Write 24kHz 16-bit mono WAV.
|
||||
let spec = hound::WavSpec {
|
||||
channels: 1,
|
||||
sample_rate: 24000,
|
||||
bits_per_sample: 16,
|
||||
sample_format: hound::SampleFormat::Int,
|
||||
};
|
||||
|
||||
let mut writer = hound::WavWriter::create(output_path, spec)
|
||||
.map_err(|e| format!("WAV create failed: {e}"))?;
|
||||
for &sample in &samples {
|
||||
let s16 = (sample * 32767.0).round().clamp(-32768.0, 32767.0) as i16;
|
||||
writer
|
||||
.write_sample(s16)
|
||||
.map_err(|e| format!("WAV write: {e}"))?;
|
||||
}
|
||||
writer
|
||||
.finalize()
|
||||
.map_err(|e| format!("WAV finalize: {e}"))?;
|
||||
|
||||
// Write sidecar for future cache checks.
|
||||
if cacheable {
|
||||
self.write_cache_meta(output_path, text, voice_name);
|
||||
}
|
||||
|
||||
eprintln!("[tts] wrote {output_path}");
|
||||
Ok(serde_json::json!({ "output": output_path }))
|
||||
}
|
||||
|
||||
// -----------------------------------------------------------------------
|
||||
// Cache helpers
|
||||
// -----------------------------------------------------------------------
|
||||
|
||||
/// Check if the WAV + sidecar on disk match the given text+voice.
|
||||
fn is_cache_hit(&self, output_path: &str, text: &str, voice: &str) -> bool {
|
||||
let meta_path = format!("{output_path}.meta");
|
||||
if !Path::new(output_path).exists() || !Path::new(&meta_path).exists() {
|
||||
return false;
|
||||
}
|
||||
match std::fs::read_to_string(&meta_path) {
|
||||
Ok(contents) => contents == self.cache_key(text, voice),
|
||||
Err(_) => false,
|
||||
}
|
||||
}
|
||||
|
||||
/// Write the sidecar `.meta` file next to the WAV.
|
||||
fn write_cache_meta(&self, output_path: &str, text: &str, voice: &str) {
|
||||
let meta_path = format!("{output_path}.meta");
|
||||
let _ = std::fs::write(&meta_path, self.cache_key(text, voice));
|
||||
}
|
||||
|
||||
/// Build the cache key from process generation + text + voice.
|
||||
fn cache_key(&self, text: &str, voice: &str) -> String {
|
||||
format!("{}\0{}\0{}", self.cache_generation, text, voice)
|
||||
}
|
||||
}
|
||||
|
||||
async fn synth_text_to_mix_frames(
|
||||
tts: &Arc<KokoroTts>,
|
||||
text: &str,
|
||||
voice: Voice,
|
||||
) -> Result<Vec<Vec<f32>>, String> {
|
||||
let (samples, duration) = tts
|
||||
.synth(text, voice)
|
||||
.await
|
||||
.map_err(|e| format!("synthesis failed: {e:?}"))?;
|
||||
eprintln!(
|
||||
"[tts] synthesized chunk ({} chars, {} samples) in {duration:?}",
|
||||
text.chars().count(),
|
||||
samples.len()
|
||||
);
|
||||
pcm_to_mix_frames(&samples, TTS_OUTPUT_RATE)
|
||||
}
|
||||
|
||||
async fn stream_live_prompt_chunks(
|
||||
tts: Arc<KokoroTts>,
|
||||
voice: Voice,
|
||||
chunks: Vec<String>,
|
||||
stream_tx: mpsc::Sender<TtsStreamMessage>,
|
||||
mut cancel_rx: watch::Receiver<bool>,
|
||||
) {
|
||||
for chunk in chunks {
|
||||
if *cancel_rx.borrow() {
|
||||
break;
|
||||
}
|
||||
|
||||
match synth_text_to_mix_frames(&tts, &chunk, voice).await {
|
||||
Ok(frames) => {
|
||||
if *cancel_rx.borrow() {
|
||||
break;
|
||||
}
|
||||
if stream_tx.send(TtsStreamMessage::Frames(frames)).await.is_err() {
|
||||
return;
|
||||
}
|
||||
}
|
||||
Err(error) => {
|
||||
let _ = stream_tx.send(TtsStreamMessage::Failed(error)).await;
|
||||
return;
|
||||
}
|
||||
}
|
||||
|
||||
if cancel_rx.has_changed().unwrap_or(false) && *cancel_rx.borrow_and_update() {
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
let _ = stream_tx.send(TtsStreamMessage::Finished).await;
|
||||
}
|
||||
|
||||
fn chunk_text(text: &str) -> Vec<String> {
|
||||
let mut chunks = Vec::new();
|
||||
let mut current = String::new();
|
||||
|
||||
for ch in text.chars() {
|
||||
current.push(ch);
|
||||
|
||||
let len = current.chars().count();
|
||||
let hard_split = len >= MAX_CHUNK_CHARS && (ch.is_whitespace() || is_soft_boundary(ch));
|
||||
let natural_split = len >= MIN_CHUNK_CHARS && is_sentence_boundary(ch);
|
||||
|
||||
if natural_split || hard_split {
|
||||
push_chunk(&mut chunks, &mut current);
|
||||
}
|
||||
}
|
||||
|
||||
push_chunk(&mut chunks, &mut current);
|
||||
|
||||
if chunks.len() >= 2 {
|
||||
let last_len = chunks.last().unwrap().chars().count();
|
||||
if last_len < (MIN_CHUNK_CHARS / 2) {
|
||||
let tail = chunks.pop().unwrap();
|
||||
if let Some(prev) = chunks.last_mut() {
|
||||
prev.push(' ');
|
||||
prev.push_str(tail.trim());
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
chunks
|
||||
}
|
||||
|
||||
/// Move the trimmed contents of `current` into `chunks`, then empty `current`.
///
/// Buffers that contain only whitespace (or nothing) are discarded rather
/// than pushed. `current` is cleared in every case.
fn push_chunk(chunks: &mut Vec<String>, current: &mut String) {
    match current.trim() {
        "" => {}
        text => chunks.push(text.to_owned()),
    }
    current.clear();
}
|
||||
|
||||
/// True for characters that end a sentence-like unit: `.`, `!`, `?`, newline,
/// `;` and `:`.
fn is_sentence_boundary(ch: char) -> bool {
    const SENTENCE_ENDINGS: [char; 6] = ['.', '!', '?', '\n', ';', ':'];
    SENTENCE_ENDINGS.contains(&ch)
}
|
||||
|
||||
/// True for characters where an over-long chunk may be cut mid-sentence:
/// `,`, `;`, `:`, `)`, `]` and newline.
fn is_soft_boundary(ch: char) -> bool {
    ",;:)]\n".contains(ch)
}
|
||||
|
||||
/// Map voice name string to Kokoro Voice enum variant.
|
||||
fn select_voice(name: &str) -> Voice {
|
||||
match name {
|
||||
"af_bella" => Voice::AfBella(1.0),
|
||||
"af_heart" => Voice::AfHeart(1.0),
|
||||
"af_jessica" => Voice::AfJessica(1.0),
|
||||
"af_nicole" => Voice::AfNicole(1.0),
|
||||
"af_nova" => Voice::AfNova(1.0),
|
||||
"af_sarah" => Voice::AfSarah(1.0),
|
||||
"af_sky" => Voice::AfSky(1.0),
|
||||
"af_river" => Voice::AfRiver(1.0),
|
||||
"af_alloy" => Voice::AfAlloy(1.0),
|
||||
"af_aoede" => Voice::AfAoede(1.0),
|
||||
"af_kore" => Voice::AfKore(1.0),
|
||||
"am_adam" => Voice::AmAdam(1.0),
|
||||
"am_echo" => Voice::AmEcho(1.0),
|
||||
"am_eric" => Voice::AmEric(1.0),
|
||||
"am_fenrir" => Voice::AmFenrir(1.0),
|
||||
"am_liam" => Voice::AmLiam(1.0),
|
||||
"am_michael" => Voice::AmMichael(1.0),
|
||||
"am_onyx" => Voice::AmOnyx(1.0),
|
||||
"am_puck" => Voice::AmPuck(1.0),
|
||||
"bf_alice" => Voice::BfAlice(1.0),
|
||||
"bf_emma" => Voice::BfEmma(1.0),
|
||||
"bf_isabella" => Voice::BfIsabella(1.0),
|
||||
"bf_lily" => Voice::BfLily(1.0),
|
||||
"bm_daniel" => Voice::BmDaniel(1.0),
|
||||
"bm_fable" => Voice::BmFable(1.0),
|
||||
"bm_george" => Voice::BmGeorge(1.0),
|
||||
"bm_lewis" => Voice::BmLewis(1.0),
|
||||
_ => {
|
||||
eprintln!("[tts] unknown voice '{name}', falling back to af_bella");
|
||||
Voice::AfBella(1.0)
|
||||
}
|
||||
}
|
||||
}
|
||||
@@ -19,6 +19,7 @@ pub async fn run_voicemail_session(
|
||||
rtp_socket: Arc<UdpSocket>,
|
||||
provider_media: SocketAddr,
|
||||
codec_pt: u8,
|
||||
voicebox_id: Option<String>,
|
||||
greeting_wav: Option<String>,
|
||||
recording_path: String,
|
||||
max_recording_ms: u64,
|
||||
@@ -33,6 +34,7 @@ pub async fn run_voicemail_session(
|
||||
"voicemail_started",
|
||||
serde_json::json!({
|
||||
"call_id": call_id,
|
||||
"voicebox_id": voicebox_id,
|
||||
"caller_number": caller_number,
|
||||
}),
|
||||
);
|
||||
@@ -102,6 +104,7 @@ pub async fn run_voicemail_session(
|
||||
"recording_done",
|
||||
serde_json::json!({
|
||||
"call_id": call_id,
|
||||
"voicebox_id": voicebox_id,
|
||||
"file_path": result.file_path,
|
||||
"duration_ms": result.duration_ms,
|
||||
"caller_number": caller_number,
|
||||
@@ -128,8 +131,8 @@ async fn record_from_socket(
|
||||
break; // Max duration reached.
|
||||
}
|
||||
}
|
||||
Ok(Err(_)) => break, // Socket error (closed).
|
||||
Err(_) => break, // Timeout (max duration + grace).
|
||||
Ok(Err(_)) => break, // Socket error (closed).
|
||||
Err(_) => break, // Timeout (max duration + grace).
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
@@ -1,16 +1,17 @@
|
||||
//! WebRTC engine — manages browser PeerConnections with SIP audio bridging.
|
||||
//! WebRTC engine — manages browser PeerConnections.
|
||||
//!
|
||||
//! Browser Opus audio → Rust PeerConnection → transcode via codec-lib → SIP RTP
|
||||
//! SIP RTP → transcode via codec-lib → Rust PeerConnection → Browser Opus
|
||||
//! Audio bridging is now channel-based:
|
||||
//! - Browser Opus audio → on_track → mixer inbound channel
|
||||
//! - Mixer outbound channel → Opus RTP → TrackLocalStaticRTP → browser
|
||||
//!
|
||||
//! The mixer handles all transcoding. The WebRTC engine just shuttles raw Opus.
|
||||
|
||||
use crate::ipc::{emit_event, OutTx};
|
||||
use crate::rtp::{build_rtp_header, rtp_clock_increment};
|
||||
use codec_lib::{TranscodeState, PT_G722, PT_OPUS};
|
||||
use crate::mixer::RtpPacket;
|
||||
use codec_lib::PT_OPUS;
|
||||
use std::collections::HashMap;
|
||||
use std::net::SocketAddr;
|
||||
use std::sync::Arc;
|
||||
use tokio::net::UdpSocket;
|
||||
use tokio::sync::Mutex;
|
||||
use tokio::sync::{mpsc, Mutex};
|
||||
use webrtc::api::media_engine::MediaEngine;
|
||||
use webrtc::api::APIBuilder;
|
||||
use webrtc::ice_transport::ice_candidate::RTCIceCandidateInit;
|
||||
@@ -22,26 +23,14 @@ use webrtc::rtp_transceiver::rtp_codec::RTCRtpCodecCapability;
|
||||
use webrtc::track::track_local::track_local_static_rtp::TrackLocalStaticRTP;
|
||||
use webrtc::track::track_local::{TrackLocal, TrackLocalWriter};
|
||||
|
||||
/// SIP-side bridge info for a WebRTC session.
|
||||
#[derive(Clone)]
|
||||
pub struct SipBridgeInfo {
|
||||
/// Provider's media endpoint (RTP destination).
|
||||
pub provider_media: SocketAddr,
|
||||
/// Provider's codec payload type (e.g. 9 for G.722).
|
||||
pub sip_pt: u8,
|
||||
/// The allocated RTP socket for bidirectional audio with the provider.
|
||||
/// This is the socket whose port was advertised in SDP, so the provider
|
||||
/// sends RTP here and expects RTP from this port.
|
||||
pub rtp_socket: Arc<UdpSocket>,
|
||||
}
|
||||
|
||||
/// A managed WebRTC session.
|
||||
struct WebRtcSession {
|
||||
pc: Arc<RTCPeerConnection>,
|
||||
local_track: Arc<TrackLocalStaticRTP>,
|
||||
call_id: Option<String>,
|
||||
/// SIP bridge — set when the session is linked to a call.
|
||||
sip_bridge: Arc<Mutex<Option<SipBridgeInfo>>>,
|
||||
/// Channel sender for forwarding browser Opus audio to the mixer.
|
||||
/// Set when the session is linked to a call via link_to_mixer().
|
||||
mixer_tx: Arc<Mutex<Option<mpsc::Sender<RtpPacket>>>>,
|
||||
}
|
||||
|
||||
/// Manages all WebRTC sessions.
|
||||
@@ -58,7 +47,7 @@ impl WebRtcEngine {
|
||||
}
|
||||
}
|
||||
|
||||
/// Handle a WebRTC offer from a browser.
|
||||
/// Handle a WebRTC offer from a browser — create PeerConnection, return SDP answer.
|
||||
pub async fn handle_offer(
|
||||
&mut self,
|
||||
session_id: &str,
|
||||
@@ -69,9 +58,7 @@ impl WebRtcEngine {
|
||||
.register_default_codecs()
|
||||
.map_err(|e| format!("register codecs: {e}"))?;
|
||||
|
||||
let api = APIBuilder::new()
|
||||
.with_media_engine(media_engine)
|
||||
.build();
|
||||
let api = APIBuilder::new().with_media_engine(media_engine).build();
|
||||
|
||||
let config = RTCConfiguration {
|
||||
ice_servers: vec![],
|
||||
@@ -101,8 +88,8 @@ impl WebRtcEngine {
|
||||
.await
|
||||
.map_err(|e| format!("add track: {e}"))?;
|
||||
|
||||
// Shared SIP bridge info (populated when linked to a call).
|
||||
let sip_bridge: Arc<Mutex<Option<SipBridgeInfo>>> = Arc::new(Mutex::new(None));
|
||||
// Shared mixer channel sender (populated when linked to a call).
|
||||
let mixer_tx: Arc<Mutex<Option<mpsc::Sender<RtpPacket>>>> = Arc::new(Mutex::new(None));
|
||||
|
||||
// ICE candidate handler.
|
||||
let out_tx_ice = self.out_tx.clone();
|
||||
@@ -153,14 +140,14 @@ impl WebRtcEngine {
|
||||
}));
|
||||
|
||||
// Track handler — receives Opus audio from the browser.
|
||||
// When SIP bridge is set, transcodes and forwards to provider.
|
||||
// Forwards raw Opus payload to the mixer channel (when linked).
|
||||
let out_tx_track = self.out_tx.clone();
|
||||
let sid_track = session_id.to_string();
|
||||
let sip_bridge_for_track = sip_bridge.clone();
|
||||
let mixer_tx_for_track = mixer_tx.clone();
|
||||
pc.on_track(Box::new(move |track, _receiver, _transceiver| {
|
||||
let out_tx = out_tx_track.clone();
|
||||
let sid = sid_track.clone();
|
||||
let bridge = sip_bridge_for_track.clone();
|
||||
let mixer_tx = mixer_tx_for_track.clone();
|
||||
Box::pin(async move {
|
||||
let codec_info = track.codec();
|
||||
emit_event(
|
||||
@@ -173,8 +160,8 @@ impl WebRtcEngine {
|
||||
}),
|
||||
);
|
||||
|
||||
// Spawn the browser→SIP audio forwarding task.
|
||||
tokio::spawn(browser_to_sip_loop(track, bridge, out_tx, sid));
|
||||
// Spawn browser→mixer forwarding task.
|
||||
tokio::spawn(browser_to_mixer_loop(track, mixer_tx, out_tx, sid));
|
||||
})
|
||||
}));
|
||||
|
||||
@@ -201,43 +188,41 @@ impl WebRtcEngine {
|
||||
pc,
|
||||
local_track,
|
||||
call_id: None,
|
||||
sip_bridge,
|
||||
mixer_tx,
|
||||
},
|
||||
);
|
||||
|
||||
Ok(answer_sdp)
|
||||
}
|
||||
|
||||
/// Link a WebRTC session to a SIP call — sets up bidirectional audio bridge.
|
||||
/// - Browser→SIP: already running via on_track handler, will start forwarding
|
||||
/// once bridge info is set.
|
||||
/// - SIP→Browser: spawned here, reads from the RTP socket and sends to browser.
|
||||
pub async fn link_to_sip(
|
||||
/// Link a WebRTC session to a call's mixer via channels.
|
||||
/// - `inbound_tx`: browser audio goes TO the mixer through this channel
|
||||
/// - `outbound_rx`: mixed audio comes FROM the mixer through this channel
|
||||
pub async fn link_to_mixer(
|
||||
&mut self,
|
||||
session_id: &str,
|
||||
call_id: &str,
|
||||
bridge_info: SipBridgeInfo,
|
||||
inbound_tx: mpsc::Sender<RtpPacket>,
|
||||
outbound_rx: mpsc::Receiver<Vec<u8>>,
|
||||
) -> bool {
|
||||
if let Some(session) = self.sessions.get_mut(session_id) {
|
||||
session.call_id = Some(call_id.to_string());
|
||||
let session = match self.sessions.get_mut(session_id) {
|
||||
Some(s) => s,
|
||||
None => return false,
|
||||
};
|
||||
|
||||
// Spawn SIP → browser audio loop (provider RTP → transcode → Opus → WebRTC track).
|
||||
let local_track = session.local_track.clone();
|
||||
let rtp_socket = bridge_info.rtp_socket.clone();
|
||||
let sip_pt = bridge_info.sip_pt;
|
||||
let out_tx = self.out_tx.clone();
|
||||
let sid = session_id.to_string();
|
||||
tokio::spawn(sip_to_browser_loop(
|
||||
rtp_socket, local_track, sip_pt, out_tx, sid,
|
||||
));
|
||||
session.call_id = Some(call_id.to_string());
|
||||
|
||||
// Set bridge info — this unblocks the browser→SIP loop (already running).
|
||||
let mut bridge = session.sip_bridge.lock().await;
|
||||
*bridge = Some(bridge_info);
|
||||
true
|
||||
} else {
|
||||
false
|
||||
// Set the mixer sender so the on_track loop starts forwarding.
|
||||
{
|
||||
let mut tx = session.mixer_tx.lock().await;
|
||||
*tx = Some(inbound_tx);
|
||||
}
|
||||
|
||||
// Spawn mixer→browser outbound task.
|
||||
let local_track = session.local_track.clone();
|
||||
tokio::spawn(mixer_to_browser_loop(outbound_rx, local_track));
|
||||
|
||||
true
|
||||
}
|
||||
|
||||
pub async fn add_ice_candidate(
|
||||
@@ -268,94 +253,59 @@ impl WebRtcEngine {
|
||||
|
||||
pub async fn close_session(&mut self, session_id: &str) -> Result<(), String> {
|
||||
if let Some(session) = self.sessions.remove(session_id) {
|
||||
session.pc.close().await.map_err(|e| format!("close: {e}"))?;
|
||||
session
|
||||
.pc
|
||||
.close()
|
||||
.await
|
||||
.map_err(|e| format!("close: {e}"))?;
|
||||
}
|
||||
Ok(())
|
||||
}
|
||||
|
||||
pub fn has_session(&self, session_id: &str) -> bool {
|
||||
self.sessions.contains_key(session_id)
|
||||
}
|
||||
}
|
||||
|
||||
/// Browser → SIP audio forwarding loop.
|
||||
/// Reads Opus RTP from the browser, transcodes to the SIP codec, sends to provider.
|
||||
async fn browser_to_sip_loop(
|
||||
/// Browser → Mixer audio forwarding loop.
|
||||
/// Reads Opus RTP from the browser track, sends raw Opus payload to the mixer channel.
|
||||
async fn browser_to_mixer_loop(
|
||||
track: Arc<webrtc::track::track_remote::TrackRemote>,
|
||||
sip_bridge: Arc<Mutex<Option<SipBridgeInfo>>>,
|
||||
mixer_tx: Arc<Mutex<Option<mpsc::Sender<RtpPacket>>>>,
|
||||
out_tx: OutTx,
|
||||
session_id: String,
|
||||
) {
|
||||
// Create a persistent codec state for this direction.
|
||||
let mut transcoder = match TranscodeState::new() {
|
||||
Ok(t) => t,
|
||||
Err(e) => {
|
||||
emit_event(
|
||||
&out_tx,
|
||||
"webrtc_error",
|
||||
serde_json::json!({ "session_id": session_id, "error": format!("codec init: {e}") }),
|
||||
);
|
||||
return;
|
||||
}
|
||||
};
|
||||
|
||||
let mut buf = vec![0u8; 1500];
|
||||
let mut count = 0u64;
|
||||
let mut to_sip_seq: u16 = 0;
|
||||
let mut to_sip_ts: u32 = 0;
|
||||
let to_sip_ssrc: u32 = rand::random();
|
||||
|
||||
loop {
|
||||
match track.read(&mut buf).await {
|
||||
Ok((rtp_packet, _attributes)) => {
|
||||
count += 1;
|
||||
|
||||
// Get the SIP bridge info (may not be set yet if call isn't linked).
|
||||
let bridge = sip_bridge.lock().await;
|
||||
let bridge_info = match bridge.as_ref() {
|
||||
Some(b) => b.clone(),
|
||||
None => continue, // Not linked to a SIP call yet — drop the packet.
|
||||
};
|
||||
drop(bridge); // Release lock before doing I/O.
|
||||
|
||||
// Extract Opus payload from the RTP packet (skip 12-byte header).
|
||||
let payload = &rtp_packet.payload;
|
||||
if payload.is_empty() {
|
||||
continue;
|
||||
}
|
||||
|
||||
// Transcode Opus → SIP codec (e.g. G.722).
|
||||
let sip_payload = match transcoder.transcode(
|
||||
payload,
|
||||
PT_OPUS,
|
||||
bridge_info.sip_pt,
|
||||
Some("to_sip"),
|
||||
) {
|
||||
Ok(p) if !p.is_empty() => p,
|
||||
_ => continue,
|
||||
};
|
||||
|
||||
// Build SIP RTP packet.
|
||||
let header = build_rtp_header(bridge_info.sip_pt, to_sip_seq, to_sip_ts, to_sip_ssrc);
|
||||
let mut sip_rtp = header.to_vec();
|
||||
sip_rtp.extend_from_slice(&sip_payload);
|
||||
|
||||
to_sip_seq = to_sip_seq.wrapping_add(1);
|
||||
to_sip_ts = to_sip_ts.wrapping_add(rtp_clock_increment(bridge_info.sip_pt));
|
||||
|
||||
// Send to provider via the RTP socket (correct source port matching our SDP).
|
||||
let _ = bridge_info
|
||||
.rtp_socket
|
||||
.send_to(&sip_rtp, bridge_info.provider_media)
|
||||
.await;
|
||||
// Send raw Opus payload to mixer (if linked).
|
||||
let tx = mixer_tx.lock().await;
|
||||
if let Some(ref tx) = *tx {
|
||||
let _ = tx
|
||||
.send(RtpPacket {
|
||||
payload: payload.to_vec(),
|
||||
payload_type: PT_OPUS,
|
||||
marker: rtp_packet.header.marker,
|
||||
seq: rtp_packet.header.sequence_number,
|
||||
timestamp: rtp_packet.header.timestamp,
|
||||
})
|
||||
.await;
|
||||
}
|
||||
drop(tx);
|
||||
|
||||
if count == 1 || count == 50 || count % 500 == 0 {
|
||||
emit_event(
|
||||
&out_tx,
|
||||
"webrtc_audio_tx",
|
||||
"webrtc_audio_rx",
|
||||
serde_json::json!({
|
||||
"session_id": session_id,
|
||||
"direction": "browser_to_sip",
|
||||
"direction": "browser_to_mixer",
|
||||
"packet_count": count,
|
||||
}),
|
||||
);
|
||||
@@ -366,85 +316,13 @@ async fn browser_to_sip_loop(
|
||||
}
|
||||
}
|
||||
|
||||
/// SIP → Browser audio forwarding loop.
|
||||
/// Reads RTP from the provider (via the allocated RTP socket), transcodes to Opus,
|
||||
/// and writes to the WebRTC local track for delivery to the browser.
|
||||
async fn sip_to_browser_loop(
|
||||
rtp_socket: Arc<UdpSocket>,
|
||||
/// Mixer → Browser audio forwarding loop.
|
||||
/// Reads Opus-encoded RTP packets from the mixer and writes to the WebRTC track.
|
||||
async fn mixer_to_browser_loop(
|
||||
mut outbound_rx: mpsc::Receiver<Vec<u8>>,
|
||||
local_track: Arc<TrackLocalStaticRTP>,
|
||||
sip_pt: u8,
|
||||
out_tx: OutTx,
|
||||
session_id: String,
|
||||
) {
|
||||
let mut transcoder = match TranscodeState::new() {
|
||||
Ok(t) => t,
|
||||
Err(e) => {
|
||||
emit_event(
|
||||
&out_tx,
|
||||
"webrtc_error",
|
||||
serde_json::json!({
|
||||
"session_id": session_id,
|
||||
"error": format!("sip_to_browser codec init: {e}"),
|
||||
}),
|
||||
);
|
||||
return;
|
||||
}
|
||||
};
|
||||
|
||||
let mut buf = vec![0u8; 1500];
|
||||
let mut count = 0u64;
|
||||
let mut seq: u16 = 0;
|
||||
let mut ts: u32 = 0;
|
||||
let ssrc: u32 = rand::random();
|
||||
|
||||
loop {
|
||||
match rtp_socket.recv_from(&mut buf).await {
|
||||
Ok((n, _from)) => {
|
||||
if n < 12 {
|
||||
continue; // Too small for RTP header.
|
||||
}
|
||||
count += 1;
|
||||
|
||||
// Extract payload (skip 12-byte RTP header).
|
||||
let payload = &buf[12..n];
|
||||
if payload.is_empty() {
|
||||
continue;
|
||||
}
|
||||
|
||||
// Transcode SIP codec → Opus.
|
||||
let opus_payload = match transcoder.transcode(
|
||||
payload,
|
||||
sip_pt,
|
||||
PT_OPUS,
|
||||
Some("sip_to_browser"),
|
||||
) {
|
||||
Ok(p) if !p.is_empty() => p,
|
||||
_ => continue,
|
||||
};
|
||||
|
||||
// Build Opus RTP packet.
|
||||
let header = build_rtp_header(PT_OPUS, seq, ts, ssrc);
|
||||
let mut packet = header.to_vec();
|
||||
packet.extend_from_slice(&opus_payload);
|
||||
|
||||
seq = seq.wrapping_add(1);
|
||||
ts = ts.wrapping_add(960); // Opus: 48000 Hz × 20ms = 960 samples
|
||||
|
||||
let _ = local_track.write(&packet).await;
|
||||
|
||||
if count == 1 || count == 50 || count % 500 == 0 {
|
||||
emit_event(
|
||||
&out_tx,
|
||||
"webrtc_audio_rx",
|
||||
serde_json::json!({
|
||||
"session_id": session_id,
|
||||
"direction": "sip_to_browser",
|
||||
"packet_count": count,
|
||||
}),
|
||||
);
|
||||
}
|
||||
}
|
||||
Err(_) => break, // Socket closed.
|
||||
}
|
||||
while let Some(rtp_data) = outbound_rx.recv().await {
|
||||
let _ = local_track.write(&rtp_data).await;
|
||||
}
|
||||
}
|
||||
|
||||
@@ -51,9 +51,7 @@ impl SipDialog {
|
||||
.map(|s| s.to_string())
|
||||
.unwrap_or_else(generate_tag),
|
||||
remote_tag: None,
|
||||
local_uri: SipMessage::extract_uri(from)
|
||||
.unwrap_or("")
|
||||
.to_string(),
|
||||
local_uri: SipMessage::extract_uri(from).unwrap_or("").to_string(),
|
||||
remote_uri: SipMessage::extract_uri(to).unwrap_or("").to_string(),
|
||||
local_cseq,
|
||||
remote_cseq: 0,
|
||||
@@ -181,10 +179,7 @@ impl SipDialog {
|
||||
format!("<{}>{remote_tag_str}", self.remote_uri),
|
||||
),
|
||||
("Call-ID".to_string(), self.call_id.clone()),
|
||||
(
|
||||
"CSeq".to_string(),
|
||||
format!("{} {method}", self.local_cseq),
|
||||
),
|
||||
("CSeq".to_string(), format!("{} {method}", self.local_cseq)),
|
||||
("Max-Forwards".to_string(), "70".to_string()),
|
||||
];
|
||||
|
||||
@@ -243,10 +238,7 @@ impl SipDialog {
|
||||
format!("<{}>{remote_tag_str}", self.remote_uri),
|
||||
),
|
||||
("Call-ID".to_string(), self.call_id.clone()),
|
||||
(
|
||||
"CSeq".to_string(),
|
||||
format!("{} ACK", self.local_cseq),
|
||||
),
|
||||
("CSeq".to_string(), format!("{} ACK", self.local_cseq)),
|
||||
("Max-Forwards".to_string(), "70".to_string()),
|
||||
];
|
||||
|
||||
@@ -271,10 +263,7 @@ impl SipDialog {
|
||||
("From".to_string(), from),
|
||||
("To".to_string(), to),
|
||||
("Call-ID".to_string(), self.call_id.clone()),
|
||||
(
|
||||
"CSeq".to_string(),
|
||||
format!("{} CANCEL", self.local_cseq),
|
||||
),
|
||||
("CSeq".to_string(), format!("{} CANCEL", self.local_cseq)),
|
||||
("Max-Forwards".to_string(), "70".to_string()),
|
||||
("Content-Length".to_string(), "0".to_string()),
|
||||
];
|
||||
@@ -284,11 +273,7 @@ impl SipDialog {
|
||||
.unwrap_or(&self.remote_target)
|
||||
.to_string();
|
||||
|
||||
SipMessage::new(
|
||||
format!("CANCEL {ruri} SIP/2.0"),
|
||||
headers,
|
||||
String::new(),
|
||||
)
|
||||
SipMessage::new(format!("CANCEL {ruri} SIP/2.0"), headers, String::new())
|
||||
}
|
||||
|
||||
/// Transition the dialog to terminated state.
|
||||
|
||||
@@ -4,6 +4,8 @@
|
||||
use md5::{Digest, Md5};
|
||||
use rand::Rng;
|
||||
|
||||
use crate::{Endpoint, SdpMediaKind};
|
||||
|
||||
// ---- ID generators ---------------------------------------------------------
|
||||
|
||||
/// Generate a random SIP Call-ID (32 hex chars).
|
||||
@@ -27,7 +29,9 @@ pub fn generate_branch() -> String {
|
||||
|
||||
fn random_hex(bytes: usize) -> String {
|
||||
let mut rng = rand::thread_rng();
|
||||
(0..bytes).map(|_| format!("{:02x}", rng.gen::<u8>())).collect()
|
||||
(0..bytes)
|
||||
.map(|_| format!("{:02x}", rng.gen::<u8>()))
|
||||
.collect()
|
||||
}
|
||||
|
||||
// ---- Codec registry --------------------------------------------------------
|
||||
@@ -53,6 +57,9 @@ pub struct SdpOptions<'a> {
|
||||
pub ip: &'a str,
|
||||
pub port: u16,
|
||||
pub payload_types: &'a [u8],
|
||||
pub media_kind: SdpMediaKind,
|
||||
pub transport: &'a str,
|
||||
pub media_formats: &'a [&'a str],
|
||||
pub session_id: Option<&'a str>,
|
||||
pub session_name: Option<&'a str>,
|
||||
pub direction: Option<&'a str>,
|
||||
@@ -65,6 +72,9 @@ impl<'a> Default for SdpOptions<'a> {
|
||||
ip: "0.0.0.0",
|
||||
port: 0,
|
||||
payload_types: &[9, 0, 8, 101],
|
||||
media_kind: SdpMediaKind::Audio,
|
||||
transport: "RTP/AVP",
|
||||
media_formats: &[],
|
||||
session_id: None,
|
||||
session_name: None,
|
||||
direction: None,
|
||||
@@ -81,7 +91,14 @@ pub fn build_sdp(opts: &SdpOptions) -> String {
|
||||
.unwrap_or_else(|| format!("{}", rand::thread_rng().gen_range(0..1_000_000_000u64)));
|
||||
let session_name = opts.session_name.unwrap_or("-");
|
||||
let direction = opts.direction.unwrap_or("sendrecv");
|
||||
let pts: Vec<String> = opts.payload_types.iter().map(|pt| pt.to_string()).collect();
|
||||
let media_formats: Vec<String> = if !opts.media_formats.is_empty() {
|
||||
opts.media_formats
|
||||
.iter()
|
||||
.map(|fmt| fmt.to_string())
|
||||
.collect()
|
||||
} else {
|
||||
opts.payload_types.iter().map(|pt| pt.to_string()).collect()
|
||||
};
|
||||
|
||||
let mut lines = vec![
|
||||
"v=0".to_string(),
|
||||
@@ -89,16 +106,24 @@ pub fn build_sdp(opts: &SdpOptions) -> String {
|
||||
format!("s={session_name}"),
|
||||
format!("c=IN IP4 {}", opts.ip),
|
||||
"t=0 0".to_string(),
|
||||
format!("m=audio {} RTP/AVP {}", opts.port, pts.join(" ")),
|
||||
format!(
|
||||
"m={} {} {} {}",
|
||||
opts.media_kind.as_sdp_token(),
|
||||
opts.port,
|
||||
opts.transport,
|
||||
media_formats.join(" ")
|
||||
),
|
||||
];
|
||||
|
||||
for &pt in opts.payload_types {
|
||||
let name = codec_name(pt);
|
||||
if name != "unknown" {
|
||||
lines.push(format!("a=rtpmap:{pt} {name}"));
|
||||
}
|
||||
if pt == 101 {
|
||||
lines.push("a=fmtp:101 0-16".to_string());
|
||||
if opts.media_kind == SdpMediaKind::Audio {
|
||||
for &pt in opts.payload_types {
|
||||
let name = codec_name(pt);
|
||||
if name != "unknown" {
|
||||
lines.push(format!("a=rtpmap:{pt} {name}"));
|
||||
}
|
||||
if pt == 101 {
|
||||
lines.push("a=fmtp:101 0-16".to_string());
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
@@ -142,7 +167,9 @@ pub fn parse_digest_challenge(header: &str) -> Option<DigestChallenge> {
|
||||
return Some(after[1..1 + end].to_string());
|
||||
}
|
||||
// Unquoted value.
|
||||
let end = after.find(|c: char| c == ',' || c.is_whitespace()).unwrap_or(after.len());
|
||||
let end = after
|
||||
.find(|c: char| c == ',' || c.is_whitespace())
|
||||
.unwrap_or(after.len());
|
||||
return Some(after[..end].to_string());
|
||||
}
|
||||
None
|
||||
@@ -195,30 +222,62 @@ pub fn compute_digest_auth(
|
||||
|
||||
// ---- SDP parser ------------------------------------------------------------
|
||||
|
||||
use crate::Endpoint;
|
||||
|
||||
/// Parse the audio media port and connection address from an SDP body.
|
||||
/// Parse the preferred media endpoint from an SDP body.
|
||||
///
|
||||
/// Audio `m=` lines are preferred when present so existing RTP call flows keep
|
||||
/// their current behavior. If no audio section exists, the first media section
|
||||
/// is returned, which allows T.38-only SDP offers/answers to be represented.
|
||||
pub fn parse_sdp_endpoint(sdp: &str) -> Option<Endpoint> {
|
||||
let mut addr: Option<&str> = None;
|
||||
let mut port: Option<u16> = None;
|
||||
let mut preferred: Option<(SdpMediaKind, u16, Option<u8>, String)> = None;
|
||||
let mut fallback: Option<(SdpMediaKind, u16, Option<u8>, String)> = None;
|
||||
|
||||
let normalized = sdp.replace("\r\n", "\n");
|
||||
for raw in normalized.split('\n') {
|
||||
let line = raw.trim();
|
||||
if let Some(rest) = line.strip_prefix("c=IN IP4 ") {
|
||||
addr = Some(rest.trim());
|
||||
} else if let Some(rest) = line.strip_prefix("m=audio ") {
|
||||
let parts: Vec<&str> = rest.split_whitespace().collect();
|
||||
} else if let Some(rest) = line.strip_prefix("m=") {
|
||||
// m=<media> <port> <transport> <fmt1> [<fmt2> ...]
|
||||
let mut media_and_rest = rest.splitn(2, ' ');
|
||||
let media = media_and_rest.next().unwrap_or("");
|
||||
let remainder = media_and_rest.next().unwrap_or("");
|
||||
let media_kind = SdpMediaKind::from_sdp_token(media);
|
||||
if media_kind == SdpMediaKind::Unknown {
|
||||
continue;
|
||||
}
|
||||
|
||||
let parts: Vec<&str> = remainder.split_whitespace().collect();
|
||||
if !parts.is_empty() {
|
||||
port = parts[0].parse().ok();
|
||||
if let Ok(port) = parts[0].parse() {
|
||||
let transport = parts.get(1).copied().unwrap_or("").to_string();
|
||||
let codec_pt = if media_kind == SdpMediaKind::Audio && parts.len() > 2 {
|
||||
parts[2].parse::<u8>().ok()
|
||||
} else {
|
||||
None
|
||||
};
|
||||
|
||||
let candidate = (media_kind, port, codec_pt, transport);
|
||||
if fallback.is_none() {
|
||||
fallback = Some(candidate.clone());
|
||||
}
|
||||
if media_kind == SdpMediaKind::Audio {
|
||||
preferred = Some(candidate);
|
||||
} else if preferred.is_none() {
|
||||
preferred = Some(candidate);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
match (addr, port) {
|
||||
(Some(a), Some(p)) => Some(Endpoint {
|
||||
match (addr, preferred.or(fallback)) {
|
||||
(Some(a), Some((media_kind, port, codec_pt, transport))) => Some(Endpoint {
|
||||
address: a.to_string(),
|
||||
port: p,
|
||||
port,
|
||||
codec_pt,
|
||||
media_kind,
|
||||
transport,
|
||||
}),
|
||||
_ => None,
|
||||
}
|
||||
@@ -233,11 +292,7 @@ pub struct MwiResult {
|
||||
pub extra_headers: Vec<(String, String)>,
|
||||
}
|
||||
|
||||
pub fn build_mwi_body(
|
||||
new_messages: u32,
|
||||
old_messages: u32,
|
||||
account_uri: &str,
|
||||
) -> MwiResult {
|
||||
pub fn build_mwi_body(new_messages: u32, old_messages: u32, account_uri: &str) -> MwiResult {
|
||||
let waiting = if new_messages > 0 { "yes" } else { "no" };
|
||||
let body = format!(
|
||||
"Messages-Waiting: {waiting}\r\n\
|
||||
@@ -319,6 +374,40 @@ mod tests {
|
||||
let ep = parse_sdp_endpoint(sdp).unwrap();
|
||||
assert_eq!(ep.address, "10.0.0.1");
|
||||
assert_eq!(ep.port, 5060);
|
||||
assert_eq!(ep.media_kind, SdpMediaKind::Audio);
|
||||
assert_eq!(ep.transport, "RTP/AVP");
|
||||
assert!(ep.is_audio_rtp());
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_parse_t38_sdp_endpoint() {
|
||||
let sdp = concat!(
|
||||
"v=0\r\n",
|
||||
"c=IN IP4 203.0.113.9\r\n",
|
||||
"m=image 4000 udptl t38\r\n",
|
||||
"a=T38FaxVersion:0\r\n",
|
||||
);
|
||||
let ep = parse_sdp_endpoint(sdp).unwrap();
|
||||
assert_eq!(ep.address, "203.0.113.9");
|
||||
assert_eq!(ep.port, 4000);
|
||||
assert_eq!(ep.media_kind, SdpMediaKind::Image);
|
||||
assert_eq!(ep.transport, "udptl");
|
||||
assert!(ep.is_t38_udptl());
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_build_t38_sdp() {
|
||||
let sdp = build_sdp(&SdpOptions {
|
||||
ip: "192.168.1.1",
|
||||
port: 4000,
|
||||
media_kind: SdpMediaKind::Image,
|
||||
transport: "udptl",
|
||||
media_formats: &["t38"],
|
||||
attributes: &["T38FaxVersion:0"],
|
||||
..Default::default()
|
||||
});
|
||||
assert!(sdp.contains("m=image 4000 udptl t38"));
|
||||
assert!(sdp.contains("a=T38FaxVersion:0"));
|
||||
}
|
||||
|
||||
#[test]
|
||||
|
||||
@@ -4,14 +4,59 @@
|
||||
//! SDP handling, Digest authentication, and URI rewriting.
|
||||
//! Ported from the TypeScript `ts/sip/` library.
|
||||
|
||||
pub mod message;
|
||||
pub mod dialog;
|
||||
pub mod helpers;
|
||||
pub mod message;
|
||||
pub mod rewrite;
|
||||
|
||||
/// Network endpoint (address + port).
|
||||
/// Network endpoint (address + port + optional negotiated codec).
|
||||
#[derive(Debug, Clone, PartialEq, Eq)]
|
||||
pub struct Endpoint {
|
||||
pub address: String,
|
||||
pub port: u16,
|
||||
/// First payload type from the SDP `m=audio` line (the preferred codec).
|
||||
pub codec_pt: Option<u8>,
|
||||
/// SDP media kind from the `m=` line.
|
||||
pub media_kind: SdpMediaKind,
|
||||
/// SDP transport token from the `m=` line (e.g. `RTP/AVP`, `udptl`).
|
||||
pub transport: String,
|
||||
}
|
||||
|
||||
/// Media kind named by the first token of an SDP `m=` line.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum SdpMediaKind {
    Audio,
    Image,
    Application,
    /// Any media token other than the three recognized ones.
    Unknown,
}

impl SdpMediaKind {
    /// Lowercase token for this kind as written in an `m=` line.
    pub fn as_sdp_token(&self) -> &'static str {
        match *self {
            SdpMediaKind::Audio => "audio",
            SdpMediaKind::Image => "image",
            SdpMediaKind::Application => "application",
            SdpMediaKind::Unknown => "unknown",
        }
    }

    /// Parse a media token, ignoring ASCII case.
    ///
    /// Anything other than `audio`, `image` or `application` maps to
    /// `Unknown` (including the literal token `unknown`).
    pub fn from_sdp_token(token: &str) -> Self {
        if token.eq_ignore_ascii_case("audio") {
            SdpMediaKind::Audio
        } else if token.eq_ignore_ascii_case("image") {
            SdpMediaKind::Image
        } else if token.eq_ignore_ascii_case("application") {
            SdpMediaKind::Application
        } else {
            SdpMediaKind::Unknown
        }
    }
}
|
||||
|
||||
impl Endpoint {
|
||||
pub fn is_audio_rtp(&self) -> bool {
|
||||
self.media_kind == SdpMediaKind::Audio
|
||||
&& self.transport.to_ascii_uppercase().starts_with("RTP/")
|
||||
}
|
||||
|
||||
pub fn is_t38_udptl(&self) -> bool {
|
||||
self.media_kind == SdpMediaKind::Image && self.transport.eq_ignore_ascii_case("udptl")
|
||||
}
|
||||
}
|
||||
|
||||
@@ -14,7 +14,11 @@ pub struct SipMessage {
|
||||
|
||||
impl SipMessage {
|
||||
pub fn new(start_line: String, headers: Vec<(String, String)>, body: String) -> Self {
|
||||
Self { start_line, headers, body }
|
||||
Self {
|
||||
start_line,
|
||||
headers,
|
||||
body,
|
||||
}
|
||||
}
|
||||
|
||||
// ---- Parsing -----------------------------------------------------------
|
||||
@@ -175,7 +179,8 @@ impl SipMessage {
|
||||
|
||||
/// Inserts a header at the top of the header list.
|
||||
pub fn prepend_header(&mut self, name: &str, value: &str) -> &mut Self {
|
||||
self.headers.insert(0, (name.to_string(), value.to_string()));
|
||||
self.headers
|
||||
.insert(0, (name.to_string(), value.to_string()));
|
||||
self
|
||||
}
|
||||
|
||||
@@ -233,10 +238,7 @@ impl SipMessage {
|
||||
.to_display_name
|
||||
.map(|d| format!("\"{d}\" "))
|
||||
.unwrap_or_default();
|
||||
let to_tag_str = opts
|
||||
.to_tag
|
||||
.map(|t| format!(";tag={t}"))
|
||||
.unwrap_or_default();
|
||||
let to_tag_str = opts.to_tag.map(|t| format!(";tag={t}")).unwrap_or_default();
|
||||
|
||||
let mut headers = vec![
|
||||
(
|
||||
@@ -364,7 +366,43 @@ impl SipMessage {
|
||||
.find(|c: char| c == ';' || c == '>')
|
||||
.unwrap_or(trimmed.len());
|
||||
let result = &trimmed[..end];
|
||||
if result.is_empty() { None } else { Some(result) }
|
||||
if result.is_empty() {
|
||||
None
|
||||
} else {
|
||||
Some(result)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/// Extract the user part from a SIP/TEL URI or header value.
|
||||
pub fn extract_uri_user(uri_or_header_value: &str) -> Option<&str> {
|
||||
let raw = Self::extract_uri(uri_or_header_value).unwrap_or(uri_or_header_value);
|
||||
let raw = raw.trim();
|
||||
if raw.is_empty() {
|
||||
return None;
|
||||
}
|
||||
|
||||
let user_part = if raw
|
||||
.get(..5)
|
||||
.is_some_and(|prefix| prefix.eq_ignore_ascii_case("sips:"))
|
||||
{
|
||||
&raw[5..]
|
||||
} else if raw.get(..4).is_some_and(|prefix| {
|
||||
prefix.eq_ignore_ascii_case("sip:") || prefix.eq_ignore_ascii_case("tel:")
|
||||
}) {
|
||||
&raw[4..]
|
||||
} else {
|
||||
raw
|
||||
};
|
||||
|
||||
let end = user_part
|
||||
.find(|c: char| matches!(c, '@' | ';' | '?' | '>'))
|
||||
.unwrap_or(user_part.len());
|
||||
let result = &user_part[..end];
|
||||
if result.is_empty() {
|
||||
None
|
||||
} else {
|
||||
Some(result)
|
||||
}
|
||||
}
|
||||
}
|
||||
@@ -506,6 +544,19 @@ mod tests {
|
||||
SipMessage::extract_uri("\"Name\" <sip:user@host>;tag=abc"),
|
||||
Some("sip:user@host")
|
||||
);
|
||||
assert_eq!(
|
||||
SipMessage::extract_uri_user("\"Name\" <sip:+49 421 219694@host>;tag=abc"),
|
||||
Some("+49 421 219694")
|
||||
);
|
||||
assert_eq!(
|
||||
SipMessage::extract_uri_user("sip:0049421219694@voip.easybell.de"),
|
||||
Some("0049421219694")
|
||||
);
|
||||
assert_eq!(
|
||||
SipMessage::extract_uri_user("tel:+49421219694;phone-context=example.com"),
|
||||
Some("+49421219694")
|
||||
);
|
||||
assert_eq!(SipMessage::extract_uri_user("SIP:user@host"), Some("user"));
|
||||
}
|
||||
|
||||
#[test]
|
||||
@@ -535,7 +586,10 @@ mod tests {
|
||||
);
|
||||
assert_eq!(invite.method(), Some("INVITE"));
|
||||
assert_eq!(invite.call_id(), "test-123");
|
||||
assert!(invite.get_header("Via").unwrap().contains("192.168.1.1:5070"));
|
||||
assert!(invite
|
||||
.get_header("Via")
|
||||
.unwrap()
|
||||
.contains("192.168.1.1:5070"));
|
||||
|
||||
let response = SipMessage::create_response(
|
||||
200,
|
||||
|
||||
@@ -2,7 +2,7 @@
|
||||
//!
|
||||
//! Ported from ts/sip/rewrite.ts.
|
||||
|
||||
use crate::Endpoint;
|
||||
use crate::{Endpoint, SdpMediaKind};
|
||||
|
||||
/// Replaces the host:port in every `sip:` / `sips:` URI found in `value`.
|
||||
pub fn rewrite_sip_uri(value: &str, host: &str, port: u16) -> String {
|
||||
@@ -57,12 +57,12 @@ pub fn rewrite_sip_uri(value: &str, host: &str, port: u16) -> String {
|
||||
result
|
||||
}
|
||||
|
||||
/// Rewrites the connection address (`c=`) and audio media port (`m=audio`)
|
||||
/// in an SDP body. Returns the rewritten body together with the original
|
||||
/// endpoint that was replaced (if any).
|
||||
/// Rewrites the connection address (`c=`) and first supported media port
|
||||
/// (`m=audio`, `m=image`, `m=application`) in an SDP body. Returns the
|
||||
/// rewritten body together with the original endpoint that was replaced (if any).
|
||||
pub fn rewrite_sdp(body: &str, ip: &str, port: u16) -> (String, Option<Endpoint>) {
|
||||
let mut orig_addr: Option<String> = None;
|
||||
let mut orig_port: Option<u16> = None;
|
||||
let mut orig_media: Option<(SdpMediaKind, u16, String)> = None;
|
||||
|
||||
let lines: Vec<String> = body
|
||||
.replace("\r\n", "\n")
|
||||
@@ -71,10 +71,25 @@ pub fn rewrite_sdp(body: &str, ip: &str, port: u16) -> (String, Option<Endpoint>
|
||||
if let Some(rest) = line.strip_prefix("c=IN IP4 ") {
|
||||
orig_addr = Some(rest.trim().to_string());
|
||||
format!("c=IN IP4 {ip}")
|
||||
} else if line.starts_with("m=audio ") {
|
||||
} else if line.starts_with("m=audio ")
|
||||
|| line.starts_with("m=image ")
|
||||
|| line.starts_with("m=application ")
|
||||
{
|
||||
let parts: Vec<&str> = line.split(' ').collect();
|
||||
if parts.len() >= 2 {
|
||||
orig_port = parts[1].parse().ok();
|
||||
let media_kind = parts[0]
|
||||
.strip_prefix("m=")
|
||||
.map(SdpMediaKind::from_sdp_token)
|
||||
.unwrap_or(SdpMediaKind::Unknown);
|
||||
if orig_media.is_none() {
|
||||
orig_media = parts[1].parse().ok().map(|orig_port| {
|
||||
(
|
||||
media_kind,
|
||||
orig_port,
|
||||
parts.get(2).copied().unwrap_or("").to_string(),
|
||||
)
|
||||
});
|
||||
}
|
||||
let mut rebuilt = parts[0].to_string();
|
||||
rebuilt.push(' ');
|
||||
rebuilt.push_str(&port.to_string());
|
||||
@@ -91,8 +106,14 @@ pub fn rewrite_sdp(body: &str, ip: &str, port: u16) -> (String, Option<Endpoint>
|
||||
})
|
||||
.collect();
|
||||
|
||||
let original = match (orig_addr, orig_port) {
|
||||
(Some(a), Some(p)) => Some(Endpoint { address: a, port: p }),
|
||||
let original = match (orig_addr, orig_media) {
|
||||
(Some(a), Some((media_kind, p, transport))) => Some(Endpoint {
|
||||
address: a,
|
||||
port: p,
|
||||
codec_pt: None,
|
||||
media_kind,
|
||||
transport,
|
||||
}),
|
||||
_ => None,
|
||||
};
|
||||
|
||||
@@ -126,5 +147,19 @@ mod tests {
|
||||
let ep = orig.unwrap();
|
||||
assert_eq!(ep.address, "10.0.0.1");
|
||||
assert_eq!(ep.port, 5060);
|
||||
assert_eq!(ep.transport, "RTP/AVP");
|
||||
}
|
||||
|
||||
/// A T.38 (`m=image ... udptl t38`) body must have its connection address
/// and media port rewritten, and the original endpoint reported back.
#[test]
fn test_rewrite_t38_sdp() {
    let offer = "v=0\r\nc=IN IP4 10.0.0.1\r\nm=image 5060 udptl t38\r\na=T38FaxVersion:0\r\n";
    let (body, original) = rewrite_sdp(offer, "192.168.1.1", 4000);

    // The rewritten body carries the new address and port.
    for expected in ["c=IN IP4 192.168.1.1", "m=image 4000 udptl t38"] {
        assert!(body.contains(expected));
    }

    // The returned endpoint describes what was replaced.
    let endpoint = original.unwrap();
    assert_eq!(endpoint.address, "10.0.0.1");
    assert_eq!(endpoint.port, 5060);
    assert_eq!(endpoint.media_kind, SdpMediaKind::Image);
    assert_eq!(endpoint.transport, "udptl");
}
|
||||
}
|
||||
|
||||
@@ -1,18 +0,0 @@
|
||||
[package]
|
||||
name = "tts-engine"
|
||||
version = "0.1.0"
|
||||
edition = "2021"
|
||||
|
||||
[[bin]]
|
||||
name = "tts-engine"
|
||||
path = "src/main.rs"
|
||||
|
||||
[dependencies]
|
||||
kokoro-tts = { version = "0.3", default-features = false }
|
||||
# Pin to rc.11 matching kokoro-tts's expectation; enable vendored TLS to avoid system libssl-dev.
|
||||
ort = { version = "=2.0.0-rc.11", default-features = false, features = [
|
||||
"std", "download-binaries", "copy-dylibs", "ndarray",
|
||||
"tls-native-vendored"
|
||||
] }
|
||||
tokio = { version = "1", features = ["rt-multi-thread", "macros"] }
|
||||
hound = "3.5"
|
||||
@@ -1,149 +0,0 @@
|
||||
//! TTS engine CLI — synthesizes text to a WAV file using Kokoro neural TTS.
|
||||
//!
|
||||
//! Usage:
|
||||
//! echo "Hello world" | tts-engine --model kokoro-v1.0.onnx --voices voices.bin --output out.wav
|
||||
//! tts-engine --model kokoro-v1.0.onnx --voices voices.bin --output out.wav --text "Hello world"
|
||||
//!
|
||||
//! Outputs 24kHz 16-bit mono WAV.
|
||||
|
||||
use kokoro_tts::{KokoroTts, Voice};
|
||||
use std::io::{self, Read};
|
||||
|
||||
/// Reads the process command line and returns
/// `(model, voices, output, voice_name, text)`.
///
/// Recognised flags are `--model`, `--voices`, `--output` (alias
/// `--output_file`), `--text` and `--voice`; each consumes the following
/// argument as its value. Unrecognised arguments are ignored. The voice name
/// defaults to `af_bella`; `text` stays `None` when `--text` is absent.
///
/// Returns `Err` with a short message when the model, voices or output
/// value is missing or empty.
fn parse_args() -> Result<(String, String, String, String, Option<String>), String> {
    let argv: Vec<String> = std::env::args().collect();

    let mut model = String::new();
    let mut voices = String::new();
    let mut output = String::new();
    let mut text: Option<String> = None;
    let mut voice_name: Option<String> = None;

    // Skip the program name, then walk flag/value pairs.
    let mut rest = argv.into_iter().skip(1);
    while let Some(flag) = rest.next() {
        match flag.as_str() {
            "--model" => model = rest.next().unwrap_or_default(),
            "--voices" => voices = rest.next().unwrap_or_default(),
            "--output" | "--output_file" => output = rest.next().unwrap_or_default(),
            "--text" => text = rest.next(),
            "--voice" => voice_name = rest.next(),
            _ => {}
        }
    }

    if model.is_empty() {
        return Err("--model required".into());
    }
    if voices.is_empty() {
        return Err("--voices required".into());
    }
    if output.is_empty() {
        return Err("--output required".into());
    }

    let voice_str = match voice_name {
        Some(name) => name,
        None => "af_bella".into(),
    };

    Ok((model, voices, output, voice_str, text))
}
|
||||
|
||||
fn select_voice(name: &str) -> Voice {
|
||||
match name {
|
||||
"af_bella" => Voice::AfBella(1.0),
|
||||
"af_heart" => Voice::AfHeart(1.0),
|
||||
"af_jessica" => Voice::AfJessica(1.0),
|
||||
"af_nicole" => Voice::AfNicole(1.0),
|
||||
"af_nova" => Voice::AfNova(1.0),
|
||||
"af_sarah" => Voice::AfSarah(1.0),
|
||||
"af_sky" => Voice::AfSky(1.0),
|
||||
"af_river" => Voice::AfRiver(1.0),
|
||||
"af_alloy" => Voice::AfAlloy(1.0),
|
||||
"af_aoede" => Voice::AfAoede(1.0),
|
||||
"af_kore" => Voice::AfKore(1.0),
|
||||
"am_adam" => Voice::AmAdam(1.0),
|
||||
"am_echo" => Voice::AmEcho(1.0),
|
||||
"am_eric" => Voice::AmEric(1.0),
|
||||
"am_fenrir" => Voice::AmFenrir(1.0),
|
||||
"am_liam" => Voice::AmLiam(1.0),
|
||||
"am_michael" => Voice::AmMichael(1.0),
|
||||
"am_onyx" => Voice::AmOnyx(1.0),
|
||||
"am_puck" => Voice::AmPuck(1.0),
|
||||
"bf_alice" => Voice::BfAlice(1.0),
|
||||
"bf_emma" => Voice::BfEmma(1.0),
|
||||
"bf_isabella" => Voice::BfIsabella(1.0),
|
||||
"bf_lily" => Voice::BfLily(1.0),
|
||||
"bm_daniel" => Voice::BmDaniel(1.0),
|
||||
"bm_fable" => Voice::BmFable(1.0),
|
||||
"bm_george" => Voice::BmGeorge(1.0),
|
||||
"bm_lewis" => Voice::BmLewis(1.0),
|
||||
_ => {
|
||||
eprintln!("[tts-engine] unknown voice '{}', falling back to af_bella", name);
|
||||
Voice::AfBella(1.0)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
#[tokio::main]
|
||||
async fn main() {
|
||||
let (model_path, voices_path, output_path, voice_name, text_arg) = match parse_args() {
|
||||
Ok(v) => v,
|
||||
Err(e) => {
|
||||
eprintln!("Error: {}", e);
|
||||
eprintln!("Usage: tts-engine --model <model.onnx> --voices <voices.bin> --output <output.wav> [--text <text>] [--voice <voice_name>]");
|
||||
std::process::exit(1);
|
||||
}
|
||||
};
|
||||
|
||||
// Get text from --text arg or stdin.
|
||||
let text = match text_arg {
|
||||
Some(t) => t,
|
||||
None => {
|
||||
let mut buf = String::new();
|
||||
io::stdin().read_to_string(&mut buf).expect("failed to read stdin");
|
||||
buf.trim().to_string()
|
||||
}
|
||||
};
|
||||
|
||||
if text.is_empty() {
|
||||
eprintln!("[tts-engine] no text provided");
|
||||
std::process::exit(1);
|
||||
}
|
||||
|
||||
eprintln!("[tts-engine] loading model: {}", model_path);
|
||||
let tts = match KokoroTts::new(&model_path, &voices_path).await {
|
||||
Ok(t) => t,
|
||||
Err(e) => {
|
||||
eprintln!("[tts-engine] failed to load model: {:?}", e);
|
||||
std::process::exit(1);
|
||||
}
|
||||
};
|
||||
|
||||
let voice = select_voice(&voice_name);
|
||||
eprintln!("[tts-engine] synthesizing with voice '{}': \"{}\"", voice_name, text);
|
||||
|
||||
let (samples, duration) = match tts.synth(&text, voice).await {
|
||||
Ok(r) => r,
|
||||
Err(e) => {
|
||||
eprintln!("[tts-engine] synthesis failed: {:?}", e);
|
||||
std::process::exit(1);
|
||||
}
|
||||
};
|
||||
|
||||
eprintln!("[tts-engine] synthesized {} samples in {:?}", samples.len(), duration);
|
||||
|
||||
// Write WAV: 24kHz, 16-bit, mono (same format announcement.ts expects).
|
||||
let spec = hound::WavSpec {
|
||||
channels: 1,
|
||||
sample_rate: 24000,
|
||||
bits_per_sample: 16,
|
||||
sample_format: hound::SampleFormat::Int,
|
||||
};
|
||||
|
||||
let mut writer = match hound::WavWriter::create(&output_path, spec) {
|
||||
Ok(w) => w,
|
||||
Err(e) => {
|
||||
eprintln!("[tts-engine] failed to create WAV: {}", e);
|
||||
std::process::exit(1);
|
||||
}
|
||||
};
|
||||
|
||||
for &sample in &samples {
|
||||
let s16 = (sample * 32767.0).round().clamp(-32768.0, 32767.0) as i16;
|
||||
writer.write_sample(s16).unwrap();
|
||||
}
|
||||
writer.finalize().unwrap();
|
||||
|
||||
eprintln!("[tts-engine] wrote {}", output_path);
|
||||
}
|
||||
Vendored
+1
@@ -0,0 +1 @@
|
||||
{"v":1}
|
||||
+7
@@ -0,0 +1,7 @@
|
||||
{
|
||||
"git": {
|
||||
"sha1": "dfa3eda5e8c3f23f8b4c5d504acaebd6e7a45020",
|
||||
"dirty": true
|
||||
},
|
||||
"path_in_vcs": ""
|
||||
}
|
||||
@@ -0,0 +1,35 @@
|
||||
name: Rust
|
||||
|
||||
on:
|
||||
push:
|
||||
branches: [ "master" ]
|
||||
pull_request:
|
||||
branches: [ "master" ]
|
||||
|
||||
env:
|
||||
CARGO_TERM_COLOR: always
|
||||
|
||||
jobs:
|
||||
build:
|
||||
strategy:
|
||||
matrix:
|
||||
os: [ubuntu-latest, windows-latest, macos-latest]
|
||||
runs-on: ${{ matrix.os }}
|
||||
|
||||
steps:
|
||||
- uses: actions/checkout@v4
|
||||
|
||||
# Install Ubuntu-only dependencies
|
||||
- name: Setup Ubuntu dependencies
|
||||
if: matrix.os == 'ubuntu-latest'
|
||||
run: |
|
||||
sudo apt-get update
|
||||
sudo apt install libasound2-dev
|
||||
|
||||
# Build the project
|
||||
- name: Build
|
||||
run: cargo build -vv
|
||||
|
||||
# Run the tests
|
||||
- name: Run tests
|
||||
run: cargo test --workspace -vv
|
||||
@@ -0,0 +1,5 @@
|
||||
*.bin
|
||||
*.onnx
|
||||
Cargo.lock
|
||||
/target
|
||||
.idea
|
||||
Vendored
+116
@@ -0,0 +1,116 @@
|
||||
# THIS FILE IS AUTOMATICALLY GENERATED BY CARGO
|
||||
#
|
||||
# When uploading crates to the registry Cargo will automatically
|
||||
# "normalize" Cargo.toml files for maximal compatibility
|
||||
# with all versions of Cargo and also rewrite `path` dependencies
|
||||
# to registry (e.g., crates.io) dependencies.
|
||||
#
|
||||
# If you are reading this file be aware that the original Cargo.toml
|
||||
# will likely look very different (and much more reasonable).
|
||||
# See Cargo.toml.orig for the original contents.
|
||||
|
||||
[package]
|
||||
edition = "2024"
|
||||
name = "kokoro-tts"
|
||||
version = "0.3.2"
|
||||
build = "build.rs"
|
||||
autolib = false
|
||||
autobins = false
|
||||
autoexamples = false
|
||||
autotests = false
|
||||
autobenches = false
|
||||
description = "用于Rust的轻量级AI离线语音合成器(Kokoro TTS),可轻松交叉编译到移动端"
|
||||
readme = "README.md"
|
||||
keywords = [
|
||||
"TTS",
|
||||
"Offline",
|
||||
"Lite",
|
||||
"AI",
|
||||
"Synthesizer",
|
||||
]
|
||||
license = "Apache-2.0"
|
||||
repository = "https://github.com/mzdk100/kokoro.git"
|
||||
|
||||
[features]
|
||||
use-cmudict = ["cmudict-fast"]
|
||||
|
||||
[lib]
|
||||
name = "kokoro_tts"
|
||||
path = "src/lib.rs"
|
||||
|
||||
[[example]]
|
||||
name = "synth_directly_v10"
|
||||
path = "examples/synth_directly_v10.rs"
|
||||
|
||||
[[example]]
|
||||
name = "synth_directly_v11"
|
||||
path = "examples/synth_directly_v11.rs"
|
||||
|
||||
[[example]]
|
||||
name = "synth_stream"
|
||||
path = "examples/synth_stream.rs"
|
||||
|
||||
[dependencies.bincode]
|
||||
version = "2.0"
|
||||
|
||||
[dependencies.chinese-number]
|
||||
version = "0.7.8"
|
||||
features = [
|
||||
"number-to-chinese",
|
||||
"chinese-to-number",
|
||||
]
|
||||
default-features = false
|
||||
|
||||
[dependencies.cmudict-fast]
|
||||
version = "0.8.0"
|
||||
optional = true
|
||||
|
||||
[dependencies.futures]
|
||||
version = "0.3.31"
|
||||
|
||||
[dependencies.jieba-rs]
|
||||
version = "0.8.1"
|
||||
|
||||
[dependencies.log]
|
||||
version = "0.4.29"
|
||||
|
||||
[dependencies.ndarray]
|
||||
version = "0.17.2"
|
||||
|
||||
[dependencies.ort]
|
||||
version = "2.0.0-rc.11"
|
||||
|
||||
[dependencies.pin-project]
|
||||
version = "1.1.10"
|
||||
|
||||
[dependencies.pinyin]
|
||||
version = "0.11.0"
|
||||
|
||||
[dependencies.rand]
|
||||
version = "0.10.0-rc.7"
|
||||
|
||||
[dependencies.regex]
|
||||
version = "1.12.2"
|
||||
|
||||
[dependencies.tokio]
|
||||
version = "1.49.0"
|
||||
features = [
|
||||
"fs",
|
||||
"rt-multi-thread",
|
||||
"time",
|
||||
"sync",
|
||||
]
|
||||
|
||||
[dev-dependencies.anyhow]
|
||||
version = "1.0.100"
|
||||
|
||||
[dev-dependencies.tokio]
|
||||
version = "1.49.0"
|
||||
features = ["macros"]
|
||||
|
||||
[dev-dependencies.voxudio]
|
||||
version = "0.5.7"
|
||||
features = ["device"]
|
||||
|
||||
[build-dependencies.cc]
|
||||
version = "1.2.53"
|
||||
+35
@@ -0,0 +1,35 @@
|
||||
[package]
|
||||
name = "kokoro-tts"
|
||||
description = "用于Rust的轻量级AI离线语音合成器(Kokoro TTS),可轻松交叉编译到移动端"
|
||||
version = "0.3.2"
|
||||
edition = "2024"
|
||||
keywords = ["TTS", "Offline", "Lite", "AI", "Synthesizer"]
|
||||
license = "Apache-2.0"
|
||||
repository = "https://github.com/mzdk100/kokoro.git"
|
||||
readme = "README.md"
|
||||
|
||||
[features]
|
||||
use-cmudict = ["cmudict-fast"]
|
||||
|
||||
[dependencies]
|
||||
bincode = "2.0"
|
||||
chinese-number = { version = "0.7.8",default-features = false,features = ["number-to-chinese", "chinese-to-number"] }
|
||||
cmudict-fast = { version = "0.8.0", optional = true }
|
||||
futures = "0.3.31"
|
||||
jieba-rs = "0.8.1"
|
||||
log = "0.4.29"
|
||||
ndarray = "0.17.2"
|
||||
ort = "2.0.0-rc.11"
|
||||
pin-project = "1.1.10"
|
||||
pinyin = "0.11.0"
|
||||
rand="0.10.0-rc.7"
|
||||
regex = "1.12.2"
|
||||
tokio = { version = "1.49.0",features = ["fs", "rt-multi-thread","time", "sync"] }
|
||||
|
||||
[dev-dependencies]
|
||||
anyhow = "1.0.100"
|
||||
tokio = {version = "1.49.0",features = ["macros"]}
|
||||
voxudio = { version = "0.5.7",features = ["device"] }
|
||||
|
||||
[build-dependencies]
|
||||
cc = "1.2.53"
|
||||
Vendored
+201
@@ -0,0 +1,201 @@
|
||||
Apache License
|
||||
Version 2.0, January 2004
|
||||
http://www.apache.org/licenses/
|
||||
|
||||
TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION
|
||||
|
||||
1. Definitions.
|
||||
|
||||
"License" shall mean the terms and conditions for use, reproduction,
|
||||
and distribution as defined by Sections 1 through 9 of this document.
|
||||
|
||||
"Licensor" shall mean the copyright owner or entity authorized by
|
||||
the copyright owner that is granting the License.
|
||||
|
||||
"Legal Entity" shall mean the union of the acting entity and all
|
||||
other entities that control, are controlled by, or are under common
|
||||
control with that entity. For the purposes of this definition,
|
||||
"control" means (i) the power, direct or indirect, to cause the
|
||||
direction or management of such entity, whether by contract or
|
||||
otherwise, or (ii) ownership of fifty percent (50%) or more of the
|
||||
outstanding shares, or (iii) beneficial ownership of such entity.
|
||||
|
||||
"You" (or "Your") shall mean an individual or Legal Entity
|
||||
exercising permissions granted by this License.
|
||||
|
||||
"Source" form shall mean the preferred form for making modifications,
|
||||
including but not limited to software source code, documentation
|
||||
source, and configuration files.
|
||||
|
||||
"Object" form shall mean any form resulting from mechanical
|
||||
transformation or translation of a Source form, including but
|
||||
not limited to compiled object code, generated documentation,
|
||||
and conversions to other media types.
|
||||
|
||||
"Work" shall mean the work of authorship, whether in Source or
|
||||
Object form, made available under the License, as indicated by a
|
||||
copyright notice that is included in or attached to the work
|
||||
(an example is provided in the Appendix below).
|
||||
|
||||
"Derivative Works" shall mean any work, whether in Source or Object
|
||||
form, that is based on (or derived from) the Work and for which the
|
||||
editorial revisions, annotations, elaborations, or other modifications
|
||||
represent, as a whole, an original work of authorship. For the purposes
|
||||
of this License, Derivative Works shall not include works that remain
|
||||
separable from, or merely link (or bind by name) to the interfaces of,
|
||||
the Work and Derivative Works thereof.
|
||||
|
||||
"Contribution" shall mean any work of authorship, including
|
||||
the original version of the Work and any modifications or additions
|
||||
to that Work or Derivative Works thereof, that is intentionally
|
||||
submitted to Licensor for inclusion in the Work by the copyright owner
|
||||
or by an individual or Legal Entity authorized to submit on behalf of
|
||||
the copyright owner. For the purposes of this definition, "submitted"
|
||||
means any form of electronic, verbal, or written communication sent
|
||||
to the Licensor or its representatives, including but not limited to
|
||||
communication on electronic mailing lists, source code control systems,
|
||||
and issue tracking systems that are managed by, or on behalf of, the
|
||||
Licensor for the purpose of discussing and improving the Work, but
|
||||
excluding communication that is conspicuously marked or otherwise
|
||||
designated in writing by the copyright owner as "Not a Contribution."
|
||||
|
||||
"Contributor" shall mean Licensor and any individual or Legal Entity
|
||||
on behalf of whom a Contribution has been received by Licensor and
|
||||
subsequently incorporated within the Work.
|
||||
|
||||
2. Grant of Copyright License. Subject to the terms and conditions of
|
||||
this License, each Contributor hereby grants to You a perpetual,
|
||||
worldwide, non-exclusive, no-charge, royalty-free, irrevocable
|
||||
copyright license to reproduce, prepare Derivative Works of,
|
||||
publicly display, publicly perform, sublicense, and distribute the
|
||||
Work and such Derivative Works in Source or Object form.
|
||||
|
||||
3. Grant of Patent License. Subject to the terms and conditions of
|
||||
this License, each Contributor hereby grants to You a perpetual,
|
||||
worldwide, non-exclusive, no-charge, royalty-free, irrevocable
|
||||
(except as stated in this section) patent license to make, have made,
|
||||
use, offer to sell, sell, import, and otherwise transfer the Work,
|
||||
where such license applies only to those patent claims licensable
|
||||
by such Contributor that are necessarily infringed by their
|
||||
Contribution(s) alone or by combination of their Contribution(s)
|
||||
with the Work to which such Contribution(s) was submitted. If You
|
||||
institute patent litigation against any entity (including a
|
||||
cross-claim or counterclaim in a lawsuit) alleging that the Work
|
||||
or a Contribution incorporated within the Work constitutes direct
|
||||
or contributory patent infringement, then any patent licenses
|
||||
granted to You under this License for that Work shall terminate
|
||||
as of the date such litigation is filed.
|
||||
|
||||
4. Redistribution. You may reproduce and distribute copies of the
|
||||
Work or Derivative Works thereof in any medium, with or without
|
||||
modifications, and in Source or Object form, provided that You
|
||||
meet the following conditions:
|
||||
|
||||
(a) You must give any other recipients of the Work or
|
||||
Derivative Works a copy of this License; and
|
||||
|
||||
(b) You must cause any modified files to carry prominent notices
|
||||
stating that You changed the files; and
|
||||
|
||||
(c) You must retain, in the Source form of any Derivative Works
|
||||
that You distribute, all copyright, patent, trademark, and
|
||||
attribution notices from the Source form of the Work,
|
||||
excluding those notices that do not pertain to any part of
|
||||
the Derivative Works; and
|
||||
|
||||
(d) If the Work includes a "NOTICE" text file as part of its
|
||||
distribution, then any Derivative Works that You distribute must
|
||||
include a readable copy of the attribution notices contained
|
||||
within such NOTICE file, excluding those notices that do not
|
||||
pertain to any part of the Derivative Works, in at least one
|
||||
of the following places: within a NOTICE text file distributed
|
||||
as part of the Derivative Works; within the Source form or
|
||||
documentation, if provided along with the Derivative Works; or,
|
||||
within a display generated by the Derivative Works, if and
|
||||
wherever such third-party notices normally appear. The contents
|
||||
of the NOTICE file are for informational purposes only and
|
||||
do not modify the License. You may add Your own attribution
|
||||
notices within Derivative Works that You distribute, alongside
|
||||
or as an addendum to the NOTICE text from the Work, provided
|
||||
that such additional attribution notices cannot be construed
|
||||
as modifying the License.
|
||||
|
||||
You may add Your own copyright statement to Your modifications and
|
||||
may provide additional or different license terms and conditions
|
||||
for use, reproduction, or distribution of Your modifications, or
|
||||
for any such Derivative Works as a whole, provided Your use,
|
||||
reproduction, and distribution of the Work otherwise complies with
|
||||
the conditions stated in this License.
|
||||
|
||||
5. Submission of Contributions. Unless You explicitly state otherwise,
|
||||
any Contribution intentionally submitted for inclusion in the Work
|
||||
by You to the Licensor shall be under the terms and conditions of
|
||||
this License, without any additional terms or conditions.
|
||||
Notwithstanding the above, nothing herein shall supersede or modify
|
||||
the terms of any separate license agreement you may have executed
|
||||
with Licensor regarding such Contributions.
|
||||
|
||||
6. Trademarks. This License does not grant permission to use the trade
|
||||
names, trademarks, service marks, or product names of the Licensor,
|
||||
except as required for reasonable and customary use in describing the
|
||||
origin of the Work and reproducing the content of the NOTICE file.
|
||||
|
||||
7. Disclaimer of Warranty. Unless required by applicable law or
|
||||
agreed to in writing, Licensor provides the Work (and each
|
||||
Contributor provides its Contributions) on an "AS IS" BASIS,
|
||||
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
|
||||
implied, including, without limitation, any warranties or conditions
|
||||
of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A
|
||||
PARTICULAR PURPOSE. You are solely responsible for determining the
|
||||
appropriateness of using or redistributing the Work and assume any
|
||||
risks associated with Your exercise of permissions under this License.
|
||||
|
||||
8. Limitation of Liability. In no event and under no legal theory,
|
||||
whether in tort (including negligence), contract, or otherwise,
|
||||
unless required by applicable law (such as deliberate and grossly
|
||||
negligent acts) or agreed to in writing, shall any Contributor be
|
||||
liable to You for damages, including any direct, indirect, special,
|
||||
incidental, or consequential damages of any character arising as a
|
||||
result of this License or out of the use or inability to use the
|
||||
Work (including but not limited to damages for loss of goodwill,
|
||||
work stoppage, computer failure or malfunction, or any and all
|
||||
other commercial damages or losses), even if such Contributor
|
||||
has been advised of the possibility of such damages.
|
||||
|
||||
9. Accepting Warranty or Additional Liability. While redistributing
|
||||
the Work or Derivative Works thereof, You may choose to offer,
|
||||
and charge a fee for, acceptance of support, warranty, indemnity,
|
||||
or other liability obligations and/or rights consistent with this
|
||||
License. However, in accepting such obligations, You may act only
|
||||
on Your own behalf and on Your sole responsibility, not on behalf
|
||||
of any other Contributor, and only if You agree to indemnify,
|
||||
defend, and hold each Contributor harmless for any liability
|
||||
incurred by, or claims asserted against, such Contributor by reason
|
||||
of your accepting any such warranty or additional liability.
|
||||
|
||||
END OF TERMS AND CONDITIONS
|
||||
|
||||
APPENDIX: How to apply the Apache License to your work.
|
||||
|
||||
To apply the Apache License to your work, attach the following
|
||||
boilerplate notice, with the fields enclosed by brackets "[]"
|
||||
replaced with your own identifying information. (Don't include
|
||||
the brackets!) The text should be enclosed in the appropriate
|
||||
comment syntax for the file format. We also recommend that a
|
||||
file or class name and description of purpose be included on the
|
||||
same "printed page" as the copyright notice for easier
|
||||
identification within third-party archives.
|
||||
|
||||
Copyright [yyyy] [name of copyright owner]
|
||||
|
||||
Licensed under the Apache License, Version 2.0 (the "License");
|
||||
you may not use this file except in compliance with the License.
|
||||
You may obtain a copy of the License at
|
||||
|
||||
http://www.apache.org/licenses/LICENSE-2.0
|
||||
|
||||
Unless required by applicable law or agreed to in writing, software
|
||||
distributed under the License is distributed on an "AS IS" BASIS,
|
||||
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
See the License for the specific language governing permissions and
|
||||
limitations under the License.
|
||||
Vendored
+59
@@ -0,0 +1,59 @@
|
||||
# Kokoro TTS的rust推理实现
|
||||
|
||||
[Kokoro](https://github.com/hexgrad/kokoro)
|
||||
|
||||
> **Kokoro**是具有8200万参数的开放式TTS型号。
|
||||
> 尽管具有轻巧的体系结构,但它的质量与大型型号相当,同时更快,更具成本效益。使用Apache许可的权重,可以将Kokoro部署从生产环境到个人项目的任何地方。
|
||||
|
||||
|
||||
## 概述
|
||||
|
||||
本项目包含几个示例脚本,展示了如何使用Kokoro库进行语音合成。这些示例展示了如何直接合成语音和通过流式合成来处理更长的文本。
|
||||
|
||||
## 前置条件
|
||||
|
||||
- Rust编程语言
|
||||
- Tokio异步运行时
|
||||
- Rodio音频处理和播放的库(可选)
|
||||
- 下载模型资源,在这里可以找到[1.0模型](https://github.com/mzdk100/kokoro/releases/tag/V1.0)和[1.1模型](https://github.com/mzdk100/kokoro/releases/tag/V1.1)
|
||||
|
||||
## 特点
|
||||
- 跨平台,可以轻松在Windows、Mac OS上构建,也可以轻松交叉编译到安卓和iOS。
|
||||
- 离线推理,不依赖网络。
|
||||
- 足够轻量级,有不同尺寸的模型可以选择(最小的模型仅88M)。
|
||||
- 发音人多样化,跨越多国语言。
|
||||
|
||||
## 使用方法
|
||||
|
||||
1. 运行示例,克隆或下载本项目到本地。在项目根目录下运行:
|
||||
```shell
|
||||
cargo run --example synth_directly_v10
|
||||
cargo run --example synth_directly_v11
|
||||
```
|
||||
2. 集成到自己的项目中:
|
||||
```shell
|
||||
cargo add kokoro-tts
|
||||
```
|
||||
3. Linux依赖项
|
||||
```shell
|
||||
sudo apt install libasound2-dev
|
||||
```
|
||||
参考[examples](examples)文件夹中的示例代码进行开发。
|
||||
|
||||
|
||||
## 许可证
|
||||
|
||||
本项目采用Apache-2.0许可证。请查看项目中的LICENSE文件了解更多信息。
|
||||
|
||||
## 注意
|
||||
|
||||
- 请确保在运行示例之前已经正确加载了模型和语音数据。
|
||||
- 示例中的语音合成参数(如语音名称、文本内容、速度等)仅作为示例,实际使用时请根据需要进行调整。
|
||||
|
||||
## 贡献
|
||||
|
||||
如果您有任何改进意见或想要贡献代码,请随时提交Pull Request或创建Issue。
|
||||
|
||||
## 免责声明
|
||||
|
||||
本项目中的示例代码仅用于演示目的。在使用本项目中的代码时,请确保遵守相关法律法规和社会主义核心价值观。开发者不对因使用本项目中的代码而导致的任何后果负责。
|
||||
Vendored
+5
@@ -0,0 +1,5 @@
|
||||
fn main() {
|
||||
const SRC: &str = "src/transcription/en_ipa.c";
|
||||
cc::Build::new().file(SRC).compile("es");
|
||||
println!("cargo:rerun-if-changed={}", SRC);
|
||||
}
|
||||
+135010
File diff suppressed because it is too large
Load Diff
BIN
Binary file not shown.
+411980
File diff suppressed because it is too large
Load Diff
@@ -0,0 +1,21 @@
|
||||
use {
|
||||
kokoro_tts::{KokoroTts, Voice},
|
||||
voxudio::AudioPlayer,
|
||||
};
|
||||
|
||||
#[tokio::main]
|
||||
async fn main() -> anyhow::Result<()> {
|
||||
let tts = KokoroTts::new("kokoro-v1.0.int8.onnx", "voices.bin").await?;
|
||||
let (audio, took) = tts
|
||||
.synth(
|
||||
"Hello, world!你好,我们是一群追逐梦想的人。我正在使用qq。",
|
||||
Voice::ZfXiaoxiao(1.2),
|
||||
)
|
||||
.await?;
|
||||
println!("Synth took: {:?}", took);
|
||||
let mut player = AudioPlayer::new()?;
|
||||
player.play()?;
|
||||
player.write::<24000>(&audio, 1).await?;
|
||||
|
||||
Ok(())
|
||||
}
|
||||
@@ -0,0 +1,21 @@
|
||||
use {
|
||||
kokoro_tts::{KokoroTts, Voice},
|
||||
voxudio::AudioPlayer,
|
||||
};
|
||||
|
||||
#[tokio::main]
|
||||
async fn main() -> anyhow::Result<()> {
|
||||
let tts = KokoroTts::new("kokoro-v1.1-zh.onnx", "voices-v1.1-zh.bin").await?;
|
||||
let (audio, took) = tts
|
||||
.synth(
|
||||
"Hello, world!你好,我们是一群追逐梦想的人。我正在使用qq。",
|
||||
Voice::Zm045(1),
|
||||
)
|
||||
.await?;
|
||||
println!("Synth took: {:?}", took);
|
||||
let mut player = AudioPlayer::new()?;
|
||||
player.play()?;
|
||||
player.write::<24000>(&audio, 1).await?;
|
||||
|
||||
Ok(())
|
||||
}
|
||||
+51
@@ -0,0 +1,51 @@
|
||||
use {
|
||||
futures::StreamExt,
|
||||
kokoro_tts::{KokoroTts, Voice},
|
||||
voxudio::AudioPlayer,
|
||||
};
|
||||
|
||||
#[tokio::main]
|
||||
async fn main() -> anyhow::Result<()> {
|
||||
let tts = KokoroTts::new("kokoro-v1.1-zh.onnx", "voices-v1.1-zh.bin").await?;
|
||||
let (mut sink, mut stream) = tts.stream(Voice::Zm098(1));
|
||||
sink.synth("hello world.").await?;
|
||||
sink.synth("你好,我们是一群追逐梦想的人。").await?;
|
||||
sink.set_voice(Voice::Zf032(2));
|
||||
sink.synth("我正在使用qq。").await?;
|
||||
sink.set_voice(Voice::Zf090(3));
|
||||
sink.synth("今天天气如何?").await?;
|
||||
sink.set_voice(Voice::Zm045(1));
|
||||
sink.synth("你在使用Rust编程语言吗?").await?;
|
||||
sink.set_voice(Voice::Zf039(1));
|
||||
sink.synth(
|
||||
"你轻轻地走过那
|
||||
在风雨花丛中
|
||||
每一点一滴带走
|
||||
是我醒来的梦
|
||||
是在那天空上
|
||||
最美丽的云朵
|
||||
在那彩虹 最温柔的风",
|
||||
)
|
||||
.await?;
|
||||
sink.set_voice(Voice::Zf088(1));
|
||||
sink.synth(
|
||||
"你静静看着我们
|
||||
最不舍的面容
|
||||
像流星划过夜空
|
||||
转瞬即逝的梦
|
||||
是最深情的脸 在这一瞬间
|
||||
在遥远天边
|
||||
",
|
||||
)
|
||||
.await?;
|
||||
drop(sink);
|
||||
|
||||
let mut player = AudioPlayer::new()?;
|
||||
player.play()?;
|
||||
while let Some((audio, took)) = stream.next().await {
|
||||
player.write::<24000>(&audio, 1).await?;
|
||||
println!("Synth took: {:?}", took);
|
||||
}
|
||||
|
||||
Ok(())
|
||||
}
|
||||
Vendored
+514
@@ -0,0 +1,514 @@
|
||||
import re
|
||||
from typing import List, Optional, Tuple
|
||||
from jieba import posseg, cut_for_search
|
||||
from pypinyin import lazy_pinyin, load_phrases_dict, Style
|
||||
from dataclasses import dataclass
|
||||
|
||||
@dataclass
|
||||
class MToken:
|
||||
tag: str
|
||||
whitespace: str
|
||||
phonemes: Optional[str] = None
|
||||
|
||||
ZH_MAP = {"b":"ㄅ","p":"ㄆ","m":"ㄇ","f":"ㄈ","d":"ㄉ","t":"ㄊ","n":"ㄋ","l":"ㄌ","g":"ㄍ","k":"ㄎ","h":"ㄏ","j":"ㄐ","q":"ㄑ","x":"ㄒ","zh":"ㄓ","ch":"ㄔ","sh":"ㄕ","r":"ㄖ","z":"ㄗ","c":"ㄘ","s":"ㄙ","a":"ㄚ","o":"ㄛ","e":"ㄜ","ie":"ㄝ","ai":"ㄞ","ei":"ㄟ","ao":"ㄠ","ou":"ㄡ","an":"ㄢ","en":"ㄣ","ang":"ㄤ","eng":"ㄥ","er":"ㄦ","i":"ㄧ","u":"ㄨ","v":"ㄩ","ii":"ㄭ","iii":"十","ve":"月","ia":"压","ian":"言","iang":"阳","iao":"要","in":"阴","ing":"应","iong":"用","iou":"又","ong":"中","ua":"穵","uai":"外","uan":"万","uang":"王","uei":"为","uen":"文","ueng":"瓮","uo":"我","van":"元","vn":"云"}
|
||||
for p in ';:,.!?/—…"()“” 12345R':
|
||||
assert p not in ZH_MAP, p
|
||||
ZH_MAP[p] = p
|
||||
|
||||
unk = '❓'
|
||||
punc = frozenset(';:,.!?—…"()“”')
|
||||
phrases_dict = {
|
||||
'开户行': [['ka1i'], ['hu4'], ['hang2']],
|
||||
'发卡行': [['fa4'], ['ka3'], ['hang2']],
|
||||
'放款行': [['fa4ng'], ['kua3n'], ['hang2']],
|
||||
'茧行': [['jia3n'], ['hang2']],
|
||||
'行号': [['hang2'], ['ha4o']],
|
||||
'各地': [['ge4'], ['di4']],
|
||||
'借还款': [['jie4'], ['hua2n'], ['kua3n']],
|
||||
'时间为': [['shi2'], ['jia1n'], ['we2i']],
|
||||
'为准': [['we2i'], ['zhu3n']],
|
||||
'色差': [['se4'], ['cha1']],
|
||||
'嗲': [['dia3']],
|
||||
'呗': [['bei5']],
|
||||
'不': [['bu4']],
|
||||
'咗': [['zuo5']],
|
||||
'嘞': [['lei5']],
|
||||
'掺和': [['chan1'], ['huo5']]
|
||||
}
|
||||
must_erhua = {
|
||||
"小院儿", "胡同儿", "范儿", "老汉儿", "撒欢儿", "寻老礼儿", "妥妥儿", "媳妇儿"
|
||||
}
|
||||
must_not_neural_tone_words = {
|
||||
'男子', '女子', '分子', '原子', '量子', '莲子', '石子', '瓜子', '电子', '人人', '虎虎',
|
||||
'幺幺', '干嘛', '学子', '哈哈', '数数', '袅袅', '局地', '以下', '娃哈哈', '花花草草', '留得',
|
||||
'耕地', '想想', '熙熙', '攘攘', '卵子', '死死', '冉冉', '恳恳', '佼佼', '吵吵', '打打',
|
||||
'考考', '整整', '莘莘', '落地', '算子', '家家户户', '青青'
|
||||
}
|
||||
must_neural_tone_words = {
|
||||
'麻烦', '麻利', '鸳鸯', '高粱', '骨头', '骆驼', '马虎', '首饰', '馒头', '馄饨', '风筝',
|
||||
'难为', '队伍', '阔气', '闺女', '门道', '锄头', '铺盖', '铃铛', '铁匠', '钥匙', '里脊',
|
||||
'里头', '部分', '那么', '道士', '造化', '迷糊', '连累', '这么', '这个', '运气', '过去',
|
||||
'软和', '转悠', '踏实', '跳蚤', '跟头', '趔趄', '财主', '豆腐', '讲究', '记性', '记号',
|
||||
'认识', '规矩', '见识', '裁缝', '补丁', '衣裳', '衣服', '衙门', '街坊', '行李', '行当',
|
||||
'蛤蟆', '蘑菇', '薄荷', '葫芦', '葡萄', '萝卜', '荸荠', '苗条', '苗头', '苍蝇', '芝麻',
|
||||
'舒服', '舒坦', '舌头', '自在', '膏药', '脾气', '脑袋', '脊梁', '能耐', '胳膊', '胭脂',
|
||||
'胡萝', '胡琴', '胡同', '聪明', '耽误', '耽搁', '耷拉', '耳朵', '老爷', '老实', '老婆',
|
||||
'戏弄', '将军', '翻腾', '罗嗦', '罐头', '编辑', '结实', '红火', '累赘', '糨糊', '糊涂',
|
||||
'精神', '粮食', '簸箕', '篱笆', '算计', '算盘', '答应', '笤帚', '笑语', '笑话', '窟窿',
|
||||
'窝囊', '窗户', '稳当', '稀罕', '称呼', '秧歌', '秀气', '秀才', '福气', '祖宗', '砚台',
|
||||
'码头', '石榴', '石头', '石匠', '知识', '眼睛', '眯缝', '眨巴', '眉毛', '相声', '盘算',
|
||||
'白净', '痢疾', '痛快', '疟疾', '疙瘩', '疏忽', '畜生', '生意', '甘蔗', '琵琶', '琢磨',
|
||||
'琉璃', '玻璃', '玫瑰', '玄乎', '狐狸', '状元', '特务', '牲口', '牙碜', '牌楼', '爽快',
|
||||
'爱人', '热闹', '烧饼', '烟筒', '烂糊', '点心', '炊帚', '灯笼', '火候', '漂亮', '滑溜',
|
||||
'溜达', '温和', '清楚', '消息', '浪头', '活泼', '比方', '正经', '欺负', '模糊', '槟榔',
|
||||
'棺材', '棒槌', '棉花', '核桃', '栅栏', '柴火', '架势', '枕头', '枇杷', '机灵', '本事',
|
||||
'木头', '木匠', '朋友', '月饼', '月亮', '暖和', '明白', '时候', '新鲜', '故事', '收拾',
|
||||
'收成', '提防', '挖苦', '挑剔', '指甲', '指头', '拾掇', '拳头', '拨弄', '招牌', '招呼',
|
||||
'抬举', '护士', '折腾', '扫帚', '打量', '打算', '打扮', '打听', '打发', '扎实', '扁担',
|
||||
'戒指', '懒得', '意识', '意思', '悟性', '怪物', '思量', '怎么', '念头', '念叨', '别人',
|
||||
'快活', '忙活', '志气', '心思', '得罪', '张罗', '弟兄', '开通', '应酬', '庄稼', '干事',
|
||||
'帮手', '帐篷', '希罕', '师父', '师傅', '巴结', '巴掌', '差事', '工夫', '岁数', '屁股',
|
||||
'尾巴', '少爷', '小气', '小伙', '将就', '对头', '对付', '寡妇', '家伙', '客气', '实在',
|
||||
'官司', '学问', '字号', '嫁妆', '媳妇', '媒人', '婆家', '娘家', '委屈', '姑娘', '姐夫',
|
||||
'妯娌', '妥当', '妖精', '奴才', '女婿', '头发', '太阳', '大爷', '大方', '大意', '大夫',
|
||||
'多少', '多么', '外甥', '壮实', '地道', '地方', '在乎', '困难', '嘴巴', '嘱咐', '嘟囔',
|
||||
'嘀咕', '喜欢', '喇嘛', '喇叭', '商量', '唾沫', '哑巴', '哈欠', '哆嗦', '咳嗽', '和尚',
|
||||
'告诉', '告示', '含糊', '吓唬', '后头', '名字', '名堂', '合同', '吆喝', '叫唤', '口袋',
|
||||
'厚道', '厉害', '千斤', '包袱', '包涵', '匀称', '勤快', '动静', '动弹', '功夫', '力气',
|
||||
'前头', '刺猬', '刺激', '别扭', '利落', '利索', '利害', '分析', '出息', '凑合', '凉快',
|
||||
'冷战', '冤枉', '冒失', '养活', '关系', '先生', '兄弟', '便宜', '使唤', '佩服', '作坊',
|
||||
'体面', '位置', '似的', '伙计', '休息', '什么', '人家', '亲戚', '亲家', '交情', '云彩',
|
||||
'事情', '买卖', '主意', '丫头', '丧气', '两口', '东西', '东家', '世故', '不由', '下水',
|
||||
'下巴', '上头', '上司', '丈夫', '丈人', '一辈', '那个', '菩萨', '父亲', '母亲', '咕噜',
|
||||
'邋遢', '费用', '冤家', '甜头', '介绍', '荒唐', '大人', '泥鳅', '幸福', '熟悉', '计划',
|
||||
'扑腾', '蜡烛', '姥爷', '照顾', '喉咙', '吉他', '弄堂', '蚂蚱', '凤凰', '拖沓', '寒碜',
|
||||
'糟蹋', '倒腾', '报复', '逻辑', '盘缠', '喽啰', '牢骚', '咖喱', '扫把', '惦记'
|
||||
}
|
||||
not_erhua = {
|
||||
"虐儿", "为儿", "护儿", "瞒儿", "救儿", "替儿", "有儿", "一儿", "我儿", "俺儿", "妻儿",
|
||||
"拐儿", "聋儿", "乞儿", "患儿", "幼儿", "孤儿", "婴儿", "婴幼儿", "连体儿", "脑瘫儿",
|
||||
"流浪儿", "体弱儿", "混血儿", "蜜雪儿", "舫儿", "祖儿", "美儿", "应采儿", "可儿", "侄儿",
|
||||
"孙儿", "侄孙儿", "女儿", "男儿", "红孩儿", "花儿", "虫儿", "马儿", "鸟儿", "猪儿", "猫儿",
|
||||
"狗儿", "少儿"
|
||||
}
|
||||
BU = '不'
|
||||
YI = '一'
|
||||
X_ENG = frozenset(['x', 'eng'])
|
||||
|
||||
# g2p
|
||||
load_phrases_dict(phrases_dict)
|
||||
|
||||
def get_initials_finals(word: str) -> Tuple[List[str], List[str]]:
    """Split every character of ``word`` into a pinyin initial and a toned final.

    Both lists come from pypinyin's ``lazy_pinyin`` (``Style.INITIALS`` and
    ``Style.FINALS_TONE3``, neutral tone written as ``5``).  A bare final
    ``i<tone>`` is rewritten to ``ii<tone>`` after z/c/s and to ``iii<tone>``
    after zh/ch/sh/r so the three vowel qualities stay distinguishable.

    Args:
        word: the (already segmented) word to convert.

    Returns:
        ``(initials, finals)`` as two parallel lists.
    """
    orig_initials = lazy_pinyin(word, neutral_tone_with_five=True, style=Style.INITIALS)
    orig_finals = lazy_pinyin(word, neutral_tone_with_five=True, style=Style.FINALS_TONE3)

    # After pypinyin==0.44.0, '嗯' must be forced to "n2" because the initial
    # and the final cannot both be empty at the same time.
    for i, char in enumerate(word):
        if char == "嗯":
            orig_finals[i] = "n2"

    initials = []
    finals = []
    for c, v in zip(orig_initials, orig_finals):
        if re.match(r'i\d', v):
            if c in ['z', 'c', 's']:
                # zi, ci, si
                v = re.sub('i', 'ii', v)
            elif c in ['zh', 'ch', 'sh', 'r']:
                # zhi, chi, shi
                v = re.sub('i', 'iii', v)
        initials.append(c)
        finals.append(v)

    return initials, finals
|
||||
|
||||
def merge_erhua(initials: List[str], finals: List[str], word: str, pos: str) -> Tuple[List[str], List[str]]:
    """Apply erhua: fold a word-final "儿" into the preceding syllable.

    A trailing "儿" whose final is ``er1`` is first normalised to ``er2``
    (this mutates ``finals`` in place).  The lists are returned unchanged when
    the word must keep "儿" as a full syllable (listed in ``not_erhua`` or
    tagged "a"/"j"/"nr", unless listed in ``must_erhua``) or when the finals do
    not line up one-to-one with the characters.  Otherwise the trailing "儿"
    syllable is dropped and an ``R`` marker is inserted before the tone digit
    of the previous final.
    """
    tail = len(finals) - 1

    # fix er1
    if tail >= 0 and word[tail] == "儿" and finals[tail] == 'er1':
        finals[tail] = 'er2'

    # "儿" is pronounced as its own syllable for these words / POS tags.
    keeps_full_syllable = word in not_erhua or pos in {"a", "j", "nr"}
    if keeps_full_syllable and word not in must_erhua:
        return initials, finals

    # Cases such as "……": pinyin items and characters are not aligned.
    if len(finals) != len(word):
        return initials, finals

    # "儿" is not pronounced separately: merge it into the previous final.
    out_initials = []
    out_finals = []
    for idx, final in enumerate(finals):
        absorbs_er = (
            idx == tail
            and word[idx] == "儿"
            and final in {"er2", "er5"}
            and word[-2:] not in not_erhua
            and out_finals
        )
        if absorbs_er:
            previous = out_finals[-1]
            out_finals[-1] = previous[:-1] + "R" + previous[-1]
        else:
            out_initials.append(initials[idx])
            out_finals.append(final)

    return out_initials, out_finals
|
||||
|
||||
# merge "不" and the word behind it
|
||||
# if don't merge, "不" sometimes appears alone according to jieba, which may occur sandhi error
|
||||
def merge_bu(seg: List[Tuple[str, str]]) -> List[Tuple[str, str]]:
    """Glue a stand-alone "不" onto the word that follows it.

    A segment whose predecessor is exactly "不" gets "不" prepended (unless the
    segment itself is tagged ``x``/``eng``).  The lone "不" segment is dropped
    when a following segment exists and that segment is not ``x``/``eng``.
    """
    merged = []
    total = len(seg)
    for idx, (token, tag) in enumerate(seg):
        if tag not in X_ENG and idx > 0:
            previous_token, _ = seg[idx - 1]
            if previous_token == BU:
                token = previous_token + token

        following_tag = None
        if idx + 1 < total:
            _, following_tag = seg[idx + 1]

        # A bare "不" followed by a regular word was merged into that word.
        absorbed_by_next = (
            token == BU
            and following_tag is not None
            and following_tag not in X_ENG
        )
        if not absorbed_by_next:
            merged.append((token, tag))
    return merged
|
||||
|
||||
# Pass 1: merge "一" with the identical words on its left and right,
#         e.g. "听", "一", "听" -> "听一听".
# Pass 2: merge a stand-alone "一" with the word that follows it.
# Without merging, jieba sometimes emits "一" on its own, which can lead to
# tone-sandhi errors.
# e.g.
# input seg:  [('听', 'v'), ('一', 'm'), ('听', 'v')]
# output seg: [('听一听', 'v')]
def merge_yi(seg: List[Tuple[str, str]]) -> List[Tuple[str, str]]:
    """Merge "一" into its neighbours; see the comment above for both passes."""
    last_index = len(seg) - 1

    # Pass 1: verb + "一" + same word.
    collapsed = []
    consumed_next = False
    for idx, (token, tag) in enumerate(seg):
        if consumed_next:
            consumed_next = False
            continue
        between_same_words = (
            token == YI
            and 0 < idx < last_index
            and seg[idx - 1][0] == seg[idx + 1][0]
            and seg[idx - 1][1] == "v"
            and seg[idx + 1][1] not in X_ENG
        )
        if between_same_words:
            head, head_tag = collapsed[-1]
            collapsed[-1] = (head + YI + seg[idx + 1][0], head_tag)
            consumed_next = True
        else:
            collapsed.append((token, tag))

    # Pass 2: lone "一" + following word.
    merged = []
    for token, tag in collapsed:
        if merged and merged[-1][0] == YI and tag not in X_ENG:
            head, head_tag = merged[-1]
            merged[-1] = (head + token, head_tag)
        else:
            merged.append((token, tag))
    return merged
|
||||
|
||||
def merge_reduplication(seg: List[Tuple[str, str]]) -> List[Tuple[str, str]]:
    """Concatenate a segment onto the previous one when both words are equal.

    Segments tagged ``x``/``eng`` are never merged.  The result is a list of
    two-element ``[word, pos]`` lists.
    """
    merged = []
    for token, tag in seg:
        repeats_previous = bool(merged) and token == merged[-1][0]
        if repeats_previous and tag not in X_ENG:
            merged[-1][0] += token
        else:
            merged.append([token, tag])
    return merged
|
||||
|
||||
def is_reduplication(word: str) -> bool:
    """Return True when ``word`` is exactly two identical characters."""
    if len(word) != 2:
        return False
    first, second = word
    return first == second
|
||||
|
||||
# Merge two adjacent words when every syllable of both is third tone.
def merge_continuous_three_tones(seg: List[Tuple[str, str]]) -> List[Tuple[str, str]]:
    """Join neighbouring all-third-tone words so tone sandhi sees them together.

    A word is appended to the previous output entry when neither is tagged
    ``x``/``eng``-like for the current word, both consist only of third-tone
    finals, the previous word was not itself just merged, the previous word is
    not a reduplication, and the combined length is at most three characters.
    Returns a list of ``[word, pos]`` lists.
    """
    finals_per_word = []
    for word, pos in seg:
        if pos in X_ENG:
            finals_per_word.append(['0'])
            continue
        word_finals = lazy_pinyin(word, neutral_tone_with_five=True, style=Style.FINALS_TONE3)
        # After pypinyin==0.44.0, '嗯' must be "n2": initial and final cannot
        # both be empty at the same time.
        for k, char in enumerate(word):
            if char == "嗯":
                word_finals[k] = "n2"
        finals_per_word.append(word_finals)

    merged = []
    was_merged = [False] * len(seg)
    for idx, (word, pos) in enumerate(seg):
        both_all_third = (
            pos not in X_ENG
            and idx >= 1
            and all_tone_three(finals_per_word[idx - 1])
            and all_tone_three(finals_per_word[idx])
            and not was_merged[idx - 1]
        )
        # A reduplicated previous word is left alone: it needs neutral-tone
        # sandhi instead.
        if (
            both_all_third
            and not is_reduplication(seg[idx - 1][0])
            and len(seg[idx - 1][0]) + len(word) <= 3
        ):
            merged[-1][0] += word
            was_merged[idx] = True
        else:
            merged.append([word, pos])

    return merged
|
||||
|
||||
# Merge two adjacent words when the last syllable of the first and the first
# syllable of the second are both third tone.
def merge_continuous_three_tones_2(seg: List[Tuple[str, str]]) -> List[Tuple[str, str]]:
    """Join neighbouring words that meet at a third-tone/third-tone boundary.

    Same merging conditions as ``merge_continuous_three_tones`` except that
    only the boundary syllables (last final of the previous word, first final
    of the current word) have to be third tone.  Returns a list of
    ``[word, pos]`` lists.
    """
    finals_per_word = []
    for word, pos in seg:
        if pos in X_ENG:
            finals_per_word.append(['0'])
            continue
        word_finals = lazy_pinyin(word, neutral_tone_with_five=True, style=Style.FINALS_TONE3)
        # After pypinyin==0.44.0, '嗯' must be "n2": initial and final cannot
        # both be empty at the same time.
        for k, char in enumerate(word):
            if char == "嗯":
                word_finals[k] = "n2"
        finals_per_word.append(word_finals)

    merged = []
    was_merged = [False] * len(seg)
    for idx, (word, pos) in enumerate(seg):
        third_tone_boundary = (
            pos not in X_ENG
            and idx >= 1
            and finals_per_word[idx - 1][-1][-1] == "3"
            and finals_per_word[idx][0][-1] == "3"
            and not was_merged[idx - 1]
        )
        # A reduplicated previous word is left alone: it needs neutral-tone
        # sandhi instead.
        if (
            third_tone_boundary
            and not is_reduplication(seg[idx - 1][0])
            and len(seg[idx - 1][0]) + len(word) <= 3
        ):
            merged[-1][0] += word
            was_merged[idx] = True
        else:
            merged.append([word, pos])
    return merged
|
||||
|
||||
def merge_er(seg: List[Tuple[str, str]]) -> List[Tuple[str, str]]:
    """Attach a stand-alone "儿" segment to the segment before it.

    The merge is skipped for the first segment and when the previous output
    entry is tagged ``x``/``eng``.  Returns a list of ``[word, pos]`` lists.
    """
    merged = []
    for idx, (token, tag) in enumerate(seg):
        attach = idx >= 1 and token == "儿" and merged[-1][1] not in X_ENG
        if attach:
            merged[-1][0] += token
        else:
            merged.append([token, tag])
    return merged
|
||||
|
||||
def pre_merge_for_modify(seg: List[Tuple[str, str]]) -> List[Tuple[str, str]]:
    """Run every segment-merging pass, in order, before tone sandhi.

    Args:
        seg: ``[(word, pos), ...]`` as produced by the word segmenter.

    Returns:
        The segment list after the "不", "一", reduplication, third-tone
        (both variants) and "儿" merges have been applied in that order.
    """
    passes = (
        merge_bu,
        merge_yi,
        merge_reduplication,
        merge_continuous_three_tones,
        merge_continuous_three_tones_2,
        merge_er,
    )
    for merge_pass in passes:
        seg = merge_pass(seg)
    return seg
|
||||
|
||||
def bu_sandhi(word: str, finals: List[str]) -> List[str]:
    """Apply the tone change of "不" to ``finals`` (modified in place).

    * In a three-character word with "不" in the middle (e.g. 看不懂) the
      middle syllable becomes neutral tone (5).
    * Otherwise every "不" directly followed by a fourth-tone syllable
      (e.g. 不怕) becomes second tone.
    """
    if len(word) == 3 and word[1] == BU:
        finals[1] = finals[1][:-1] + "5"
        return finals

    # Only positions that have a following syllable can change.
    for idx, char in enumerate(word[:-1]):
        if char == BU and finals[idx + 1][-1] == "4":
            finals[idx] = finals[idx][:-1] + "2"
    return finals
|
||||
|
||||
def yi_sandhi(word: str, finals: List[str]) -> List[str]:
    """Apply the tone change of "一" to ``finals`` (modified in place).

    Rules, checked in order:
      1. "一" inside a pure number sequence (e.g. 一零零, 二一零): unchanged.
      2. "一" between two identical characters (e.g. 看一看): neutral tone (5).
      3. Ordinal "第一…": first tone.
      4. Otherwise, "一" before a tone-4 or tone-5 syllable becomes tone 2
         (e.g. 一段); before any other syllable it becomes tone 4 (e.g. 一天),
         unless the next character is punctuation, in which case it is left
         as it is.
    """
    def retone(idx: int, tone: str) -> None:
        finals[idx] = finals[idx][:-1] + tone

    if YI in word and all(char.isnumeric() for char in word if char != YI):
        return finals

    if len(word) == 3 and word[1] == YI and word[0] == word[-1]:
        retone(1, "5")
    elif word.startswith("第一"):
        retone(1, "1")
    else:
        # Only a "一" that has a following character can change.
        for idx, char in enumerate(word[:-1]):
            if char != YI:
                continue
            if finals[idx + 1][-1] in {'4', '5'}:
                retone(idx, "2")
            elif word[idx + 1] not in punc:
                retone(idx, "4")
    return finals
|
||||
|
||||
def split_word(word: str) -> List[str]:
    """Split ``word`` into two parts around its shortest search-mode sub-word.

    The shortest piece returned by jieba's ``cut_for_search`` is taken as the
    anchor.  If ``word`` starts with it, the result is ``[anchor, rest]``;
    otherwise the anchor is treated as the suffix and the result is
    ``[rest, anchor]``.
    """
    pieces = sorted(cut_for_search(word), key=len)
    anchor = pieces[0]
    if word.startswith(anchor):
        return [anchor, word[len(anchor):]]
    return [word[:-len(anchor)], anchor]
|
||||
|
||||
# Meaning of the jieba POS tags: https://blog.csdn.net/weixin_44174352/article/details/113731041
# e.g.
# word: "家里"
# pos: "s"
# finals: ['ia1', 'i3']
def neural_sandhi(word: str, pos: str, finals: List[str]) -> List[str]:
    """Apply neutral-tone (tone 5) rules to the finals of ``word``.

    Words in ``must_not_neural_tone_words`` are returned untouched.  Otherwise
    ``finals`` is modified in place for reduplicated characters and for the
    suffix/particle rules below, then the word is split in two with
    ``split_word`` and each half that is (or ends with) an entry of
    ``must_neural_tone_words`` gets a neutral final syllable.  The returned
    list is the concatenation of the two halves.
    """
    if word in must_not_neural_tone_words:
        return finals

    def to_neutral(items: List[str], idx: int) -> None:
        items[idx] = items[idx][:-1] + "5"

    # Reduplicated characters in nouns, verbs and adjectives, e.g. 奶奶, 试试, 旺旺
    for j in range(1, len(word)):
        if word[j] == word[j - 1] and pos[0] in {"n", "v", "a"}:
            to_neutral(finals, j)

    ge_idx = word.find("个")
    last_syllable_is_neutral = (
        # sentence-final particles
        (len(word) >= 1 and word[-1] in "吧呢啊呐噻嘛吖嗨呐哦哒滴哩哟喽啰耶喔诶")
        # structural particles
        or (len(word) >= 1 and word[-1] in "的地得")
        # e.g. 走了, 看着, 去过
        or (len(word) == 1 and word in "了着过" and pos in {"ul", "uz", "ug"})
        or (len(word) > 1 and word[-1] in "们子" and pos in {"r", "n"})
        # e.g. 桌上, 地下
        or (len(word) > 1 and word[-1] in "上下" and pos in {"s", "l", "f"})
        # e.g. 上来, 下去
        or (len(word) > 1 and word[-1] in "来去" and word[-2] in "上下进出回过起开")
    )
    if last_syllable_is_neutral:
        to_neutral(finals, -1)
    # "个" used as a measure word
    elif (ge_idx >= 1 and (word[ge_idx - 1].isnumeric() or word[ge_idx - 1] in "几有两半多各整每做是")) or word == '个':
        to_neutral(finals, ge_idx)
    elif word in must_neural_tone_words or word[-2:] in must_neural_tone_words:
        to_neutral(finals, -1)

    parts = split_word(word)
    cut = len(parts[0])
    halves = [finals[:cut], finals[cut:]]
    for part, half in zip(parts, halves):
        # conventionally neutral-tone words in Chinese
        if part in must_neural_tone_words or part[-2:] in must_neural_tone_words:
            to_neutral(half, -1)
    return halves[0] + halves[1]
|
||||
|
||||
def all_tone_three(finals: List[str]) -> bool:
    """Return True when every final in ``finals`` ends with the tone digit 3.

    An empty final string counts as "not third tone" instead of raising
    ``IndexError``.  An empty list yields True, as before.
    """
    return all(final.endswith("3") for final in finals)
|
||||
|
||||
def three_sandhi(word: str, finals: List[str]) -> List[str]:
    """Apply third-tone sandhi (3 + 3 -> 2 + 3) according to word length.

    * 2 characters, all third tone: the first becomes tone 2.
    * 3 characters: the word is split with ``split_word``; the rule is applied
      per sub-word and across the boundary (see inline comments).
    * 4 characters: treated as two 2-character halves.
    Other lengths are returned unchanged.  ``finals`` may be modified in place.
    """
    def raised(final: str) -> str:
        return final[:-1] + "2"

    length = len(word)
    if length == 2 and all_tone_three(finals):
        finals[0] = raised(finals[0])
    elif length == 3:
        head_len = len(split_word(word)[0])
        if all_tone_three(finals):
            if head_len == 2:
                # disyllabic + monosyllabic, e.g. 蒙古/包
                finals[0] = raised(finals[0])
                finals[1] = raised(finals[1])
            elif head_len == 1:
                # monosyllabic + disyllabic, e.g. 纸/老虎
                finals[1] = raised(finals[1])
        else:
            finals_list = [finals[:head_len], finals[head_len:]]
            if len(finals_list) == 2:
                for idx, sub in enumerate(finals_list):
                    if all_tone_three(sub) and len(sub) == 2:
                        # e.g. 所有/人
                        sub[0] = raised(sub[0])
                    elif (
                        idx == 1
                        and not all_tone_three(sub)
                        and sub[0][-1] == "3"
                        and finals_list[0][-1][-1] == "3"
                    ):
                        # e.g. 好/喜欢
                        finals_list[0][-1] = raised(finals_list[0][-1])
                finals = finals_list[0] + finals_list[1]
    elif length == 4:
        # Treat an idiom as two words of length 2.
        halves = [finals[:2], finals[2:]]
        finals = []
        for sub in halves:
            if all_tone_three(sub):
                sub[0] = raised(sub[0])
            finals += sub

    return finals
|
||||
|
||||
def modified_tone(word: str, pos: str, finals: List[str]) -> List[str]:
    """Run all tone-sandhi rules on one word.

    Args:
        word: the segmented word.
        pos: its part-of-speech tag.
        finals: toned finals, ``[final1, ..., finaln]``.

    Returns:
        The finals after the "不", "一", neutral-tone and third-tone rules
        have been applied in that order.
    """
    for rule in (bu_sandhi, yi_sandhi):
        finals = rule(word, finals)
    finals = neural_sandhi(word, pos, finals)
    finals = three_sandhi(word, finals)
    return finals
|
||||
|
||||
def g2p(text: str, with_erhua: bool = True) -> str:
    """Convert ``text`` to a phoneme string.

    The text is segmented with jieba (``posseg.lcut``), the segments are
    pre-merged for tone sandhi, and each Chinese word is turned into
    initials/finals, tone-modified, optionally erhua-merged, and mapped
    through ``ZH_MAP``.  Consecutive Chinese words are separated by ``/``.
    Punctuation from ``punc`` is passed through; other ``x``/``eng`` tokens
    have no phonemes and are rendered as the unknown marker.

    Args:
        text: input text.
        with_erhua: when True, apply ``merge_erhua`` to every word.

    Returns:
        String of phonemes, e.g. 'ㄋㄧ2ㄏㄠ3/ㄕ十4ㄐㄝ4'.
    """
    tokens = []
    # Fix word-segmentation bad cases before tone sandhi.
    seg_cut = pre_merge_for_modify(posseg.lcut(text))

    for word, pos in seg_cut:
        if pos == 'x' and '\u4E00' <= min(word) and max(word) <= '\u9FFF':
            # Tagged unknown but made up entirely of CJK ideographs: treat it
            # as a regular word (tag 'X' is not in X_ENG).
            pos = 'X'
        elif pos != 'x' and word in punc:
            pos = 'x'
        tk = MToken(tag=pos, whitespace='')

        if pos in X_ENG:
            if not word.isspace():
                if pos == 'x' and word in punc:
                    tk.phonemes = word
                tokens.append(tk)
            elif tokens:
                # Whitespace is attached to the previous token.
                tokens[-1].whitespace += word
            continue
        elif tokens and tokens[-1].tag not in X_ENG and not tokens[-1].whitespace:
            # Separator between two adjacent Chinese words.
            tokens[-1].whitespace = '/'

        # g2p
        sub_initials, sub_finals = get_initials_finals(word)
        # tone sandhi
        sub_finals = modified_tone(word, pos, sub_finals)
        # erhua
        if with_erhua:
            sub_initials, sub_finals = merge_erhua(sub_initials, sub_finals, word, pos)

        phones = []
        for c, v in zip(sub_initials, sub_finals):
            # NOTE: post-processing of the pypinyin output;
            # i, ii and iii are kept distinct.
            if c:
                phones.append(c)
            if v and (v not in punc or v != c):
                phones.append(v)
        # Split the erhua marker and the tone digit into separate symbols.
        phones = '_'.join(phones).replace('_eR', '_er').replace('R', '_R')
        phones = re.sub(r'(?=\d)', '_', phones).split('_')
        tk.phonemes = ''.join(ZH_MAP.get(p, unk) for p in phones)
        tokens.append(tk)

    return ''.join((unk if tk.phonemes is None else tk.phonemes) + tk.whitespace for tk in tokens)
|
||||
|
||||
print(g2p('时间为。Hello, world!你好,我们是一群追逐梦想的人。我正在使用qq。忽略卢驴'))
|
||||
seg = posseg.lcut('不好看', True)
|
||||
print(seg, merge_bu(seg))
|
||||
seg = merge_bu(posseg.lcut('听一听一个', True))
|
||||
print(seg, merge_yi(seg))
|
||||
seg = merge_bu(posseg.lcut('谢谢谢谢', True))
|
||||
print(seg, merge_reduplication(seg))
|
||||
seg = merge_bu(posseg.lcut('小美好', True))
|
||||
print(seg, merge_continuous_three_tones(seg))
|
||||
seg = merge_bu(posseg.lcut('风景好', True))
|
||||
print(seg, merge_continuous_three_tones_2(seg))
|
||||
Vendored
+3
@@ -0,0 +1,3 @@
|
||||
set PATH=%PATH%;D:\msys64\mingw64\bin
|
||||
cargo run --example synth_directly_v11
|
||||
pause
|
||||
Vendored
+80
@@ -0,0 +1,80 @@
|
||||
use crate::G2PError;
|
||||
use bincode::error::DecodeError;
|
||||
use ndarray::ShapeError;
|
||||
use ort::Error as OrtError;
|
||||
use std::{
|
||||
error::Error,
|
||||
fmt::{Debug, Display, Formatter, Result as FmtResult},
|
||||
io::Error as IoError,
|
||||
time::SystemTimeError,
|
||||
};
|
||||
|
||||
#[derive(Debug)]
|
||||
pub enum KokoroError {
|
||||
Decode(DecodeError),
|
||||
G2P(G2PError),
|
||||
Io(IoError),
|
||||
ModelReleased,
|
||||
Ort(OrtError),
|
||||
Send(String),
|
||||
Shape(ShapeError),
|
||||
SystemTime(SystemTimeError),
|
||||
VoiceNotFound(String),
|
||||
VoiceVersionInvalid(String),
|
||||
}
|
||||
|
||||
impl Display for KokoroError {
|
||||
fn fmt(&self, f: &mut Formatter<'_>) -> FmtResult {
|
||||
write!(f, "KokoroError: ")?;
|
||||
match self {
|
||||
Self::Decode(e) => Display::fmt(e, f),
|
||||
Self::G2P(e) => Display::fmt(e, f),
|
||||
Self::Io(e) => Display::fmt(e, f),
|
||||
Self::Ort(e) => Display::fmt(e, f),
|
||||
Self::ModelReleased => write!(f, "ModelReleased"),
|
||||
Self::Send(e) => Display::fmt(e, f),
|
||||
Self::Shape(e) => Display::fmt(e, f),
|
||||
Self::SystemTime(e) => Display::fmt(e, f),
|
||||
Self::VoiceNotFound(name) => write!(f, "VoiceNotFound({})", name),
|
||||
Self::VoiceVersionInvalid(msg) => write!(f, "VoiceVersionInvalid({})", msg),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
impl Error for KokoroError {}
|
||||
|
||||
impl From<IoError> for KokoroError {
|
||||
fn from(value: IoError) -> Self {
|
||||
Self::Io(value)
|
||||
}
|
||||
}
|
||||
|
||||
impl From<DecodeError> for KokoroError {
|
||||
fn from(value: DecodeError) -> Self {
|
||||
Self::Decode(value)
|
||||
}
|
||||
}
|
||||
|
||||
impl From<OrtError> for KokoroError {
|
||||
fn from(value: OrtError) -> Self {
|
||||
Self::Ort(value)
|
||||
}
|
||||
}
|
||||
|
||||
impl From<G2PError> for KokoroError {
|
||||
fn from(value: G2PError) -> Self {
|
||||
Self::G2P(value)
|
||||
}
|
||||
}
|
||||
|
||||
impl From<ShapeError> for KokoroError {
|
||||
fn from(value: ShapeError) -> Self {
|
||||
Self::Shape(value)
|
||||
}
|
||||
}
|
||||
|
||||
impl From<SystemTimeError> for KokoroError {
|
||||
fn from(value: SystemTimeError) -> Self {
|
||||
Self::SystemTime(value)
|
||||
}
|
||||
}
|
||||
Vendored
+321
@@ -0,0 +1,321 @@
|
||||
/// 文本到国际音标的转换
|
||||
mod v10;
|
||||
mod v11;
|
||||
|
||||
use super::PinyinError;
|
||||
use chinese_number::{ChineseCase, ChineseCountMethod, ChineseVariant, NumberToChinese};
|
||||
#[cfg(feature = "use-cmudict")]
|
||||
use cmudict_fast::{Cmudict, Error as CmudictError};
|
||||
use pinyin::ToPinyin;
|
||||
use regex::{Captures, Error as RegexError, Regex};
|
||||
use std::{
|
||||
error::Error,
|
||||
fmt::{Display, Formatter, Result as FmtResult},
|
||||
};
|
||||
|
||||
#[derive(Debug)]
|
||||
pub enum G2PError {
|
||||
#[cfg(feature = "use-cmudict")]
|
||||
CmudictError(CmudictError),
|
||||
EnptyData,
|
||||
#[cfg(not(feature = "use-cmudict"))]
|
||||
Nul(std::ffi::NulError),
|
||||
Pinyin(PinyinError),
|
||||
Regex(RegexError),
|
||||
#[cfg(not(feature = "use-cmudict"))]
|
||||
Utf8(std::str::Utf8Error),
|
||||
}
|
||||
|
||||
impl Display for G2PError {
|
||||
fn fmt(&self, f: &mut Formatter<'_>) -> FmtResult {
|
||||
write!(f, "G2PError: ")?;
|
||||
match self {
|
||||
#[cfg(feature = "use-cmudict")]
|
||||
Self::CmudictError(e) => Display::fmt(e, f),
|
||||
Self::EnptyData => Display::fmt("EmptyData", f),
|
||||
#[cfg(not(feature = "use-cmudict"))]
|
||||
Self::Nul(e) => Display::fmt(e, f),
|
||||
Self::Pinyin(e) => Display::fmt(e, f),
|
||||
Self::Regex(e) => Display::fmt(e, f),
|
||||
#[cfg(not(feature = "use-cmudict"))]
|
||||
Self::Utf8(e) => Display::fmt(e, f),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
impl Error for G2PError {}
|
||||
|
||||
impl From<PinyinError> for G2PError {
|
||||
fn from(value: PinyinError) -> Self {
|
||||
Self::Pinyin(value)
|
||||
}
|
||||
}
|
||||
|
||||
impl From<RegexError> for G2PError {
|
||||
fn from(value: RegexError) -> Self {
|
||||
Self::Regex(value)
|
||||
}
|
||||
}
|
||||
|
||||
#[cfg(feature = "use-cmudict")]
|
||||
impl From<CmudictError> for G2PError {
|
||||
fn from(value: CmudictError) -> Self {
|
||||
Self::CmudictError(value)
|
||||
}
|
||||
}
|
||||
|
||||
#[cfg(not(feature = "use-cmudict"))]
|
||||
impl From<std::ffi::NulError> for G2PError {
|
||||
fn from(value: std::ffi::NulError) -> Self {
|
||||
Self::Nul(value)
|
||||
}
|
||||
}
|
||||
|
||||
#[cfg(not(feature = "use-cmudict"))]
|
||||
impl From<std::str::Utf8Error> for G2PError {
|
||||
fn from(value: std::str::Utf8Error) -> Self {
|
||||
Self::Utf8(value)
|
||||
}
|
||||
}
|
||||
|
||||
fn word2ipa_zh(word: &str) -> Result<String, G2PError> {
|
||||
let iter = word.chars().map(|i| match i.to_pinyin() {
|
||||
None => Ok(i.to_string()),
|
||||
Some(p) => v10::py2ipa(p.with_tone_num_end()),
|
||||
});
|
||||
|
||||
let mut result = String::new();
|
||||
for i in iter {
|
||||
result.push_str(&i?);
|
||||
}
|
||||
Ok(result)
|
||||
}
|
||||
|
||||
#[cfg(feature = "use-cmudict")]
|
||||
fn word2ipa_en(word: &str) -> Result<String, G2PError> {
|
||||
use super::{arpa_to_ipa, letters_to_ipa};
|
||||
use std::{
|
||||
io::{Error as IoError, ErrorKind},
|
||||
str::FromStr,
|
||||
sync::LazyLock,
|
||||
};
|
||||
|
||||
fn get_cmudict<'a>() -> Result<&'a Cmudict, CmudictError> {
|
||||
static CMUDICT: LazyLock<Result<Cmudict, CmudictError>> =
|
||||
LazyLock::new(|| Cmudict::from_str(include_str!("../dict/cmudict.dict")));
|
||||
CMUDICT.as_ref().map_err(|i| match i {
|
||||
CmudictError::IoErr(e) => CmudictError::IoErr(IoError::new(ErrorKind::Other, e)),
|
||||
CmudictError::InvalidLine(e) => CmudictError::InvalidLine(*e),
|
||||
CmudictError::RuleParseError(e) => CmudictError::RuleParseError(e.clone()),
|
||||
})
|
||||
}
|
||||
|
||||
if word.chars().count() < 4 && word.chars().all(|c| c.is_ascii_uppercase()) {
|
||||
return Ok(letters_to_ipa(word));
|
||||
}
|
||||
|
||||
let dict = get_cmudict()?;
|
||||
let upper = word.to_ascii_uppercase();
|
||||
let lower = word.to_ascii_lowercase();
|
||||
let Some(rules) = dict
|
||||
.get(word)
|
||||
.or_else(|| dict.get(&upper))
|
||||
.or_else(|| dict.get(&lower))
|
||||
else {
|
||||
return Ok(letters_to_ipa(word));
|
||||
};
|
||||
if rules.is_empty() {
|
||||
return Ok(word.to_owned());
|
||||
}
|
||||
let i = rand::random_range(0..rules.len());
|
||||
let result = rules[i]
|
||||
.pronunciation()
|
||||
.iter()
|
||||
.map(|i| arpa_to_ipa(&i.to_string()).unwrap_or_default())
|
||||
.collect::<String>();
|
||||
Ok(result)
|
||||
}
|
||||
|
||||
#[cfg(not(feature = "use-cmudict"))]
|
||||
fn word2ipa_en(word: &str) -> Result<String, G2PError> {
|
||||
use super::letters_to_ipa;
|
||||
use std::{
|
||||
ffi::{CStr, CString, c_char},
|
||||
sync::Once,
|
||||
};
|
||||
|
||||
if word.chars().count() < 4 && word.chars().all(|c| c.is_ascii_uppercase()) {
|
||||
return Ok(letters_to_ipa(word));
|
||||
}
|
||||
|
||||
unsafe extern "C" {
|
||||
fn TextToPhonemes(text: *const c_char) -> *const ::std::os::raw::c_char;
|
||||
fn Initialize(data_dictlist: *const c_char);
|
||||
}
|
||||
|
||||
unsafe {
|
||||
static INIT: Once = Once::new();
|
||||
INIT.call_once(|| {
|
||||
static DATA: &[u8] = include_bytes!("../dict/espeak.dict");
|
||||
Initialize(DATA.as_ptr() as _);
|
||||
});
|
||||
|
||||
let word = CString::new(word.to_lowercase())?.into_raw() as *const c_char;
|
||||
let res = TextToPhonemes(word);
|
||||
Ok(CStr::from_ptr(res).to_str()?.to_string())
|
||||
}
|
||||
}
|
||||
|
||||
/// Replaces CJK / fullwidth punctuation with its halfwidth counterpart.
///
/// Mapping:
/// * `«` `《` -> `“`, `»` `》` -> `”`
/// * `、` and fullwidth comma -> `,`
/// * `。` -> `.`
/// * fullwidth `( ) ! : ; ?` -> their ASCII forms
///
/// Every other character, including punctuation that is already ASCII, is
/// copied through unchanged. Whitespace is not altered.
fn to_half_shape(text: &str) -> String {
    text.chars()
        .map(|c| match c {
            '«' | '《' => '“',
            '»' | '》' => '”',
            // Fullwidth forms are written as escapes so they cannot be
            // silently folded into their ASCII look-alikes.
            '\u{FF08}' => '(',
            '\u{FF09}' => ')',
            '、' | '\u{FF0C}' => ',',
            '。' => '.',
            '\u{FF01}' => '!',
            '\u{FF1A}' => ':',
            '\u{FF1B}' => ';',
            '\u{FF1F}' => '?',
            other => other,
        })
        .collect()
}
|
||||
|
||||
fn num_repr(text: &str) -> Result<String, G2PError> {
|
||||
let regex = Regex::new(r#"\d+(\.\d+)?"#)?;
|
||||
Ok(regex
|
||||
.replace(text, |caps: &Captures| {
|
||||
let text = &caps[0];
|
||||
if let Ok(num) = text.parse::<f64>() {
|
||||
num.to_chinese(
|
||||
ChineseVariant::Traditional,
|
||||
ChineseCase::Lower,
|
||||
ChineseCountMethod::Low,
|
||||
)
|
||||
.map_or(text.to_owned(), |i| i)
|
||||
} else if let Ok(num) = text.parse::<i64>() {
|
||||
num.to_chinese(
|
||||
ChineseVariant::Traditional,
|
||||
ChineseCase::Lower,
|
||||
ChineseCountMethod::Low,
|
||||
)
|
||||
.map_or(text.to_owned(), |i| i)
|
||||
} else {
|
||||
text.to_owned()
|
||||
}
|
||||
})
|
||||
.to_string())
|
||||
}
|
||||
|
||||
pub fn g2p(text: &str, use_v11: bool) -> Result<String, G2PError> {
|
||||
let text = num_repr(text)?;
|
||||
let sentence_pattern = Regex::new(
|
||||
r#"([\u4E00-\u9FFF]+)|([,。:·?、!《》()【】〖〗〔〕“”‘’〈〉…— ]+)|([\u0000-\u00FF]+)+"#,
|
||||
)?;
|
||||
let en_word_pattern = Regex::new("\\w+|\\W+")?;
|
||||
let jieba = jieba_rs::Jieba::new();
|
||||
let mut result = String::new();
|
||||
for i in sentence_pattern.captures_iter(&text) {
|
||||
match (i.get(1), i.get(2), i.get(3)) {
|
||||
(Some(text), _, _) => {
|
||||
let text = to_half_shape(text.as_str());
|
||||
if use_v11 {
|
||||
if !result.is_empty() && !result.ends_with(' ') {
|
||||
result.push(' ');
|
||||
}
|
||||
result.push_str(&v11::g2p(&text, true));
|
||||
result.push(' ');
|
||||
} else {
|
||||
for i in jieba.cut(&text, true) {
|
||||
result.push_str(&word2ipa_zh(i)?);
|
||||
result.push(' ');
|
||||
}
|
||||
}
|
||||
}
|
||||
(_, Some(text), _) => {
|
||||
let text = to_half_shape(text.as_str());
|
||||
result = result.trim_end().to_string();
|
||||
result.push_str(&text);
|
||||
result.push(' ');
|
||||
}
|
||||
(_, _, Some(text)) => {
|
||||
for i in en_word_pattern.captures_iter(text.as_str()) {
|
||||
let c = (i[0]).chars().next().unwrap_or_default();
|
||||
if c == '\''
|
||||
|| c == '_'
|
||||
|| c == '-'
|
||||
|| c.is_ascii_lowercase()
|
||||
|| c.is_ascii_uppercase()
|
||||
{
|
||||
let i = &i[0];
|
||||
if result.trim_end().ends_with(['.', ',', '!', '?'])
|
||||
&& !result.ends_with(' ')
|
||||
{
|
||||
result.push(' ');
|
||||
}
|
||||
result.push_str(&word2ipa_en(i)?);
|
||||
} else if c == ' ' && result.ends_with(' ') {
|
||||
result.push_str((i[0]).trim_start());
|
||||
} else {
|
||||
result.push_str(&i[0]);
|
||||
}
|
||||
}
|
||||
}
|
||||
_ => (),
|
||||
};
|
||||
}
|
||||
|
||||
Ok(result.trim().to_string())
|
||||
}
|
||||
|
||||
#[cfg(test)]
mod tests {
    /// Spot-checks the eSpeak-backed English conversion on a few known words.
    #[cfg(not(feature = "use-cmudict"))]
    #[test]
    fn test_word2ipa_en() -> Result<(), super::G2PError> {
        const CASES: [(&str, &str); 7] = [
            ("qq", "kjˌuːkjˈuː"),
            ("hello", "həlˈəʊ"),
            ("world", "wˈɜːld"),
            ("apple", "ˈapəl"),
            ("children", "tʃˈɪldɹɛn"),
            ("hour", "ˈaʊə"),
            ("days", "dˈeɪz"),
        ];

        for (word, expected) in CASES {
            assert_eq!(expected, super::word2ipa_en(word)?);
        }

        Ok(())
    }

    /// The dictionary-backed conversion must ignore the case of its input.
    #[cfg(feature = "use-cmudict")]
    #[test]
    fn test_word2ipa_en_is_case_insensitive_for_dictionary_words() -> Result<(), super::G2PError> {
        let capitalised = super::word2ipa_en("Welcome")?;
        let lowercase = super::word2ipa_en("welcome")?;
        assert_eq!(capitalised, lowercase);

        Ok(())
    }

    /// Checks both phoneme alphabets on the same Chinese sentence.
    #[test]
    fn test_g2p() -> Result<(), super::G2PError> {
        let sentence = "你好世界";

        assert_eq!("ni↓xau↓ ʂɻ↘ʨje↘", super::g2p(sentence, false)?);
        assert_eq!("ㄋㄧ2ㄏㄠ3/ㄕ十4ㄐㄝ4", super::g2p(sentence, true)?);

        Ok(())
    }
}
|
||||
+62
@@ -0,0 +1,62 @@
|
||||
use crate::{G2PError, pinyin_to_ipa};
|
||||
|
||||
/// Rewrites Chao tone letters into arrow tone marks and collapses syllabic
/// consonants.
///
/// Sequences are matched longest-first at each position:
/// `˧˩˧` -> `↓`, `˧˥` -> `↗`, `˥˩` -> `↘`, a lone `˥` -> `→`.
/// `ɻ` or `ɱ` followed by the combining syllabic mark U+0329 becomes `ɨ`.
/// Every other character is copied unchanged.
///
/// # Panics
///
/// Panics if a U+0329 combining mark remains in the output, i.e. it followed a
/// character other than `ɻ` / `ɱ`.
fn retone(p: &str) -> String {
    let chars: Vec<char> = p.chars().collect();
    let mut out = String::with_capacity(p.len());
    let mut rest = chars.as_slice();

    while let Some(&head) = rest.first() {
        let (replacement, consumed) = match rest {
            // Third tone is checked first because it shares a prefix with others.
            ['˧', '˩', '˧', ..] => ('↓', 3),
            // Second tone.
            ['˧', '˥', ..] => ('↗', 2),
            // Fourth tone.
            ['˥', '˩', ..] => ('↘', 2),
            // First tone.
            ['˥', ..] => ('→', 1),
            // Syllabic consonant: base letter plus combining vertical line below.
            ['\u{027B}' | '\u{0271}', '\u{0329}', ..] => ('ɨ', 2),
            // Anything else is copied through.
            _ => (head, 1),
        };
        out.push(replacement);
        rest = &rest[consumed..];
    }

    assert!(
        !out.contains('\u{0329}'),
        "Unexpected combining mark in: {}",
        out
    );
    out
}
|
||||
|
||||
pub(super) fn py2ipa(py: &str) -> Result<String, G2PError> {
|
||||
pinyin_to_ipa(py)?
|
||||
.first()
|
||||
.map_or(Err(G2PError::EnptyData), |i| {
|
||||
Ok(i.iter().map(|i| retone(i)).collect::<String>())
|
||||
})
|
||||
}
|
||||
+1263
File diff suppressed because it is too large
Load Diff
Vendored
+83
@@ -0,0 +1,83 @@
|
||||
mod error;
|
||||
mod g2p;
|
||||
mod stream;
|
||||
mod synthesizer;
|
||||
mod tokenizer;
|
||||
mod transcription;
|
||||
mod voice;
|
||||
|
||||
use {
|
||||
bincode::{config::standard, decode_from_slice},
|
||||
ort::{execution_providers::CUDAExecutionProvider, session::Session},
|
||||
std::{collections::HashMap, path::Path, sync::Arc, time::Duration},
|
||||
tokio::{fs::read, sync::Mutex},
|
||||
};
|
||||
pub use {error::*, g2p::*, stream::*, tokenizer::*, transcription::*, voice::*};
|
||||
|
||||
pub struct KokoroTts {
|
||||
model: Arc<Mutex<Session>>,
|
||||
voices: Arc<HashMap<String, Vec<Vec<Vec<f32>>>>>,
|
||||
}
|
||||
|
||||
impl KokoroTts {
|
||||
pub async fn new<P: AsRef<Path>>(model_path: P, voices_path: P) -> Result<Self, KokoroError> {
|
||||
let voices = read(voices_path).await?;
|
||||
let (voices, _) = decode_from_slice(&voices, standard())?;
|
||||
|
||||
let model = Session::builder()?
|
||||
.with_execution_providers([CUDAExecutionProvider::default().build()])?
|
||||
.commit_from_file(model_path)?;
|
||||
Ok(Self {
|
||||
model: Arc::new(model.into()),
|
||||
voices,
|
||||
})
|
||||
}
|
||||
|
||||
pub async fn new_from_bytes<B>(model: B, voices: B) -> Result<Self, KokoroError>
|
||||
where
|
||||
B: AsRef<[u8]>,
|
||||
{
|
||||
let (voices, _) = decode_from_slice(voices.as_ref(), standard())?;
|
||||
|
||||
let model = Session::builder()?
|
||||
.with_execution_providers([CUDAExecutionProvider::default().build()])?
|
||||
.commit_from_memory(model.as_ref())?;
|
||||
Ok(Self {
|
||||
model: Arc::new(model.into()),
|
||||
voices,
|
||||
})
|
||||
}
|
||||
|
||||
pub async fn synth<S>(&self, text: S, voice: Voice) -> Result<(Vec<f32>, Duration), KokoroError>
|
||||
where
|
||||
S: AsRef<str>,
|
||||
{
|
||||
let name = voice.get_name();
|
||||
let pack = self
|
||||
.voices
|
||||
.get(name)
|
||||
.ok_or(KokoroError::VoiceNotFound(name.to_owned()))?;
|
||||
synthesizer::synth(Arc::downgrade(&self.model), text, pack, voice).await
|
||||
}
|
||||
|
||||
pub fn stream<S>(&self, voice: Voice) -> (SynthSink<S>, SynthStream)
|
||||
where
|
||||
S: AsRef<str> + Send + 'static,
|
||||
{
|
||||
let voices = Arc::downgrade(&self.voices);
|
||||
let model = Arc::downgrade(&self.model);
|
||||
|
||||
start_synth_session(voice, move |text, voice| {
|
||||
let voices = voices.clone();
|
||||
let model = model.clone();
|
||||
async move {
|
||||
let name = voice.get_name();
|
||||
let voices = voices.upgrade().ok_or(KokoroError::ModelReleased)?;
|
||||
let pack = voices
|
||||
.get(name)
|
||||
.ok_or(KokoroError::VoiceNotFound(name.to_owned()))?;
|
||||
synthesizer::synth(model, text, pack, voice).await
|
||||
}
|
||||
})
|
||||
}
|
||||
}
|
||||
Vendored
+157
@@ -0,0 +1,157 @@
|
||||
use {
|
||||
crate::{KokoroError, Voice},
|
||||
futures::{Sink, SinkExt, Stream},
|
||||
pin_project::pin_project,
|
||||
std::{
|
||||
pin::Pin,
|
||||
task::{Context, Poll},
|
||||
time::Duration,
|
||||
},
|
||||
tokio::sync::mpsc::{UnboundedReceiver, UnboundedSender, unbounded_channel},
|
||||
};
|
||||
|
||||
struct Request<S> {
|
||||
voice: Voice,
|
||||
text: S,
|
||||
}
|
||||
|
||||
struct Response {
|
||||
data: Vec<f32>,
|
||||
took: Duration,
|
||||
}
|
||||
|
||||
/// 语音合成流
|
||||
///
|
||||
/// 该结构体用于通过流式合成来处理更长的文本。它实现了`Stream` trait,可以用于异步迭代合成后的音频数据。
|
||||
#[pin_project]
|
||||
pub struct SynthStream {
|
||||
#[pin]
|
||||
rx: UnboundedReceiver<Response>,
|
||||
}
|
||||
|
||||
impl Stream for SynthStream {
|
||||
type Item = (Vec<f32>, Duration);
|
||||
|
||||
fn poll_next(self: Pin<&mut Self>, cx: &mut Context<'_>) -> Poll<Option<Self::Item>> {
|
||||
Pin::new(&mut self.project().rx)
|
||||
.poll_recv(cx)
|
||||
.map(|i| i.map(|Response { data, took }| (data, took)))
|
||||
}
|
||||
}
|
||||
|
||||
/// 语音合成发送端
|
||||
///
|
||||
/// 该结构体用于发送语音合成请求。它实现了`Sink` trait,可以用于异步发送合成请求。
|
||||
#[pin_project]
|
||||
pub struct SynthSink<S> {
|
||||
tx: UnboundedSender<Request<S>>,
|
||||
voice: Voice,
|
||||
}
|
||||
|
||||
impl<S> SynthSink<S> {
|
||||
/// 设置语音名称
|
||||
///
|
||||
/// 该方法用于设置要合成的语音名称。
|
||||
///
|
||||
/// # 参数
|
||||
///
|
||||
/// * `voice_name` - 语音名称,用于选择要合成的语音。
|
||||
///
|
||||
/// # 示例
|
||||
///
|
||||
/// ```rust
|
||||
/// use kokoro_tts::{KokoroTts, Voice};
|
||||
///
|
||||
/// #[tokio::main]
|
||||
/// async fn main() {
|
||||
/// let Ok(tts) = KokoroTts::new("../kokoro-v1.0.int8.onnx", "../voices.bin").await else {
|
||||
/// return;
|
||||
/// };
|
||||
/// // speed: 1.0
|
||||
/// let (mut sink, _) = tts.stream::<&str>(Voice::ZfXiaoxiao(1.0));
|
||||
/// // speed: 1.8
|
||||
/// sink.set_voice(Voice::ZmYunxi(1.8));
|
||||
/// }
|
||||
/// ```
|
||||
///
|
||||
pub fn set_voice(&mut self, voice: Voice) {
|
||||
self.voice = voice
|
||||
}
|
||||
|
||||
/// 发送合成请求
|
||||
///
|
||||
/// 该方法用于发送语音合成请求。
|
||||
///
|
||||
/// # 参数
|
||||
///
|
||||
/// * `text` - 要合成的文本内容。
|
||||
///
|
||||
/// # 返回值
|
||||
///
|
||||
/// 如果发送成功,将返回`Ok(())`;如果发送失败,将返回一个`KokoroError`类型的错误。
|
||||
///
|
||||
/// # 示例
|
||||
///
|
||||
/// ```rust
|
||||
/// use kokoro_tts::{KokoroTts, Voice};
|
||||
///
|
||||
/// #[tokio::main]
|
||||
/// async fn main() {
|
||||
/// let Ok(tts) = KokoroTts::new("../kokoro-v1.1-zh.onnx", "../voices-v1.1-zh.bin").await else {
|
||||
/// return;
|
||||
/// };
|
||||
/// let (mut sink, _) =tts.stream(Voice::Zf003(2));
|
||||
/// let _ = sink.synth("hello world.").await;
|
||||
/// }
|
||||
/// ```
|
||||
///
|
||||
pub async fn synth(&mut self, text: S) -> Result<(), KokoroError> {
|
||||
self.send((self.voice, text)).await
|
||||
}
|
||||
}
|
||||
|
||||
impl<S> Sink<(Voice, S)> for SynthSink<S> {
|
||||
type Error = KokoroError;
|
||||
|
||||
fn poll_ready(self: Pin<&mut Self>, _cx: &mut Context<'_>) -> Poll<Result<(), Self::Error>> {
|
||||
Poll::Ready(Ok(()))
|
||||
}
|
||||
|
||||
fn start_send(self: Pin<&mut Self>, (voice, text): (Voice, S)) -> Result<(), Self::Error> {
|
||||
self.tx
|
||||
.send(Request { voice, text })
|
||||
.map_err(|e| KokoroError::Send(e.to_string()))
|
||||
}
|
||||
|
||||
fn poll_flush(self: Pin<&mut Self>, _cx: &mut Context<'_>) -> Poll<Result<(), Self::Error>> {
|
||||
Poll::Ready(Ok(()))
|
||||
}
|
||||
|
||||
fn poll_close(self: Pin<&mut Self>, _cx: &mut Context<'_>) -> Poll<Result<(), Self::Error>> {
|
||||
Poll::Ready(Ok(()))
|
||||
}
|
||||
}
|
||||
|
||||
pub(super) fn start_synth_session<F, R, S>(
|
||||
voice: Voice,
|
||||
synth_request_callback: F,
|
||||
) -> (SynthSink<S>, SynthStream)
|
||||
where
|
||||
F: Fn(S, Voice) -> R + Send + 'static,
|
||||
R: Future<Output = Result<(Vec<f32>, Duration), KokoroError>> + Send,
|
||||
S: AsRef<str> + Send + 'static,
|
||||
{
|
||||
let (tx, mut rx) = unbounded_channel::<Request<S>>();
|
||||
let (tx2, rx2) = unbounded_channel();
|
||||
tokio::spawn(async move {
|
||||
while let Some(req) = rx.recv().await {
|
||||
let (data, took) = synth_request_callback(req.text, req.voice).await?;
|
||||
tx2.send(Response { data, took })
|
||||
.map_err(|e| KokoroError::Send(e.to_string()))?;
|
||||
}
|
||||
|
||||
Ok::<_, KokoroError>(())
|
||||
});
|
||||
|
||||
(SynthSink { tx, voice }, SynthStream { rx: rx2 })
|
||||
}
|
||||
+123
@@ -0,0 +1,123 @@
|
||||
use {
|
||||
crate::{KokoroError, Voice, g2p, get_token_ids},
|
||||
ndarray::Array,
|
||||
ort::{
|
||||
inputs,
|
||||
session::{RunOptions, Session},
|
||||
value::TensorRef,
|
||||
},
|
||||
std::{
|
||||
cmp::min,
|
||||
sync::Weak,
|
||||
time::{Duration, SystemTime},
|
||||
},
|
||||
tokio::sync::Mutex,
|
||||
};
|
||||
|
||||
async fn synth_v10<P, S>(
|
||||
model: Weak<Mutex<Session>>,
|
||||
phonemes: S,
|
||||
pack: P,
|
||||
speed: f32,
|
||||
) -> Result<(Vec<f32>, Duration), KokoroError>
|
||||
where
|
||||
P: AsRef<Vec<Vec<Vec<f32>>>>,
|
||||
S: AsRef<str>,
|
||||
{
|
||||
let model = model.upgrade().ok_or(KokoroError::ModelReleased)?;
|
||||
let phonemes = get_token_ids(phonemes.as_ref(), false);
|
||||
let phonemes = Array::from_shape_vec((1, phonemes.len()), phonemes)?;
|
||||
let ref_s = pack.as_ref()[phonemes.len() - 1]
|
||||
.first()
|
||||
.cloned()
|
||||
.unwrap_or_default();
|
||||
|
||||
let style = Array::from_shape_vec((1, ref_s.len()), ref_s)?;
|
||||
let speed = Array::from_vec(vec![speed]);
|
||||
let options = RunOptions::new()?;
|
||||
let mut model = model.lock().await;
|
||||
let t = SystemTime::now();
|
||||
let kokoro_output = model
|
||||
.run_async(
|
||||
inputs![
|
||||
"tokens" => TensorRef::from_array_view(&phonemes)?,
|
||||
"style" => TensorRef::from_array_view(&style)?,
|
||||
"speed" => TensorRef::from_array_view(&speed)?,
|
||||
],
|
||||
&options,
|
||||
)?
|
||||
.await?;
|
||||
let elapsed = t.elapsed()?;
|
||||
let (_, audio) = kokoro_output["audio"].try_extract_tensor::<f32>()?;
|
||||
|
||||
Ok((audio.to_owned(), elapsed))
|
||||
}
|
||||
|
||||
async fn synth_v11<P, S>(
|
||||
model: Weak<Mutex<Session>>,
|
||||
phonemes: S,
|
||||
pack: P,
|
||||
speed: i32,
|
||||
) -> Result<(Vec<f32>, Duration), KokoroError>
|
||||
where
|
||||
P: AsRef<Vec<Vec<Vec<f32>>>>,
|
||||
S: AsRef<str>,
|
||||
{
|
||||
let model = model.upgrade().ok_or(KokoroError::ModelReleased)?;
|
||||
let mut phonemes = get_token_ids(phonemes.as_ref(), true);
|
||||
|
||||
let mut ret = Vec::new();
|
||||
let mut elapsed = Duration::ZERO;
|
||||
while let p = phonemes.drain(..min(pack.as_ref().len(), phonemes.len()))
|
||||
&& p.len() != 0
|
||||
{
|
||||
let phonemes = Array::from_shape_vec((1, p.len()), p.collect())?;
|
||||
let ref_s = pack.as_ref()[phonemes.len() - 1]
|
||||
.first()
|
||||
.cloned()
|
||||
.unwrap_or(vec![0.; 256]);
|
||||
|
||||
let style = Array::from_shape_vec((1, ref_s.len()), ref_s)?;
|
||||
let speed = Array::from_vec(vec![speed]);
|
||||
let options = RunOptions::new()?;
|
||||
let mut model = model.lock().await;
|
||||
let t = SystemTime::now();
|
||||
let kokoro_output = model
|
||||
.run_async(
|
||||
inputs![
|
||||
"input_ids" => TensorRef::from_array_view(&phonemes)?,
|
||||
"style" => TensorRef::from_array_view(&style)?,
|
||||
"speed" => TensorRef::from_array_view(&speed)?,
|
||||
],
|
||||
&options,
|
||||
)?
|
||||
.await?;
|
||||
elapsed = t.elapsed()?;
|
||||
let (_, audio) = kokoro_output["waveform"].try_extract_tensor::<f32>()?;
|
||||
let (_, _duration) = kokoro_output["duration"].try_extract_tensor::<i64>()?;
|
||||
// let _ = dbg!(duration.len());
|
||||
ret.extend_from_slice(audio);
|
||||
}
|
||||
|
||||
Ok((ret, elapsed))
|
||||
}
|
||||
|
||||
pub(super) async fn synth<P, S>(
|
||||
model: Weak<Mutex<Session>>,
|
||||
text: S,
|
||||
pack: P,
|
||||
voice: Voice,
|
||||
) -> Result<(Vec<f32>, Duration), KokoroError>
|
||||
where
|
||||
P: AsRef<Vec<Vec<Vec<f32>>>>,
|
||||
S: AsRef<str>,
|
||||
{
|
||||
let phonemes = g2p(text.as_ref(), voice.is_v11_supported())?;
|
||||
// #[cfg(debug_assertions)]
|
||||
// println!("{}", phonemes);
|
||||
match voice {
|
||||
v if v.is_v11_supported() => synth_v11(model, phonemes, pack, v.get_speed_v11()?).await,
|
||||
v if v.is_v10_supported() => synth_v10(model, phonemes, pack, v.get_speed_v10()?).await,
|
||||
v => Err(KokoroError::VoiceVersionInvalid(v.get_name().to_owned())),
|
||||
}
|
||||
}
|
||||
+324
@@ -0,0 +1,324 @@
|
||||
use {
|
||||
log::warn,
|
||||
std::{collections::HashMap, sync::LazyLock},
|
||||
};
|
||||
/// Phoneme-character to token-id table used for v1.0 models.
static VOCAB_V10: LazyLock<HashMap<char, u8>> = LazyLock::new(|| {
    HashMap::from([
        // Punctuation and space.
        (';', 1), (':', 2), (',', 3), ('.', 4), ('!', 5), ('?', 6),
        ('—', 9), ('…', 10), ('"', 11), ('(', 12), (')', 13), ('“', 14), ('”', 15),
        (' ', 16),
        ('\u{0303}', 17), // Unicode escape for combining tilde
        ('ʣ', 18), ('ʥ', 19), ('ʦ', 20), ('ʨ', 21), ('ᵝ', 22),
        ('\u{AB67}', 23), // Unicode escape
        // Upper-case symbols.
        ('A', 24), ('I', 25), ('O', 31), ('Q', 33), ('S', 35), ('T', 36), ('W', 39), ('Y', 41),
        ('ᵊ', 42),
        // Lower-case Latin letters.
        ('a', 43), ('b', 44), ('c', 45), ('d', 46), ('e', 47), ('f', 48),
        ('h', 50), ('i', 51), ('j', 52), ('k', 53), ('l', 54), ('m', 55), ('n', 56),
        ('o', 57), ('p', 58), ('q', 59), ('r', 60), ('s', 61), ('t', 62), ('u', 63),
        ('v', 64), ('w', 65), ('x', 66), ('y', 67), ('z', 68),
        // IPA letters.
        ('ɑ', 69), ('ɐ', 70), ('ɒ', 71), ('æ', 72), ('β', 75), ('ɔ', 76), ('ɕ', 77),
        ('ç', 78), ('ɖ', 80), ('ð', 81), ('ʤ', 82), ('ə', 83), ('ɚ', 85), ('ɛ', 86),
        ('ɜ', 87), ('ɟ', 90), ('ɡ', 92), ('ɥ', 99), ('ɨ', 101), ('ɪ', 102), ('ʝ', 103),
        ('ɯ', 110), ('ɰ', 111), ('ŋ', 112), ('ɳ', 113), ('ɲ', 114), ('ɴ', 115), ('ø', 116),
        ('ɸ', 118), ('θ', 119), ('œ', 120), ('ɹ', 123), ('ɾ', 125), ('ɻ', 126), ('ʁ', 128),
        ('ɽ', 129), ('ʂ', 130), ('ʃ', 131), ('ʈ', 132), ('ʧ', 133), ('ʊ', 135), ('ʋ', 136),
        ('ʌ', 138), ('ɣ', 139), ('ɤ', 140), ('χ', 142), ('ʎ', 143), ('ʒ', 147), ('ʔ', 148),
        // Stress, length and modifier marks.
        ('ˈ', 156), ('ˌ', 157), ('ː', 158), ('ʰ', 162), ('ʲ', 164),
        // Tone arrows.
        ('↓', 169), ('→', 171), ('↗', 172), ('↘', 173),
        ('ᵻ', 177),
    ])
});
|
||||
|
||||
/// Phoneme-character to token-id table used for v1.1 models.
///
/// Besides the Latin / IPA symbols it maps Bopomofo letters, a set of Chinese
/// characters, and the digits `1`-`5`.
static VOCAB_V11: LazyLock<HashMap<char, u8>> = LazyLock::new(|| {
    HashMap::from([
        (';', 1), (':', 2), (',', 3), ('.', 4), ('!', 5), ('?', 6), ('/', 7),
        ('—', 9), ('…', 10), ('"', 11), ('(', 12), (')', 13), ('“', 14), ('”', 15),
        (' ', 16),
        ('\u{0303}', 17), // Unicode escape for combining tilde
        ('ʣ', 18), ('ʥ', 19), ('ʦ', 20), ('ʨ', 21), ('ᵝ', 22), ('ㄓ', 23),
        ('A', 24), ('I', 25),
        ('ㄅ', 30), ('O', 31), ('ㄆ', 32), ('Q', 33), ('R', 34), ('S', 35), ('T', 36),
        ('ㄇ', 37), ('ㄈ', 38), ('W', 39), ('ㄉ', 40), ('Y', 41), ('ᵊ', 42),
        ('a', 43), ('b', 44), ('c', 45), ('d', 46), ('e', 47), ('f', 48), ('ㄊ', 49),
        ('h', 50), ('i', 51), ('j', 52), ('k', 53), ('l', 54), ('m', 55), ('n', 56),
        ('o', 57), ('p', 58), ('q', 59), ('r', 60), ('s', 61), ('t', 62), ('u', 63),
        ('v', 64), ('w', 65), ('x', 66), ('y', 67), ('z', 68),
        ('ɑ', 69), ('ɐ', 70), ('ɒ', 71), ('æ', 72), ('ㄋ', 73), ('ㄌ', 74), ('β', 75),
        ('ɔ', 76), ('ɕ', 77), ('ç', 78), ('ㄍ', 79), ('ɖ', 80), ('ð', 81), ('ʤ', 82),
        ('ə', 83), ('ㄎ', 84), ('ㄦ', 85), ('ɛ', 86), ('ɜ', 87), ('ㄏ', 88), ('ㄐ', 89),
        ('ɟ', 90), ('ㄑ', 91), ('ɡ', 92), ('ㄒ', 93), ('ㄔ', 94), ('ㄕ', 95), ('ㄗ', 96),
        ('ㄘ', 97), ('ㄙ', 98), ('月', 99), ('ㄚ', 100), ('ɨ', 101), ('ɪ', 102), ('ʝ', 103),
        ('ㄛ', 104), ('ㄝ', 105), ('ㄞ', 106), ('ㄟ', 107), ('ㄠ', 108), ('ㄡ', 109),
        ('ɯ', 110), ('ɰ', 111), ('ŋ', 112), ('ɳ', 113), ('ɲ', 114), ('ɴ', 115), ('ø', 116),
        ('ㄢ', 117), ('ɸ', 118), ('θ', 119), ('œ', 120), ('ㄣ', 121), ('ㄤ', 122),
        ('ɹ', 123), ('ㄥ', 124), ('ɾ', 125), ('ㄖ', 126), ('ㄧ', 127), ('ʁ', 128),
        ('ɽ', 129), ('ʂ', 130), ('ʃ', 131), ('ʈ', 132), ('ʧ', 133), ('ㄨ', 134),
        ('ʊ', 135), ('ʋ', 136), ('ㄩ', 137), ('ʌ', 138), ('ɣ', 139), ('ㄜ', 140),
        ('ㄭ', 141), ('χ', 142), ('ʎ', 143), ('十', 144), ('压', 145), ('言', 146),
        ('ʒ', 147), ('ʔ', 148), ('阳', 149), ('要', 150), ('阴', 151), ('应', 152),
        ('用', 153), ('又', 154), ('中', 155), ('ˈ', 156), ('ˌ', 157), ('ː', 158),
        ('穵', 159), ('外', 160), ('万', 161), ('ʰ', 162), ('王', 163), ('ʲ', 164),
        ('为', 165), ('文', 166), ('瓮', 167), ('我', 168),
        // Tone digits.
        ('3', 169), ('5', 170), ('1', 171), ('2', 172), ('4', 173),
        ('元', 175), ('云', 176), ('ᵻ', 177),
    ])
});
|
||||
|
||||
pub fn get_token_ids(phonemes: &str, v11: bool) -> Vec<i64> {
|
||||
let mut tokens = Vec::with_capacity(phonemes.len() + 2);
|
||||
tokens.push(0);
|
||||
|
||||
for i in phonemes.chars() {
|
||||
let v = if v11 {
|
||||
VOCAB_V11.get(&i).copied()
|
||||
} else {
|
||||
VOCAB_V10.get(&i).copied()
|
||||
};
|
||||
match v {
|
||||
Some(t) => {
|
||||
tokens.push(t as _);
|
||||
}
|
||||
_ => {
|
||||
warn!("Unknown phone {}, skipped.", i);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
tokens.push(0);
|
||||
tokens
|
||||
}
|
||||
+4
@@ -0,0 +1,4 @@
|
||||
mod en;
|
||||
mod zh;
|
||||
|
||||
pub use {en::*, zh::*};
|
||||
+147
@@ -0,0 +1,147 @@
|
||||
use regex::Regex;
|
||||
use std::{collections::HashMap, sync::LazyLock};
|
||||
|
||||
/// Spelled-out pronunciation of each ASCII letter.
///
/// Upper- and lower-case letters share the same entry, except that lower-case
/// `a` maps to `ɐ` while upper-case `A` maps to `ˈA`.
static LETTERS_IPA_MAP: LazyLock<HashMap<char, &'static str>> = LazyLock::new(|| {
    const UPPERCASE: [(char, &str); 26] = [
        ('A', "ˈA"),
        ('B', "bˈi"),
        ('C', "sˈi"),
        ('D', "dˈi"),
        ('E', "ˈi"),
        ('F', "ˈɛf"),
        ('G', "ʤˈi"),
        ('H', "ˈAʧ"),
        ('I', "ˈI"),
        ('J', "ʤˈA"),
        ('K', "kˈA"),
        ('L', "ˈɛl"),
        ('M', "ˈɛm"),
        ('N', "ˈɛn"),
        ('O', "ˈO"),
        ('P', "pˈi"),
        ('Q', "kjˈu"),
        ('R', "ˈɑɹ"),
        ('S', "ˈɛs"),
        ('T', "tˈi"),
        ('U', "jˈu"),
        ('V', "vˈi"),
        ('W', "dˈʌbᵊlju"),
        ('X', "ˈɛks"),
        ('Y', "wˈI"),
        ('Z', "zˈi"),
    ];

    // Register every letter in both cases.
    let mut map: HashMap<char, &'static str> = UPPERCASE
        .into_iter()
        .flat_map(|(letter, ipa)| [(letter, ipa), (letter.to_ascii_lowercase(), ipa)])
        .collect();
    // The only case-dependent entry.
    map.insert('a', "ɐ");
    map
});
|
||||
/// ARPAbet symbol (without stress digit) to IPA. `SIL` maps to the empty
/// string.
static ARPA_IPA_MAP: LazyLock<HashMap<&'static str, &'static str>> = LazyLock::new(|| {
    HashMap::from([
        ("AA", "ɑ"),
        ("AE", "æ"),
        ("AH", "ə"),
        ("AO", "ɔ"),
        ("AW", "aʊ"),
        ("AY", "aɪ"),
        ("B", "b"),
        ("CH", "tʃ"),
        ("D", "d"),
        ("DH", "ð"),
        ("EH", "ɛ"),
        ("ER", "ɝ"),
        ("EY", "eɪ"),
        ("F", "f"),
        ("G", "ɡ"),
        ("HH", "h"),
        ("IH", "ɪ"),
        ("IY", "i"),
        ("JH", "dʒ"),
        ("K", "k"),
        ("L", "l"),
        ("M", "m"),
        ("N", "n"),
        ("NG", "ŋ"),
        ("OW", "oʊ"),
        ("OY", "ɔɪ"),
        ("P", "p"),
        ("R", "ɹ"),
        ("S", "s"),
        ("SH", "ʃ"),
        ("T", "t"),
        ("TH", "θ"),
        ("UH", "ʊ"),
        ("UW", "u"),
        ("V", "v"),
        ("W", "w"),
        ("Y", "j"),
        ("Z", "z"),
        ("ZH", "ʒ"),
        ("SIL", ""),
    ])
});
|
||||
|
||||
/// 支持2025新增符号(如:吸气音ʘ)
|
||||
const SPECIAL_CASES: [(&str, &str); 3] = [("CLICK!", "ʘ"), ("TSK!", "ǀ"), ("TUT!", "ǁ")];
|
||||
|
||||
pub fn arpa_to_ipa(arpa: &str) -> Result<String, regex::Error> {
|
||||
let re = Regex::new(r"([A-Z!]+)(\d*)")?;
|
||||
|
||||
let Some(caps) = re.captures(arpa) else {
|
||||
return Ok(Default::default());
|
||||
};
|
||||
|
||||
// 处理特殊符号(2025新增)
|
||||
if let Some(sc) = SPECIAL_CASES.iter().find(|&&(s, _)| s == &caps[1]) {
|
||||
return Ok(sc.1.to_string());
|
||||
}
|
||||
|
||||
// 获取IPA映射
|
||||
let phoneme = ARPA_IPA_MAP
|
||||
.get(&caps[1])
|
||||
.map_or_else(|| letters_to_ipa(arpa), |i| i.to_string());
|
||||
|
||||
let mut result = String::with_capacity(arpa.len() * 2);
|
||||
// 添加重音标记(支持三级重音)
|
||||
result.push(match &caps[2] {
|
||||
"1" => 'ˈ',
|
||||
"2" => 'ˌ',
|
||||
"3" => '˧', // 2025新增中级重音
|
||||
_ => '\0',
|
||||
});
|
||||
|
||||
result.push_str(&phoneme);
|
||||
|
||||
Ok(result)
|
||||
}
|
||||
|
||||
pub fn letters_to_ipa(letters: &str) -> String {
|
||||
let mut res = String::with_capacity(letters.len());
|
||||
for i in letters.chars() {
|
||||
if let Some(p) = LETTERS_IPA_MAP.get(&i) {
|
||||
res.push_str(p);
|
||||
}
|
||||
}
|
||||
res
|
||||
}
|
||||
+3597
File diff suppressed because it is too large
Load Diff
+364
@@ -0,0 +1,364 @@
|
||||
//! Conversion from Hanyu Pinyin to the International Phonetic Alphabet (IPA).
//! Based on `zh.py` from the Python `misaki` library.
|
||||
use std::{collections::HashMap, error::Error, fmt, sync::LazyLock};
|
||||
|
||||
/// The pinyin finals recognised by this module.
const VALID_FINALS: [&str; 37] = [
    // Simple vowels and open finals.
    "i", "u", "ü", "a", "ia", "ua", "o", "uo", "e", "ie", "üe",
    // Finals ending in a vowel glide.
    "ai", "uai", "ei", "uei", "ao", "iao", "ou", "iou",
    // Finals ending in -n.
    "an", "ian", "uan", "üan", "en", "in", "uen", "ün",
    // Finals ending in -ng.
    "ang", "iang", "uang", "eng", "ing", "ueng", "ong", "iong",
    // Others.
    "er", "ê",
];
|
||||
/// The pinyin initials recognised by this module.
///
/// The two-letter initials come first. NOTE(review): presumably so that prefix
/// matching finds `zh` / `ch` / `sh` before `z` / `c` / `s` -- confirm against
/// the code that scans this array.
const INITIALS: [&str; 21] = [
    "zh", "ch", "sh",
    "b", "c", "d", "f", "g", "h", "j", "k", "l",
    "m", "n", "p", "q", "r", "s", "t", "x", "z",
];
|
||||
|
||||
// Error type definition.
/// Errors produced while converting pinyin.
#[derive(Debug)]
pub enum PinyinError {
    /// No final could be determined; the payload is included in the message.
    FinalNotFound(String),
}

impl fmt::Display for PinyinError {
    /// Formats the error as `Final not found: <payload>`.
    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
        let Self::FinalNotFound(tip) = self;
        f.write_fmt(format_args!("Final not found: {}", tip))
    }
}

impl Error for PinyinError {}
|
||||
|
||||
/// Pinyin initial to its IPA candidates.
///
/// Each candidate is a list of IPA segments; `h` and `r` have two candidates.
static INITIAL_MAPPING: LazyLock<HashMap<&'static str, Vec<Vec<&'static str>>>> =
    LazyLock::new(|| {
        HashMap::from([
            ("b", vec![vec!["p"]]),
            ("c", vec![vec!["ʦʰ"]]),
            ("ch", vec![vec!["ꭧʰ"]]),
            ("d", vec![vec!["t"]]),
            ("f", vec![vec!["f"]]),
            ("g", vec![vec!["k"]]),
            ("h", vec![vec!["x"], vec!["h"]]),
            ("j", vec![vec!["ʨ"]]),
            ("k", vec![vec!["kʰ"]]),
            ("l", vec![vec!["l"]]),
            ("m", vec![vec!["m"]]),
            ("n", vec![vec!["n"]]),
            ("p", vec![vec!["pʰ"]]),
            ("q", vec![vec!["ʨʰ"]]),
            ("r", vec![vec!["ɻ"], vec!["ʐ"]]),
            ("s", vec![vec!["s"]]),
            ("sh", vec![vec!["ʂ"]]),
            ("t", vec![vec!["tʰ"]]),
            ("x", vec![vec!["ɕ"]]),
            ("z", vec![vec!["ʦ"]]),
            ("zh", vec![vec!["ꭧ"]]),
        ])
    });
|
||||
|
||||
/// Whole syllables that consist only of consonants. The `0` inside a segment
/// is the placeholder that `apply_tone` replaces with the tone letters.
static SYLLABIC_CONSONANT_MAPPINGS: LazyLock<HashMap<&'static str, Vec<Vec<&'static str>>>> =
    LazyLock::new(|| {
        HashMap::from([
            ("hm", vec![vec!["h", "m0"]]),
            ("hng", vec![vec!["h", "ŋ0"]]),
            ("m", vec![vec!["m0"]]),
            ("n", vec![vec!["n0"]]),
            ("ng", vec![vec!["ŋ0"]]),
        ])
    });
|
||||
|
||||
/// Whole-syllable overrides that `pinyin_to_ipa` checks before splitting a
/// syllable into initial and final. The `0` inside a segment is the tone
/// placeholder.
static INTERJECTION_MAPPINGS: LazyLock<HashMap<&'static str, Vec<Vec<&'static str>>>> =
    LazyLock::new(|| {
        HashMap::from([
            ("io", vec![vec!["j", "ɔ0"]]),
            ("ê", vec![vec!["ɛ0"]]),
            ("er", vec![vec!["ɚ0"], vec!["aɚ̯0"]]),
            ("o", vec![vec!["ɔ0"]]),
        ])
    });
|
||||
|
||||
/// Duanmu (2000, p. 37) and Lin (2007, p. 68f)
/// Diphthongs from Duanmu (2007, p. 40): au, əu, əi, ai
/// Diphthongs from Lin (2007, p. 68f): au̯, ou̯, ei̯, ai̯
///
/// General-purpose table from pinyin final to IPA segments. The `0` inside a
/// segment is the placeholder that `apply_tone` replaces with the tone letters.
static FINAL_MAPPING: LazyLock<HashMap<&'static str, Vec<Vec<&'static str>>>> =
    LazyLock::new(|| {
        HashMap::from([
            ("a", vec![vec!["a0"]]),
            ("ai", vec![vec!["ai0"]]),
            ("an", vec![vec!["a0", "n"]]),
            ("ang", vec![vec!["a0", "ŋ"]]),
            ("ao", vec![vec!["au0"]]),
            ("e", vec![vec!["ɤ0"]]),
            ("ei", vec![vec!["ei0"]]),
            ("en", vec![vec!["ə0", "n"]]),
            ("eng", vec![vec!["ə0", "ŋ"]]),
            ("i", vec![vec!["i0"]]),
            ("ia", vec![vec!["j", "a0"]]),
            ("ian", vec![vec!["j", "ɛ0", "n"]]),
            ("iang", vec![vec!["j", "a0", "ŋ"]]),
            ("iao", vec![vec!["j", "au0"]]),
            ("ie", vec![vec!["j", "e0"]]),
            ("in", vec![vec!["i0", "n"]]),
            ("iou", vec![vec!["j", "ou0"]]),
            ("ing", vec![vec!["i0", "ŋ"]]),
            ("iong", vec![vec!["j", "ʊ0", "ŋ"]]),
            ("ong", vec![vec!["ʊ0", "ŋ"]]),
            ("ou", vec![vec!["ou0"]]),
            ("u", vec![vec!["u0"]]),
            ("uei", vec![vec!["w", "ei0"]]),
            ("ua", vec![vec!["w", "a0"]]),
            ("uai", vec![vec!["w", "ai0"]]),
            ("uan", vec![vec!["w", "a0", "n"]]),
            ("uen", vec![vec!["w", "ə0", "n"]]),
            ("uang", vec![vec!["w", "a0", "ŋ"]]),
            ("ueng", vec![vec!["w", "ə0", "ŋ"]]),
            ("ui", vec![vec!["w", "ei0"]]),
            ("un", vec![vec!["w", "ə0", "n"]]),
            ("uo", vec![vec!["w", "o0"]]),
            // Note: this mapping for "o" may not match expectations and may
            // need special handling.
            ("o", vec![vec!["w", "o0"]]),
            ("ü", vec![vec!["y0"]]),
            ("üe", vec![vec!["ɥ", "e0"]]),
            ("üan", vec![vec!["ɥ", "ɛ0", "n"]]),
            ("ün", vec![vec!["y0", "n"]]),
        ])
    });
|
||||
|
||||
/// Final overrides consulted by `pinyin_to_ipa` when the initial is
/// `zh`, `ch`, `sh` or `r`.
static FINAL_MAPPING_AFTER_ZH_CH_SH_R: LazyLock<HashMap<&'static str, Vec<Vec<&'static str>>>> =
    LazyLock::new(|| HashMap::from([("i", vec![vec!["ɻ0"], vec!["ʐ0"]])]));
|
||||
|
||||
/// Final overrides consulted by `pinyin_to_ipa` when the initial is
/// `z`, `c` or `s`.
static FINAL_MAPPING_AFTER_Z_C_S: LazyLock<HashMap<&'static str, Vec<Vec<&'static str>>>> =
    LazyLock::new(|| HashMap::from([("i", vec![vec!["ɹ0"], vec!["z0"]])]));
|
||||
|
||||
/// Tone number → tone letters. Tone 5 maps to the empty string, so the
/// placeholder is simply removed.
static TONE_MAPPING: LazyLock<HashMap<u8, &'static str>> = LazyLock::new(|| {
    HashMap::from([
        (1u8, "˥"),
        (2u8, "˧˥"),
        (3u8, "˧˩˧"),
        (4u8, "˥˩"),
        (5u8, ""),
    ])
});
|
||||
|
||||
/// Splits a trailing tone digit off a pinyin syllable.
///
/// Returns the syllable without the digit plus the digit's value. When the
/// input does not end in a decimal digit it is returned unchanged with tone 5.
pub(crate) fn split_tone(pinyin: &str) -> (&str, u8) {
    let trailing_digit = pinyin.chars().next_back().and_then(|c| c.to_digit(10));
    match trailing_digit {
        // `to_digit(10)` only accepts ASCII digits, so the last char is one byte.
        Some(digit) => (&pinyin[..pinyin.len() - 1], digit as u8),
        None => (pinyin, 5),
    }
}
|
||||
|
||||
/// Restores the full final `uen` from its abbreviated spelling.
///
/// When an initial precedes them, `iou`, `uei` and `uen` are written `iu`,
/// `ui` and `un`, e.g. niu (牛), gui (归), lun (论). Any input ending in
/// `u` + `n` (the `u` optionally carrying a tone mark) gets an `e` inserted
/// before the `n`; everything else is returned unchanged.
fn convert_uen(s: &str) -> String {
    const U_FORMS: [char; 5] = ['u', 'ū', 'ú', 'ǔ', 'ù'];

    if let Some(stem) = s.strip_suffix('n') {
        if stem.ends_with(U_FORMS) {
            return format!("{stem}en");
        }
    }
    s.to_owned()
}
|
||||
|
||||
/// Restores `ü` where the spelling rules drop its two dots.
///
/// Finals of the ü row are written ju (居), qu (区), xu (虚) after the
/// initials j, q, x, with the dots omitted; after n and l they keep the dots:
/// nü (女), lü (吕). So only a `u` (plain or tone-marked) directly after
/// `j`/`q`/`x` is rewritten; everything else is returned unchanged.
fn convert_uv(pinyin: &str) -> String {
    let mut remaining = pinyin.chars();
    let (Some(initial), Some(vowel)) = (remaining.next(), remaining.next()) else {
        return pinyin.to_owned();
    };

    if !matches!(initial, 'j' | 'q' | 'x') {
        return pinyin.to_owned();
    }

    let umlaut = match vowel {
        'u' => 'ü',
        'ū' => 'ǖ',
        'ú' => 'ǘ',
        'ǔ' => 'ǚ',
        'ù' => 'ǜ',
        _ => return pinyin.to_owned(),
    };

    let mut restored = String::with_capacity(pinyin.len() + 1);
    restored.push(initial);
    restored.push(umlaut);
    restored.push_str(remaining.as_str());
    restored
}
|
||||
|
||||
/// Restores the full final `iou` from its abbreviated spelling `iu`.
///
/// When an initial precedes them, `iou`, `uei` and `uen` are written `iu`,
/// `ui` and `un`, e.g. niu (牛), gui (归), lun (论). If the input ends in
/// `i` followed by `u` (plain or tone-marked), an `o` is inserted between
/// them; everything else is returned unchanged.
fn convert_iou(pinyin: &str) -> String {
    let mut from_end = pinyin.chars().rev();
    match (from_end.next(), from_end.next()) {
        (Some(u @ ('u' | 'ū' | 'ú' | 'ǔ' | 'ù')), Some('i')) => {
            // Tone-marked vowels are two bytes in UTF-8: cut by the vowel's
            // real encoded width so the slice lands on a char boundary.
            let stem = &pinyin[..pinyin.len() - u.len_utf8()];
            format!("{stem}o{u}")
        }
        // Anything else is left as is.
        _ => pinyin.to_owned(),
    }
}
|
||||
|
||||
/// Restores the full final `uei` from its abbreviated spelling `ui`.
///
/// When an initial precedes them, `iou`, `uei` and `uen` are written `iu`,
/// `ui` and `un`, e.g. niu (牛), gui (归), lun (论). If the input ends in
/// `u` followed by `i` (plain or tone-marked), an `e` is inserted between
/// them; everything else is returned unchanged.
fn convert_uei(pinyin: &str) -> String {
    let mut from_end = pinyin.chars().rev();
    match (from_end.next(), from_end.next()) {
        (Some(i @ ('i' | 'ī' | 'í' | 'ǐ' | 'ì')), Some('u')) => {
            // Tone-marked vowels are two bytes in UTF-8: cut by the vowel's
            // real encoded width so the slice lands on a char boundary.
            let stem = &pinyin[..pinyin.len() - i.len_utf8()];
            format!("{stem}e{i}")
        }
        // Anything else is left as is.
        _ => pinyin.to_owned(),
    }
}
|
||||
|
||||
/// 零声母转换,还原原始的韵母
|
||||
/// i行的韵母,前面没有声母的时候,写成yi(衣),ya(呀),ye(耶),yao(腰),you(忧),yan(烟),yin(因),yang(央),ying(英),yong(雍)。
|
||||
/// u行的韵母,前面没有声母的时候,写成wu(乌),wa(蛙),wo(窝),wai(歪),wei(威),wan(弯),wen(温),wang(汪),weng(翁)。
|
||||
/// ü行的韵母,前面没有声母的时候,写成yu(迂),yue(约),yuan(冤),yun(晕);ü上两点省略。"""
|
||||
pub(crate) fn convert_zero_consonant(pinyin: &str) -> String {
|
||||
let mut buffer = String::with_capacity(pinyin.len() + 2);
|
||||
let chars: Vec<char> = pinyin.chars().collect();
|
||||
|
||||
match chars.as_slice() {
|
||||
// 处理Y系转换
|
||||
['y', 'u', rest @ ..] => {
|
||||
buffer.push('ü');
|
||||
buffer.extend(rest.iter());
|
||||
}
|
||||
['y', u @ ('ū' | 'ú' | 'ǔ' | 'ù'), rest @ ..] => {
|
||||
buffer.push(match u {
|
||||
'ū' => 'ǖ', // ü 第一声
|
||||
'ú' => 'ǘ', // ü 第二声
|
||||
'ǔ' => 'ǚ', // ü 第三声
|
||||
'ù' => 'ǜ', // ü 第四声
|
||||
_ => unreachable!(),
|
||||
});
|
||||
buffer.extend(rest.iter());
|
||||
}
|
||||
['y', i @ ('i' | 'ī' | 'í' | 'ǐ' | 'ì'), rest @ ..] => {
|
||||
buffer.push(*i);
|
||||
buffer.extend(rest.iter());
|
||||
}
|
||||
['y', rest @ ..] => {
|
||||
buffer.push('i');
|
||||
buffer.extend(rest);
|
||||
}
|
||||
|
||||
// 处理W系转换
|
||||
['w', u @ ('u' | 'ū' | 'ú' | 'ǔ' | 'ù'), rest @ ..] => {
|
||||
buffer.push(*u);
|
||||
buffer.extend(rest.iter());
|
||||
}
|
||||
['w', rest @ ..] => {
|
||||
buffer.push('u');
|
||||
buffer.extend(rest);
|
||||
}
|
||||
|
||||
// 无需转换的情况
|
||||
_ => return pinyin.to_string(),
|
||||
}
|
||||
|
||||
// 有效性验证
|
||||
if VALID_FINALS.contains(&buffer.as_str()) {
|
||||
buffer
|
||||
} else {
|
||||
pinyin.to_string()
|
||||
}
|
||||
}
|
||||
|
||||
pub(crate) fn split_initial(pinyin: &str) -> (&'static str, &str) {
|
||||
for &initial in &INITIALS {
|
||||
if let Some(stripped) = pinyin.strip_prefix(initial) {
|
||||
return (initial, stripped);
|
||||
}
|
||||
}
|
||||
("", pinyin)
|
||||
}
|
||||
|
||||
fn apply_tone(variants: &[Vec<&str>], tone: u8) -> Vec<Vec<String>> {
|
||||
let tone_str = TONE_MAPPING.get(&tone).unwrap_or(&"");
|
||||
variants
|
||||
.iter()
|
||||
.map(|v| v.iter().map(|s| s.replace("0", tone_str)).collect())
|
||||
.collect()
|
||||
}
|
||||
|
||||
pub fn pinyin_to_ipa(pinyin: &str) -> Result<Vec<Vec<String>>, PinyinError> {
|
||||
let (pinyin, tone) = split_tone(pinyin);
|
||||
let pinyin = convert_zero_consonant(pinyin);
|
||||
let pinyin = convert_uv(&pinyin);
|
||||
let pinyin = convert_iou(&pinyin);
|
||||
let pinyin = convert_uei(&pinyin);
|
||||
let pinyin = convert_uen(&pinyin);
|
||||
|
||||
// 处理特殊成音节辅音和感叹词
|
||||
if let Some(ipa) = SYLLABIC_CONSONANT_MAPPINGS.get(pinyin.as_str()) {
|
||||
return Ok(apply_tone(ipa, tone)
|
||||
.into_iter()
|
||||
.map(|i| i.into_iter().collect())
|
||||
.collect());
|
||||
}
|
||||
if let Some(ipa) = INTERJECTION_MAPPINGS.get(pinyin.as_str()) {
|
||||
return Ok(apply_tone(ipa, tone)
|
||||
.into_iter()
|
||||
.map(|i| i.into_iter().collect())
|
||||
.collect());
|
||||
}
|
||||
|
||||
// 分解声母韵母
|
||||
let (initial_part, final_part) = split_initial(pinyin.as_str());
|
||||
|
||||
// 获取韵母IPA
|
||||
let final_ipa = match initial_part {
|
||||
"zh" | "ch" | "sh" | "r" if FINAL_MAPPING_AFTER_ZH_CH_SH_R.contains_key(final_part) => {
|
||||
FINAL_MAPPING_AFTER_ZH_CH_SH_R.get(final_part)
|
||||
}
|
||||
"z" | "c" | "s" if FINAL_MAPPING_AFTER_Z_C_S.contains_key(final_part) => {
|
||||
FINAL_MAPPING_AFTER_Z_C_S.get(final_part)
|
||||
}
|
||||
_ => FINAL_MAPPING.get(final_part),
|
||||
}
|
||||
.ok_or(PinyinError::FinalNotFound(final_part.to_owned()))?;
|
||||
|
||||
// 组合所有可能
|
||||
let mut result = Vec::<Vec<String>>::new();
|
||||
let initials = INITIAL_MAPPING
|
||||
.get(initial_part)
|
||||
.map_or(vec![vec![Default::default()]], |i| {
|
||||
i.iter()
|
||||
.map(|i| i.iter().map(|i| i.to_string()).collect())
|
||||
.collect()
|
||||
});
|
||||
|
||||
for i in initials.into_iter() {
|
||||
for j in apply_tone(final_ipa, tone).into_iter() {
|
||||
result.push(
|
||||
i.iter()
|
||||
.chain(j.iter())
|
||||
.map(|i| i.to_owned())
|
||||
.collect::<Vec<_>>(),
|
||||
)
|
||||
}
|
||||
}
|
||||
|
||||
Ok(result)
|
||||
}
|
||||
Vendored
+673
@@ -0,0 +1,673 @@
|
||||
use crate::KokoroError;
|
||||
|
||||
//noinspection SpellCheckingInspection
|
||||
#[derive(Copy, Clone, Debug)]
|
||||
pub enum Voice {
|
||||
// v1.0
|
||||
ZmYunyang(f32),
|
||||
ZfXiaoni(f32),
|
||||
AfJessica(f32),
|
||||
BfLily(f32),
|
||||
ZfXiaobei(f32),
|
||||
ZmYunxia(f32),
|
||||
AfHeart(f32),
|
||||
BfEmma(f32),
|
||||
AmPuck(f32),
|
||||
BfAlice(f32),
|
||||
HfAlpha(f32),
|
||||
BfIsabella(f32),
|
||||
AfNova(f32),
|
||||
AmFenrir(f32),
|
||||
EmAlex(f32),
|
||||
ImNicola(f32),
|
||||
PmAlex(f32),
|
||||
AfAlloy(f32),
|
||||
ZmYunxi(f32),
|
||||
AfSarah(f32),
|
||||
JfNezumi(f32),
|
||||
BmDaniel(f32),
|
||||
JfTebukuro(f32),
|
||||
JfAlpha(f32),
|
||||
JmKumo(f32),
|
||||
EmSanta(f32),
|
||||
AmLiam(f32),
|
||||
AmSanta(f32),
|
||||
AmEric(f32),
|
||||
BmFable(f32),
|
||||
AfBella(f32),
|
||||
BmLewis(f32),
|
||||
PfDora(f32),
|
||||
AfNicole(f32),
|
||||
BmGeorge(f32),
|
||||
AmOnyx(f32),
|
||||
HmPsi(f32),
|
||||
HfBeta(f32),
|
||||
HmOmega(f32),
|
||||
ZfXiaoxiao(f32),
|
||||
FfSiwis(f32),
|
||||
EfDora(f32),
|
||||
AfAoede(f32),
|
||||
AmEcho(f32),
|
||||
AmMichael(f32),
|
||||
AfKore(f32),
|
||||
ZfXiaoyi(f32),
|
||||
JfGongitsune(f32),
|
||||
AmAdam(f32),
|
||||
IfSara(f32),
|
||||
AfSky(f32),
|
||||
PmSanta(f32),
|
||||
AfRiver(f32),
|
||||
ZmYunjian(f32),
|
||||
|
||||
// v1.1
|
||||
Zm029(i32),
|
||||
Zf048(i32),
|
||||
Zf008(i32),
|
||||
Zm014(i32),
|
||||
Zf003(i32),
|
||||
Zf047(i32),
|
||||
Zm080(i32),
|
||||
Zf094(i32),
|
||||
Zf046(i32),
|
||||
Zm054(i32),
|
||||
Zf001(i32),
|
||||
Zm062(i32),
|
||||
BfVale(i32),
|
||||
Zf044(i32),
|
||||
Zf005(i32),
|
||||
Zf028(i32),
|
||||
Zf059(i32),
|
||||
Zm030(i32),
|
||||
Zf074(i32),
|
||||
Zm009(i32),
|
||||
Zf004(i32),
|
||||
Zf021(i32),
|
||||
Zm095(i32),
|
||||
Zm041(i32),
|
||||
Zf087(i32),
|
||||
Zf039(i32),
|
||||
Zm031(i32),
|
||||
Zf007(i32),
|
||||
Zf038(i32),
|
||||
Zf092(i32),
|
||||
Zm056(i32),
|
||||
Zf099(i32),
|
||||
Zm010(i32),
|
||||
Zm069(i32),
|
||||
Zm016(i32),
|
||||
Zm068(i32),
|
||||
Zf083(i32),
|
||||
Zf093(i32),
|
||||
Zf006(i32),
|
||||
Zf026(i32),
|
||||
Zm053(i32),
|
||||
Zm064(i32),
|
||||
AfSol(i32),
|
||||
Zf042(i32),
|
||||
Zf084(i32),
|
||||
Zf073(i32),
|
||||
Zf067(i32),
|
||||
Zm025(i32),
|
||||
Zm020(i32),
|
||||
Zm050(i32),
|
||||
Zf070(i32),
|
||||
Zf002(i32),
|
||||
Zf032(i32),
|
||||
Zm091(i32),
|
||||
Zm066(i32),
|
||||
Zm089(i32),
|
||||
Zm034(i32),
|
||||
Zm100(i32),
|
||||
Zf086(i32),
|
||||
Zf040(i32),
|
||||
Zm011(i32),
|
||||
Zm098(i32),
|
||||
Zm015(i32),
|
||||
Zf051(i32),
|
||||
Zm065(i32),
|
||||
Zf076(i32),
|
||||
Zf036(i32),
|
||||
Zm033(i32),
|
||||
Zf018(i32),
|
||||
Zf017(i32),
|
||||
Zf049(i32),
|
||||
AfMaple(i32),
|
||||
Zm082(i32),
|
||||
Zm057(i32),
|
||||
Zf079(i32),
|
||||
Zf022(i32),
|
||||
Zm063(i32),
|
||||
Zf060(i32),
|
||||
Zf019(i32),
|
||||
Zm097(i32),
|
||||
Zm096(i32),
|
||||
Zf023(i32),
|
||||
Zf027(i32),
|
||||
Zf085(i32),
|
||||
Zf077(i32),
|
||||
Zm035(i32),
|
||||
Zf088(i32),
|
||||
Zf024(i32),
|
||||
Zf072(i32),
|
||||
Zm055(i32),
|
||||
Zm052(i32),
|
||||
Zf071(i32),
|
||||
Zm061(i32),
|
||||
Zf078(i32),
|
||||
Zm013(i32),
|
||||
Zm081(i32),
|
||||
Zm037(i32),
|
||||
Zf090(i32),
|
||||
Zf043(i32),
|
||||
Zm058(i32),
|
||||
Zm012(i32),
|
||||
Zm045(i32),
|
||||
Zf075(i32),
|
||||
}
|
||||
|
||||
impl Voice {
|
||||
//noinspection SpellCheckingInspection
|
||||
pub(super) fn get_name(&self) -> &str {
|
||||
match self {
|
||||
Self::ZmYunyang(_) => "zm_yunyang",
|
||||
Self::ZfXiaoni(_) => "zf_xiaoni",
|
||||
Self::AfJessica(_) => "af_jessica",
|
||||
Self::BfLily(_) => "bf_lily",
|
||||
Self::ZfXiaobei(_) => "zf_xiaobei",
|
||||
Self::ZmYunxia(_) => "zm_yunxia",
|
||||
Self::AfHeart(_) => "af_heart",
|
||||
Self::BfEmma(_) => "bf_emma",
|
||||
Self::AmPuck(_) => "am_puck",
|
||||
Self::BfAlice(_) => "bf_alice",
|
||||
Self::HfAlpha(_) => "hf_alpha",
|
||||
Self::BfIsabella(_) => "bf_isabella",
|
||||
Self::AfNova(_) => "af_nova",
|
||||
Self::AmFenrir(_) => "am_fenrir",
|
||||
Self::EmAlex(_) => "em_alex",
|
||||
Self::ImNicola(_) => "im_nicola",
|
||||
Self::PmAlex(_) => "pm_alex",
|
||||
Self::AfAlloy(_) => "af_alloy",
|
||||
Self::ZmYunxi(_) => "zm_yunxi",
|
||||
Self::AfSarah(_) => "af_sarah",
|
||||
Self::JfNezumi(_) => "jf_nezumi",
|
||||
Self::BmDaniel(_) => "bm_daniel",
|
||||
Self::JfTebukuro(_) => "jf_tebukuro",
|
||||
Self::JfAlpha(_) => "jf_alpha",
|
||||
Self::JmKumo(_) => "jm_kumo",
|
||||
Self::EmSanta(_) => "em_santa",
|
||||
Self::AmLiam(_) => "am_liam",
|
||||
Self::AmSanta(_) => "am_santa",
|
||||
Self::AmEric(_) => "am_eric",
|
||||
Self::BmFable(_) => "bm_fable",
|
||||
Self::AfBella(_) => "af_bella",
|
||||
Self::BmLewis(_) => "bm_lewis",
|
||||
Self::PfDora(_) => "pf_dora",
|
||||
Self::AfNicole(_) => "af_nicole",
|
||||
Self::BmGeorge(_) => "bm_george",
|
||||
Self::AmOnyx(_) => "am_onyx",
|
||||
Self::HmPsi(_) => "hm_psi",
|
||||
Self::HfBeta(_) => "hf_beta",
|
||||
Self::HmOmega(_) => "hm_omega",
|
||||
Self::ZfXiaoxiao(_) => "zf_xiaoxiao",
|
||||
Self::FfSiwis(_) => "ff_siwis",
|
||||
Self::EfDora(_) => "ef_dora",
|
||||
Self::AfAoede(_) => "af_aoede",
|
||||
Self::AmEcho(_) => "am_echo",
|
||||
Self::AmMichael(_) => "am_michael",
|
||||
Self::AfKore(_) => "af_kore",
|
||||
Self::ZfXiaoyi(_) => "zf_xiaoyi",
|
||||
Self::JfGongitsune(_) => "jf_gongitsune",
|
||||
Self::AmAdam(_) => "am_adam",
|
||||
Self::IfSara(_) => "if_sara",
|
||||
Self::AfSky(_) => "af_sky",
|
||||
Self::PmSanta(_) => "pm_santa",
|
||||
Self::AfRiver(_) => "af_river",
|
||||
Self::ZmYunjian(_) => "zm_yunjian",
|
||||
Self::Zm029(_) => "zm_029",
|
||||
Self::Zf048(_) => "zf_048",
|
||||
Self::Zf008(_) => "zf_008",
|
||||
Self::Zm014(_) => "zm_014",
|
||||
Self::Zf003(_) => "zf_003",
|
||||
Self::Zf047(_) => "zf_047",
|
||||
Self::Zm080(_) => "zm_080",
|
||||
Self::Zf094(_) => "zf_094",
|
||||
Self::Zf046(_) => "zf_046",
|
||||
Self::Zm054(_) => "zm_054",
|
||||
Self::Zf001(_) => "zf_001",
|
||||
Self::Zm062(_) => "zm_062",
|
||||
Self::BfVale(_) => "bf_vale",
|
||||
Self::Zf044(_) => "zf_044",
|
||||
Self::Zf005(_) => "zf_005",
|
||||
Self::Zf028(_) => "zf_028",
|
||||
Self::Zf059(_) => "zf_059",
|
||||
Self::Zm030(_) => "zm_030",
|
||||
Self::Zf074(_) => "zf_074",
|
||||
Self::Zm009(_) => "zm_009",
|
||||
Self::Zf004(_) => "zf_004",
|
||||
Self::Zf021(_) => "zf_021",
|
||||
Self::Zm095(_) => "zm_095",
|
||||
Self::Zm041(_) => "zm_041",
|
||||
Self::Zf087(_) => "zf_087",
|
||||
Self::Zf039(_) => "zf_039",
|
||||
Self::Zm031(_) => "zm_031",
|
||||
Self::Zf007(_) => "zf_007",
|
||||
Self::Zf038(_) => "zf_038",
|
||||
Self::Zf092(_) => "zf_092",
|
||||
Self::Zm056(_) => "zm_056",
|
||||
Self::Zf099(_) => "zf_099",
|
||||
Self::Zm010(_) => "zm_010",
|
||||
Self::Zm069(_) => "zm_069",
|
||||
Self::Zm016(_) => "zm_016",
|
||||
Self::Zm068(_) => "zm_068",
|
||||
Self::Zf083(_) => "zf_083",
|
||||
Self::Zf093(_) => "zf_093",
|
||||
Self::Zf006(_) => "zf_006",
|
||||
Self::Zf026(_) => "zf_026",
|
||||
Self::Zm053(_) => "zm_053",
|
||||
Self::Zm064(_) => "zm_064",
|
||||
Self::AfSol(_) => "af_sol",
|
||||
Self::Zf042(_) => "zf_042",
|
||||
Self::Zf084(_) => "zf_084",
|
||||
Self::Zf073(_) => "zf_073",
|
||||
Self::Zf067(_) => "zf_067",
|
||||
Self::Zm025(_) => "zm_025",
|
||||
Self::Zm020(_) => "zm_020",
|
||||
Self::Zm050(_) => "zm_050",
|
||||
Self::Zf070(_) => "zf_070",
|
||||
Self::Zf002(_) => "zf_002",
|
||||
Self::Zf032(_) => "zf_032",
|
||||
Self::Zm091(_) => "zm_091",
|
||||
Self::Zm066(_) => "zm_066",
|
||||
Self::Zm089(_) => "zm_089",
|
||||
Self::Zm034(_) => "zm_034",
|
||||
Self::Zm100(_) => "zm_100",
|
||||
Self::Zf086(_) => "zf_086",
|
||||
Self::Zf040(_) => "zf_040",
|
||||
Self::Zm011(_) => "zm_011",
|
||||
Self::Zm098(_) => "zm_098",
|
||||
Self::Zm015(_) => "zm_015",
|
||||
Self::Zf051(_) => "zf_051",
|
||||
Self::Zm065(_) => "zm_065",
|
||||
Self::Zf076(_) => "zf_076",
|
||||
Self::Zf036(_) => "zf_036",
|
||||
Self::Zm033(_) => "zm_033",
|
||||
Self::Zf018(_) => "zf_018",
|
||||
Self::Zf017(_) => "zf_017",
|
||||
Self::Zf049(_) => "zf_049",
|
||||
Self::AfMaple(_) => "af_maple",
|
||||
Self::Zm082(_) => "zm_082",
|
||||
Self::Zm057(_) => "zm_057",
|
||||
Self::Zf079(_) => "zf_079",
|
||||
Self::Zf022(_) => "zf_022",
|
||||
Self::Zm063(_) => "zm_063",
|
||||
Self::Zf060(_) => "zf_060",
|
||||
Self::Zf019(_) => "zf_019",
|
||||
Self::Zm097(_) => "zm_097",
|
||||
Self::Zm096(_) => "zm_096",
|
||||
Self::Zf023(_) => "zf_023",
|
||||
Self::Zf027(_) => "zf_027",
|
||||
Self::Zf085(_) => "zf_085",
|
||||
Self::Zf077(_) => "zf_077",
|
||||
Self::Zm035(_) => "zm_035",
|
||||
Self::Zf088(_) => "zf_088",
|
||||
Self::Zf024(_) => "zf_024",
|
||||
Self::Zf072(_) => "zf_072",
|
||||
Self::Zm055(_) => "zm_055",
|
||||
Self::Zm052(_) => "zm_052",
|
||||
Self::Zf071(_) => "zf_071",
|
||||
Self::Zm061(_) => "zm_061",
|
||||
Self::Zf078(_) => "zf_078",
|
||||
Self::Zm013(_) => "zm_013",
|
||||
Self::Zm081(_) => "zm_081",
|
||||
Self::Zm037(_) => "zm_037",
|
||||
Self::Zf090(_) => "zf_090",
|
||||
Self::Zf043(_) => "zf_043",
|
||||
Self::Zm058(_) => "zm_058",
|
||||
Self::Zm012(_) => "zm_012",
|
||||
Self::Zm045(_) => "zm_045",
|
||||
Self::Zf075(_) => "zf_075",
|
||||
}
|
||||
}
|
||||
|
||||
pub(super) fn is_v10_supported(&self) -> bool {
|
||||
matches!(
|
||||
self,
|
||||
Self::ZmYunyang(_)
|
||||
| Self::ZfXiaoni(_)
|
||||
| Self::AfJessica(_)
|
||||
| Self::BfLily(_)
|
||||
| Self::ZfXiaobei(_)
|
||||
| Self::ZmYunxia(_)
|
||||
| Self::AfHeart(_)
|
||||
| Self::BfEmma(_)
|
||||
| Self::AmPuck(_)
|
||||
| Self::BfAlice(_)
|
||||
| Self::HfAlpha(_)
|
||||
| Self::BfIsabella(_)
|
||||
| Self::AfNova(_)
|
||||
| Self::AmFenrir(_)
|
||||
| Self::EmAlex(_)
|
||||
| Self::ImNicola(_)
|
||||
| Self::PmAlex(_)
|
||||
| Self::AfAlloy(_)
|
||||
| Self::ZmYunxi(_)
|
||||
| Self::AfSarah(_)
|
||||
| Self::JfNezumi(_)
|
||||
| Self::BmDaniel(_)
|
||||
| Self::JfTebukuro(_)
|
||||
| Self::JfAlpha(_)
|
||||
| Self::JmKumo(_)
|
||||
| Self::EmSanta(_)
|
||||
| Self::AmLiam(_)
|
||||
| Self::AmSanta(_)
|
||||
| Self::AmEric(_)
|
||||
| Self::BmFable(_)
|
||||
| Self::AfBella(_)
|
||||
| Self::BmLewis(_)
|
||||
| Self::PfDora(_)
|
||||
| Self::AfNicole(_)
|
||||
| Self::BmGeorge(_)
|
||||
| Self::AmOnyx(_)
|
||||
| Self::HmPsi(_)
|
||||
| Self::HfBeta(_)
|
||||
| Self::HmOmega(_)
|
||||
| Self::ZfXiaoxiao(_)
|
||||
| Self::FfSiwis(_)
|
||||
| Self::EfDora(_)
|
||||
| Self::AfAoede(_)
|
||||
| Self::AmEcho(_)
|
||||
| Self::AmMichael(_)
|
||||
| Self::AfKore(_)
|
||||
| Self::ZfXiaoyi(_)
|
||||
| Self::JfGongitsune(_)
|
||||
| Self::AmAdam(_)
|
||||
| Self::IfSara(_)
|
||||
| Self::AfSky(_)
|
||||
| Self::PmSanta(_)
|
||||
| Self::AfRiver(_)
|
||||
| Self::ZmYunjian(_)
|
||||
)
|
||||
}
|
||||
|
||||
pub(super) fn is_v11_supported(&self) -> bool {
|
||||
matches!(
|
||||
self,
|
||||
Self::Zm029(_)
|
||||
| Self::Zf048(_)
|
||||
| Self::Zf008(_)
|
||||
| Self::Zm014(_)
|
||||
| Self::Zf003(_)
|
||||
| Self::Zf047(_)
|
||||
| Self::Zm080(_)
|
||||
| Self::Zf094(_)
|
||||
| Self::Zf046(_)
|
||||
| Self::Zm054(_)
|
||||
| Self::Zf001(_)
|
||||
| Self::Zm062(_)
|
||||
| Self::BfVale(_)
|
||||
| Self::Zf044(_)
|
||||
| Self::Zf005(_)
|
||||
| Self::Zf028(_)
|
||||
| Self::Zf059(_)
|
||||
| Self::Zm030(_)
|
||||
| Self::Zf074(_)
|
||||
| Self::Zm009(_)
|
||||
| Self::Zf004(_)
|
||||
| Self::Zf021(_)
|
||||
| Self::Zm095(_)
|
||||
| Self::Zm041(_)
|
||||
| Self::Zf087(_)
|
||||
| Self::Zf039(_)
|
||||
| Self::Zm031(_)
|
||||
| Self::Zf007(_)
|
||||
| Self::Zf038(_)
|
||||
| Self::Zf092(_)
|
||||
| Self::Zm056(_)
|
||||
| Self::Zf099(_)
|
||||
| Self::Zm010(_)
|
||||
| Self::Zm069(_)
|
||||
| Self::Zm016(_)
|
||||
| Self::Zm068(_)
|
||||
| Self::Zf083(_)
|
||||
| Self::Zf093(_)
|
||||
| Self::Zf006(_)
|
||||
| Self::Zf026(_)
|
||||
| Self::Zm053(_)
|
||||
| Self::Zm064(_)
|
||||
| Self::AfSol(_)
|
||||
| Self::Zf042(_)
|
||||
| Self::Zf084(_)
|
||||
| Self::Zf073(_)
|
||||
| Self::Zf067(_)
|
||||
| Self::Zm025(_)
|
||||
| Self::Zm020(_)
|
||||
| Self::Zm050(_)
|
||||
| Self::Zf070(_)
|
||||
| Self::Zf002(_)
|
||||
| Self::Zf032(_)
|
||||
| Self::Zm091(_)
|
||||
| Self::Zm066(_)
|
||||
| Self::Zm089(_)
|
||||
| Self::Zm034(_)
|
||||
| Self::Zm100(_)
|
||||
| Self::Zf086(_)
|
||||
| Self::Zf040(_)
|
||||
| Self::Zm011(_)
|
||||
| Self::Zm098(_)
|
||||
| Self::Zm015(_)
|
||||
| Self::Zf051(_)
|
||||
| Self::Zm065(_)
|
||||
| Self::Zf076(_)
|
||||
| Self::Zf036(_)
|
||||
| Self::Zm033(_)
|
||||
| Self::Zf018(_)
|
||||
| Self::Zf017(_)
|
||||
| Self::Zf049(_)
|
||||
| Self::AfMaple(_)
|
||||
| Self::Zm082(_)
|
||||
| Self::Zm057(_)
|
||||
| Self::Zf079(_)
|
||||
| Self::Zf022(_)
|
||||
| Self::Zm063(_)
|
||||
| Self::Zf060(_)
|
||||
| Self::Zf019(_)
|
||||
| Self::Zm097(_)
|
||||
| Self::Zm096(_)
|
||||
| Self::Zf023(_)
|
||||
| Self::Zf027(_)
|
||||
| Self::Zf085(_)
|
||||
| Self::Zf077(_)
|
||||
| Self::Zm035(_)
|
||||
| Self::Zf088(_)
|
||||
| Self::Zf024(_)
|
||||
| Self::Zf072(_)
|
||||
| Self::Zm055(_)
|
||||
| Self::Zm052(_)
|
||||
| Self::Zf071(_)
|
||||
| Self::Zm061(_)
|
||||
| Self::Zf078(_)
|
||||
| Self::Zm013(_)
|
||||
| Self::Zm081(_)
|
||||
| Self::Zm037(_)
|
||||
| Self::Zf090(_)
|
||||
| Self::Zf043(_)
|
||||
| Self::Zm058(_)
|
||||
| Self::Zm012(_)
|
||||
| Self::Zm045(_)
|
||||
| Self::Zf075(_)
|
||||
)
|
||||
}
|
||||
|
||||
pub(super) fn get_speed_v10(&self) -> Result<f32, KokoroError> {
|
||||
match self {
|
||||
Self::ZmYunyang(v)
|
||||
| Self::ZfXiaoni(v)
|
||||
| Self::AfJessica(v)
|
||||
| Self::BfLily(v)
|
||||
| Self::ZfXiaobei(v)
|
||||
| Self::ZmYunxia(v)
|
||||
| Self::AfHeart(v)
|
||||
| Self::BfEmma(v)
|
||||
| Self::AmPuck(v)
|
||||
| Self::BfAlice(v)
|
||||
| Self::HfAlpha(v)
|
||||
| Self::BfIsabella(v)
|
||||
| Self::AfNova(v)
|
||||
| Self::AmFenrir(v)
|
||||
| Self::EmAlex(v)
|
||||
| Self::ImNicola(v)
|
||||
| Self::PmAlex(v)
|
||||
| Self::AfAlloy(v)
|
||||
| Self::ZmYunxi(v)
|
||||
| Self::AfSarah(v)
|
||||
| Self::JfNezumi(v)
|
||||
| Self::BmDaniel(v)
|
||||
| Self::JfTebukuro(v)
|
||||
| Self::JfAlpha(v)
|
||||
| Self::JmKumo(v)
|
||||
| Self::EmSanta(v)
|
||||
| Self::AmLiam(v)
|
||||
| Self::AmSanta(v)
|
||||
| Self::AmEric(v)
|
||||
| Self::BmFable(v)
|
||||
| Self::AfBella(v)
|
||||
| Self::BmLewis(v)
|
||||
| Self::PfDora(v)
|
||||
| Self::AfNicole(v)
|
||||
| Self::BmGeorge(v)
|
||||
| Self::AmOnyx(v)
|
||||
| Self::HmPsi(v)
|
||||
| Self::HfBeta(v)
|
||||
| Self::HmOmega(v)
|
||||
| Self::ZfXiaoxiao(v)
|
||||
| Self::FfSiwis(v)
|
||||
| Self::EfDora(v)
|
||||
| Self::AfAoede(v)
|
||||
| Self::AmEcho(v)
|
||||
| Self::AmMichael(v)
|
||||
| Self::AfKore(v)
|
||||
| Self::ZfXiaoyi(v)
|
||||
| Self::JfGongitsune(v)
|
||||
| Self::AmAdam(v)
|
||||
| Self::IfSara(v)
|
||||
| Self::AfSky(v)
|
||||
| Self::PmSanta(v)
|
||||
| Self::AfRiver(v)
|
||||
| Self::ZmYunjian(v) => Ok(*v),
|
||||
_ => Err(KokoroError::VoiceVersionInvalid(
|
||||
"Expect version 1.0".to_owned(),
|
||||
)),
|
||||
}
|
||||
}
|
||||
|
||||
pub(super) fn get_speed_v11(&self) -> Result<i32, KokoroError> {
|
||||
match self {
|
||||
Self::Zm029(v)
|
||||
| Self::Zf048(v)
|
||||
| Self::Zf008(v)
|
||||
| Self::Zm014(v)
|
||||
| Self::Zf003(v)
|
||||
| Self::Zf047(v)
|
||||
| Self::Zm080(v)
|
||||
| Self::Zf094(v)
|
||||
| Self::Zf046(v)
|
||||
| Self::Zm054(v)
|
||||
| Self::Zf001(v)
|
||||
| Self::Zm062(v)
|
||||
| Self::BfVale(v)
|
||||
| Self::Zf044(v)
|
||||
| Self::Zf005(v)
|
||||
| Self::Zf028(v)
|
||||
| Self::Zf059(v)
|
||||
| Self::Zm030(v)
|
||||
| Self::Zf074(v)
|
||||
| Self::Zm009(v)
|
||||
| Self::Zf004(v)
|
||||
| Self::Zf021(v)
|
||||
| Self::Zm095(v)
|
||||
| Self::Zm041(v)
|
||||
| Self::Zf087(v)
|
||||
| Self::Zf039(v)
|
||||
| Self::Zm031(v)
|
||||
| Self::Zf007(v)
|
||||
| Self::Zf038(v)
|
||||
| Self::Zf092(v)
|
||||
| Self::Zm056(v)
|
||||
| Self::Zf099(v)
|
||||
| Self::Zm010(v)
|
||||
| Self::Zm069(v)
|
||||
| Self::Zm016(v)
|
||||
| Self::Zm068(v)
|
||||
| Self::Zf083(v)
|
||||
| Self::Zf093(v)
|
||||
| Self::Zf006(v)
|
||||
| Self::Zf026(v)
|
||||
| Self::Zm053(v)
|
||||
| Self::Zm064(v)
|
||||
| Self::AfSol(v)
|
||||
| Self::Zf042(v)
|
||||
| Self::Zf084(v)
|
||||
| Self::Zf073(v)
|
||||
| Self::Zf067(v)
|
||||
| Self::Zm025(v)
|
||||
| Self::Zm020(v)
|
||||
| Self::Zm050(v)
|
||||
| Self::Zf070(v)
|
||||
| Self::Zf002(v)
|
||||
| Self::Zf032(v)
|
||||
| Self::Zm091(v)
|
||||
| Self::Zm066(v)
|
||||
| Self::Zm089(v)
|
||||
| Self::Zm034(v)
|
||||
| Self::Zm100(v)
|
||||
| Self::Zf086(v)
|
||||
| Self::Zf040(v)
|
||||
| Self::Zm011(v)
|
||||
| Self::Zm098(v)
|
||||
| Self::Zm015(v)
|
||||
| Self::Zf051(v)
|
||||
| Self::Zm065(v)
|
||||
| Self::Zf076(v)
|
||||
| Self::Zf036(v)
|
||||
| Self::Zm033(v)
|
||||
| Self::Zf018(v)
|
||||
| Self::Zf017(v)
|
||||
| Self::Zf049(v)
|
||||
| Self::AfMaple(v)
|
||||
| Self::Zm082(v)
|
||||
| Self::Zm057(v)
|
||||
| Self::Zf079(v)
|
||||
| Self::Zf022(v)
|
||||
| Self::Zm063(v)
|
||||
| Self::Zf060(v)
|
||||
| Self::Zf019(v)
|
||||
| Self::Zm097(v)
|
||||
| Self::Zm096(v)
|
||||
| Self::Zf023(v)
|
||||
| Self::Zf027(v)
|
||||
| Self::Zf085(v)
|
||||
| Self::Zf077(v)
|
||||
| Self::Zm035(v)
|
||||
| Self::Zf088(v)
|
||||
| Self::Zf024(v)
|
||||
| Self::Zf072(v)
|
||||
| Self::Zm055(v)
|
||||
| Self::Zm052(v)
|
||||
| Self::Zf071(v)
|
||||
| Self::Zm061(v)
|
||||
| Self::Zf078(v)
|
||||
| Self::Zm013(v)
|
||||
| Self::Zm081(v)
|
||||
| Self::Zm037(v)
|
||||
| Self::Zf090(v)
|
||||
| Self::Zf043(v)
|
||||
| Self::Zm058(v)
|
||||
| Self::Zm012(v)
|
||||
| Self::Zm045(v)
|
||||
| Self::Zf075(v) => Ok(*v),
|
||||
_ => Err(KokoroError::VoiceVersionInvalid(
|
||||
"Expect version 1.1".to_owned(),
|
||||
)),
|
||||
}
|
||||
}
|
||||
}
|
||||
@@ -3,6 +3,6 @@
|
||||
*/
|
||||
// Package metadata. `version` appears exactly once: a duplicated key in an
// object literal is a compile error (TS1117) and only the last value would
// take effect at runtime anyway.
export const commitinfo = {
  name: 'siprouter',
  version: '1.27.0',
  description: 'undefined'
}
|
||||
|
||||
@@ -1,349 +0,0 @@
|
||||
/**
|
||||
* TTS announcement module — pre-generates audio announcements using espeak-ng
|
||||
* and caches them as encoded RTP packets for playback during call setup.
|
||||
*
|
||||
* On startup, generates the announcement WAV via espeak-ng (formant-based TTS
|
||||
* with highly accurate pronunciation), encodes each 20ms frame to G.722 (for
|
||||
* SIP) and Opus (for WebRTC) via the Rust transcoder, and caches the packets.
|
||||
*
|
||||
* Falls back to the Rust tts-engine (Kokoro neural TTS) if espeak-ng is not
|
||||
* installed, and disables announcements if neither is available.
|
||||
*/
|
||||
|
||||
import { execFileSync, execSync } from 'node:child_process';
import fs from 'node:fs';
import path from 'node:path';
import { Buffer } from 'node:buffer';
import { encodePcm, isCodecReady } from './opusbridge.ts';
|
||||
|
||||
/**
 * RTP timestamp increment for one 20ms frame.
 *
 * Payload type 111 advances by 960 ticks; every other payload type
 * (including 9) advances by 160.
 */
function rtpClockIncrement(pt: number): number {
  return pt === 111 ? 960 : 160;
}
|
||||
|
||||
/**
 * Builds a fresh 12-byte RTP header.
 *
 * @param pt     payload type; only the low 7 bits are used
 * @param seq    sequence number; wrapped to 16 bits
 * @param ts     timestamp; coerced to unsigned 32 bits
 * @param ssrc   synchronization source id; coerced to unsigned 32 bits
 * @param marker sets the top bit of byte 1 when true
 * @returns a newly allocated buffer holding the header
 */
function buildRtpHeader(pt: number, seq: number, ts: number, ssrc: number, marker: boolean): Buffer {
  const header = Buffer.alloc(12);
  const markerBit = marker ? 0x80 : 0x00;

  header.writeUInt8(0x80, 0); // first byte is always 0x80
  header.writeUInt8(markerBit | (pt & 0x7f), 1);
  header.writeUInt16BE(seq & 0xffff, 2);
  header.writeUInt32BE(ts >>> 0, 4);
  header.writeUInt32BE(ssrc >>> 0, 8);

  return header;
}
|
||||
|
||||
// ---------------------------------------------------------------------------
|
||||
// Types
|
||||
// ---------------------------------------------------------------------------
|
||||
|
||||
/** A pre-encoded announcement ready for RTP playback. */
|
||||
export interface IAnnouncementCache {
|
||||
/** G.722 encoded frames (each is a 20ms frame payload, no RTP header). */
|
||||
g722Frames: Buffer[];
|
||||
/** Opus encoded frames for WebRTC playback. */
|
||||
opusFrames: Buffer[];
|
||||
/** Total duration in milliseconds. */
|
||||
durationMs: number;
|
||||
}
|
||||
|
||||
// ---------------------------------------------------------------------------
|
||||
// State
|
||||
// ---------------------------------------------------------------------------
|
||||
|
||||
let cachedAnnouncement: IAnnouncementCache | null = null;
|
||||
|
||||
const TTS_DIR = path.join(process.cwd(), '.nogit', 'tts');
|
||||
const ANNOUNCEMENT_TEXT = "Hello. I'm connecting your call now.";
|
||||
const CACHE_WAV = path.join(TTS_DIR, 'announcement.wav');
|
||||
|
||||
// Kokoro fallback constants.
|
||||
const KOKORO_MODEL = 'kokoro-v1.0.onnx';
|
||||
const KOKORO_VOICES = 'voices.bin';
|
||||
const KOKORO_VOICE = 'af_bella';
|
||||
|
||||
// ---------------------------------------------------------------------------
|
||||
// Initialization
|
||||
// ---------------------------------------------------------------------------
|
||||
|
||||
/**
 * Reports whether `which espeak-ng` succeeds, i.e. whether an `espeak-ng`
 * executable can be found on the PATH.
 */
function isEspeakAvailable(): boolean {
  let found = true;
  try {
    execSync('which espeak-ng', { stdio: 'pipe' });
  } catch {
    // `which` exited non-zero (or could not run): treat as not installed.
    found = false;
  }
  return found;
}
|
||||
|
||||
/**
 * Generate announcement WAV via espeak-ng (primary engine).
 *
 * espeak-ng is started directly with an argument vector rather than through
 * a shell command string, so quotes, `$`, backticks or backslashes in
 * `wavPath` or `text` are passed through literally instead of being
 * interpreted by the shell.
 *
 * @param wavPath destination WAV file
 * @param text    text to synthesize
 * @param log     sink for progress and error messages
 * @returns true on success, false if espeak-ng could not be run, failed, or
 *          exceeded the 10s timeout
 */
function generateViaEspeak(wavPath: string, text: string, log: (msg: string) => void): boolean {
  log('[tts] generating announcement audio via espeak-ng...');
  try {
    execFileSync(
      'espeak-ng',
      ['-v', 'en-us', '-s', '150', '-w', wavPath, text],
      { timeout: 10000, stdio: 'pipe' },
    );
    log('[tts] espeak-ng WAV generated');
    return true;
  } catch (e: unknown) {
    const reason = e instanceof Error ? e.message : String(e);
    log(`[tts] espeak-ng failed: ${reason}`);
    return false;
  }
}
|
||||
|
||||
/**
 * Generate announcement WAV via Kokoro TTS (fallback engine).
 *
 * Requires the Kokoro model and voices files inside `TTS_DIR` and a
 * `tts-engine` binary in one of `dist_rust/`, `rust/target/release/` or
 * `rust/target/debug/` under the current working directory (checked in that
 * order). The binary is started directly with an argument vector rather than
 * through a shell command string, so special characters in the paths or in
 * `text` are passed through literally.
 *
 * @param wavPath destination WAV file
 * @param text    text to synthesize
 * @param log     sink for progress and error messages
 * @returns true on success, false if a prerequisite is missing, the engine
 *          failed, or it exceeded the 120s timeout
 */
function generateViaKokoro(wavPath: string, text: string, log: (msg: string) => void): boolean {
  const modelPath = path.join(TTS_DIR, KOKORO_MODEL);
  const voicesPath = path.join(TTS_DIR, KOKORO_VOICES);

  if (!fs.existsSync(modelPath) || !fs.existsSync(voicesPath)) {
    log('[tts] Kokoro model/voices not found — Kokoro fallback unavailable');
    return false;
  }

  const root = process.cwd();
  const ttsBinPaths = [
    path.join(root, 'dist_rust', 'tts-engine'),
    path.join(root, 'rust', 'target', 'release', 'tts-engine'),
    path.join(root, 'rust', 'target', 'debug', 'tts-engine'),
  ];
  const ttsBin = ttsBinPaths.find((p) => fs.existsSync(p));
  if (!ttsBin) {
    log('[tts] tts-engine binary not found — Kokoro fallback unavailable');
    return false;
  }

  log('[tts] generating announcement audio via Kokoro TTS (fallback)...');
  try {
    execFileSync(
      ttsBin,
      [
        '--model', modelPath,
        '--voices', voicesPath,
        '--voice', KOKORO_VOICE,
        '--output', wavPath,
        '--text', text,
      ],
      { timeout: 120000, stdio: 'pipe' },
    );
    log('[tts] Kokoro WAV generated');
    return true;
  } catch (e: unknown) {
    const reason = e instanceof Error ? e.message : String(e);
    log(`[tts] Kokoro failed: ${reason}`);
    return false;
  }
}
|
||||
|
||||
/**
 * Read a WAV file and detect its sample rate from the fmt chunk.
 *
 * Walks the RIFF chunk list after the 12-byte header. The sample rate is
 * taken from the `fmt ` chunk (22050 is used when no `fmt ` chunk is found)
 * and the payload of the `data` chunk is returned as-is; if several `data`
 * chunks exist, the last one wins.
 *
 * NOTE(review): bit depth and channel count are not inspected here; callers
 * in this file treat the payload as 16-bit samples — confirm for new inputs.
 *
 * @param wavPath - path of the WAV file to read (read errors propagate)
 * @returns `{ pcm, sampleRate }`, or null when the file is not a RIFF/WAVE
 *          file, has no data chunk, has a truncated fmt chunk, or declares a
 *          sample rate of 0
 */
function readWavWithRate(wavPath: string): { pcm: Buffer; sampleRate: number } | null {
  const wav = fs.readFileSync(wavPath);
  if (wav.length < 44) return null;
  if (wav.toString('ascii', 0, 4) !== 'RIFF') return null;
  if (wav.toString('ascii', 8, 12) !== 'WAVE') return null;

  let sampleRate = 22050; // default when no fmt chunk is present
  let pcm: Buffer | null = null;
  let offset = 12;

  while (offset < wav.length - 8) {
    const chunkId = wav.toString('ascii', offset, offset + 4);
    const chunkSize = wav.readUInt32LE(offset + 4);
    const bodyStart = offset + 8;

    if (chunkId === 'fmt ') {
      // The sample rate occupies bytes 4..8 of the chunk body; a chunk cut
      // off before that point cannot be trusted.
      if (bodyStart + 8 > wav.length) return null;
      sampleRate = wav.readUInt32LE(bodyStart + 4);
    } else if (chunkId === 'data') {
      // subarray clamps to the buffer end, so an oversized declared length is safe.
      pcm = wav.subarray(bodyStart, bodyStart + chunkSize);
    }

    offset = bodyStart + chunkSize;
    if (offset % 2 !== 0) offset++; // chunks are padded to even offsets
  }

  // A zero rate would make the 20ms frame size 0 in the encoders.
  if (!pcm || sampleRate <= 0) return null;
  return { pcm, sampleRate };
}
|
||||
|
||||
/**
 * Pre-generate the announcement audio and encode to G.722 + Opus frames.
 * Must be called after the codec bridge is initialized.
 *
 * Engine priority: espeak-ng → Kokoro → disabled.
 *
 * The WAV is generated only when CACHE_WAV does not exist yet; otherwise the
 * file on disk is reused. The PCM is then cut into 20ms frames and each frame
 * is encoded once per codec. On success the result is stored in the
 * module-level announcement cache.
 *
 * @param log - sink for progress and failure messages
 * @returns true when the announcement was encoded and cached; false when no
 *          TTS engine produced a WAV, the WAV could not be parsed or has an
 *          unusable sample rate, the codec bridge is not ready, or an error
 *          was thrown along the way
 */
export async function initAnnouncement(log: (msg: string) => void): Promise<boolean> {
  fs.mkdirSync(TTS_DIR, { recursive: true });

  try {
    // Generate WAV if not cached.
    if (!fs.existsSync(CACHE_WAV)) {
      let generated = false;

      // Try espeak-ng first.
      if (isEspeakAvailable()) {
        generated = generateViaEspeak(CACHE_WAV, ANNOUNCEMENT_TEXT, log);
      } else {
        log('[tts] espeak-ng not installed — trying Kokoro fallback');
      }

      // Fall back to Kokoro.
      if (!generated) {
        generated = generateViaKokoro(CACHE_WAV, ANNOUNCEMENT_TEXT, log);
      }

      if (!generated) {
        log('[tts] no TTS engine available — announcements disabled');
        return false;
      }
    }

    // Read WAV and extract raw PCM + sample rate.
    const result = readWavWithRate(CACHE_WAV);
    if (!result) {
      log('[tts] failed to parse WAV file');
      return false;
    }
    const { pcm, sampleRate } = result;

    // The encoder lives in the codec bridge; without it nothing can be cached yet.
    if (!isCodecReady()) {
      log('[tts] codec bridge not ready — will retry');
      return false;
    }

    // Encode in 20ms chunks. The Rust encoder resamples to each codec's native rate.
    const FRAME_SAMPLES = Math.floor(sampleRate * 0.02);
    const FRAME_BYTES = FRAME_SAMPLES * 2; // 16-bit = 2 bytes per sample

    // A sample rate below 50 Hz yields a zero-byte frame; dividing by it would
    // make the frame count Infinity and the loop below would never terminate.
    if (FRAME_BYTES <= 0) {
      log(`[tts] unusable WAV sample rate: ${sampleRate}Hz`);
      return false;
    }
    const totalFrames = Math.floor(pcm.length / FRAME_BYTES);

    const g722Frames: Buffer[] = [];
    const opusFrames: Buffer[] = [];

    log(`[tts] encoding ${totalFrames} frames (${FRAME_SAMPLES} samples/frame @ ${sampleRate}Hz)...`);
    for (let i = 0; i < totalFrames; i++) {
      // Copy the slice so the encoder gets a standalone buffer.
      const pcmBuf = Buffer.from(pcm.subarray(i * FRAME_BYTES, (i + 1) * FRAME_BYTES));
      const [g722, opus] = await Promise.all([
        encodePcm(pcmBuf, sampleRate, 9), // G.722 for SIP devices
        encodePcm(pcmBuf, sampleRate, 111), // Opus for WebRTC browsers
      ]);
      if (g722) g722Frames.push(g722);
      if (opus) opusFrames.push(opus);
      // Only the first few failures are logged to avoid flooding the log.
      if (!g722 && !opus && i < 3) log(`[tts] frame ${i} encode failed`);
    }

    cachedAnnouncement = {
      g722Frames,
      opusFrames,
      durationMs: totalFrames * 20,
    };

    log(`[tts] announcement cached: ${g722Frames.length} frames (${(totalFrames * 20 / 1000).toFixed(1)}s)`);
    return true;
  } catch (e: unknown) {
    const reason = e instanceof Error ? e.message : String(e);
    log(`[tts] init error: ${reason}`);
    return false;
  }
}
|
||||
|
||||
// ---------------------------------------------------------------------------
|
||||
// Playback
|
||||
// ---------------------------------------------------------------------------
|
||||
|
||||
/**
 * Stream the cached G.722 announcement to an RTP endpoint, one frame every
 * 20ms.
 *
 * Sequence number and timestamp start at random values; the first packet
 * carries the RTP marker bit. When nothing is cached, `onDone` fires
 * immediately and no timer is started.
 *
 * @param sendPacket - function to send a raw RTP packet
 * @param ssrc - SSRC to use in RTP headers
 * @param onDone - called when the announcement finishes (not called on cancel)
 * @returns a cancel function, or null if no announcement is cached
 */
export function playAnnouncement(
  sendPacket: (pkt: Buffer) => void,
  ssrc: number,
  onDone?: () => void,
): (() => void) | null {
  const g722 = cachedAnnouncement?.g722Frames ?? [];
  if (g722.length === 0) {
    onDone?.();
    return null;
  }

  const payloadType = 9; // G.722
  let nextSeq = Math.floor(Math.random() * 0xffff);
  let nextTs = Math.floor(Math.random() * 0xffffffff);
  let cursor = 0;

  const handle = setInterval(function tick(): void {
    // All frames sent: stop the timer and notify the caller.
    if (cursor >= g722.length) {
      clearInterval(handle);
      onDone?.();
      return;
    }

    const isFirst = cursor === 0;
    const header = buildRtpHeader(payloadType, nextSeq & 0xffff, nextTs >>> 0, ssrc >>> 0, isFirst);
    sendPacket(Buffer.concat([header, g722[cursor]]));

    nextSeq += 1;
    nextTs += rtpClockIncrement(payloadType);
    cursor += 1;
  }, 20);

  // Cancel simply stops the timer.
  return () => clearInterval(handle);
}
|
||||
|
||||
/**
 * Stream the cached Opus announcement to a WebRTC sender, one frame every
 * 20ms.
 *
 * Unlike the G.722 variant, sequence number and timestamp are not generated
 * here: they are read from and advanced on the caller-owned `counters`
 * object. The first packet carries the RTP marker bit. When nothing is
 * cached, `onDone` fires immediately and no timer is started.
 *
 * @param sendRtpPacket - function to send a raw RTP packet via sender.sendRtp()
 * @param ssrc - SSRC to use in RTP headers
 * @param counters - seq/ts state, mutated in place (+1 / +960 per packet)
 * @param onDone - called when announcement finishes (not called on cancel)
 * @returns cancel function, or null if no announcement cached
 */
export function playAnnouncementToWebRtc(
  sendRtpPacket: (pkt: Buffer) => void,
  ssrc: number,
  counters: { seq: number; ts: number },
  onDone?: () => void,
): (() => void) | null {
  const opus = cachedAnnouncement?.opusFrames ?? [];
  if (opus.length === 0) {
    onDone?.();
    return null;
  }

  const payloadType = 111; // Opus
  let cursor = 0;

  const handle = setInterval(function tick(): void {
    if (cursor >= opus.length) {
      clearInterval(handle);
      onDone?.();
      return;
    }

    const isFirst = cursor === 0;
    const header = buildRtpHeader(payloadType, counters.seq & 0xffff, counters.ts >>> 0, ssrc >>> 0, isFirst);
    sendRtpPacket(Buffer.concat([header, opus[cursor]]));

    counters.seq += 1;
    counters.ts += 960; // Opus at 48kHz: 960 samples per 20ms
    cursor += 1;
  }, 20);

  return () => clearInterval(handle);
}
|
||||
|
||||
/**
 * Report whether an announcement has been cached with at least one G.722 frame.
 */
export function isAnnouncementReady(): boolean {
  if (cachedAnnouncement === null) return false;
  return cachedAnnouncement.g722Frames.length > 0;
}
|
||||
|
||||
@@ -1,421 +0,0 @@
|
||||
/**
|
||||
* PromptCache — manages multiple named audio prompts for IVR and voicemail.
|
||||
*
|
||||
* Each prompt is pre-encoded as both G.722 frames (for SIP legs) and Opus
|
||||
* frames (for WebRTC legs), ready for 20ms RTP playback.
|
||||
*
|
||||
* Supports three sources:
|
||||
* 1. TTS generation via espeak-ng (primary) or Kokoro (fallback)
|
||||
* 2. Loading from a pre-existing WAV file
|
||||
* 3. Programmatic tone generation (beep, etc.)
|
||||
*
|
||||
* The existing announcement.ts system continues to work independently;
|
||||
* this module provides generalized prompt management for IVR/voicemail.
|
||||
*/
|
||||
|
||||
import { execSync } from 'node:child_process';
|
||||
import fs from 'node:fs';
|
||||
import path from 'node:path';
|
||||
import { Buffer } from 'node:buffer';
|
||||
import { encodePcm, isCodecReady } from '../opusbridge.ts';
|
||||
|
||||
/**
 * RTP timestamp advance for one 20ms frame of the given payload type.
 *
 * Opus (PT 111) advances by 960; G.722 (PT 9) and every other payload type
 * advance by 160.
 */
function rtpClockIncrement(pt: number): number {
  return pt === 111 ? 960 : 160;
}
|
||||
|
||||
/**
 * Assemble a fixed 12-byte RTP header.
 *
 * Layout written: byte 0 = 0x80, byte 1 = marker bit | 7-bit payload type,
 * bytes 2-3 = sequence number, bytes 4-7 = timestamp, bytes 8-11 = SSRC
 * (all big-endian). Out-of-range inputs are masked to their field width.
 *
 * @param pt - payload type (low 7 bits are used)
 * @param seq - sequence number (low 16 bits are used)
 * @param ts - RTP timestamp (coerced to unsigned 32-bit)
 * @param ssrc - synchronization source id (coerced to unsigned 32-bit)
 * @param marker - whether to set the marker bit
 * @returns a new 12-byte buffer holding the header
 */
function buildRtpHeader(pt: number, seq: number, ts: number, ssrc: number, marker: boolean): Buffer {
  const markerBit = marker ? 0x80 : 0x00;
  const header = Buffer.alloc(12);
  header.writeUInt8(0x80, 0);
  header.writeUInt8(markerBit | (pt & 0x7f), 1);
  header.writeUInt16BE(seq & 0xffff, 2);
  header.writeUInt32BE(ts >>> 0, 4);
  header.writeUInt32BE(ssrc >>> 0, 8);
  return header;
}
|
||||
|
||||
// ---------------------------------------------------------------------------
|
||||
// Types
|
||||
// ---------------------------------------------------------------------------
|
||||
|
||||
/**
 * A pre-encoded prompt ready for RTP playback.
 *
 * Holds the same audio twice, once per codec, so either a G.722 or an Opus
 * playback helper can stream it without further encoding.
 */
|
||||
export interface ICachedPrompt {
|
||||
/** Unique prompt identifier; also the key under which PromptCache stores it. */
|
||||
id: string;
|
||||
/** G.722 encoded frames (20ms each, no RTP header). */
|
||||
g722Frames: Buffer[];
|
||||
/** Opus encoded frames (20ms each, no RTP header). */
|
||||
opusFrames: Buffer[];
|
||||
/** Total duration in milliseconds (PromptCache derives it as G.722 frame count × 20). */
|
||||
durationMs: number;
|
||||
}
|
||||
|
||||
// ---------------------------------------------------------------------------
|
||||
// TTS helpers
|
||||
// ---------------------------------------------------------------------------
|
||||
|
||||
const TTS_DIR = path.join(process.cwd(), '.nogit', 'tts');
|
||||
|
||||
/**
 * Probe the host for an `espeak-ng` executable by running `which espeak-ng`.
 * Any failure of that command is reported as "not available".
 */
function isEspeakAvailable(): boolean {
  let available = true;
  try {
    execSync('which espeak-ng', { stdio: 'pipe' });
  } catch {
    available = false;
  }
  return available;
}
|
||||
|
||||
/**
 * Generate WAV via espeak-ng.
 *
 * The command is executed through the shell, so the output path and the
 * prompt text are single-quoted before being placed on the command line;
 * otherwise text containing `"`, `$` or backticks would break the command or
 * be interpreted by the shell.
 *
 * @param wavPath - destination WAV file handed to `espeak-ng -w`
 * @param text - prompt text to synthesize
 * @returns true when espeak-ng exited successfully; false on any failure
 *          (failures are intentionally silent so the caller can fall back)
 */
function generateViaEspeak(wavPath: string, text: string): boolean {
  // POSIX single-quote escaping: nothing is expanded inside '...'; an embedded
  // quote is written as '\'' (close quote, escaped quote, reopen quote).
  const shq = (arg: string): string => "'" + arg.replace(/'/g, "'\\''") + "'";

  try {
    execSync(
      `espeak-ng -v en-us -s 150 -w ${shq(wavPath)} ${shq(text)}`,
      { timeout: 10000, stdio: 'pipe' },
    );
    return true;
  } catch {
    return false;
  }
}
|
||||
|
||||
/**
 * Generate WAV via Kokoro TTS.
 *
 * Requires `kokoro-v1.0.onnx` and `voices.bin` inside TTS_DIR and a
 * `tts-engine` binary in one of the known build output locations below the
 * current working directory. Every argument is shell-quoted because the
 * command is run through the shell and `text` / `voice` come from the caller.
 *
 * @param wavPath - destination WAV file passed via `--output`
 * @param text - prompt text to synthesize
 * @param voice - voice name passed via `--voice`
 * @returns true on success; false when prerequisites are missing or the
 *          engine fails (failures are intentionally silent)
 */
function generateViaKokoro(wavPath: string, text: string, voice: string): boolean {
  // POSIX single-quote escaping so paths/text cannot be interpreted by the shell.
  const shq = (arg: string): string => "'" + arg.replace(/'/g, "'\\''") + "'";

  const modelPath = path.join(TTS_DIR, 'kokoro-v1.0.onnx');
  const voicesPath = path.join(TTS_DIR, 'voices.bin');
  if (!fs.existsSync(modelPath) || !fs.existsSync(voicesPath)) return false;

  // First existing candidate wins: packaged binary, then release, then debug build.
  const root = process.cwd();
  const ttsBin = [
    path.join(root, 'dist_rust', 'tts-engine'),
    path.join(root, 'rust', 'target', 'release', 'tts-engine'),
    path.join(root, 'rust', 'target', 'debug', 'tts-engine'),
  ].find((p) => fs.existsSync(p));
  if (!ttsBin) return false;

  const command = [
    shq(ttsBin),
    '--model', shq(modelPath),
    '--voices', shq(voicesPath),
    '--voice', shq(voice),
    '--output', shq(wavPath),
    '--text', shq(text),
  ].join(' ');

  try {
    execSync(command, { timeout: 120000, stdio: 'pipe' });
    return true;
  } catch {
    return false;
  }
}
|
||||
|
||||
/**
 * Read a WAV file and return raw PCM + sample rate.
 *
 * Walks the RIFF chunk list after the 12-byte header. The sample rate is
 * taken from the `fmt ` chunk (22050 is used when no `fmt ` chunk is found)
 * and the payload of the `data` chunk is returned as-is; if several `data`
 * chunks exist, the last one wins.
 *
 * NOTE(review): bit depth and channel count are not inspected here; the
 * encoder in this file treats the payload as 16-bit samples — confirm for
 * externally supplied WAVs.
 *
 * @param wavPath - path of the WAV file to read (read errors propagate)
 * @returns `{ pcm, sampleRate }`, or null when the file is not a RIFF/WAVE
 *          file, has no data chunk, has a truncated fmt chunk, or declares a
 *          sample rate of 0
 */
function readWavWithRate(wavPath: string): { pcm: Buffer; sampleRate: number } | null {
  const wav = fs.readFileSync(wavPath);
  if (wav.length < 44) return null;
  if (wav.toString('ascii', 0, 4) !== 'RIFF') return null;
  if (wav.toString('ascii', 8, 12) !== 'WAVE') return null;

  let sampleRate = 22050; // default when no fmt chunk is present
  let pcm: Buffer | null = null;
  let offset = 12;

  while (offset < wav.length - 8) {
    const chunkId = wav.toString('ascii', offset, offset + 4);
    const chunkSize = wav.readUInt32LE(offset + 4);
    const bodyStart = offset + 8;

    if (chunkId === 'fmt ') {
      // The sample rate occupies bytes 4..8 of the chunk body; a chunk cut
      // off before that point cannot be trusted.
      if (bodyStart + 8 > wav.length) return null;
      sampleRate = wav.readUInt32LE(bodyStart + 4);
    } else if (chunkId === 'data') {
      // subarray clamps to the buffer end, so an oversized declared length is safe.
      pcm = wav.subarray(bodyStart, bodyStart + chunkSize);
    }

    offset = bodyStart + chunkSize;
    if (offset % 2 !== 0) offset++; // chunks are padded to even offsets
  }

  // A zero rate would make the 20ms frame size 0 in the encoder.
  if (!pcm || sampleRate <= 0) return null;
  return { pcm, sampleRate };
}
|
||||
|
||||
/**
 * Encode raw PCM frames to G.722 + Opus.
 *
 * The PCM is cut into 20ms frames (2 bytes per sample) and every frame is
 * encoded once per codec; a trailing partial frame is dropped. Frames for
 * which the encoder returns nothing are skipped, so the two output arrays may
 * differ in length.
 *
 * @param pcm - raw PCM payload, treated as 16-bit samples
 * @param sampleRate - sample rate of `pcm` in Hz
 * @param log - sink for diagnostic messages
 * @returns encoded frame lists, or null when the codec bridge is not ready
 *          or the sample rate is too low to form a 20ms frame
 */
async function encodePcmFrames(
  pcm: Buffer,
  sampleRate: number,
  log: (msg: string) => void,
): Promise<{ g722Frames: Buffer[]; opusFrames: Buffer[] } | null> {
  if (!isCodecReady()) return null;

  const frameSamples = Math.floor(sampleRate * 0.02); // 20ms
  const frameBytes = frameSamples * 2; // 16-bit

  // A rate below 50 Hz (or NaN) gives a zero-byte frame; dividing by it would
  // make the frame count Infinity and the loop below would never terminate.
  if (!(frameBytes > 0)) {
    log(`[prompt-cache] unusable sample rate: ${sampleRate}Hz`);
    return null;
  }
  const totalFrames = Math.floor(pcm.length / frameBytes);

  const g722Frames: Buffer[] = [];
  const opusFrames: Buffer[] = [];

  for (let i = 0; i < totalFrames; i++) {
    // Copy the slice so the encoder gets a standalone buffer.
    const framePcm = Buffer.from(pcm.subarray(i * frameBytes, (i + 1) * frameBytes));
    const [g722, opus] = await Promise.all([
      encodePcm(framePcm, sampleRate, 9), // G.722
      encodePcm(framePcm, sampleRate, 111), // Opus
    ]);
    if (g722) g722Frames.push(g722);
    if (opus) opusFrames.push(opus);
  }

  // Surface silently dropped frames; they shorten the playable prompt.
  const droppedG722 = totalFrames - g722Frames.length;
  const droppedOpus = totalFrames - opusFrames.length;
  if (droppedG722 > 0 || droppedOpus > 0) {
    log(`[prompt-cache] encoder dropped frames: g722=${droppedG722} opus=${droppedOpus} of ${totalFrames}`);
  }

  return { g722Frames, opusFrames };
}
|
||||
|
||||
// ---------------------------------------------------------------------------
|
||||
// PromptCache
|
||||
// ---------------------------------------------------------------------------
|
||||
|
||||
/**
 * In-memory registry of pre-encoded audio prompts, keyed by prompt ID.
 *
 * Prompts can be produced from TTS, from an existing WAV file, or as a
 * synthesized sine beep. Each successful producer stores its result in the
 * cache and returns it; each failure is logged and yields null.
 */
export class PromptCache {
  private prompts = new Map<string, ICachedPrompt>();
  private log: (msg: string) => void;
  /** Result of the espeak-ng probe; null until the first generatePrompt() call. */
  private espeakAvailable: boolean | null = null;

  /**
   * @param log - sink for all `[prompt-cache]` messages
   */
  constructor(log: (msg: string) => void) {
    this.log = log;
  }

  // -------------------------------------------------------------------------
  // Public API
  // -------------------------------------------------------------------------

  /** Look up a prompt; returns null when the ID is unknown. */
  get(id: string): ICachedPrompt | null {
    const hit = this.prompts.get(id);
    return hit ?? null;
  }

  /** Whether a prompt with this ID is cached. */
  has(id: string): boolean {
    return this.prompts.has(id);
  }

  /** IDs of all cached prompts, in insertion order of the underlying map. */
  listIds(): string[] {
    return Array.from(this.prompts.keys());
  }

  /**
   * Synthesize `text` to `prompt-<id>.wav` inside TTS_DIR and cache it.
   *
   * espeak-ng is tried first (when the one-time probe found it), Kokoro
   * second. An already existing WAV for this ID is reused without
   * regenerating it.
   *
   * @param id - prompt ID; also part of the WAV file name
   * @param text - text to synthesize
   * @param voice - Kokoro voice name (only used by the Kokoro fallback)
   * @returns the cached prompt, or null when generation/encoding failed
   */
  async generatePrompt(id: string, text: string, voice = 'af_bella'): Promise<ICachedPrompt | null> {
    fs.mkdirSync(TTS_DIR, { recursive: true });
    const wavPath = path.join(TTS_DIR, `prompt-${id}.wav`);

    // Probe for espeak-ng only once per cache instance.
    this.espeakAvailable ??= isEspeakAvailable();

    if (!fs.existsSync(wavPath)) {
      const produced =
        (this.espeakAvailable && generateViaEspeak(wavPath, text)) ||
        generateViaKokoro(wavPath, text, voice);
      if (!produced) {
        this.log(`[prompt-cache] failed to generate TTS for "${id}"`);
        return null;
      }
      this.log(`[prompt-cache] generated WAV for "${id}"`);
    }

    return this.loadWavPrompt(id, wavPath);
  }

  /**
   * Parse a WAV file, encode it and cache the result under `id`.
   *
   * @param id - prompt ID to store the result under
   * @param wavPath - WAV file to load
   * @returns the cached prompt, or null when the file is missing,
   *          unparseable, or could not be encoded
   */
  async loadWavPrompt(id: string, wavPath: string): Promise<ICachedPrompt | null> {
    if (!fs.existsSync(wavPath)) {
      this.log(`[prompt-cache] WAV not found: ${wavPath}`);
      return null;
    }

    const parsed = readWavWithRate(wavPath);
    if (!parsed) {
      this.log(`[prompt-cache] failed to parse WAV: ${wavPath}`);
      return null;
    }

    const encoded = await encodePcmFrames(parsed.pcm, parsed.sampleRate, this.log);
    if (!encoded) {
      this.log(`[prompt-cache] encoding failed for "${id}" (codec bridge not ready?)`);
      return null;
    }

    const stored = this.store(id, encoded);
    this.log(
      `[prompt-cache] cached "${id}": ${encoded.g722Frames.length} frames (${(stored.durationMs / 1000).toFixed(1)}s)`,
    );
    return stored;
  }

  /**
   * Generate a beep tone prompt (sine wave).
   * @param id - prompt ID
   * @param freqHz - tone frequency (default 1000 Hz)
   * @param durationMs - tone duration (default 500ms)
   * @param amplitude - 16-bit amplitude (default 8000)
   * @returns the cached prompt, or null when encoding failed
   */
  async generateBeep(
    id: string,
    freqHz = 1000,
    durationMs = 500,
    amplitude = 8000,
  ): Promise<ICachedPrompt | null> {
    // Generate at 16kHz for decent quality.
    const sampleRate = 16000;
    const totalSamples = Math.floor((sampleRate * durationMs) / 1000);
    const pcm = Buffer.alloc(totalSamples * 2);

    // 10ms linear fade at both ends to avoid click artifacts.
    const fadeLen = Math.floor(sampleRate * 0.01);

    for (let n = 0; n < totalSamples; n++) {
      const t = n / sampleRate;

      let envelope = 1.0;
      if (n < fadeLen) {
        envelope = n / fadeLen;
      } else if (n > totalSamples - fadeLen) {
        envelope = (totalSamples - n) / fadeLen;
      }

      const sample = Math.round(Math.sin(2 * Math.PI * freqHz * t) * amplitude * envelope);
      // Clamp to the signed 16-bit range before writing.
      const clamped = Math.max(-32768, Math.min(32767, sample));
      pcm.writeInt16LE(clamped, n * 2);
    }

    const encoded = await encodePcmFrames(pcm, sampleRate, this.log);
    if (!encoded) {
      this.log(`[prompt-cache] beep encoding failed for "${id}"`);
      return null;
    }

    const stored = this.store(id, encoded);
    this.log(`[prompt-cache] beep "${id}" cached: ${stored.durationMs}ms @ ${freqHz}Hz`);
    return stored;
  }

  /** Drop a single prompt; unknown IDs are ignored. */
  remove(id: string): void {
    this.prompts.delete(id);
  }

  /** Drop every cached prompt. */
  clear(): void {
    this.prompts.clear();
  }

  /** Wrap encoded frames as a prompt (duration = G.722 frames × 20ms) and cache it. */
  private store(
    id: string,
    encoded: { g722Frames: Buffer[]; opusFrames: Buffer[] },
  ): ICachedPrompt {
    const prompt: ICachedPrompt = {
      id,
      g722Frames: encoded.g722Frames,
      opusFrames: encoded.opusFrames,
      durationMs: encoded.g722Frames.length * 20,
    };
    this.prompts.set(id, prompt);
    return prompt;
  }
}
|
||||
|
||||
// ---------------------------------------------------------------------------
|
||||
// Standalone playback helpers (for use by SystemLeg)
|
||||
// ---------------------------------------------------------------------------
|
||||
|
||||
/**
 * Stream a cached prompt's G.722 frames as RTP packets, one every 20ms.
 *
 * Sequence number and timestamp start at random values; the first packet
 * carries the RTP marker bit. A prompt without G.722 frames completes
 * immediately: `onDone` is invoked and no timer is started.
 *
 * @param prompt - the cached prompt to play
 * @param sendPacket - function to send a raw RTP packet (12-byte header + payload)
 * @param ssrc - SSRC for RTP headers
 * @param onDone - called when playback finishes (not called on cancel)
 * @returns cancel function, or null if prompt has no G.722 frames
 */
export function playPromptG722(
  prompt: ICachedPrompt,
  sendPacket: (pkt: Buffer) => void,
  ssrc: number,
  onDone?: () => void,
): (() => void) | null {
  const g722 = prompt.g722Frames;
  if (g722.length === 0) {
    onDone?.();
    return null;
  }

  const payloadType = 9;
  let nextSeq = Math.floor(Math.random() * 0xffff);
  let nextTs = Math.floor(Math.random() * 0xffffffff);
  let cursor = 0;

  const handle = setInterval(function tick(): void {
    // All frames sent: stop the timer and notify the caller.
    if (cursor >= g722.length) {
      clearInterval(handle);
      onDone?.();
      return;
    }

    const isFirst = cursor === 0;
    const header = buildRtpHeader(payloadType, nextSeq & 0xffff, nextTs >>> 0, ssrc >>> 0, isFirst);
    sendPacket(Buffer.concat([header, g722[cursor]]));

    nextSeq += 1;
    nextTs += rtpClockIncrement(payloadType);
    cursor += 1;
  }, 20);

  return () => clearInterval(handle);
}
|
||||
|
||||
/**
 * Stream a cached prompt's Opus frames as RTP packets, one every 20ms.
 *
 * Sequence number and timestamp come from the caller-owned `counters` object
 * and are advanced on it after every packet, so consecutive playbacks on the
 * same stream continue without a jump. The first packet carries the RTP
 * marker bit. A prompt without Opus frames completes immediately.
 *
 * @param prompt - the cached prompt to play
 * @param sendPacket - function to send a raw RTP packet
 * @param ssrc - SSRC for RTP headers
 * @param counters - shared seq/ts counters (mutated in place for seamless transitions)
 * @param onDone - called when playback finishes (not called on cancel)
 * @returns cancel function, or null if prompt has no Opus frames
 */
export function playPromptOpus(
  prompt: ICachedPrompt,
  sendPacket: (pkt: Buffer) => void,
  ssrc: number,
  counters: { seq: number; ts: number },
  onDone?: () => void,
): (() => void) | null {
  const opus = prompt.opusFrames;
  if (opus.length === 0) {
    onDone?.();
    return null;
  }

  const payloadType = 111;
  let cursor = 0;

  const handle = setInterval(function tick(): void {
    if (cursor >= opus.length) {
      clearInterval(handle);
      onDone?.();
      return;
    }

    const isFirst = cursor === 0;
    const header = buildRtpHeader(payloadType, counters.seq & 0xffff, counters.ts >>> 0, ssrc >>> 0, isFirst);
    sendPacket(Buffer.concat([header, opus[cursor]]));

    counters.seq += 1;
    counters.ts += 960; // Opus 48kHz: 960 samples per 20ms
    cursor += 1;
  }, 20);

  return () => clearInterval(handle);
}
|
||||
+218
-89
@@ -1,13 +1,12 @@
|
||||
/**
|
||||
* Application configuration — loaded from .nogit/config.json.
|
||||
* Application configuration models and normalization helpers.
|
||||
*
|
||||
* All network addresses, credentials, provider settings, device definitions,
|
||||
* and routing rules come from this single config file. No hardcoded values
|
||||
* in source.
|
||||
* and routing rules are persisted through SmartData.
|
||||
*/
|
||||
|
||||
import fs from 'node:fs';
|
||||
import path from 'node:path';
|
||||
import type { IFaxBoxConfig } from './faxbox.ts';
|
||||
import type { IVoiceboxConfig } from './voicebox.js';
|
||||
|
||||
// ---------------------------------------------------------------------------
|
||||
// Shared types (previously in ts/sip/types.ts, now inlined)
|
||||
@@ -47,6 +46,24 @@ export interface IDeviceConfig {
|
||||
extension: string;
|
||||
}
|
||||
|
||||
/** How an incoming-number entry is specified: one number, a range, or a regular expression. */
export type TIncomingNumberMode = 'single' | 'range' | 'regex';
|
||||
|
||||
/**
 * One configured incoming number (or group of numbers).
 *
 * NOTE(review): which optional fields are required for which `mode` is not
 * visible in this file section — presumably `rangeEnd` belongs to 'range' and
 * `pattern` to 'regex'; confirm against the consumer of this config.
 */
export interface IIncomingNumberConfig {
|
||||
/** Entry ID; normalizeConfig() fills in `incoming-<timestamp>` when it is empty. */
id: string;
|
||||
/** Display label; normalizeConfig() defaults it to `id`. */
label: string;
|
||||
providerId?: string;
|
||||
mode: TIncomingNumberMode;
|
||||
countryCode?: string;
|
||||
areaCode?: string;
|
||||
localNumber?: string;
|
||||
rangeEnd?: string;
|
||||
pattern?: string;
|
||||
|
||||
// Legacy persisted fields kept for migration compatibility.
|
||||
number?: string;
|
||||
rangeStart?: string;
|
||||
}
|
||||
|
||||
// ---------------------------------------------------------------------------
|
||||
// Match/Action routing model
|
||||
// ---------------------------------------------------------------------------
|
||||
@@ -61,8 +78,11 @@ export interface ISipRouteMatch {
|
||||
direction: 'inbound' | 'outbound';
|
||||
|
||||
/**
|
||||
* Match the dialed/called number (To/Request-URI for inbound DID, dialed digits for outbound).
|
||||
* Supports: exact string, prefix with trailing '*' (e.g. "+4930*"), or regex ("/^\\+49/").
|
||||
* Match the normalized called number.
|
||||
*
|
||||
* Inbound: matches the provider-delivered DID / Request-URI user part.
|
||||
* Outbound: matches the normalized dialed digits.
|
||||
* Supports: exact string, numeric range `start..end`, prefix with trailing '*' (e.g. "+4930*"), or regex ("/^\\+49/").
|
||||
*/
|
||||
numberPattern?: string;
|
||||
|
||||
@@ -88,13 +108,16 @@ export interface ISipRouteAction {
|
||||
|
||||
// --- Inbound actions (IVR / voicemail) ---
|
||||
|
||||
/** Route directly to a voicemail box (skip ringing devices). */
|
||||
/** Voicemail fallback for matched inbound routes. */
|
||||
voicemailBox?: string;
|
||||
|
||||
/** Fax inbox target for matched inbound routes. */
|
||||
faxBox?: string;
|
||||
|
||||
/** Route to an IVR menu by menu ID (skip ringing devices). */
|
||||
ivrMenuId?: string;
|
||||
|
||||
/** Override no-answer timeout (seconds) before routing to voicemail. */
|
||||
/** Reserved for future no-answer handling. */
|
||||
noAnswerTimeout?: number;
|
||||
|
||||
// --- Outbound actions (provider selection) ---
|
||||
@@ -160,24 +183,14 @@ export interface IContact {
|
||||
// Voicebox configuration
|
||||
// ---------------------------------------------------------------------------
|
||||
|
||||
export interface IVoiceboxConfig {
|
||||
/** Unique ID — typically matches device ID or extension. */
|
||||
id: string;
|
||||
/** Whether this voicebox is active. */
|
||||
enabled: boolean;
|
||||
/** Custom TTS greeting text. */
|
||||
greetingText?: string;
|
||||
/** TTS voice ID (default 'af_bella'). */
|
||||
greetingVoice?: string;
|
||||
/** Path to uploaded WAV greeting (overrides TTS). */
|
||||
greetingWavPath?: string;
|
||||
/** Seconds to wait before routing to voicemail (default 25). */
|
||||
noAnswerTimeoutSec?: number;
|
||||
/** Maximum recording duration in seconds (default 120). */
|
||||
maxRecordingSec?: number;
|
||||
/** Maximum stored messages per box (default 50). */
|
||||
maxMessages?: number;
|
||||
}
|
||||
// Canonical definition lives in voicebox.ts (imported at the top of this
|
||||
// file) — re-exported here so consumers can import everything from a
|
||||
// single config module without pulling in the voicebox implementation.
|
||||
// This used to be a duplicated interface and caused
|
||||
// "number | undefined is not assignable to number" type errors when
|
||||
// passing config.voiceboxes into VoiceboxManager.init().
|
||||
export type { IVoiceboxConfig };
|
||||
export type { IFaxBoxConfig };
|
||||
|
||||
// ---------------------------------------------------------------------------
|
||||
// IVR configuration
|
||||
@@ -241,90 +254,206 @@ export interface IAppConfig {
|
||||
proxy: IProxyConfig;
|
||||
providers: IProviderConfig[];
|
||||
devices: IDeviceConfig[];
|
||||
incomingNumbers?: IIncomingNumberConfig[];
|
||||
routing: IRoutingConfig;
|
||||
contacts: IContact[];
|
||||
faxboxes?: IFaxBoxConfig[];
|
||||
voiceboxes?: IVoiceboxConfig[];
|
||||
ivr?: IIvrConfig;
|
||||
}
|
||||
|
||||
// ---------------------------------------------------------------------------
|
||||
// Loader
|
||||
// Defaults and normalization
|
||||
// ---------------------------------------------------------------------------
|
||||
|
||||
const CONFIG_PATH = path.join(process.cwd(), '.nogit', 'config.json');
|
||||
/**
 * Read a mandatory environment variable.
 *
 * @param keyArg - name of the environment variable
 * @returns the variable's value
 * @throws Error when the variable is unset or empty
 */
function requiredInitialEnv(keyArg: string): string {
  const found = process.env[keyArg];
  if (found) return found;
  throw new Error(`Missing required initial config environment variable: ${keyArg}`);
}
|
||||
|
||||
export function loadConfig(): IAppConfig {
|
||||
let raw: string;
|
||||
/**
 * Read a numeric environment variable with a fallback.
 *
 * The value is trimmed before parsing: `Number(' ')` is 0, so without the
 * trim a whitespace-only variable would silently yield 0 instead of the
 * fallback.
 *
 * @param keyArg - name of the environment variable
 * @param fallbackArg - value returned when the variable is unset, blank, or
 *                      not a finite number
 * @returns the parsed number, or `fallbackArg`
 */
function numberFromEnv(keyArg: string, fallbackArg: number): number {
  const raw = process.env[keyArg]?.trim();
  if (!raw) return fallbackArg;
  const parsed = Number(raw);
  return Number.isFinite(parsed) ? parsed : fallbackArg;
}
|
||||
|
||||
export function normalizeConfig(cfg: IAppConfig): IAppConfig {
|
||||
try {
|
||||
raw = fs.readFileSync(CONFIG_PATH, 'utf8');
|
||||
} catch {
|
||||
throw new Error(`config not found at ${CONFIG_PATH} — create .nogit/config.json`);
|
||||
}
|
||||
|
||||
const cfg = JSON.parse(raw) as IAppConfig;
|
||||
|
||||
// Basic validation.
|
||||
if (!cfg.proxy) throw new Error('config: missing "proxy" section');
|
||||
if (!cfg.proxy.lanIp) throw new Error('config: missing proxy.lanIp');
|
||||
if (!cfg.proxy.lanPort) throw new Error('config: missing proxy.lanPort');
|
||||
if (!cfg.proxy.rtpPortRange?.min || !cfg.proxy.rtpPortRange?.max) {
|
||||
throw new Error('config: missing proxy.rtpPortRange.min/max');
|
||||
}
|
||||
cfg.proxy.webUiPort ??= 3060;
|
||||
cfg.proxy.publicIpSeed ??= null;
|
||||
|
||||
cfg.providers ??= [];
|
||||
for (const p of cfg.providers) {
|
||||
if (!p.id || !p.domain || !p.outboundProxy || !p.username || !p.password) {
|
||||
throw new Error(`config: provider "${p.id || '?'}" missing required fields`);
|
||||
// Basic validation.
|
||||
if (!cfg.proxy) throw new Error('config: missing "proxy" section');
|
||||
if (!cfg.proxy.lanIp) throw new Error('config: missing proxy.lanIp');
|
||||
if (!cfg.proxy.lanPort) throw new Error('config: missing proxy.lanPort');
|
||||
if (!cfg.proxy.rtpPortRange?.min || !cfg.proxy.rtpPortRange?.max) {
|
||||
throw new Error('config: missing proxy.rtpPortRange.min/max');
|
||||
}
|
||||
p.displayName ??= p.id;
|
||||
p.registerIntervalSec ??= 300;
|
||||
p.codecs ??= [9, 0, 8, 101];
|
||||
p.quirks ??= { earlyMediaSilence: false };
|
||||
}
|
||||
cfg.proxy.webUiPort ??= 3060;
|
||||
cfg.proxy.publicIpSeed ??= null;
|
||||
|
||||
if (!Array.isArray(cfg.devices) || !cfg.devices.length) {
|
||||
throw new Error('config: need at least one device');
|
||||
}
|
||||
for (const d of cfg.devices) {
|
||||
if (!d.id || !d.expectedAddress) {
|
||||
throw new Error(`config: device "${d.id || '?'}" missing required fields`);
|
||||
cfg.providers ??= [];
|
||||
for (const p of cfg.providers) {
|
||||
if (!p.id || !p.domain || !p.outboundProxy || !p.username || !p.password) {
|
||||
throw new Error(`config: provider "${p.id || '?'}" missing required fields`);
|
||||
}
|
||||
p.displayName ??= p.id;
|
||||
p.registerIntervalSec ??= 300;
|
||||
p.codecs ??= [9, 0, 8, 101];
|
||||
p.quirks ??= { earlyMediaSilence: false };
|
||||
}
|
||||
d.displayName ??= d.id;
|
||||
d.extension ??= '100';
|
||||
|
||||
if (!Array.isArray(cfg.devices) || !cfg.devices.length) {
|
||||
throw new Error('config: need at least one device');
|
||||
}
|
||||
for (const d of cfg.devices) {
|
||||
if (!d.id || !d.expectedAddress) {
|
||||
throw new Error(`config: device "${d.id || '?'}" missing required fields`);
|
||||
}
|
||||
d.displayName ??= d.id;
|
||||
d.extension ??= '100';
|
||||
}
|
||||
|
||||
cfg.incomingNumbers ??= [];
|
||||
for (const incoming of cfg.incomingNumbers) {
|
||||
if (!incoming.id) incoming.id = `incoming-${Date.now()}`;
|
||||
incoming.label ??= incoming.id;
|
||||
incoming.mode ??= incoming.pattern ? 'regex' : incoming.rangeStart || incoming.rangeEnd ? 'range' : 'single';
|
||||
incoming.countryCode ??= incoming.mode === 'regex' ? undefined : '+49';
|
||||
}
|
||||
|
||||
cfg.routing ??= { routes: [] };
|
||||
cfg.routing.routes ??= [];
|
||||
|
||||
cfg.contacts ??= [];
|
||||
for (const c of cfg.contacts) {
|
||||
c.starred ??= false;
|
||||
}
|
||||
|
||||
cfg.faxboxes ??= [];
|
||||
for (const fb of cfg.faxboxes) {
|
||||
fb.enabled ??= true;
|
||||
fb.maxMessages ??= 50;
|
||||
}
|
||||
|
||||
cfg.voiceboxes ??= [];
|
||||
for (const vb of cfg.voiceboxes) {
|
||||
vb.enabled ??= true;
|
||||
vb.noAnswerTimeoutSec ??= 25;
|
||||
vb.maxRecordingSec ??= 120;
|
||||
vb.maxMessages ??= 50;
|
||||
vb.greetingVoice ??= 'af_bella';
|
||||
}
|
||||
|
||||
if (cfg.ivr) {
|
||||
cfg.ivr.enabled ??= false;
|
||||
cfg.ivr.menus ??= [];
|
||||
for (const menu of cfg.ivr.menus) {
|
||||
menu.timeoutSec ??= 5;
|
||||
menu.maxRetries ??= 3;
|
||||
menu.entries ??= [];
|
||||
}
|
||||
}
|
||||
|
||||
return cfg;
|
||||
} catch (error) {
|
||||
throw error;
|
||||
}
|
||||
}
|
||||
|
||||
cfg.routing ??= { routes: [] };
|
||||
cfg.routing.routes ??= [];
|
||||
/**
 * Builds a first-run configuration from `SIPROUTER_*` environment variables
 * and passes it through `normalizeConfig`.
 *
 * `SIPROUTER_LAN_IP` and `SIPROUTER_INITIAL_DEVICE_ADDRESS` are required;
 * every other value has a built-in default.
 *
 * @returns The normalized initial configuration.
 * @throws Error when a required environment variable is missing.
 */
export function createInitialConfigFromEnv(): IAppConfig {
  const env = process.env;

  // Values are read in the same order as the fields appear in the config so
  // that the first missing required variable is the one reported.
  const lanIp = requiredInitialEnv('SIPROUTER_LAN_IP');
  const lanPort = numberFromEnv('SIPROUTER_LAN_PORT', 5070);
  const publicIpSeed = env.SIPROUTER_PUBLIC_IP || null;
  const rtpMin = numberFromEnv('SIPROUTER_RTP_PORT_MIN', 20000);
  const rtpMax = numberFromEnv('SIPROUTER_RTP_PORT_MAX', 20200);
  const webUiPort = numberFromEnv('SIPROUTER_WEB_UI_PORT', 3060);

  const initialDevice = {
    id: env.SIPROUTER_INITIAL_DEVICE_ID || 'desk-phone',
    displayName: env.SIPROUTER_INITIAL_DEVICE_DISPLAY_NAME || 'Desk Phone',
    expectedAddress: requiredInitialEnv('SIPROUTER_INITIAL_DEVICE_ADDRESS'),
    extension: env.SIPROUTER_INITIAL_DEVICE_EXTENSION || '100',
  };

  return normalizeConfig({
    proxy: {
      lanIp,
      lanPort,
      publicIpSeed,
      rtpPortRange: { min: rtpMin, max: rtpMax },
      webUiPort,
    },
    providers: [],
    devices: [initialDevice],
    incomingNumbers: [],
    routing: { routes: [] },
    contacts: [],
    faxboxes: [],
    voiceboxes: [],
    ivr: {
      enabled: false,
      entryMenuId: 'main-menu',
      menus: [],
    },
  });
}
|
||||
|
||||
cfg.contacts ??= [];
|
||||
for (const c of cfg.contacts) {
|
||||
c.starred ??= false;
|
||||
}
|
||||
/**
 * Returns a shallow copy of the config in which every non-empty provider
 * password is replaced by a fixed placeholder.
 *
 * The input object and its provider entries are not modified.
 *
 * @param configArg Configuration to mask.
 * @returns A copy with masked provider passwords; `providers` is always an array.
 */
export function maskConfig(configArg: IAppConfig): IAppConfig {
  const sourceProviders = configArg.providers ?? [];
  const providers = sourceProviders.map((entry) => {
    // Empty or missing passwords are passed through unchanged.
    const password = entry.password ? '••••••' : entry.password;
    return { ...entry, password };
  });
  return { ...configArg, providers };
}
|
||||
|
||||
// Voicebox defaults.
|
||||
cfg.voiceboxes ??= [];
|
||||
for (const vb of cfg.voiceboxes) {
|
||||
vb.enabled ??= true;
|
||||
vb.noAnswerTimeoutSec ??= 25;
|
||||
vb.maxRecordingSec ??= 120;
|
||||
vb.maxMessages ??= 50;
|
||||
vb.greetingVoice ??= 'af_bella';
|
||||
}
|
||||
export function applyConfigUpdates(configArg: IAppConfig, updatesArg: any): IAppConfig {
|
||||
const cfg = JSON.parse(JSON.stringify(configArg)) as IAppConfig;
|
||||
|
||||
// IVR defaults.
|
||||
if (cfg.ivr) {
|
||||
cfg.ivr.enabled ??= false;
|
||||
cfg.ivr.menus ??= [];
|
||||
for (const menu of cfg.ivr.menus) {
|
||||
menu.timeoutSec ??= 5;
|
||||
menu.maxRetries ??= 3;
|
||||
menu.entries ??= [];
|
||||
if (updatesArg.providers) {
|
||||
for (const up of updatesArg.providers) {
|
||||
const existing = cfg.providers?.find((p: any) => p.id === up.id);
|
||||
if (existing) {
|
||||
if (up.displayName !== undefined) existing.displayName = up.displayName;
|
||||
if (up.password && up.password !== '••••••') existing.password = up.password;
|
||||
if (up.domain !== undefined) existing.domain = up.domain;
|
||||
if (up.outboundProxy !== undefined) existing.outboundProxy = up.outboundProxy;
|
||||
if (up.username !== undefined) existing.username = up.username;
|
||||
if (up.registerIntervalSec !== undefined) existing.registerIntervalSec = up.registerIntervalSec;
|
||||
if (up.codecs !== undefined) existing.codecs = up.codecs;
|
||||
if (up.quirks !== undefined) existing.quirks = up.quirks;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
return cfg;
|
||||
if (updatesArg.addProvider) {
|
||||
cfg.providers ??= [];
|
||||
cfg.providers.push(updatesArg.addProvider);
|
||||
}
|
||||
|
||||
if (updatesArg.removeProvider) {
|
||||
cfg.providers = (cfg.providers || []).filter((p: any) => p.id !== updatesArg.removeProvider);
|
||||
if (cfg.routing?.routes) {
|
||||
cfg.routing.routes = cfg.routing.routes.filter((r: any) =>
|
||||
r.match?.sourceProvider !== updatesArg.removeProvider &&
|
||||
r.action?.provider !== updatesArg.removeProvider
|
||||
);
|
||||
}
|
||||
}
|
||||
|
||||
if (updatesArg.devices) {
|
||||
for (const ud of updatesArg.devices) {
|
||||
const existing = cfg.devices?.find((d: any) => d.id === ud.id);
|
||||
if (existing && ud.displayName !== undefined) existing.displayName = ud.displayName;
|
||||
}
|
||||
}
|
||||
if (updatesArg.incomingNumbers !== undefined) cfg.incomingNumbers = updatesArg.incomingNumbers;
|
||||
if (updatesArg.routing?.routes) cfg.routing.routes = updatesArg.routing.routes;
|
||||
if (updatesArg.contacts !== undefined) cfg.contacts = updatesArg.contacts;
|
||||
if (updatesArg.faxboxes !== undefined) cfg.faxboxes = updatesArg.faxboxes;
|
||||
if (updatesArg.voiceboxes !== undefined) cfg.voiceboxes = updatesArg.voiceboxes;
|
||||
if (updatesArg.ivr !== undefined) cfg.ivr = updatesArg.ivr;
|
||||
|
||||
return normalizeConfig(cfg);
|
||||
}
|
||||
|
||||
// Route resolution, pattern matching, and provider/device lookup
|
||||
|
||||
+197
@@ -0,0 +1,197 @@
|
||||
import fs from 'node:fs';
|
||||
import * as fsPromises from 'node:fs/promises';
|
||||
import path from 'node:path';
|
||||
|
||||
import type { SiprouterStorage } from './storage.ts';
|
||||
|
||||
/** Configuration of a single fax inbox. */
export interface IFaxBoxConfig {
  /** Identifier of the box; used as the map key and in storage object keys. */
  id: string;
  /** Whether the box is enabled. */
  enabled: boolean;
  /** Maximum number of messages kept in the box before the oldest are dropped. */
  maxMessages?: number;
}
|
||||
|
||||
/** Metadata record for one received fax stored in a fax box. */
export interface IFaxMessage {
  /** Unique identifier of the message. */
  id: string;
  /** Identifier of the fax box the message belongs to. */
  boxId: string;
  /** Number of the sender, when known. */
  callerNumber?: string;
  /** Creation time in milliseconds since the Unix epoch. */
  timestamp: number;
  /** File name of the fax document. */
  fileName: string;
  /** Key of the stored object in the storage backend; absent for legacy on-disk messages. */
  objectKey?: string;
  /** Completion code reported for the fax, or null when none was reported. */
  completionCode?: number | null;
  /** Human-readable completion label, or null when none was reported. */
  completionLabel?: string | null;
  /** Number of pages, when reported. */
  pageCount?: number;
  /** Bit rate, when reported. */
  bitRate?: number;
}
|
||||
|
||||
/**
 * Keeps the message lists of all configured fax boxes in memory and persists
 * them, together with the fax documents, through the storage backend.
 *
 * Messages that predate the storage backend ("legacy" messages) live as files
 * below `.nogit/fax/inboxes/<boxId>/` and are migrated on load when their file
 * still exists.
 */
export class FaxBoxManager {
  private boxes = new Map<string, IFaxBoxConfig>();
  private messagesByBox = new Map<string, IFaxMessage[]>();
  private readonly basePath: string;
  private readonly log: (msg: string) => void;
  private readonly storage: SiprouterStorage;

  /**
   * @param log Sink for log lines.
   * @param storageArg Storage backend used for message lists and fax documents.
   */
  constructor(log: (msg: string) => void, storageArg: SiprouterStorage) {
    this.basePath = path.join(process.cwd(), '.nogit', 'fax', 'inboxes');
    this.log = log;
    this.storage = storageArg;
  }

  /**
   * (Re)loads all fax boxes and their messages.
   *
   * Defaults for `enabled` and `maxMessages` are written onto the passed
   * config objects.
   *
   * @param faxBoxConfigs Boxes to manage from now on.
   */
  async init(faxBoxConfigs: IFaxBoxConfig[]): Promise<void> {
    this.boxes.clear();
    // Drop cached messages as well, otherwise boxes that were removed from the
    // configuration keep serving their old messages after a re-init.
    this.messagesByBox.clear();

    for (const cfg of faxBoxConfigs) {
      cfg.enabled ??= true;
      cfg.maxMessages ??= 50;
      this.boxes.set(cfg.id, cfg);
      this.messagesByBox.set(cfg.id, await this.loadMessages(cfg.id));
    }

    await fsPromises.mkdir(this.basePath, { recursive: true });
    this.log(`[faxbox] initialized ${this.boxes.size} fax box(es)`);
  }

  /** Returns the configuration of a box, or null when the box is unknown. */
  getBox(boxId: string): IFaxBoxConfig | null {
    return this.boxes.get(boxId) ?? null;
  }

  /** Returns the legacy on-disk directory of a box. */
  getBoxDir(boxId: string): string {
    return path.join(this.basePath, boxId);
  }

  /**
   * Resolves the file to send as an outbound fax and verifies it is accessible.
   *
   * NOTE(review): any path reachable by the process is accepted, including
   * absolute paths; callers that forward untrusted input should restrict it.
   *
   * @param filePathArg Absolute path, or a path relative to the working directory.
   * @returns The absolute path of the file.
   * @throws When the file cannot be accessed.
   */
  async prepareOutboundFaxFile(filePathArg: string): Promise<string> {
    const localPath = path.isAbsolute(filePathArg) ? filePathArg : path.join(process.cwd(), filePathArg);
    await fsPromises.access(localPath);
    return localPath;
  }

  /**
   * Stores a received fax: uploads the document, prepends the message to the
   * box, trims the box to its message limit, persists the list and removes
   * the local file.
   *
   * @param boxId Target box.
   * @param info Details of the received fax; `fileName` is the local document path
   *   (absolute, or relative to the working directory).
   */
  async addMessage(
    boxId: string,
    info: {
      callerNumber?: string;
      fileName: string;
      completionCode?: number | null;
      completionLabel?: string | null;
      pageCount?: number;
      bitRate?: number;
    },
  ): Promise<void> {
    const id = crypto.randomUUID();
    const localPath = path.isAbsolute(info.fileName) ? info.fileName : path.join(process.cwd(), info.fileName);
    // Keep the document's own extension in the object key, as the legacy
    // migration does; fall back to .tif when the file has none.
    const extension = path.extname(localPath) || '.tif';
    const objectKey = await this.storage.putFileObject(`fax/inboxes/${boxId}/${id}${extension}`, localPath);

    const msg: IFaxMessage = {
      id,
      boxId,
      callerNumber: info.callerNumber,
      timestamp: Date.now(),
      fileName: path.basename(localPath),
      objectKey,
      completionCode: info.completionCode ?? null,
      completionLabel: info.completionLabel ?? null,
      pageCount: info.pageCount,
      bitRate: info.bitRate,
    };

    const messages = this.getMessages(boxId);
    messages.unshift(msg);
    await this.enforceLimit(boxId, messages);
    await this.writeMessages(boxId, messages);
    // Best effort: the document is already stored, a leftover local file is harmless.
    await fsPromises.rm(localPath, { force: true }).catch(() => {});
    this.log(`[faxbox] saved fax ${msg.id} in box "${msg.boxId}" (${msg.fileName})`);
  }

  /** Returns a copy of the message list of a box (newest first); empty for unknown boxes. */
  getMessages(boxId: string): IFaxMessage[] {
    return [...(this.messagesByBox.get(boxId) || [])];
  }

  /** Returns a single message, or null when the box or message is unknown. */
  getMessage(boxId: string, messageId: string): IFaxMessage | null {
    const messages = this.messagesByBox.get(boxId) || [];
    return messages.find((m) => m.id === messageId) ?? null;
  }

  /**
   * Returns a local path from which the fax document can be read.
   *
   * @returns The path, or null when the message is unknown or its legacy file is gone.
   */
  async getMessageFilePath(boxId: string, messageId: string): Promise<string | null> {
    const msg = this.getMessage(boxId, messageId);
    if (!msg) return null;
    if (msg.objectKey) {
      return await this.storage.getObjectAsCachedFile(msg.objectKey, msg.fileName);
    }
    const filePath = this.resolveLegacyPath(boxId, msg.fileName);
    return fs.existsSync(filePath) ? filePath : null;
  }

  /**
   * Deletes a message and its document.
   *
   * @returns true when the message existed and was removed, false otherwise.
   */
  async deleteMessage(boxId: string, messageId: string): Promise<boolean> {
    const messages = this.messagesByBox.get(boxId) || [];
    const idx = messages.findIndex((m) => m.id === messageId);
    if (idx === -1) return false;

    await this.removeStoredFile(boxId, messages[idx]);

    messages.splice(idx, 1);
    await this.writeMessages(boxId, messages);
    return true;
  }

  /** Drops the oldest messages (from the end of the list) until the box limit is met. */
  private async enforceLimit(boxId: string, messages: IFaxMessage[]): Promise<void> {
    const maxMessages = this.boxes.get(boxId)?.maxMessages ?? 50;
    while (messages.length > maxMessages) {
      const old = messages.pop();
      if (!old) break;
      await this.removeStoredFile(boxId, old);
    }
  }

  /** Removes the document of a message from storage, or from disk for legacy messages. */
  private async removeStoredFile(boxId: string, msg: IFaxMessage): Promise<void> {
    await this.storage.removeObject(msg.objectKey);
    if (!msg.objectKey) {
      // Best effort: a missing legacy file must not block removing the message.
      await fsPromises.rm(this.resolveLegacyPath(boxId, msg.fileName), { force: true }).catch(() => {});
    }
  }

  /**
   * Resolves the on-disk location of a legacy document. Legacy entries may
   * carry an absolute file name; those must not be joined onto the box directory.
   */
  private resolveLegacyPath(boxId: string, fileName: string): string {
    return path.isAbsolute(fileName) ? fileName : path.join(this.getBoxDir(boxId), fileName);
  }

  /**
   * Loads the messages of a box from storage, falling back to the legacy
   * `messages.json` file, which is then written to storage.
   */
  private async loadMessages(boxId: string): Promise<IFaxMessage[]> {
    const storedMessages = await this.storage.getFaxMessages(boxId);
    if (storedMessages.length) return await this.ensureMessageObjects(boxId, storedMessages);

    const filePath = path.join(this.getBoxDir(boxId), 'messages.json');
    try {
      if (!fs.existsSync(filePath)) return [];
      const raw = await fsPromises.readFile(filePath, 'utf8');
      const parsed: unknown = JSON.parse(raw);
      if (!Array.isArray(parsed)) {
        this.log(`[faxbox] ignoring legacy messages for box "${boxId}": messages.json is not an array`);
        return [];
      }
      const legacyMessages = await this.ensureMessageObjects(boxId, parsed as IFaxMessage[]);
      await this.storage.writeFaxMessages(boxId, legacyMessages);
      return legacyMessages;
    } catch (error: unknown) {
      // Legacy import stays best effort, but the reason is reported instead of swallowed.
      const reason = error instanceof Error ? error.message : String(error);
      this.log(`[faxbox] failed to load legacy messages for box "${boxId}": ${reason}`);
      return [];
    }
  }

  /**
   * Ensures every message has an id and, where its local file still exists,
   * a stored object. Persists the list when anything changed.
   */
  private async ensureMessageObjects(boxId: string, messages: IFaxMessage[]): Promise<IFaxMessage[]> {
    let changed = false;

    for (const msg of messages) {
      if (!msg.id) {
        msg.id = crypto.randomUUID();
        changed = true;
      }
      if (msg.objectKey) continue;

      const localPath = this.resolveLegacyPath(boxId, msg.fileName);
      if (!fs.existsSync(localPath)) continue;

      const extension = path.extname(localPath) || '.tif';
      msg.objectKey = await this.storage.putFileObject(`fax/inboxes/${boxId}/${msg.id}${extension}`, localPath);
      msg.fileName = path.basename(localPath);
      changed = true;
    }

    if (changed) {
      await this.storage.writeFaxMessages(boxId, messages);
      this.log(`[faxbox] migrated legacy messages for box "${boxId}" to smartbucket`);
    }

    return messages;
  }

  /** Updates the in-memory list of a box and persists it. */
  private async writeMessages(boxId: string, messages: IFaxMessage[]): Promise<void> {
    this.messagesByBox.set(boxId, [...messages]);
    await this.storage.writeFaxMessages(boxId, messages);
  }
}
|
||||
+149
@@ -0,0 +1,149 @@
|
||||
import fs from 'node:fs';
|
||||
import path from 'node:path';
|
||||
|
||||
import type { SiprouterStorage } from './storage.ts';
|
||||
import type {
|
||||
IFaxCompletedEvent,
|
||||
IFaxFailedEvent,
|
||||
IFaxStartedEvent,
|
||||
} from './shared/proxy-events.ts';
|
||||
|
||||
/** State of one fax transmission, tracked per call. */
export interface IFaxJob {
  /** Job identifier. */
  id: string;
  /** Identifier of the call carrying the fax. */
  callId: string;
  /** Dialed number, when known. */
  number?: string;
  /** Provider used for the call, when known. */
  providerId?: string;
  /** Whether the fax is sent or received. */
  direction: 'outbound' | 'inbound';
  /** Current lifecycle state of the job. */
  status: 'dialing' | 'started' | 'completed' | 'failed';
  /** Transport reported for the fax session. */
  transport?: 'audio' | 't38';
  /** Local path of the fax document, as reported by fax events. */
  filePath?: string;
  /** Key of the document in the storage backend, once stored. */
  objectKey?: string;
  /** Codec reported for the fax session. */
  codec?: string;
  /** Remote media description reported when the fax started. */
  remoteMedia?: string;
  /** Outcome of the transmission, once known. */
  success?: boolean;
  /** Completion code reported for the fax, or null when none was reported. */
  completionCode?: number | null;
  /** Human-readable completion label, or null when none was reported. */
  completionLabel?: string | null;
  /** Error text reported when the fax failed. */
  error?: string;
  /** Statistics reported with the completion event. */
  stats?: IFaxCompletedEvent['stats'];
  /** Creation time in milliseconds since the Unix epoch. */
  createdAt: number;
  /** Time of the last update in milliseconds since the Unix epoch. */
  updatedAt: number;
}
|
||||
|
||||
/**
 * Tracks fax jobs by call id and persists the job list through the storage
 * backend after every change.
 */
export class FaxJobManager {
  private jobs: IFaxJob[] = [];
  private readonly log: (msg: string) => void;
  private readonly storage: SiprouterStorage;

  /**
   * @param log Sink for log lines.
   * @param storageArg Storage backend used for the job list and outbound documents.
   */
  constructor(log: (msg: string) => void, storageArg: SiprouterStorage) {
    this.log = log;
    this.storage = storageArg;
  }

  /** Loads the persisted job list. */
  async init(): Promise<void> {
    this.jobs = await this.storage.getFaxJobs();
  }

  /**
   * Records that an outbound fax call is being dialed. An existing job for the
   * call keeps its status and only has number, provider and timestamp updated.
   */
  async noteDialing(callId: string, number: string, providerId: string): Promise<void> {
    const now = Date.now();
    const job = this.getOrCreateJob(callId, 'outbound', now);
    job.number = number;
    job.providerId = providerId;
    job.updatedAt = now;
    await this.writeJobs();
  }

  /** Records that the fax session of a call has started. */
  async noteStarted(event: IFaxStartedEvent): Promise<void> {
    const now = Date.now();
    const job = this.getOrCreateJob(event.call_id, event.direction, now);
    job.status = 'started';
    // Keep values from earlier events when this event does not carry them.
    job.transport = event.transport ?? job.transport;
    job.filePath = event.file_path ?? job.filePath;
    await this.ensureOutboundFileObject(job, event.file_path);
    job.codec = event.codec ?? job.codec;
    job.remoteMedia = event.remote_media;
    job.updatedAt = now;
    await this.writeJobs();
  }

  /** Records the completion of a fax, including its outcome and statistics. */
  async noteCompleted(event: IFaxCompletedEvent): Promise<void> {
    const now = Date.now();
    const job = this.getOrCreateJob(event.call_id, event.direction, now);
    job.status = 'completed';
    // Keep values from earlier events when this event does not carry them.
    job.transport = event.transport ?? job.transport;
    job.filePath = event.file_path ?? job.filePath;
    await this.ensureOutboundFileObject(job, event.file_path);
    job.codec = event.codec ?? job.codec;
    job.success = event.success;
    job.completionCode = event.completion_code ?? null;
    job.completionLabel = event.completion_label ?? null;
    job.stats = event.stats;
    job.updatedAt = now;
    await this.writeJobs();
  }

  /** Records the failure of a fax. */
  async noteFailed(event: IFaxFailedEvent): Promise<void> {
    const now = Date.now();
    const job = this.getOrCreateJob(event.call_id, event.direction, now);
    job.status = 'failed';
    // A failure event may lack transport/file details; do not erase what is known.
    job.transport = event.transport ?? job.transport;
    job.filePath = event.file_path ?? job.filePath;
    await this.ensureOutboundFileObject(job, event.file_path);
    job.error = event.error;
    job.success = false;
    job.updatedAt = now;
    await this.writeJobs();
  }

  /** Returns a copy of the job list (newest first). */
  getJobs(): IFaxJob[] {
    return [...this.jobs];
  }

  /**
   * Finds the job of a call, or creates one in state `dialing` at the front of
   * the list.
   */
  private getOrCreateJob(
    callId: string,
    direction: 'outbound' | 'inbound',
    now: number,
  ): IFaxJob {
    const existing = this.jobs.find((entry) => entry.callId === callId);
    if (existing) return existing;

    const created: IFaxJob = {
      id: callId,
      callId,
      direction,
      status: 'dialing',
      createdAt: now,
      updatedAt: now,
    };
    this.jobs.unshift(created);
    return created;
  }

  /**
   * Stores the document of an outbound job once, provided the file exists
   * locally. Inbound jobs and jobs that already have an object are skipped.
   */
  private async ensureOutboundFileObject(jobArg: IFaxJob, filePathArg: string | undefined): Promise<void> {
    if (jobArg.direction !== 'outbound' || jobArg.objectKey || !filePathArg) return;

    const localPath = path.isAbsolute(filePathArg) ? filePathArg : path.join(process.cwd(), filePathArg);
    if (!fs.existsSync(localPath)) return;

    const extension = path.extname(localPath) || '.tif';
    jobArg.objectKey = await this.storage.putFileObject(`fax/outbound/${jobArg.callId}${extension}`, localPath);
  }

  /** Persists the current job list. */
  private async writeJobs(): Promise<void> {
    await this.storage.writeFaxJobs(this.jobs);
    this.log(`[fax] persisted ${this.jobs.length} job(s)`);
  }
}
|
||||
+146
-107
@@ -11,14 +11,42 @@ import path from 'node:path';
|
||||
import http from 'node:http';
|
||||
import https from 'node:https';
|
||||
import { WebSocketServer, WebSocket } from 'ws';
|
||||
import { maskConfig, type IAppConfig } from './config.ts';
|
||||
import type { FaxBoxManager } from './faxbox.ts';
|
||||
import type { FaxJobManager } from './faxjobs.ts';
|
||||
import { handleWebRtcSignaling } from './webrtcbridge.ts';
|
||||
import type { VoiceboxManager } from './voicebox.ts';
|
||||
|
||||
// CallManager was previously used for WebRTC call handling. Now replaced by Rust proxy-engine.
|
||||
// Kept as `any` type for backward compat with the function signature until full WebRTC port.
|
||||
type CallManager = any;
|
||||
/** Callbacks and managers the HTTP request handler works with. */
interface IHandleRequestContext {
  /** Returns the current status snapshot. */
  getStatus: () => unknown;
  /** Returns the current application config. */
  getConfig: () => IAppConfig;
  /** Applies config updates and resolves with the resulting config. */
  updateConfig: (updatesArg: any) => Promise<IAppConfig>;
  /** Sink for log lines. */
  log: (msg: string) => void;
  /** Starts an outbound call; resolves with the call, or null when origination failed. */
  onStartCall: (number: string, deviceId?: string, providerId?: string) => Promise<{ id: string } | null>;
  /** Hangs up a call; returns a boolean result. */
  onHangupCall: (callId: string) => boolean;
  /** Fax inbox manager; fax inbox endpoints are only served when it is set. */
  faxBoxManager?: FaxBoxManager;
  /** Fax job manager; the fax job endpoint is only served when it is set. */
  faxJobManager?: FaxJobManager;
  /** Voicebox manager; voicemail endpoints are only served when it is set. */
  voiceboxManager?: VoiceboxManager;
}
|
||||
|
||||
const CONFIG_PATH = path.join(process.cwd(), '.nogit', 'config.json');
|
||||
/** Options of the web UI: the request-handler context plus the port and WebRTC signaling hooks. */
interface IWebUiOptions extends IHandleRequestContext {
  /** Port number for the web UI. */
  port: number;
  /** Hook for a WebRTC offer of a session, with the socket it arrived on. */
  onWebRtcOffer?: (sessionId: string, sdp: string, ws: WebSocket) => Promise<void>;
  /** Hook for an ICE candidate of a session. */
  onWebRtcIce?: (sessionId: string, candidate: unknown) => Promise<void>;
  /** Hook for the end of a WebRTC session. */
  onWebRtcClose?: (sessionId: string) => Promise<void>;
  /** Hook for a WebRTC session accepting a call. */
  onWebRtcAccept?: (callId: string, sessionId: string) => void;
}
|
||||
|
||||
/**
 * Shape of a WebRTC signaling message exchanged over the WebSocket.
 * All known fields are optional; additional fields are allowed.
 */
interface IWebRtcSocketMessage {
  /** Message type discriminator. */
  type?: string;
  /** Identifier of the WebRTC session. */
  sessionId?: string;
  /** Identifier of the related call. */
  callId?: string;
  /** SDP payload. */
  sdp?: string;
  /** ICE candidate payload. */
  candidate?: unknown;
  /** User agent string of the client. */
  userAgent?: string;
  /** Remote IP of the client; presumably attached by the server — TODO confirm. */
  _remoteIp?: string | null;
  [key: string]: unknown;
}
|
||||
|
||||
// ---------------------------------------------------------------------------
|
||||
// WebSocket broadcast
|
||||
@@ -82,14 +110,9 @@ function loadStaticFiles(): void {
|
||||
async function handleRequest(
|
||||
req: http.IncomingMessage,
|
||||
res: http.ServerResponse,
|
||||
getStatus: () => unknown,
|
||||
log: (msg: string) => void,
|
||||
onStartCall: (number: string, deviceId?: string, providerId?: string) => { id: string } | null,
|
||||
onHangupCall: (callId: string) => boolean,
|
||||
onConfigSaved?: () => void,
|
||||
callManager?: CallManager,
|
||||
voiceboxManager?: VoiceboxManager,
|
||||
context: IHandleRequestContext,
|
||||
): Promise<void> {
|
||||
const { getStatus, getConfig, updateConfig, log, onStartCall, onHangupCall, faxBoxManager, faxJobManager, voiceboxManager } = context;
|
||||
const url = new URL(req.url || '/', `http://${req.headers.host || 'localhost'}`);
|
||||
const method = req.method || 'GET';
|
||||
|
||||
@@ -106,7 +129,7 @@ async function handleRequest(
|
||||
if (!number || typeof number !== 'string') {
|
||||
return sendJson(res, { ok: false, error: 'missing "number" field' }, 400);
|
||||
}
|
||||
const call = onStartCall(number, body?.deviceId, body?.providerId);
|
||||
const call = await onStartCall(number, body?.deviceId, body?.providerId);
|
||||
if (call) return sendJson(res, { ok: true, callId: call.id });
|
||||
return sendJson(res, { ok: false, error: 'call origination failed — provider not registered or no ports available' }, 503);
|
||||
} catch (e: any) {
|
||||
@@ -128,14 +151,81 @@ async function handleRequest(
|
||||
}
|
||||
}
|
||||
|
||||
// API: add leg to call.
|
||||
// API: send outbound fax.
|
||||
if (url.pathname === '/api/fax' && method === 'POST') {
|
||||
try {
|
||||
const body = await readJsonBody(req);
|
||||
const number = body?.number;
|
||||
let filePath = body?.filePath;
|
||||
if (!number || typeof number !== 'string') {
|
||||
return sendJson(res, { ok: false, error: 'missing "number" field' }, 400);
|
||||
}
|
||||
if (!filePath || typeof filePath !== 'string') {
|
||||
return sendJson(res, { ok: false, error: 'missing "filePath" field' }, 400);
|
||||
}
|
||||
if (faxBoxManager) {
|
||||
filePath = await faxBoxManager.prepareOutboundFaxFile(filePath);
|
||||
}
|
||||
const { sendFax } = await import('./proxybridge.ts');
|
||||
const callId = await sendFax(number, filePath, body?.providerId);
|
||||
if (callId) {
|
||||
log(`[dashboard] fax started: ${callId} -> ${number} file=${filePath}`);
|
||||
return sendJson(res, { ok: true, callId });
|
||||
}
|
||||
return sendJson(res, { ok: false, error: 'fax origination failed' }, 503);
|
||||
} catch (e: any) {
|
||||
return sendJson(res, { ok: false, error: e.message }, 400);
|
||||
}
|
||||
}
|
||||
|
||||
// API: fax jobs.
|
||||
if (url.pathname === '/api/fax/jobs' && method === 'GET' && faxJobManager) {
|
||||
return sendJson(res, { ok: true, jobs: faxJobManager.getJobs() });
|
||||
}
|
||||
|
||||
// API: fax inbox - list messages.
|
||||
const faxListMatch = url.pathname.match(/^\/api\/fax\/inboxes\/([^/]+)$/);
|
||||
if (faxListMatch && method === 'GET' && faxBoxManager) {
|
||||
const boxId = faxListMatch[1];
|
||||
return sendJson(res, { ok: true, messages: faxBoxManager.getMessages(boxId) });
|
||||
}
|
||||
|
||||
// API: fax inbox - stream TIFF.
|
||||
const faxFileMatch = url.pathname.match(/^\/api\/fax\/inboxes\/([^/]+)\/([^/]+)\/file$/);
|
||||
if (faxFileMatch && method === 'GET' && faxBoxManager) {
|
||||
const [, boxId, msgId] = faxFileMatch;
|
||||
const filePath = await faxBoxManager.getMessageFilePath(boxId, msgId);
|
||||
if (!filePath) return sendJson(res, { ok: false, error: 'not found' }, 404);
|
||||
const stat = fs.statSync(filePath);
|
||||
res.writeHead(200, {
|
||||
'Content-Type': 'image/tiff',
|
||||
'Content-Length': stat.size.toString(),
|
||||
'Accept-Ranges': 'bytes',
|
||||
});
|
||||
fs.createReadStream(filePath).pipe(res);
|
||||
return;
|
||||
}
|
||||
|
||||
// API: fax inbox - delete message.
|
||||
const faxDeleteMatch = url.pathname.match(/^\/api\/fax\/inboxes\/([^/]+)\/([^/]+)$/);
|
||||
if (faxDeleteMatch && method === 'DELETE' && faxBoxManager) {
|
||||
const [, boxId, msgId] = faxDeleteMatch;
|
||||
return sendJson(res, { ok: await faxBoxManager.deleteMessage(boxId, msgId) });
|
||||
}
|
||||
|
||||
// API: add a SIP device to a call (mid-call INVITE to desk phone).
|
||||
if (url.pathname.startsWith('/api/call/') && url.pathname.endsWith('/addleg') && method === 'POST') {
|
||||
try {
|
||||
const callId = url.pathname.split('/')[3];
|
||||
const body = await readJsonBody(req);
|
||||
if (!body?.deviceId) return sendJson(res, { ok: false, error: 'missing deviceId' }, 400);
|
||||
const ok = callManager?.addDeviceToCall(callId, body.deviceId) ?? false;
|
||||
return sendJson(res, { ok });
|
||||
const { addDeviceLeg } = await import('./proxybridge.ts');
|
||||
const legId = await addDeviceLeg(callId, body.deviceId);
|
||||
if (legId) {
|
||||
return sendJson(res, { ok: true, legId });
|
||||
} else {
|
||||
return sendJson(res, { ok: false, error: 'device not registered or call not found' }, 404);
|
||||
}
|
||||
} catch (e: any) {
|
||||
return sendJson(res, { ok: false, error: e.message }, 400);
|
||||
}
|
||||
@@ -147,8 +237,9 @@ async function handleRequest(
|
||||
const callId = url.pathname.split('/')[3];
|
||||
const body = await readJsonBody(req);
|
||||
if (!body?.number) return sendJson(res, { ok: false, error: 'missing number' }, 400);
|
||||
const ok = callManager?.addExternalToCall(callId, body.number, body.providerId) ?? false;
|
||||
return sendJson(res, { ok });
|
||||
const { addLeg: addLegFn } = await import('./proxybridge.ts');
|
||||
const legId = await addLegFn(callId, body.number, body.providerId);
|
||||
return sendJson(res, { ok: !!legId, legId });
|
||||
} catch (e: any) {
|
||||
return sendJson(res, { ok: false, error: e.message }, 400);
|
||||
}
|
||||
@@ -160,22 +251,22 @@ async function handleRequest(
|
||||
const callId = url.pathname.split('/')[3];
|
||||
const body = await readJsonBody(req);
|
||||
if (!body?.legId) return sendJson(res, { ok: false, error: 'missing legId' }, 400);
|
||||
const ok = callManager?.removeLegFromCall(callId, body.legId) ?? false;
|
||||
const { removeLeg: removeLegFn } = await import('./proxybridge.ts');
|
||||
const ok = await removeLegFn(callId, body.legId);
|
||||
return sendJson(res, { ok });
|
||||
} catch (e: any) {
|
||||
return sendJson(res, { ok: false, error: e.message }, 400);
|
||||
}
|
||||
}
|
||||
|
||||
// API: transfer leg.
|
||||
// API: transfer leg (not yet implemented).
|
||||
if (url.pathname === '/api/transfer' && method === 'POST') {
|
||||
try {
|
||||
const body = await readJsonBody(req);
|
||||
if (!body?.sourceCallId || !body?.legId || !body?.targetCallId) {
|
||||
return sendJson(res, { ok: false, error: 'missing sourceCallId, legId, or targetCallId' }, 400);
|
||||
}
|
||||
const ok = callManager?.transferLeg(body.sourceCallId, body.legId, body.targetCallId) ?? false;
|
||||
return sendJson(res, { ok });
|
||||
return sendJson(res, { ok: false, error: 'not yet implemented' }, 501);
|
||||
} catch (e: any) {
|
||||
return sendJson(res, { ok: false, error: e.message }, 400);
|
||||
}
|
||||
@@ -184,10 +275,7 @@ async function handleRequest(
|
||||
// API: get config (sans passwords).
|
||||
if (url.pathname === '/api/config' && method === 'GET') {
|
||||
try {
|
||||
const raw = fs.readFileSync(CONFIG_PATH, 'utf8');
|
||||
const cfg = JSON.parse(raw);
|
||||
const safe = { ...cfg, providers: cfg.providers?.map((p: any) => ({ ...p, password: '••••••' })) };
|
||||
return sendJson(res, safe);
|
||||
return sendJson(res, maskConfig(getConfig()));
|
||||
} catch (e: any) {
|
||||
return sendJson(res, { ok: false, error: e.message }, 500);
|
||||
}
|
||||
@@ -197,63 +285,9 @@ async function handleRequest(
|
||||
if (url.pathname === '/api/config' && method === 'POST') {
|
||||
try {
|
||||
const updates = await readJsonBody(req);
|
||||
const raw = fs.readFileSync(CONFIG_PATH, 'utf8');
|
||||
const cfg = JSON.parse(raw);
|
||||
|
||||
// Update existing providers.
|
||||
if (updates.providers) {
|
||||
for (const up of updates.providers) {
|
||||
const existing = cfg.providers?.find((p: any) => p.id === up.id);
|
||||
if (existing) {
|
||||
if (up.displayName !== undefined) existing.displayName = up.displayName;
|
||||
if (up.password && up.password !== '••••••') existing.password = up.password;
|
||||
if (up.domain !== undefined) existing.domain = up.domain;
|
||||
if (up.outboundProxy !== undefined) existing.outboundProxy = up.outboundProxy;
|
||||
if (up.username !== undefined) existing.username = up.username;
|
||||
if (up.registerIntervalSec !== undefined) existing.registerIntervalSec = up.registerIntervalSec;
|
||||
if (up.codecs !== undefined) existing.codecs = up.codecs;
|
||||
if (up.quirks !== undefined) existing.quirks = up.quirks;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Add a new provider.
|
||||
if (updates.addProvider) {
|
||||
cfg.providers ??= [];
|
||||
cfg.providers.push(updates.addProvider);
|
||||
}
|
||||
|
||||
// Remove a provider.
|
||||
if (updates.removeProvider) {
|
||||
cfg.providers = (cfg.providers || []).filter((p: any) => p.id !== updates.removeProvider);
|
||||
// Clean up routing references — remove routes that reference this provider.
|
||||
if (cfg.routing?.routes) {
|
||||
cfg.routing.routes = cfg.routing.routes.filter((r: any) =>
|
||||
r.match?.sourceProvider !== updates.removeProvider &&
|
||||
r.action?.provider !== updates.removeProvider
|
||||
);
|
||||
}
|
||||
}
|
||||
|
||||
if (updates.devices) {
|
||||
for (const ud of updates.devices) {
|
||||
const existing = cfg.devices?.find((d: any) => d.id === ud.id);
|
||||
if (existing && ud.displayName !== undefined) existing.displayName = ud.displayName;
|
||||
}
|
||||
}
|
||||
if (updates.routing) {
|
||||
if (updates.routing.routes) {
|
||||
cfg.routing.routes = updates.routing.routes;
|
||||
}
|
||||
}
|
||||
if (updates.contacts !== undefined) cfg.contacts = updates.contacts;
|
||||
if (updates.voiceboxes !== undefined) cfg.voiceboxes = updates.voiceboxes;
|
||||
if (updates.ivr !== undefined) cfg.ivr = updates.ivr;
|
||||
|
||||
fs.writeFileSync(CONFIG_PATH, JSON.stringify(cfg, null, 2) + '\n');
|
||||
log('[config] updated config.json');
|
||||
onConfigSaved?.();
|
||||
return sendJson(res, { ok: true });
|
||||
const config = await updateConfig(updates);
|
||||
log('[config] updated smartdata config');
|
||||
return sendJson(res, { ok: true, config: maskConfig(config) });
|
||||
} catch (e: any) {
|
||||
return sendJson(res, { ok: false, error: e.message }, 400);
|
||||
}
|
||||
@@ -277,7 +311,7 @@ async function handleRequest(
|
||||
const vmAudioMatch = url.pathname.match(/^\/api\/voicemail\/([^/]+)\/([^/]+)\/audio$/);
|
||||
if (vmAudioMatch && method === 'GET' && voiceboxManager) {
|
||||
const [, boxId, msgId] = vmAudioMatch;
|
||||
const audioPath = voiceboxManager.getMessageAudioPath(boxId, msgId);
|
||||
const audioPath = await voiceboxManager.getMessageAudioPath(boxId, msgId);
|
||||
if (!audioPath) return sendJson(res, { ok: false, error: 'not found' }, 404);
|
||||
const stat = fs.statSync(audioPath);
|
||||
res.writeHead(200, {
|
||||
@@ -293,14 +327,14 @@ async function handleRequest(
|
||||
const vmHeardMatch = url.pathname.match(/^\/api\/voicemail\/([^/]+)\/([^/]+)\/heard$/);
|
||||
if (vmHeardMatch && method === 'POST' && voiceboxManager) {
|
||||
const [, boxId, msgId] = vmHeardMatch;
|
||||
return sendJson(res, { ok: voiceboxManager.markHeard(boxId, msgId) });
|
||||
return sendJson(res, { ok: await voiceboxManager.markHeard(boxId, msgId) });
|
||||
}
|
||||
|
||||
// API: voicemail - delete message.
|
||||
const vmDeleteMatch = url.pathname.match(/^\/api\/voicemail\/([^/]+)\/([^/]+)$/);
|
||||
if (vmDeleteMatch && method === 'DELETE' && voiceboxManager) {
|
||||
const [, boxId, msgId] = vmDeleteMatch;
|
||||
return sendJson(res, { ok: voiceboxManager.deleteMessage(boxId, msgId) });
|
||||
return sendJson(res, { ok: await voiceboxManager.deleteMessage(boxId, msgId) });
|
||||
}
|
||||
|
||||
// Static files.
|
||||
@@ -333,21 +367,24 @@ async function handleRequest(
|
||||
// ---------------------------------------------------------------------------
|
||||
|
||||
export function initWebUi(
|
||||
getStatus: () => unknown,
|
||||
log: (msg: string) => void,
|
||||
onStartCall: (number: string, deviceId?: string, providerId?: string) => { id: string } | null,
|
||||
onHangupCall: (callId: string) => boolean,
|
||||
onConfigSaved?: () => void,
|
||||
callManager?: CallManager,
|
||||
voiceboxManager?: VoiceboxManager,
|
||||
/** WebRTC signaling handlers — forwarded to Rust proxy-engine. */
|
||||
onWebRtcOffer?: (sessionId: string, sdp: string, ws: WebSocket) => Promise<void>,
|
||||
onWebRtcIce?: (sessionId: string, candidate: any) => Promise<void>,
|
||||
onWebRtcClose?: (sessionId: string) => Promise<void>,
|
||||
/** Called when browser sends webrtc-accept (callId + sessionId linking). */
|
||||
onWebRtcAccept?: (callId: string, sessionId: string) => void,
|
||||
options: IWebUiOptions,
|
||||
): void {
|
||||
const WEB_PORT = 3060;
|
||||
const {
|
||||
port,
|
||||
getStatus,
|
||||
getConfig,
|
||||
updateConfig,
|
||||
log,
|
||||
onStartCall,
|
||||
onHangupCall,
|
||||
faxBoxManager,
|
||||
faxJobManager,
|
||||
voiceboxManager,
|
||||
onWebRtcOffer,
|
||||
onWebRtcIce,
|
||||
onWebRtcClose,
|
||||
onWebRtcAccept,
|
||||
} = options;
|
||||
|
||||
loadStaticFiles();
|
||||
|
||||
@@ -361,12 +398,12 @@ export function initWebUi(
|
||||
const cert = fs.readFileSync(certPath, 'utf8');
|
||||
const key = fs.readFileSync(keyPath, 'utf8');
|
||||
server = https.createServer({ cert, key }, (req, res) =>
|
||||
handleRequest(req, res, getStatus, log, onStartCall, onHangupCall, onConfigSaved, callManager, voiceboxManager).catch(() => { res.writeHead(500); res.end(); }),
|
||||
handleRequest(req, res, { getStatus, getConfig, updateConfig, log, onStartCall, onHangupCall, faxBoxManager, faxJobManager, voiceboxManager }).catch(() => { res.writeHead(500); res.end(); }),
|
||||
);
|
||||
useTls = true;
|
||||
} catch {
|
||||
server = http.createServer((req, res) =>
|
||||
handleRequest(req, res, getStatus, log, onStartCall, onHangupCall, onConfigSaved, callManager, voiceboxManager).catch(() => { res.writeHead(500); res.end(); }),
|
||||
handleRequest(req, res, { getStatus, getConfig, updateConfig, log, onStartCall, onHangupCall, faxBoxManager, faxJobManager, voiceboxManager }).catch(() => { res.writeHead(500); res.end(); }),
|
||||
);
|
||||
}
|
||||
|
||||
@@ -380,12 +417,12 @@ export function initWebUi(
|
||||
|
||||
socket.on('message', (raw) => {
|
||||
try {
|
||||
const msg = JSON.parse(raw.toString());
|
||||
const msg = JSON.parse(raw.toString()) as IWebRtcSocketMessage;
|
||||
if (msg.type === 'webrtc-offer' && msg.sessionId) {
|
||||
// Forward to Rust proxy-engine for WebRTC handling.
|
||||
if (onWebRtcOffer) {
|
||||
if (onWebRtcOffer && typeof msg.sdp === 'string') {
|
||||
log(`[webrtc-ws] offer msg keys: ${Object.keys(msg).join(',')}, sdp type: ${typeof msg.sdp}, sdp len: ${msg.sdp?.length || 0}`);
|
||||
onWebRtcOffer(msg.sessionId, msg.sdp, socket as any).catch((e: any) =>
|
||||
onWebRtcOffer(msg.sessionId, msg.sdp, socket).catch((e: any) =>
|
||||
log(`[webrtc] offer error: ${e.message}`));
|
||||
}
|
||||
} else if (msg.type === 'webrtc-ice' && msg.sessionId) {
|
||||
@@ -403,7 +440,9 @@ export function initWebUi(
|
||||
}
|
||||
} else if (msg.type?.startsWith('webrtc-')) {
|
||||
msg._remoteIp = remoteIp;
|
||||
handleWebRtcSignaling(socket as any, msg);
|
||||
if (msg.type) {
|
||||
handleWebRtcSignaling(socket, msg as IWebRtcSocketMessage & { type: string });
|
||||
}
|
||||
}
|
||||
} catch { /* ignore */ }
|
||||
});
|
||||
@@ -412,8 +451,8 @@ export function initWebUi(
|
||||
socket.on('error', () => wsClients.delete(socket));
|
||||
});
|
||||
|
||||
server.listen(WEB_PORT, '0.0.0.0', () => {
|
||||
log(`web ui listening on ${useTls ? 'https' : 'http'}://0.0.0.0:${WEB_PORT}`);
|
||||
server.listen(port, '0.0.0.0', () => {
|
||||
log(`web ui listening on ${useTls ? 'https' : 'http'}://0.0.0.0:${port}`);
|
||||
});
|
||||
|
||||
setInterval(() => broadcastWs('status', getStatus()), 1000);
|
||||
|
||||
@@ -1,199 +0,0 @@
|
||||
/**
|
||||
* Audio transcoding bridge — uses smartrust to communicate with the Rust
|
||||
* opus-codec binary, which handles Opus ↔ G.722 ↔ PCMU/PCMA transcoding.
|
||||
*
|
||||
* All codec conversion happens in Rust (libopus + SpanDSP G.722 port).
|
||||
* The TypeScript side just passes raw payloads back and forth.
|
||||
*/
|
||||
|
||||
import path from 'node:path';
|
||||
import { RustBridge } from '@push.rocks/smartrust';
|
||||
|
||||
// ---------------------------------------------------------------------------
|
||||
// Command type map for smartrust
|
||||
// ---------------------------------------------------------------------------
|
||||
|
||||
/**
 * Command map handed to `RustBridge`: for every command name understood by
 * the `opus-codec` binary, the shape of its parameters and of its result.
 * Binary payloads travel as base64 strings (`data_b64`).
 */
type TCodecCommands = {
  /** Handshake sent once, right after the binary has been spawned. */
  init: { params: Record<string, never>; result: Record<string, never> };

  /** Creates the codec session identified by `session_id`. */
  create_session: { params: { session_id: string }; result: Record<string, never> };

  /** Destroys the codec session identified by `session_id`. */
  destroy_session: { params: { session_id: string }; result: Record<string, never> };

  /** Converts a payload from payload type `from_pt` to payload type `to_pt`. */
  transcode: {
    params: {
      data_b64: string;
      from_pt: number;
      to_pt: number;
      session_id?: string;
      direction?: string;
    };
    result: { data_b64: string };
  };

  /** Encodes PCM sampled at `sample_rate` into payload type `to_pt`. */
  encode_pcm: {
    params: {
      data_b64: string;
      sample_rate: number;
      to_pt: number;
      session_id?: string;
    };
    result: { data_b64: string };
  };
};
|
||||
|
||||
// ---------------------------------------------------------------------------
|
||||
// Bridge singleton
|
||||
// ---------------------------------------------------------------------------
|
||||
|
||||
// Module-wide handle to the opus-codec RustBridge. Assigned by initCodecBridge()
// and reset to null when spawning/init fails, when the process exits, or on
// shutdownCodecBridge().
let bridge: RustBridge<TCodecCommands> | null = null;
|
||||
// Set to true only after the `init` command has completed; cleared again on
// process exit and on shutdown.
let initialized = false;
|
||||
|
||||
/**
 * Lists the candidate locations of the `opus-codec` binary, resolved against
 * the current working directory: the `dist_rust` output first, then the cargo
 * release and debug build directories.
 */
function buildLocalPaths(): string[] {
  const cwd = process.cwd();
  const candidateDirs: string[][] = [
    ['dist_rust'],
    ['rust', 'target', 'release'],
    ['rust', 'target', 'debug'],
  ];
  return candidateDirs.map((segments) => path.join(cwd, ...segments, 'opus-codec'));
}
|
||||
|
||||
// Logger remembered from the most recent initCodecBridge() call so that code
// running later (exit handler, session helpers) can still report problems.
let logFn: ((msg: string) => void) | undefined;
|
||||
|
||||
/**
 * Initialize the audio transcoding bridge: spawn the Rust `opus-codec` binary
 * and send it the `init` command.
 *
 * Calling this while a bridge is already initialized is a no-op that returns
 * `true`.
 *
 * @param log - optional logger; it is stored module-wide so that later
 *   messages (for example the process-exit notice) use it as well.
 * @returns `true` once the binary is spawned and initialized, `false` when
 *   spawning fails or any step throws.
 */
export async function initCodecBridge(log?: (msg: string) => void): Promise<boolean> {
  if (initialized && bridge) return true;
  logFn = log;

  // Remember the instance created by THIS call so the catch block can clean
  // it up and the exit handler can tell it apart from a newer bridge.
  let created: RustBridge<TCodecCommands> | null = null;

  try {
    const instance = new RustBridge<TCodecCommands>({
      binaryName: 'opus-codec',
      localPaths: buildLocalPaths(),
    });
    created = instance;
    bridge = instance;

    const spawned = await instance.spawn();
    if (!spawned) {
      log?.('[codec] failed to spawn opus-codec binary');
      bridge = null;
      return false;
    }

    // Auto-restart: reset state when the Rust process exits so the next use
    // re-initializes instead of failing silently. Ignore exits of an instance
    // that is no longer the active singleton (e.g. it was shut down and a
    // replacement has already been started).
    instance.on('exit', () => {
      if (bridge !== instance) return;
      logFn?.('[codec] Rust audio transcoder process exited — will re-init on next use');
      bridge = null;
      initialized = false;
    });

    // The cast is kept from the original call site; the bridge's parameter
    // typing for an empty payload is not visible from here.
    await instance.sendCommand('init', {} as any);
    initialized = true;
    log?.('[codec] Rust audio transcoder initialized (Opus + G.722 + PCMU/PCMA)');
    return true;
  } catch (e: unknown) {
    log?.(`[codec] init error: ${e instanceof Error ? e.message : String(e)}`);
    bridge = null;
    // The child may already be running (e.g. the `init` command was
    // rejected). Kill it so a failed handshake does not leak a process.
    if (created) {
      try { created.kill(); } catch { /* nothing left to kill */ }
    }
    return false;
  }
}
|
||||
|
||||
// ---------------------------------------------------------------------------
|
||||
// Session management — per-call codec isolation
|
||||
// ---------------------------------------------------------------------------
|
||||
|
||||
/**
 * Create an isolated codec session so that each call keeps its own
 * Opus/G.722 encoder/decoder state and concurrent calls cannot corrupt each
 * other's stateful codec predictions.
 *
 * If the bridge is not up, one re-initialization is attempted first, using
 * the logger remembered from the previous init.
 *
 * @param sessionId - identifier sent to the binary as `session_id`
 * @returns `true` when the session was created, `false` when the bridge could
 *   not be (re)initialized or the command failed.
 */
export async function createSession(sessionId: string): Promise<boolean> {
  const ready = Boolean(bridge) && initialized;
  if (!ready) {
    // Bridge died or was never started: try to bring it back.
    const revived = await initCodecBridge(logFn);
    if (!revived) {
      return false;
    }
  }

  try {
    await bridge!.sendCommand('create_session', { session_id: sessionId });
    return true;
  } catch (err: any) {
    logFn?.(`[codec] create_session error: ${err?.message || err}`);
    return false;
  }
}
|
||||
|
||||
/**
 * Destroy a codec session, releasing its encoder/decoder state.
 *
 * Best-effort: does nothing when the bridge is not ready and ignores any
 * error raised by the command.
 *
 * @param sessionId - identifier sent to the binary as `session_id`
 */
export async function destroySession(sessionId: string): Promise<void> {
  if (!(bridge && initialized)) {
    return;
  }
  const request = { session_id: sessionId };
  try {
    await bridge.sendCommand('destroy_session', request);
  } catch {
    // Cleanup failures are deliberately ignored.
  }
}
|
||||
|
||||
// ---------------------------------------------------------------------------
|
||||
// Transcoding
|
||||
// ---------------------------------------------------------------------------
|
||||
|
||||
/**
 * Transcode an RTP payload from one codec to another. The conversion itself
 * (Opus, G.722, PCMU, PCMA, resampling) is performed by the Rust binary; this
 * function only base64-wraps the payload and forwards it.
 *
 * @param data - raw RTP payload (no header)
 * @param fromPT - source payload type (0=PCMU, 8=PCMA, 9=G.722, 111=Opus)
 * @param toPT - target payload type
 * @param sessionId - optional session for isolated codec state; omitted from
 *   the request when empty
 * @param direction - optional direction tag; omitted from the request when empty
 * @returns the transcoded payload, or `null` when the bridge is not ready or
 *   the command fails
 */
export async function transcode(data: Buffer, fromPT: number, toPT: number, sessionId?: string, direction?: string): Promise<Buffer | null> {
  if (!(bridge && initialized)) {
    return null;
  }
  try {
    const request = {
      data_b64: data.toString('base64'),
      from_pt: fromPT,
      to_pt: toPT,
      ...(sessionId ? { session_id: sessionId } : {}),
      ...(direction ? { direction } : {}),
    };
    const { data_b64: encoded } = await bridge.sendCommand('transcode', request);
    return Buffer.from(encoded, 'base64');
  } catch {
    return null;
  }
}
|
||||
|
||||
/**
 * Encode raw 16-bit PCM to a target codec via the Rust binary.
 *
 * @param pcmData - raw 16-bit LE PCM bytes
 * @param sampleRate - input sample rate (e.g. 22050 for Piper TTS)
 * @param toPT - target payload type (9=G.722, 111=Opus, 0=PCMU, 8=PCMA)
 * @param sessionId - optional session for isolated codec state; omitted from
 *   the request when empty
 * @returns the encoded payload, or `null` when the bridge is not ready or the
 *   command fails (the failure is logged)
 */
export async function encodePcm(pcmData: Buffer, sampleRate: number, toPT: number, sessionId?: string): Promise<Buffer | null> {
  if (!bridge || !initialized) return null;
  try {
    const params = {
      data_b64: pcmData.toString('base64'),
      sample_rate: sampleRate,
      to_pt: toPT,
      ...(sessionId ? { session_id: sessionId } : {}),
    };
    const result = await bridge.sendCommand('encode_pcm', params);
    return Buffer.from(result.data_b64, 'base64');
  } catch (e: unknown) {
    // Report through the injected logger like the other codec helpers do;
    // fall back to the console only when no logger was provided.
    const message = `[codec] encode_pcm error: ${e instanceof Error ? e.message : String(e)}`;
    if (logFn) {
      logFn(message);
    } else {
      console.error(message);
    }
    return null;
  }
}
|
||||
|
||||
/**
 * Reports whether the codec bridge can accept commands.
 *
 * @returns `true` only when the `init` handshake has completed and a bridge
 *   instance is currently held.
 */
export function isCodecReady(): boolean {
  if (!initialized) {
    return false;
  }
  return bridge !== null;
}
|
||||
|
||||
/**
 * Shut down the codec bridge: kill the Rust process (ignoring any error while
 * doing so) and clear the module state. Does nothing when no bridge is held.
 */
export function shutdownCodecBridge(): void {
  if (!bridge) {
    return;
  }
  try {
    bridge.kill();
  } catch {
    // A failing kill is not actionable here.
  }
  bridge = null;
  initialized = false;
}
|
||||
@@ -0,0 +1,4 @@
|
||||
import * as smartbucket from '@push.rocks/smartbucket';
|
||||
import * as smartdata from '@push.rocks/smartdata';
|
||||
|
||||
export { smartbucket, smartdata };
|
||||
+418
-77
@@ -4,13 +4,39 @@
|
||||
* The proxy-engine handles ALL SIP protocol mechanics. TypeScript only:
|
||||
* - Sends configuration
|
||||
* - Receives high-level events (incoming_call, call_ended, etc.)
|
||||
* - Sends high-level commands (hangup, make_call, play_audio)
|
||||
* - Sends high-level commands (hangup, make_call, add_leg, webrtc_offer)
|
||||
*
|
||||
* No raw SIP ever touches TypeScript.
|
||||
*/
|
||||
|
||||
import path from 'node:path';
|
||||
import { RustBridge } from '@push.rocks/smartrust';
|
||||
import type { TProxyEventMap } from './shared/proxy-events.ts';
|
||||
export type {
|
||||
ICallAnsweredEvent,
|
||||
ICallEndedEvent,
|
||||
ICallRingingEvent,
|
||||
IDeviceRegisteredEvent,
|
||||
IFaxCompletedEvent,
|
||||
IFaxFailedEvent,
|
||||
IFaxStartedEvent,
|
||||
IIncomingCallEvent,
|
||||
ILegAddedEvent,
|
||||
ILegRemovedEvent,
|
||||
ILegStateChangedEvent,
|
||||
IOutboundCallEvent,
|
||||
IOutboundCallStartedEvent,
|
||||
IProviderRegisteredEvent,
|
||||
IRecordingDoneEvent,
|
||||
ISipUnhandledEvent,
|
||||
IVoicemailErrorEvent,
|
||||
IVoicemailStartedEvent,
|
||||
IWebRtcAudioRxEvent,
|
||||
IWebRtcIceCandidateEvent,
|
||||
IWebRtcStateEvent,
|
||||
IWebRtcTrackEvent,
|
||||
TProxyEventMap,
|
||||
} from './shared/proxy-events.ts';
|
||||
|
||||
// ---------------------------------------------------------------------------
|
||||
// Command type map for smartrust
|
||||
@@ -29,59 +55,103 @@ type TProxyCommands = {
|
||||
params: { number: string; device_id?: string; provider_id?: string };
|
||||
result: { call_id: string };
|
||||
};
|
||||
play_audio: {
|
||||
params: { call_id: string; leg_id?: string; file_path: string; codec?: number };
|
||||
send_fax: {
|
||||
params: { number: string; file_path: string; provider_id?: string };
|
||||
result: { call_id: string; codec: 'PCMU' | 'PCMA' };
|
||||
};
|
||||
add_leg: {
|
||||
params: { call_id: string; number: string; provider_id?: string };
|
||||
result: { leg_id: string };
|
||||
};
|
||||
remove_leg: {
|
||||
params: { call_id: string; leg_id: string };
|
||||
result: Record<string, never>;
|
||||
};
|
||||
start_recording: {
|
||||
params: { call_id: string; file_path: string; max_duration_ms?: number };
|
||||
add_device_leg: {
|
||||
params: { call_id: string; device_id: string };
|
||||
result: { leg_id: string };
|
||||
};
|
||||
transfer_leg: {
|
||||
params: { source_call_id: string; leg_id: string; target_call_id: string };
|
||||
result: Record<string, never>;
|
||||
};
|
||||
stop_recording: {
|
||||
params: { call_id: string };
|
||||
result: { file_path: string; duration_ms: number };
|
||||
replace_leg: {
|
||||
params: { call_id: string; old_leg_id: string; number: string; provider_id?: string };
|
||||
result: { new_leg_id: string };
|
||||
};
|
||||
start_interaction: {
|
||||
params: {
|
||||
call_id: string;
|
||||
leg_id: string;
|
||||
prompt_wav: string;
|
||||
expected_digits: string;
|
||||
timeout_ms: number;
|
||||
};
|
||||
result: { result: 'digit' | 'timeout' | 'cancelled'; digit?: string };
|
||||
};
|
||||
start_tts_interaction: {
|
||||
params: {
|
||||
call_id: string;
|
||||
leg_id: string;
|
||||
text: string;
|
||||
voice?: string;
|
||||
model?: string;
|
||||
voices?: string;
|
||||
expected_digits: string;
|
||||
timeout_ms: number;
|
||||
};
|
||||
result: { result: 'digit' | 'timeout' | 'cancelled'; digit?: string };
|
||||
};
|
||||
add_tool_leg: {
|
||||
params: {
|
||||
call_id: string;
|
||||
tool_type: 'recording' | 'transcription';
|
||||
config?: Record<string, unknown>;
|
||||
};
|
||||
result: { tool_leg_id: string };
|
||||
};
|
||||
remove_tool_leg: {
|
||||
params: { call_id: string; tool_leg_id: string };
|
||||
result: Record<string, never>;
|
||||
};
|
||||
set_leg_metadata: {
|
||||
params: { call_id: string; leg_id: string; key: string; value: unknown };
|
||||
result: Record<string, never>;
|
||||
};
|
||||
generate_tts: {
|
||||
params: { model: string; voices: string; voice: string; text: string; output: string; cacheable?: boolean };
|
||||
result: { output: string };
|
||||
};
|
||||
// WebRTC signaling — bridged from the browser via the TS control plane.
|
||||
webrtc_offer: {
|
||||
params: { session_id: string; sdp: string };
|
||||
result: { sdp: string };
|
||||
};
|
||||
webrtc_ice: {
|
||||
params: {
|
||||
session_id: string;
|
||||
candidate: string;
|
||||
sdp_mid?: string;
|
||||
sdp_mline_index?: number;
|
||||
};
|
||||
result: Record<string, never>;
|
||||
};
|
||||
webrtc_link: {
|
||||
params: {
|
||||
session_id: string;
|
||||
call_id: string;
|
||||
provider_media_addr: string;
|
||||
provider_media_port: number;
|
||||
sip_pt?: number;
|
||||
};
|
||||
result: Record<string, never>;
|
||||
};
|
||||
webrtc_close: {
|
||||
params: { session_id: string };
|
||||
result: Record<string, never>;
|
||||
};
|
||||
};
|
||||
|
||||
// ---------------------------------------------------------------------------
|
||||
// Event types from Rust
|
||||
// ---------------------------------------------------------------------------
|
||||
|
||||
/**
 * Event payload received from the Rust proxy-engine.
 * NOTE(review): presumably the payload of the `incoming_call` event — confirm
 * against the event registration code.
 */
export interface IIncomingCallEvent {
|
||||
call_id: string;
|
||||
from_uri: string;
|
||||
to_number: string;
|
||||
provider_id: string;
|
||||
}
|
||||
|
||||
/**
 * Event payload received from the Rust proxy-engine.
 * NOTE(review): presumably describes an outbound call — confirm against the
 * event registration code.
 */
export interface IOutboundCallEvent {
|
||||
call_id: string;
|
||||
// May be null; presumably when the call did not originate from a device — TODO confirm.
from_device: string | null;
|
||||
to_number: string;
|
||||
}
|
||||
|
||||
/**
 * Event payload received from the Rust proxy-engine.
 * NOTE(review): presumably the payload of the `call_ended` event — confirm
 * against the event registration code.
 */
export interface ICallEndedEvent {
|
||||
call_id: string;
|
||||
reason: string;
|
||||
// NOTE(review): unit (seconds vs. milliseconds) is not visible here — confirm.
duration: number;
|
||||
// Optional; not every event carries it.
from_side?: string;
|
||||
}
|
||||
|
||||
/**
 * Event payload received from the Rust proxy-engine.
 * NOTE(review): presumably reports a provider's registration state — confirm
 * against the event registration code.
 */
export interface IProviderRegisteredEvent {
|
||||
provider_id: string;
|
||||
registered: boolean;
|
||||
// May be null; presumably when no public IP is known — TODO confirm.
public_ip: string | null;
|
||||
}
|
||||
|
||||
/**
 * Event payload received from the Rust proxy-engine.
 * NOTE(review): presumably reports a local device registration — confirm
 * against the event registration code.
 */
export interface IDeviceRegisteredEvent {
|
||||
device_id: string;
|
||||
display_name: string;
|
||||
address: string;
|
||||
port: number;
|
||||
// NOTE(review): presumably the SIP address-of-record — confirm.
aor: string;
|
||||
// NOTE(review): unit is not visible here; presumably seconds — confirm.
expires: number;
|
||||
}
|
||||
|
||||
// ---------------------------------------------------------------------------
|
||||
// Bridge singleton
|
||||
// ---------------------------------------------------------------------------
|
||||
@@ -90,10 +160,34 @@ let bridge: RustBridge<TProxyCommands> | null = null;
|
||||
let initialized = false;
|
||||
let logFn: ((msg: string) => void) | undefined;
|
||||
|
||||
/**
 * ICE candidate input accepted by `webrtcIce`: either the bare candidate
 * string, or an object carrying the candidate together with its optional
 * `sdpMid` / `sdpMLineIndex`.
 */
type TWebRtcIceCandidate =
  | string
  | {
      candidate?: string;
      sdpMid?: string;
      sdpMLineIndex?: number;
    };
|
||||
|
||||
/**
 * Turns a caught value into a loggable string.
 *
 * @param error - whatever was thrown
 * @returns the `message` of an `Error` instance, otherwise `String(error)`
 */
function errorMessage(error: unknown): string {
  if (error instanceof Error) {
    return error.message;
  }
  return String(error);
}
|
||||
|
||||
function buildLocalPaths(): string[] {
|
||||
const root = process.cwd();
|
||||
// Map Node's process.arch to tsrust's friendly target name.
|
||||
// tsrust writes multi-target binaries as <bin>_<os>_<arch>,
|
||||
// e.g. proxy-engine_linux_amd64 / proxy-engine_linux_arm64.
|
||||
const archSuffix =
|
||||
process.arch === 'arm64' ? 'linux_arm64' :
|
||||
process.arch === 'x64' ? 'linux_amd64' :
|
||||
null;
|
||||
const multiTarget = archSuffix
|
||||
? [path.join(root, 'dist_rust', `proxy-engine_${archSuffix}`)]
|
||||
: [];
|
||||
return [
|
||||
// 1. Multi-target output matching the running host arch (Docker image, CI, multi-target dev).
|
||||
...multiTarget,
|
||||
// 2. Single-target (unsuffixed) output — legacy/fallback when tsrust runs without targets.
|
||||
path.join(root, 'dist_rust', 'proxy-engine'),
|
||||
// 3. Direct cargo builds for dev iteration.
|
||||
path.join(root, 'rust', 'target', 'release', 'proxy-engine'),
|
||||
path.join(root, 'rust', 'target', 'debug', 'proxy-engine'),
|
||||
];
|
||||
@@ -134,8 +228,8 @@ export async function initProxyEngine(log?: (msg: string) => void): Promise<bool
|
||||
initialized = true;
|
||||
log?.('[proxy-engine] spawned and ready');
|
||||
return true;
|
||||
} catch (e: any) {
|
||||
log?.(`[proxy-engine] init error: ${e.message}`);
|
||||
} catch (error: unknown) {
|
||||
log?.(`[proxy-engine] init error: ${errorMessage(error)}`);
|
||||
bridge = null;
|
||||
return false;
|
||||
}
|
||||
@@ -145,14 +239,14 @@ export async function initProxyEngine(log?: (msg: string) => void): Promise<bool
|
||||
* Send the full app config to the proxy engine.
|
||||
* This binds the SIP socket, starts provider registrations, etc.
|
||||
*/
|
||||
export async function configureProxyEngine(config: Record<string, unknown>): Promise<boolean> {
|
||||
export async function configureProxyEngine(config: TProxyCommands['configure']['params']): Promise<boolean> {
|
||||
if (!bridge || !initialized) return false;
|
||||
try {
|
||||
const result = await bridge.sendCommand('configure', config as any);
|
||||
logFn?.(`[proxy-engine] configured, SIP bound on ${(result as any)?.bound || '?'}`);
|
||||
const result = await sendProxyCommand('configure', config);
|
||||
logFn?.(`[proxy-engine] configured, SIP bound on ${result.bound || '?'}`);
|
||||
return true;
|
||||
} catch (e: any) {
|
||||
logFn?.(`[proxy-engine] configure error: ${e.message}`);
|
||||
} catch (error: unknown) {
|
||||
logFn?.(`[proxy-engine] configure error: ${errorMessage(error)}`);
|
||||
return false;
|
||||
}
|
||||
}
|
||||
@@ -163,14 +257,29 @@ export async function configureProxyEngine(config: Record<string, unknown>): Pro
|
||||
export async function makeCall(number: string, deviceId?: string, providerId?: string): Promise<string | null> {
|
||||
if (!bridge || !initialized) return null;
|
||||
try {
|
||||
const result = await bridge.sendCommand('make_call', {
|
||||
const result = await sendProxyCommand('make_call', {
|
||||
number,
|
||||
device_id: deviceId,
|
||||
provider_id: providerId,
|
||||
} as any);
|
||||
return (result as any)?.call_id || null;
|
||||
} catch (e: any) {
|
||||
logFn?.(`[proxy-engine] make_call error: ${e?.message || e}`);
|
||||
});
|
||||
return result.call_id || null;
|
||||
} catch (error: unknown) {
|
||||
logFn?.(`[proxy-engine] make_call error: ${errorMessage(error)}`);
|
||||
return null;
|
||||
}
|
||||
}
|
||||
|
||||
/**
 * Ask the proxy engine to send a fax using the `send_fax` command.
 *
 * @param number - destination number
 * @param filePath - forwarded to the engine as `file_path`
 * @param providerId - optional provider, forwarded as `provider_id`
 * @returns the `call_id` reported by the engine, or `null` when the bridge is
 *   not ready, the command fails (logged), or no call id comes back
 */
export async function sendFax(number: string, filePath: string, providerId?: string): Promise<string | null> {
  if (!(bridge && initialized)) {
    return null;
  }
  try {
    const request = { number, file_path: filePath, provider_id: providerId };
    const { call_id: faxCallId } = await sendProxyCommand('send_fax', request);
    return faxCallId || null;
  } catch (error: unknown) {
    logFn?.(`[proxy-engine] send_fax error: ${errorMessage(error)}`);
    return null;
  }
}
|
||||
@@ -181,7 +290,7 @@ export async function makeCall(number: string, deviceId?: string, providerId?: s
|
||||
export async function hangupCall(callId: string): Promise<boolean> {
|
||||
if (!bridge || !initialized) return false;
|
||||
try {
|
||||
await bridge.sendCommand('hangup', { call_id: callId } as any);
|
||||
await sendProxyCommand('hangup', { call_id: callId });
|
||||
return true;
|
||||
} catch {
|
||||
return false;
|
||||
@@ -194,10 +303,9 @@ export async function hangupCall(callId: string): Promise<boolean> {
|
||||
export async function webrtcOffer(sessionId: string, sdp: string): Promise<{ sdp: string } | null> {
|
||||
if (!bridge || !initialized) return null;
|
||||
try {
|
||||
const result = await bridge.sendCommand('webrtc_offer', { session_id: sessionId, sdp } as any);
|
||||
return result as any;
|
||||
} catch (e: any) {
|
||||
logFn?.(`[proxy-engine] webrtc_offer error: ${e?.message || e}`);
|
||||
return await sendProxyCommand('webrtc_offer', { session_id: sessionId, sdp });
|
||||
} catch (error: unknown) {
|
||||
logFn?.(`[proxy-engine] webrtc_offer error: ${errorMessage(error)}`);
|
||||
return null;
|
||||
}
|
||||
}
|
||||
@@ -205,15 +313,15 @@ export async function webrtcOffer(sessionId: string, sdp: string): Promise<{ sdp
|
||||
/**
|
||||
* Forward an ICE candidate to the proxy engine.
|
||||
*/
|
||||
export async function webrtcIce(sessionId: string, candidate: any): Promise<void> {
|
||||
export async function webrtcIce(sessionId: string, candidate: TWebRtcIceCandidate): Promise<void> {
|
||||
if (!bridge || !initialized) return;
|
||||
try {
|
||||
await bridge.sendCommand('webrtc_ice', {
|
||||
await sendProxyCommand('webrtc_ice', {
|
||||
session_id: sessionId,
|
||||
candidate: candidate?.candidate || candidate,
|
||||
sdp_mid: candidate?.sdpMid,
|
||||
sdp_mline_index: candidate?.sdpMLineIndex,
|
||||
} as any);
|
||||
candidate: typeof candidate === 'string' ? candidate : candidate.candidate || '',
|
||||
sdp_mid: typeof candidate === 'string' ? undefined : candidate.sdpMid,
|
||||
sdp_mline_index: typeof candidate === 'string' ? undefined : candidate.sdpMLineIndex,
|
||||
});
|
||||
} catch { /* ignore */ }
|
||||
}
|
||||
|
||||
@@ -224,16 +332,48 @@ export async function webrtcIce(sessionId: string, candidate: any): Promise<void
|
||||
export async function webrtcLink(sessionId: string, callId: string, providerMediaAddr: string, providerMediaPort: number, sipPt: number = 9): Promise<boolean> {
|
||||
if (!bridge || !initialized) return false;
|
||||
try {
|
||||
await bridge.sendCommand('webrtc_link', {
|
||||
await sendProxyCommand('webrtc_link', {
|
||||
session_id: sessionId,
|
||||
call_id: callId,
|
||||
provider_media_addr: providerMediaAddr,
|
||||
provider_media_port: providerMediaPort,
|
||||
sip_pt: sipPt,
|
||||
} as any);
|
||||
});
|
||||
return true;
|
||||
} catch (e: any) {
|
||||
logFn?.(`[proxy-engine] webrtc_link error: ${e?.message || e}`);
|
||||
} catch (error: unknown) {
|
||||
logFn?.(`[proxy-engine] webrtc_link error: ${errorMessage(error)}`);
|
||||
return false;
|
||||
}
|
||||
}
|
||||
|
||||
/**
 * Add an external SIP leg to an existing call (multiparty) using the
 * `add_leg` command.
 *
 * @param callId - call to extend
 * @param number - number to dial for the new leg
 * @param providerId - optional provider, forwarded as `provider_id`
 * @returns the new `leg_id`, or `null` when the bridge is not ready, the
 *   command fails (logged), or no leg id comes back
 */
export async function addLeg(callId: string, number: string, providerId?: string): Promise<string | null> {
  if (!(bridge && initialized)) {
    return null;
  }
  try {
    const request = { call_id: callId, number, provider_id: providerId };
    const { leg_id: newLegId } = await sendProxyCommand('add_leg', request);
    return newLegId || null;
  } catch (error: unknown) {
    logFn?.(`[proxy-engine] add_leg error: ${errorMessage(error)}`);
    return null;
  }
}
|
||||
|
||||
/**
 * Remove a leg from a call using the `remove_leg` command.
 *
 * @param callId - call that owns the leg
 * @param legId - leg to remove
 * @returns `true` when the command succeeded, `false` when the bridge is not
 *   ready or the command fails (logged)
 */
export async function removeLeg(callId: string, legId: string): Promise<boolean> {
  if (!(bridge && initialized)) {
    return false;
  }
  try {
    const request = { call_id: callId, leg_id: legId };
    await sendProxyCommand('remove_leg', request);
    return true;
  } catch (error: unknown) {
    logFn?.(`[proxy-engine] remove_leg error: ${errorMessage(error)}`);
    return false;
  }
}
|
||||
@@ -244,17 +384,209 @@ export async function webrtcLink(sessionId: string, callId: string, providerMedi
|
||||
export async function webrtcClose(sessionId: string): Promise<void> {
|
||||
if (!bridge || !initialized) return;
|
||||
try {
|
||||
await bridge.sendCommand('webrtc_close', { session_id: sessionId } as any);
|
||||
await sendProxyCommand('webrtc_close', { session_id: sessionId });
|
||||
} catch { /* ignore */ }
|
||||
}
|
||||
|
||||
// ---------------------------------------------------------------------------
|
||||
// Device leg & interaction commands
|
||||
// ---------------------------------------------------------------------------
|
||||
|
||||
/**
 * Add a local SIP device to an existing call (mid-call INVITE to a desk
 * phone) using the `add_device_leg` command.
 *
 * @param callId - call to extend
 * @param deviceId - device to bring into the call
 * @returns the new `leg_id`, or `null` when the bridge is not ready, the
 *   command fails (logged), or no leg id comes back
 */
export async function addDeviceLeg(callId: string, deviceId: string): Promise<string | null> {
  if (!(bridge && initialized)) {
    return null;
  }
  try {
    const request = { call_id: callId, device_id: deviceId };
    const { leg_id: deviceLegId } = await sendProxyCommand('add_device_leg', request);
    return deviceLegId || null;
  } catch (error: unknown) {
    logFn?.(`[proxy-engine] add_device_leg error: ${errorMessage(error)}`);
    return null;
  }
}
|
||||
|
||||
/**
 * Transfer a leg from one call to another using the `transfer_leg` command
 * (the leg stays connected and switches mixer).
 *
 * @param sourceCallId - call the leg currently belongs to
 * @param legId - leg to move
 * @param targetCallId - call that should receive the leg
 * @returns `true` when the command succeeded, `false` when the bridge is not
 *   ready or the command fails (logged)
 */
export async function transferLeg(
  sourceCallId: string,
  legId: string,
  targetCallId: string,
): Promise<boolean> {
  if (!(bridge && initialized)) {
    return false;
  }
  try {
    const request = {
      source_call_id: sourceCallId,
      leg_id: legId,
      target_call_id: targetCallId,
    };
    await sendProxyCommand('transfer_leg', request);
    return true;
  } catch (error: unknown) {
    logFn?.(`[proxy-engine] transfer_leg error: ${errorMessage(error)}`);
    return false;
  }
}
|
||||
|
||||
/**
 * Replace a leg using the `replace_leg` command: the old leg is terminated
 * and a new number is dialed into the same call.
 *
 * @param callId - call that owns the leg
 * @param oldLegId - leg to terminate
 * @param number - number to dial for the replacement leg
 * @param providerId - optional provider, forwarded as `provider_id`
 * @returns the `new_leg_id`, or `null` when the bridge is not ready, the
 *   command fails (logged), or no leg id comes back
 */
export async function replaceLeg(
  callId: string,
  oldLegId: string,
  number: string,
  providerId?: string,
): Promise<string | null> {
  if (!(bridge && initialized)) {
    return null;
  }
  try {
    const request = {
      call_id: callId,
      old_leg_id: oldLegId,
      number,
      provider_id: providerId,
    };
    const { new_leg_id: replacementLegId } = await sendProxyCommand('replace_leg', request);
    return replacementLegId || null;
  } catch (error: unknown) {
    logFn?.(`[proxy-engine] replace_leg error: ${errorMessage(error)}`);
    return null;
  }
}
|
||||
|
||||
/**
 * Start an interaction on a specific leg using the `start_interaction`
 * command: the leg is isolated, a prompt is played and DTMF is collected.
 * The returned promise settles only when the interaction is over (digit
 * pressed, timeout, or cancelled).
 *
 * @param callId - call that owns the leg
 * @param legId - leg to interact with
 * @param promptWav - prompt forwarded as `prompt_wav`
 * @param expectedDigits - forwarded as `expected_digits`
 * @param timeoutMs - forwarded as `timeout_ms`
 * @returns the engine's outcome, or `null` when the bridge is not ready or
 *   the command fails (logged)
 */
export async function startInteraction(
  callId: string,
  legId: string,
  promptWav: string,
  expectedDigits: string,
  timeoutMs: number,
): Promise<{ result: 'digit' | 'timeout' | 'cancelled'; digit?: string } | null> {
  if (!(bridge && initialized)) {
    return null;
  }
  try {
    const request = {
      call_id: callId,
      leg_id: legId,
      prompt_wav: promptWav,
      expected_digits: expectedDigits,
      timeout_ms: timeoutMs,
    };
    const outcome = await sendProxyCommand('start_interaction', request);
    return outcome;
  } catch (error: unknown) {
    logFn?.(`[proxy-engine] start_interaction error: ${errorMessage(error)}`);
    return null;
  }
}
|
||||
|
||||
/**
 * Run a live text-to-speech interaction on one leg. The first chunk is
 * rendered up front and the rest streams into the mixer while playback is
 * already live.
 *
 * @param callId Call that owns the leg.
 * @param legId Leg to interact with.
 * @param text Text to speak.
 * @param expectedDigits Digits to accept, forwarded as `expected_digits`.
 * @param timeoutMs Timeout forwarded as `timeout_ms`.
 * @param options Optional `voice`, `model` and `voices` values, forwarded
 *   unchanged (left `undefined` when not given).
 * @returns The engine's outcome, or `null` when the engine is not ready or
 *   the command failed (the failure is logged).
 */
export async function startTtsInteraction(
  callId: string,
  legId: string,
  text: string,
  expectedDigits: string,
  timeoutMs: number,
  options?: {
    voice?: string;
    model?: string;
    voices?: string;
  },
): Promise<{ result: 'digit' | 'timeout' | 'cancelled'; digit?: string } | null> {
  if (!(bridge && initialized)) {
    return null;
  }

  const { voice, model, voices } = options ?? {};
  const params = {
    call_id: callId,
    leg_id: legId,
    text,
    expected_digits: expectedDigits,
    timeout_ms: timeoutMs,
    voice,
    model,
    voices,
  };
  try {
    // Awaited inside the try so a rejected command lands in the catch below.
    const outcome = await sendProxyCommand('start_tts_interaction', params);
    return outcome;
  } catch (error: unknown) {
    logFn?.(`[proxy-engine] start_tts_interaction error: ${errorMessage(error)}`);
    return null;
  }
}
|
||||
|
||||
/**
 * Attach a tool leg (recording or transcription) to a call.
 * Tool legs receive per-source unmerged audio from all participants.
 *
 * @param callId Call to attach the tool leg to.
 * @param toolType Which tool to attach.
 * @param config Optional tool configuration, forwarded unchanged.
 * @returns The tool leg id, or `null` when the engine is not ready, the
 *   command failed (logged), or no id was returned.
 */
export async function addToolLeg(
  callId: string,
  toolType: 'recording' | 'transcription',
  config?: Record<string, unknown>,
): Promise<string | null> {
  if (!(bridge && initialized)) {
    return null;
  }

  const params = { call_id: callId, tool_type: toolType, config };
  try {
    const reply = await sendProxyCommand('add_tool_leg', params);
    const toolLegId = reply.tool_leg_id;
    return toolLegId ? toolLegId : null;
  } catch (error: unknown) {
    logFn?.(`[proxy-engine] add_tool_leg error: ${errorMessage(error)}`);
    return null;
  }
}
|
||||
|
||||
/**
 * Detach a tool leg from a call. Triggers finalization (WAV files, metadata).
 *
 * @param callId Call that owns the tool leg.
 * @param toolLegId Tool leg to remove.
 * @returns `true` when the command succeeded; `false` when the engine is not
 *   ready or the command failed (the failure is logged).
 */
export async function removeToolLeg(callId: string, toolLegId: string): Promise<boolean> {
  if (!(bridge && initialized)) {
    return false;
  }

  const params = { call_id: callId, tool_leg_id: toolLegId };
  try {
    await sendProxyCommand('remove_tool_leg', params);
  } catch (error: unknown) {
    logFn?.(`[proxy-engine] remove_tool_leg error: ${errorMessage(error)}`);
    return false;
  }
  return true;
}
|
||||
|
||||
/**
 * Store one metadata key-value pair on a leg.
 *
 * @param callId Call that owns the leg.
 * @param legId Leg to annotate.
 * @param key Metadata key.
 * @param value Metadata value, forwarded unchanged.
 * @returns `true` when the command succeeded; `false` when the engine is not
 *   ready or the command failed (the failure is logged).
 */
export async function setLegMetadata(
  callId: string,
  legId: string,
  key: string,
  value: unknown,
): Promise<boolean> {
  if (!(bridge && initialized)) {
    return false;
  }

  const params = { call_id: callId, leg_id: legId, key, value };
  try {
    await sendProxyCommand('set_leg_metadata', params);
  } catch (error: unknown) {
    logFn?.(`[proxy-engine] set_leg_metadata error: ${errorMessage(error)}`);
    return false;
  }
  return true;
}
|
||||
|
||||
/**
 * Subscribe to an event from the proxy engine.
 * Event names: incoming_call, outbound_device_call, call_ringing,
 * call_answered, call_ended, provider_registered, device_registered,
 * dtmf_digit, recording_done, tool_recording_done, tool_transcription_done,
 * leg_added, leg_removed, sip_unhandled
 *
 * The accepted names are constrained at compile time to the keys of
 * `TProxyEventMap`, and the handler receives the payload type mapped to the
 * chosen key.
 *
 * @param event Event name; the listener is registered as `management:<event>`.
 * @param handler Callback invoked with the event payload.
 * @throws Error when the bridge has not been created yet.
 */
export function onProxyEvent<K extends keyof TProxyEventMap>(
  event: K,
  handler: (data: TProxyEventMap[K]) => void,
): void {
  if (!bridge) throw new Error('proxy engine not initialized');
  bridge.on(`management:${event}`, handler);
}
|
||||
@@ -264,6 +596,15 @@ export function isProxyReady(): boolean {
|
||||
return initialized && bridge !== null;
|
||||
}
|
||||
|
||||
/**
 * Send an arbitrary command to the proxy engine bridge.
 *
 * @param method Command name; must be a key of `TProxyCommands`.
 * @param params Parameters for that command.
 * @returns The bridge's reply, typed as the command's declared result.
 * @throws Error (as a rejected promise) when the bridge is missing or not
 *   yet initialized.
 */
export async function sendProxyCommand<K extends keyof TProxyCommands>(
  method: K,
  params: TProxyCommands[K]['params'],
): Promise<TProxyCommands[K]['result']> {
  if (bridge && initialized) {
    return bridge.sendCommand(method, params) as Promise<TProxyCommands[K]['result']>;
  }
  throw new Error('proxy engine not initialized');
}
|
||||
|
||||
/** Shut down the proxy engine. */
|
||||
export function shutdownProxyEngine(): void {
|
||||
if (bridge) {
|
||||
|
||||
@@ -0,0 +1,266 @@
|
||||
import { hangupCall, onProxyEvent } from '../proxybridge.ts';
|
||||
import type { FaxBoxManager } from '../faxbox.ts';
|
||||
import type { FaxJobManager } from '../faxjobs.ts';
|
||||
import type { VoiceboxManager } from '../voicebox.ts';
|
||||
import type { StatusStore } from './status-store.ts';
|
||||
import type { IProviderMediaInfo, WebRtcLinkManager } from './webrtc-linking.ts';
|
||||
|
||||
/** Collaborators and callbacks consumed by `registerProxyEventHandlers`. */
export interface IRegisterProxyEventHandlersOptions {
  /** Sink for the one-line log messages the handlers emit. */
  log: (msg: string) => void;
  /** Receives call, leg, provider and device state updates. */
  statusStore: StatusStore;
  /** Receives successfully completed inbound faxes addressed to a fax box. */
  faxBoxManager: FaxBoxManager;
  /** Receives fax dialing / started / completed / failed notifications. */
  faxJobManager: FaxJobManager;
  /** Receives finished voicemail recordings. */
  voiceboxManager: VoiceboxManager;
  /** Tracks which WebRTC session belongs to which call. */
  webRtcLinks: WebRtcLinkManager;
  /** Ids of the browser devices to notify about ringing calls. */
  getBrowserDeviceIds: () => string[];
  /** Push a message to one browser device; the handlers ignore the result. */
  sendToBrowserDevice: (deviceId: string, data: unknown) => boolean;
  /** Publish a typed message (used for registration, call-ended and ICE updates). */
  broadcast: (type: string, data: unknown) => void;
  /** Called when an answered call can be linked to an already accepted session. */
  onLinkWebRtcSession: (callId: string, sessionId: string, media: IProviderMediaInfo) => void;
  /** Called when a call ends while a WebRTC session was still linked to it. */
  onCloseWebRtcSession: (sessionId: string) => void;
}
|
||||
|
||||
/**
 * Subscribe to every proxy-engine event the server reacts to and route each
 * one to the matching collaborator: the status store, fax / voicemail
 * persistence, the WebRTC link manager, or connected browsers.
 *
 * @param options Collaborators and callbacks the handlers delegate to.
 */
export function registerProxyEventHandlers(options: IRegisterProxyEventHandlersOptions): void {
  const {
    log,
    statusStore,
    faxBoxManager,
    faxJobManager,
    voiceboxManager,
    webRtcLinks,
    getBrowserDeviceIds,
    sendToBrowserDevice,
    broadcast,
    onLinkWebRtcSession,
    onCloseWebRtcSession,
  } = options;

  /** Text form of a rejection reason, for log lines. */
  const reasonOf = (error: unknown): string =>
    error instanceof Error ? error.message : String(error);

  /** Render the optional media fields of a leg event as a ` key=value ...` suffix. */
  const legMediaDetails = (data: {
    codec?: string | null;
    mediaProtocol?: string | null;
    remoteMedia?: string | null;
    rtpPort?: number | null;
  }): string => {
    const fragments = [
      data.codec ? `codec=${data.codec}` : null,
      data.mediaProtocol ? `media=${data.mediaProtocol}` : null,
      data.remoteMedia ? `remote=${data.remoteMedia}` : null,
      // A port of 0 is still printed; only null/undefined are skipped.
      data.rtpPort !== undefined && data.rtpPort !== null ? `rtp=${data.rtpPort}` : null,
    ].filter((fragment): fragment is string => fragment !== null);
    return fragments.length > 0 ? ` ${fragments.join(' ')}` : '';
  };

  /** Send a `webrtc-incoming` notification for the call to every browser device. */
  const ringBrowsers = (callId: string, from: string): void => {
    for (const deviceId of getBrowserDeviceIds()) {
      sendToBrowserDevice(deviceId, {
        type: 'webrtc-incoming',
        callId,
        from,
        deviceId,
      });
    }
  };

  /** Hang up once a fax session is over, for outbound faxes and fax-box calls. */
  const hangupAfterFax = (data: {
    call_id: string;
    direction: 'outbound' | 'inbound';
    fax_box_id?: string;
  }): void => {
    if (data.direction === 'outbound' || data.fax_box_id) {
      void hangupCall(data.call_id);
    }
  };

  onProxyEvent('provider_registered', (data) => {
    const previous = statusStore.noteProviderRegistered(data);
    if (previous) {
      // Only transitions are logged, not repeated confirmations of the same state.
      const cameUp = data.registered && !previous.wasRegistered;
      const wentDown = !data.registered && previous.wasRegistered;
      if (cameUp) {
        log(`[provider:${data.provider_id}] registered (publicIp=${data.public_ip})`);
      } else if (wentDown) {
        log(`[provider:${data.provider_id}] registration lost`);
      }
    }
    broadcast('registration', { providerId: data.provider_id, registered: data.registered });
  });

  onProxyEvent('device_registered', (data) => {
    const known = statusStore.noteDeviceRegistered(data);
    if (known) {
      log(`[registrar] ${data.display_name} registered from ${data.address}:${data.port}`);
    }
  });

  onProxyEvent('incoming_call', (data) => {
    log(`[call] incoming: ${data.from_uri} -> ${data.to_number} via ${data.provider_id} (${data.call_id})`);
    statusStore.noteIncomingCall(data);

    // Only an explicit `false` suppresses ringing; a missing flag still rings.
    if (data.ring_browsers !== false) {
      ringBrowsers(data.call_id, data.from_uri);
    }
  });

  onProxyEvent('outbound_device_call', (data) => {
    log(`[call] outbound: device ${data.from_device} -> ${data.to_number} (${data.call_id})`);
    statusStore.noteOutboundDeviceCall(data);
  });

  onProxyEvent('outbound_call_started', (data) => {
    log(`[call] outbound started: ${data.call_id} -> ${data.number} via ${data.provider_id}`);
    statusStore.noteOutboundCallStarted(data);

    if (data.ring_browsers === false) {
      // Calls that must not ring browsers are reported to the fax job manager instead.
      void faxJobManager
        .noteDialing(data.call_id, data.number, data.provider_id)
        .catch((error) => log(`[fax] persist dialing failed: ${reasonOf(error)}`));
      return;
    }

    ringBrowsers(data.call_id, data.number);
  });

  onProxyEvent('call_ringing', (data) => {
    statusStore.noteCallRinging(data);
  });

  onProxyEvent('call_answered', (data) => {
    if (statusStore.noteCallAnswered(data)) {
      log(`[call] ${data.call_id} connected`);
    }

    // WebRTC linking only applies to plain RTP (or unspecified) media with a known endpoint.
    const nonRtpMedia = Boolean(data.media_protocol) && data.media_protocol !== 'rtp';
    if (nonRtpMedia || !data.provider_media_addr || !data.provider_media_port) {
      return;
    }

    const target = webRtcLinks.noteCallAnswered(data.call_id, {
      addr: data.provider_media_addr,
      port: data.provider_media_port,
      sipPt: data.sip_pt ?? 9,
    });
    if (target) {
      onLinkWebRtcSession(data.call_id, target.sessionId, target.media);
      return;
    }

    log(`[webrtc] media info cached for call=${data.call_id}, waiting for session accept`);
  });

  onProxyEvent('call_ended', (data) => {
    if (statusStore.noteCallEnded(data)) {
      log(`[call] ${data.call_id} ended: ${data.reason} (${data.duration}s)`);
    }

    broadcast('webrtc-call-ended', { callId: data.call_id });

    const linkedSessionId = webRtcLinks.cleanupCall(data.call_id);
    if (linkedSessionId) {
      onCloseWebRtcSession(linkedSessionId);
    }
  });

  onProxyEvent('sip_unhandled', (data) => {
    log(`[sip] unhandled ${data.method_or_status} Call-ID=${data.call_id?.slice(0, 20)} from=${data.from_addr}:${data.from_port}`);
  });

  onProxyEvent('leg_added', (data) => {
    log(
      `[leg] added: call=${data.call_id} leg=${data.leg_id} kind=${data.kind} state=${data.state}${legMediaDetails(data)}`,
    );
    statusStore.noteLegAdded(data);
  });

  onProxyEvent('leg_removed', (data) => {
    log(`[leg] removed: call=${data.call_id} leg=${data.leg_id}`);
    statusStore.noteLegRemoved(data);
  });

  onProxyEvent('leg_state_changed', (data) => {
    log(
      `[leg] state: call=${data.call_id} leg=${data.leg_id} -> ${data.state}${legMediaDetails(data)}`,
    );
    statusStore.noteLegStateChanged(data);
  });

  onProxyEvent('webrtc_ice_candidate', (data) => {
    const candidate = {
      candidate: data.candidate,
      sdpMid: data.sdp_mid,
      sdpMLineIndex: data.sdp_mline_index,
    };
    broadcast('webrtc-ice', { sessionId: data.session_id, candidate });
  });

  onProxyEvent('webrtc_state', (data) => {
    log(`[webrtc] session=${data.session_id?.slice(0, 8)} state=${data.state}`);
  });

  onProxyEvent('webrtc_track', (data) => {
    log(`[webrtc] session=${data.session_id?.slice(0, 8)} track=${data.kind} codec=${data.codec}`);
  });

  onProxyEvent('webrtc_audio_rx', (data) => {
    // Only the 1st and 50th packets are logged.
    const worthLogging = data.packet_count === 1 || data.packet_count === 50;
    if (worthLogging) {
      log(`[webrtc] session=${data.session_id?.slice(0, 8)} browser audio rx #${data.packet_count}`);
    }
  });

  onProxyEvent('voicemail_started', (data) => {
    log(`[voicemail] started for call ${data.call_id} box=${data.voicebox_id || 'default'} caller=${data.caller_number}`);
  });

  onProxyEvent('recording_done', (data) => {
    const boxId = data.voicebox_id || 'default';
    log(`[voicemail] recording done: ${data.file_path} (${data.duration_ms}ms) box=${boxId} caller=${data.caller_number}`);

    const message = {
      callerNumber: data.caller_number || 'Unknown',
      callerName: null,
      fileName: data.file_path,
      durationMs: data.duration_ms,
    };
    void voiceboxManager
      .addMessage(boxId, message)
      .catch((error) => log(`[voicemail] persist failed: ${reasonOf(error)}`));
  });

  onProxyEvent('voicemail_error', (data) => {
    log(`[voicemail] error: ${data.error} call=${data.call_id}`);
  });

  onProxyEvent('fax_started', (data) => {
    void faxJobManager
      .noteStarted(data)
      .catch((error) => log(`[fax] persist start failed: ${reasonOf(error)}`));
    log(`[fax] started: call=${data.call_id} leg=${data.leg_id} ${data.direction}/${data.transport} codec=${data.codec || '?'} file=${data.file_path}`);
  });

  onProxyEvent('fax_completed', (data) => {
    void faxJobManager
      .noteCompleted(data)
      .catch((error) => log(`[fax] persist completion failed: ${reasonOf(error)}`));
    log(
      `[fax] completed: call=${data.call_id} leg=${data.leg_id} success=${data.success} pagesTx=${data.stats.pages_tx} bitrate=${data.stats.bit_rate} completion=${data.completion_label || data.completion_code || 'unknown'}`,
    );

    // Successful inbound faxes addressed to a fax box are stored in that box.
    const storeInInbox = data.direction === 'inbound' && data.success && data.fax_box_id;
    if (storeInInbox) {
      void faxBoxManager
        .addMessage(storeInInbox, {
          callerNumber: data.caller_number,
          fileName: data.file_path,
          completionCode: data.completion_code,
          completionLabel: data.completion_label,
          pageCount: data.stats.pages_rx || data.stats.pages_tx,
          bitRate: data.stats.bit_rate,
        })
        .catch((error) => log(`[fax] persist inbox failed: ${reasonOf(error)}`));
    }

    hangupAfterFax(data);
  });

  onProxyEvent('fax_failed', (data) => {
    void faxJobManager
      .noteFailed(data)
      .catch((error) => log(`[fax] persist failure failed: ${reasonOf(error)}`));
    log(`[fax] failed: call=${data.call_id} leg=${data.leg_id} error=${data.error}`);
    hangupAfterFax(data);
  });
}
|
||||
@@ -0,0 +1,328 @@
|
||||
import type { IAppConfig } from '../config.ts';
|
||||
import type {
|
||||
ICallAnsweredEvent,
|
||||
ICallEndedEvent,
|
||||
ICallRingingEvent,
|
||||
IDeviceRegisteredEvent,
|
||||
IIncomingCallEvent,
|
||||
ILegAddedEvent,
|
||||
ILegRemovedEvent,
|
||||
ILegStateChangedEvent,
|
||||
IOutboundCallEvent,
|
||||
IOutboundCallStartedEvent,
|
||||
IProviderRegisteredEvent,
|
||||
} from '../shared/proxy-events.ts';
|
||||
import type {
|
||||
IActiveCall,
|
||||
ICallHistoryEntry,
|
||||
IDeviceStatus,
|
||||
IProviderStatus,
|
||||
IStatusSnapshot,
|
||||
TLegType,
|
||||
} from '../shared/status.ts';
|
||||
|
||||
/** Upper bound on the number of finished calls kept in the call history. */
const MAX_HISTORY = 100;

/** Display names for the payload-type numbers the status store can label. */
const CODEC_NAMES: Record<number, string> = {
  0: 'PCMU',
  8: 'PCMA',
  9: 'G.722',
  111: 'Opus',
};
|
||||
|
||||
/**
 * In-memory view of providers, devices, active calls and recent call history,
 * kept up to date from proxy-engine events and exposed as status snapshots.
 */
export class StatusStore {
  private appConfig: IAppConfig;
  private providerStatuses = new Map<string, IProviderStatus>();
  private deviceStatuses = new Map<string, IDeviceStatus>();
  private activeCalls = new Map<string, IActiveCall>();
  private callHistory: ICallHistoryEntry[] = [];

  /** @param appConfig Configuration listing the providers and devices to track. */
  constructor(appConfig: IAppConfig) {
    this.appConfig = appConfig;
    this.rebuildConfigState();
  }

  /** Swap in a new configuration, keeping the state of entries that still exist. */
  updateConfig(appConfig: IAppConfig): void {
    this.appConfig = appConfig;
    this.rebuildConfigState();
  }

  /**
   * Assemble the current status snapshot.
   *
   * @param instanceId Copied into the snapshot unchanged.
   * @param startTime Epoch milliseconds used as the uptime reference.
   * @param browserDeviceIds Browser devices to list after the configured ones.
   * @param voicemailCounts Copied into the snapshot unchanged.
   */
  buildStatusSnapshot(
    instanceId: string,
    startTime: number,
    browserDeviceIds: string[],
    voicemailCounts: Record<string, number>,
  ): IStatusSnapshot {
    // Browser devices are synthesized per snapshot; they are never stored.
    const browserDevices: IDeviceStatus[] = browserDeviceIds.map((deviceId) => ({
      id: deviceId,
      displayName: 'Browser',
      address: null,
      port: 0,
      aor: null,
      connected: true,
      isBrowser: true,
    }));
    const devices = [...this.deviceStatuses.values(), ...browserDevices];

    return {
      instanceId,
      uptime: Math.floor((Date.now() - startTime) / 1000),
      lanIp: this.appConfig.proxy.lanIp,
      providers: [...this.providerStatuses.values()],
      devices,
      calls: [...this.activeCalls.values()].map((call) => ({
        ...call,
        duration: Math.floor((Date.now() - call.startedAt) / 1000),
        // Packet counters and transcoding are reported with fixed placeholder values.
        legs: [...call.legs.values()].map((leg) => ({
          ...leg,
          pktSent: 0,
          pktReceived: 0,
          transcoding: false,
        })),
      })),
      callHistory: this.callHistory,
      contacts: this.appConfig.contacts || [],
      voicemailCounts,
    };
  }

  /** Record an outbound call started from the dashboard; state becomes `setting-up`. */
  noteDashboardCallStarted(callId: string, number: string, providerId?: string): void {
    const entry = this.getOrCreateCall(callId, 'outbound');
    entry.direction = 'outbound';
    entry.callerNumber = null;
    entry.calleeNumber = number;
    entry.providerUsed = providerId || null;
    entry.state = 'setting-up';
  }

  /**
   * Apply a provider registration update.
   *
   * @returns The registration flag from before the update, or `null` when the
   *   provider id is not tracked.
   */
  noteProviderRegistered(data: IProviderRegisteredEvent): { wasRegistered: boolean } | null {
    const status = this.providerStatuses.get(data.provider_id);
    if (status === undefined) {
      return null;
    }

    const result = { wasRegistered: status.registered };
    status.registered = data.registered;
    status.publicIp = data.public_ip;
    return result;
  }

  /**
   * Apply a device registration and mark the device connected.
   *
   * @returns `false` when the device id is not tracked, otherwise `true`.
   */
  noteDeviceRegistered(data: IDeviceRegisteredEvent): boolean {
    const status = this.deviceStatuses.get(data.device_id);
    if (status === undefined) {
      return false;
    }

    status.address = data.address;
    status.port = data.port;
    status.aor = data.aor;
    status.connected = true;
    return true;
  }

  /** Record an inbound call; a call still in `setting-up` moves to `ringing`. */
  noteIncomingCall(data: IIncomingCallEvent): void {
    const entry = this.getOrCreateCall(data.call_id, 'inbound');
    entry.direction = 'inbound';
    entry.callerNumber = data.from_uri;
    entry.calleeNumber = data.to_number;
    entry.providerUsed = data.provider_id;
    if (entry.state === 'setting-up') {
      entry.state = 'ringing';
    }
  }

  /** Record an outbound call placed by a device; the provider is cleared. */
  noteOutboundDeviceCall(data: IOutboundCallEvent): void {
    const entry = this.getOrCreateCall(data.call_id, 'outbound');
    entry.direction = 'outbound';
    entry.callerNumber = data.from_device;
    entry.calleeNumber = data.to_number;
    entry.providerUsed = null;
  }

  /** Record that an outbound call is being dialed through a provider. */
  noteOutboundCallStarted(data: IOutboundCallStartedEvent): void {
    const entry = this.getOrCreateCall(data.call_id, 'outbound');
    entry.direction = 'outbound';
    // Keeps a caller that is already set; only a nullish value becomes null.
    entry.callerNumber = entry.callerNumber ?? null;
    entry.calleeNumber = data.number;
    entry.providerUsed = data.provider_id;
  }

  /** Mark a call as ringing, creating the call entry when it is unknown. */
  noteCallRinging(data: ICallRingingEvent): void {
    this.getOrCreateCall(data.call_id).state = 'ringing';
  }

  /**
   * Mark a call as connected and, when provider media details are present,
   * copy them onto the first `sip-provider` leg.
   *
   * @returns Always `true`.
   */
  noteCallAnswered(data: ICallAnsweredEvent): boolean {
    const entry = this.getOrCreateCall(data.call_id);
    entry.state = 'connected';

    if (!data.provider_media_addr || !data.provider_media_port) {
      return true;
    }

    const providerLeg = [...entry.legs.values()].find((leg) => leg.type === 'sip-provider');
    if (providerLeg === undefined) {
      return true;
    }

    providerLeg.remoteMedia = `${data.provider_media_addr}:${data.provider_media_port}`;
    if (data.media_protocol) {
      providerLeg.mediaProtocol = data.media_protocol;
    }
    if (data.media_protocol === 't38-udptl') {
      providerLeg.codec = 'T.38';
    } else if (data.sip_pt !== undefined) {
      // Payload types without a known name are shown as `PT<number>`.
      providerLeg.codec = CODEC_NAMES[data.sip_pt] || `PT${data.sip_pt}`;
    }
    return true;
  }

  /**
   * Move a call from the active set to the front of the history.
   *
   * @returns `false` when the call is not active, otherwise `true`.
   */
  noteCallEnded(data: ICallEndedEvent): boolean {
    const entry = this.activeCalls.get(data.call_id);
    if (entry === undefined) {
      return false;
    }

    const legs = [...entry.legs.values()].map((leg) => ({
      id: leg.id,
      type: leg.type,
      state: leg.state,
      codec: leg.codec,
      rtpPort: leg.rtpPort,
      mediaProtocol: leg.mediaProtocol,
      remoteMedia: leg.remoteMedia,
      metadata: leg.metadata || {},
    }));
    this.callHistory.unshift({
      id: entry.id,
      direction: entry.direction,
      callerNumber: entry.callerNumber,
      calleeNumber: entry.calleeNumber,
      providerUsed: entry.providerUsed,
      startedAt: entry.startedAt,
      duration: data.duration,
      legs,
    });

    // Newest entries sit at the front, so the oldest is dropped from the back.
    if (this.callHistory.length > MAX_HISTORY) {
      this.callHistory.pop();
    }

    this.activeCalls.delete(data.call_id);
    return true;
  }

  /** Add (or replace) a leg on a call, creating the call entry when it is unknown. */
  noteLegAdded(data: ILegAddedEvent): void {
    const entry = this.getOrCreateCall(data.call_id);
    entry.legs.set(data.leg_id, {
      id: data.leg_id,
      type: data.kind,
      state: data.state,
      codec: data.codec ?? null,
      rtpPort: data.rtpPort ?? null,
      mediaProtocol: data.mediaProtocol ?? null,
      remoteMedia: data.remoteMedia ?? null,
      metadata: data.metadata || {},
    });
  }

  /** Remove a leg from an active call; unknown calls are ignored. */
  noteLegRemoved(data: ILegRemovedEvent): void {
    const entry = this.activeCalls.get(data.call_id);
    entry?.legs.delete(data.leg_id);
  }

  /**
   * Apply a leg state change. Fields absent from the event keep their stored
   * value; an unknown leg is created with its type inferred from the leg id.
   */
  noteLegStateChanged(data: ILegStateChangedEvent): void {
    const entry = this.getOrCreateCall(data.call_id);
    const leg = entry.legs.get(data.leg_id);

    if (leg === undefined) {
      entry.legs.set(data.leg_id, {
        id: data.leg_id,
        type: this.inferLegType(data.leg_id),
        state: data.state,
        codec: data.codec ?? null,
        rtpPort: data.rtpPort ?? null,
        mediaProtocol: data.mediaProtocol ?? null,
        remoteMedia: data.remoteMedia ?? null,
        metadata: data.metadata || {},
      });
      return;
    }

    leg.state = data.state;
    // `undefined` means "not reported"; an explicit null overwrites the stored value.
    if (data.codec !== undefined) {
      leg.codec = data.codec;
    }
    if (data.rtpPort !== undefined) {
      leg.rtpPort = data.rtpPort;
    }
    if (data.mediaProtocol !== undefined) {
      leg.mediaProtocol = data.mediaProtocol;
    }
    if (data.remoteMedia !== undefined) {
      leg.remoteMedia = data.remoteMedia;
    }
    if (data.metadata) {
      leg.metadata = data.metadata;
    }
  }

  /**
   * Rebuild the provider and device maps from the current configuration.
   * Entries whose id already existed keep their runtime state; entries that
   * are no longer configured are dropped.
   */
  private rebuildConfigState(): void {
    const providers = new Map<string, IProviderStatus>();
    for (const { id, displayName } of this.appConfig.providers) {
      const known = this.providerStatuses.get(id);
      providers.set(id, {
        id,
        displayName,
        registered: known?.registered ?? false,
        publicIp: known?.publicIp ?? null,
      });
    }
    this.providerStatuses = providers;

    const devices = new Map<string, IDeviceStatus>();
    for (const { id, displayName } of this.appConfig.devices) {
      const known = this.deviceStatuses.get(id);
      devices.set(id, {
        id,
        displayName,
        address: known?.address ?? null,
        port: known?.port ?? 0,
        aor: known?.aor ?? null,
        connected: known?.connected ?? false,
        isBrowser: false,
      });
    }
    this.deviceStatuses = devices;
  }

  /** Guess a leg type from its id: `-prov` wins over `-dev`; anything else is `webrtc`. */
  private inferLegType(legId: string): TLegType {
    return legId.includes('-prov')
      ? 'sip-provider'
      : legId.includes('-dev')
        ? 'sip-device'
        : 'webrtc';
  }

  /**
   * Look up an active call, creating a `setting-up` entry when it is unknown.
   * `direction` is only used for newly created entries.
   */
  private getOrCreateCall(callId: string, direction: 'inbound' | 'outbound' = 'inbound'): IActiveCall {
    const existing = this.activeCalls.get(callId);
    if (existing) {
      return existing;
    }

    const created: IActiveCall = {
      id: callId,
      direction,
      callerNumber: null,
      calleeNumber: null,
      providerUsed: null,
      state: 'setting-up',
      startedAt: Date.now(),
      legs: new Map(),
    };
    this.activeCalls.set(callId, created);
    return created;
  }
}
|
||||
@@ -0,0 +1,66 @@
|
||||
/** Provider-side media endpoint of an answered call. */
export interface IProviderMediaInfo {
  /** Media address of the provider. */
  addr: string;
  /** Media port of the provider. */
  port: number;
  /** Payload type number negotiated on the SIP side. */
  sipPt: number;
}

/** A WebRTC session together with the provider media it should be linked to. */
export interface IWebRtcLinkTarget {
  sessionId: string;
  media: IProviderMediaInfo;
}

/**
 * Keeps the one-to-one pairing between calls and WebRTC sessions, and parks
 * provider media details for calls that were answered before any session
 * accepted them.
 */
export class WebRtcLinkManager {
  private sessionToCall = new Map<string, string>();
  private callToSession = new Map<string, string>();
  private pendingCallMedia = new Map<string, IProviderMediaInfo>();

  /**
   * Pair a session with a call, dropping any older pairing either side had.
   *
   * @returns Media details parked for the call (consumed by this call), or
   *   `null` when none were parked.
   */
  acceptCall(callId: string, sessionId: string): IProviderMediaInfo | null {
    // The session may have been paired with another call before.
    const formerCallId = this.sessionToCall.get(sessionId);
    if (formerCallId && formerCallId !== callId) {
      this.callToSession.delete(formerCallId);
    }

    // The call may have been paired with another session before.
    const formerSessionId = this.callToSession.get(callId);
    if (formerSessionId && formerSessionId !== sessionId) {
      this.sessionToCall.delete(formerSessionId);
    }

    this.sessionToCall.set(sessionId, callId);
    this.callToSession.set(callId, sessionId);

    const parked = this.pendingCallMedia.get(callId);
    if (!parked) {
      return null;
    }
    this.pendingCallMedia.delete(callId);
    return parked;
  }

  /**
   * Report that a call was answered with the given provider media.
   *
   * @returns The session to link when one already accepted the call; otherwise
   *   `null`, with the media parked until a session accepts.
   */
  noteCallAnswered(callId: string, media: IProviderMediaInfo): IWebRtcLinkTarget | null {
    const sessionId = this.callToSession.get(callId);
    if (sessionId) {
      return { sessionId, media };
    }

    this.pendingCallMedia.set(callId, media);
    return null;
  }

  /**
   * Forget a session and its pairing.
   *
   * @returns The call the session was paired with, or `null`.
   */
  removeSession(sessionId: string): string | null {
    const pairedCallId = this.sessionToCall.get(sessionId) ?? null;
    this.sessionToCall.delete(sessionId);
    if (pairedCallId) {
      this.callToSession.delete(pairedCallId);
    }
    return pairedCallId;
  }

  /**
   * Forget a call, its pairing and any parked media.
   *
   * @returns The session the call was paired with, or `null`.
   */
  cleanupCall(callId: string): string | null {
    const pairedSessionId = this.callToSession.get(callId) ?? null;
    this.callToSession.delete(callId);
    this.pendingCallMedia.delete(callId);
    if (pairedSessionId) {
      this.sessionToCall.delete(pairedSessionId);
    }
    return pairedSessionId;
  }
}
|
||||
@@ -0,0 +1,208 @@
|
||||
import type { TLegType } from './status.ts';
|
||||
|
||||
/** Payload of the `incoming_call` proxy event. */
export interface IIncomingCallEvent {
  /** Identifier of the call. */
  call_id: string;
  /** Caller, as reported by the proxy engine. */
  from_uri: string;
  /** Number that was dialed. */
  to_number: string;
  /** Provider the call arrived through. */
  provider_id: string;
  /** Only an explicit `false` stops browser devices from being notified. */
  ring_browsers?: boolean;
}
|
||||
|
||||
/** Payload of the `outbound_device_call` proxy event. */
export interface IOutboundCallEvent {
  /** Identifier of the call. */
  call_id: string;
  /** Device that placed the call, or `null` when not reported. */
  from_device: string | null;
  /** Number that was dialed. */
  to_number: string;
}
|
||||
|
||||
/** Payload of the `outbound_call_started` proxy event. */
export interface IOutboundCallStartedEvent {
  /** Identifier of the call. */
  call_id: string;
  /** Number being dialed. */
  number: string;
  /** Provider the call is dialed through. */
  provider_id: string;
  /** Only an explicit `false` stops browser devices from being notified. */
  ring_browsers?: boolean;
}
|
||||
|
||||
/** Payload of the `call_ringing` proxy event. */
export interface ICallRingingEvent {
  /** Identifier of the call that started ringing. */
  call_id: string;
}
|
||||
|
||||
/** Payload of the `call_answered` proxy event. */
export interface ICallAnsweredEvent {
  /** Identifier of the answered call. */
  call_id: string;
  /** Provider media address, when reported. */
  provider_media_addr?: string;
  /** Provider media port, when reported. */
  provider_media_port?: number;
  /** Media protocol label, e.g. `rtp` or `t38-udptl`. */
  media_protocol?: string;
  /** Payload type number negotiated on the SIP side. */
  sip_pt?: number;
}
|
||||
|
||||
/** Payload of the `call_ended` proxy event. */
export interface ICallEndedEvent {
  /** Identifier of the call that ended. */
  call_id: string;
  /** Reason text reported by the proxy engine. */
  reason: string;
  /** Call duration; the event handler logs it with an `s` (seconds) suffix. */
  duration: number;
  /** NOTE(review): presumably the side that ended the call — confirm against the engine. */
  from_side?: string;
}
|
||||
|
||||
/** Payload of the `provider_registered` proxy event. */
export interface IProviderRegisteredEvent {
  /** Identifier of the provider. */
  provider_id: string;
  /** Current registration state. */
  registered: boolean;
  /** Public IP reported with the registration, or `null`. */
  public_ip: string | null;
}
|
||||
|
||||
/** Payload of the `device_registered` proxy event. */
export interface IDeviceRegisteredEvent {
  /** Identifier of the device. */
  device_id: string;
  /** Human-readable device name. */
  display_name: string;
  /** Address the device registered from. */
  address: string;
  /** Port the device registered from. */
  port: number;
  /** Address-of-record reported for the device. */
  aor: string;
  /** NOTE(review): presumably the registration lifetime — unit not visible here, confirm. */
  expires: number;
}
|
||||
|
||||
/** Payload of the `sip_unhandled` proxy event. */
export interface ISipUnhandledEvent {
  /** SIP method or status of the unhandled message. */
  method_or_status: string;
  /** SIP Call-ID, when available. */
  call_id?: string;
  /** Source address of the message. */
  from_addr: string;
  /** Source port of the message. */
  from_port: number;
}
|
||||
|
||||
/** Payload of the `leg_added` proxy event. */
export interface ILegAddedEvent {
  /** Call the leg belongs to. */
  call_id: string;
  /** Identifier of the new leg. */
  leg_id: string;
  /** Kind of leg. */
  kind: TLegType;
  /** Initial leg state label. */
  state: string;
  /** Codec label, when known. */
  codec?: string | null;
  /** RTP port, when known. */
  rtpPort?: number | null;
  /** Media protocol label, when known. */
  mediaProtocol?: string | null;
  /** Remote media endpoint, when known. */
  remoteMedia?: string | null;
  /** Free-form metadata attached to the leg. */
  metadata?: Record<string, unknown>;
}
|
||||
|
||||
/** Payload of the `leg_removed` proxy event. */
export interface ILegRemovedEvent {
  /** Call the leg belonged to. */
  call_id: string;
  /** Identifier of the removed leg. */
  leg_id: string;
}
|
||||
|
||||
/** Payload of the `leg_state_changed` proxy event. */
export interface ILegStateChangedEvent {
  /** Call the leg belongs to. */
  call_id: string;
  /** Identifier of the leg. */
  leg_id: string;
  /** New leg state label. */
  state: string;
  /** Codec label; omitted when not reported. */
  codec?: string | null;
  /** RTP port; omitted when not reported. */
  rtpPort?: number | null;
  /** Media protocol label; omitted when not reported. */
  mediaProtocol?: string | null;
  /** Remote media endpoint; omitted when not reported. */
  remoteMedia?: string | null;
  /** Free-form metadata attached to the leg. */
  metadata?: Record<string, unknown>;
}
|
||||
|
||||
/** Payload of the `webrtc_ice_candidate` proxy event. */
export interface IWebRtcIceCandidateEvent {
  /** WebRTC session the candidate belongs to. */
  session_id: string;
  /** ICE candidate string. */
  candidate: string;
  /** Forwarded to browsers as `sdpMid`. */
  sdp_mid?: string;
  /** Forwarded to browsers as `sdpMLineIndex`. */
  sdp_mline_index?: number;
}
|
||||
|
||||
/** Payload of the `webrtc_state` proxy event. */
export interface IWebRtcStateEvent {
  /** WebRTC session the state belongs to, when reported. */
  session_id?: string;
  /** State label reported by the proxy engine. */
  state: string;
}
|
||||
|
||||
/** Payload of the `webrtc_track` proxy event. */
export interface IWebRtcTrackEvent {
  /** WebRTC session the track belongs to, when reported. */
  session_id?: string;
  /** Track kind label. */
  kind: string;
  /** Codec label of the track. */
  codec: string;
}
|
||||
|
||||
/** Payload of the `webrtc_audio_rx` proxy event. */
export interface IWebRtcAudioRxEvent {
  /** WebRTC session the packets were received on, when reported. */
  session_id?: string;
  /** Packet counter; the event handler logs only the values 1 and 50. */
  packet_count: number;
}
|
||||
|
||||
/** Payload of the `voicemail_started` proxy event. */
export interface IVoicemailStartedEvent {
  /** Call that went to voicemail. */
  call_id: string;
  /** Target voicebox; the event handler logs `default` when missing. */
  voicebox_id?: string;
  /** Caller number, when known. */
  caller_number?: string;
}
|
||||
|
||||
/** Payload of the `recording_done` proxy event. */
export interface IRecordingDoneEvent {
  /** Call the recording belongs to, when reported. */
  call_id?: string;
  /** Target voicebox; the event handler falls back to `default` when missing. */
  voicebox_id?: string;
  /** Path of the recorded file. */
  file_path: string;
  /** Length of the recording in milliseconds. */
  duration_ms: number;
  /** Caller number; the event handler stores `Unknown` when missing. */
  caller_number?: string;
}
|
||||
|
||||
/** Payload of the `voicemail_error` proxy event. */
export interface IVoicemailErrorEvent {
  /** Call the error relates to. */
  call_id: string;
  /** Error text reported by the proxy engine. */
  error: string;
}
|
||||
|
||||
/** Payload of the `fax_started` proxy event. */
export interface IFaxStartedEvent {
  /** Call carrying the fax. */
  call_id: string;
  /** Leg carrying the fax. */
  leg_id: string;
  /** Whether the fax is being sent or received. */
  direction: 'outbound' | 'inbound';
  /** Transport used for the fax session. */
  transport: 'audio' | 't38';
  /** Path of the fax file. */
  file_path: string;
  /** Fax box associated with the call, when any. */
  fax_box_id?: string;
  /** Caller number, when known. */
  caller_number?: string;
  /** Codec label; the event handler logs `?` when missing. */
  codec?: string;
  /** Remote media endpoint, when reported. */
  remote_media?: string;
}
|
||||
|
||||
/** Payload of the `fax_completed` proxy event. */
export interface IFaxCompletedEvent {
  /** Call that carried the fax. */
  call_id: string;
  /** Leg that carried the fax. */
  leg_id: string;
  /** Whether the fax was sent or received. */
  direction: 'outbound' | 'inbound';
  /** Transport used for the fax session. */
  transport: 'audio' | 't38';
  /** Path of the fax file. */
  file_path: string;
  /** Fax box associated with the call, when any. */
  fax_box_id?: string;
  /** Caller number, when known. */
  caller_number?: string;
  /** Codec label, when known. */
  codec?: string;
  /** Whether the fax session succeeded. */
  success: boolean;
  /** Numeric completion code, when reported. */
  completion_code?: number | null;
  /** Completion label, when reported. */
  completion_label?: string | null;
  /** Session counters reported with the completion. */
  stats: {
    bit_rate: number;
    error_correcting_mode: boolean;
    /** Pages sent. */
    pages_tx: number;
    /** Pages received. */
    pages_rx: number;
    image_size: number;
    bad_rows: number;
    longest_bad_row_run: number;
    ecm_retries: number;
    current_status: number;
    rtp_events: number;
    rtn_events: number;
  };
}
|
||||
|
||||
/** Payload of the `fax_failed` proxy event. */
export interface IFaxFailedEvent {
  /** Call that carried the fax. */
  call_id: string;
  /** Leg of the call that carried the fax. */
  leg_id: string;
  /** Whether the fax was being sent or received. */
  direction: 'outbound' | 'inbound';
  /** Fax transport: in-band audio or T.38. */
  transport: 'audio' | 't38';
  /** Path of the fax document file. */
  file_path: string;
  /** Associated fax box, when there is one. */
  fax_box_id?: string;
  /** Caller's number, when known. */
  caller_number?: string;
  /** Error description as a plain string. */
  error: string;
}
|
||||
|
||||
/**
 * Maps every proxy event name to the payload type delivered with it.
 * Keys are the event names; values are the corresponding payload interfaces.
 */
export type TProxyEventMap = {
  // Registration
  provider_registered: IProviderRegisteredEvent;
  device_registered: IDeviceRegisteredEvent;

  // Call lifecycle
  incoming_call: IIncomingCallEvent;
  outbound_device_call: IOutboundCallEvent;
  outbound_call_started: IOutboundCallStartedEvent;
  call_ringing: ICallRingingEvent;
  call_answered: ICallAnsweredEvent;
  call_ended: ICallEndedEvent;
  sip_unhandled: ISipUnhandledEvent;

  // Call legs
  leg_added: ILegAddedEvent;
  leg_removed: ILegRemovedEvent;
  leg_state_changed: ILegStateChangedEvent;

  // WebRTC
  webrtc_ice_candidate: IWebRtcIceCandidateEvent;
  webrtc_state: IWebRtcStateEvent;
  webrtc_track: IWebRtcTrackEvent;
  webrtc_audio_rx: IWebRtcAudioRxEvent;

  // Voicemail
  voicemail_started: IVoicemailStartedEvent;
  recording_done: IRecordingDoneEvent;
  voicemail_error: IVoicemailErrorEvent;

  // Fax
  fax_started: IFaxStartedEvent;
  fax_completed: IFaxCompletedEvent;
  fax_failed: IFaxFailedEvent;
};
|
||||
@@ -0,0 +1,94 @@
|
||||
import type { IContact } from '../config.ts';
|
||||
|
||||
/** Kind of endpoint a call leg is attached to. */
export type TLegType = 'sip-device' | 'sip-provider' | 'webrtc' | 'tool';

/** Direction of a call. */
export type TCallDirection = 'inbound' | 'outbound';
|
||||
|
||||
/** Registration status of one configured provider. */
export interface IProviderStatus {
  /** Provider identifier. */
  id: string;
  /** Name shown for the provider. */
  displayName: string;
  /** Whether the provider is currently registered. */
  registered: boolean;
  /** Public IP associated with the registration, or null when unknown. */
  publicIp: string | null;
}
|
||||
|
||||
/** Connection status of one device. */
export interface IDeviceStatus {
  /** Device identifier. */
  id: string;
  /** Name shown for the device. */
  displayName: string;
  /** Network address of the device, or null when unknown. */
  address: string | null;
  /** Network port of the device. */
  port: number;
  /** Address-of-record of the device, or null when unknown. */
  aor: string | null;
  /** Whether the device is currently connected. */
  connected: boolean;
  /** Whether the device is a browser device. */
  isBrowser: boolean;
}
|
||||
|
||||
/** One leg of a call that is currently active. */
export interface IActiveLeg {
  /** Leg identifier. */
  id: string;
  /** Kind of endpoint the leg is attached to. */
  type: TLegType;
  /** Leg state label, carried as a free-form string. */
  state: string;
  /** Codec name, or null when not known. */
  codec: string | null;
  /** Local RTP port, or null when not known. */
  rtpPort: number | null;
  /** Media protocol label, or null when not known. */
  mediaProtocol: string | null;
  /** Remote media endpoint, or null when not known. */
  remoteMedia: string | null;
  /** Additional untyped key/value data attached to the leg. */
  metadata: Record<string, unknown>;
}
|
||||
|
||||
/** A call that is currently active, together with its legs. */
export interface IActiveCall {
  /** Call identifier. */
  id: string;
  /** Direction of the call. */
  direction: TCallDirection;
  /** Caller's number, or null when unknown. */
  callerNumber: string | null;
  /** Callee's number, or null when unknown. */
  calleeNumber: string | null;
  /** Identifier of the provider used, or null when none/unknown. */
  providerUsed: string | null;
  /** Call state label, carried as a free-form string. */
  state: string;
  /** Start timestamp (presumably epoch milliseconds — confirm against the writer). */
  startedAt: number;
  /** Legs of the call, keyed by string (presumably the leg id — confirm). */
  legs: Map<string, IActiveLeg>;
}
|
||||
|
||||
/** Snapshot of a call leg as stored in the call history. */
export interface IHistoryLeg {
  /** Leg identifier. */
  id: string;
  /** Kind of endpoint the leg was attached to. */
  type: TLegType;
  /** Leg state label, carried as a free-form string. */
  state: string;
  /** Codec name, or null when not known. */
  codec: string | null;
  /** Local RTP port, or null when not known. */
  rtpPort: number | null;
  /** Remote media endpoint, or null when not known. */
  remoteMedia: string | null;
  /** Additional untyped key/value data attached to the leg. */
  metadata: Record<string, unknown>;
}
|
||||
|
||||
/** One finished call as stored in the call history. */
export interface ICallHistoryEntry {
  /** Call identifier. */
  id: string;
  /** Direction of the call. */
  direction: TCallDirection;
  /** Caller's number, or null when unknown. */
  callerNumber: string | null;
  /** Callee's number, or null when unknown. */
  calleeNumber: string | null;
  /** Identifier of the provider used, or null when none/unknown. */
  providerUsed: string | null;
  /** Start timestamp (presumably epoch milliseconds — confirm against the writer). */
  startedAt: number;
  /** Call duration (presumably seconds — confirm against the writer). */
  duration: number;
  /** Legs the call had. */
  legs: IHistoryLeg[];
}
|
||||
|
||||
/** An active leg extended with live packet counters for status reporting. */
export interface ILegStatus extends IActiveLeg {
  /** Number of packets sent on the leg. */
  pktSent: number;
  /** Number of packets received on the leg. */
  pktReceived: number;
  /** Whether the leg is being transcoded. */
  transcoding: boolean;
}
|
||||
|
||||
/** Status view of an active call as reported in a status snapshot. */
export interface ICallStatus {
  /** Call identifier. */
  id: string;
  /** Direction of the call. */
  direction: TCallDirection;
  /** Caller's number, or null when unknown. */
  callerNumber: string | null;
  /** Callee's number, or null when unknown. */
  calleeNumber: string | null;
  /** Identifier of the provider used, or null when none/unknown. */
  providerUsed: string | null;
  /** Call state label, carried as a free-form string. */
  state: string;
  /** Start timestamp (presumably epoch milliseconds — confirm against the writer). */
  startedAt: number;
  /** Elapsed call duration (presumably seconds — confirm against the writer). */
  duration: number;
  /** Legs of the call with their live counters. */
  legs: ILegStatus[];
}
|
||||
|
||||
/** Complete status snapshot of the running instance. */
export interface IStatusSnapshot {
  /** Identifier of the running process instance. */
  instanceId: string;
  /** Uptime of the instance (presumably seconds — confirm against the writer). */
  uptime: number;
  /** LAN IP address of the proxy. */
  lanIp: string;
  /** Status of every provider. */
  providers: IProviderStatus[];
  /** Status of every device. */
  devices: IDeviceStatus[];
  /** Currently active calls. */
  calls: ICallStatus[];
  /** Finished calls. */
  callHistory: ICallHistoryEntry[];
  /** Configured contacts. */
  contacts: IContact[];
  /** Voicemail counts keyed by string (presumably the voicebox id — confirm). */
  voicemailCounts: Record<string, number>;
}
|
||||
+192
-507
@@ -1,37 +1,22 @@
|
||||
/**
|
||||
* SIP proxy — entry point.
|
||||
* SIP proxy bootstrap.
|
||||
*
|
||||
* Spawns the Rust proxy-engine which handles ALL SIP protocol mechanics.
|
||||
* TypeScript is the control plane:
|
||||
* - Loads config and pushes it to Rust
|
||||
* - Receives high-level events (incoming calls, registration, etc.)
|
||||
* - Drives the web dashboard
|
||||
* - Manages IVR, voicemail, announcements
|
||||
* - Handles WebRTC browser signaling (forwarded to Rust in Phase 2)
|
||||
*
|
||||
* No raw SIP ever touches TypeScript.
|
||||
* Spawns the Rust proxy-engine, wires runtime state/event handling,
|
||||
* and starts the web dashboard plus browser signaling layer.
|
||||
*/
|
||||
|
||||
import fs from 'node:fs';
|
||||
import path from 'node:path';
|
||||
|
||||
import { loadConfig } from './config.ts';
|
||||
import type { IAppConfig } from './config.ts';
|
||||
import { applyConfigUpdates, type IAppConfig } from './config.ts';
|
||||
import { FaxBoxManager } from './faxbox.ts';
|
||||
import { FaxJobManager } from './faxjobs.ts';
|
||||
import { broadcastWs, initWebUi } from './frontend.ts';
|
||||
import {
|
||||
initWebRtcSignaling,
|
||||
sendToBrowserDevice,
|
||||
getAllBrowserDeviceIds,
|
||||
getBrowserDeviceWs,
|
||||
} from './webrtcbridge.ts';
|
||||
import { initCodecBridge } from './opusbridge.ts';
|
||||
import { initAnnouncement } from './announcement.ts';
|
||||
import { PromptCache } from './call/prompt-cache.ts';
|
||||
import { initWebRtcSignaling, getAllBrowserDeviceIds, sendToBrowserDevice } from './webrtcbridge.ts';
|
||||
import { VoiceboxManager } from './voicebox.ts';
|
||||
import {
|
||||
initProxyEngine,
|
||||
configureProxyEngine,
|
||||
onProxyEvent,
|
||||
hangupCall,
|
||||
makeCall,
|
||||
shutdownProxyEngine,
|
||||
@@ -40,535 +25,235 @@ import {
|
||||
webrtcLink,
|
||||
webrtcClose,
|
||||
} from './proxybridge.ts';
|
||||
import type {
|
||||
IIncomingCallEvent,
|
||||
IOutboundCallEvent,
|
||||
ICallEndedEvent,
|
||||
IProviderRegisteredEvent,
|
||||
IDeviceRegisteredEvent,
|
||||
} from './proxybridge.ts';
|
||||
import { registerProxyEventHandlers } from './runtime/proxy-events.ts';
|
||||
import { StatusStore } from './runtime/status-store.ts';
|
||||
import { SiprouterStorage } from './storage.ts';
|
||||
import { WebRtcLinkManager, type IProviderMediaInfo } from './runtime/webrtc-linking.ts';
|
||||
|
||||
// ---------------------------------------------------------------------------
// Config
// ---------------------------------------------------------------------------

/** Active application config; assigned in main() after storage is initialised. */
let appConfig: IAppConfig;

/** Trace log file appended to by log(), located in the process working directory. */
const LOG_PATH = path.join(process.cwd(), 'sip_trace.log');

// ---------------------------------------------------------------------------
// Logging
// ---------------------------------------------------------------------------

/** Wall-clock time at module load (epoch milliseconds). */
const startTime = Date.now();
/** Per-process identifier: load timestamp plus a short random base-36 suffix. */
const instanceId = `${Date.now()}-${Math.random().toString(36).slice(2, 8)}`;

const storage = new SiprouterStorage(log);

// Runtime services; all are constructed in main() before first use.
let statusStore: StatusStore;
let webRtcLinks: WebRtcLinkManager;
let faxBoxManager: FaxBoxManager;
let faxJobManager: FaxJobManager;
let voiceboxManager: VoiceboxManager;
|
||||
|
||||
/**
 * Returns the current time formatted as `YYYY-MM-DD HH:MM:SS`.
 *
 * The value is derived from `Date.prototype.toISOString()`, so it is always
 * expressed in UTC; the fractional seconds and the `Z` suffix are cut off.
 */
function now(): string {
  return new Date().toISOString().replace('T', ' ').slice(0, 19);
}
|
||||
|
||||
/**
 * Writes one timestamped log line to every sink.
 *
 * The line is appended synchronously to the trace file at LOG_PATH, written
 * to stdout, and the raw message (without timestamp) is broadcast to
 * connected WebSocket clients as a `log` event.
 *
 * @param message Text to log; a timestamp prefix and trailing newline are added.
 */
function log(message: string): void {
  const line = `${now()} ${message}\n`;
  fs.appendFileSync(LOG_PATH, line);
  process.stdout.write(line);
  broadcastWs('log', { message });
}
|
||||
|
||||
// ---------------------------------------------------------------------------
|
||||
// Shadow state — maintained from Rust events for the dashboard
|
||||
// ---------------------------------------------------------------------------
|
||||
|
||||
interface IProviderStatus {
|
||||
id: string;
|
||||
displayName: string;
|
||||
registered: boolean;
|
||||
publicIp: string | null;
|
||||
/**
 * Extracts a printable message from a caught value.
 *
 * @param error Any thrown value.
 * @returns `error.message` for `Error` instances, otherwise `String(error)`.
 */
function errorMessage(error: unknown): string {
  return error instanceof Error ? error.message : String(error);
}
|
||||
|
||||
interface IDeviceStatus {
|
||||
id: string;
|
||||
displayName: string;
|
||||
address: string | null;
|
||||
port: number;
|
||||
connected: boolean;
|
||||
isBrowser: boolean;
|
||||
}
|
||||
|
||||
interface IActiveCall {
|
||||
id: string;
|
||||
direction: string;
|
||||
callerNumber: string | null;
|
||||
calleeNumber: string | null;
|
||||
providerUsed: string | null;
|
||||
state: string;
|
||||
startedAt: number;
|
||||
}
|
||||
|
||||
interface ICallHistoryEntry {
|
||||
id: string;
|
||||
direction: string;
|
||||
callerNumber: string | null;
|
||||
calleeNumber: string | null;
|
||||
startedAt: number;
|
||||
duration: number;
|
||||
}
|
||||
|
||||
const providerStatuses = new Map<string, IProviderStatus>();
|
||||
const deviceStatuses = new Map<string, IDeviceStatus>();
|
||||
const activeCalls = new Map<string, IActiveCall>();
|
||||
const callHistory: ICallHistoryEntry[] = [];
|
||||
const MAX_HISTORY = 100;
|
||||
|
||||
// WebRTC session ↔ call linking state.
|
||||
// Both pieces (session accept + call media info) can arrive in any order.
|
||||
const webrtcSessionToCall = new Map<string, string>(); // sessionId → callId
|
||||
const webrtcCallToSession = new Map<string, string>(); // callId → sessionId
|
||||
const pendingCallMedia = new Map<string, { addr: string; port: number; sipPt: number }>(); // callId → provider media info
|
||||
|
||||
// Initialize provider statuses from config (all start as unregistered).
|
||||
for (const p of appConfig.providers) {
|
||||
providerStatuses.set(p.id, {
|
||||
id: p.id,
|
||||
displayName: p.displayName,
|
||||
registered: false,
|
||||
publicIp: null,
|
||||
});
|
||||
}
|
||||
|
||||
// Initialize device statuses from config.
|
||||
for (const d of appConfig.devices) {
|
||||
deviceStatuses.set(d.id, {
|
||||
id: d.id,
|
||||
displayName: d.displayName,
|
||||
address: null,
|
||||
port: 0,
|
||||
connected: false,
|
||||
isBrowser: false,
|
||||
});
|
||||
}
|
||||
|
||||
// ---------------------------------------------------------------------------
|
||||
// Initialize subsystems
|
||||
// ---------------------------------------------------------------------------
|
||||
|
||||
const promptCache = new PromptCache(log);
|
||||
const voiceboxManager = new VoiceboxManager(log);
|
||||
voiceboxManager.init(appConfig.voiceboxes ?? []);
|
||||
|
||||
// WebRTC signaling (browser device registration).
|
||||
initWebRtcSignaling({ log });
|
||||
|
||||
// ---------------------------------------------------------------------------
|
||||
// Status snapshot (fed to web dashboard)
|
||||
// ---------------------------------------------------------------------------
|
||||
|
||||
function getStatus() {
|
||||
// Merge SIP devices (from Rust) + browser devices (from TS WebSocket).
|
||||
const devices = [...deviceStatuses.values()];
|
||||
for (const bid of getAllBrowserDeviceIds()) {
|
||||
devices.push({
|
||||
id: bid,
|
||||
displayName: 'Browser',
|
||||
address: null,
|
||||
port: 0,
|
||||
connected: true,
|
||||
isBrowser: true,
|
||||
});
|
||||
}
|
||||
|
||||
/**
 * Builds the configuration object handed to the proxy engine.
 *
 * @param config Application config to project.
 * @returns The proxy, provider, device, routing and IVR sections of `config`,
 *          with `faxboxes` and `voiceboxes` defaulting to empty arrays.
 */
function buildProxyConfig(config: IAppConfig): Record<string, unknown> {
  return {
    proxy: config.proxy,
    providers: config.providers,
    devices: config.devices,
    routing: config.routing,
    faxboxes: config.faxboxes ?? [],
    voiceboxes: config.voiceboxes ?? [],
    ivr: config.ivr,
  };
}
|
||||
|
||||
// ---------------------------------------------------------------------------
|
||||
// Start Rust proxy engine
|
||||
// ---------------------------------------------------------------------------
|
||||
/**
 * Builds the current status snapshot for the dashboard.
 *
 * Delegates to the status store, passing the instance id, the start time,
 * the ids of the currently known browser devices, and the unheard voicemail
 * counts from the voicebox manager.
 */
function getStatus() {
  return statusStore.buildStatusSnapshot(
    instanceId,
    startTime,
    getAllBrowserDeviceIds(),
    voiceboxManager.getAllUnheardCounts(),
  );
}
|
||||
|
||||
/**
 * Asks the proxy engine to link a WebRTC session to a call's media.
 *
 * Fire-and-forget: the function returns immediately and the outcome is only
 * logged. A rejected request is logged as well, so it cannot surface as an
 * unhandled promise rejection.
 *
 * @param callId    Call to link to.
 * @param sessionId WebRTC session to link (only its first 8 characters are logged).
 * @param media     Provider media info (address, port and SIP payload type).
 */
function requestWebRtcLink(callId: string, sessionId: string, media: IProviderMediaInfo): void {
  log(`[webrtc] linking session=${sessionId.slice(0, 8)} to call=${callId} media=${media.addr}:${media.port} pt=${media.sipPt}`);
  void webrtcLink(sessionId, callId, media.addr, media.port, media.sipPt)
    .then((ok) => {
      log(`[webrtc] link result: ${ok}`);
    })
    .catch((error: unknown) => {
      log(`[webrtc] link failed: ${errorMessage(error)}`);
    });
}
|
||||
|
||||
/**
 * Pushes the given application config to the proxy engine.
 *
 * @param config Application config to apply.
 * @returns The result of `configureProxyEngine`; callers treat `false` as a rejected config.
 */
async function configureRuntime(config: IAppConfig): Promise<boolean> {
  return configureProxyEngine(buildProxyConfig(config));
}
|
||||
|
||||
/**
 * Reloads the application config from storage and applies it at runtime.
 *
 * The in-memory config, the status store, and the fax box and voicebox
 * managers are updated first; then the proxy engine is reconfigured. Port
 * changes are only reported, because they need a restart to take effect.
 *
 * Never throws: any failure is caught and logged. Note that `appConfig` is
 * replaced before the engine is reconfigured, so it keeps the new value even
 * when the engine rejects the config.
 */
async function reloadConfig(): Promise<void> {
  try {
    const previousConfig = appConfig;
    const nextConfig = await storage.getAppConfig();

    appConfig = nextConfig;
    statusStore.updateConfig(nextConfig);
    await faxBoxManager.init(nextConfig.faxboxes ?? []);
    await voiceboxManager.init(nextConfig.voiceboxes ?? []);

    if (nextConfig.proxy.lanPort !== previousConfig.proxy.lanPort) {
      log('[config] proxy.lanPort changed; restart required for SIP socket rebinding');
    }
    if (nextConfig.proxy.webUiPort !== previousConfig.proxy.webUiPort) {
      log('[config] proxy.webUiPort changed; restart required for web UI rebinding');
    }

    const configured = await configureRuntime(nextConfig);
    if (configured) {
      log('[config] reloaded - proxy engine reconfigured');
    } else {
      log('[config] reload failed - proxy engine rejected config');
    }
  } catch (error: unknown) {
    log(`[config] reload failed: ${errorMessage(error)}`);
  }
}
|
||||
|
||||
/**
 * Applies a set of updates to the current config, persists the result, and
 * reloads the runtime from storage.
 *
 * @param updatesArg Updates to merge; passed unchanged to `applyConfigUpdates`.
 * @returns The in-memory config after the reload attempt. Because
 *          `reloadConfig` logs failures instead of throwing, this can still be
 *          the previous config if reading it back from storage failed.
 */
async function updateConfig(updatesArg: any): Promise<IAppConfig> {
  const nextConfig = applyConfigUpdates(appConfig, updatesArg);
  await storage.writeAppConfig(nextConfig);
  await reloadConfig();
  return appConfig;
}
|
||||
|
||||
/**
 * Starts the proxy engine, wires its event handlers, and applies the config.
 *
 * Event handlers are registered before the config is sent, so nothing the
 * engine emits while being configured is missed. Exits the process with
 * status 1 when the engine cannot be started or rejects the configuration.
 */
async function startProxyEngine(): Promise<void> {
  const started = await initProxyEngine(log);
  if (!started) {
    log('[FATAL] failed to start proxy engine');
    process.exit(1);
  }

  registerProxyEventHandlers({
    log,
    statusStore,
    faxBoxManager,
    faxJobManager,
    voiceboxManager,
    webRtcLinks,
    getBrowserDeviceIds: getAllBrowserDeviceIds,
    sendToBrowserDevice,
    broadcast: broadcastWs,
    onLinkWebRtcSession: requestWebRtcLink,
    onCloseWebRtcSession: (sessionId) => {
      void webrtcClose(sessionId);
    },
  });

  const configured = await configureRuntime(appConfig);
  if (!configured) {
    log('[FATAL] failed to configure proxy engine');
    process.exit(1);
  }

  const providerList = appConfig.providers.map((provider) => provider.displayName).join(', ');
  const deviceList = appConfig.devices.map((device) => device.displayName).join(', ');
  log(`proxy engine started | LAN ${appConfig.proxy.lanIp}:${appConfig.proxy.lanPort} | providers: ${providerList} | devices: ${deviceList}`);
}
|
||||
|
||||
// ---------------------------------------------------------------------------
|
||||
// Web UI
|
||||
// ---------------------------------------------------------------------------
|
||||
/**
 * Application bootstrap.
 *
 * Order matters: storage is initialised and the config loaded first, then the
 * runtime services are constructed and initialised, then the web UI and
 * browser signaling are started, and finally the proxy engine is started.
 */
async function main(): Promise<void> {
  await storage.init();
  appConfig = await storage.getAppConfig();

  statusStore = new StatusStore(appConfig);
  webRtcLinks = new WebRtcLinkManager();
  faxBoxManager = new FaxBoxManager(log, storage);
  faxJobManager = new FaxJobManager(log, storage);
  voiceboxManager = new VoiceboxManager(log, storage);

  await faxBoxManager.init(appConfig.faxboxes ?? []);
  await faxJobManager.init();
  await voiceboxManager.init(appConfig.voiceboxes ?? []);
  initWebRtcSignaling({ log });

  initWebUi({
    port: appConfig.proxy.webUiPort,
    getStatus,
    getConfig: () => appConfig,
    updateConfig,
    log,
    // Dashboard asked to place a call; returns null when the engine gave no call id.
    onStartCall: async (number, deviceId, providerId) => {
      log(`[dashboard] start call: ${number} device=${deviceId || 'any'} provider=${providerId || 'auto'}`);
      const callId = await makeCall(number, deviceId, providerId);
      if (!callId) {
        log(`[dashboard] call failed for ${number}`);
        return null;
      }
      log(`[dashboard] call started: ${callId}`);
      statusStore.noteDashboardCallStarted(callId, number, providerId);
      return { id: callId };
    },
    // Hangup is fire-and-forget; the callback always reports true.
    onHangupCall: (callId) => {
      void hangupCall(callId);
      return true;
    },
    faxBoxManager,
    faxJobManager,
    voiceboxManager,
    // Browser SDP offer: forward to the engine and send its answer back on the same socket.
    onWebRtcOffer: async (sessionId, sdp, ws) => {
      log(`[webrtc] offer from browser session=${sessionId.slice(0, 8)} sdp_type=${typeof sdp} sdp_len=${sdp?.length || 0}`);
      if (!sdp || typeof sdp !== 'string' || sdp.length < 10) {
        log(`[webrtc] WARNING: invalid SDP (type=${typeof sdp}), skipping offer`);
        return;
      }
      log(`[webrtc] sending offer to Rust (${sdp.length}b)...`);
      const result = await webrtcOffer(sessionId, sdp);
      log(`[webrtc] Rust result: ${JSON.stringify(result)?.slice(0, 200)}`);
      if (result?.sdp) {
        ws.send(JSON.stringify({ type: 'webrtc-answer', sessionId, sdp: result.sdp }));
        log(`[webrtc] answer sent to browser session=${sessionId.slice(0, 8)}`);
        return;
      }
      log('[webrtc] ERROR: no answer SDP from Rust');
    },
    onWebRtcIce: async (sessionId, candidate) => {
      await webrtcIce(sessionId, candidate as Parameters<typeof webrtcIce>[1]);
    },
    onWebRtcClose: async (sessionId) => {
      webRtcLinks.removeSession(sessionId);
      await webrtcClose(sessionId);
    },
    // Browser accepted a call: link immediately when media info is already
    // pending for the call, otherwise wait for it to arrive.
    onWebRtcAccept: (callId, sessionId) => {
      log(`[webrtc] accept: callId=${callId} sessionId=${sessionId.slice(0, 8)}`);

      const pendingMedia = webRtcLinks.acceptCall(callId, sessionId);
      if (pendingMedia) {
        requestWebRtcLink(callId, sessionId, pendingMedia);
        return;
      }

      log(`[webrtc] session ${sessionId.slice(0, 8)} accepted, waiting for call_answered media info`);
    },
  });

  await startProxyEngine();
}
|
||||
|
||||
startProxyEngine();
|
||||
void main().catch((error) => {
|
||||
log(`[FATAL] ${errorMessage(error)}`);
|
||||
process.exit(1);
|
||||
});
|
||||
|
||||
process.on('SIGINT', () => { log('SIGINT, exiting'); shutdownProxyEngine(); process.exit(0); });
|
||||
process.on('SIGTERM', () => { log('SIGTERM, exiting'); shutdownProxyEngine(); process.exit(0); });
|
||||
// Graceful shutdown on SIGINT: stop the proxy engine, then close storage
// before terminating.
process.on('SIGINT', () => {
  log('SIGINT, exiting');
  shutdownProxyEngine();

  // process.exit() does not wait for pending promises, so calling it right
  // after `void storage.close()` would abandon the close half-way. Exit only
  // once close() has settled; the unref'd timer bounds the wait if it hangs.
  const forceExitTimer = setTimeout(() => process.exit(0), 5000);
  forceExitTimer.unref();

  void storage
    .close()
    .catch((error: unknown) => {
      log(`[storage] close failed: ${error instanceof Error ? error.message : String(error)}`);
    })
    .finally(() => process.exit(0));
});
|
||||
|
||||
// Graceful shutdown on SIGTERM: stop the proxy engine, then close storage
// before terminating.
process.on('SIGTERM', () => {
  log('SIGTERM, exiting');
  shutdownProxyEngine();

  // process.exit() does not wait for pending promises, so calling it right
  // after `void storage.close()` would abandon the close half-way. Exit only
  // once close() has settled; the unref'd timer bounds the wait if it hangs.
  const forceExitTimer = setTimeout(() => process.exit(0), 5000);
  forceExitTimer.unref();

  void storage
    .close()
    .catch((error: unknown) => {
      log(`[storage] close failed: ${error instanceof Error ? error.message : String(error)}`);
    })
    .finally(() => process.exit(0));
});
|
||||
|
||||
+250
@@ -0,0 +1,250 @@
|
||||
import fs from 'node:fs';
|
||||
import * as fsPromises from 'node:fs/promises';
|
||||
import path from 'node:path';
|
||||
|
||||
import * as plugins from './plugins.ts';
|
||||
import {
|
||||
createInitialConfigFromEnv,
|
||||
normalizeConfig,
|
||||
type IAppConfig,
|
||||
} from './config.ts';
|
||||
import type { IFaxMessage } from './faxbox.ts';
|
||||
import type { IFaxJob } from './faxjobs.ts';
|
||||
import type { IVoicemailMessage } from './voicebox.ts';
|
||||
|
||||
/**
 * Typed view of the keys this module reads and writes through the
 * key/value store (see `readKey` / `writeKey` on `SiprouterStorage`).
 */
interface ISiprouterDataStore {
  /** Application configuration, normalized before it is written. */
  appConfig: IAppConfig;
  /** All fax jobs, stored as a single list. */
  faxJobs: Array<IFaxJob>;
  /** Fax messages, looked up by box id. */
  faxMessagesByBox: { [boxId: string]: IFaxMessage[] };
  /** Voicemail messages, looked up by box id. */
  voicemailMessagesByBox: { [boxId: string]: IVoicemailMessage[] };
}

/** Logging callback injected into the storage layer. */
type TLogFunction = (messageArg: string) => void;

/**
 * Location of the file-based config that is imported into the store when no
 * `appConfig` key exists yet.
 */
const legacyConfigPath = path.join(process.cwd(), '.nogit', 'config.json');
|
||||
|
||||
/**
 * Returns the value of the first environment variable in `keysArg` that is
 * set to a non-empty string.
 *
 * @param keysArg Variable names in priority order.
 * @returns The first non-empty value found.
 * @throws Error when none of the variables holds a non-empty value.
 */
function requiredEnv(keysArg: string[]): string {
  const firstSetValue = keysArg
    .map((envName) => process.env[envName])
    .find((candidate) => Boolean(candidate));

  if (firstSetValue) {
    return firstSetValue;
  }
  throw new Error(`Missing required environment variable: ${keysArg.join(' or ')}`);
}
|
||||
|
||||
/**
 * Parses an optional numeric setting.
 *
 * Surrounding whitespace is ignored. A missing, empty or whitespace-only value
 * counts as "not set": `Number('   ')` is `0`, which would otherwise be
 * returned as a real value instead of the fallback.
 *
 * @param valueArg Raw string value, typically from `process.env`.
 * @param fallbackArg Returned when the value is not set or not a finite number.
 * @returns The parsed finite number, or `fallbackArg`.
 */
function optionalNumber(valueArg: string | undefined, fallbackArg?: number): number | undefined {
  const trimmed = valueArg?.trim();
  if (!trimmed) return fallbackArg;
  const parsed = Number(trimmed);
  return Number.isFinite(parsed) ? parsed : fallbackArg;
}
|
||||
|
||||
/**
 * Parses an optional boolean setting.
 *
 * The value is trimmed and lower-cased. `0`, `false`, `no` and `off` mean
 * `false`; any other non-empty value means `true`. A missing, empty or
 * whitespace-only value counts as "not set" and yields the fallback, matching
 * how `optionalNumber` treats empty input.
 *
 * @param valueArg Raw string value, typically from `process.env`.
 * @param fallbackArg Returned when the value is not set.
 * @returns The parsed boolean, or `fallbackArg`.
 */
function optionalBoolean(valueArg: string | undefined, fallbackArg?: boolean): boolean | undefined {
  const normalized = valueArg?.trim().toLowerCase();
  if (!normalized) return fallbackArg;
  return !['0', 'false', 'no', 'off'].includes(normalized);
}
|
||||
|
||||
/**
 * Canonicalizes an object key: backslashes become forward slashes, leading
 * slashes are dropped and repeated slashes are collapsed.
 *
 * @param keyArg Key as supplied by the caller.
 * @returns The canonical key.
 * @throws Error when any path segment of the key is `..`.
 */
function normalizeObjectKey(keyArg: string): string {
  const forwardSlashed = keyArg.replace(/\\/g, '/');
  const withoutLeadingSlashes = forwardSlashed.replace(/^\/+/, '');
  const collapsed = withoutLeadingSlashes.replace(/\/+/g, '/');

  const hasParentSegment = collapsed.split('/').some((segment) => segment === '..');
  if (hasParentSegment) {
    throw new Error(`Invalid object key: ${keyArg}`);
  }
  return collapsed;
}
|
||||
|
||||
/**
 * Persistence facade for siprouter.
 *
 * Structured data (app config, fax jobs, per-box fax and voicemail message
 * lists) lives in a smartdata EasyStore named `siprouter-data`. Binary objects
 * live in a smartbucket bucket and are mirrored on demand into a local cache
 * directory (`.nogit/cache` under the current working directory).
 *
 * `init()` must complete before any other method is used.
 */
export class SiprouterStorage {
  private db!: InstanceType<typeof plugins.smartdata.SmartdataDb>;
  // NOTE(review): store and bucket are untyped here; tighten once the
  // smartdata / smartbucket types are confirmed.
  private store!: any;
  private bucket!: any;
  private readonly cacheDir = path.join(process.cwd(), '.nogit', 'cache');
  private readonly log: TLogFunction;

  /**
   * @param logArg Callback used for all log output of this class.
   */
  constructor(logArg: TLogFunction) {
    this.log = logArg;
  }

  /**
   * Connects to the database, opens the EasyStore, resolves (or creates) the
   * bucket and makes sure the cache directory exists.
   *
   * @throws Error when a required environment variable is missing, or when
   * one of the underlying clients fails to initialize.
   */
  public async init(): Promise<void> {
    this.db = new plugins.smartdata.SmartdataDb(this.getMongoDescriptor() as any);
    await this.db.init();
    this.store = await this.db.createEasyStore('siprouter-data');

    const smartBucket = new plugins.smartbucket.SmartBucket(this.getS3Descriptor() as any);
    const bucketName = requiredEnv(['SIPROUTER_S3_BUCKET', 'S3_BUCKET']);
    this.bucket = (await smartBucket.bucketExists(bucketName))
      ? await smartBucket.getBucketByName(bucketName)
      : await smartBucket.createBucket(bucketName);

    await fsPromises.mkdir(this.cacheDir, { recursive: true });
    this.log('[storage] smartdata and smartbucket initialized');
  }

  /** Closes the database connection if `init()` created one. */
  public async close(): Promise<void> {
    if (this.db) {
      await this.db.close();
    }
  }

  /**
   * Loads the application config.
   *
   * When no config is stored yet, the legacy `.nogit/config.json` is imported
   * if it is readable; otherwise a config is created from the environment.
   * That config is persisted. The returned value is always normalized, so the
   * first start yields the same shape as later starts.
   */
  public async getAppConfig(): Promise<IAppConfig> {
    const storedConfig = await this.readKey('appConfig');
    if (storedConfig) {
      return normalizeConfig(storedConfig);
    }

    const legacyConfig = await this.readLegacyConfig();
    const initialConfig = normalizeConfig(legacyConfig ?? createInitialConfigFromEnv());
    await this.writeAppConfig(initialConfig);
    this.log(legacyConfig ? '[storage] imported legacy .nogit/config.json into smartdata' : '[storage] created initial smartdata config');
    return initialConfig;
  }

  /** Normalizes and persists the application config. */
  public async writeAppConfig(configArg: IAppConfig): Promise<void> {
    await this.writeKey('appConfig', normalizeConfig(configArg));
  }

  /** Returns all stored fax jobs, or an empty list when none are stored. */
  public async getFaxJobs(): Promise<IFaxJob[]> {
    return (await this.readKey('faxJobs')) || [];
  }

  /** Replaces the stored fax job list. */
  public async writeFaxJobs(jobsArg: IFaxJob[]): Promise<void> {
    await this.writeKey('faxJobs', jobsArg);
  }

  /** Returns the voicemail messages of one box, or an empty list. */
  public async getVoicemailMessages(boxIdArg: string): Promise<IVoicemailMessage[]> {
    const allMessages = (await this.readKey('voicemailMessagesByBox')) || {};
    return allMessages[boxIdArg] || [];
  }

  /**
   * Replaces the voicemail messages of one box.
   *
   * NOTE(review): this is a read-modify-write of the whole map; concurrent
   * writers for different boxes can overwrite each other — confirm callers
   * serialize writes.
   */
  public async writeVoicemailMessages(boxIdArg: string, messagesArg: IVoicemailMessage[]): Promise<void> {
    const allMessages = (await this.readKey('voicemailMessagesByBox')) || {};
    allMessages[boxIdArg] = messagesArg;
    await this.writeKey('voicemailMessagesByBox', allMessages);
  }

  /** Returns the fax messages of one box, or an empty list. */
  public async getFaxMessages(boxIdArg: string): Promise<IFaxMessage[]> {
    const allMessages = (await this.readKey('faxMessagesByBox')) || {};
    return allMessages[boxIdArg] || [];
  }

  /**
   * Replaces the fax messages of one box.
   *
   * NOTE(review): same read-modify-write caveat as `writeVoicemailMessages`.
   */
  public async writeFaxMessages(boxIdArg: string, messagesArg: IFaxMessage[]): Promise<void> {
    const allMessages = (await this.readKey('faxMessagesByBox')) || {};
    allMessages[boxIdArg] = messagesArg;
    await this.writeKey('faxMessagesByBox', allMessages);
  }

  /**
   * Uploads a local file to the bucket, overwriting any existing object.
   *
   * @param objectKeyArg Target object key; normalized before use.
   * @param filePathArg Local file to upload.
   * @returns The normalized object key.
   * @throws Error for an invalid key, an unreadable file or a failed upload.
   */
  public async putFileObject(objectKeyArg: string, filePathArg: string): Promise<string> {
    // Validate the key before touching the file system.
    const objectKey = normalizeObjectKey(objectKeyArg);
    const contents = await fsPromises.readFile(filePathArg);
    return this.putBufferObject(objectKey, contents);
  }

  /**
   * Uploads a buffer to the bucket, overwriting any existing object, and
   * drops the local cache copy for that key.
   *
   * @returns The normalized object key.
   */
  public async putBufferObject(objectKeyArg: string, bufferArg: Buffer): Promise<string> {
    const objectKey = normalizeObjectKey(objectKeyArg);
    await this.bucket.fastPut({ path: objectKey, contents: bufferArg, overwrite: true });
    await this.removeCachedObject(objectKey);
    return objectKey;
  }

  /**
   * Returns a local file path holding the object's contents, downloading the
   * object into the cache directory when it is not cached yet.
   *
   * @param objectKeyArg Object key; normalized before use.
   * @param fileNameArg Optional file name. If fetching fails, a file with this
   * base name directly inside the cache directory is used when it exists.
   * @returns The local path, or `null` when the object cannot be provided.
   * @throws Error only for an invalid object key.
   */
  public async getObjectAsCachedFile(objectKeyArg: string, fileNameArg?: string): Promise<string | null> {
    const objectKey = normalizeObjectKey(objectKeyArg);
    const cachePath = this.getCachePath(objectKey);
    try {
      if (await this.pathExists(cachePath)) {
        return cachePath;
      }
      const contents = await this.bucket.fastGet({ path: objectKey });
      await fsPromises.mkdir(path.dirname(cachePath), { recursive: true });

      // Write to a temp file and rename it into place, so an interrupted
      // write never leaves a truncated file that later counts as a cache hit.
      const tempPath = `${cachePath}.${process.pid}.${Math.random().toString(36).slice(2)}.tmp`;
      try {
        await fsPromises.writeFile(tempPath, contents);
        await fsPromises.rename(tempPath, cachePath);
      } catch (error) {
        await fsPromises.rm(tempPath, { force: true }).catch(() => {});
        throw error;
      }
      return cachePath;
    } catch {
      // Best-effort lookup: any failure falls through to the optional fallback.
      if (fileNameArg) {
        const fallbackPath = path.join(this.cacheDir, path.basename(fileNameArg));
        return (await this.pathExists(fallbackPath)) ? fallbackPath : null;
      }
      return null;
    }
  }

  /**
   * Removes an object from the bucket and from the local cache. A missing or
   * empty key is a no-op.
   */
  public async removeObject(objectKeyArg: string | undefined): Promise<void> {
    if (!objectKeyArg) return;
    const objectKey = normalizeObjectKey(objectKeyArg);
    try {
      await this.bucket.fastRemove({ path: objectKey });
    } catch {
      // Missing objects are harmless during metadata cleanup.
    }
    await this.removeCachedObject(objectKey);
  }

  /** Maps an object key to its location inside the cache directory. */
  private getCachePath(objectKeyArg: string): string {
    return path.join(this.cacheDir, normalizeObjectKey(objectKeyArg));
  }

  /** Deletes the cached copy of an object; failures are ignored. */
  private async removeCachedObject(objectKeyArg: string): Promise<void> {
    await fsPromises.rm(this.getCachePath(objectKeyArg), { force: true }).catch(() => {});
  }

  /** Non-blocking existence check for a local path. */
  private async pathExists(pathArg: string): Promise<boolean> {
    try {
      await fsPromises.access(pathArg);
      return true;
    } catch {
      return false;
    }
  }

  /**
   * Reads and normalizes the legacy JSON config file.
   *
   * @returns The config, or `null` when the file is missing, unreadable or
   * not valid JSON.
   */
  private async readLegacyConfig(): Promise<IAppConfig | null> {
    try {
      const raw = await fsPromises.readFile(legacyConfigPath, 'utf8');
      return normalizeConfig(JSON.parse(raw) as IAppConfig);
    } catch {
      return null;
    }
  }

  /**
   * Reads one key from the store.
   *
   * Store errors are logged and reported as `undefined`, so callers treat
   * them like a missing key. The log line makes such failures visible,
   * because callers may go on to write default data.
   */
  private async readKey<TKey extends keyof ISiprouterDataStore>(keyArg: TKey): Promise<ISiprouterDataStore[TKey] | undefined> {
    try {
      return (await this.store.readKey(keyArg)) as ISiprouterDataStore[TKey] | undefined;
    } catch (error) {
      const reason = error instanceof Error ? error.message : String(error);
      this.log(`[storage] failed to read key ${String(keyArg)}: ${reason}`);
      return undefined;
    }
  }

  /** Writes one key to the store. */
  private async writeKey<TKey extends keyof ISiprouterDataStore>(
    keyArg: TKey,
    valueArg: ISiprouterDataStore[TKey],
  ): Promise<void> {
    await this.store.writeKey(keyArg, valueArg);
  }

  /**
   * Builds the database descriptor from environment variables.
   *
   * The URL is required; the database name defaults to `siprouter`; user and
   * password are included only when set.
   */
  private getMongoDescriptor(): Record<string, string> {
    const mongoDbUrl = requiredEnv([
      'SIPROUTER_MONGODB_URL',
      'MONGODB_URI',
      'MONGODB_URL',
    ]);
    const descriptor: Record<string, string> = {
      mongoDbUrl,
      mongoDbName: process.env.SIPROUTER_MONGODB_NAME || process.env.MONGODB_DATABASE || process.env.MONGODB_NAME || 'siprouter',
    };

    const mongoDbUser = process.env.SIPROUTER_MONGODB_USER || process.env.MONGODB_USERNAME || process.env.MONGODB_USER;
    const mongoDbPass = process.env.SIPROUTER_MONGODB_PASS || process.env.MONGODB_PASSWORD || process.env.MONGODB_PASS;
    if (mongoDbUser) descriptor.mongoDbUser = mongoDbUser;
    if (mongoDbPass) descriptor.mongoDbPass = mongoDbPass;
    return descriptor;
  }

  /**
   * Builds the S3 descriptor from environment variables.
   *
   * When the endpoint is a full `http(s)://` URL, host, port and SSL flag are
   * taken from the URL. An explicit port in the URL wins over the port
   * variables, and the URL scheme always decides the SSL flag.
   */
  private getS3Descriptor(): Record<string, string | number | boolean> {
    const rawEndpoint = requiredEnv(['SIPROUTER_S3_ENDPOINT', 'S3_ENDPOINT', 'AWS_ENDPOINT_URL']);
    let endpoint = rawEndpoint;
    let port = optionalNumber(process.env.SIPROUTER_S3_PORT || process.env.S3_PORT);
    let useSsl = optionalBoolean(process.env.SIPROUTER_S3_USESSL || process.env.S3_USESSL || process.env.S3_USE_SSL);

    if (/^https?:\/\//.test(rawEndpoint)) {
      const url = new URL(rawEndpoint);
      endpoint = url.hostname;
      port = url.port ? Number(url.port) : port;
      useSsl = url.protocol === 'https:';
    }

    return {
      endpoint,
      accessKey: requiredEnv(['SIPROUTER_S3_ACCESS_KEY', 'S3_ACCESS_KEY', 'AWS_ACCESS_KEY_ID']),
      accessSecret: requiredEnv(['SIPROUTER_S3_SECRET_KEY', 'S3_SECRET_KEY', 'AWS_SECRET_ACCESS_KEY']),
      region: process.env.SIPROUTER_S3_REGION || process.env.S3_REGION || process.env.AWS_REGION || 'us-east-1',
      // A port of 0 / undefined is omitted, leaving the port unspecified.
      ...(port ? { port } : {}),
      ...(useSsl !== undefined ? { useSsl } : {}),
    };
  }
}
|
||||
Some files were not shown because too many files have changed in this diff Show More
Reference in New Issue
Block a user