fix(proxy-engine): improve inbound SIP routing diagnostics and enrich leg media state reporting

This commit is contained in:
2026-04-14 20:19:34 +00:00
parent 0d82a626b5
commit 88768f0586
46 changed files with 555689 additions and 107 deletions

View File

@@ -1,5 +1,14 @@
# Changelog
## 2026-04-14 - 1.25.2 - fix(proxy-engine)
improve inbound SIP routing diagnostics and enrich leg media state reporting
- Extract inbound called numbers from DID-related SIP headers when the request URI contains a provider account username.
- Emit detailed sip_unhandled diagnostics for inbound route misses, missing devices, and RTP allocation failures.
- Include codec, RTP port, remote media, and metadata in leg state change events and preserve those fields in runtime status/history views.
- Match hostname-based providers against resolved inbound source IPs to accept provider traffic sent from resolved addresses.
- Invalidate cached TTS WAV metadata across engine restarts and vendor the kokoro-tts crate via a local patch.
## 2026-04-14 - 1.25.1 - fix(proxy-engine)
respect explicit inbound route targets and store voicemail in the configured mailbox

2
rust/Cargo.lock generated
View File

@@ -1733,8 +1733,6 @@ dependencies = [
[[package]]
name = "kokoro-tts"
version = "0.3.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "68e5d46e20a28fa5fd313d9ffcf4bbcf41570e64841d3944c832eef6b98d208b"
dependencies = [
"bincode 2.0.1",
"cc",

View File

@@ -9,3 +9,6 @@ resolver = "2"
[profile.release]
opt-level = 3
lto = true
[patch.crates-io]
kokoro-tts = { path = "vendor/kokoro-tts" }

View File

@@ -5,7 +5,7 @@
//! The mixer provides mix-minus audio to all participants.
use crate::call::{Call, CallDirection, CallState, LegId, LegInfo, LegKind, LegState};
use crate::config::{normalize_routing_identity, AppConfig, ProviderConfig};
use crate::config::{extract_inbound_called_number, normalize_routing_identity, AppConfig, ProviderConfig};
use crate::ipc::{emit_event, OutTx};
use crate::leg_io::{create_leg_channels, spawn_sip_inbound, spawn_sip_outbound};
use crate::mixer::spawn_mixer;
@@ -25,6 +25,32 @@ use std::sync::Arc;
use tokio::net::UdpSocket;
use tokio::sync::Mutex;
/// Publish a `sip_unhandled` diagnostic event for an inbound INVITE that
/// could not be serviced (route miss, missing device, RTP exhaustion, ...).
///
/// `label` names the failure site; routing identities plus the raw R-URI,
/// `To`, and `P-Called-Party-ID` header values are folded into a single
/// summary string so the consumer can log it verbatim.
fn emit_inbound_diagnostic(
    out_tx: &OutTx,
    label: &str,
    invite: &SipMessage,
    from_addr: SocketAddr,
    provider_id: &str,
    called_number: &str,
    caller_number: &str,
) {
    // One-line summary keeps the raw header values available for
    // post-mortem inspection of provider routing quirks.
    let summary = format!(
        "INVITE {label} provider={provider_id} called={called_number} caller={caller_number} ruri={} to={} pcalled={}",
        invite.request_uri().unwrap_or(""),
        invite.get_header("To").unwrap_or(""),
        invite.get_header("P-Called-Party-ID").unwrap_or(""),
    );
    let payload = serde_json::json!({
        "method_or_status": summary,
        "call_id": invite.call_id(),
        "from_addr": from_addr.ip().to_string(),
        "from_port": from_addr.port(),
    });
    emit_event(out_tx, "sip_unhandled", payload);
}
/// Result of creating an inbound call — carries both the call id and
/// whether browsers should be notified (flows from the matched inbound
/// route's `ring_browsers` flag).
@@ -35,7 +61,17 @@ pub struct InboundCallCreated {
/// Emit a `leg_added` event with full leg information.
/// Free function (not a method) to avoid `&self` borrow conflicts when `self.calls` is borrowed.
fn emit_leg_added_event(tx: &OutTx, call_id: &str, leg: &LegInfo) {
/// Human-readable codec name for an RTP payload type.
///
/// Maps the static payload types 0/8/9 and the conventional dynamic Opus
/// payload type 111 to names; anything else falls back to `PT<n>`.
fn codec_label(codec_pt: u8) -> String {
    let known = match codec_pt {
        0 => Some("PCMU"),
        8 => Some("PCMA"),
        9 => Some("G.722"),
        111 => Some("Opus"),
        _ => None,
    };
    match known {
        Some(name) => name.to_string(),
        None => format!("PT{codec_pt}"),
    }
}
fn leg_metadata_json(leg: &LegInfo) -> serde_json::Value {
let metadata: serde_json::Value = if leg.metadata.is_empty() {
serde_json::json!({})
} else {
@@ -46,22 +82,35 @@ fn emit_leg_added_event(tx: &OutTx, call_id: &str, leg: &LegInfo) {
.collect(),
)
};
metadata
}
/// Serialize one leg into the JSON shape shared by `leg_added` and
/// `leg_state_changed` events, including codec/RTP/remote-media details so
/// runtime status and history views can mirror them.
fn leg_event_payload(call_id: &str, leg: &LegInfo) -> serde_json::Value {
    // "ip:port" of the far media endpoint, when one has been learned.
    let remote = leg
        .remote_media
        .map(|addr| format!("{}:{}", addr.ip(), addr.port()));
    serde_json::json!({
        "call_id": call_id,
        "leg_id": leg.id,
        "kind": leg.kind.as_str(),
        "state": leg.state.as_str(),
        "codec": codec_label(leg.codec_pt),
        "rtpPort": leg.rtp_port,
        "remoteMedia": remote,
        "metadata": leg_metadata_json(leg),
    })
}
fn emit_leg_added_event(tx: &OutTx, call_id: &str, leg: &LegInfo) {
emit_event(
tx,
"leg_added",
serde_json::json!({
"call_id": call_id,
"leg_id": leg.id,
"kind": leg.kind.as_str(),
"state": leg.state.as_str(),
"codec": sip_proto::helpers::codec_name(leg.codec_pt),
"rtpPort": leg.rtp_port,
"remoteMedia": leg.remote_media.map(|a| format!("{}:{}", a.ip(), a.port())),
"metadata": metadata,
}),
leg_event_payload(call_id, leg),
);
}
/// Emit a `leg_state_changed` event carrying the leg's full current
/// snapshot (state, codec, RTP port, remote media, metadata).
fn emit_leg_state_changed_event(tx: &OutTx, call_id: &str, leg: &LegInfo) {
    let payload = leg_event_payload(call_id, leg);
    emit_event(tx, "leg_state_changed", payload);
}
pub struct CallManager {
/// All active calls, keyed by internal call ID.
pub calls: HashMap<String, Call>,
@@ -232,11 +281,11 @@ impl CallManager {
"call_ringing",
serde_json::json!({ "call_id": call_id }),
);
emit_event(
&self.out_tx,
"leg_state_changed",
serde_json::json!({ "call_id": call_id, "leg_id": leg_id, "state": "ringing" }),
);
if let Some(call) = self.calls.get(call_id) {
if let Some(leg) = call.legs.get(leg_id) {
emit_leg_state_changed_event(&self.out_tx, call_id, leg);
}
}
}
SipLegAction::ConnectedWithAck(ack_buf) => {
let _ = socket.send_to(&ack_buf, target).await;
@@ -248,6 +297,7 @@ impl CallManager {
let sip_leg = leg.sip_leg.as_ref().unwrap();
let remote = sip_leg.remote_media;
leg.state = LegState::Connected;
leg.codec_pt = sip_pt;
leg.remote_media = remote;
call.state = CallState::Connected;
remote
@@ -298,8 +348,17 @@ impl CallManager {
dev_rtp_socket,
dev_remote,
dev_leg_id,
)) = device_leg_info
)) = device_leg_info
{
// Use the device's preferred codec from its INVITE SDP,
// not the provider's negotiated codec.
let dev_pt = device_invite
.has_sdp_body()
.then(|| parse_sdp_endpoint(&device_invite.body))
.flatten()
.and_then(|ep| ep.codec_pt)
.unwrap_or(sip_pt);
// Build SDP pointing device to our device_rtp port.
// Use LAN IP for the device (it's on the local network).
let call_ref = self.calls.get(call_id).unwrap();
@@ -336,23 +395,16 @@ impl CallManager {
if let Some(call) = self.calls.get_mut(call_id) {
if let Some(dev_leg) = call.legs.get_mut(&dev_leg_id) {
dev_leg.state = LegState::Connected;
dev_leg.codec_pt = dev_pt;
}
}
if let Some(call) = self.calls.get(call_id) {
if let Some(dev_leg) = call.legs.get(&dev_leg_id) {
emit_leg_state_changed_event(&self.out_tx, call_id, dev_leg);
}
}
emit_event(
&self.out_tx,
"leg_state_changed",
serde_json::json!({ "call_id": call_id, "leg_id": dev_leg_id, "state": "connected" }),
);
// Wire device leg to mixer.
// Use the device's preferred codec from its INVITE SDP,
// not the provider's negotiated codec.
let dev_pt = device_invite
.has_sdp_body()
.then(|| parse_sdp_endpoint(&device_invite.body))
.flatten()
.and_then(|ep| ep.codec_pt)
.unwrap_or(sip_pt);
if let Some(dev_remote_addr) = dev_remote {
let dev_channels = create_leg_channels();
spawn_sip_inbound(dev_rtp_socket.clone(), dev_channels.inbound_tx);
@@ -385,11 +437,11 @@ impl CallManager {
"sip_pt": sip_pt,
}),
);
emit_event(
&self.out_tx,
"leg_state_changed",
serde_json::json!({ "call_id": call_id, "leg_id": leg_id, "state": "connected" }),
);
if let Some(call) = self.calls.get(call_id) {
if let Some(leg) = call.legs.get(leg_id) {
emit_leg_state_changed_event(&self.out_tx, call_id, leg);
}
}
}
SipLegAction::Terminated(reason) => {
let duration = self
@@ -436,11 +488,11 @@ impl CallManager {
leg.state = LegState::Terminated;
}
}
emit_event(
&self.out_tx,
"leg_state_changed",
serde_json::json!({ "call_id": call_id, "leg_id": leg_id, "state": "terminated" }),
);
if let Some(call) = self.calls.get(call_id) {
if let Some(leg) = call.legs.get(leg_id) {
emit_leg_state_changed_event(&self.out_tx, call_id, leg);
}
}
emit_event(
&self.out_tx,
"call_ended",
@@ -684,11 +736,9 @@ impl CallManager {
if let Some(leg) = call.legs.get_mut(this_leg_id) {
leg.state = LegState::Ringing;
}
emit_event(
&self.out_tx,
"leg_state_changed",
serde_json::json!({ "call_id": call_id, "leg_id": this_leg_id, "state": "ringing" }),
);
if let Some(leg) = call.legs.get(this_leg_id) {
emit_leg_state_changed_event(&self.out_tx, call_id, leg);
}
} else if code >= 200 && code < 300 {
let mut needs_wiring = false;
if let Some(leg) = call.legs.get_mut(this_leg_id) {
@@ -708,11 +758,9 @@ impl CallManager {
needs_wiring = true;
}
emit_event(
&self.out_tx,
"leg_state_changed",
serde_json::json!({ "call_id": call_id, "leg_id": this_leg_id, "state": "connected" }),
);
if let Some(leg) = call.legs.get(this_leg_id) {
emit_leg_state_changed_event(&self.out_tx, call_id, leg);
}
if call.state != CallState::Connected {
call.state = CallState::Connected;
@@ -811,7 +859,7 @@ impl CallManager {
// Extract caller/callee info.
let from_header = invite.get_header("From").unwrap_or("");
let caller_number = normalize_routing_identity(from_header);
let called_number = normalize_routing_identity(invite.request_uri().unwrap_or(""));
let called_number = extract_inbound_called_number(invite);
// Resolve via the configured inbound routing table. The matched route
// is the source of truth for which external numbers this provider is
@@ -826,6 +874,15 @@ impl CallManager {
{
Some(route) => route,
None => {
emit_inbound_diagnostic(
&self.out_tx,
"route_miss",
invite,
from_addr,
provider_id,
&called_number,
&caller_number,
);
let resp = SipMessage::create_response(404, "Not Found", invite, None);
let _ = socket.send_to(&resp.serialize(), from_addr).await;
return None;
@@ -940,6 +997,15 @@ impl CallManager {
let provider_rtp = match rtp_pool.allocate().await {
Some(a) => a,
None => {
emit_inbound_diagnostic(
&self.out_tx,
"provider_rtp_unavailable",
invite,
from_addr,
provider_id,
&called_number,
&caller_number,
);
let resp = SipMessage::create_response(503, "Service Unavailable", invite, None);
let _ = socket.send_to(&resp.serialize(), from_addr).await;
return None;
@@ -948,6 +1014,15 @@ impl CallManager {
let device_rtp = match rtp_pool.allocate().await {
Some(a) => a,
None => {
emit_inbound_diagnostic(
&self.out_tx,
"device_rtp_unavailable",
invite,
from_addr,
provider_id,
&called_number,
&caller_number,
);
let resp = SipMessage::create_response(503, "Service Unavailable", invite, None);
let _ = socket.send_to(&resp.serialize(), from_addr).await;
return None;
@@ -1707,11 +1782,7 @@ impl CallManager {
}
}
leg.state = LegState::Terminated;
emit_event(
&self.out_tx,
"leg_state_changed",
serde_json::json!({ "call_id": call_id, "leg_id": leg.id, "state": "terminated" }),
);
emit_leg_state_changed_event(&self.out_tx, call_id, leg);
}
emit_event(
@@ -1760,6 +1831,16 @@ impl CallManager {
let rtp_alloc = match rtp_pool.allocate().await {
Some(a) => a,
None => {
let called_number = extract_inbound_called_number(invite);
emit_inbound_diagnostic(
&self.out_tx,
"voicemail_rtp_unavailable",
invite,
from_addr,
provider_id,
&called_number,
caller_number,
);
let resp = SipMessage::create_response(503, "Service Unavailable", invite, None);
let _ = socket.send_to(&resp.serialize(), from_addr).await;
return None;
@@ -1901,6 +1982,16 @@ impl CallManager {
let rtp_alloc = match rtp_pool.allocate().await {
Some(a) => a,
None => {
let called_number = extract_inbound_called_number(invite);
emit_inbound_diagnostic(
&self.out_tx,
"ivr_rtp_unavailable",
invite,
from_addr,
provider_id,
&called_number,
caller_number,
);
let resp = SipMessage::create_response(503, "Service Unavailable", invite, None);
let _ = socket.send_to(&resp.serialize(), from_addr).await;
return None;

View File

@@ -273,6 +273,38 @@ pub fn normalize_routing_identity(value: &str) -> String {
digits
}
/// Heuristic: does `value` look like a dialable phone number?
///
/// True when the string consists solely of ASCII digits and `+` signs and
/// carries at least six digits — enough to reject short account usernames
/// like `2830573e1` while accepting DIDs with or without a `+` prefix.
fn looks_like_phone_identity(value: &str) -> bool {
    let mut digit_count = 0usize;
    for ch in value.chars() {
        if ch.is_ascii_digit() {
            digit_count += 1;
        } else if ch != '+' {
            // Any character other than a digit or '+' disqualifies it.
            return false;
        }
    }
    digit_count >= 6
}
/// Pick the best inbound called-number identity from common SIP headers.
///
/// Some providers deliver the DID in `To` / `P-Called-Party-ID` while the
/// request URI contains an account username. Prefer a phone-like identity
/// when present; otherwise fall back to the request URI user part.
pub fn extract_inbound_called_number(msg: &SipMessage) -> String {
    // DID-bearing headers, scanned in decreasing order of specificity.
    const DID_HEADERS: [&str; 5] = [
        "P-Called-Party-ID",
        "X-Called-Party-ID",
        "Diversion",
        "History-Info",
        "To",
    ];

    // The request URI wins outright when it already looks dialable.
    let from_ruri = normalize_routing_identity(msg.request_uri().unwrap_or(""));
    if looks_like_phone_identity(&from_ruri) {
        return from_ruri;
    }

    // Otherwise take the first phone-like header value; if none qualifies,
    // fall back to the (non-phone-like) request URI identity.
    DID_HEADERS
        .iter()
        .map(|name| normalize_routing_identity(msg.get_header(name).unwrap_or("")))
        .find(|candidate| looks_like_phone_identity(candidate))
        .unwrap_or(from_ruri)
}
fn parse_numeric_range_value(value: &str) -> Option<(bool, &str)> {
let trimmed = value.trim();
if trimmed.is_empty() {
@@ -636,6 +668,20 @@ mod tests {
assert!(!support.ring_browsers);
}
#[test]
fn extract_inbound_called_number_prefers_did_headers_over_username_ruri() {
    // The request URI carries a provider account username; the DID is only
    // present in the To header, so the To value must win.
    let packet = b"INVITE sip:2830573e1@proxy.example SIP/2.0\r\nTo: <sip:+4942116767548@proxy.example>\r\nFrom: <sip:+491701234567@provider.example>;tag=abc\r\nCall-ID: test-1\r\nCSeq: 1 INVITE\r\nContent-Length: 0\r\n\r\n";
    let invite = SipMessage::parse(packet).expect("invite should parse");
    let called = extract_inbound_called_number(&invite);
    assert_eq!(called, "+4942116767548");
}
#[test]
fn extract_inbound_called_number_keeps_phone_ruri_when_already_present() {
    // The request URI is already a dialable number — no header scan should
    // override it, even though To holds a non-phone username.
    let packet = b"INVITE sip:042116767548@proxy.example SIP/2.0\r\nTo: <sip:2830573e1@proxy.example>\r\nFrom: <sip:+491701234567@provider.example>;tag=abc\r\nCall-ID: test-2\r\nCSeq: 1 INVITE\r\nContent-Length: 0\r\n\r\n";
    let invite = SipMessage::parse(packet).expect("invite should parse");
    assert_eq!(extract_inbound_called_number(&invite), "042116767548");
}
#[test]
fn matches_pattern_supports_numeric_ranges() {
assert!(matches_pattern(

View File

@@ -25,7 +25,7 @@ mod voicemail;
mod webrtc_engine;
use crate::call_manager::CallManager;
use crate::config::{normalize_routing_identity, AppConfig};
use crate::config::{extract_inbound_called_number, normalize_routing_identity, AppConfig};
use crate::ipc::{emit_event, respond_err, respond_ok, Command, OutTx};
use crate::provider::ProviderManager;
use crate::registrar::Registrar;
@@ -346,7 +346,7 @@ async fn handle_sip_packet(
// Emit event so TypeScript knows about the call (for dashboard, IVR routing, etc).
let from_header = msg.get_header("From").unwrap_or("");
let from_uri = normalize_routing_identity(from_header);
let called_number = normalize_routing_identity(msg.request_uri().unwrap_or(""));
let called_number = extract_inbound_called_number(&msg);
emit_event(
&eng.out_tx,
@@ -369,6 +369,20 @@ async fn handle_sip_packet(
let dialed_number = normalize_routing_identity(msg.request_uri().unwrap_or(""));
let device = eng.registrar.find_by_address(&from_addr);
if device.is_none() {
emit_event(
&eng.out_tx,
"sip_unhandled",
serde_json::json!({
"method_or_status": "INVITE",
"call_id": msg.call_id(),
"from_addr": from_addr.ip().to_string(),
"from_port": from_addr.port(),
"is_from_provider": false,
}),
);
return;
}
let device_id = device.map(|d| d.device_id.clone());
// Find provider via routing rules.

View File

@@ -313,6 +313,23 @@ impl ProviderManager {
if ps.config.outbound_proxy.address == addr.ip().to_string() {
return Some(ps_arc.clone());
}
// Hostname-based providers (e.g. sipgate.de) often deliver inbound
// INVITEs from resolved IPs rather than the literal configured host.
// Resolve the proxy host and accept any matching IP/port variant.
use std::net::ToSocketAddrs;
if let Ok(resolved) = format!(
"{}:{}",
ps.config.outbound_proxy.address, ps.config.outbound_proxy.port
)
.to_socket_addrs()
{
for resolved_addr in resolved {
if resolved_addr == *addr || resolved_addr.ip() == addr.ip() {
return Some(ps_arc.clone());
}
}
}
}
None
}

View File

@@ -13,6 +13,7 @@ use crate::audio_player::pcm_to_mix_frames;
use kokoro_tts::{KokoroTts, Voice};
use std::path::Path;
use std::sync::Arc;
use std::time::{SystemTime, UNIX_EPOCH};
use tokio::sync::{mpsc, watch};
pub const DEFAULT_MODEL_PATH: &str = ".nogit/tts/kokoro-v1.0.onnx";
@@ -47,6 +48,10 @@ pub struct TtsEngine {
/// Path that was used to load the current model (for cache invalidation).
loaded_model_path: String,
loaded_voices_path: String,
/// On-disk TTS WAVs are cacheable only within a single engine lifetime.
/// Every restart gets a new generation token, so prior process outputs are
/// treated as stale and regenerated on first use.
cache_generation: String,
}
impl TtsEngine {
@@ -55,6 +60,10 @@ impl TtsEngine {
tts: None,
loaded_model_path: String::new(),
loaded_voices_path: String::new(),
cache_generation: SystemTime::now()
.duration_since(UNIX_EPOCH)
.map(|d| d.as_nanos().to_string())
.unwrap_or_else(|_| "0".to_string()),
}
}
@@ -228,7 +237,7 @@ impl TtsEngine {
return false;
}
match std::fs::read_to_string(&meta_path) {
Ok(contents) => contents == Self::cache_key(text, voice),
Ok(contents) => contents == self.cache_key(text, voice),
Err(_) => false,
}
}
@@ -236,12 +245,12 @@ impl TtsEngine {
/// Write the sidecar `.meta` file next to the WAV.
fn write_cache_meta(&self, output_path: &str, text: &str, voice: &str) {
let meta_path = format!("{output_path}.meta");
let _ = std::fs::write(&meta_path, Self::cache_key(text, voice));
let _ = std::fs::write(&meta_path, self.cache_key(text, voice));
}
/// Build the cache key from text + voice.
fn cache_key(text: &str, voice: &str) -> String {
format!("{}\0{}", text, voice)
/// Build the cache key for a TTS WAV from the per-process generation
/// token, the synthesized text, and the voice name. The components are
/// NUL-joined so no field can collide with a neighboring one; including
/// the generation token invalidates on-disk WAVs across engine restarts.
fn cache_key(&self, text: &str, voice: &str) -> String {
    let parts = [self.cache_generation.as_str(), text, voice];
    parts.join("\0")
}
}

1
rust/vendor/kokoro-tts/.cargo-ok vendored Normal file
View File

@@ -0,0 +1 @@
{"v":1}

View File

@@ -0,0 +1,7 @@
{
"git": {
"sha1": "dfa3eda5e8c3f23f8b4c5d504acaebd6e7a45020",
"dirty": true
},
"path_in_vcs": ""
}

View File

@@ -0,0 +1,35 @@
# CI for the vendored kokoro-tts crate: build and test on the three
# desktop platforms for every push / pull request targeting master.
name: Rust
on:
push:
branches: [ "master" ]
pull_request:
branches: [ "master" ]
env:
CARGO_TERM_COLOR: always
jobs:
build:
strategy:
matrix:
os: [ubuntu-latest, windows-latest, macos-latest]
runs-on: ${{ matrix.os }}
steps:
- uses: actions/checkout@v4
# Ubuntu-only dependency install (libasound2-dev = ALSA development files)
- name: Setup Ubuntu dependencies
if: matrix.os == 'ubuntu-latest'
run: |
sudo apt-get update
sudo apt install libasound2-dev
# Build the project
- name: Build
run: cargo build -vv
# Run the tests
- name: Run tests
run: cargo test --workspace -vv

5
rust/vendor/kokoro-tts/.gitignore vendored Normal file
View File

@@ -0,0 +1,5 @@
*.bin
*.onnx
Cargo.lock
/target
.idea

116
rust/vendor/kokoro-tts/Cargo.toml vendored Normal file
View File

@@ -0,0 +1,116 @@
# THIS FILE IS AUTOMATICALLY GENERATED BY CARGO
#
# When uploading crates to the registry Cargo will automatically
# "normalize" Cargo.toml files for maximal compatibility
# with all versions of Cargo and also rewrite `path` dependencies
# to registry (e.g., crates.io) dependencies.
#
# If you are reading this file be aware that the original Cargo.toml
# will likely look very different (and much more reasonable).
# See Cargo.toml.orig for the original contents.
[package]
edition = "2024"
name = "kokoro-tts"
version = "0.3.2"
build = "build.rs"
autolib = false
autobins = false
autoexamples = false
autotests = false
autobenches = false
description = "用于Rust的轻量级AI离线语音合成器Kokoro TTS可轻松交叉编译到移动端"
readme = "README.md"
keywords = [
"TTS",
"Offline",
"Lite",
"AI",
"Synthesizer",
]
license = "Apache-2.0"
repository = "https://github.com/mzdk100/kokoro.git"
[features]
use-cmudict = ["cmudict-fast"]
[lib]
name = "kokoro_tts"
path = "src/lib.rs"
[[example]]
name = "synth_directly_v10"
path = "examples/synth_directly_v10.rs"
[[example]]
name = "synth_directly_v11"
path = "examples/synth_directly_v11.rs"
[[example]]
name = "synth_stream"
path = "examples/synth_stream.rs"
[dependencies.bincode]
version = "2.0"
[dependencies.chinese-number]
version = "0.7.8"
features = [
"number-to-chinese",
"chinese-to-number",
]
default-features = false
[dependencies.cmudict-fast]
version = "0.8.0"
optional = true
[dependencies.futures]
version = "0.3.31"
[dependencies.jieba-rs]
version = "0.8.1"
[dependencies.log]
version = "0.4.29"
[dependencies.ndarray]
version = "0.17.2"
[dependencies.ort]
version = "2.0.0-rc.11"
[dependencies.pin-project]
version = "1.1.10"
[dependencies.pinyin]
version = "0.11.0"
[dependencies.rand]
version = "0.10.0-rc.7"
[dependencies.regex]
version = "1.12.2"
[dependencies.tokio]
version = "1.49.0"
features = [
"fs",
"rt-multi-thread",
"time",
"sync",
]
[dev-dependencies.anyhow]
version = "1.0.100"
[dev-dependencies.tokio]
version = "1.49.0"
features = ["macros"]
[dev-dependencies.voxudio]
version = "0.5.7"
features = ["device"]
[build-dependencies.cc]
version = "1.2.53"

35
rust/vendor/kokoro-tts/Cargo.toml.orig generated vendored Normal file
View File

@@ -0,0 +1,35 @@
[package]
name = "kokoro-tts"
description = "用于Rust的轻量级AI离线语音合成器Kokoro TTS可轻松交叉编译到移动端"
version = "0.3.2"
edition = "2024"
keywords = ["TTS", "Offline", "Lite", "AI", "Synthesizer"]
license = "Apache-2.0"
repository = "https://github.com/mzdk100/kokoro.git"
readme = "README.md"
[features]
use-cmudict = ["cmudict-fast"]
[dependencies]
bincode = "2.0"
chinese-number = { version = "0.7.8",default-features = false,features = ["number-to-chinese", "chinese-to-number"] }
cmudict-fast = { version = "0.8.0", optional = true }
futures = "0.3.31"
jieba-rs = "0.8.1"
log = "0.4.29"
ndarray = "0.17.2"
ort = "2.0.0-rc.11"
pin-project = "1.1.10"
pinyin = "0.11.0"
rand="0.10.0-rc.7"
regex = "1.12.2"
tokio = { version = "1.49.0",features = ["fs", "rt-multi-thread","time", "sync"] }
[dev-dependencies]
anyhow = "1.0.100"
tokio = {version = "1.49.0",features = ["macros"]}
voxudio = { version = "0.5.7",features = ["device"] }
[build-dependencies]
cc = "1.2.53"

201
rust/vendor/kokoro-tts/LICENSE vendored Normal file
View File

@@ -0,0 +1,201 @@
Apache License
Version 2.0, January 2004
http://www.apache.org/licenses/
TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION
1. Definitions.
"License" shall mean the terms and conditions for use, reproduction,
and distribution as defined by Sections 1 through 9 of this document.
"Licensor" shall mean the copyright owner or entity authorized by
the copyright owner that is granting the License.
"Legal Entity" shall mean the union of the acting entity and all
other entities that control, are controlled by, or are under common
control with that entity. For the purposes of this definition,
"control" means (i) the power, direct or indirect, to cause the
direction or management of such entity, whether by contract or
otherwise, or (ii) ownership of fifty percent (50%) or more of the
outstanding shares, or (iii) beneficial ownership of such entity.
"You" (or "Your") shall mean an individual or Legal Entity
exercising permissions granted by this License.
"Source" form shall mean the preferred form for making modifications,
including but not limited to software source code, documentation
source, and configuration files.
"Object" form shall mean any form resulting from mechanical
transformation or translation of a Source form, including but
not limited to compiled object code, generated documentation,
and conversions to other media types.
"Work" shall mean the work of authorship, whether in Source or
Object form, made available under the License, as indicated by a
copyright notice that is included in or attached to the work
(an example is provided in the Appendix below).
"Derivative Works" shall mean any work, whether in Source or Object
form, that is based on (or derived from) the Work and for which the
editorial revisions, annotations, elaborations, or other modifications
represent, as a whole, an original work of authorship. For the purposes
of this License, Derivative Works shall not include works that remain
separable from, or merely link (or bind by name) to the interfaces of,
the Work and Derivative Works thereof.
"Contribution" shall mean any work of authorship, including
the original version of the Work and any modifications or additions
to that Work or Derivative Works thereof, that is intentionally
submitted to Licensor for inclusion in the Work by the copyright owner
or by an individual or Legal Entity authorized to submit on behalf of
the copyright owner. For the purposes of this definition, "submitted"
means any form of electronic, verbal, or written communication sent
to the Licensor or its representatives, including but not limited to
communication on electronic mailing lists, source code control systems,
and issue tracking systems that are managed by, or on behalf of, the
Licensor for the purpose of discussing and improving the Work, but
excluding communication that is conspicuously marked or otherwise
designated in writing by the copyright owner as "Not a Contribution."
"Contributor" shall mean Licensor and any individual or Legal Entity
on behalf of whom a Contribution has been received by Licensor and
subsequently incorporated within the Work.
2. Grant of Copyright License. Subject to the terms and conditions of
this License, each Contributor hereby grants to You a perpetual,
worldwide, non-exclusive, no-charge, royalty-free, irrevocable
copyright license to reproduce, prepare Derivative Works of,
publicly display, publicly perform, sublicense, and distribute the
Work and such Derivative Works in Source or Object form.
3. Grant of Patent License. Subject to the terms and conditions of
this License, each Contributor hereby grants to You a perpetual,
worldwide, non-exclusive, no-charge, royalty-free, irrevocable
(except as stated in this section) patent license to make, have made,
use, offer to sell, sell, import, and otherwise transfer the Work,
where such license applies only to those patent claims licensable
by such Contributor that are necessarily infringed by their
Contribution(s) alone or by combination of their Contribution(s)
with the Work to which such Contribution(s) was submitted. If You
institute patent litigation against any entity (including a
cross-claim or counterclaim in a lawsuit) alleging that the Work
or a Contribution incorporated within the Work constitutes direct
or contributory patent infringement, then any patent licenses
granted to You under this License for that Work shall terminate
as of the date such litigation is filed.
4. Redistribution. You may reproduce and distribute copies of the
Work or Derivative Works thereof in any medium, with or without
modifications, and in Source or Object form, provided that You
meet the following conditions:
(a) You must give any other recipients of the Work or
Derivative Works a copy of this License; and
(b) You must cause any modified files to carry prominent notices
stating that You changed the files; and
(c) You must retain, in the Source form of any Derivative Works
that You distribute, all copyright, patent, trademark, and
attribution notices from the Source form of the Work,
excluding those notices that do not pertain to any part of
the Derivative Works; and
(d) If the Work includes a "NOTICE" text file as part of its
distribution, then any Derivative Works that You distribute must
include a readable copy of the attribution notices contained
within such NOTICE file, excluding those notices that do not
pertain to any part of the Derivative Works, in at least one
of the following places: within a NOTICE text file distributed
as part of the Derivative Works; within the Source form or
documentation, if provided along with the Derivative Works; or,
within a display generated by the Derivative Works, if and
wherever such third-party notices normally appear. The contents
of the NOTICE file are for informational purposes only and
do not modify the License. You may add Your own attribution
notices within Derivative Works that You distribute, alongside
or as an addendum to the NOTICE text from the Work, provided
that such additional attribution notices cannot be construed
as modifying the License.
You may add Your own copyright statement to Your modifications and
may provide additional or different license terms and conditions
for use, reproduction, or distribution of Your modifications, or
for any such Derivative Works as a whole, provided Your use,
reproduction, and distribution of the Work otherwise complies with
the conditions stated in this License.
5. Submission of Contributions. Unless You explicitly state otherwise,
any Contribution intentionally submitted for inclusion in the Work
by You to the Licensor shall be under the terms and conditions of
this License, without any additional terms or conditions.
Notwithstanding the above, nothing herein shall supersede or modify
the terms of any separate license agreement you may have executed
with Licensor regarding such Contributions.
6. Trademarks. This License does not grant permission to use the trade
names, trademarks, service marks, or product names of the Licensor,
except as required for reasonable and customary use in describing the
origin of the Work and reproducing the content of the NOTICE file.
7. Disclaimer of Warranty. Unless required by applicable law or
agreed to in writing, Licensor provides the Work (and each
Contributor provides its Contributions) on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
implied, including, without limitation, any warranties or conditions
of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A
PARTICULAR PURPOSE. You are solely responsible for determining the
appropriateness of using or redistributing the Work and assume any
risks associated with Your exercise of permissions under this License.
8. Limitation of Liability. In no event and under no legal theory,
whether in tort (including negligence), contract, or otherwise,
unless required by applicable law (such as deliberate and grossly
negligent acts) or agreed to in writing, shall any Contributor be
liable to You for damages, including any direct, indirect, special,
incidental, or consequential damages of any character arising as a
result of this License or out of the use or inability to use the
Work (including but not limited to damages for loss of goodwill,
work stoppage, computer failure or malfunction, or any and all
other commercial damages or losses), even if such Contributor
has been advised of the possibility of such damages.
9. Accepting Warranty or Additional Liability. While redistributing
the Work or Derivative Works thereof, You may choose to offer,
and charge a fee for, acceptance of support, warranty, indemnity,
or other liability obligations and/or rights consistent with this
License. However, in accepting such obligations, You may act only
on Your own behalf and on Your sole responsibility, not on behalf
of any other Contributor, and only if You agree to indemnify,
defend, and hold each Contributor harmless for any liability
incurred by, or claims asserted against, such Contributor by reason
of your accepting any such warranty or additional liability.
END OF TERMS AND CONDITIONS
APPENDIX: How to apply the Apache License to your work.
To apply the Apache License to your work, attach the following
boilerplate notice, with the fields enclosed by brackets "[]"
replaced with your own identifying information. (Don't include
the brackets!) The text should be enclosed in the appropriate
comment syntax for the file format. We also recommend that a
file or class name and description of purpose be included on the
same "printed page" as the copyright notice for easier
identification within third-party archives.
Copyright [yyyy] [name of copyright owner]
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.

59
rust/vendor/kokoro-tts/README.md vendored Normal file
View File

@@ -0,0 +1,59 @@
# Kokoro TTS的rust推理实现
[Kokoro](https://github.com/hexgrad/kokoro)
> **Kokoro**是一个具有8200万参数的开放权重TTS模型
> 尽管体系结构轻巧但它的质量与大型模型相当同时更快更具成本效益凭借Apache许可的权重可以将Kokoro部署到从生产环境到个人项目的任何地方
## 概述
本项目包含几个示例脚本展示了如何使用Kokoro库进行语音合成这些示例展示了如何直接合成语音以及如何通过流式合成来处理更长的文本
## 前置条件
- Rust编程语言
- Tokio异步运行时
- Rodio音频处理和播放的库可选
- 下载模型资源在这里可以找到[1.0模型](https://github.com/mzdk100/kokoro/releases/tag/V1.0)和[1.1模型](https://github.com/mzdk100/kokoro/releases/tag/V1.1)
## 特点
- 跨平台可以轻松在Windows、Mac OS上构建也可以轻松交叉编译到安卓和iOS。
- 离线推理,不依赖网络。
- 足够轻量级有不同尺寸的模型可以选择最小的模型仅88M
- 发音人多样化,跨越多国语言。
## 使用方法
1. 运行示例,克隆或下载本项目到本地。在项目根目录下运行:
```shell
cargo run --example synth_directly_v10
cargo run --example synth_directly_v11
```
2. 集成到自己的项目中:
```shell
cargo add kokoro-tts
```
3. Linux依赖项
```shell
sudo apt install libasound2-dev
```
参考[examples](examples)文件夹中的示例代码进行开发。
## 许可证
本项目采用Apache-2.0许可证。请查看项目中的LICENSE文件了解更多信息。
## 注意
- 请确保在运行示例之前已经正确加载了模型和语音数据。
- 示例中的语音合成参数(如语音名称、文本内容、速度等)仅作为示例,实际使用时请根据需要进行调整。
## 贡献
如果您有任何改进意见或想要贡献代码请随时提交Pull Request或创建Issue。
## 免责声明
本项目中的示例代码仅用于演示目的。在使用本项目中的代码时,请确保遵守相关法律法规和社会主义核心价值观。开发者不对因使用本项目中的代码而导致的任何后果负责。

5
rust/vendor/kokoro-tts/build.rs vendored Normal file
View File

@@ -0,0 +1,5 @@
fn main() {
const SRC: &str = "src/transcription/en_ipa.c";
cc::Build::new().file(SRC).compile("es");
println!("cargo:rerun-if-changed={}", SRC);
}

135010
rust/vendor/kokoro-tts/dict/cmudict.dict vendored Normal file

File diff suppressed because it is too large Load Diff

BIN
rust/vendor/kokoro-tts/dict/espeak.dict vendored Normal file

Binary file not shown.

411980
rust/vendor/kokoro-tts/dict/pinyin.dict vendored Normal file

File diff suppressed because it is too large Load Diff

View File

@@ -0,0 +1,21 @@
use {
kokoro_tts::{KokoroTts, Voice},
voxudio::AudioPlayer,
};
#[tokio::main]
async fn main() -> anyhow::Result<()> {
let tts = KokoroTts::new("kokoro-v1.0.int8.onnx", "voices.bin").await?;
let (audio, took) = tts
.synth(
"Hello, world!你好我们是一群追逐梦想的人。我正在使用qq。",
Voice::ZfXiaoxiao(1.2),
)
.await?;
println!("Synth took: {:?}", took);
let mut player = AudioPlayer::new()?;
player.play()?;
player.write::<24000>(&audio, 1).await?;
Ok(())
}

View File

@@ -0,0 +1,21 @@
use {
kokoro_tts::{KokoroTts, Voice},
voxudio::AudioPlayer,
};
#[tokio::main]
async fn main() -> anyhow::Result<()> {
let tts = KokoroTts::new("kokoro-v1.1-zh.onnx", "voices-v1.1-zh.bin").await?;
let (audio, took) = tts
.synth(
"Hello, world!你好我们是一群追逐梦想的人。我正在使用qq。",
Voice::Zm045(1),
)
.await?;
println!("Synth took: {:?}", took);
let mut player = AudioPlayer::new()?;
player.play()?;
player.write::<24000>(&audio, 1).await?;
Ok(())
}

View File

@@ -0,0 +1,51 @@
use {
futures::StreamExt,
kokoro_tts::{KokoroTts, Voice},
voxudio::AudioPlayer,
};
#[tokio::main]
async fn main() -> anyhow::Result<()> {
let tts = KokoroTts::new("kokoro-v1.1-zh.onnx", "voices-v1.1-zh.bin").await?;
let (mut sink, mut stream) = tts.stream(Voice::Zm098(1));
sink.synth("hello world.").await?;
sink.synth("你好,我们是一群追逐梦想的人。").await?;
sink.set_voice(Voice::Zf032(2));
sink.synth("我正在使用qq。").await?;
sink.set_voice(Voice::Zf090(3));
sink.synth("今天天气如何?").await?;
sink.set_voice(Voice::Zm045(1));
sink.synth("你在使用Rust编程语言吗").await?;
sink.set_voice(Voice::Zf039(1));
sink.synth(
"你轻轻地走过那
在风雨花丛中
每一点一滴带走
是我醒来的梦
是在那天空上
最美丽的云朵
在那彩虹 最温柔的风",
)
.await?;
sink.set_voice(Voice::Zf088(1));
sink.synth(
"你静静看着我们
最不舍的面容
像流星划过夜空
转瞬即逝的梦
是最深情的脸 在这一瞬间
在遥远天边
",
)
.await?;
drop(sink);
let mut player = AudioPlayer::new()?;
player.play()?;
while let Some((audio, took)) = stream.next().await {
player.write::<24000>(&audio, 1).await?;
println!("Synth took: {:?}", took);
}
Ok(())
}

514
rust/vendor/kokoro-tts/g2p.py vendored Normal file
View File

@@ -0,0 +1,514 @@
import re
from typing import List, Optional, Tuple
from jieba import posseg, cut_for_search
from pypinyin import lazy_pinyin, load_phrases_dict, Style
from dataclasses import dataclass
@dataclass
class MToken:
    # One g2p token produced from a jieba segment.
    # tag: jieba part-of-speech tag ('x'/'eng' mark non-Chinese tokens).
    tag: str
    # whitespace: text to emit after this token's phonemes (may be '/' as a
    # word separator, or literal spaces carried over from the input).
    whitespace: str
    # phonemes: the converted phoneme string; None until/unless conversion ran.
    phonemes: Optional[str] = None
ZH_MAP = {"b":"","p":"","m":"","f":"","d":"","t":"","n":"","l":"","g":"","k":"","h":"","j":"","q":"","x":"","zh":"","ch":"","sh":"","r":"","z":"","c":"","s":"","a":"","o":"","e":"","ie":"","ai":"","ei":"","ao":"","ou":"","an":"","en":"","ang":"","eng":"","er":"","i":"","u":"","v":"","ii":"","iii":"","ve":"","ia":"","ian":"","iang":"","iao":"","in":"","ing":"","iong":"","iou":"","ong":"","ua":"","uai":"","uan":"","uang":"","uei":"","uen":"","ueng":"","uo":"","van":"","vn":""}
for p in ';:,.!?/—…"()“” 12345R':
assert p not in ZH_MAP, p
ZH_MAP[p] = p
unk = ''
punc = frozenset(';:,.!?—…"()“”')
phrases_dict = {
'开户行': [['ka1i'], ['hu4'], ['hang2']],
'发卡行': [['fa4'], ['ka3'], ['hang2']],
'放款行': [['fa4ng'], ['kua3n'], ['hang2']],
'茧行': [['jia3n'], ['hang2']],
'行号': [['hang2'], ['ha4o']],
'各地': [['ge4'], ['di4']],
'借还款': [['jie4'], ['hua2n'], ['kua3n']],
'时间为': [['shi2'], ['jia1n'], ['we2i']],
'为准': [['we2i'], ['zhu3n']],
'色差': [['se4'], ['cha1']],
'': [['dia3']],
'': [['bei5']],
'': [['bu4']],
'': [['zuo5']],
'': [['lei5']],
'掺和': [['chan1'], ['huo5']]
}
must_erhua = {
"小院儿", "胡同儿", "范儿", "老汉儿", "撒欢儿", "寻老礼儿", "妥妥儿", "媳妇儿"
}
must_not_neural_tone_words = {
'男子', '女子', '分子', '原子', '量子', '莲子', '石子', '瓜子', '电子', '人人', '虎虎',
'幺幺', '干嘛', '学子', '哈哈', '数数', '袅袅', '局地', '以下', '娃哈哈', '花花草草', '留得',
'耕地', '想想', '熙熙', '攘攘', '卵子', '死死', '冉冉', '恳恳', '佼佼', '吵吵', '打打',
'考考', '整整', '莘莘', '落地', '算子', '家家户户', '青青'
}
must_neural_tone_words = {
'麻烦', '麻利', '鸳鸯', '高粱', '骨头', '骆驼', '马虎', '首饰', '馒头', '馄饨', '风筝',
'难为', '队伍', '阔气', '闺女', '门道', '锄头', '铺盖', '铃铛', '铁匠', '钥匙', '里脊',
'里头', '部分', '那么', '道士', '造化', '迷糊', '连累', '这么', '这个', '运气', '过去',
'软和', '转悠', '踏实', '跳蚤', '跟头', '趔趄', '财主', '豆腐', '讲究', '记性', '记号',
'认识', '规矩', '见识', '裁缝', '补丁', '衣裳', '衣服', '衙门', '街坊', '行李', '行当',
'蛤蟆', '蘑菇', '薄荷', '葫芦', '葡萄', '萝卜', '荸荠', '苗条', '苗头', '苍蝇', '芝麻',
'舒服', '舒坦', '舌头', '自在', '膏药', '脾气', '脑袋', '脊梁', '能耐', '胳膊', '胭脂',
'胡萝', '胡琴', '胡同', '聪明', '耽误', '耽搁', '耷拉', '耳朵', '老爷', '老实', '老婆',
'戏弄', '将军', '翻腾', '罗嗦', '罐头', '编辑', '结实', '红火', '累赘', '糨糊', '糊涂',
'精神', '粮食', '簸箕', '篱笆', '算计', '算盘', '答应', '笤帚', '笑语', '笑话', '窟窿',
'窝囊', '窗户', '稳当', '稀罕', '称呼', '秧歌', '秀气', '秀才', '福气', '祖宗', '砚台',
'码头', '石榴', '石头', '石匠', '知识', '眼睛', '眯缝', '眨巴', '眉毛', '相声', '盘算',
'白净', '痢疾', '痛快', '疟疾', '疙瘩', '疏忽', '畜生', '生意', '甘蔗', '琵琶', '琢磨',
'琉璃', '玻璃', '玫瑰', '玄乎', '狐狸', '状元', '特务', '牲口', '牙碜', '牌楼', '爽快',
'爱人', '热闹', '烧饼', '烟筒', '烂糊', '点心', '炊帚', '灯笼', '火候', '漂亮', '滑溜',
'溜达', '温和', '清楚', '消息', '浪头', '活泼', '比方', '正经', '欺负', '模糊', '槟榔',
'棺材', '棒槌', '棉花', '核桃', '栅栏', '柴火', '架势', '枕头', '枇杷', '机灵', '本事',
'木头', '木匠', '朋友', '月饼', '月亮', '暖和', '明白', '时候', '新鲜', '故事', '收拾',
'收成', '提防', '挖苦', '挑剔', '指甲', '指头', '拾掇', '拳头', '拨弄', '招牌', '招呼',
'抬举', '护士', '折腾', '扫帚', '打量', '打算', '打扮', '打听', '打发', '扎实', '扁担',
'戒指', '懒得', '意识', '意思', '悟性', '怪物', '思量', '怎么', '念头', '念叨', '别人',
'快活', '忙活', '志气', '心思', '得罪', '张罗', '弟兄', '开通', '应酬', '庄稼', '干事',
'帮手', '帐篷', '希罕', '师父', '师傅', '巴结', '巴掌', '差事', '工夫', '岁数', '屁股',
'尾巴', '少爷', '小气', '小伙', '将就', '对头', '对付', '寡妇', '家伙', '客气', '实在',
'官司', '学问', '字号', '嫁妆', '媳妇', '媒人', '婆家', '娘家', '委屈', '姑娘', '姐夫',
'妯娌', '妥当', '妖精', '奴才', '女婿', '头发', '太阳', '大爷', '大方', '大意', '大夫',
'多少', '多么', '外甥', '壮实', '地道', '地方', '在乎', '困难', '嘴巴', '嘱咐', '嘟囔',
'嘀咕', '喜欢', '喇嘛', '喇叭', '商量', '唾沫', '哑巴', '哈欠', '哆嗦', '咳嗽', '和尚',
'告诉', '告示', '含糊', '吓唬', '后头', '名字', '名堂', '合同', '吆喝', '叫唤', '口袋',
'厚道', '厉害', '千斤', '包袱', '包涵', '匀称', '勤快', '动静', '动弹', '功夫', '力气',
'前头', '刺猬', '刺激', '别扭', '利落', '利索', '利害', '分析', '出息', '凑合', '凉快',
'冷战', '冤枉', '冒失', '养活', '关系', '先生', '兄弟', '便宜', '使唤', '佩服', '作坊',
'体面', '位置', '似的', '伙计', '休息', '什么', '人家', '亲戚', '亲家', '交情', '云彩',
'事情', '买卖', '主意', '丫头', '丧气', '两口', '东西', '东家', '世故', '不由', '下水',
'下巴', '上头', '上司', '丈夫', '丈人', '一辈', '那个', '菩萨', '父亲', '母亲', '咕噜',
'邋遢', '费用', '冤家', '甜头', '介绍', '荒唐', '大人', '泥鳅', '幸福', '熟悉', '计划',
'扑腾', '蜡烛', '姥爷', '照顾', '喉咙', '吉他', '弄堂', '蚂蚱', '凤凰', '拖沓', '寒碜',
'糟蹋', '倒腾', '报复', '逻辑', '盘缠', '喽啰', '牢骚', '咖喱', '扫把', '惦记'
}
not_erhua = {
"虐儿", "为儿", "护儿", "瞒儿", "救儿", "替儿", "有儿", "一儿", "我儿", "俺儿", "妻儿",
"拐儿", "聋儿", "乞儿", "患儿", "幼儿", "孤儿", "婴儿", "婴幼儿", "连体儿", "脑瘫儿",
"流浪儿", "体弱儿", "混血儿", "蜜雪儿", "舫儿", "祖儿", "美儿", "应采儿", "可儿", "侄儿",
"孙儿", "侄孙儿", "女儿", "男儿", "红孩儿", "花儿", "虫儿", "马儿", "鸟儿", "猪儿", "猫儿",
"狗儿", "少儿"
}
# Sentinel characters used by the tone-sandhi merge/modify passes below.
# NOTE(review): the literals were lost in an earlier encoding mangle; they are
# restored from context — merge_bu/bu_sandhi operate on "不" and
# merge_yi/yi_sandhi on "一" (see the comments on those functions).
BU = '不'
YI = '一'
# Part-of-speech tags that mark non-Chinese tokens: 'x' (punctuation/other)
# and 'eng' (English), which the merge passes must not touch.
X_ENG = frozenset(['x', 'eng'])
# g2p
load_phrases_dict(phrases_dict)
def get_initials_finals(word: str) -> Tuple[List[str], List[str]]:
    """
    Split ``word`` into per-character pinyin (initial, final) pairs via pypinyin.

    Returns two parallel lists: initials (empty string for zero-initial
    syllables) and finals in FINALS_TONE3 style (tone digit appended).
    Apical vowels are disambiguated: zi/ci/si -> 'ii', zhi/chi/shi/ri -> 'iii'.
    """
    initials = []
    finals = []
    orig_initials = lazy_pinyin(word, neutral_tone_with_five=True, style=Style.INITIALS)
    orig_finals = lazy_pinyin(word, neutral_tone_with_five=True, style=Style.FINALS_TONE3)
    # Fix: removed a leftover debug print of the raw pypinyin output.
    # after pypinyin==0.44.0, '嗯' need to be n2, cause the initial and final
    # consonants cannot be empty at the same time
    en_index = [index for index, c in enumerate(word) if c == "嗯"]
    for i in en_index:
        orig_finals[i] = "n2"
    for c, v in zip(orig_initials, orig_finals):
        if re.match(r'i\d', v):
            if c in ['z', 'c', 's']:
                # zi, ci, si
                v = re.sub('i', 'ii', v)
            elif c in ['zh', 'ch', 'sh', 'r']:
                # zhi, chi, shi
                v = re.sub('i', 'iii', v)
        initials.append(c)
        finals.append(v)
    return initials, finals
def merge_erhua(initials: List[str], finals: List[str], word: str, pos: str) -> Tuple[List[str], List[str]]:
"""
Do erhub.
"""
# fix er1
for i, phn in enumerate(finals):
if i == len(finals) - 1 and word[i] == "" and phn == 'er1':
finals[i] = 'er2'
# 发音
if word not in must_erhua and (word in not_erhua or pos in {"a", "j", "nr"}):
return initials, finals
# "……" 等情况直接返回
if len(finals) != len(word):
return initials, finals
assert len(finals) == len(word)
# 不发音
new_initials = []
new_finals = []
for i, phn in enumerate(finals):
if i == len(finals) - 1 and word[i] == "" and phn in {"er2", "er5"} and word[-2:] not in not_erhua and new_finals:
new_finals[-1] = new_finals[-1][:-1] + "R" + new_finals[-1][-1]
else:
new_initials.append(initials[i])
new_finals.append(phn)
return new_initials, new_finals
# merge "不" and the word behind it
# if don't merge, "不" sometimes appears alone according to jieba, which may occur sandhi error
def merge_bu(seg: List[Tuple[str, str]]) -> List[Tuple[str, str]]:
new_seg = []
for i, (word, pos) in enumerate(seg):
if pos not in X_ENG:
last_word = None
if i > 0:
last_word, _ = seg[i - 1]
if last_word == BU:
word = last_word + word
next_pos = None
if i + 1 < len(seg):
_, next_pos = seg[i + 1]
if word != BU or next_pos is None or next_pos in X_ENG:
new_seg.append((word, pos))
return new_seg
# function 1: merge "一" and reduplication words in it's left and right, e.g. "听","一","听" ->"听一听"
# function 2: merge single "一" and the word behind it
# if don't merge, "一" sometimes appears alone according to jieba, which may occur sandhi error
# e.g.
# input seg: [('听', 'v'), ('一', 'm'), ('听', 'v')]
# output seg: [['听一听', 'v']]
def merge_yi(seg: List[Tuple[str, str]]) -> List[Tuple[str, str]]:
new_seg = []
skip_next = False
# function 1
for i, (word, pos) in enumerate(seg):
if skip_next:
skip_next = False
continue
if i - 1 >= 0 and word == YI and i + 1 < len(seg) and seg[i - 1][0] == seg[i + 1][0] and seg[i - 1][1] == "v" and seg[i + 1][1] not in X_ENG:
new_seg[-1] = (new_seg[-1][0] + YI + seg[i + 1][0], new_seg[-1][1])
skip_next = True
else:
new_seg.append((word, pos))
seg = new_seg
new_seg = []
# function 2
for i, (word, pos) in enumerate(seg):
if new_seg and new_seg[-1][0] == YI and pos not in X_ENG:
new_seg[-1] = (new_seg[-1][0] + word, new_seg[-1][1])
else:
new_seg.append((word, pos))
return new_seg
def merge_reduplication(seg: List[Tuple[str, str]]) -> List[Tuple[str, str]]:
new_seg = []
for i, (word, pos) in enumerate(seg):
if new_seg and word == new_seg[-1][0] and pos not in X_ENG:
new_seg[-1][0] = new_seg[-1][0] + seg[i][0]
else:
new_seg.append([word, pos])
return new_seg
def is_reduplication(word: str) -> bool:
    """Return True for two-character words whose characters are identical (e.g. 看看)."""
    if len(word) != 2:
        return False
    first, second = word
    return first == second
# the first and the second words are all_tone_three
def merge_continuous_three_tones(seg: List[Tuple[str, str]]) -> List[Tuple[str, str]]:
new_seg = []
sub_finals_list = []
for (word, pos) in seg:
if pos in X_ENG:
sub_finals_list.append(['0'])
continue
orig_finals = lazy_pinyin(word, neutral_tone_with_five=True, style=Style.FINALS_TONE3)
# after pypinyin==0.44.0, '嗯' need to be n2, cause the initial and final consonants cannot be empty at the same time
en_index = [index for index, c in enumerate(word) if c == ""]
for i in en_index:
orig_finals[i] = "n2"
sub_finals_list.append(orig_finals)
assert len(sub_finals_list) == len(seg)
merge_last = [False] * len(seg)
for i, (word, pos) in enumerate(seg):
if pos not in X_ENG and i - 1 >= 0 and all_tone_three(sub_finals_list[i - 1]) and all_tone_three(sub_finals_list[i]) and not merge_last[i - 1]:
# if the last word is reduplication, not merge, because reduplication need to be _neural_sandhi
if not is_reduplication(seg[i - 1][0]) and len(seg[i - 1][0]) + len(seg[i][0]) <= 3:
new_seg[-1][0] = new_seg[-1][0] + seg[i][0]
merge_last[i] = True
else:
new_seg.append([word, pos])
else:
new_seg.append([word, pos])
return new_seg
# the last char of first word and the first char of second word is tone_three
def merge_continuous_three_tones_2(seg: List[Tuple[str, str]]) -> List[Tuple[str, str]]:
new_seg = []
sub_finals_list = []
for (word, pos) in seg:
if pos in X_ENG:
sub_finals_list.append(['0'])
continue
orig_finals = lazy_pinyin(
word, neutral_tone_with_five=True, style=Style.FINALS_TONE3)
# after pypinyin==0.44.0, '嗯' need to be n2, cause the initial and final consonants cannot be empty at the same time
en_index = [index for index, c in enumerate(word) if c == ""]
for i in en_index:
orig_finals[i] = "n2"
sub_finals_list.append(orig_finals)
assert len(sub_finals_list) == len(seg)
merge_last = [False] * len(seg)
for i, (word, pos) in enumerate(seg):
if pos not in X_ENG and i - 1 >= 0 and sub_finals_list[i - 1][-1][-1] == "3" and sub_finals_list[i][0][-1] == "3" and not merge_last[i - 1]:
# if the last word is reduplication, not merge, because reduplication need to be _neural_sandhi
if not is_reduplication(seg[i - 1][0]) and len(seg[i - 1][0]) + len(seg[i][0]) <= 3:
new_seg[-1][0] = new_seg[-1][0] + seg[i][0]
merge_last[i] = True
else:
new_seg.append([word, pos])
else:
new_seg.append([word, pos])
return new_seg
def merge_er(seg: List[Tuple[str, str]]) -> List[Tuple[str, str]]:
new_seg = []
for i, (word, pos) in enumerate(seg):
if i - 1 >= 0 and word == "" and new_seg[-1][1] not in X_ENG:
new_seg[-1][0] = new_seg[-1][0] + seg[i][0]
else:
new_seg.append([word, pos])
return new_seg
def pre_merge_for_modify(seg: List[Tuple[str, str]]) -> List[Tuple[str, str]]:
    """
    seg: [(word, pos), ...]

    Re-segment jieba output before tone sandhi is applied: merge 不/一 with
    their neighbours, merge reduplicated words and consecutive third-tone
    runs, then attach erhua (儿) suffixes. The pass order matters — each step
    consumes the previous step's output.
    """
    seg = merge_bu(seg)
    seg = merge_yi(seg)
    seg = merge_reduplication(seg)
    seg = merge_continuous_three_tones(seg)
    seg = merge_continuous_three_tones_2(seg)
    return merge_er(seg)
def bu_sandhi(word: str, finals: List[str]) -> List[str]:
    """
    Apply tone sandhi for 不 within a merged word, mutating and returning
    ``finals`` (tone digit is the last character of each final).
    """
    # A-不-A pattern: the middle 不 goes neutral (tone 5), e.g. 看不懂.
    if len(word) == 3 and word[1] == BU:
        finals[1] = finals[1][:-1] + "5"
    else:
        for i, char in enumerate(word):
            # "不" before tone4 should be bu2, e.g. 不怕
            if char == BU and i + 1 < len(word) and finals[i + 1][-1] == "4":
                finals[i] = finals[i][:-1] + "2"
    return finals
def yi_sandhi(word: str, finals: List[str]) -> List[str]:
# "一" in number sequences, e.g. 一零零, 二一零
if word.find(YI) != -1 and all(
[item.isnumeric() for item in word if item != YI]):
return finals
# "一" between reduplication words shold be yi5, e.g. 看一看
elif len(word) == 3 and word[1] == YI and word[0] == word[-1]:
finals[1] = finals[1][:-1] + "5"
# when "一" is ordinal word, it should be yi1
elif word.startswith("第一"):
finals[1] = finals[1][:-1] + "1"
else:
for i, char in enumerate(word):
if char == YI and i + 1 < len(word):
# "一" before tone4 should be yi2, e.g. 一段
if finals[i + 1][-1] in {'4', '5'}:
finals[i] = finals[i][:-1] + "2"
# "一" before non-tone4 should be yi4, e.g. 一天
else:
# "一" 后面如果是标点,还读一声
if word[i + 1] not in punc:
finals[i] = finals[i][:-1] + "4"
return finals
def split_word(word: str) -> List[str]:
    """
    Split ``word`` into two sub-words using jieba's search-mode segmentation.

    The shortest segment found is one part; the remaining prefix or suffix of
    the original word is the other, preserving original character order.
    """
    # Idiom: key=len replaces key=lambda i: len(i); reverse=False was the
    # default and is dropped. sorted() is stable, so ties keep jieba's order.
    word_list = sorted(cut_for_search(word), key=len)
    first_subword = word_list[0]
    first_begin_idx = word.find(first_subword)
    if first_begin_idx == 0:
        # Shortest segment is a prefix; the rest of the word follows it.
        return [first_subword, word[len(first_subword):]]
    # Otherwise treat it as a suffix and put the leading remainder first.
    return [word[:-len(first_subword)], first_subword]
# the meaning of jieba pos tag: https://blog.csdn.net/weixin_44174352/article/details/113731041
# e.g.
# word: "家里"
# pos: "s"
# finals: ['ia1', 'i3']
def neural_sandhi(word: str, pos: str, finals: List[str]) -> List[str]:
if word in must_not_neural_tone_words:
return finals
# reduplication words for n. and v. e.g. 奶奶, 试试, 旺旺
for j, item in enumerate(word):
if j - 1 >= 0 and item == word[j - 1] and pos[0] in {"n", "v", "a"}:
finals[j] = finals[j][:-1] + "5"
ge_idx = word.find("")
if len(word) >= 1 and word[-1] in "吧呢啊呐噻嘛吖嗨呐哦哒滴哩哟喽啰耶喔诶":
finals[-1] = finals[-1][:-1] + "5"
elif len(word) >= 1 and word[-1] in "的地得":
finals[-1] = finals[-1][:-1] + "5"
# e.g. 走了, 看着, 去过
elif len(word) == 1 and word in "了着过" and pos in {"ul", "uz", "ug"}:
finals[-1] = finals[-1][:-1] + "5"
elif len(word) > 1 and word[-1] in "们子" and pos in {"r", "n"}:
finals[-1] = finals[-1][:-1] + "5"
# e.g. 桌上, 地下
elif len(word) > 1 and word[-1] in "上下" and pos in {"s", "l", "f"}:
finals[-1] = finals[-1][:-1] + "5"
# e.g. 上来, 下去
elif len(word) > 1 and word[-1] in "来去" and word[-2] in "上下进出回过起开":
finals[-1] = finals[-1][:-1] + "5"
# 个做量词
elif (ge_idx >= 1 and (word[ge_idx - 1].isnumeric() or word[ge_idx - 1] in "几有两半多各整每做是")) or word == '':
finals[ge_idx] = finals[ge_idx][:-1] + "5"
else:
if word in must_neural_tone_words or word[-2:] in must_neural_tone_words:
finals[-1] = finals[-1][:-1] + "5"
word_list = split_word(word)
finals_list = [finals[:len(word_list[0])], finals[len(word_list[0]):]]
for i, word in enumerate(word_list):
# conventional neural in Chinese
if word in must_neural_tone_words or word[-2:] in must_neural_tone_words:
finals_list[i][-1] = finals_list[i][-1][:-1] + "5"
finals = sum(finals_list, [])
return finals
def all_tone_three(finals: List[str]) -> bool:
    """Return True when every final in ``finals`` carries tone 3 (trailing '3')."""
    for final in finals:
        if final[-1] != "3":
            return False
    return True
def three_sandhi(word: str, finals: List[str]) -> List[str]:
if len(word) == 2 and all_tone_three(finals):
finals[0] = finals[0][:-1] + "2"
elif len(word) == 3:
word_list = split_word(word)
if all_tone_three(finals):
# disyllabic + monosyllabic, e.g. 蒙古/包
if len(word_list[0]) == 2:
finals[0] = finals[0][:-1] + "2"
finals[1] = finals[1][:-1] + "2"
# monosyllabic + disyllabic, e.g. 纸/老虎
elif len(word_list[0]) == 1:
finals[1] = finals[1][:-1] + "2"
else:
finals_list = [finals[:len(word_list[0])], finals[len(word_list[0]):]]
if len(finals_list) == 2:
for i, sub in enumerate(finals_list):
# e.g. 所有/人
if all_tone_three(sub) and len(sub) == 2:
finals_list[i][0] = finals_list[i][0][:-1] + "2"
# e.g. 好/喜欢
elif i == 1 and not all_tone_three(sub) and finals_list[i][0][-1] == "3" and finals_list[0][-1][-1] == "3":
finals_list[0][-1] = finals_list[0][-1][:-1] + "2"
finals = sum(finals_list, [])
# split idiom into two words who's length is 2
elif len(word) == 4:
finals_list = [finals[:2], finals[2:]]
finals = []
for sub in finals_list:
if all_tone_three(sub):
sub[0] = sub[0][:-1] + "2"
finals += sub
return finals
def modified_tone(word: str, pos: str, finals: List[str]) -> List[str]:
    """
    Apply the full tone-sandhi cascade to one segmented word.

    word: 分词 (the merged word)
    pos: 词性 (jieba POS tag)
    finals: 带调韵母 [final1, ..., finaln] — tone digit last

    Order matters: 不-sandhi, 一-sandhi, neutral-tone rules, then third-tone
    sandhi, each transforming the previous step's finals.
    """
    return three_sandhi(
        word,
        neural_sandhi(word, pos, yi_sandhi(word, bu_sandhi(word, finals))),
    )
def g2p(text: str, with_erhua: bool = True) -> str:
"""
Return: string of phonemes.
'ㄋㄧ2ㄏㄠ3/ㄕ十4ㄐㄝ4'
"""
tokens = []
seg_cut = posseg.lcut(text)
# fix wordseg bad case for sandhi
seg_cut = pre_merge_for_modify(seg_cut)
# 为了多音词获得更好的效果,这里采用整句预测
initials = []
finals = []
# pypinyin, g2pM
for word, pos in seg_cut:
if pos == 'x' and '\u4E00' <= min(word) and max(word) <= '\u9FFF':
pos = 'X'
elif pos != 'x' and word in punc:
pos = 'x'
tk = MToken(tag=pos, whitespace='')
if pos in X_ENG:
if not word.isspace():
if pos == 'x' and word in punc:
tk.phonemes = word
tokens.append(tk)
elif tokens:
tokens[-1].whitespace += word
continue
elif tokens and tokens[-1].tag not in X_ENG and not tokens[-1].whitespace:
tokens[-1].whitespace = '/'
# g2p
sub_initials, sub_finals = get_initials_finals(word)
# tone sandhi
sub_finals = modified_tone(word, pos, sub_finals)
# er hua
if with_erhua:
sub_initials, sub_finals = merge_erhua(sub_initials, sub_finals, word, pos)
initials.append(sub_initials)
finals.append(sub_finals)
# assert len(sub_initials) == len(sub_finals) == len(word)
# sum(iterable[, start])
# initials = sum(initials, [])
# finals = sum(finals, [])
phones = []
for c, v in zip(sub_initials, sub_finals):
# NOTE: post process for pypinyin outputs
# we discriminate i, ii and iii
if c:
phones.append(c)
# replace punctuation by ` `
# if c and c in punc:
# phones.append(c)
if v and (v not in punc or v != c):# and v not in rhy_phns:
phones.append(v)
phones = '_'.join(phones).replace('_eR', '_er').replace('R', '_R')
phones = re.sub(r'(?=\d)', '_', phones).split('_')
print(phones)
tk.phonemes = ''.join(ZH_MAP.get(p, unk) for p in phones)
tokens.append(tk)
return ''.join((unk if tk.phonemes is None else tk.phonemes) + tk.whitespace for tk in tokens)
print(g2p('时间为。Hello, world!你好我们是一群追逐梦想的人。我正在使用qq。忽略卢驴'))
seg = posseg.lcut('不好看', True)
print(seg, merge_bu(seg))
seg = merge_bu(posseg.lcut('听一听一个', True))
print(seg, merge_yi(seg))
seg = merge_bu(posseg.lcut('谢谢谢谢', True))
print(seg, merge_reduplication(seg))
seg = merge_bu(posseg.lcut('小美好', True))
print(seg, merge_continuous_three_tones(seg))
seg = merge_bu(posseg.lcut('风景好', True))
print(seg, merge_continuous_three_tones_2(seg))

3
rust/vendor/kokoro-tts/run.bat vendored Normal file
View File

@@ -0,0 +1,3 @@
rem Developer convenience script (Windows): put the MinGW-w64 toolchain on
rem PATH so the C source compiled by build.rs can build/link, then run the
rem v1.1 synthesis demo and wait for a key so the console stays open.
set PATH=%PATH%;D:\msys64\mingw64\bin
cargo run --example synth_directly_v11
pause

80
rust/vendor/kokoro-tts/src/error.rs vendored Normal file
View File

@@ -0,0 +1,80 @@
use crate::G2PError;
use bincode::error::DecodeError;
use ndarray::ShapeError;
use ort::Error as OrtError;
use std::{
error::Error,
fmt::{Debug, Display, Formatter, Result as FmtResult},
io::Error as IoError,
time::SystemTimeError,
};
/// All error conditions surfaced by this crate.
///
/// Each variant wraps the error type of the subsystem it originates from so
/// callers can match on the failure source; `From` impls below support `?`.
#[derive(Debug)]
pub enum KokoroError {
    /// Decoding the bincode-serialized voice pack failed.
    Decode(DecodeError),
    /// Text-to-phoneme (g2p) conversion failed.
    G2P(G2PError),
    /// Underlying file or stream I/O failed.
    Io(IoError),
    /// The model session was no longer available when a synthesis task
    /// needed it (presumably dropped/released — confirm at the use sites).
    ModelReleased,
    /// The ONNX Runtime (`ort`) reported an error.
    Ort(OrtError),
    /// A channel send failed; the payload is the stringified send error.
    Send(String),
    /// An `ndarray` shape mismatch occurred.
    Shape(ShapeError),
    /// Reading the system clock failed (likely while timing synthesis —
    /// confirm against the synth path).
    SystemTime(SystemTimeError),
    /// The requested voice name is not present in the loaded voice pack.
    VoiceNotFound(String),
    /// The voice data's version did not match expectations; the payload is a
    /// human-readable description.
    VoiceVersionInvalid(String),
}
impl Display for KokoroError {
    /// Renders as `KokoroError: <inner>` for every variant, delegating to the
    /// wrapped error's own `Display` where one exists.
    fn fmt(&self, f: &mut Formatter<'_>) -> FmtResult {
        write!(f, "KokoroError: ")?;
        match self {
            Self::Decode(e) => write!(f, "{}", e),
            Self::G2P(e) => write!(f, "{}", e),
            Self::Io(e) => write!(f, "{}", e),
            Self::Ort(e) => write!(f, "{}", e),
            Self::ModelReleased => f.write_str("ModelReleased"),
            Self::Send(e) => write!(f, "{}", e),
            Self::Shape(e) => write!(f, "{}", e),
            Self::SystemTime(e) => write!(f, "{}", e),
            Self::VoiceNotFound(name) => write!(f, "VoiceNotFound({})", name),
            Self::VoiceVersionInvalid(msg) => write!(f, "VoiceVersionInvalid({})", msg),
        }
    }
}
impl Error for KokoroError {}

// The `From` impls below let `?` lift each subsystem's error into
// `KokoroError` without explicit `map_err` at the call sites.

impl From<IoError> for KokoroError {
    fn from(value: IoError) -> Self {
        Self::Io(value)
    }
}

impl From<DecodeError> for KokoroError {
    fn from(value: DecodeError) -> Self {
        Self::Decode(value)
    }
}

impl From<OrtError> for KokoroError {
    fn from(value: OrtError) -> Self {
        Self::Ort(value)
    }
}

impl From<G2PError> for KokoroError {
    fn from(value: G2PError) -> Self {
        Self::G2P(value)
    }
}

impl From<ShapeError> for KokoroError {
    fn from(value: ShapeError) -> Self {
        Self::Shape(value)
    }
}

impl From<SystemTimeError> for KokoroError {
    fn from(value: SystemTimeError) -> Self {
        Self::SystemTime(value)
    }
}

321
rust/vendor/kokoro-tts/src/g2p.rs vendored Normal file
View File

@@ -0,0 +1,321 @@
/// 文本到国际音标的转换
mod v10;
mod v11;
use super::PinyinError;
use chinese_number::{ChineseCase, ChineseCountMethod, ChineseVariant, NumberToChinese};
#[cfg(feature = "use-cmudict")]
use cmudict_fast::{Cmudict, Error as CmudictError};
use pinyin::ToPinyin;
use regex::{Captures, Error as RegexError, Regex};
use std::{
error::Error,
fmt::{Display, Formatter, Result as FmtResult},
};
/// Errors produced while converting text to phonemes.
///
/// The English backend is feature-gated: `use-cmudict` pulls in the CMU
/// pronouncing dictionary, otherwise a C phonemizer is called over FFI.
#[derive(Debug)]
pub enum G2PError {
    /// The CMU pronouncing dictionary failed to load or parse.
    #[cfg(feature = "use-cmudict")]
    CmudictError(CmudictError),
    /// A pinyin lookup produced no candidate pronunciations.
    /// NOTE(review): variant name has a typo ("Enpty" -> "Empty"), but it is
    /// public API so renaming would be a breaking change; `Display` already
    /// prints "EmptyData".
    EnptyData,
    /// The word contained an interior NUL byte and could not cross the FFI
    /// boundary as a C string.
    #[cfg(not(feature = "use-cmudict"))]
    Nul(std::ffi::NulError),
    /// Pinyin-to-IPA conversion failed.
    Pinyin(PinyinError),
    /// A regular expression used by the g2p pipeline failed to compile.
    Regex(RegexError),
    /// The C phonemizer returned bytes that were not valid UTF-8.
    #[cfg(not(feature = "use-cmudict"))]
    Utf8(std::str::Utf8Error),
}
impl Display for G2PError {
    /// Renders as `G2PError: <inner>` so the error source is visible even
    /// when the error is stringified without its type name.
    fn fmt(&self, f: &mut Formatter<'_>) -> FmtResult {
        write!(f, "G2PError: ")?;
        match self {
            #[cfg(feature = "use-cmudict")]
            Self::CmudictError(e) => Display::fmt(e, f),
            // Deliberately rendered with the correct spelling, masking the
            // typo in the variant name (see the enum definition).
            Self::EnptyData => Display::fmt("EmptyData", f),
            #[cfg(not(feature = "use-cmudict"))]
            Self::Nul(e) => Display::fmt(e, f),
            Self::Pinyin(e) => Display::fmt(e, f),
            Self::Regex(e) => Display::fmt(e, f),
            #[cfg(not(feature = "use-cmudict"))]
            Self::Utf8(e) => Display::fmt(e, f),
        }
    }
}
impl Error for G2PError {}

// `From` conversions so `?` can lift subsystem errors into `G2PError`; the
// feature-gated ones mirror the gating on the enum variants.

impl From<PinyinError> for G2PError {
    fn from(value: PinyinError) -> Self {
        Self::Pinyin(value)
    }
}

impl From<RegexError> for G2PError {
    fn from(value: RegexError) -> Self {
        Self::Regex(value)
    }
}

#[cfg(feature = "use-cmudict")]
impl From<CmudictError> for G2PError {
    fn from(value: CmudictError) -> Self {
        Self::CmudictError(value)
    }
}

#[cfg(not(feature = "use-cmudict"))]
impl From<std::ffi::NulError> for G2PError {
    fn from(value: std::ffi::NulError) -> Self {
        Self::Nul(value)
    }
}

#[cfg(not(feature = "use-cmudict"))]
impl From<std::str::Utf8Error> for G2PError {
    fn from(value: std::str::Utf8Error) -> Self {
        Self::Utf8(value)
    }
}
fn word2ipa_zh(word: &str) -> Result<String, G2PError> {
let iter = word.chars().map(|i| match i.to_pinyin() {
None => Ok(i.to_string()),
Some(p) => v10::py2ipa(p.with_tone_num_end()),
});
let mut result = String::new();
for i in iter {
result.push_str(&i?);
}
Ok(result)
}
#[cfg(feature = "use-cmudict")]
fn word2ipa_en(word: &str) -> Result<String, G2PError> {
    // Converts an English word to IPA via the bundled CMU pronouncing
    // dictionary; words missing from the dictionary fall back to
    // letter-by-letter spelling.
    use super::{arpa_to_ipa, letters_to_ipa};
    use std::{
        io::{Error as IoError, ErrorKind},
        str::FromStr,
        sync::LazyLock,
    };
    // Parses the embedded dictionary exactly once; because only a shared
    // reference to the cached Result is available, errors have to be
    // re-materialized (rebuilt/cloned) on every failing call.
    fn get_cmudict<'a>() -> Result<&'a Cmudict, CmudictError> {
        static CMUDICT: LazyLock<Result<Cmudict, CmudictError>> =
            LazyLock::new(|| Cmudict::from_str(include_str!("../dict/cmudict.dict")));
        CMUDICT.as_ref().map_err(|i| match i {
            CmudictError::IoErr(e) => CmudictError::IoErr(IoError::new(ErrorKind::Other, e)),
            CmudictError::InvalidLine(e) => CmudictError::InvalidLine(*e),
            CmudictError::RuleParseError(e) => CmudictError::RuleParseError(e.clone()),
        })
    }
    // Short all-uppercase tokens are treated as initialisms (e.g. "USA") and
    // spelled out letter by letter.
    if word.chars().count() < 4 && word.chars().all(|c| c.is_ascii_uppercase()) {
        return Ok(letters_to_ipa(word));
    }
    let dict = get_cmudict()?;
    // Case-insensitive lookup: exact form first, then upper, then lower.
    let upper = word.to_ascii_uppercase();
    let lower = word.to_ascii_lowercase();
    let Some(rules) = dict
        .get(word)
        .or_else(|| dict.get(&upper))
        .or_else(|| dict.get(&lower))
    else {
        return Ok(letters_to_ipa(word));
    };
    if rules.is_empty() {
        return Ok(word.to_owned());
    }
    // NOTE(review): a random candidate pronunciation is chosen, so repeated
    // calls with the same word can yield different phonemes — confirm this
    // nondeterminism is intended.
    let i = rand::random_range(0..rules.len());
    let result = rules[i]
        .pronunciation()
        .iter()
        .map(|i| arpa_to_ipa(&i.to_string()).unwrap_or_default())
        .collect::<String>();
    Ok(result)
}
/// Converts an English word to IPA using the bundled eSpeak-derived C
/// phonemizer compiled by `build.rs`.
///
/// Short all-uppercase words (< 4 chars) are treated as initialisms and
/// spelled out letter by letter instead.
#[cfg(not(feature = "use-cmudict"))]
fn word2ipa_en(word: &str) -> Result<String, G2PError> {
    use super::letters_to_ipa;
    use std::{
        ffi::{CStr, CString, c_char},
        sync::Once,
    };
    if word.chars().count() < 4 && word.chars().all(|c| c.is_ascii_uppercase()) {
        return Ok(letters_to_ipa(word));
    }
    unsafe extern "C" {
        fn TextToPhonemes(text: *const c_char) -> *const ::std::os::raw::c_char;
        fn Initialize(data_dictlist: *const c_char);
    }
    unsafe {
        static INIT: Once = Once::new();
        INIT.call_once(|| {
            // NOTE(review): the dictionary blob is handed over as a raw byte
            // pointer; confirm the C side does not expect a NUL-terminated
            // string or a separate length.
            static DATA: &[u8] = include_bytes!("../dict/espeak.dict");
            Initialize(DATA.as_ptr() as _);
        });
        // Fix: the original used `CString::new(..)?.into_raw()`, which gives
        // up ownership of the allocation and never reclaims it — one leaked
        // C string per call. Keep the CString alive across the call instead.
        let c_word = CString::new(word.to_lowercase())?;
        // SAFETY: `c_word` is a valid NUL-terminated buffer that outlives the
        // call. `TextToPhonemes` is assumed to return a pointer to a valid
        // NUL-terminated buffer owned by the C side (it is not freed here —
        // TODO confirm against en_ipa.c).
        let res = TextToPhonemes(c_word.as_ptr());
        Ok(CStr::from_ptr(res).to_str()?.to_string())
    }
}
/// Maps full-width CJK punctuation to its half-width/ASCII counterpart so the
/// downstream phoneme maps only need to know one set of punctuation marks.
/// All other characters pass through unchanged.
///
/// NOTE(review): several full-width match characters were lost to an encoding
/// mangle and are restored here from their replacement targets ((/)/,/!/:/;/?)
/// — verify against the upstream file.
fn to_half_shape(text: &str) -> String {
    // Fix: dropped an unused `.peekable()` adaptor (nothing ever peeked) and
    // a stale "strips extra spaces" comment (no spaces are stripped). The
    // output is never longer than the input in bytes, so `text.len()` is a
    // sufficient capacity.
    let mut result = String::with_capacity(text.len());
    for c in text.chars() {
        match c {
            '«' | '《' => result.push('“'),
            '»' | '》' => result.push('”'),
            '(' => result.push('('),
            ')' => result.push(')'),
            '、' | ',' => result.push(','),
            '。' => result.push('.'),
            '!' => result.push('!'),
            ':' => result.push(':'),
            ';' => result.push(';'),
            '?' => result.push('?'),
            _ => result.push(c),
        }
    }
    result
}
/// Rewrites every Arabic-numeral run (with optional decimal part) in `text`
/// into its Chinese reading, so the g2p stage only ever sees pronounceable
/// characters. Numbers that fail to convert are left as-is.
fn num_repr(text: &str) -> Result<String, G2PError> {
    let regex = Regex::new(r#"\d+(\.\d+)?"#)?;
    // Fix: `Regex::replace` only rewrites the *first* match, so any sentence
    // with more than one number leaked raw digits through to the phoneme
    // stage; `replace_all` converts every occurrence.
    Ok(regex
        .replace_all(text, |caps: &Captures| {
            let text = &caps[0];
            // Every string the pattern can match also parses as f64, so the
            // original `else if ... parse::<i64>()` branch was unreachable
            // and has been removed.
            if let Ok(num) = text.parse::<f64>() {
                num.to_chinese(
                    ChineseVariant::Traditional,
                    ChineseCase::Lower,
                    ChineseCountMethod::Low,
                )
                .unwrap_or_else(|_| text.to_owned())
            } else {
                text.to_owned()
            }
        })
        .to_string())
}
/// Converts mixed Chinese/English `text` into a phoneme string.
///
/// `use_v11` selects the v1.1 pipeline for Chinese runs; otherwise the v1.0
/// path (jieba word segmentation + pinyin-to-IPA) is used. English runs are
/// phonemized word-by-word via `word2ipa_en` in both modes.
pub fn g2p(text: &str, use_v11: bool) -> Result<String, G2PError> {
    // Spell out digits as Chinese first so raw digits never reach the
    // phoneme maps.
    let text = num_repr(text)?;
    // Capture groups: (1) CJK ideograph runs, (2) CJK punctuation runs,
    // (3) Latin-1 runs — each kind is handled by a different match arm below.
    let sentence_pattern = Regex::new(
        r#"([\u4E00-\u9FFF]+)|([,。:·?、!《》()【】〖〗〔〕“”‘’〈〉…— ]+)|([\u0000-\u00FF]+)+"#,
    )?;
    let en_word_pattern = Regex::new("\\w+|\\W+")?;
    // NOTE(review): Jieba::new() reloads its dictionary on every call, which
    // is expensive; consider caching it in a static — left unchanged here.
    let jieba = jieba_rs::Jieba::new();
    let mut result = String::new();
    for i in sentence_pattern.captures_iter(&text) {
        match (i.get(1), i.get(2), i.get(3)) {
            // Chinese ideograph run.
            (Some(text), _, _) => {
                let text = to_half_shape(text.as_str());
                if use_v11 {
                    // v1.1 expects space-separated chunks.
                    if !result.is_empty() && !result.ends_with(' ') {
                        result.push(' ');
                    }
                    result.push_str(&v11::g2p(&text, true));
                    result.push(' ');
                } else {
                    // v1.0: segment into words, then pinyin -> IPA per word.
                    for i in jieba.cut(&text, true) {
                        result.push_str(&word2ipa_zh(i)?);
                        result.push(' ');
                    }
                }
            }
            // CJK punctuation run: attach directly to the previous chunk
            // (trim the separating space first), then re-add one space.
            (_, Some(text), _) => {
                let text = to_half_shape(text.as_str());
                result = result.trim_end().to_string();
                result.push_str(&text);
                result.push(' ');
            }
            // Latin-1 run: split into word / non-word chunks.
            (_, _, Some(text)) => {
                for i in en_word_pattern.captures_iter(text.as_str()) {
                    let c = (i[0]).chars().next().unwrap_or_default();
                    if c == '\''
                        || c == '_'
                        || c == '-'
                        || c.is_ascii_lowercase()
                        || c.is_ascii_uppercase()
                    {
                        let i = &i[0];
                        // Re-insert the space that the punctuation arm above
                        // may have trimmed away.
                        if result.trim_end().ends_with(['.', ',', '!', '?'])
                            && !result.ends_with(' ')
                        {
                            result.push(' ');
                        }
                        result.push_str(&word2ipa_en(i)?);
                    } else if c == ' ' && result.ends_with(' ') {
                        // Collapse duplicate spaces between chunks.
                        result.push_str((i[0]).trim_start());
                    } else {
                        result.push_str(&i[0]);
                    }
                }
            }
            _ => (),
        };
    }
    Ok(result.trim().to_string())
}
#[cfg(test)]
mod tests {
#[cfg(not(feature = "use-cmudict"))]
#[test]
fn test_word2ipa_en() -> Result<(), super::G2PError> {
use super::word2ipa_en;
// println!("{:?}", espeak_rs::text_to_phonemes("days", "en", None, true, false));
assert_eq!("kjˌuːkjˈuː", word2ipa_en("qq")?);
assert_eq!("həlˈəʊ", word2ipa_en("hello")?);
assert_eq!("wˈɜːld", word2ipa_en("world")?);
assert_eq!("ˈapəl", word2ipa_en("apple")?);
assert_eq!("ˈɪldɹɛn", word2ipa_en("children")?);
assert_eq!("ˈaʊə", word2ipa_en("hour")?);
assert_eq!("dˈeɪz", word2ipa_en("days")?);
Ok(())
}
#[cfg(feature = "use-cmudict")]
#[test]
fn test_word2ipa_en_is_case_insensitive_for_dictionary_words() -> Result<(), super::G2PError> {
use super::word2ipa_en;
assert_eq!(word2ipa_en("Welcome")?, word2ipa_en("welcome")?);
Ok(())
}
#[test]
fn test_g2p() -> Result<(), super::G2PError> {
use super::g2p;
assert_eq!("ni↓xau↓ ʂɻ↘ʨje↘", g2p("你好世界", false)?);
assert_eq!("ㄋㄧ2ㄏㄠ3/ㄕ十4ㄐㄝ4", g2p("你好世界", true)?);
Ok(())
}
}

62
rust/vendor/kokoro-tts/src/g2p/v10.rs vendored Normal file
View File

@@ -0,0 +1,62 @@
use crate::{G2PError, pinyin_to_ipa};
fn retone(p: &str) -> String {
    // Rewrites the tone-letter contours produced by `pinyin_to_ipa` into the
    // single-arrow tone marks the v1.0 model expects, and collapses syllabic
    // consonants (ɻ̩ / ɱ̩) to 'ɨ'. Works on a char vector because the contour
    // patterns span up to three code points.
    let chars: Vec<char> = p.chars().collect();
    let mut result = String::with_capacity(p.len());
    let mut i = 0;
    while i < chars.len() {
        match () {
            // Third tone (dipping ˧˩˧) first — it must win over the shorter
            // contours that share its '˧' prefix.
            _ if i + 2 < chars.len()
                && chars[i] == '˧'
                && chars[i + 1] == '˩'
                && chars[i + 2] == '˧' =>
            {
                result.push('↓');
                i += 3;
            }
            // Second tone: rising ˧˥.
            _ if i + 1 < chars.len() && chars[i] == '˧' && chars[i + 1] == '˥' => {
                result.push('↗');
                i += 2;
            }
            // Fourth tone: falling ˥˩.
            _ if i + 1 < chars.len() && chars[i] == '˥' && chars[i + 1] == '˩' => {
                result.push('↘');
                i += 2;
            }
            // First tone: high level ˥ — checked after the two-char contours
            // so it cannot shadow them.
            _ if chars[i] == '˥' => {
                result.push('→');
                i += 1;
            }
            // Syllabic consonants: a base ɻ (U+027B) or ɱ (U+0271) followed by
            // the combining syllabic mark U+0329 collapses to 'ɨ'.
            _ if !(i + 1 >= chars.len() || chars[i+1] != '\u{0329}' || chars[i] != '\u{027B}' && chars[i] != '\u{0271}') =>
            {
                result.push('ɨ');
                i += 2;
            }
            // Anything else is copied through unchanged.
            _ => {
                result.push(chars[i]);
                i += 1;
            }
        }
    }
    // Invariant: no combining syllabic mark may survive; if this fires, a new
    // base letter needs to be added to the rule above.
    assert!(
        !result.contains('\u{0329}'),
        "Unexpected combining mark in: {}",
        result
    );
    result
}
/// Converts one numbered-pinyin syllable to IPA with arrow tone marks,
/// using the first candidate pronunciation reported by `pinyin_to_ipa`.
pub(super) fn py2ipa(py: &str) -> Result<String, G2PError> {
    let candidates = pinyin_to_ipa(py)?;
    let first = candidates.first().ok_or(G2PError::EnptyData)?;
    Ok(first.iter().map(|seg| retone(seg)).collect())
}

1263
rust/vendor/kokoro-tts/src/g2p/v11.rs vendored Normal file

File diff suppressed because it is too large Load Diff

83
rust/vendor/kokoro-tts/src/lib.rs vendored Normal file
View File

@@ -0,0 +1,83 @@
mod error;
mod g2p;
mod stream;
mod synthesizer;
mod tokenizer;
mod transcription;
mod voice;
use {
bincode::{config::standard, decode_from_slice},
ort::{execution_providers::CUDAExecutionProvider, session::Session},
std::{collections::HashMap, path::Path, sync::Arc, time::Duration},
tokio::{fs::read, sync::Mutex},
};
pub use {error::*, g2p::*, stream::*, tokenizer::*, transcription::*, voice::*};
/// Kokoro text-to-speech engine backed by an ONNX Runtime session.
pub struct KokoroTts {
    // Inference session; a Mutex because each run needs exclusive access
    // (see `model.lock().await` in the synthesizer), inside an Arc so
    // streaming sessions can hold Weak references to it.
    model: Arc<Mutex<Session>>,
    // Voice name -> style-embedding pack decoded from the voices file.
    voices: Arc<HashMap<String, Vec<Vec<Vec<f32>>>>>,
}
impl KokoroTts {
pub async fn new<P: AsRef<Path>>(model_path: P, voices_path: P) -> Result<Self, KokoroError> {
let voices = read(voices_path).await?;
let (voices, _) = decode_from_slice(&voices, standard())?;
let model = Session::builder()?
.with_execution_providers([CUDAExecutionProvider::default().build()])?
.commit_from_file(model_path)?;
Ok(Self {
model: Arc::new(model.into()),
voices,
})
}
pub async fn new_from_bytes<B>(model: B, voices: B) -> Result<Self, KokoroError>
where
B: AsRef<[u8]>,
{
let (voices, _) = decode_from_slice(voices.as_ref(), standard())?;
let model = Session::builder()?
.with_execution_providers([CUDAExecutionProvider::default().build()])?
.commit_from_memory(model.as_ref())?;
Ok(Self {
model: Arc::new(model.into()),
voices,
})
}
pub async fn synth<S>(&self, text: S, voice: Voice) -> Result<(Vec<f32>, Duration), KokoroError>
where
S: AsRef<str>,
{
let name = voice.get_name();
let pack = self
.voices
.get(name)
.ok_or(KokoroError::VoiceNotFound(name.to_owned()))?;
synthesizer::synth(Arc::downgrade(&self.model), text, pack, voice).await
}
pub fn stream<S>(&self, voice: Voice) -> (SynthSink<S>, SynthStream)
where
S: AsRef<str> + Send + 'static,
{
let voices = Arc::downgrade(&self.voices);
let model = Arc::downgrade(&self.model);
start_synth_session(voice, move |text, voice| {
let voices = voices.clone();
let model = model.clone();
async move {
let name = voice.get_name();
let voices = voices.upgrade().ok_or(KokoroError::ModelReleased)?;
let pack = voices
.get(name)
.ok_or(KokoroError::VoiceNotFound(name.to_owned()))?;
synthesizer::synth(model, text, pack, voice).await
}
})
}
}

157
rust/vendor/kokoro-tts/src/stream.rs vendored Normal file
View File

@@ -0,0 +1,157 @@
use {
crate::{KokoroError, Voice},
futures::{Sink, SinkExt, Stream},
pin_project::pin_project,
std::{
pin::Pin,
task::{Context, Poll},
time::Duration,
},
tokio::sync::mpsc::{UnboundedReceiver, UnboundedSender, unbounded_channel},
};
// A queued synthesis request: the text to speak and the voice to use.
struct Request<S> {
    voice: Voice,
    text: S,
}
// A finished synthesis result: audio samples plus inference wall time.
struct Response {
    data: Vec<f32>,
    took: Duration,
}
/// Synthesized-audio stream.
///
/// The receiving half of a streaming synthesis session, intended for
/// longer texts. Implements the `Stream` trait so synthesized audio
/// chunks can be iterated asynchronously.
#[pin_project]
pub struct SynthStream {
    #[pin]
    rx: UnboundedReceiver<Response>,
}
impl Stream for SynthStream {
    // Each item is (audio samples, time the inference took).
    type Item = (Vec<f32>, Duration);
    fn poll_next(self: Pin<&mut Self>, cx: &mut Context<'_>) -> Poll<Option<Self::Item>> {
        // Delegate to the channel receiver and unwrap each Response into
        // its (data, took) tuple; the stream ends when the sender is dropped.
        Pin::new(&mut self.project().rx)
            .poll_recv(cx)
            .map(|i| i.map(|Response { data, took }| (data, took)))
    }
}
/// Synthesis request sender.
///
/// The sending half of a streaming synthesis session; carries the
/// currently selected voice and implements the `Sink` trait so requests
/// can be submitted asynchronously.
#[pin_project]
pub struct SynthSink<S> {
    tx: UnboundedSender<Request<S>>,
    voice: Voice,
}
impl<S> SynthSink<S> {
    /// Sets the voice used for subsequent synthesis requests.
    ///
    /// # Arguments
    ///
    /// * `voice` - The voice (and its speed payload) to synthesize with.
    ///
    /// # Examples
    ///
    /// ```rust
    /// use kokoro_tts::{KokoroTts, Voice};
    ///
    /// #[tokio::main]
    /// async fn main() {
    ///     let Ok(tts) = KokoroTts::new("../kokoro-v1.0.int8.onnx", "../voices.bin").await else {
    ///         return;
    ///     };
    ///     // speed: 1.0
    ///     let (mut sink, _) = tts.stream::<&str>(Voice::ZfXiaoxiao(1.0));
    ///     // speed: 1.8
    ///     sink.set_voice(Voice::ZmYunxi(1.8));
    /// }
    /// ```
    ///
    pub fn set_voice(&mut self, voice: Voice) {
        self.voice = voice
    }
    /// Sends a synthesis request for `text` using the current voice.
    ///
    /// # Arguments
    ///
    /// * `text` - The text to synthesize.
    ///
    /// # Errors
    ///
    /// Returns a `KokoroError` if the request cannot be delivered to the
    /// synthesis task (e.g. the session's receiver has been dropped).
    ///
    /// # Examples
    ///
    /// ```rust
    /// use kokoro_tts::{KokoroTts, Voice};
    ///
    /// #[tokio::main]
    /// async fn main() {
    ///     let Ok(tts) = KokoroTts::new("../kokoro-v1.1-zh.onnx", "../voices-v1.1-zh.bin").await else {
    ///         return;
    ///     };
    ///     let (mut sink, _) = tts.stream(Voice::Zf003(2));
    ///     let _ = sink.synth("hello world.").await;
    /// }
    /// ```
    ///
    pub async fn synth(&mut self, text: S) -> Result<(), KokoroError> {
        self.send((self.voice, text)).await
    }
}
impl<S> Sink<(Voice, S)> for SynthSink<S> {
type Error = KokoroError;
fn poll_ready(self: Pin<&mut Self>, _cx: &mut Context<'_>) -> Poll<Result<(), Self::Error>> {
Poll::Ready(Ok(()))
}
fn start_send(self: Pin<&mut Self>, (voice, text): (Voice, S)) -> Result<(), Self::Error> {
self.tx
.send(Request { voice, text })
.map_err(|e| KokoroError::Send(e.to_string()))
}
fn poll_flush(self: Pin<&mut Self>, _cx: &mut Context<'_>) -> Poll<Result<(), Self::Error>> {
Poll::Ready(Ok(()))
}
fn poll_close(self: Pin<&mut Self>, _cx: &mut Context<'_>) -> Poll<Result<(), Self::Error>> {
Poll::Ready(Ok(()))
}
}
pub(super) fn start_synth_session<F, R, S>(
voice: Voice,
synth_request_callback: F,
) -> (SynthSink<S>, SynthStream)
where
F: Fn(S, Voice) -> R + Send + 'static,
R: Future<Output = Result<(Vec<f32>, Duration), KokoroError>> + Send,
S: AsRef<str> + Send + 'static,
{
let (tx, mut rx) = unbounded_channel::<Request<S>>();
let (tx2, rx2) = unbounded_channel();
tokio::spawn(async move {
while let Some(req) = rx.recv().await {
let (data, took) = synth_request_callback(req.text, req.voice).await?;
tx2.send(Response { data, took })
.map_err(|e| KokoroError::Send(e.to_string()))?;
}
Ok::<_, KokoroError>(())
});
(SynthSink { tx, voice }, SynthStream { rx: rx2 })
}

View File

@@ -0,0 +1,123 @@
use {
crate::{KokoroError, Voice, g2p, get_token_ids},
ndarray::Array,
ort::{
inputs,
session::{RunOptions, Session},
value::TensorRef,
},
std::{
cmp::min,
sync::Weak,
time::{Duration, SystemTime},
},
tokio::sync::Mutex,
};
/// Runs one Kokoro v1.0 inference pass over the entire phoneme string,
/// returning the synthesized samples and the wall-clock time spent in the
/// ONNX session run.
async fn synth_v10<P, S>(
    model: Weak<Mutex<Session>>,
    phonemes: S,
    pack: P,
    speed: f32,
) -> Result<(Vec<f32>, Duration), KokoroError>
where
    P: AsRef<Vec<Vec<Vec<f32>>>>,
    S: AsRef<str>,
{
    // Fail fast if the owning KokoroTts has already been dropped.
    let model = model.upgrade().ok_or(KokoroError::ModelReleased)?;
    let phonemes = get_token_ids(phonemes.as_ref(), false);
    let phonemes = Array::from_shape_vec((1, phonemes.len()), phonemes)?;
    // Style reference vector is selected by token count.
    // NOTE(review): this indexes `pack` at `token_count - 1` with no bound
    // check (unlike the v1.1 path, which chunks) — it will panic if the
    // text tokenizes to more entries than the style pack holds; confirm
    // callers bound the input length.
    let ref_s = pack.as_ref()[phonemes.len() - 1]
        .first()
        .cloned()
        .unwrap_or_default();
    let style = Array::from_shape_vec((1, ref_s.len()), ref_s)?;
    let speed = Array::from_vec(vec![speed]);
    let options = RunOptions::new()?;
    // The session needs exclusive access for the duration of a run.
    let mut model = model.lock().await;
    let t = SystemTime::now();
    let kokoro_output = model
        .run_async(
            inputs![
                "tokens" => TensorRef::from_array_view(&phonemes)?,
                "style" => TensorRef::from_array_view(&style)?,
                "speed" => TensorRef::from_array_view(&speed)?,
            ],
            &options,
        )?
        .await?;
    let elapsed = t.elapsed()?;
    let (_, audio) = kokoro_output["audio"].try_extract_tensor::<f32>()?;
    Ok((audio.to_owned(), elapsed))
}
/// Runs Kokoro v1.1 inference, draining the token stream in chunks no
/// larger than the style pack and concatenating the audio of all chunks.
async fn synth_v11<P, S>(
    model: Weak<Mutex<Session>>,
    phonemes: S,
    pack: P,
    speed: i32,
) -> Result<(Vec<f32>, Duration), KokoroError>
where
    P: AsRef<Vec<Vec<Vec<f32>>>>,
    S: AsRef<str>,
{
    let model = model.upgrade().ok_or(KokoroError::ModelReleased)?;
    let mut phonemes = get_token_ids(phonemes.as_ref(), true);
    let mut ret = Vec::new();
    let mut elapsed = Duration::ZERO;
    // NOTE(review): `while let` with an irrefutable binding plus a
    // let-chain is unusual — the `p.len() != 0` condition is what ends
    // the loop once `phonemes` has been fully drained.
    while let p = phonemes.drain(..min(pack.as_ref().len(), phonemes.len()))
        && p.len() != 0
    {
        let phonemes = Array::from_shape_vec((1, p.len()), p.collect())?;
        // Style vector selected by this chunk's token count; chunking above
        // guarantees the index stays within the pack.
        let ref_s = pack.as_ref()[phonemes.len() - 1]
            .first()
            .cloned()
            .unwrap_or(vec![0.; 256]);
        let style = Array::from_shape_vec((1, ref_s.len()), ref_s)?;
        let speed = Array::from_vec(vec![speed]);
        let options = RunOptions::new()?;
        let mut model = model.lock().await;
        let t = SystemTime::now();
        let kokoro_output = model
            .run_async(
                inputs![
                    "input_ids" => TensorRef::from_array_view(&phonemes)?,
                    "style" => TensorRef::from_array_view(&style)?,
                    "speed" => TensorRef::from_array_view(&speed)?,
                ],
                &options,
            )?
            .await?;
        // Note: only the last chunk's inference time survives in `elapsed`.
        elapsed = t.elapsed()?;
        let (_, audio) = kokoro_output["waveform"].try_extract_tensor::<f32>()?;
        let (_, _duration) = kokoro_output["duration"].try_extract_tensor::<i64>()?;
        ret.extend_from_slice(audio);
    }
    Ok((ret, elapsed))
}
/// Front door for synthesis: converts text to phonemes, then dispatches to
/// the v1.0 or v1.1 model path based on the selected voice.
pub(super) async fn synth<P, S>(
    model: Weak<Mutex<Session>>,
    text: S,
    pack: P,
    voice: Voice,
) -> Result<(Vec<f32>, Duration), KokoroError>
where
    P: AsRef<Vec<Vec<Vec<f32>>>>,
    S: AsRef<str>,
{
    let phonemes = g2p(text.as_ref(), voice.is_v11_supported())?;
    if voice.is_v11_supported() {
        synth_v11(model, phonemes, pack, voice.get_speed_v11()?).await
    } else if voice.is_v10_supported() {
        synth_v10(model, phonemes, pack, voice.get_speed_v10()?).await
    } else {
        Err(KokoroError::VoiceVersionInvalid(voice.get_name().to_owned()))
    }
}

324
rust/vendor/kokoro-tts/src/tokenizer.rs vendored Normal file
View File

@@ -0,0 +1,324 @@
use {
log::warn,
std::{collections::HashMap, sync::LazyLock},
};
/// Kokoro v1.0 phoneme vocabulary: maps each phoneme/punctuation character
/// to its model token id. Characters absent from this table are skipped by
/// `get_token_ids` with a warning.
static VOCAB_V10: LazyLock<HashMap<char, u8>> = LazyLock::new(|| {
    let mut map = HashMap::new();
    // Punctuation and whitespace.
    map.insert(';', 1);
    map.insert(':', 2);
    map.insert(',', 3);
    map.insert('.', 4);
    map.insert('!', 5);
    map.insert('?', 6);
    map.insert('—', 9);
    map.insert('…', 10);
    map.insert('"', 11);
    map.insert('(', 12);
    map.insert(')', 13);
    map.insert('“', 14);
    map.insert('”', 15);
    map.insert(' ', 16);
    map.insert('\u{0303}', 17); // Unicode escape for combining tilde
    // Affricate digraphs and modifier symbols.
    map.insert('ʣ', 18);
    map.insert('ʥ', 19);
    map.insert('ʦ', 20);
    map.insert('ʨ', 21);
    map.insert('ᵝ', 22);
    map.insert('\u{AB67}', 23); // Unicode escape
    map.insert('A', 24);
    map.insert('I', 25);
    map.insert('O', 31);
    map.insert('Q', 33);
    map.insert('S', 35);
    map.insert('T', 36);
    map.insert('W', 39);
    map.insert('Y', 41);
    map.insert('ᵊ', 42);
    // Basic latin letters used as IPA symbols.
    map.insert('a', 43);
    map.insert('b', 44);
    map.insert('c', 45);
    map.insert('d', 46);
    map.insert('e', 47);
    map.insert('f', 48);
    map.insert('h', 50);
    map.insert('i', 51);
    map.insert('j', 52);
    map.insert('k', 53);
    map.insert('l', 54);
    map.insert('m', 55);
    map.insert('n', 56);
    map.insert('o', 57);
    map.insert('p', 58);
    map.insert('q', 59);
    map.insert('r', 60);
    map.insert('s', 61);
    map.insert('t', 62);
    map.insert('u', 63);
    map.insert('v', 64);
    map.insert('w', 65);
    map.insert('x', 66);
    map.insert('y', 67);
    map.insert('z', 68);
    // Extended IPA symbols.
    map.insert('ɑ', 69);
    map.insert('ɐ', 70);
    map.insert('ɒ', 71);
    map.insert('æ', 72);
    map.insert('β', 75);
    map.insert('ɔ', 76);
    map.insert('ɕ', 77);
    map.insert('ç', 78);
    map.insert('ɖ', 80);
    map.insert('ð', 81);
    map.insert('ʤ', 82);
    map.insert('ə', 83);
    map.insert('ɚ', 85);
    map.insert('ɛ', 86);
    map.insert('ɜ', 87);
    map.insert('ɟ', 90);
    map.insert('ɡ', 92);
    map.insert('ɥ', 99);
    map.insert('ɨ', 101);
    map.insert('ɪ', 102);
    map.insert('ʝ', 103);
    map.insert('ɯ', 110);
    map.insert('ɰ', 111);
    map.insert('ŋ', 112);
    map.insert('ɳ', 113);
    map.insert('ɲ', 114);
    map.insert('ɴ', 115);
    map.insert('ø', 116);
    map.insert('ɸ', 118);
    map.insert('θ', 119);
    map.insert('œ', 120);
    map.insert('ɹ', 123);
    map.insert('ɾ', 125);
    map.insert('ɻ', 126);
    map.insert('ʁ', 128);
    map.insert('ɽ', 129);
    map.insert('ʂ', 130);
    map.insert('ʃ', 131);
    map.insert('ʈ', 132);
    map.insert('ʧ', 133);
    map.insert('ʊ', 135);
    map.insert('ʋ', 136);
    map.insert('ʌ', 138);
    map.insert('ɣ', 139);
    map.insert('ɤ', 140);
    map.insert('χ', 142);
    map.insert('ʎ', 143);
    map.insert('ʒ', 147);
    map.insert('ʔ', 148);
    // Stress, length, secondary articulation, and tone arrows (see retone).
    map.insert('ˈ', 156);
    map.insert('ˌ', 157);
    map.insert('ː', 158);
    map.insert('ʰ', 162);
    map.insert('ʲ', 164);
    map.insert('↓', 169);
    map.insert('→', 171);
    map.insert('↗', 172);
    map.insert('↘', 173);
    map.insert('ᵻ', 177);
    map
});
/// Kokoro v1.1 phoneme vocabulary: like `VOCAB_V10`, but extended with
/// bopomofo (zhuyin) letters and CJK tone/contour markers, and with digit
/// characters repurposed as Mandarin tone marks.
static VOCAB_V11: LazyLock<HashMap<char, u8>> = LazyLock::new(|| {
    let mut map = HashMap::new();
    // Punctuation and whitespace.
    map.insert(';', 1);
    map.insert(':', 2);
    map.insert(',', 3);
    map.insert('.', 4);
    map.insert('!', 5);
    map.insert('?', 6);
    map.insert('/', 7);
    map.insert('—', 9);
    map.insert('…', 10);
    map.insert('"', 11);
    map.insert('(', 12);
    map.insert(')', 13);
    map.insert('“', 14);
    map.insert('”', 15);
    map.insert(' ', 16);
    map.insert('\u{0303}', 17); // Unicode escape for combining tilde
    map.insert('ʣ', 18);
    map.insert('ʥ', 19);
    map.insert('ʦ', 20);
    map.insert('ʨ', 21);
    map.insert('ᵝ', 22);
    map.insert('ㄓ', 23);
    map.insert('A', 24);
    map.insert('I', 25);
    map.insert('ㄅ', 30);
    map.insert('O', 31);
    map.insert('ㄆ', 32);
    map.insert('Q', 33);
    map.insert('R', 34);
    map.insert('S', 35);
    map.insert('T', 36);
    map.insert('ㄇ', 37);
    map.insert('ㄈ', 38);
    map.insert('W', 39);
    map.insert('ㄉ', 40);
    map.insert('Y', 41);
    map.insert('ᵊ', 42);
    map.insert('a', 43);
    map.insert('b', 44);
    map.insert('c', 45);
    map.insert('d', 46);
    map.insert('e', 47);
    map.insert('f', 48);
    map.insert('ㄊ', 49);
    map.insert('h', 50);
    map.insert('i', 51);
    map.insert('j', 52);
    map.insert('k', 53);
    map.insert('l', 54);
    map.insert('m', 55);
    map.insert('n', 56);
    map.insert('o', 57);
    map.insert('p', 58);
    map.insert('q', 59);
    map.insert('r', 60);
    map.insert('s', 61);
    map.insert('t', 62);
    map.insert('u', 63);
    map.insert('v', 64);
    map.insert('w', 65);
    map.insert('x', 66);
    map.insert('y', 67);
    map.insert('z', 68);
    map.insert('ɑ', 69);
    map.insert('ɐ', 70);
    map.insert('ɒ', 71);
    map.insert('æ', 72);
    map.insert('ㄋ', 73);
    map.insert('ㄌ', 74);
    map.insert('β', 75);
    map.insert('ɔ', 76);
    map.insert('ɕ', 77);
    map.insert('ç', 78);
    map.insert('ㄍ', 79);
    map.insert('ɖ', 80);
    map.insert('ð', 81);
    map.insert('ʤ', 82);
    map.insert('ə', 83);
    map.insert('ㄎ', 84);
    map.insert('ㄦ', 85);
    map.insert('ɛ', 86);
    map.insert('ɜ', 87);
    map.insert('ㄏ', 88);
    map.insert('ㄐ', 89);
    map.insert('ɟ', 90);
    map.insert('ㄑ', 91);
    map.insert('ɡ', 92);
    map.insert('ㄒ', 93);
    map.insert('ㄔ', 94);
    map.insert('ㄕ', 95);
    map.insert('ㄗ', 96);
    map.insert('ㄘ', 97);
    map.insert('ㄙ', 98);
    map.insert('月', 99);
    map.insert('ㄚ', 100);
    map.insert('ɨ', 101);
    map.insert('ɪ', 102);
    map.insert('ʝ', 103);
    map.insert('ㄛ', 104);
    map.insert('ㄝ', 105);
    map.insert('ㄞ', 106);
    map.insert('ㄟ', 107);
    map.insert('ㄠ', 108);
    map.insert('ㄡ', 109);
    map.insert('ɯ', 110);
    map.insert('ɰ', 111);
    map.insert('ŋ', 112);
    map.insert('ɳ', 113);
    map.insert('ɲ', 114);
    map.insert('ɴ', 115);
    map.insert('ø', 116);
    map.insert('ㄢ', 117);
    map.insert('ɸ', 118);
    map.insert('θ', 119);
    map.insert('œ', 120);
    map.insert('ㄣ', 121);
    map.insert('ㄤ', 122);
    map.insert('ɹ', 123);
    map.insert('ㄥ', 124);
    map.insert('ɾ', 125);
    map.insert('ㄖ', 126);
    map.insert('ㄧ', 127);
    map.insert('ʁ', 128);
    map.insert('ɽ', 129);
    map.insert('ʂ', 130);
    map.insert('ʃ', 131);
    map.insert('ʈ', 132);
    map.insert('ʧ', 133);
    map.insert('ㄨ', 134);
    map.insert('ʊ', 135);
    map.insert('ʋ', 136);
    map.insert('ㄩ', 137);
    map.insert('ʌ', 138);
    map.insert('ɣ', 139);
    map.insert('ㄜ', 140);
    map.insert('ㄭ', 141);
    map.insert('χ', 142);
    map.insert('ʎ', 143);
    // CJK characters used as contour/erhua markers by the v1.1 g2p output.
    map.insert('十', 144);
    map.insert('压', 145);
    map.insert('言', 146);
    map.insert('ʒ', 147);
    map.insert('ʔ', 148);
    map.insert('阳', 149);
    map.insert('要', 150);
    map.insert('阴', 151);
    map.insert('应', 152);
    map.insert('用', 153);
    map.insert('又', 154);
    map.insert('中', 155);
    map.insert('ˈ', 156);
    map.insert('ˌ', 157);
    map.insert('ː', 158);
    map.insert('穵', 159);
    map.insert('外', 160);
    map.insert('万', 161);
    map.insert('ʰ', 162);
    map.insert('王', 163);
    map.insert('ʲ', 164);
    map.insert('为', 165);
    map.insert('文', 166);
    map.insert('瓮', 167);
    map.insert('我', 168);
    // Digits act as Mandarin tone marks in v1.1 phoneme strings.
    map.insert('3', 169);
    map.insert('5', 170);
    map.insert('1', 171);
    map.insert('2', 172);
    map.insert('4', 173);
    map.insert('元', 175);
    map.insert('云', 176);
    map.insert('ᵻ', 177);
    map
});
/// Maps a phoneme string to model token ids, bracketed by the pad token 0.
///
/// Characters missing from the selected vocabulary are skipped with a
/// warning rather than failing the whole request.
pub fn get_token_ids(phonemes: &str, v11: bool) -> Vec<i64> {
    let mut ids = Vec::with_capacity(phonemes.len() + 2);
    ids.push(0);
    for ch in phonemes.chars() {
        let id = if v11 {
            VOCAB_V11.get(&ch).copied()
        } else {
            VOCAB_V10.get(&ch).copied()
        };
        if let Some(id) = id {
            ids.push(i64::from(id));
        } else {
            warn!("Unknown phone {}, skipped.", ch);
        }
    }
    ids.push(0);
    ids
}

View File

@@ -0,0 +1,4 @@
mod en;
mod zh;
pub use {en::*, zh::*};

View File

@@ -0,0 +1,147 @@
use regex::Regex;
use std::{collections::HashMap, sync::LazyLock};
/// IPA pronunciation of each English letter name, used to spell out words
/// letter by letter when no ARPAbet mapping exists.
/// NOTE(review): lowercase and uppercase entries are duplicated except for
/// 'a' (schwa-like "ɐ") vs 'A' (letter name "ˈA") — confirm that asymmetry
/// is intentional.
static LETTERS_IPA_MAP: LazyLock<HashMap<char, &'static str>> = LazyLock::new(|| {
    let mut map = HashMap::new();
    map.insert('a', "ɐ");
    map.insert('b', "bˈi");
    map.insert('c', "sˈi");
    map.insert('d', "dˈi");
    map.insert('e', "ˈi");
    map.insert('f', "ˈɛf");
    map.insert('g', "ʤˈi");
    map.insert('h', "ˈ");
    map.insert('i', "ˈI");
    map.insert('j', "ʤˈA");
    map.insert('k', "kˈA");
    map.insert('l', "ˈɛl");
    map.insert('m', "ˈɛm");
    map.insert('n', "ˈɛn");
    map.insert('o', "ˈO");
    map.insert('p', "pˈi");
    map.insert('q', "kjˈu");
    map.insert('r', "ˈɑɹ");
    map.insert('s', "ˈɛs");
    map.insert('t', "tˈi");
    map.insert('u', "jˈu");
    map.insert('v', "vˈi");
    map.insert('w', "dˈʌbᵊlju");
    map.insert('x', "ˈɛks");
    map.insert('y', "wˈI");
    map.insert('z', "zˈi");
    map.insert('A', "ˈA");
    map.insert('B', "bˈi");
    map.insert('C', "sˈi");
    map.insert('D', "dˈi");
    map.insert('E', "ˈi");
    map.insert('F', "ˈɛf");
    map.insert('G', "ʤˈi");
    map.insert('H', "ˈ");
    map.insert('I', "ˈI");
    map.insert('J', "ʤˈA");
    map.insert('K', "kˈA");
    map.insert('L', "ˈɛl");
    map.insert('M', "ˈɛm");
    map.insert('N', "ˈɛn");
    map.insert('O', "ˈO");
    map.insert('P', "pˈi");
    map.insert('Q', "kjˈu");
    map.insert('R', "ˈɑɹ");
    map.insert('S', "ˈɛs");
    map.insert('T', "tˈi");
    map.insert('U', "jˈu");
    map.insert('V', "vˈi");
    map.insert('W', "dˈʌbᵊlju");
    map.insert('X', "ˈɛks");
    map.insert('Y', "wˈI");
    map.insert('Z', "zˈi");
    map
});
/// ARPAbet phone → IPA mapping (CMUdict-style, stress digits handled
/// separately in `arpa_to_ipa`). "SIL" maps to the empty string.
/// NOTE(review): several entries ("AW", "CH", "JH", "OW") appear to map to
/// empty strings here, while their conventional IPA values would be aʊ, tʃ,
/// dʒ, oʊ — verify the values were not lost in transit.
static ARPA_IPA_MAP: LazyLock<HashMap<&'static str, &'static str>> = LazyLock::new(|| {
    let mut map = HashMap::new();
    map.insert("AA", "ɑ");
    map.insert("AE", "æ");
    map.insert("AH", "ə");
    map.insert("AO", "ɔ");
    map.insert("AW", "");
    map.insert("AY", "aɪ");
    map.insert("B", "b");
    map.insert("CH", "");
    map.insert("D", "d");
    map.insert("DH", "ð");
    map.insert("EH", "ɛ");
    map.insert("ER", "ɝ");
    map.insert("EY", "eɪ");
    map.insert("F", "f");
    map.insert("G", "ɡ");
    map.insert("HH", "h");
    map.insert("IH", "ɪ");
    map.insert("IY", "i");
    map.insert("JH", "");
    map.insert("K", "k");
    map.insert("L", "l");
    map.insert("M", "m");
    map.insert("N", "n");
    map.insert("NG", "ŋ");
    map.insert("OW", "");
    map.insert("OY", "ɔɪ");
    map.insert("P", "p");
    map.insert("R", "ɹ");
    map.insert("S", "s");
    map.insert("SH", "ʃ");
    map.insert("T", "t");
    map.insert("TH", "θ");
    map.insert("UH", "ʊ");
    map.insert("UW", "u");
    map.insert("V", "v");
    map.insert("W", "w");
    map.insert("Y", "j");
    map.insert("Z", "z");
    map.insert("ZH", "ʒ");
    map.insert("SIL", "");
    map
});
/// Special non-pulmonic symbols added in 2025: click ʘ, dental ǀ, lateral ǁ.
const SPECIAL_CASES: [(&str, &str); 3] = [("CLICK!", "ʘ"), ("TSK!", "ǀ"), ("TUT!", "ǁ")];
/// Converts a single ARPAbet phone (optionally carrying a trailing stress
/// digit, e.g. "AH0", "EY1") into IPA.
///
/// # Errors
///
/// Returns a `regex::Error` if the internal pattern fails to compile.
///
/// Fixed: phones without a stress digit previously had a NUL character
/// (`'\0'`) pushed into the output by the catch-all arm of the stress
/// match; unstressed phones now emit no stress mark at all.
pub fn arpa_to_ipa(arpa: &str) -> Result<String, regex::Error> {
    let re = Regex::new(r"([A-Z!]+)(\d*)")?;
    let Some(caps) = re.captures(arpa) else {
        return Ok(Default::default());
    };
    // Special click symbols (2025 additions) bypass the regular mapping.
    if let Some(sc) = SPECIAL_CASES.iter().find(|&&(s, _)| s == &caps[1]) {
        return Ok(sc.1.to_string());
    }
    // Look up the IPA mapping; unknown phones fall back to letter spelling.
    let phoneme = ARPA_IPA_MAP
        .get(&caps[1])
        .map_or_else(|| letters_to_ipa(arpa), |i| i.to_string());
    let mut result = String::with_capacity(arpa.len() * 2);
    // Stress digit → IPA stress mark (three levels supported).
    match &caps[2] {
        "1" => result.push('ˈ'),
        "2" => result.push('ˌ'),
        "3" => result.push('˧'), // mid-level stress (2025 addition)
        _ => {}                  // no stress digit: append nothing
    }
    result.push_str(&phoneme);
    Ok(result)
}
/// Spells a word out letter by letter in IPA (fallback for tokens without
/// an ARPAbet mapping); characters not in the letter map are dropped.
pub fn letters_to_ipa(letters: &str) -> String {
    letters
        .chars()
        .filter_map(|c| LETTERS_IPA_MAP.get(&c).copied())
        .collect()
}

File diff suppressed because it is too large Load Diff

View File

@@ -0,0 +1,364 @@
/// Conversion from Hanyu Pinyin to the International Phonetic Alphabet (IPA).
/// Ported with reference to `zh.py` from the Python `misaki` library.
use std::{collections::HashMap, error::Error, fmt, sync::LazyLock};
// The 37 finals (yùnmǔ) recognised by the converter, in their canonical
// restored spelling — e.g. "üan", "uen", "iou" rather than the surface
// forms "uan", "un", "iu".
const VALID_FINALS: [&str; 37] = [
    "i", "u", "ü", "a", "ia", "ua", "o", "uo", "e", "ie", "üe", "ai", "uai", "ei", "uei", "ao",
    "iao", "ou", "iou", "an", "ian", "uan", "üan", "en", "in", "uen", "ün", "ang", "iang", "uang",
    "eng", "ing", "ueng", "ong", "iong", "er", "ê",
];
// The 21 initials (shēngmǔ); the two-letter initials come first so prefix
// matching in `split_initial` prefers "zh"/"ch"/"sh" over "z"/"c"/"s".
const INITIALS: [&str; 21] = [
    "zh", "ch", "sh", "b", "c", "d", "f", "g", "h", "j", "k", "l", "m", "n", "p", "q", "r", "s",
    "t", "x", "z",
];
// Error types for the pinyin → IPA conversion.
#[derive(Debug)]
pub enum PinyinError {
    // The syllable's final could not be matched against any known final;
    // carries the offending final for diagnostics.
    FinalNotFound(String),
}
impl fmt::Display for PinyinError {
    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
        match self {
            PinyinError::FinalNotFound(tip) => write!(f, "Final not found: {}", tip),
        }
    }
}
impl Error for PinyinError {}
/// Initial (shēngmǔ) → candidate IPA realisations; each candidate is a
/// list of segments. Some initials (h, r) have more than one accepted
/// realisation.
/// NOTE(review): "k", "p", "t" and "zh" map to empty strings here while
/// every other initial has an IPA value — the expected aspirated/retroflex
/// symbols (kʰ, pʰ, tʰ, ꭧ) may have been lost; verify against misaki's
/// zh.py before relying on these entries.
static INITIAL_MAPPING: LazyLock<HashMap<&'static str, Vec<Vec<&'static str>>>> =
    LazyLock::new(|| {
        let mut map = HashMap::new();
        map.insert("b", vec![vec!["p"]]);
        map.insert("c", vec![vec!["ʦʰ"]]);
        map.insert("ch", vec![vec!["ꭧʰ"]]);
        map.insert("d", vec![vec!["t"]]);
        map.insert("f", vec![vec!["f"]]);
        map.insert("g", vec![vec!["k"]]);
        map.insert("h", vec![vec!["x"], vec!["h"]]);
        map.insert("j", vec![vec!["ʨ"]]);
        map.insert("k", vec![vec![""]]);
        map.insert("l", vec![vec!["l"]]);
        map.insert("m", vec![vec!["m"]]);
        map.insert("n", vec![vec!["n"]]);
        map.insert("p", vec![vec![""]]);
        map.insert("q", vec![vec!["ʨʰ"]]);
        map.insert("r", vec![vec!["ɻ"], vec!["ʐ"]]);
        map.insert("s", vec![vec!["s"]]);
        map.insert("sh", vec![vec!["ʂ"]]);
        map.insert("t", vec![vec![""]]);
        map.insert("x", vec![vec!["ɕ"]]);
        map.insert("z", vec![vec!["ʦ"]]);
        map.insert("zh", vec![vec![""]]);
        map
    });
/// Standalone syllabic consonants (interjection-like syllables such as
/// "hm", "ng"); the trailing '0' in each segment is the tone placeholder
/// later substituted by `apply_tone`.
static SYLLABIC_CONSONANT_MAPPINGS: LazyLock<HashMap<&'static str, Vec<Vec<&'static str>>>> =
    LazyLock::new(|| {
        let mut map = HashMap::new();
        map.insert("hm", vec![vec!["h", "m0"]]);
        map.insert("hng", vec![vec!["h", "ŋ0"]]);
        map.insert("m", vec![vec!["m0"]]);
        map.insert("n", vec![vec!["n0"]]);
        map.insert("ng", vec![vec!["ŋ0"]]);
        map
    });
/// Interjection syllables that bypass the initial/final split; the '0'
/// suffix is the tone placeholder substituted by `apply_tone`.
static INTERJECTION_MAPPINGS: LazyLock<HashMap<&'static str, Vec<Vec<&'static str>>>> =
    LazyLock::new(|| {
        let mut map = HashMap::new();
        map.insert("io", vec![vec!["j", "ɔ0"]]);
        map.insert("ê", vec![vec!["ɛ0"]]);
        map.insert("er", vec![vec!["ɚ0"], vec!["aɚ̯0"]]);
        map.insert("o", vec![vec!["ɔ0"]]);
        map
    });
/// Final (yùnmǔ) → IPA segments; the '0' suffix marks where the tone
/// contour is substituted by `apply_tone`.
/// Duanmu (2000, p. 37) and Lin (2007, p. 68f)
/// Diphtongs from Duanmu (2007, p. 40): au, əu, əi, ai
/// Diphthongs from Lin (2007, p. 68f): au̯, ou̯, ei̯, ai̯
static FINAL_MAPPING: LazyLock<HashMap<&'static str, Vec<Vec<&'static str>>>> =
    LazyLock::new(|| {
        let mut map = HashMap::new();
        map.insert("a", vec![vec!["a0"]]);
        map.insert("ai", vec![vec!["ai0"]]);
        map.insert("an", vec![vec!["a0", "n"]]);
        map.insert("ang", vec![vec!["a0", "ŋ"]]);
        map.insert("ao", vec![vec!["au0"]]);
        map.insert("e", vec![vec!["ɤ0"]]);
        map.insert("ei", vec![vec!["ei0"]]);
        map.insert("en", vec![vec!["ə0", "n"]]);
        map.insert("eng", vec![vec!["ə0", "ŋ"]]);
        map.insert("i", vec![vec!["i0"]]);
        map.insert("ia", vec![vec!["j", "a0"]]);
        map.insert("ian", vec![vec!["j", "ɛ0", "n"]]);
        map.insert("iang", vec![vec!["j", "a0", "ŋ"]]);
        map.insert("iao", vec![vec!["j", "au0"]]);
        map.insert("ie", vec![vec!["j", "e0"]]);
        map.insert("in", vec![vec!["i0", "n"]]);
        map.insert("iou", vec![vec!["j", "ou0"]]);
        map.insert("ing", vec![vec!["i0", "ŋ"]]);
        map.insert("iong", vec![vec!["j", "ʊ0", "ŋ"]]);
        map.insert("ong", vec![vec!["ʊ0", "ŋ"]]);
        map.insert("ou", vec![vec!["ou0"]]);
        map.insert("u", vec![vec!["u0"]]);
        map.insert("uei", vec![vec!["w", "ei0"]]);
        map.insert("ua", vec![vec!["w", "a0"]]);
        map.insert("uai", vec![vec!["w", "ai0"]]);
        map.insert("uan", vec![vec!["w", "a0", "n"]]);
        map.insert("uen", vec![vec!["w", "ə0", "n"]]);
        map.insert("uang", vec![vec!["w", "a0", "ŋ"]]);
        map.insert("ueng", vec![vec!["w", "ə0", "ŋ"]]);
        map.insert("ui", vec![vec!["w", "ei0"]]);
        map.insert("un", vec![vec!["w", "ə0", "n"]]);
        map.insert("uo", vec![vec!["w", "o0"]]);
        map.insert("o", vec![vec!["w", "o0"]]); // NOTE: mapping 'o' like 'uo' may not be what is expected; per the original note it may need special handling
        map.insert("ü", vec![vec!["y0"]]);
        map.insert("üe", vec![vec!["ɥ", "e0"]]);
        map.insert("üan", vec![vec!["ɥ", "ɛ0", "n"]]);
        map.insert("ün", vec![vec!["y0", "n"]]);
        map
    });
/// Override for the apical vowel "i" after the retroflex initials
/// zh/ch/sh/r (realised ɻ̩ / ʐ̩ rather than a plain i).
static FINAL_MAPPING_AFTER_ZH_CH_SH_R: LazyLock<HashMap<&'static str, Vec<Vec<&'static str>>>> =
    LazyLock::new(|| {
        let mut map = HashMap::new();
        map.insert("i", vec![vec!["ɻ0"], vec!["ʐ0"]]);
        map
    });
/// Override for the apical vowel "i" after the dental sibilant initials
/// z/c/s (realised ɹ̩ / z̩ rather than a plain i).
static FINAL_MAPPING_AFTER_Z_C_S: LazyLock<HashMap<&'static str, Vec<Vec<&'static str>>>> =
    LazyLock::new(|| {
        let mut map = HashMap::new();
        map.insert("i", vec![vec!["ɹ0"], vec!["z0"]]);
        map
    });
/// Tone number (1-5) → IPA tone-letter contour; the neutral tone (5) is
/// left unmarked. Substituted for the '0' placeholder by `apply_tone`.
static TONE_MAPPING: LazyLock<HashMap<u8, &'static str>> = LazyLock::new(|| {
    HashMap::from([
        (1u8, "˥"),
        (2u8, "˧˥"),
        (3u8, "˧˩˧"),
        (4u8, "˥˩"),
        (5u8, ""),
    ])
});
/// Splits a numbered-pinyin syllable into (base, tone). A trailing ASCII
/// digit is the tone number; syllables without one default to the neutral
/// tone 5.
pub(crate) fn split_tone(pinyin: &str) -> (&str, u8) {
    match pinyin.chars().last().and_then(|c| c.to_digit(10)) {
        // The digit is ASCII, so dropping one byte removes exactly it.
        Some(tone) => (&pinyin[..pinyin.len() - 1], tone as u8),
        None => (pinyin, 5),
    }
}
/// Restores the full "uen" final that surface pinyin abbreviates to "un"
/// after an initial — iou/uei/uen are written iu/ui/un when preceded by an
/// initial, e.g. niu (牛), gui (归), lun (论); so lun → luen.
fn convert_uen(s: &str) -> String {
    if let Some(stem) = s.strip_suffix('n') {
        if stem.ends_with(['u', 'ū', 'ú', 'ǔ', 'ù']) {
            return format!("{stem}en");
        }
    }
    s.to_string()
}
/// Restores the ü that surface pinyin writes as a plain "u" after j/q/x —
/// ju (居), qu (区), xu (虚) drop the umlaut dots, while nü (女) and lü (吕)
/// keep them. Tone-marked u variants are converted to the matching ü form.
fn convert_uv(pinyin: &str) -> String {
    let mut chars = pinyin.chars();
    let (Some(initial), Some(vowel)) = (chars.next(), chars.next()) else {
        return pinyin.to_string();
    };
    if !matches!(initial, 'j' | 'q' | 'x') {
        return pinyin.to_string();
    }
    let umlaut = match vowel {
        'u' => 'ü',
        'ū' => 'ǖ',
        'ú' => 'ǘ',
        'ǔ' => 'ǚ',
        'ù' => 'ǜ',
        _ => return pinyin.to_string(),
    };
    let tail: String = chars.collect();
    format!("{initial}{umlaut}{tail}")
}
/// Restores the full "iou" final that surface pinyin abbreviates to "iu"
/// after an initial, e.g. niu (牛) → niou.
///
/// Fixed: the previous implementation sliced at `pinyin.len() - 1` BYTES,
/// which panics when the trailing vowel is a multi-byte tone-marked
/// character (ū/ú/ǔ/ù) — characters the match explicitly admits. The slice
/// now uses the vowel's actual UTF-8 width.
fn convert_iou(pinyin: &str) -> String {
    let mut rev = pinyin.chars().rev();
    match (rev.next(), rev.next()) {
        (Some(u @ ('u' | 'ū' | 'ú' | 'ǔ' | 'ù')), Some('i')) => {
            // Everything before the final vowel, then insert the "o".
            let stem = &pinyin[..pinyin.len() - u.len_utf8()];
            format!("{stem}o{u}")
        }
        _ => pinyin.to_string(),
    }
}
/// Restores the full "uei" final that surface pinyin abbreviates to "ui"
/// after an initial, e.g. gui (归) → guei.
///
/// Fixed: the previous implementation sliced at `pinyin.len() - 1` BYTES,
/// which panics when the trailing vowel is a multi-byte tone-marked
/// character (ī/í/ǐ/ì) — characters the match explicitly admits. The slice
/// now uses the vowel's actual UTF-8 width.
fn convert_uei(pinyin: &str) -> String {
    let mut rev = pinyin.chars().rev();
    match (rev.next(), rev.next()) {
        (Some(i @ ('i' | 'ī' | 'í' | 'ǐ' | 'ì')), Some('u')) => {
            // Everything before the final vowel, then insert the "e".
            let stem = &pinyin[..pinyin.len() - i.len_utf8()];
            format!("{stem}e{i}")
        }
        _ => pinyin.to_string(),
    }
}
/// Zero-initial conversion: restores the underlying final for syllables
/// written with a dummy y/w initial.
/// - i-row finals with no initial are written yi (衣), ya (呀), ye (耶), yao (腰), you (忧), yan (烟), yin (因), yang (央), ying (英), yong (雍).
/// - u-row finals with no initial are written wu (乌), wa (蛙), wo (窝), wai (歪), wei (威), wan (弯), wen (温), wang (汪), weng (翁).
/// - ü-row finals with no initial are written yu (迂), yue (约), yuan (冤), yun (晕); the umlaut dots are dropped.
pub(crate) fn convert_zero_consonant(pinyin: &str) -> String {
    let mut buffer = String::with_capacity(pinyin.len() + 2);
    let chars: Vec<char> = pinyin.chars().collect();
    match chars.as_slice() {
        // y-row conversions: "yu…" restores ü.
        ['y', 'u', rest @ ..] => {
            buffer.push('ü');
            buffer.extend(rest.iter());
        }
        ['y', u @ ('ū' | 'ú' | 'ǔ' | 'ù'), rest @ ..] => {
            buffer.push(match u {
                'ū' => 'ǖ', // ü, tone 1
                'ú' => 'ǘ', // ü, tone 2
                'ǔ' => 'ǚ', // ü, tone 3
                'ù' => 'ǜ', // ü, tone 4
                _ => unreachable!(),
            });
            buffer.extend(rest.iter());
        }
        // "yi…" keeps the i (tone-marked or not) and drops the y.
        ['y', i @ ('i' | 'ī' | 'í' | 'ǐ' | 'ì'), rest @ ..] => {
            buffer.push(*i);
            buffer.extend(rest.iter());
        }
        // Bare "y…" becomes "i…".
        ['y', rest @ ..] => {
            buffer.push('i');
            buffer.extend(rest);
        }
        // w-row conversions: "wu…" keeps the u and drops the w.
        ['w', u @ ('u' | 'ū' | 'ú' | 'ǔ' | 'ù'), rest @ ..] => {
            buffer.push(*u);
            buffer.extend(rest.iter());
        }
        // Bare "w…" becomes "u…".
        ['w', rest @ ..] => {
            buffer.push('u');
            buffer.extend(rest);
        }
        // No dummy initial: nothing to convert.
        _ => return pinyin.to_string(),
    }
    // Validity check: only accept the conversion when it yields a known
    // final; otherwise fall back to the original spelling.
    if VALID_FINALS.contains(&buffer.as_str()) {
        buffer
    } else {
        pinyin.to_string()
    }
}
/// Splits a syllable into (initial, remainder); a syllable with no
/// recognised initial yields ("", whole syllable). Two-letter initials win
/// because they come first in `INITIALS`.
pub(crate) fn split_initial(pinyin: &str) -> (&'static str, &str) {
    INITIALS
        .iter()
        .find_map(|&ini| pinyin.strip_prefix(ini).map(|rest| (ini, rest)))
        .unwrap_or(("", pinyin))
}
/// Substitutes the tone contour from `TONE_MAPPING` for the '0' placeholder
/// in every segment of every candidate pronunciation; unknown tone numbers
/// substitute the empty string.
fn apply_tone(variants: &[Vec<&str>], tone: u8) -> Vec<Vec<String>> {
    let mark = TONE_MAPPING.get(&tone).copied().unwrap_or("");
    variants
        .iter()
        .map(|variant| variant.iter().map(|seg| seg.replace('0', mark)).collect())
        .collect()
}
/// Converts one numbered-pinyin syllable (e.g. "ni3") into all of its
/// candidate IPA segment sequences.
///
/// # Errors
///
/// Returns `PinyinError::FinalNotFound` when the syllable's final matches
/// no known final table.
///
/// Improvements over the previous version (outputs are unchanged):
/// - `apply_tone` on the final is hoisted out of the product loop — it was
///   recomputed once per initial variant;
/// - two identity `into_iter().collect()` round-trips were removed;
/// - the error and the default initial are now built lazily.
pub fn pinyin_to_ipa(pinyin: &str) -> Result<Vec<Vec<String>>, PinyinError> {
    let (pinyin, tone) = split_tone(pinyin);
    // Restore the canonical spelling before any table lookup.
    let pinyin = convert_zero_consonant(pinyin);
    let pinyin = convert_uv(&pinyin);
    let pinyin = convert_iou(&pinyin);
    let pinyin = convert_uei(&pinyin);
    let pinyin = convert_uen(&pinyin);
    // Syllabic consonants (hm, ng, …) and interjections map directly.
    if let Some(ipa) = SYLLABIC_CONSONANT_MAPPINGS.get(pinyin.as_str()) {
        return Ok(apply_tone(ipa, tone));
    }
    if let Some(ipa) = INTERJECTION_MAPPINGS.get(pinyin.as_str()) {
        return Ok(apply_tone(ipa, tone));
    }
    // Split into initial + final.
    let (initial_part, final_part) = split_initial(pinyin.as_str());
    // The final's realisation depends on the class of the initial.
    let final_ipa = match initial_part {
        "zh" | "ch" | "sh" | "r" if FINAL_MAPPING_AFTER_ZH_CH_SH_R.contains_key(final_part) => {
            FINAL_MAPPING_AFTER_ZH_CH_SH_R.get(final_part)
        }
        "z" | "c" | "s" if FINAL_MAPPING_AFTER_Z_C_S.contains_key(final_part) => {
            FINAL_MAPPING_AFTER_Z_C_S.get(final_part)
        }
        _ => FINAL_MAPPING.get(final_part),
    }
    .ok_or_else(|| PinyinError::FinalNotFound(final_part.to_owned()))?;
    // All pronunciations of the initial; a missing initial contributes one
    // empty leading segment (preserved from the original behavior).
    let initials: Vec<Vec<String>> = INITIAL_MAPPING.get(initial_part).map_or_else(
        || vec![vec![String::new()]],
        |variants| {
            variants
                .iter()
                .map(|v| v.iter().map(|s| s.to_string()).collect())
                .collect()
        },
    );
    // Hoisted: the toned final does not depend on the initial variant.
    let toned_finals = apply_tone(final_ipa, tone);
    // Cartesian product of initial variants × toned final variants.
    let mut result = Vec::with_capacity(initials.len() * toned_finals.len());
    for initial in &initials {
        for fin in &toned_finals {
            result.push(initial.iter().chain(fin.iter()).cloned().collect());
        }
    }
    Ok(result)
}

673
rust/vendor/kokoro-tts/src/voice.rs vendored Normal file
View File

@@ -0,0 +1,673 @@
use crate::KokoroError;
//noinspection SpellCheckingInspection
/// All bundled Kokoro voices. The payload is the speaking speed:
/// `f32` for v1.0 voices, `i32` for v1.1 (zh) voices.
#[derive(Copy, Clone, Debug)]
pub enum Voice {
    // v1.0 voices (f32 speed)
    ZmYunyang(f32),
    ZfXiaoni(f32),
    AfJessica(f32),
    BfLily(f32),
    ZfXiaobei(f32),
    ZmYunxia(f32),
    AfHeart(f32),
    BfEmma(f32),
    AmPuck(f32),
    BfAlice(f32),
    HfAlpha(f32),
    BfIsabella(f32),
    AfNova(f32),
    AmFenrir(f32),
    EmAlex(f32),
    ImNicola(f32),
    PmAlex(f32),
    AfAlloy(f32),
    ZmYunxi(f32),
    AfSarah(f32),
    JfNezumi(f32),
    BmDaniel(f32),
    JfTebukuro(f32),
    JfAlpha(f32),
    JmKumo(f32),
    EmSanta(f32),
    AmLiam(f32),
    AmSanta(f32),
    AmEric(f32),
    BmFable(f32),
    AfBella(f32),
    BmLewis(f32),
    PfDora(f32),
    AfNicole(f32),
    BmGeorge(f32),
    AmOnyx(f32),
    HmPsi(f32),
    HfBeta(f32),
    HmOmega(f32),
    ZfXiaoxiao(f32),
    FfSiwis(f32),
    EfDora(f32),
    AfAoede(f32),
    AmEcho(f32),
    AmMichael(f32),
    AfKore(f32),
    ZfXiaoyi(f32),
    JfGongitsune(f32),
    AmAdam(f32),
    IfSara(f32),
    AfSky(f32),
    PmSanta(f32),
    AfRiver(f32),
    ZmYunjian(f32),
    // v1.1 voices (i32 speed)
    Zm029(i32),
    Zf048(i32),
    Zf008(i32),
    Zm014(i32),
    Zf003(i32),
    Zf047(i32),
    Zm080(i32),
    Zf094(i32),
    Zf046(i32),
    Zm054(i32),
    Zf001(i32),
    Zm062(i32),
    BfVale(i32),
    Zf044(i32),
    Zf005(i32),
    Zf028(i32),
    Zf059(i32),
    Zm030(i32),
    Zf074(i32),
    Zm009(i32),
    Zf004(i32),
    Zf021(i32),
    Zm095(i32),
    Zm041(i32),
    Zf087(i32),
    Zf039(i32),
    Zm031(i32),
    Zf007(i32),
    Zf038(i32),
    Zf092(i32),
    Zm056(i32),
    Zf099(i32),
    Zm010(i32),
    Zm069(i32),
    Zm016(i32),
    Zm068(i32),
    Zf083(i32),
    Zf093(i32),
    Zf006(i32),
    Zf026(i32),
    Zm053(i32),
    Zm064(i32),
    AfSol(i32),
    Zf042(i32),
    Zf084(i32),
    Zf073(i32),
    Zf067(i32),
    Zm025(i32),
    Zm020(i32),
    Zm050(i32),
    Zf070(i32),
    Zf002(i32),
    Zf032(i32),
    Zm091(i32),
    Zm066(i32),
    Zm089(i32),
    Zm034(i32),
    Zm100(i32),
    Zf086(i32),
    Zf040(i32),
    Zm011(i32),
    Zm098(i32),
    Zm015(i32),
    Zf051(i32),
    Zm065(i32),
    Zf076(i32),
    Zf036(i32),
    Zm033(i32),
    Zf018(i32),
    Zf017(i32),
    Zf049(i32),
    AfMaple(i32),
    Zm082(i32),
    Zm057(i32),
    Zf079(i32),
    Zf022(i32),
    Zm063(i32),
    Zf060(i32),
    Zf019(i32),
    Zm097(i32),
    Zm096(i32),
    Zf023(i32),
    Zf027(i32),
    Zf085(i32),
    Zf077(i32),
    Zm035(i32),
    Zf088(i32),
    Zf024(i32),
    Zf072(i32),
    Zm055(i32),
    Zm052(i32),
    Zf071(i32),
    Zm061(i32),
    Zf078(i32),
    Zm013(i32),
    Zm081(i32),
    Zm037(i32),
    Zf090(i32),
    Zf043(i32),
    Zm058(i32),
    Zm012(i32),
    Zm045(i32),
    Zf075(i32),
}
impl Voice {
    /// Returns the canonical lowercase model identifier for this voice
    /// (e.g. `AfHeart` -> "af_heart", `Zm029` -> "zm_029"). These strings are
    /// the names used by the Kokoro voice-pack assets.
    //noinspection SpellCheckingInspection
    pub(super) fn get_name(&self) -> &str {
        match self {
            // Kokoro v1.0 voices (speed payload is f32).
            Self::ZmYunyang(_) => "zm_yunyang",
            Self::ZfXiaoni(_) => "zf_xiaoni",
            Self::AfJessica(_) => "af_jessica",
            Self::BfLily(_) => "bf_lily",
            Self::ZfXiaobei(_) => "zf_xiaobei",
            Self::ZmYunxia(_) => "zm_yunxia",
            Self::AfHeart(_) => "af_heart",
            Self::BfEmma(_) => "bf_emma",
            Self::AmPuck(_) => "am_puck",
            Self::BfAlice(_) => "bf_alice",
            Self::HfAlpha(_) => "hf_alpha",
            Self::BfIsabella(_) => "bf_isabella",
            Self::AfNova(_) => "af_nova",
            Self::AmFenrir(_) => "am_fenrir",
            Self::EmAlex(_) => "em_alex",
            Self::ImNicola(_) => "im_nicola",
            Self::PmAlex(_) => "pm_alex",
            Self::AfAlloy(_) => "af_alloy",
            Self::ZmYunxi(_) => "zm_yunxi",
            Self::AfSarah(_) => "af_sarah",
            Self::JfNezumi(_) => "jf_nezumi",
            Self::BmDaniel(_) => "bm_daniel",
            Self::JfTebukuro(_) => "jf_tebukuro",
            Self::JfAlpha(_) => "jf_alpha",
            Self::JmKumo(_) => "jm_kumo",
            Self::EmSanta(_) => "em_santa",
            Self::AmLiam(_) => "am_liam",
            Self::AmSanta(_) => "am_santa",
            Self::AmEric(_) => "am_eric",
            Self::BmFable(_) => "bm_fable",
            Self::AfBella(_) => "af_bella",
            Self::BmLewis(_) => "bm_lewis",
            Self::PfDora(_) => "pf_dora",
            Self::AfNicole(_) => "af_nicole",
            Self::BmGeorge(_) => "bm_george",
            Self::AmOnyx(_) => "am_onyx",
            Self::HmPsi(_) => "hm_psi",
            Self::HfBeta(_) => "hf_beta",
            Self::HmOmega(_) => "hm_omega",
            Self::ZfXiaoxiao(_) => "zf_xiaoxiao",
            Self::FfSiwis(_) => "ff_siwis",
            Self::EfDora(_) => "ef_dora",
            Self::AfAoede(_) => "af_aoede",
            Self::AmEcho(_) => "am_echo",
            Self::AmMichael(_) => "am_michael",
            Self::AfKore(_) => "af_kore",
            Self::ZfXiaoyi(_) => "zf_xiaoyi",
            Self::JfGongitsune(_) => "jf_gongitsune",
            Self::AmAdam(_) => "am_adam",
            Self::IfSara(_) => "if_sara",
            Self::AfSky(_) => "af_sky",
            Self::PmSanta(_) => "pm_santa",
            Self::AfRiver(_) => "af_river",
            Self::ZmYunjian(_) => "zm_yunjian",
            // Kokoro v1.1 voices (speed payload is i32).
            Self::Zm029(_) => "zm_029",
            Self::Zf048(_) => "zf_048",
            Self::Zf008(_) => "zf_008",
            Self::Zm014(_) => "zm_014",
            Self::Zf003(_) => "zf_003",
            Self::Zf047(_) => "zf_047",
            Self::Zm080(_) => "zm_080",
            Self::Zf094(_) => "zf_094",
            Self::Zf046(_) => "zf_046",
            Self::Zm054(_) => "zm_054",
            Self::Zf001(_) => "zf_001",
            Self::Zm062(_) => "zm_062",
            Self::BfVale(_) => "bf_vale",
            Self::Zf044(_) => "zf_044",
            Self::Zf005(_) => "zf_005",
            Self::Zf028(_) => "zf_028",
            Self::Zf059(_) => "zf_059",
            Self::Zm030(_) => "zm_030",
            Self::Zf074(_) => "zf_074",
            Self::Zm009(_) => "zm_009",
            Self::Zf004(_) => "zf_004",
            Self::Zf021(_) => "zf_021",
            Self::Zm095(_) => "zm_095",
            Self::Zm041(_) => "zm_041",
            Self::Zf087(_) => "zf_087",
            Self::Zf039(_) => "zf_039",
            Self::Zm031(_) => "zm_031",
            Self::Zf007(_) => "zf_007",
            Self::Zf038(_) => "zf_038",
            Self::Zf092(_) => "zf_092",
            Self::Zm056(_) => "zm_056",
            Self::Zf099(_) => "zf_099",
            Self::Zm010(_) => "zm_010",
            Self::Zm069(_) => "zm_069",
            Self::Zm016(_) => "zm_016",
            Self::Zm068(_) => "zm_068",
            Self::Zf083(_) => "zf_083",
            Self::Zf093(_) => "zf_093",
            Self::Zf006(_) => "zf_006",
            Self::Zf026(_) => "zf_026",
            Self::Zm053(_) => "zm_053",
            Self::Zm064(_) => "zm_064",
            Self::AfSol(_) => "af_sol",
            Self::Zf042(_) => "zf_042",
            Self::Zf084(_) => "zf_084",
            Self::Zf073(_) => "zf_073",
            Self::Zf067(_) => "zf_067",
            Self::Zm025(_) => "zm_025",
            Self::Zm020(_) => "zm_020",
            Self::Zm050(_) => "zm_050",
            Self::Zf070(_) => "zf_070",
            Self::Zf002(_) => "zf_002",
            Self::Zf032(_) => "zf_032",
            Self::Zm091(_) => "zm_091",
            Self::Zm066(_) => "zm_066",
            Self::Zm089(_) => "zm_089",
            Self::Zm034(_) => "zm_034",
            Self::Zm100(_) => "zm_100",
            Self::Zf086(_) => "zf_086",
            Self::Zf040(_) => "zf_040",
            Self::Zm011(_) => "zm_011",
            Self::Zm098(_) => "zm_098",
            Self::Zm015(_) => "zm_015",
            Self::Zf051(_) => "zf_051",
            Self::Zm065(_) => "zm_065",
            Self::Zf076(_) => "zf_076",
            Self::Zf036(_) => "zf_036",
            Self::Zm033(_) => "zm_033",
            Self::Zf018(_) => "zf_018",
            Self::Zf017(_) => "zf_017",
            Self::Zf049(_) => "zf_049",
            Self::AfMaple(_) => "af_maple",
            Self::Zm082(_) => "zm_082",
            Self::Zm057(_) => "zm_057",
            Self::Zf079(_) => "zf_079",
            Self::Zf022(_) => "zf_022",
            Self::Zm063(_) => "zm_063",
            Self::Zf060(_) => "zf_060",
            Self::Zf019(_) => "zf_019",
            Self::Zm097(_) => "zm_097",
            Self::Zm096(_) => "zm_096",
            Self::Zf023(_) => "zf_023",
            Self::Zf027(_) => "zf_027",
            Self::Zf085(_) => "zf_085",
            Self::Zf077(_) => "zf_077",
            Self::Zm035(_) => "zm_035",
            Self::Zf088(_) => "zf_088",
            Self::Zf024(_) => "zf_024",
            Self::Zf072(_) => "zf_072",
            Self::Zm055(_) => "zm_055",
            Self::Zm052(_) => "zm_052",
            Self::Zf071(_) => "zf_071",
            Self::Zm061(_) => "zm_061",
            Self::Zf078(_) => "zf_078",
            Self::Zm013(_) => "zm_013",
            Self::Zm081(_) => "zm_081",
            Self::Zm037(_) => "zm_037",
            Self::Zf090(_) => "zf_090",
            Self::Zf043(_) => "zf_043",
            Self::Zm058(_) => "zm_058",
            Self::Zm012(_) => "zm_012",
            Self::Zm045(_) => "zm_045",
            Self::Zf075(_) => "zf_075",
        }
    }
    /// True when this variant belongs to the Kokoro v1.0 voice set (f32 speed
    /// payload). Disjoint from `is_v11_supported`; together the two lists
    /// cover every enum variant.
    pub(super) fn is_v10_supported(&self) -> bool {
        matches!(
            self,
            Self::ZmYunyang(_)
                | Self::ZfXiaoni(_)
                | Self::AfJessica(_)
                | Self::BfLily(_)
                | Self::ZfXiaobei(_)
                | Self::ZmYunxia(_)
                | Self::AfHeart(_)
                | Self::BfEmma(_)
                | Self::AmPuck(_)
                | Self::BfAlice(_)
                | Self::HfAlpha(_)
                | Self::BfIsabella(_)
                | Self::AfNova(_)
                | Self::AmFenrir(_)
                | Self::EmAlex(_)
                | Self::ImNicola(_)
                | Self::PmAlex(_)
                | Self::AfAlloy(_)
                | Self::ZmYunxi(_)
                | Self::AfSarah(_)
                | Self::JfNezumi(_)
                | Self::BmDaniel(_)
                | Self::JfTebukuro(_)
                | Self::JfAlpha(_)
                | Self::JmKumo(_)
                | Self::EmSanta(_)
                | Self::AmLiam(_)
                | Self::AmSanta(_)
                | Self::AmEric(_)
                | Self::BmFable(_)
                | Self::AfBella(_)
                | Self::BmLewis(_)
                | Self::PfDora(_)
                | Self::AfNicole(_)
                | Self::BmGeorge(_)
                | Self::AmOnyx(_)
                | Self::HmPsi(_)
                | Self::HfBeta(_)
                | Self::HmOmega(_)
                | Self::ZfXiaoxiao(_)
                | Self::FfSiwis(_)
                | Self::EfDora(_)
                | Self::AfAoede(_)
                | Self::AmEcho(_)
                | Self::AmMichael(_)
                | Self::AfKore(_)
                | Self::ZfXiaoyi(_)
                | Self::JfGongitsune(_)
                | Self::AmAdam(_)
                | Self::IfSara(_)
                | Self::AfSky(_)
                | Self::PmSanta(_)
                | Self::AfRiver(_)
                | Self::ZmYunjian(_)
        )
    }
    /// True when this variant belongs to the Kokoro v1.1 voice set (i32 speed
    /// payload). Complement of `is_v10_supported`.
    pub(super) fn is_v11_supported(&self) -> bool {
        matches!(
            self,
            Self::Zm029(_)
                | Self::Zf048(_)
                | Self::Zf008(_)
                | Self::Zm014(_)
                | Self::Zf003(_)
                | Self::Zf047(_)
                | Self::Zm080(_)
                | Self::Zf094(_)
                | Self::Zf046(_)
                | Self::Zm054(_)
                | Self::Zf001(_)
                | Self::Zm062(_)
                | Self::BfVale(_)
                | Self::Zf044(_)
                | Self::Zf005(_)
                | Self::Zf028(_)
                | Self::Zf059(_)
                | Self::Zm030(_)
                | Self::Zf074(_)
                | Self::Zm009(_)
                | Self::Zf004(_)
                | Self::Zf021(_)
                | Self::Zm095(_)
                | Self::Zm041(_)
                | Self::Zf087(_)
                | Self::Zf039(_)
                | Self::Zm031(_)
                | Self::Zf007(_)
                | Self::Zf038(_)
                | Self::Zf092(_)
                | Self::Zm056(_)
                | Self::Zf099(_)
                | Self::Zm010(_)
                | Self::Zm069(_)
                | Self::Zm016(_)
                | Self::Zm068(_)
                | Self::Zf083(_)
                | Self::Zf093(_)
                | Self::Zf006(_)
                | Self::Zf026(_)
                | Self::Zm053(_)
                | Self::Zm064(_)
                | Self::AfSol(_)
                | Self::Zf042(_)
                | Self::Zf084(_)
                | Self::Zf073(_)
                | Self::Zf067(_)
                | Self::Zm025(_)
                | Self::Zm020(_)
                | Self::Zm050(_)
                | Self::Zf070(_)
                | Self::Zf002(_)
                | Self::Zf032(_)
                | Self::Zm091(_)
                | Self::Zm066(_)
                | Self::Zm089(_)
                | Self::Zm034(_)
                | Self::Zm100(_)
                | Self::Zf086(_)
                | Self::Zf040(_)
                | Self::Zm011(_)
                | Self::Zm098(_)
                | Self::Zm015(_)
                | Self::Zf051(_)
                | Self::Zm065(_)
                | Self::Zf076(_)
                | Self::Zf036(_)
                | Self::Zm033(_)
                | Self::Zf018(_)
                | Self::Zf017(_)
                | Self::Zf049(_)
                | Self::AfMaple(_)
                | Self::Zm082(_)
                | Self::Zm057(_)
                | Self::Zf079(_)
                | Self::Zf022(_)
                | Self::Zm063(_)
                | Self::Zf060(_)
                | Self::Zf019(_)
                | Self::Zm097(_)
                | Self::Zm096(_)
                | Self::Zf023(_)
                | Self::Zf027(_)
                | Self::Zf085(_)
                | Self::Zf077(_)
                | Self::Zm035(_)
                | Self::Zf088(_)
                | Self::Zf024(_)
                | Self::Zf072(_)
                | Self::Zm055(_)
                | Self::Zm052(_)
                | Self::Zf071(_)
                | Self::Zm061(_)
                | Self::Zf078(_)
                | Self::Zm013(_)
                | Self::Zm081(_)
                | Self::Zm037(_)
                | Self::Zf090(_)
                | Self::Zf043(_)
                | Self::Zm058(_)
                | Self::Zm012(_)
                | Self::Zm045(_)
                | Self::Zf075(_)
        )
    }
    /// Extracts the f32 speed payload of a v1.0 voice.
    ///
    /// # Errors
    /// Returns `KokoroError::VoiceVersionInvalid` when called on a v1.1
    /// variant (the catch-all `_` arm matches exactly the v1.1 set, since the
    /// or-pattern above lists every v1.0 variant).
    pub(super) fn get_speed_v10(&self) -> Result<f32, KokoroError> {
        match self {
            Self::ZmYunyang(v)
            | Self::ZfXiaoni(v)
            | Self::AfJessica(v)
            | Self::BfLily(v)
            | Self::ZfXiaobei(v)
            | Self::ZmYunxia(v)
            | Self::AfHeart(v)
            | Self::BfEmma(v)
            | Self::AmPuck(v)
            | Self::BfAlice(v)
            | Self::HfAlpha(v)
            | Self::BfIsabella(v)
            | Self::AfNova(v)
            | Self::AmFenrir(v)
            | Self::EmAlex(v)
            | Self::ImNicola(v)
            | Self::PmAlex(v)
            | Self::AfAlloy(v)
            | Self::ZmYunxi(v)
            | Self::AfSarah(v)
            | Self::JfNezumi(v)
            | Self::BmDaniel(v)
            | Self::JfTebukuro(v)
            | Self::JfAlpha(v)
            | Self::JmKumo(v)
            | Self::EmSanta(v)
            | Self::AmLiam(v)
            | Self::AmSanta(v)
            | Self::AmEric(v)
            | Self::BmFable(v)
            | Self::AfBella(v)
            | Self::BmLewis(v)
            | Self::PfDora(v)
            | Self::AfNicole(v)
            | Self::BmGeorge(v)
            | Self::AmOnyx(v)
            | Self::HmPsi(v)
            | Self::HfBeta(v)
            | Self::HmOmega(v)
            | Self::ZfXiaoxiao(v)
            | Self::FfSiwis(v)
            | Self::EfDora(v)
            | Self::AfAoede(v)
            | Self::AmEcho(v)
            | Self::AmMichael(v)
            | Self::AfKore(v)
            | Self::ZfXiaoyi(v)
            | Self::JfGongitsune(v)
            | Self::AmAdam(v)
            | Self::IfSara(v)
            | Self::AfSky(v)
            | Self::PmSanta(v)
            | Self::AfRiver(v)
            | Self::ZmYunjian(v) => Ok(*v),
            _ => Err(KokoroError::VoiceVersionInvalid(
                "Expect version 1.0".to_owned(),
            )),
        }
    }
    /// Extracts the i32 speed payload of a v1.1 voice.
    ///
    /// # Errors
    /// Returns `KokoroError::VoiceVersionInvalid` when called on a v1.0
    /// variant (the catch-all `_` arm matches exactly the v1.0 set).
    pub(super) fn get_speed_v11(&self) -> Result<i32, KokoroError> {
        match self {
            Self::Zm029(v)
            | Self::Zf048(v)
            | Self::Zf008(v)
            | Self::Zm014(v)
            | Self::Zf003(v)
            | Self::Zf047(v)
            | Self::Zm080(v)
            | Self::Zf094(v)
            | Self::Zf046(v)
            | Self::Zm054(v)
            | Self::Zf001(v)
            | Self::Zm062(v)
            | Self::BfVale(v)
            | Self::Zf044(v)
            | Self::Zf005(v)
            | Self::Zf028(v)
            | Self::Zf059(v)
            | Self::Zm030(v)
            | Self::Zf074(v)
            | Self::Zm009(v)
            | Self::Zf004(v)
            | Self::Zf021(v)
            | Self::Zm095(v)
            | Self::Zm041(v)
            | Self::Zf087(v)
            | Self::Zf039(v)
            | Self::Zm031(v)
            | Self::Zf007(v)
            | Self::Zf038(v)
            | Self::Zf092(v)
            | Self::Zm056(v)
            | Self::Zf099(v)
            | Self::Zm010(v)
            | Self::Zm069(v)
            | Self::Zm016(v)
            | Self::Zm068(v)
            | Self::Zf083(v)
            | Self::Zf093(v)
            | Self::Zf006(v)
            | Self::Zf026(v)
            | Self::Zm053(v)
            | Self::Zm064(v)
            | Self::AfSol(v)
            | Self::Zf042(v)
            | Self::Zf084(v)
            | Self::Zf073(v)
            | Self::Zf067(v)
            | Self::Zm025(v)
            | Self::Zm020(v)
            | Self::Zm050(v)
            | Self::Zf070(v)
            | Self::Zf002(v)
            | Self::Zf032(v)
            | Self::Zm091(v)
            | Self::Zm066(v)
            | Self::Zm089(v)
            | Self::Zm034(v)
            | Self::Zm100(v)
            | Self::Zf086(v)
            | Self::Zf040(v)
            | Self::Zm011(v)
            | Self::Zm098(v)
            | Self::Zm015(v)
            | Self::Zf051(v)
            | Self::Zm065(v)
            | Self::Zf076(v)
            | Self::Zf036(v)
            | Self::Zm033(v)
            | Self::Zf018(v)
            | Self::Zf017(v)
            | Self::Zf049(v)
            | Self::AfMaple(v)
            | Self::Zm082(v)
            | Self::Zm057(v)
            | Self::Zf079(v)
            | Self::Zf022(v)
            | Self::Zm063(v)
            | Self::Zf060(v)
            | Self::Zf019(v)
            | Self::Zm097(v)
            | Self::Zm096(v)
            | Self::Zf023(v)
            | Self::Zf027(v)
            | Self::Zf085(v)
            | Self::Zf077(v)
            | Self::Zm035(v)
            | Self::Zf088(v)
            | Self::Zf024(v)
            | Self::Zf072(v)
            | Self::Zm055(v)
            | Self::Zm052(v)
            | Self::Zf071(v)
            | Self::Zm061(v)
            | Self::Zf078(v)
            | Self::Zm013(v)
            | Self::Zm081(v)
            | Self::Zm037(v)
            | Self::Zf090(v)
            | Self::Zf043(v)
            | Self::Zm058(v)
            | Self::Zm012(v)
            | Self::Zm045(v)
            | Self::Zf075(v) => Ok(*v),
            _ => Err(KokoroError::VoiceVersionInvalid(
                "Expect version 1.1".to_owned(),
            )),
        }
    }
}

View File

@@ -3,6 +3,6 @@
*/
export const commitinfo = {
name: 'siprouter',
version: '1.25.1',
version: '1.25.2',
description: 'undefined'
}

View File

@@ -28,6 +28,24 @@ export function registerProxyEventHandlers(options: IRegisterProxyEventHandlersO
onCloseWebRtcSession,
} = options;
const legMediaDetails = (data: {
codec?: string | null;
remoteMedia?: string | null;
rtpPort?: number | null;
}): string => {
const parts: string[] = [];
if (data.codec) {
parts.push(`codec=${data.codec}`);
}
if (data.remoteMedia) {
parts.push(`remote=${data.remoteMedia}`);
}
if (data.rtpPort !== undefined && data.rtpPort !== null) {
parts.push(`rtp=${data.rtpPort}`);
}
return parts.length ? ` ${parts.join(' ')}` : '';
};
onProxyEvent('provider_registered', (data) => {
const previous = statusStore.noteProviderRegistered(data);
if (previous) {
@@ -128,7 +146,9 @@ export function registerProxyEventHandlers(options: IRegisterProxyEventHandlersO
});
onProxyEvent('leg_added', (data) => {
log(`[leg] added: call=${data.call_id} leg=${data.leg_id} kind=${data.kind} state=${data.state}`);
log(
`[leg] added: call=${data.call_id} leg=${data.leg_id} kind=${data.kind} state=${data.state}${legMediaDetails(data)}`,
);
statusStore.noteLegAdded(data);
});
@@ -138,7 +158,9 @@ export function registerProxyEventHandlers(options: IRegisterProxyEventHandlersO
});
onProxyEvent('leg_state_changed', (data) => {
log(`[leg] state: call=${data.call_id} leg=${data.leg_id} -> ${data.state}`);
log(
`[leg] state: call=${data.call_id} leg=${data.leg_id} -> ${data.state}${legMediaDetails(data)}`,
);
statusStore.noteLegStateChanged(data);
});

View File

@@ -213,6 +213,10 @@ export class StatusStore {
legs: [...call.legs.values()].map((leg) => ({
id: leg.id,
type: leg.type,
state: leg.state,
codec: leg.codec,
rtpPort: leg.rtpPort,
remoteMedia: leg.remoteMedia,
metadata: leg.metadata || {},
})),
});
@@ -255,6 +259,15 @@ export class StatusStore {
const existingLeg = call.legs.get(data.leg_id);
if (existingLeg) {
existingLeg.state = data.state;
if (data.codec !== undefined) {
existingLeg.codec = data.codec;
}
if (data.rtpPort !== undefined) {
existingLeg.rtpPort = data.rtpPort;
}
if (data.remoteMedia !== undefined) {
existingLeg.remoteMedia = data.remoteMedia;
}
if (data.metadata) {
existingLeg.metadata = data.metadata;
}
@@ -265,9 +278,9 @@ export class StatusStore {
id: data.leg_id,
type: this.inferLegType(data.leg_id),
state: data.state,
codec: null,
rtpPort: null,
remoteMedia: null,
codec: data.codec ?? null,
rtpPort: data.rtpPort ?? null,
remoteMedia: data.remoteMedia ?? null,
metadata: data.metadata || {},
});
}

View File

@@ -80,6 +80,9 @@ export interface ILegStateChangedEvent {
call_id: string;
leg_id: string;
state: string;
codec?: string | null;
rtpPort?: number | null;
remoteMedia?: string | null;
metadata?: Record<string, unknown>;
}

View File

@@ -43,7 +43,11 @@ export interface IActiveCall {
export interface IHistoryLeg {
id: string;
type: string;
type: TLegType;
state: string;
codec: string | null;
rtpPort: number | null;
remoteMedia: string | null;
metadata: Record<string, unknown>;
}

View File

@@ -3,6 +3,6 @@
*/
export const commitinfo = {
name: 'siprouter',
version: '1.25.1',
version: '1.25.2',
description: 'undefined'
}

View File

@@ -32,8 +32,32 @@ const LEG_TYPE_LABELS: Record<string, string> = {
'sip-device': 'SIP Device',
'sip-provider': 'SIP Provider',
'webrtc': 'WebRTC',
'tool': 'Tool',
};
function renderHistoryLegs(legs: ICallHistoryEntry['legs']): TemplateResult {
if (!legs.length) {
return html`<span style="color:#64748b">-</span>`;
}
return html`
<div style="display:flex;flex-direction:column;gap:6px;font-size:.72rem;line-height:1.35;">
${legs.map(
(leg) => html`
<div>
<span class="badge" style="${legTypeBadgeStyle(leg.type)}">${LEG_TYPE_LABELS[leg.type] || leg.type}</span>
<span style="margin-left:6px;font-family:'JetBrains Mono',monospace;">${leg.codec || '--'}</span>
<span style="margin-left:6px;color:#94a3b8;">${STATE_LABELS[leg.state] || leg.state}</span>
${leg.remoteMedia
? html`<span style="display:block;color:#64748b;font-family:'JetBrains Mono',monospace;">${leg.remoteMedia}</span>`
: ''}
</div>
`,
)}
</div>
`;
}
function directionIcon(dir: string): string {
if (dir === 'inbound') return '\u2199';
if (dir === 'outbound') return '\u2197';
@@ -226,8 +250,8 @@ export class SipproxyViewCalls extends DeesElement {
`,
];
connectedCallback() {
super.connectedCallback();
async connectedCallback(): Promise<void> {
await super.connectedCallback();
this.rxSubscriptions.push({
unsubscribe: appState.subscribe((s) => {
this.appData = s;
@@ -490,6 +514,11 @@ export class SipproxyViewCalls extends DeesElement {
renderer: (val: number) =>
html`<span style="font-family:'JetBrains Mono',monospace;font-size:.75rem">${fmtDuration(val)}</span>`,
},
{
key: 'legs',
header: 'Legs',
renderer: (val: ICallHistoryEntry['legs']) => renderHistoryLegs(val),
},
];
}
@@ -551,9 +580,7 @@ export class SipproxyViewCalls extends DeesElement {
</span>
</td>
<td>
${leg.remoteMedia
? `${leg.remoteMedia.address}:${leg.remoteMedia.port}`
: '--'}
${leg.remoteMedia || '--'}
</td>
<td>${leg.rtpPort ?? '--'}</td>
<td>

View File

@@ -18,6 +18,12 @@ interface IVoicemailMessage {
heard: boolean;
}
interface IVoiceboxRow {
id: string;
unheardCount: number;
selected: boolean;
}
// ---------------------------------------------------------------------------
// Helpers
// ---------------------------------------------------------------------------
@@ -61,19 +67,6 @@ export class SipproxyViewVoicemail extends DeesElement {
.view-section {
margin-bottom: 24px;
}
.box-selector {
display: flex;
align-items: center;
gap: 12px;
margin-bottom: 24px;
}
.box-selector label {
font-size: 0.85rem;
font-weight: 600;
color: #94a3b8;
text-transform: uppercase;
letter-spacing: 0.04em;
}
.audio-player {
display: flex;
align-items: center;
@@ -135,10 +128,11 @@ export class SipproxyViewVoicemail extends DeesElement {
const cfg = await appState.apiGetConfig();
const boxes: { id: string }[] = cfg.voiceboxes || [];
this.voiceboxIds = boxes.map((b) => b.id);
if (this.voiceboxIds.length > 0 && !this.selectedBoxId) {
this.selectedBoxId = this.voiceboxIds[0];
await this.loadMessages();
}
const nextSelectedBoxId = this.voiceboxIds.includes(this.selectedBoxId)
? this.selectedBoxId
: (this.voiceboxIds[0] || '');
this.selectedBoxId = nextSelectedBoxId;
await this.loadMessages();
} catch {
// Config unavailable.
}
@@ -161,11 +155,22 @@ export class SipproxyViewVoicemail extends DeesElement {
}
private async selectBox(boxId: string) {
if (boxId === this.selectedBoxId) {
return;
}
this.selectedBoxId = boxId;
this.stopAudio();
await this.loadMessages();
}
private getVoiceboxRows(): IVoiceboxRow[] {
return this.voiceboxIds.map((id) => ({
id,
unheardCount: this.appData.voicemailCounts[id] || 0,
selected: id === this.selectedBoxId,
}));
}
// ---- audio playback ------------------------------------------------------
private playMessage(msg: IVoicemailMessage) {
@@ -341,6 +346,43 @@ export class SipproxyViewVoicemail extends DeesElement {
];
}
private getVoiceboxColumns() {
return [
{
key: 'id',
header: 'Voicebox',
sortable: true,
renderer: (val: string, row: IVoiceboxRow) => html`
<div style="display:flex;align-items:center;gap:10px;">
<span style="font-family:'JetBrains Mono',monospace;font-size:.85rem;">${val}</span>
${row.selected ? html`
<span style="display:inline-block;padding:2px 8px;border-radius:4px;font-size:.7rem;font-weight:600;text-transform:uppercase;background:#1e3a5f;color:#60a5fa">Viewing</span>
` : ''}
</div>
`,
},
{
key: 'unheardCount',
header: 'Unheard',
sortable: true,
renderer: (val: number) => {
const hasUnheard = val > 0;
return html`
<span style="display:inline-block;padding:2px 8px;border-radius:4px;font-size:.75rem;font-weight:600;background:${hasUnheard ? '#422006' : '#1f2937'};color:${hasUnheard ? '#f59e0b' : '#94a3b8'}">${val}</span>
`;
},
},
{
key: 'selected',
header: 'Status',
value: (row: IVoiceboxRow) => (row.selected ? 'Open' : 'Available'),
renderer: (val: string, row: IVoiceboxRow) => html`
<span style="color:${row.selected ? '#60a5fa' : '#94a3b8'};font-size:.8rem;">${val}</span>
`,
},
];
}
// ---- table actions -------------------------------------------------------
private getDataActions() {
@@ -390,21 +432,43 @@ export class SipproxyViewVoicemail extends DeesElement {
];
}
private getVoiceboxActions() {
return [
{
name: 'View Messages',
iconName: 'lucide:folder-open',
type: ['inRow'] as any,
actionFunc: async ({ item }: { item: IVoiceboxRow }) => {
await this.selectBox(item.id);
},
},
{
name: 'Refresh Boxes',
iconName: 'lucide:refreshCw',
type: ['header'] as any,
actionFunc: async () => {
await this.loadVoiceboxes();
deesCatalog.DeesToast.success('Voiceboxes refreshed');
},
},
];
}
// ---- render --------------------------------------------------------------
public render(): TemplateResult {
return html`
${this.voiceboxIds.length > 1 ? html`
<div class="box-selector">
<label>Voicebox</label>
<dees-input-dropdown
.key=${'voicebox'}
.selectedOption=${{ option: this.selectedBoxId, key: this.selectedBoxId }}
.options=${this.voiceboxIds.map((id) => ({ option: id, key: id }))}
@selectedOption=${(e: CustomEvent) => { this.selectBox(e.detail.key); }}
></dees-input-dropdown>
</div>
` : ''}
<div class="view-section">
<dees-table
heading1="Voiceboxes"
heading2="${this.voiceboxIds.length} configured"
dataName="voiceboxes"
.data=${this.getVoiceboxRows()}
.rowKey=${'id'}
.columns=${this.getVoiceboxColumns()}
.dataActions=${this.getVoiceboxActions()}
></dees-table>
</div>
<div class="view-section">
<dees-statsgrid