feat(routing): add rule-based SIP routing for inbound and outbound calls with dashboard route management

This commit is contained in:
2026-04-10 08:22:12 +00:00
parent f3e1c96872
commit fd3a408cc2
13 changed files with 893 additions and 114 deletions

View File

@@ -1,10 +1,13 @@
/**
* TTS announcement module — pre-generates audio announcements using Kokoro TTS
* TTS announcement module — pre-generates audio announcements using espeak-ng
* and caches them as encoded RTP packets for playback during call setup.
*
* On startup, generates the announcement WAV via the Rust tts-engine binary
* (Kokoro neural TTS), encodes each 20ms frame to G.722 (for SIP) and Opus
* (for WebRTC) via the Rust transcoder, and caches the packets.
* On startup, generates the announcement WAV via espeak-ng (formant-based TTS
* with highly accurate pronunciation), encodes each 20ms frame to G.722 (for
* SIP) and Opus (for WebRTC) via the Rust transcoder, and caches the packets.
*
* Falls back to the Rust tts-engine (Kokoro neural TTS) if espeak-ng is not
* installed, and disables announcements if neither is available.
*/
import { execFileSync, execSync } from 'node:child_process';
@@ -35,35 +38,62 @@ export interface IAnnouncementCache {
let cachedAnnouncement: IAnnouncementCache | null = null;
const TTS_DIR = path.join(process.cwd(), '.nogit', 'tts');
const KOKORO_MODEL = 'kokoro-v1.0.onnx';
const KOKORO_VOICES = 'voices.bin';
const KOKORO_VOICE = 'af_bella'; // American female, clear and natural
const ANNOUNCEMENT_TEXT = "Hello. I'm connecting your call now.";
const CACHE_WAV = path.join(TTS_DIR, 'announcement.wav');
// Kokoro fallback constants.
const KOKORO_MODEL = 'kokoro-v1.0.onnx';
const KOKORO_VOICES = 'voices.bin';
const KOKORO_VOICE = 'af_bella';
// ---------------------------------------------------------------------------
// Initialization
// ---------------------------------------------------------------------------
/**
* Pre-generate the announcement audio and encode to G.722 frames.
* Must be called after the codec bridge is initialized.
* Check if espeak-ng is available on the system.
*/
export async function initAnnouncement(log: (msg: string) => void): Promise<boolean> {
/**
 * Probe the system for an `espeak-ng` executable.
 *
 * Runs `which espeak-ng` with its output captured; a non-zero exit status
 * (or any other failure to run the probe) is reported as "not available".
 *
 * @returns `true` when the probe command succeeds, `false` otherwise.
 */
function isEspeakAvailable(): boolean {
  let found = true;
  try {
    // execSync throws when the command exits non-zero.
    execSync('which espeak-ng', { stdio: 'pipe' });
  } catch {
    found = false;
  }
  return found;
}
/**
 * Generate the announcement WAV via espeak-ng (primary engine).
 *
 * espeak-ng is invoked directly with an argument vector rather than through
 * a shell command string, so the output path and the spoken text reach the
 * program verbatim even when they contain quotes, `$`, or backticks.
 *
 * @param wavPath - Destination path of the WAV file to write.
 * @param text - Text to synthesize (US English voice, 150 words per minute).
 * @param log - Sink for progress and failure messages.
 * @returns `true` when espeak-ng exits successfully, `false` when it fails,
 *   cannot be started, or exceeds the 10 second timeout.
 */
function generateViaEspeak(wavPath: string, text: string, log: (msg: string) => void): boolean {
  log('[tts] generating announcement audio via espeak-ng...');
  try {
    // No shell is involved here: each array element is passed as one argument.
    execFileSync(
      'espeak-ng',
      ['-v', 'en-us', '-s', '150', '-w', wavPath, text],
      { timeout: 10000, stdio: 'pipe' },
    );
    log('[tts] espeak-ng WAV generated');
    return true;
  } catch (e: unknown) {
    // Anything can be thrown in JS; only Error instances carry `.message`.
    const reason = e instanceof Error ? e.message : String(e);
    log(`[tts] espeak-ng failed: ${reason}`);
    return false;
  }
}
/**
* Generate announcement WAV via Kokoro TTS (fallback engine).
* Returns true on success.
*/
function generateViaKokoro(wavPath: string, text: string, log: (msg: string) => void): boolean {
const modelPath = path.join(TTS_DIR, KOKORO_MODEL);
const voicesPath = path.join(TTS_DIR, KOKORO_VOICES);
// Check if Kokoro model files exist.
if (!fs.existsSync(modelPath)) {
log('[tts] Kokoro model not found at ' + modelPath + ' — announcements disabled');
return false;
}
if (!fs.existsSync(voicesPath)) {
log('[tts] Kokoro voices not found at ' + voicesPath + ' — announcements disabled');
if (!fs.existsSync(modelPath) || !fs.existsSync(voicesPath)) {
log('[tts] Kokoro model/voices not found — Kokoro fallback unavailable');
return false;
}
// Find tts-engine binary.
const root = process.cwd();
const ttsBinPaths = [
path.join(root, 'dist_rust', 'tts-engine'),
@@ -72,53 +102,117 @@ export async function initAnnouncement(log: (msg: string) => void): Promise<bool
];
const ttsBin = ttsBinPaths.find((p) => fs.existsSync(p));
if (!ttsBin) {
log('[tts] tts-engine binary not found — announcements disabled');
log('[tts] tts-engine binary not found — Kokoro fallback unavailable');
return false;
}
log('[tts] generating announcement audio via Kokoro TTS (fallback)...');
try {
execSync(
`"${ttsBin}" --model "${modelPath}" --voices "${voicesPath}" --voice "${KOKORO_VOICE}" --output "${wavPath}" --text "${text}"`,
{ timeout: 120000, stdio: 'pipe' },
);
log('[tts] Kokoro WAV generated');
return true;
} catch (e: any) {
log(`[tts] Kokoro failed: ${e.message}`);
return false;
}
}
/**
 * Read a WAV file from disk and extract its PCM payload and sample rate.
 *
 * Walks the RIFF chunk list: the sample rate comes from the `fmt ` chunk
 * (22050 Hz is assumed when no `fmt ` chunk is present) and the payload from
 * the `data` chunk. A `data` chunk whose declared size runs past the end of
 * the file is clamped to the bytes actually present.
 *
 * @param wavPath - Path of the WAV file to read.
 * @returns `{ pcm, sampleRate }`, or `null` when the file is not a RIFF/WAVE
 *   container, its `fmt ` chunk is too short to hold a sample rate, the
 *   declared sample rate is zero, or no `data` chunk is found.
 * @throws Whatever `fs.readFileSync` throws when the file cannot be read.
 */
function readWavWithRate(wavPath: string): { pcm: Buffer; sampleRate: number } | null {
  const wav = fs.readFileSync(wavPath);
  if (wav.length < 44) return null;
  if (wav.toString('ascii', 0, 4) !== 'RIFF') return null;
  if (wav.toString('ascii', 8, 12) !== 'WAVE') return null;

  let sampleRate = 22050; // default when the file carries no fmt chunk
  let pcm: Buffer | null = null;
  let offset = 12;

  while (offset < wav.length - 8) {
    const chunkId = wav.toString('ascii', offset, offset + 4);
    const chunkSize = wav.readUInt32LE(offset + 4);
    const bodyStart = offset + 8;

    if (chunkId === 'fmt ') {
      // The sample rate occupies bytes 4..7 of the fmt body. Bail out rather
      // than read past the buffer (or into the next chunk) on a truncated file.
      if (chunkSize < 8 || bodyStart + 8 > wav.length) return null;
      sampleRate = wav.readUInt32LE(bodyStart + 4);
    } else if (chunkId === 'data') {
      pcm = wav.subarray(bodyStart, bodyStart + chunkSize);
    }

    offset = bodyStart + chunkSize;
    // Chunks are word-aligned: skip the pad byte after an odd-sized body.
    if (offset % 2 !== 0) offset++;
  }

  // A zero rate would yield zero-length 20 ms frames downstream.
  if (!pcm || sampleRate === 0) return null;
  return { pcm, sampleRate };
}
/**
* Pre-generate the announcement audio and encode to G.722 + Opus frames.
* Must be called after the codec bridge is initialized.
*
* Engine priority: espeak-ng → Kokoro → disabled.
*/
export async function initAnnouncement(log: (msg: string) => void): Promise<boolean> {
fs.mkdirSync(TTS_DIR, { recursive: true });
try {
// Generate WAV if not cached.
if (!fs.existsSync(CACHE_WAV)) {
log('[tts] generating announcement audio via Kokoro TTS...');
execSync(
`"${ttsBin}" --model "${modelPath}" --voices "${voicesPath}" --voice "${KOKORO_VOICE}" --output "${CACHE_WAV}" --text "${ANNOUNCEMENT_TEXT}"`,
{ timeout: 120000, stdio: 'pipe' },
);
log('[tts] announcement WAV generated');
let generated = false;
// Try espeak-ng first.
if (isEspeakAvailable()) {
generated = generateViaEspeak(CACHE_WAV, ANNOUNCEMENT_TEXT, log);
} else {
log('[tts] espeak-ng not installed — trying Kokoro fallback');
}
// Fall back to Kokoro.
if (!generated) {
generated = generateViaKokoro(CACHE_WAV, ANNOUNCEMENT_TEXT, log);
}
if (!generated) {
log('[tts] no TTS engine available — announcements disabled');
return false;
}
}
// Read WAV and extract raw PCM.
const wav = fs.readFileSync(CACHE_WAV);
const pcm = extractPcmFromWav(wav);
if (!pcm) {
// Read WAV and extract raw PCM + sample rate.
const result = readWavWithRate(CACHE_WAV);
if (!result) {
log('[tts] failed to parse WAV file');
return false;
}
const { pcm, sampleRate } = result;
// Wait for codec bridge to be ready.
if (!isCodecReady()) {
log('[tts] codec bridge not ready — will retry');
return false;
}
// Kokoro outputs 24000 Hz, 16-bit mono.
// We encode in chunks: 20ms at 24000 Hz = 480 samples = 960 bytes of PCM.
// The Rust encoder will resample to 16kHz internally for G.722.
const SAMPLE_RATE = 24000;
const FRAME_SAMPLES = Math.floor(SAMPLE_RATE * 0.02); // 480 samples per 20ms
// Encode in 20ms chunks. The Rust encoder resamples to each codec's native rate.
const FRAME_SAMPLES = Math.floor(sampleRate * 0.02);
const FRAME_BYTES = FRAME_SAMPLES * 2; // 16-bit = 2 bytes per sample
const totalFrames = Math.floor(pcm.length / FRAME_BYTES);
const g722Frames: Buffer[] = [];
const opusFrames: Buffer[] = [];
log(`[tts] encoding ${totalFrames} frames (${FRAME_SAMPLES} samples/frame @ ${SAMPLE_RATE}Hz)...`);
log(`[tts] encoding ${totalFrames} frames (${FRAME_SAMPLES} samples/frame @ ${sampleRate}Hz)...`);
for (let i = 0; i < totalFrames; i++) {
const framePcm = pcm.subarray(i * FRAME_BYTES, (i + 1) * FRAME_BYTES);
const pcmBuf = Buffer.from(framePcm);
const [g722, opus] = await Promise.all([
encodePcm(pcmBuf, SAMPLE_RATE, 9), // G.722 for SIP devices
encodePcm(pcmBuf, SAMPLE_RATE, 111), // Opus for WebRTC browsers
encodePcm(pcmBuf, sampleRate, 9), // G.722 for SIP devices
encodePcm(pcmBuf, sampleRate, 111), // Opus for WebRTC browsers
]);
if (g722) g722Frames.push(g722);
if (opus) opusFrames.push(opus);
@@ -236,26 +330,3 @@ export function isAnnouncementReady(): boolean {
return cachedAnnouncement !== null && cachedAnnouncement.g722Frames.length > 0;
}
// ---------------------------------------------------------------------------
// WAV parsing
// ---------------------------------------------------------------------------
/**
 * Locate the first `data` chunk of an in-memory WAV file and return its body.
 *
 * @param wav - Complete contents of a RIFF/WAVE file.
 * @returns A view onto the `data` chunk body (clamped to the buffer end), or
 *   `null` when the buffer is shorter than 44 bytes, lacks the RIFF/WAVE
 *   magic, or contains no `data` chunk.
 */
function extractPcmFromWav(wav: Buffer): Buffer | null {
  const fourCc = (at: number): string => wav.toString('ascii', at, at + 4);

  const looksLikeWav = wav.length >= 44 && fourCc(0) === 'RIFF' && fourCc(8) === 'WAVE';
  if (!looksLikeWav) return null;

  const scanLimit = wav.length - 8;
  for (let cursor = 12; cursor < scanLimit; ) {
    const bodyStart = cursor + 8;
    const bodyLength = wav.readUInt32LE(cursor + 4);
    if (fourCc(cursor) === 'data') {
      return wav.subarray(bodyStart, bodyStart + bodyLength);
    }
    // Advance past the body, plus one pad byte when that lands on an odd offset.
    const bodyEnd = bodyStart + bodyLength;
    cursor = bodyEnd + (bodyEnd % 2);
  }
  return null;
}