feat(call, voicemail, ivr): add voicemail and IVR call flows with DTMF handling, prompt playback, recording, and dashboard management

This commit is contained in:
2026-04-10 08:54:46 +00:00
parent 6ecd3f434c
commit e6bd64a534
25 changed files with 3892 additions and 10 deletions

404
ts/call/prompt-cache.ts Normal file
View File

@@ -0,0 +1,404 @@
/**
* PromptCache — manages multiple named audio prompts for IVR and voicemail.
*
* Each prompt is pre-encoded as both G.722 frames (for SIP legs) and Opus
* frames (for WebRTC legs), ready for 20ms RTP playback.
*
* Supports three sources:
* 1. TTS generation via espeak-ng (primary) or Kokoro (fallback)
* 2. Loading from a pre-existing WAV file
* 3. Programmatic tone generation (beep, etc.)
*
* The existing announcement.ts system continues to work independently;
* this module provides generalized prompt management for IVR/voicemail.
*/
import { execFileSync, execSync } from 'node:child_process';
import fs from 'node:fs';
import path from 'node:path';
import { Buffer } from 'node:buffer';
import { buildRtpHeader, rtpClockIncrement } from './leg.ts';
import { encodePcm, isCodecReady } from '../opusbridge.ts';
// ---------------------------------------------------------------------------
// Types
// ---------------------------------------------------------------------------
/**
 * A pre-encoded prompt ready for RTP playback.
 *
 * Both codec variants are stored side by side so playback can pick the one
 * matching the leg without re-encoding.
 */
export interface ICachedPrompt {
/** Unique prompt identifier; PromptCache uses it as the cache key. */
id: string;
/** G.722 encoded frames (20ms each, no RTP header). */
g722Frames: Buffer[];
/** Opus encoded frames (20ms each, no RTP header). */
opusFrames: Buffer[];
/** Total duration in milliseconds; PromptCache sets it to `g722Frames.length * 20`. */
durationMs: number;
}
// ---------------------------------------------------------------------------
// TTS helpers
// ---------------------------------------------------------------------------
/**
 * Directory for TTS assets, resolved against the process working directory
 * when this module is loaded. Generated prompt WAVs are written here and the
 * Kokoro model/voices files are looked up here.
 */
const TTS_DIR = path.join(process.cwd(), '.nogit', 'tts');
/**
 * Probe for the espeak-ng binary.
 *
 * Runs `which espeak-ng` and treats any failure of that command (non-zero
 * exit, `which` itself missing, ...) as "not available".
 *
 * @returns true when the lookup command succeeds, false otherwise
 */
function isEspeakAvailable(): boolean {
  let found = true;
  try {
    execSync('which espeak-ng', { stdio: 'pipe' });
  } catch {
    found = false;
  }
  return found;
}
/**
 * Generate a WAV file for `text` with espeak-ng (`-v en-us -s 150`).
 *
 * The binary is invoked directly with an argument vector rather than through
 * a shell command string, so quotes, `$(...)`, backticks or `;` inside the
 * prompt text or the output path are passed through literally and can never
 * be interpreted as shell syntax.
 *
 * @param wavPath - destination path handed to espeak-ng's `-w` option
 * @param text - text to synthesize
 * @returns true when espeak-ng exits successfully within 10 seconds; false on
 *          any failure (binary missing, non-zero exit, timeout)
 */
function generateViaEspeak(wavPath: string, text: string): boolean {
  try {
    execFileSync(
      'espeak-ng',
      // `--` ends option parsing so text beginning with '-' is spoken, not parsed as a flag.
      ['-v', 'en-us', '-s', '150', '-w', wavPath, '--', text],
      { timeout: 10000, stdio: 'pipe' },
    );
    return true;
  } catch {
    // Best-effort: the caller falls back to another engine when this returns false.
    return false;
  }
}
/**
 * Generate a WAV file for `text` with the Kokoro `tts-engine` binary.
 *
 * Requires `kokoro-v1.0.onnx` and `voices.bin` in TTS_DIR and a `tts-engine`
 * executable in one of the known build locations under the current working
 * directory (`dist_rust`, then the Rust release build, then the debug build).
 *
 * The engine is started directly with an argument vector instead of a shell
 * command string, so prompt text, voice names and paths containing quotes or
 * shell metacharacters are passed through verbatim and cannot inject commands.
 *
 * @param wavPath - destination path passed as `--output`
 * @param text - text to synthesize
 * @param voice - voice name passed as `--voice`
 * @returns true when the engine exits successfully within 120 seconds; false
 *          when the model, voices file or binary is missing, or the run fails
 */
function generateViaKokoro(wavPath: string, text: string, voice: string): boolean {
  const modelPath = path.join(TTS_DIR, 'kokoro-v1.0.onnx');
  const voicesPath = path.join(TTS_DIR, 'voices.bin');
  if (!fs.existsSync(modelPath) || !fs.existsSync(voicesPath)) return false;

  const root = process.cwd();
  // First existing candidate wins: packaged binary, then release, then debug build.
  const ttsBin = [
    path.join(root, 'dist_rust', 'tts-engine'),
    path.join(root, 'rust', 'target', 'release', 'tts-engine'),
    path.join(root, 'rust', 'target', 'debug', 'tts-engine'),
  ].find((candidate) => fs.existsSync(candidate));
  if (!ttsBin) return false;

  try {
    execFileSync(
      ttsBin,
      [
        '--model', modelPath,
        '--voices', voicesPath,
        '--voice', voice,
        '--output', wavPath,
        '--text', text,
      ],
      { timeout: 120000, stdio: 'pipe' },
    );
    return true;
  } catch {
    // Best-effort: the caller reports the overall TTS failure.
    return false;
  }
}
/**
 * Read a WAV file and return its raw sample data plus sample rate.
 *
 * Walks the RIFF chunk list: the sample rate is taken from the `fmt ` chunk
 * (22050 is assumed when no `fmt ` chunk is seen) and the payload from the
 * `data` chunk (if several are present, the last one wins). A `data` chunk
 * whose declared size runs past the end of the file is clamped to the bytes
 * actually present.
 *
 * Channel count and bit depth are not inspected here; the encoder in this
 * module frames the result as 16-bit samples.
 *
 * @param wavPath - path of the WAV file to read
 * @returns the PCM payload and sample rate, or null when the file is not a
 *          RIFF/WAVE file, has a truncated/undersized `fmt ` chunk, declares
 *          a sample rate of 0, or contains no `data` chunk
 * @throws whatever `fs.readFileSync` throws when the file cannot be read
 */
function readWavWithRate(wavPath: string): { pcm: Buffer; sampleRate: number } | null {
  const wav = fs.readFileSync(wavPath);
  if (wav.length < 44) return null;
  if (wav.toString('ascii', 0, 4) !== 'RIFF') return null;
  if (wav.toString('ascii', 8, 12) !== 'WAVE') return null;

  let sampleRate = 22050;
  let pcm: Buffer | null = null;
  let offset = 12;
  while (offset < wav.length - 8) {
    const chunkId = wav.toString('ascii', offset, offset + 4);
    const chunkSize = wav.readUInt32LE(offset + 4);
    const bodyStart = offset + 8;

    if (chunkId === 'fmt ') {
      // The sample rate sits at bytes 4..8 of the chunk body. Reject the file
      // instead of reading past the chunk or past the end of the buffer.
      if (chunkSize < 8 || bodyStart + 8 > wav.length) return null;
      sampleRate = wav.readUInt32LE(bodyStart + 4);
    }
    if (chunkId === 'data') {
      pcm = wav.subarray(bodyStart, bodyStart + chunkSize);
    }

    offset = bodyStart + chunkSize;
    // RIFF chunks are word-aligned: skip the pad byte after an odd-sized chunk.
    if (offset % 2 !== 0) offset++;
  }

  // A zero sample rate cannot be split into 20ms frames downstream.
  if (pcm === null || sampleRate === 0) return null;
  return { pcm, sampleRate };
}
/**
 * Split 16-bit PCM into 20ms frames and encode each frame as G.722 and Opus.
 *
 * A trailing partial frame (less than 20ms of audio) is not encoded. Frames
 * for which the encoder returns a falsy result are skipped for that codec
 * only, so the two output arrays may differ in length; a summary is logged
 * when that happens.
 *
 * @param pcm - raw PCM bytes, treated as 16-bit samples
 * @param sampleRate - sample rate of `pcm` in Hz
 * @param log - sink for diagnostic messages
 * @returns the encoded frame lists, or null when the codec bridge is not
 *          ready or `sampleRate` is too small/invalid to form a 20ms frame
 */
async function encodePcmFrames(
  pcm: Buffer,
  sampleRate: number,
  log: (msg: string) => void,
): Promise<{ g722Frames: Buffer[]; opusFrames: Buffer[] } | null> {
  if (!isCodecReady()) return null;

  const frameSamples = Math.floor(sampleRate * 0.02); // 20ms
  const frameBytes = frameSamples * 2; // 16-bit
  // A rate below 50 Hz yields zero-byte frames; dividing by that would make
  // the frame count Infinity and the loop below would never terminate.
  if (!Number.isFinite(frameBytes) || frameBytes <= 0) {
    log(`[prompt-cache] cannot split PCM into 20ms frames at sample rate ${sampleRate}`);
    return null;
  }

  const totalFrames = Math.floor(pcm.length / frameBytes);
  const g722Frames: Buffer[] = [];
  const opusFrames: Buffer[] = [];

  // Frames are submitted strictly in order, one at a time.
  // NOTE(review): presumably the encoders keep state between frames — confirm
  // before parallelizing across frames.
  for (let i = 0; i < totalFrames; i++) {
    const framePcm = Buffer.from(pcm.subarray(i * frameBytes, (i + 1) * frameBytes));
    const [g722, opus] = await Promise.all([
      encodePcm(framePcm, sampleRate, 9), // G.722
      encodePcm(framePcm, sampleRate, 111), // Opus
    ]);
    if (g722) g722Frames.push(g722);
    if (opus) opusFrames.push(opus);
  }

  if (g722Frames.length !== totalFrames || opusFrames.length !== totalFrames) {
    log(
      `[prompt-cache] encoder skipped frames: g722 ${g722Frames.length}/${totalFrames}, ` +
        `opus ${opusFrames.length}/${totalFrames}`,
    );
  }
  return { g722Frames, opusFrames };
}
// ---------------------------------------------------------------------------
// PromptCache
// ---------------------------------------------------------------------------
/**
 * In-memory cache of named, pre-encoded audio prompts.
 *
 * Prompts can be produced from TTS, from an existing WAV file, or as a
 * synthesized beep. Every producer returns the cached prompt on success and
 * logs a message and returns null on failure.
 */
export class PromptCache {
  /** Encoded prompts, keyed by prompt ID. */
  private readonly prompts = new Map<string, ICachedPrompt>();
  /** Sink for one-line status and failure messages. */
  private readonly log: (msg: string) => void;
  /** Memoized espeak-ng probe result; null until the first TTS request. */
  private espeakAvailable: boolean | null = null;

  /**
   * @param log - receives one-line status and failure messages
   */
  constructor(log: (msg: string) => void) {
    this.log = log;
  }

  // -------------------------------------------------------------------------
  // Public API
  // -------------------------------------------------------------------------

  /** Get a cached prompt by ID, or null when it is not cached. */
  get(id: string): ICachedPrompt | null {
    return this.prompts.get(id) ?? null;
  }

  /** Check if a prompt is cached. */
  has(id: string): boolean {
    return this.prompts.has(id);
  }

  /** List all cached prompt IDs. */
  listIds(): string[] {
    return [...this.prompts.keys()];
  }

  /**
   * Generate a TTS prompt and cache it.
   *
   * The WAV is written to `prompt-<id>.wav` in the TTS directory, using
   * espeak-ng when available and Kokoro otherwise, then loaded and encoded.
   *
   * NOTE(review): a WAV already on disk for this ID is reused without
   * comparing it to `text` — confirm that callers remove the file when a
   * prompt's text changes.
   *
   * @param id - prompt ID; must be usable as a plain file-name component
   * @param text - text to synthesize
   * @param voice - Kokoro voice name (only used by the Kokoro fallback)
   * @returns the cached prompt, or null when the ID is invalid, synthesis
   *          fails, or the WAV cannot be loaded/encoded
   */
  async generatePrompt(id: string, text: string, voice = 'af_bella'): Promise<ICachedPrompt | null> {
    // An ID containing a path separator would resolve outside (or into a
    // missing subdirectory of) the TTS directory — refuse it up front.
    const fileName = `prompt-${id}.wav`;
    if (path.basename(fileName) !== fileName) {
      this.log(`[prompt-cache] invalid prompt id "${id}" (must not contain path separators)`);
      return null;
    }

    fs.mkdirSync(TTS_DIR, { recursive: true });
    const wavPath = path.join(TTS_DIR, fileName);

    // Check espeak availability once.
    if (this.espeakAvailable === null) {
      this.espeakAvailable = isEspeakAvailable();
    }

    // Generate WAV unless one already exists on disk for this ID.
    if (!fs.existsSync(wavPath)) {
      const generated =
        (this.espeakAvailable && generateViaEspeak(wavPath, text)) ||
        generateViaKokoro(wavPath, text, voice);
      if (!generated) {
        this.log(`[prompt-cache] failed to generate TTS for "${id}"`);
        return null;
      }
      this.log(`[prompt-cache] generated WAV for "${id}"`);
    }

    return this.loadWavPrompt(id, wavPath);
  }

  /**
   * Load a WAV file as a prompt and cache it.
   *
   * @param id - prompt ID to cache the result under
   * @param wavPath - path of the WAV file
   * @returns the cached prompt, or null when the file is missing, unreadable,
   *          not parseable, or cannot be encoded
   */
  async loadWavPrompt(id: string, wavPath: string): Promise<ICachedPrompt | null> {
    if (!fs.existsSync(wavPath)) {
      this.log(`[prompt-cache] WAV not found: ${wavPath}`);
      return null;
    }

    // Reading/parsing can throw (I/O errors, malformed headers); report that
    // the same way as every other failure here instead of rejecting.
    let result: ReturnType<typeof readWavWithRate>;
    try {
      result = readWavWithRate(wavPath);
    } catch (err: unknown) {
      const reason = err instanceof Error ? err.message : String(err);
      this.log(`[prompt-cache] failed to read WAV: ${wavPath} (${reason})`);
      return null;
    }
    if (!result) {
      this.log(`[prompt-cache] failed to parse WAV: ${wavPath}`);
      return null;
    }

    const encoded = await encodePcmFrames(result.pcm, result.sampleRate, this.log);
    if (!encoded) {
      this.log(`[prompt-cache] encoding failed for "${id}" (codec bridge not ready?)`);
      return null;
    }

    const durationMs = encoded.g722Frames.length * 20;
    const prompt: ICachedPrompt = {
      id,
      g722Frames: encoded.g722Frames,
      opusFrames: encoded.opusFrames,
      durationMs,
    };
    this.prompts.set(id, prompt);
    this.log(`[prompt-cache] cached "${id}": ${encoded.g722Frames.length} frames (${(durationMs / 1000).toFixed(1)}s)`);
    return prompt;
  }

  /**
   * Generate a beep tone prompt (sine wave) and cache it.
   *
   * The tone is synthesized at 16 kHz and its length is rounded down to a
   * whole number of 20ms frames before the 10ms fade-in/fade-out is applied,
   * so the fade-out always lies inside the audio that is actually encoded.
   *
   * @param id - prompt ID
   * @param freqHz - tone frequency (default 1000 Hz)
   * @param durationMs - tone duration (default 500ms); must be finite and >= 0
   * @param amplitude - 16-bit amplitude (default 8000)
   * @returns the cached prompt, or null when the duration is invalid or
   *          encoding fails
   */
  async generateBeep(
    id: string,
    freqHz = 1000,
    durationMs = 500,
    amplitude = 8000,
  ): Promise<ICachedPrompt | null> {
    if (!Number.isFinite(durationMs) || durationMs < 0) {
      this.log(`[prompt-cache] beep "${id}" rejected: invalid duration ${durationMs}ms`);
      return null;
    }

    // Generate at 16kHz for decent quality.
    const sampleRate = 16000;
    const frameSamples = Math.floor(sampleRate * 0.02); // samples per 20ms frame
    const requestedSamples = Math.floor((sampleRate * durationMs) / 1000);
    // Only whole 20ms frames get encoded, so shape the tone to that length.
    const totalSamples = Math.floor(requestedSamples / frameSamples) * frameSamples;
    const fadeLen = Math.floor(sampleRate * 0.01); // 10ms fade

    const pcm = Buffer.alloc(totalSamples * 2);
    for (let i = 0; i < totalSamples; i++) {
      const t = i / sampleRate;
      // Apply a short fade-in/fade-out to avoid click artifacts.
      let envelope = 1.0;
      if (i < fadeLen) envelope = i / fadeLen;
      else if (i > totalSamples - fadeLen) envelope = (totalSamples - i) / fadeLen;
      const sample = Math.round(Math.sin(2 * Math.PI * freqHz * t) * amplitude * envelope);
      pcm.writeInt16LE(Math.max(-32768, Math.min(32767, sample)), i * 2);
    }

    const encoded = await encodePcmFrames(pcm, sampleRate, this.log);
    if (!encoded) {
      this.log(`[prompt-cache] beep encoding failed for "${id}"`);
      return null;
    }

    const actualDuration = encoded.g722Frames.length * 20;
    const prompt: ICachedPrompt = {
      id,
      g722Frames: encoded.g722Frames,
      opusFrames: encoded.opusFrames,
      durationMs: actualDuration,
    };
    this.prompts.set(id, prompt);
    this.log(`[prompt-cache] beep "${id}" cached: ${actualDuration}ms @ ${freqHz}Hz`);
    return prompt;
  }

  /**
   * Remove a prompt from the cache. Unknown IDs are ignored.
   */
  remove(id: string): void {
    this.prompts.delete(id);
  }

  /**
   * Clear all cached prompts.
   */
  clear(): void {
    this.prompts.clear();
  }
}
// ---------------------------------------------------------------------------
// Standalone playback helpers (for use by SystemLeg)
// ---------------------------------------------------------------------------
/**
 * Stream a cached prompt's G.722 frames as RTP packets, one every 20ms.
 *
 * Sequence number and timestamp start at random values. The first packet is
 * built with the marker argument set. `onDone` fires on the timer tick that
 * follows the last frame; it is not invoked when playback is cancelled.
 *
 * @param prompt - the cached prompt to play
 * @param sendPacket - receives each raw RTP packet (header followed by payload)
 * @param ssrc - SSRC for RTP headers
 * @param onDone - called when playback finishes (immediately if there are no frames)
 * @returns a function that stops playback, or null if prompt has no G.722 frames
 */
export function playPromptG722(
  prompt: ICachedPrompt,
  sendPacket: (pkt: Buffer) => void,
  ssrc: number,
  onDone?: () => void,
): (() => void) | null {
  const G722_PAYLOAD_TYPE = 9;
  const FRAME_INTERVAL_MS = 20;

  // Nothing to play: report completion right away.
  if (prompt.g722Frames.length === 0) {
    onDone?.();
    return null;
  }

  const queue = prompt.g722Frames;
  let cursor = 0;
  let sequence = Math.floor(Math.random() * 0xffff);
  let timestamp = Math.floor(Math.random() * 0xffffffff);

  const tick = (): void => {
    if (cursor < queue.length) {
      const body = queue[cursor];
      const header = buildRtpHeader(
        G722_PAYLOAD_TYPE,
        sequence & 0xffff,
        timestamp >>> 0,
        ssrc >>> 0,
        cursor === 0,
      );
      sendPacket(Buffer.concat([header, body]));
      sequence += 1;
      timestamp += rtpClockIncrement(G722_PAYLOAD_TYPE);
      cursor += 1;
      return;
    }
    // All frames sent: stop ticking and notify.
    stop();
    onDone?.();
  };

  const ticker = setInterval(tick, FRAME_INTERVAL_MS);
  const stop = (): void => clearInterval(ticker);
  return stop;
}
/**
 * Stream a cached prompt's Opus frames as RTP packets, one every 20ms.
 *
 * Sequence number and timestamp are taken from, and advanced in, the
 * caller-supplied `counters` object, so consecutive playbacks that share it
 * continue from where the previous one stopped. The first packet is built
 * with the marker argument set. `onDone` fires on the timer tick that follows
 * the last frame; it is not invoked when playback is cancelled.
 *
 * @param prompt - the cached prompt to play
 * @param sendPacket - receives each raw RTP packet (header followed by payload)
 * @param ssrc - SSRC for RTP headers
 * @param counters - shared seq/ts counters (mutated in place for seamless transitions)
 * @param onDone - called when playback finishes (immediately if there are no frames)
 * @returns a function that stops playback, or null if prompt has no Opus frames
 */
export function playPromptOpus(
  prompt: ICachedPrompt,
  sendPacket: (pkt: Buffer) => void,
  ssrc: number,
  counters: { seq: number; ts: number },
  onDone?: () => void,
): (() => void) | null {
  const OPUS_PAYLOAD_TYPE = 111;
  const FRAME_INTERVAL_MS = 20;
  const SAMPLES_PER_FRAME = 960; // Opus 48kHz: 960 samples per 20ms

  // Nothing to play: report completion right away.
  if (prompt.opusFrames.length === 0) {
    onDone?.();
    return null;
  }

  const queue = prompt.opusFrames;
  let cursor = 0;

  const tick = (): void => {
    if (cursor < queue.length) {
      const body = queue[cursor];
      const header = buildRtpHeader(
        OPUS_PAYLOAD_TYPE,
        counters.seq & 0xffff,
        counters.ts >>> 0,
        ssrc >>> 0,
        cursor === 0,
      );
      sendPacket(Buffer.concat([header, body]));
      counters.seq += 1;
      counters.ts += SAMPLES_PER_FRAME;
      cursor += 1;
      return;
    }
    // All frames sent: stop ticking and notify.
    stop();
    onDone?.();
  };

  const ticker = setInterval(tick, FRAME_INTERVAL_MS);
  const stop = (): void => clearInterval(ticker);
  return stop;
}