2026-04-10 08:54:46 +00:00
|
|
|
/**
 * PromptCache — manages named audio prompt WAV files for IVR and voicemail.
 *
 * Generates WAV files via espeak-ng (primary) or Kokoro TTS through the
 * proxy-engine (fallback). Also supports loading pre-existing WAV files
 * and programmatic tone generation.
 *
 * All audio playback happens in Rust (audio_player / start_interaction).
 * This module only manages WAV files on disk.
 */
|
|
|
|
|
|
|
|
|
|
import { execFileSync, execSync } from 'node:child_process';
import fs from 'node:fs';
import path from 'node:path';
import { Buffer } from 'node:buffer';
import { sendProxyCommand, isProxyReady } from '../proxybridge.ts';
|
2026-04-10 11:36:18 +00:00
|
|
|
|
2026-04-10 08:54:46 +00:00
|
|
|
// ---------------------------------------------------------------------------
|
|
|
|
|
// Types
|
|
|
|
|
// ---------------------------------------------------------------------------
|
|
|
|
|
|
2026-04-10 15:21:44 +00:00
|
|
|
/**
 * Descriptor of one cached prompt: where its WAV file lives and how long
 * it plays. The audio data itself is not held in memory.
 */
export interface ICachedPrompt {
  /** Identifier the prompt is registered and looked up under. */
  id: string;

  /** Location of the prompt's WAV file on disk. */
  wavPath: string;

  /** Approximate playback length in milliseconds, derived from the WAV header. */
  durationMs: number;
}
|
|
|
|
|
|
|
|
|
|
// ---------------------------------------------------------------------------
|
|
|
|
|
// TTS helpers
|
|
|
|
|
// ---------------------------------------------------------------------------
|
|
|
|
|
|
|
|
|
|
// Directory used for generated prompt WAVs and for the Kokoro model/voice
// files; resolved against the process working directory when this module loads.
const TTS_DIR = path.join(process.cwd(), '.nogit', 'tts');
|
|
|
|
|
|
|
|
|
|
/**
 * Probe for the espeak-ng executable by running `which espeak-ng`.
 *
 * @returns `true` when the lookup command succeeds, `false` when it fails
 *          for any reason (command exits non-zero or cannot be run).
 */
function isEspeakAvailable(): boolean {
  let found = true;
  try {
    // Output is piped so the lookup stays silent on the parent's stdio.
    execSync('which espeak-ng', { stdio: 'pipe' });
  } catch {
    found = false;
  }
  return found;
}
|
|
|
|
|
|
|
|
|
|
/**
 * Generate a WAV file from text via espeak-ng (voice `en-us`, speed 150).
 *
 * The binary is invoked directly with an argument array instead of through a
 * shell command string, so quotes, `$`, backticks or other shell
 * metacharacters in `text` or `wavPath` are passed through literally and can
 * neither break the command nor execute anything.
 *
 * @param wavPath - Destination path for the generated WAV file.
 * @param text - Text to synthesize.
 * @returns `true` if espeak-ng completed successfully within 10 seconds,
 *          `false` on any failure (binary missing, non-zero exit, timeout).
 */
function generateViaEspeak(wavPath: string, text: string): boolean {
  try {
    execFileSync(
      'espeak-ng',
      // `--` ends option parsing so text starting with '-' is still spoken
      // rather than being interpreted as a command-line flag.
      ['-v', 'en-us', '-s', '150', '-w', wavPath, '--', text],
      { timeout: 10000, stdio: 'pipe' },
    );
    return true;
  } catch {
    // A failed or timed-out run may leave a partial file behind; remove it so
    // a later existence check does not mistake it for a finished prompt.
    try {
      fs.rmSync(wavPath, { force: true });
    } catch {
      // Best-effort cleanup only; the failure is already reported via `false`.
    }
    return false;
  }
}
|
|
|
|
|
|
2026-04-10 15:21:44 +00:00
|
|
|
/**
 * Generate a WAV file via Kokoro TTS by sending a `generate_tts` command to
 * the proxy-engine.
 *
 * @param wavPath - Output path handed to the proxy as `output`.
 * @param text - Text to synthesize.
 * @param voice - Voice name forwarded to the proxy.
 * @returns `true` if the proxy command resolved; `false` if the model or
 *          voices file is missing under the TTS directory, the proxy is not
 *          ready, or the command failed.
 */
async function generateViaKokoro(wavPath: string, text: string, voice: string): Promise<boolean> {
  const assets = {
    model: path.join(TTS_DIR, 'kokoro-v1.0.onnx'),
    voices: path.join(TTS_DIR, 'voices.bin'),
  };

  // Both asset files must exist before the proxy is even consulted.
  const assetsPresent = fs.existsSync(assets.model) && fs.existsSync(assets.voices);
  if (!assetsPresent || !isProxyReady()) return false;

  const request = { ...assets, voice, text, output: wavPath };
  let succeeded = true;
  try {
    await sendProxyCommand('generate_tts', request);
  } catch {
    succeeded = false;
  }
  return succeeded;
}
|
|
|
|
|
|
2026-04-10 15:21:44 +00:00
|
|
|
/**
 * Read a WAV file's playback duration from its RIFF header.
 *
 * Walks the RIFF chunk list, taking channel count, sample rate and bit depth
 * from the `fmt ` chunk and the payload size from the `data` chunk. If no
 * `fmt ` chunk is found, 16 kHz / 16-bit / mono is assumed.
 *
 * The `data` size is clamped to the bytes actually present in the file:
 * streamed or truncated WAVs can carry a placeholder or stale size in the
 * header, which would otherwise yield a wildly wrong duration.
 *
 * @param wavPath - Path of the WAV file to inspect.
 * @returns Duration in milliseconds, or `0` if the file cannot be read, is
 *          shorter than a minimal header, or is not a RIFF/WAVE file.
 */
function getWavDurationMs(wavPath: string): number {
  try {
    const wav = fs.readFileSync(wavPath);
    if (wav.length < 44) return 0;
    if (wav.toString('ascii', 0, 4) !== 'RIFF') return 0;
    if (wav.toString('ascii', 8, 12) !== 'WAVE') return 0;

    let sampleRate = 16000;
    let dataSize = 0;
    let bitsPerSample = 16;
    let channels = 1;
    let offset = 12; // first chunk follows 'RIFF' + size + 'WAVE'

    // Each chunk is a 4-byte id, a 4-byte little-endian size, then the payload.
    while (offset + 8 <= wav.length) {
      const chunkId = wav.toString('ascii', offset, offset + 4);
      const chunkSize = wav.readUInt32LE(offset + 4);
      const payloadStart = offset + 8;

      if (chunkId === 'fmt ') {
        // Payload layout: format tag (2), channels (2), sample rate (4),
        // byte rate (4), block align (2), bits per sample (2).
        channels = wav.readUInt16LE(payloadStart + 2);
        sampleRate = wav.readUInt32LE(payloadStart + 4);
        bitsPerSample = wav.readUInt16LE(payloadStart + 14);
      }
      if (chunkId === 'data') {
        // Never count more audio than the file really contains.
        dataSize = Math.min(chunkSize, wav.length - payloadStart);
      }

      offset = payloadStart + chunkSize;
      if (offset % 2 !== 0) offset++; // chunks are padded to even offsets
    }

    const bytesPerSample = (bitsPerSample / 8) * channels;
    const totalSamples = bytesPerSample > 0 ? dataSize / bytesPerSample : 0;
    return sampleRate > 0 ? Math.round((totalSamples / sampleRate) * 1000) : 0;
  } catch {
    // Unreadable file or a header truncated mid-chunk: report unknown duration.
    return 0;
  }
}
|
|
|
|
|
|
|
|
|
|
// ---------------------------------------------------------------------------
|
|
|
|
|
// PromptCache
|
|
|
|
|
// ---------------------------------------------------------------------------
|
|
|
|
|
|
|
|
|
|
/**
 * In-memory registry of named audio prompts backed by WAV files on disk.
 *
 * Prompts are keyed by id. Generated files are written to the TTS directory
 * as `prompt-<id>.wav`; a file that already exists on disk is reused as-is
 * rather than regenerated, regardless of the text, voice or tone parameters
 * passed on the current call.
 */
export class PromptCache {
  private prompts = new Map<string, ICachedPrompt>();
  private log: (msg: string) => void;
  /** Result of the espeak-ng probe; `null` until the first TTS request. */
  private espeakAvailable: boolean | null = null;

  /**
   * @param log - Sink for human-readable status messages.
   */
  constructor(log: (msg: string) => void) {
    this.log = log;
  }

  // -------------------------------------------------------------------------
  // Public API
  // -------------------------------------------------------------------------

  /** Get a cached prompt by ID, or `null` if it is not registered. */
  get(id: string): ICachedPrompt | null {
    return this.prompts.get(id) ?? null;
  }

  /** Check if a prompt is cached. */
  has(id: string): boolean {
    return this.prompts.has(id);
  }

  /** List all cached prompt IDs. */
  listIds(): string[] {
    return [...this.prompts.keys()];
  }

  /**
   * Generate a TTS prompt WAV and cache its path.
   * Uses espeak-ng (primary) or Kokoro (fallback).
   *
   * @param id - Prompt identifier; also determines the WAV file name.
   * @param text - Text to synthesize.
   * @param voice - Voice name, only used when falling back to Kokoro.
   * @returns The registered prompt, or `null` if no engine produced a WAV.
   */
  async generatePrompt(id: string, text: string, voice = 'af_bella'): Promise<ICachedPrompt | null> {
    const wavPath = this.promptWavPath(id);

    // Check espeak availability once.
    if (this.espeakAvailable === null) {
      this.espeakAvailable = isEspeakAvailable();
    }

    // Generate WAV if not already on disk.
    if (!fs.existsSync(wavPath)) {
      let generated = false;
      if (this.espeakAvailable) {
        generated = generateViaEspeak(wavPath, text);
      }
      if (!generated) {
        generated = await generateViaKokoro(wavPath, text, voice);
      }
      if (!generated) {
        this.log(`[prompt-cache] failed to generate TTS for "${id}"`);
        return null;
      }
      this.log(`[prompt-cache] generated WAV for "${id}"`);
    }

    return this.registerWav(id, wavPath);
  }

  /**
   * Load a pre-existing WAV file as a prompt.
   *
   * @param id - Identifier to register the prompt under.
   * @param wavPath - Path of the existing WAV file.
   * @returns The registered prompt, or `null` if the file does not exist.
   */
  async loadWavPrompt(id: string, wavPath: string): Promise<ICachedPrompt | null> {
    if (!fs.existsSync(wavPath)) {
      this.log(`[prompt-cache] WAV not found: ${wavPath}`);
      return null;
    }
    return this.registerWav(id, wavPath);
  }

  /**
   * Generate a beep tone WAV (16 kHz, 16-bit, mono sine wave) and cache it.
   *
   * @param id - Prompt identifier; also determines the WAV file name.
   * @param freqHz - Tone frequency in Hz.
   * @param durationMs - Tone length in milliseconds; must be finite and >= 0.
   * @param amplitude - Peak sample value before clamping to the 16-bit range.
   * @returns The registered prompt, or `null` if `durationMs` is invalid.
   */
  async generateBeep(
    id: string,
    freqHz = 1000,
    durationMs = 500,
    amplitude = 8000,
  ): Promise<ICachedPrompt | null> {
    // A negative or non-finite duration would make the sample buffer
    // allocation throw; report it through the normal failure channel instead.
    if (!Number.isFinite(durationMs) || durationMs < 0) {
      this.log(`[prompt-cache] invalid beep duration for "${id}": ${durationMs}`);
      return null;
    }

    const wavPath = this.promptWavPath(id);

    if (!fs.existsSync(wavPath)) {
      fs.writeFileSync(wavPath, PromptCache.renderBeepWav(freqHz, durationMs, amplitude));
      this.log(`[prompt-cache] beep WAV generated for "${id}"`);
    }

    return this.registerWav(id, wavPath);
  }

  /** Remove a prompt from the cache. The WAV file on disk is left in place. */
  remove(id: string): void {
    this.prompts.delete(id);
  }

  /** Clear all cached prompts. WAV files on disk are left in place. */
  clear(): void {
    this.prompts.clear();
  }

  // -------------------------------------------------------------------------
  // Internal
  // -------------------------------------------------------------------------

  /** Ensure the TTS directory exists and return the WAV path for a prompt id. */
  private promptWavPath(id: string): string {
    fs.mkdirSync(TTS_DIR, { recursive: true });
    return path.join(TTS_DIR, `prompt-${id}.wav`);
  }

  /** Render a complete 16 kHz / 16-bit / mono sine-wave WAV file in memory. */
  private static renderBeepWav(freqHz: number, durationMs: number, amplitude: number): Buffer {
    const sampleRate = 16000;
    const headerSize = 44;
    const totalSamples = Math.floor((sampleRate * durationMs) / 1000);
    const dataSize = totalSamples * 2;
    const wav = Buffer.alloc(headerSize + dataSize);

    // RIFF header
    wav.write('RIFF', 0);
    wav.writeUInt32LE(36 + dataSize, 4);
    wav.write('WAVE', 8);

    // fmt chunk
    wav.write('fmt ', 12);
    wav.writeUInt32LE(16, 16); // chunk size
    wav.writeUInt16LE(1, 20); // PCM format
    wav.writeUInt16LE(1, 22); // mono
    wav.writeUInt32LE(sampleRate, 24);
    wav.writeUInt32LE(sampleRate * 2, 28); // byte rate
    wav.writeUInt16LE(2, 32); // block align
    wav.writeUInt16LE(16, 34); // bits per sample

    // data chunk
    wav.write('data', 36);
    wav.writeUInt32LE(dataSize, 40);

    // 10ms linear fade at both ends to avoid clicks. Taking the minimum of
    // both ramps keeps the tail fading out even when the tone is shorter
    // than two fade lengths.
    const fadeLen = Math.floor(sampleRate * 0.01);
    for (let i = 0; i < totalSamples; i++) {
      const t = i / sampleRate;
      const envelope = Math.min(1.0, i / fadeLen, (totalSamples - i) / fadeLen);
      const sample = Math.round(Math.sin(2 * Math.PI * freqHz * t) * amplitude * envelope);
      wav.writeInt16LE(Math.max(-32768, Math.min(32767, sample)), headerSize + i * 2);
    }

    return wav;
  }

  /** Read the WAV's duration, store the prompt under `id`, and log the result. */
  private registerWav(id: string, wavPath: string): ICachedPrompt {
    const durationMs = getWavDurationMs(wavPath);
    const prompt: ICachedPrompt = { id, wavPath, durationMs };
    this.prompts.set(id, prompt);
    this.log(`[prompt-cache] cached "${id}": ${wavPath} (${(durationMs / 1000).toFixed(1)}s)`);
    return prompt;
  }
}
|