/**
 * PromptCache — manages named audio prompt WAV files for IVR and voicemail.
 *
 * Generates WAV files via espeak-ng (primary) or Kokoro TTS through the
 * proxy-engine (fallback). Also supports loading pre-existing WAV files
 * and programmatic tone generation.
 *
 * All audio playback happens in Rust (audio_player / start_interaction).
 * This module only manages WAV files on disk.
 */

import { execFileSync, execSync } from 'node:child_process';
import fs from 'node:fs';
import path from 'node:path';
import { Buffer } from 'node:buffer';
import { sendProxyCommand, isProxyReady } from '../proxybridge.ts';

// ---------------------------------------------------------------------------
// Types
// ---------------------------------------------------------------------------

/** A cached prompt — just a WAV file path and metadata. */
export interface ICachedPrompt {
  /** Unique prompt identifier. */
  id: string;
  /** Path to the WAV file on disk. */
  wavPath: string;
  /** Total duration in milliseconds (approximate, from WAV header). */
  durationMs: number;
}

// ---------------------------------------------------------------------------
// TTS helpers
// ---------------------------------------------------------------------------

/** Directory (relative to the process working directory) holding all prompt WAVs and TTS models. */
const TTS_DIR = path.join(process.cwd(), '.nogit', 'tts');

/**
 * Check if espeak-ng is available.
 *
 * @returns `true` when `which espeak-ng` exits successfully, `false` otherwise.
 */
function isEspeakAvailable(): boolean {
  try {
    execSync('which espeak-ng', { stdio: 'pipe' });
    return true;
  } catch {
    return false;
  }
}

/**
 * Generate WAV via espeak-ng.
 *
 * The binary is invoked directly with an argument vector (no shell), so
 * quotes, `$`, backticks or other shell metacharacters in `text` or
 * `wavPath` are passed through literally instead of being interpreted.
 *
 * @param wavPath - Destination WAV file.
 * @param text - Text to synthesize.
 * @returns `true` if espeak-ng exited successfully within 10 s, `false` on
 *          any failure (non-zero exit, timeout, binary missing).
 */
function generateViaEspeak(wavPath: string, text: string): boolean {
  try {
    execFileSync(
      'espeak-ng',
      // `--` ends option parsing so text starting with `-` is not taken as a flag.
      ['-v', 'en-us', '-s', '150', '-w', wavPath, '--', text],
      { timeout: 10000, stdio: 'pipe' },
    );
    return true;
  } catch {
    return false;
  }
}

/**
 * Generate WAV via Kokoro TTS (runs inside proxy-engine).
 *
 * @param wavPath - Destination WAV file, passed to the engine as `output`.
 * @param text - Text to synthesize.
 * @param voice - Voice name forwarded to the engine.
 * @returns `false` when the model/voices files are missing from `TTS_DIR`,
 *          the proxy is not ready, or the `generate_tts` command rejects;
 *          `true` when the command resolves.
 */
async function generateViaKokoro(wavPath: string, text: string, voice: string): Promise<boolean> {
  const modelPath = path.join(TTS_DIR, 'kokoro-v1.0.onnx');
  const voicesPath = path.join(TTS_DIR, 'voices.bin');
  if (!fs.existsSync(modelPath) || !fs.existsSync(voicesPath)) return false;
  if (!isProxyReady()) return false;

  try {
    await sendProxyCommand('generate_tts', {
      model: modelPath,
      voices: voicesPath,
      voice,
      text,
      output: wavPath,
    });
    return true;
  } catch {
    return false;
  }
}

/**
 * Read a WAV file's duration from its header.
 *
 * Walks the RIFF chunk list, taking the format from the `fmt ` chunk and the
 * payload size from the `data` chunk. The declared data size is clamped to
 * the bytes actually present in the file, so a truncated file or a
 * placeholder size does not inflate the result.
 *
 * @param wavPath - Path of the WAV file to inspect.
 * @returns Duration in milliseconds, or 0 if the file is unreadable, shorter
 *          than 44 bytes, not RIFF, or has no usable format/data.
 */
function getWavDurationMs(wavPath: string): number {
  try {
    const wav = fs.readFileSync(wavPath);
    if (wav.length < 44) return 0;
    if (wav.toString('ascii', 0, 4) !== 'RIFF') return 0;

    // Defaults used when the corresponding chunk is absent.
    let sampleRate = 16000;
    let dataSize = 0;
    let bitsPerSample = 16;
    let channels = 1;

    // Chunks start right after the 12-byte "RIFF <size> WAVE" preamble.
    let offset = 12;
    while (offset < wav.length - 8) {
      const chunkId = wav.toString('ascii', offset, offset + 4);
      const chunkSize = wav.readUInt32LE(offset + 4);
      const bodyStart = offset + 8;

      if (chunkId === 'fmt ') {
        channels = wav.readUInt16LE(offset + 10);
        sampleRate = wav.readUInt32LE(offset + 12);
        bitsPerSample = wav.readUInt16LE(offset + 22);
      }
      if (chunkId === 'data') {
        // Never count more audio than the file really contains.
        dataSize = Math.min(chunkSize, wav.length - bodyStart);
      }

      offset = bodyStart + chunkSize;
      // Chunks are word-aligned: skip the pad byte after an odd-sized chunk.
      if (offset % 2 !== 0) offset++;
    }

    const bytesPerSample = (bitsPerSample / 8) * channels;
    const totalSamples = bytesPerSample > 0 ? dataSize / bytesPerSample : 0;
    return sampleRate > 0 ? Math.round((totalSamples / sampleRate) * 1000) : 0;
  } catch {
    // Unreadable file or out-of-range header read: treat as unknown duration.
    return 0;
  }
}

// ---------------------------------------------------------------------------
// PromptCache
// ---------------------------------------------------------------------------

/**
 * In-memory registry of prompt IDs to WAV files on disk.
 *
 * Generated files are stored as `prompt-<id>.wav` inside `TTS_DIR`; the `id`
 * is used verbatim in the file name. A file that already exists for an ID is
 * reused as-is rather than regenerated.
 */
export class PromptCache {
  private prompts = new Map<string, ICachedPrompt>();
  private log: (msg: string) => void;
  /** `null` until espeak-ng availability has been probed once. */
  private espeakAvailable: boolean | null = null;

  /**
   * @param log - Sink for human-readable status messages.
   */
  constructor(log: (msg: string) => void) {
    this.log = log;
  }

  // -------------------------------------------------------------------------
  // Public API
  // -------------------------------------------------------------------------

  /** Get a cached prompt by ID. */
  get(id: string): ICachedPrompt | null {
    return this.prompts.get(id) ?? null;
  }

  /** Check if a prompt is cached. */
  has(id: string): boolean {
    return this.prompts.has(id);
  }

  /** List all cached prompt IDs. */
  listIds(): string[] {
    return [...this.prompts.keys()];
  }

  /**
   * Generate a TTS prompt WAV and cache its path.
   * Uses espeak-ng (primary) or Kokoro (fallback).
   *
   * If `prompt-<id>.wav` already exists it is registered without
   * regeneration, regardless of `text` or `voice`.
   *
   * @param id - Prompt identifier; also determines the file name.
   * @param text - Text to synthesize.
   * @param voice - Kokoro voice name (only used by the fallback engine).
   * @returns The cached prompt, or `null` if both engines failed.
   */
  async generatePrompt(id: string, text: string, voice = 'af_bella'): Promise<ICachedPrompt | null> {
    const wavPath = this.ensurePromptPath(id);

    // Check espeak availability once.
    if (this.espeakAvailable === null) {
      this.espeakAvailable = isEspeakAvailable();
    }

    // Generate WAV if not already on disk.
    if (!fs.existsSync(wavPath)) {
      let generated = false;
      if (this.espeakAvailable) {
        generated = generateViaEspeak(wavPath, text);
      }
      if (!generated) {
        generated = await generateViaKokoro(wavPath, text, voice);
      }
      if (!generated) {
        // A failed or timed-out engine may have left a partial file behind;
        // remove it so a later call retries instead of caching broken audio.
        try {
          fs.rmSync(wavPath, { force: true });
        } catch {
          // Best-effort cleanup only.
        }
        this.log(`[prompt-cache] failed to generate TTS for "${id}"`);
        return null;
      }
      this.log(`[prompt-cache] generated WAV for "${id}"`);
    }

    return this.registerWav(id, wavPath);
  }

  /**
   * Load a pre-existing WAV file as a prompt.
   *
   * @param id - Prompt identifier to register the file under.
   * @param wavPath - Path of an existing WAV file.
   * @returns The cached prompt, or `null` if the file does not exist.
   */
  async loadWavPrompt(id: string, wavPath: string): Promise<ICachedPrompt | null> {
    if (!fs.existsSync(wavPath)) {
      this.log(`[prompt-cache] WAV not found: ${wavPath}`);
      return null;
    }
    return this.registerWav(id, wavPath);
  }

  /**
   * Generate a beep tone WAV and cache it.
   *
   * Writes a 16 kHz, 16-bit, mono sine wave with a 10 ms linear fade-in and
   * fade-out. If `prompt-<id>.wav` already exists it is registered without
   * regeneration, regardless of the tone parameters.
   *
   * @param id - Prompt identifier; also determines the file name.
   * @param freqHz - Tone frequency in Hz.
   * @param durationMs - Tone length in milliseconds.
   * @param amplitude - Peak sample value before clamping to the int16 range.
   * @returns The cached prompt.
   */
  async generateBeep(
    id: string,
    freqHz = 1000,
    durationMs = 500,
    amplitude = 8000,
  ): Promise<ICachedPrompt> {
    const wavPath = this.ensurePromptPath(id);

    if (!fs.existsSync(wavPath)) {
      // Generate 16kHz 16-bit mono sine wave WAV.
      const sampleRate = 16000;
      const channelCount = 1;
      const bitsPerSample = 16;
      const blockAlign = channelCount * (bitsPerSample / 8);

      const totalSamples = Math.floor((sampleRate * durationMs) / 1000);
      const fadeLen = Math.floor(sampleRate * 0.01); // 10ms fade
      const pcm = Buffer.alloc(totalSamples * blockAlign);

      for (let i = 0; i < totalSamples; i++) {
        const t = i / sampleRate;
        let envelope = 1.0;
        if (i < fadeLen) envelope = i / fadeLen;
        else if (i > totalSamples - fadeLen) envelope = (totalSamples - i) / fadeLen;

        const sample = Math.round(Math.sin(2 * Math.PI * freqHz * t) * amplitude * envelope);
        pcm.writeInt16LE(Math.max(-32768, Math.min(32767, sample)), i * 2);
      }

      // Write WAV file.
      const headerSize = 44;
      const dataSize = pcm.length;
      const wav = Buffer.alloc(headerSize + dataSize);

      // RIFF header
      wav.write('RIFF', 0);
      wav.writeUInt32LE(36 + dataSize, 4);
      wav.write('WAVE', 8);

      // fmt chunk
      wav.write('fmt ', 12);
      wav.writeUInt32LE(16, 16); // chunk size
      wav.writeUInt16LE(1, 20); // PCM format
      wav.writeUInt16LE(channelCount, 22); // mono
      wav.writeUInt32LE(sampleRate, 24);
      wav.writeUInt32LE(sampleRate * blockAlign, 28); // byte rate
      wav.writeUInt16LE(blockAlign, 32); // block align
      wav.writeUInt16LE(bitsPerSample, 34); // bits per sample

      // data chunk
      wav.write('data', 36);
      wav.writeUInt32LE(dataSize, 40);
      pcm.copy(wav, headerSize);

      fs.writeFileSync(wavPath, wav);
      this.log(`[prompt-cache] beep WAV generated for "${id}"`);
    }

    return this.registerWav(id, wavPath);
  }

  /** Remove a prompt from the cache (the WAV file on disk is left untouched). */
  remove(id: string): void {
    this.prompts.delete(id);
  }

  /** Clear all cached prompts (WAV files on disk are left untouched). */
  clear(): void {
    this.prompts.clear();
  }

  // -------------------------------------------------------------------------
  // Internal
  // -------------------------------------------------------------------------

  /** Ensure `TTS_DIR` exists and return the WAV path used for the given prompt ID. */
  private ensurePromptPath(id: string): string {
    fs.mkdirSync(TTS_DIR, { recursive: true });
    return path.join(TTS_DIR, `prompt-${id}.wav`);
  }

  /** Read the WAV's duration, store the prompt under `id` (replacing any previous entry) and log it. */
  private registerWav(id: string, wavPath: string): ICachedPrompt {
    const durationMs = getWavDurationMs(wavPath);
    const prompt: ICachedPrompt = { id, wavPath, durationMs };
    this.prompts.set(id, prompt);
    this.log(`[prompt-cache] cached "${id}": ${wavPath} (${(durationMs / 1000).toFixed(1)}s)`);
    return prompt;
  }
}