/**
 * TTS announcement module — generates announcement WAV files at startup.
 *
 * Engine priority: espeak-ng (formant TTS, fast) → Kokoro neural TTS via
 * proxy-engine → disabled.
 *
 * The generated WAV is left on disk for Rust's audio_player / start_interaction
 * to play during calls. No encoding or RTP playback happens in TypeScript.
 */

import { execFileSync, execSync } from 'node:child_process';
import fs from 'node:fs';
import path from 'node:path';
import { sendProxyCommand, isProxyReady } from './proxybridge.ts';

// ---------------------------------------------------------------------------
// State
// ---------------------------------------------------------------------------

/** Logging callback supplied by the caller. */
type LogFn = (msg: string) => void;

const TTS_DIR = path.join(process.cwd(), '.nogit', 'tts');
const ANNOUNCEMENT_TEXT = "Hello. I'm connecting your call now.";
const CACHE_WAV = path.join(TTS_DIR, 'announcement.wav');

// Engines write here first; the file is renamed to CACHE_WAV only after it has
// been validated, so an interrupted run never leaves a truncated cache entry.
const PENDING_WAV = path.join(TTS_DIR, 'announcement.partial.wav');

// A canonical RIFF/WAVE header is 44 bytes; anything that small carries no audio.
const WAV_HEADER_BYTES = 44;

// Kokoro fallback constants.
const KOKORO_MODEL = 'kokoro-v1.0.onnx';
const KOKORO_VOICES = 'voices.bin';
const KOKORO_VOICE = 'af_bella';

// ---------------------------------------------------------------------------
// Helpers
// ---------------------------------------------------------------------------

/** Turn an arbitrary thrown value into a printable message. */
function errorMessage(err: unknown): string {
  return err instanceof Error ? err.message : String(err);
}

/** True if `filePath` exists and is larger than a bare WAV header. */
function isUsableWav(filePath: string): boolean {
  try {
    return fs.statSync(filePath).size > WAV_HEADER_BYTES;
  } catch {
    // Missing or unreadable file — treat as "not generated".
    return false;
  }
}

// ---------------------------------------------------------------------------
// TTS generators
// ---------------------------------------------------------------------------

/** Check if espeak-ng is available on the system. */
function isEspeakAvailable(): boolean {
  try {
    execSync('which espeak-ng', { stdio: 'pipe' });
    return true;
  } catch {
    return false;
  }
}

/**
 * Generate announcement WAV via espeak-ng (primary engine).
 *
 * @param wavPath Destination file for the synthesized audio.
 * @param text    Text to speak.
 * @param log     Logging callback.
 * @returns `true` if espeak-ng exited successfully and wrote a usable WAV.
 */
function generateViaEspeak(wavPath: string, text: string, log: LogFn): boolean {
  log('[tts] generating announcement audio via espeak-ng...');
  try {
    // Arguments are passed as an argv array (no shell), so quotes, `$` or
    // backticks in the path or text cannot break or inject into the command.
    execFileSync('espeak-ng', ['-v', 'en-us', '-s', '150', '-w', wavPath, text], {
      timeout: 10000,
      stdio: 'pipe',
    });
    if (!isUsableWav(wavPath)) {
      log('[tts] espeak-ng failed: no usable WAV was written');
      return false;
    }
    log('[tts] espeak-ng WAV generated');
    return true;
  } catch (e: unknown) {
    log(`[tts] espeak-ng failed: ${errorMessage(e)}`);
    return false;
  }
}

/**
 * Generate announcement WAV via Kokoro TTS (fallback, runs inside proxy-engine).
 *
 * @param wavPath Destination file for the synthesized audio.
 * @param text    Text to speak.
 * @param log     Logging callback.
 * @returns `true` if the proxy-engine command succeeded and wrote a usable WAV;
 *          `false` if the model files are missing, the proxy is not ready, or
 *          generation failed.
 */
async function generateViaKokoro(wavPath: string, text: string, log: LogFn): Promise<boolean> {
  const modelPath = path.join(TTS_DIR, KOKORO_MODEL);
  const voicesPath = path.join(TTS_DIR, KOKORO_VOICES);

  if (!fs.existsSync(modelPath) || !fs.existsSync(voicesPath)) {
    log('[tts] Kokoro model/voices not found — Kokoro fallback unavailable');
    return false;
  }

  if (!isProxyReady()) {
    log('[tts] proxy-engine not ready — Kokoro fallback unavailable');
    return false;
  }

  log('[tts] generating announcement audio via Kokoro TTS (fallback)...');
  try {
    await sendProxyCommand('generate_tts', {
      model: modelPath,
      voices: voicesPath,
      voice: KOKORO_VOICE,
      text,
      output: wavPath,
    });
    if (!isUsableWav(wavPath)) {
      log('[tts] Kokoro failed: no usable WAV was written');
      return false;
    }
    log('[tts] Kokoro WAV generated (via proxy-engine)');
    return true;
  } catch (e: unknown) {
    log(`[tts] Kokoro failed: ${errorMessage(e)}`);
    return false;
  }
}

// ---------------------------------------------------------------------------
// Initialization
// ---------------------------------------------------------------------------

/**
 * Pre-generate the announcement WAV file.
 * Must be called after the proxy engine is initialized.
 *
 * Engine priority: espeak-ng → Kokoro → disabled.
 *
 * An existing usable cache file is reused as-is. Otherwise the audio is
 * synthesized into a scratch file and moved to the cache path only once it
 * has been validated.
 *
 * @param log Logging callback.
 * @returns `true` if a usable announcement WAV is on disk afterwards,
 *          `false` if no engine could produce one or an error occurred.
 *          Failures are logged, not thrown.
 */
export async function initAnnouncement(log: LogFn): Promise<boolean> {
  try {
    fs.mkdirSync(TTS_DIR, { recursive: true });

    if (!isUsableWav(CACHE_WAV)) {
      // Drop any leftover from a previously interrupted run.
      fs.rmSync(PENDING_WAV, { force: true });

      let generated = false;

      // Try espeak-ng first.
      if (isEspeakAvailable()) {
        generated = generateViaEspeak(PENDING_WAV, ANNOUNCEMENT_TEXT, log);
      } else {
        log('[tts] espeak-ng not installed — trying Kokoro fallback');
      }

      // Fall back to Kokoro (via proxy-engine).
      if (!generated) {
        generated = await generateViaKokoro(PENDING_WAV, ANNOUNCEMENT_TEXT, log);
      }

      if (!generated) {
        fs.rmSync(PENDING_WAV, { force: true });
        log('[tts] no TTS engine available — announcements disabled');
        return false;
      }

      // Publish the finished file under its final name.
      fs.renameSync(PENDING_WAV, CACHE_WAV);
    }

    log('[tts] announcement WAV ready');
    return true;
  } catch (e: unknown) {
    log(`[tts] init error: ${errorMessage(e)}`);
    return false;
  }
}

/** Get the path to the cached announcement WAV, or null if not generated. */
export function getAnnouncementWavPath(): string | null {
  return isUsableWav(CACHE_WAV) ? CACHE_WAV : null;
}