138 lines
4.2 KiB
TypeScript
138 lines
4.2 KiB
TypeScript
/**
 * TTS announcement module — generates announcement WAV files at startup.
 *
 * Engine priority: espeak-ng (formant TTS, fast) → Kokoro neural TTS via
 * proxy-engine → disabled.
 *
 * The generated WAV is left on disk for Rust's audio_player / start_interaction
 * to play during calls. No encoding or RTP playback happens in TypeScript.
 */
|
|
|
|
import { execFileSync, execSync } from 'node:child_process';
import fs from 'node:fs';
import path from 'node:path';
import { sendProxyCommand, isProxyReady } from './proxybridge.ts';
|
|
|
|
// ---------------------------------------------------------------------------
|
|
// State
|
|
// ---------------------------------------------------------------------------
|
|
|
|
/** Directory (under the current working directory) holding TTS assets and output. */
const TTS_DIR = path.resolve('.nogit', 'tts');

/** Sentence that is synthesized into the announcement WAV. */
const ANNOUNCEMENT_TEXT = "Hello. I'm connecting your call now.";

/** Location of the generated announcement WAV; its existence doubles as the cache check. */
const CACHE_WAV = path.join(TTS_DIR, 'announcement.wav');

// Kokoro fallback: model / voice-pack file names (looked up inside TTS_DIR)
// and the voice id passed to the proxy-engine.
const KOKORO_MODEL = 'kokoro-v1.0.onnx';
const KOKORO_VOICES = 'voices.bin';
const KOKORO_VOICE = 'af_bella';
|
|
|
|
// ---------------------------------------------------------------------------
|
|
// TTS generators
|
|
// ---------------------------------------------------------------------------
|
|
|
|
/**
 * Check whether an `espeak-ng` executable can be found on the current PATH.
 *
 * The lookup uses the POSIX shell builtin `command -v` instead of the external
 * `which` utility: `which` is not installed on every minimal system or
 * container image, and its absence would make an installed espeak-ng look
 * missing.
 *
 * @returns `true` if the lookup command succeeds, `false` on any failure.
 */
function isEspeakAvailable(): boolean {
  try {
    // stdio 'pipe' keeps the lookup's output off this process's stdout/stderr.
    execSync('command -v espeak-ng', { stdio: 'pipe' });
    return true;
  } catch {
    // Non-zero exit status (not found) or the shell could not be spawned.
    return false;
  }
}
|
|
|
|
/**
 * Generate the announcement WAV via espeak-ng (primary engine).
 *
 * espeak-ng is invoked directly with an argument vector (no shell), so
 * `wavPath` and `text` are passed through verbatim even when they contain
 * quotes, `$`, backticks or other shell metacharacters.
 *
 * @param wavPath Output file handed to espeak-ng's `-w` option.
 * @param text    Text to synthesize.
 * @param log     Sink for progress / error messages.
 * @returns `true` if espeak-ng exited successfully, `false` otherwise. On
 *          failure any file left at `wavPath` is removed so that a truncated
 *          WAV is not later mistaken for a valid cached announcement.
 */
function generateViaEspeak(wavPath: string, text: string, log: (msg: string) => void): boolean {
  log('[tts] generating announcement audio via espeak-ng...');
  try {
    execFileSync('espeak-ng', ['-v', 'en-us', '-s', '150', '-w', wavPath, text], {
      timeout: 10000, // milliseconds; the child is killed if it runs longer
      stdio: 'pipe',
    });
    log('[tts] espeak-ng WAV generated');
    return true;
  } catch (e: unknown) {
    const reason = e instanceof Error ? e.message : String(e);
    log(`[tts] espeak-ng failed: ${reason}`);
    try {
      // Best effort: drop partial output (e.g. after a timeout kill).
      fs.rmSync(wavPath, { force: true });
    } catch {
      // Cleanup failure must not mask the original error path.
    }
    return false;
  }
}
|
|
|
|
/**
 * Generate the announcement WAV via Kokoro TTS (fallback engine).
 *
 * Synthesis is delegated to the proxy-engine through the `generate_tts`
 * command; this function only checks preconditions and forwards paths.
 *
 * @param wavPath Output path forwarded to the proxy-engine as `output`.
 * @param text    Text to synthesize.
 * @param log     Sink for progress / error messages.
 * @returns `true` if the proxy command resolved; `false` if the model or
 *          voices file is missing from TTS_DIR, the proxy-engine is not
 *          ready, or the command rejected.
 */
async function generateViaKokoro(wavPath: string, text: string, log: (msg: string) => void): Promise<boolean> {
  const modelPath = path.join(TTS_DIR, KOKORO_MODEL);
  const voicesPath = path.join(TTS_DIR, KOKORO_VOICES);

  // Both assets must already be present on disk.
  if (!fs.existsSync(modelPath) || !fs.existsSync(voicesPath)) {
    log('[tts] Kokoro model/voices not found — Kokoro fallback unavailable');
    return false;
  }

  if (!isProxyReady()) {
    log('[tts] proxy-engine not ready — Kokoro fallback unavailable');
    return false;
  }

  log('[tts] generating announcement audio via Kokoro TTS (fallback)...');
  try {
    await sendProxyCommand('generate_tts', {
      model: modelPath,
      voices: voicesPath,
      voice: KOKORO_VOICE,
      text,
      output: wavPath,
    });
    log('[tts] Kokoro WAV generated (via proxy-engine)');
    return true;
  } catch (e: unknown) {
    // A rejection is not guaranteed to be an Error; avoid logging "undefined".
    const reason = e instanceof Error ? e.message : String(e);
    log(`[tts] Kokoro failed: ${reason}`);
    return false;
  }
}
|
|
|
|
// ---------------------------------------------------------------------------
|
|
// Initialization
|
|
// ---------------------------------------------------------------------------
|
|
|
|
/**
 * Pre-generate the announcement WAV file.
 * Must be called after the proxy engine is initialized.
 *
 * Engine priority: espeak-ng → Kokoro → disabled. If the WAV already exists
 * on disk, generation is skipped entirely.
 *
 * @param log Sink for progress / error messages.
 * @returns `true` when the announcement WAV is available (pre-existing or
 *          freshly generated); `false` when no engine could produce it or an
 *          error occurred during initialization (including failure to create
 *          the TTS directory). Errors are logged, not thrown.
 */
export async function initAnnouncement(log: (msg: string) => void): Promise<boolean> {
  try {
    // Inside the try so a filesystem error is reported via log + `false`,
    // like every other initialization failure, rather than as a rejection.
    fs.mkdirSync(TTS_DIR, { recursive: true });

    if (!fs.existsSync(CACHE_WAV)) {
      let generated = false;

      // Try espeak-ng first.
      if (isEspeakAvailable()) {
        generated = generateViaEspeak(CACHE_WAV, ANNOUNCEMENT_TEXT, log);
      } else {
        log('[tts] espeak-ng not installed — trying Kokoro fallback');
      }

      // Fall back to Kokoro (via proxy-engine), also when espeak-ng was
      // present but failed.
      if (!generated) {
        generated = await generateViaKokoro(CACHE_WAV, ANNOUNCEMENT_TEXT, log);
      }

      if (!generated) {
        log('[tts] no TTS engine available — announcements disabled');
        return false;
      }
    }

    log('[tts] announcement WAV ready');
    return true;
  } catch (e: unknown) {
    const reason = e instanceof Error ? e.message : String(e);
    log(`[tts] init error: ${reason}`);
    return false;
  }
}
|
|
|
|
/**
 * Look up the cached announcement WAV.
 *
 * @returns The WAV's path when the file currently exists on disk, otherwise `null`.
 */
export function getAnnouncementWavPath(): string | null {
  if (!fs.existsSync(CACHE_WAV)) {
    return null;
  }
  return CACHE_WAV;
}
|