initial commit — SIP B2BUA + WebRTC bridge with Rust codec engine

Full-featured SIP router with multi-provider trunking, browser softphone via WebRTC, real-time Opus/G.722/PCM transcoding in Rust, RNNoise ML noise suppression, Kokoro neural TTS announcements, and a Lit-based web dashboard with live call monitoring and REST API.
2026-04-09 23:03:55 +00:00
commit f3e1c96872
59 changed files with 18377 additions and 0 deletions
--- a/ts/announcement.ts
+++ b/ts/announcement.ts
@@ -0,0 +1,261 @@
+/**
+ * TTS announcement module — pre-generates audio announcements using Kokoro TTS
+ * and caches them as encoded RTP packets for playback during call setup.
+ *
+ * On startup, generates the announcement WAV via the Rust tts-engine binary
+ * (Kokoro neural TTS), encodes each 20ms frame to G.722 (for SIP) and Opus
+ * (for WebRTC) via the Rust transcoder, and caches the packets.
+ */
+
+import { execFileSync, execSync } from 'node:child_process';
+import fs from 'node:fs';
+import path from 'node:path';
+import { Buffer } from 'node:buffer';
+import { buildRtpHeader, rtpClockIncrement } from './call/leg.ts';
+import { encodePcm, isCodecReady } from './opusbridge.ts';
+
+// ---------------------------------------------------------------------------
+// Types
+// ---------------------------------------------------------------------------
+
+/**
+ * Announcement audio that has already been encoded, so it can be streamed
+ * frame by frame without any further transcoding.
+ */
+export interface IAnnouncementCache {
+  /** Playback length of the announcement, in milliseconds. */
+  durationMs: number;
+  /** One G.722 payload per 20ms frame; the RTP header is not included. */
+  g722Frames: Buffer[];
+  /** One Opus payload per frame, used for WebRTC playback. */
+  opusFrames: Buffer[];
+}
+
+// ---------------------------------------------------------------------------
+// State
+// ---------------------------------------------------------------------------
+
+/** Encoded announcement; stays null until initAnnouncement() has filled it. */
+let cachedAnnouncement: IAnnouncementCache | null = null;
+
+// The Kokoro assets and the generated WAV all live under <cwd>/.nogit/tts.
+const TTS_DIR = path.join(process.cwd(), '.nogit', 'tts');
+const CACHE_WAV = path.join(TTS_DIR, 'announcement.wav');
+const KOKORO_MODEL = 'kokoro-v1.0.onnx';
+const KOKORO_VOICES = 'voices.bin';
+
+// Voice and text handed to the TTS engine when synthesizing the announcement.
+const KOKORO_VOICE = 'af_bella'; // American female, clear and natural
+const ANNOUNCEMENT_TEXT = "Hello. I'm connecting your call now.";
+
+// ---------------------------------------------------------------------------
+// Initialization
+// ---------------------------------------------------------------------------
+
+/**
+ * Pre-generate the announcement audio and cache it as encoded frames.
+ *
+ * Steps: locate the Kokoro model/voices files and the tts-engine binary,
+ * synthesize the announcement WAV if it is not on disk yet, extract the PCM
+ * payload, and encode every 20ms frame to both G.722 (payload type 9) and
+ * Opus (payload type 111).
+ *
+ * Must be called after the codec bridge is initialized. When the bridge is
+ * not ready the function returns false without caching anything, so the
+ * caller can simply call it again later.
+ *
+ * @param log - sink for human-readable progress and error messages
+ * @returns true when at least one encoded frame was cached; false when a
+ *   prerequisite is missing, the WAV cannot be parsed, the codec bridge is
+ *   not ready, nothing could be encoded, or an error was thrown
+ */
+export async function initAnnouncement(log: (msg: string) => void): Promise<boolean> {
+  const modelPath = path.join(TTS_DIR, KOKORO_MODEL);
+  const voicesPath = path.join(TTS_DIR, KOKORO_VOICES);
+
+  // Announcements are optional: missing Kokoro files just disable them.
+  if (!fs.existsSync(modelPath)) {
+    log('[tts] Kokoro model not found at ' + modelPath + ' — announcements disabled');
+    return false;
+  }
+  if (!fs.existsSync(voicesPath)) {
+    log('[tts] Kokoro voices not found at ' + voicesPath + ' — announcements disabled');
+    return false;
+  }
+
+  // Find tts-engine binary: packaged build first, then cargo release/debug.
+  const root = process.cwd();
+  const ttsBinPaths = [
+    path.join(root, 'dist_rust', 'tts-engine'),
+    path.join(root, 'rust', 'target', 'release', 'tts-engine'),
+    path.join(root, 'rust', 'target', 'debug', 'tts-engine'),
+  ];
+  const ttsBin = ttsBinPaths.find((p) => fs.existsSync(p));
+  if (!ttsBin) {
+    log('[tts] tts-engine binary not found — announcements disabled');
+    return false;
+  }
+
+  try {
+    // Generate WAV if not cached.
+    if (!fs.existsSync(CACHE_WAV)) {
+      log('[tts] generating announcement audio via Kokoro TTS...');
+      // Synthesize into a temporary file and rename on success, so a crashed
+      // or timed-out run never leaves a truncated WAV behind as the cache.
+      const tmpWav = `${CACHE_WAV}.${process.pid}.tmp.wav`;
+      try {
+        // Argument vector instead of a shell string: paths and text are
+        // passed verbatim and need no quoting or escaping.
+        execFileSync(
+          ttsBin,
+          [
+            '--model', modelPath,
+            '--voices', voicesPath,
+            '--voice', KOKORO_VOICE,
+            '--output', tmpWav,
+            '--text', ANNOUNCEMENT_TEXT,
+          ],
+          { timeout: 120000, stdio: 'pipe' },
+        );
+        fs.renameSync(tmpWav, CACHE_WAV);
+      } finally {
+        // No-op after a successful rename; removes leftovers after a failure.
+        fs.rmSync(tmpWav, { force: true });
+      }
+      log('[tts] announcement WAV generated');
+    }
+
+    // Read WAV and extract raw PCM.
+    const wav = fs.readFileSync(CACHE_WAV);
+    const pcm = extractPcmFromWav(wav);
+    if (!pcm) {
+      log('[tts] failed to parse WAV file');
+      // An unparseable cache would otherwise be reused on every start.
+      fs.rmSync(CACHE_WAV, { force: true });
+      log('[tts] removed unparseable cache file ' + CACHE_WAV);
+      return false;
+    }
+
+    // The codec bridge is not awaited here; bail out and let the caller retry.
+    if (!isCodecReady()) {
+      log('[tts] codec bridge not ready — will retry');
+      return false;
+    }
+
+    // Kokoro outputs 24000 Hz, 16-bit mono.
+    // We encode in chunks: 20ms at 24000 Hz = 480 samples = 960 bytes of PCM.
+    // The source rate is passed to the encoder with every frame.
+    const SAMPLE_RATE = 24000;
+    const FRAME_SAMPLES = Math.floor(SAMPLE_RATE * 0.02); // 480 samples per 20ms
+    const FRAME_BYTES = FRAME_SAMPLES * 2; // 16-bit = 2 bytes per sample
+    // A trailing partial frame (< 20ms) is dropped.
+    const totalFrames = Math.floor(pcm.length / FRAME_BYTES);
+
+    const g722Frames: Buffer[] = [];
+    const opusFrames: Buffer[] = [];
+    let failedFrames = 0;
+
+    log(`[tts] encoding ${totalFrames} frames (${FRAME_SAMPLES} samples/frame @ ${SAMPLE_RATE}Hz)...`);
+    // Frames are awaited one at a time so they reach each encoder in order.
+    // NOTE(review): presumably the encoders keep state between frames — confirm
+    // before parallelizing across frames.
+    for (let i = 0; i < totalFrames; i++) {
+      const framePcm = pcm.subarray(i * FRAME_BYTES, (i + 1) * FRAME_BYTES);
+      const pcmBuf = Buffer.from(framePcm);
+      const [g722, opus] = await Promise.all([
+        encodePcm(pcmBuf, SAMPLE_RATE, 9),   // G.722 for SIP devices
+        encodePcm(pcmBuf, SAMPLE_RATE, 111),  // Opus for WebRTC browsers
+      ]);
+      if (g722) g722Frames.push(g722);
+      if (opus) opusFrames.push(opus);
+      if (!g722 && !opus) {
+        failedFrames++;
+        // Only the first few failures are logged individually to avoid spam.
+        if (i < 3) log(`[tts] frame ${i} encode failed`);
+      }
+    }
+    if (failedFrames > 0) {
+      log(`[tts] ${failedFrames} of ${totalFrames} frames failed to encode`);
+    }
+
+    // Nothing usable was produced: report failure instead of caching an
+    // empty announcement and claiming success.
+    if (g722Frames.length === 0 && opusFrames.length === 0) {
+      log('[tts] no announcement frames could be encoded');
+      return false;
+    }
+
+    cachedAnnouncement = {
+      g722Frames,
+      opusFrames,
+      durationMs: totalFrames * 20,
+    };
+
+    log(`[tts] announcement cached: ${g722Frames.length} frames (${(totalFrames * 20 / 1000).toFixed(1)}s)`);
+    return true;
+  } catch (e: unknown) {
+    const reason = e instanceof Error ? e.message : String(e);
+    log(`[tts] init error: ${reason}`);
+    return false;
+  }
+}
+
+// ---------------------------------------------------------------------------
+// Playback
+// ---------------------------------------------------------------------------
+
+/**
+ * Stream the cached G.722 announcement as RTP, one frame per 20ms timer tick.
+ *
+ * Sequence number and timestamp start at random values and advance with every
+ * frame. The last buildRtpHeader argument is true for the first packet only
+ * (presumably the RTP marker bit — confirm against buildRtpHeader).
+ *
+ * @param sendPacket - receives each complete RTP packet (header + payload)
+ * @param ssrc - SSRC written into every RTP header
+ * @param onDone - invoked after the last frame was sent, or immediately when
+ *   no announcement is cached; not invoked when playback is cancelled
+ * @returns a function that stops playback, or null if no announcement is cached
+ */
+export function playAnnouncement(
+  sendPacket: (pkt: Buffer) => void,
+  ssrc: number,
+  onDone?: () => void,
+): (() => void) | null {
+  const g722 = cachedAnnouncement?.g722Frames;
+  if (!g722 || g722.length === 0) {
+    onDone?.();
+    return null;
+  }
+
+  const PAYLOAD_TYPE = 9; // G.722
+  let sequence = Math.floor(Math.random() * 0xffff);
+  let timestamp = Math.floor(Math.random() * 0xffffffff);
+  let next = 0;
+
+  // Sends one frame per tick; once every frame is out, stops and reports.
+  const sendNextFrame = (): void => {
+    if (next < g722.length) {
+      const header = buildRtpHeader(
+        PAYLOAD_TYPE,
+        sequence & 0xffff,
+        timestamp >>> 0,
+        ssrc >>> 0,
+        next === 0,
+      );
+      sendPacket(Buffer.concat([header, g722[next]]));
+
+      sequence += 1;
+      timestamp += rtpClockIncrement(PAYLOAD_TYPE);
+      next += 1;
+      return;
+    }
+
+    clearInterval(ticker);
+    onDone?.();
+  };
+
+  const ticker = setInterval(sendNextFrame, 20);
+
+  // Cancel handle for the caller.
+  return () => clearInterval(ticker);
+}
+
+/**
+ * Stream the cached Opus announcement as RTP towards a WebRTC sender, one
+ * frame per 20ms timer tick.
+ *
+ * @param sendRtpPacket - function to send a raw RTP packet via sender.sendRtp()
+ * @param ssrc - SSRC written into every RTP header
+ * @param counters - caller-owned RTP state; read for each packet and advanced
+ *   in place (seq by 1, ts by 960 per frame)
+ * @param onDone - invoked after the last frame was sent, or immediately when
+ *   no announcement is cached; not invoked when playback is cancelled
+ * @returns a function that stops playback, or null if no announcement is cached
+ */
+export function playAnnouncementToWebRtc(
+  sendRtpPacket: (pkt: Buffer) => void,
+  ssrc: number,
+  counters: { seq: number; ts: number },
+  onDone?: () => void,
+): (() => void) | null {
+  const opus = cachedAnnouncement?.opusFrames;
+  if (!opus || opus.length === 0) {
+    onDone?.();
+    return null;
+  }
+
+  const PAYLOAD_TYPE = 111; // Opus
+  const TS_STEP = 960; // Opus at 48kHz: 960 samples per 20ms
+  let next = 0;
+
+  // Sends one frame per tick; once every frame is out, stops and reports.
+  const sendNextFrame = (): void => {
+    if (next < opus.length) {
+      const header = buildRtpHeader(
+        PAYLOAD_TYPE,
+        counters.seq & 0xffff,
+        counters.ts >>> 0,
+        ssrc >>> 0,
+        next === 0,
+      );
+      sendRtpPacket(Buffer.concat([header, opus[next]]));
+
+      counters.seq += 1;
+      counters.ts += TS_STEP;
+      next += 1;
+      return;
+    }
+
+    clearInterval(ticker);
+    onDone?.();
+  };
+
+  const ticker = setInterval(sendNextFrame, 20);
+
+  return () => clearInterval(ticker);
+}
+
+/** Report whether at least one G.722 announcement frame has been cached. */
+export function isAnnouncementReady(): boolean {
+  const cachedFrameCount = cachedAnnouncement?.g722Frames.length ?? 0;
+  return cachedFrameCount > 0;
+}
+
+// ---------------------------------------------------------------------------
+// WAV parsing
+// ---------------------------------------------------------------------------
+
+/**
+ * Locate the "data" chunk of a RIFF/WAVE buffer and return its payload.
+ *
+ * Only the chunk layout is walked; the "fmt " chunk is not interpreted, so
+ * sample rate and sample format are not validated here.
+ *
+ * @param wav - complete contents of a WAV file
+ * @returns a view (not a copy) of the data chunk's bytes, clamped to the end
+ *   of the buffer; null when the buffer is shorter than 44 bytes, lacks the
+ *   RIFF/WAVE signature, or contains no data chunk
+ */
+function extractPcmFromWav(wav: Buffer): Buffer | null {
+  const CHUNK_HEADER = 8; // 4-byte chunk id + 4-byte little-endian size
+  const tagAt = (at: number): string => wav.toString('ascii', at, at + 4);
+
+  const looksLikeWav = wav.length >= 44 && tagAt(0) === 'RIFF' && tagAt(8) === 'WAVE';
+  if (!looksLikeWav) return null;
+
+  // Chunks start right after the 12-byte RIFF/WAVE preamble.
+  for (let pos = 12; pos < wav.length - CHUNK_HEADER; ) {
+    const size = wav.readUInt32LE(pos + 4);
+    const bodyStart = pos + CHUNK_HEADER;
+    if (tagAt(pos) === 'data') {
+      return wav.subarray(bodyStart, bodyStart + size);
+    }
+    // Skip this chunk; the next one begins on an even offset.
+    pos = bodyStart + size;
+    pos += pos % 2;
+  }
+  return null;
+}