Files
siprouter/ts/call/audio-recorder.ts

324 lines
10 KiB
TypeScript
Raw Normal View History

/**
* Audio recorder captures RTP packets from a single direction,
* decodes them to PCM, and writes a WAV file.
*
* PCMU payloads are decoded from mu-law to 16-bit PCM directly in TypeScript.
* All other codecs (e.g. G.722, Opus, PCMA) are first transcoded to PCMU by
* the Rust codec bridge and then decoded the same way.
* Output: 16-bit mono WAV, 8kHz by default (standard telephony quality).
*
* Supports:
* - Max recording duration limit
* - Silence detection (stop after N seconds of silence)
* - Manual stop
* - DTMF (PT 101) and comfort-noise (PT 13) packets are automatically skipped
*/
import { Buffer } from 'node:buffer';
import fs from 'node:fs';
import path from 'node:path';
import { WavWriter } from './wav-writer.ts';
import type { IWavWriterResult } from './wav-writer.ts';
import { transcode, createSession, destroySession } from '../opusbridge.ts';
// ---------------------------------------------------------------------------
// Types
// ---------------------------------------------------------------------------
/**
* Configuration accepted by the AudioRecorder constructor.
* Optional fields fall back to the defaults noted on each of them.
*/
export interface IRecordingOptions {
/** Output directory for WAV files. start() creates it (recursively) when it is missing. */
outputDir: string;
/**
* Sample rate handed to the WAV writer (default 8000).
* NOTE(review): the recorder decodes everything via PCMU and does not resample
* in this file, so a value other than 8000 presumably mislabels the audio —
* confirm against WavWriter before relying on it.
*/
sampleRate?: number;
/** Maximum recording duration in seconds. 0 (or less) = unlimited. Default 120. */
maxDurationSec?: number;
/** Stop after this many consecutive seconds of silence. 0 (or less) = disabled. Default 5. */
silenceTimeoutSec?: number;
/**
* Silence threshold: a packet is "silent" when every decoded sample has an
* absolute amplitude strictly below this value. Default 200.
*/
silenceThreshold?: number;
/** Logging function; receives fully formatted, single-line messages. */
log: (msg: string) => void;
}
/**
* Summary of a recording, returned by AudioRecorder.stop() and passed to the
* onStopped callback when the recorder stops on its own.
*/
export interface IRecordingResult {
/** Full path to the WAV file (empty string if start() never ran). */
filePath: string;
/** Duration in milliseconds as reported by the WAV writer; 0 when stop() was called while not recording. */
durationMs: number;
/** Sample rate of the WAV. */
sampleRate: number;
/** Size of the WAV file in bytes as reported by the WAV writer; 0 when stop() was called while not recording. */
fileSize: number;
/** Why the recording was stopped. */
stopReason: TRecordingStopReason;
}
/**
* Why a recording ended:
* - 'manual': stop() was called by the owner (also the initial/default value)
* - 'max-duration': the max-duration timer fired
* - 'silence': the silence timeout was reached
* - 'cancelled': cancel() was called
*/
export type TRecordingStopReason = 'manual' | 'max-duration' | 'silence' | 'cancelled';
// ---------------------------------------------------------------------------
// Mu-law decode table (ITU-T G.711)
// ---------------------------------------------------------------------------
/** Lookup table mapping every mu-law code (0..255) to its 16-bit linear PCM value. */
const MULAW_DECODE: Int16Array = buildMulawDecodeTable();

/**
 * Build the 256-entry mu-law expansion table.
 *
 * Each code is bit-inverted, split into sign / 3-bit exponent / 4-bit
 * mantissa, expanded to a biased magnitude, and then has the 0x84 bias removed.
 */
function buildMulawDecodeTable(): Int16Array {
  const BIAS = 0x84;
  return Int16Array.from({ length: 256 }, (_unused, code) => {
    // For 0..255, XOR with 0xff is the same as inverting all eight bits.
    const inverted = code ^ 0xff;
    const segment = (inverted >> 4) & 0x07;
    const step = inverted & 0x0f;
    const magnitude = (((step << 1) + 33) << (segment + 2)) - BIAS;
    const negative = (inverted & 0x80) !== 0;
    return negative ? -magnitude : magnitude;
  });
}
/**
 * Expand a PCMU (mu-law) payload into 16-bit little-endian PCM.
 * The result is exactly twice as long as the input: one sample per input byte.
 */
function decodeMulaw(mulaw: Buffer): Buffer {
  const sampleCount = mulaw.length;
  const pcm = Buffer.alloc(sampleCount * 2);
  let writeOffset = 0;
  for (const code of mulaw) {
    pcm.writeInt16LE(MULAW_DECODE[code], writeOffset);
    writeOffset += 2;
  }
  return pcm;
}
// ---------------------------------------------------------------------------
// AudioRecorder
// ---------------------------------------------------------------------------
/**
 * Records one direction of an RTP stream to a WAV file.
 *
 * Lifecycle: `idle` → start() → `recording` → stop()/cancel()/auto-stop →
 * `stopped`. An instance is single-use; start() is a no-op unless the
 * recorder is `idle`.
 *
 * Packets are decoded strictly one at a time (see `processQueue`) so the
 * codec session never sees concurrent transcodes.
 */
export class AudioRecorder {
  /** Current state. */
  state: 'idle' | 'recording' | 'stopped' = 'idle';

  /** Called when recording stops automatically (silence or max duration). */
  onStopped: ((result: IRecordingResult) => void) | null = null;

  /** Size of the fixed RTP header in bytes (RFC 3550 §5.1). */
  private static readonly RTP_FIXED_HEADER_BYTES = 12;

  /**
   * Sample rate of the PCM produced by decodeMulaw(): everything is decoded
   * via PCMU (RTP payload type 0), which is an 8 kHz codec.
   */
  private static readonly DECODED_RATE_HZ = 8000;

  private readonly outputDir: string;
  /**
   * Sample rate written into the WAV header. The recorder does not resample,
   * so this should match the 8 kHz decoded audio.
   */
  private readonly sampleRate: number;
  private readonly maxDurationSec: number;
  private readonly silenceTimeoutSec: number;
  private readonly silenceThreshold: number;
  private readonly log: (msg: string) => void;

  private wavWriter: WavWriter | null = null;
  private filePath: string = '';
  private codecSessionId: string | null = null;
  private stopReason: TRecordingStopReason = 'manual';

  // Silence detection, measured in decoded samples so the timeout is correct
  // for any RTP packetisation time (not only 20 ms packets).
  private consecutiveSilentSamples = 0;
  /** Number of consecutive silent samples that triggers the silence stop. 0 = disabled. */
  private silenceSampleThreshold = 0;

  // Max duration timer.
  private maxDurationTimer: ReturnType<typeof setTimeout> | null = null;

  // Processing queue to avoid concurrent transcodes. Every link ends in a
  // catch handler, so this promise never rejects.
  private processQueue: Promise<void> = Promise.resolve();

  /** Packets dropped because decoding or writing threw. */
  private droppedPackets = 0;

  constructor(options: IRecordingOptions) {
    this.outputDir = options.outputDir;
    this.sampleRate = options.sampleRate ?? 8000;
    this.maxDurationSec = options.maxDurationSec ?? 120;
    this.silenceTimeoutSec = options.silenceTimeoutSec ?? 5;
    this.silenceThreshold = options.silenceThreshold ?? 200;
    this.log = options.log;
  }

  /**
   * Start recording. Creates the output directory, WAV file, and codec session.
   *
   * Does nothing unless the recorder is `idle`. If the WAV file cannot be
   * opened, the codec session created for this recording is destroyed again
   * before the error is rethrown.
   *
   * @param fileId - unique ID for the recording file name; a random one is
   *   generated when omitted
   */
  async start(fileId?: string): Promise<void> {
    if (this.state !== 'idle') return;

    // Recursive mkdir is a no-op when the directory already exists.
    fs.mkdirSync(this.outputDir, { recursive: true });

    // Generate file path.
    const id = fileId ?? `rec-${Date.now()}-${Math.random().toString(36).slice(2, 8)}`;
    this.filePath = path.join(this.outputDir, `${id}.wav`);

    // Create a codec session for isolated decoding.
    const sessionId = `recorder-${id}`;
    await createSession(sessionId);
    this.codecSessionId = sessionId;

    // Open WAV writer; release the codec session if that fails.
    try {
      const writer = new WavWriter({
        filePath: this.filePath,
        sampleRate: this.sampleRate,
      });
      writer.open();
      this.wavWriter = writer;
    } catch (err: unknown) {
      this.codecSessionId = null;
      try {
        await destroySession(sessionId);
      } catch {
        // The WAV error is the one worth reporting; ignore cleanup failure.
      }
      throw err;
    }

    // Silence detection threshold: samples in the timeout period.
    this.silenceSampleThreshold = this.silenceTimeoutSec > 0
      ? Math.ceil(this.silenceTimeoutSec * AudioRecorder.DECODED_RATE_HZ)
      : 0;
    this.consecutiveSilentSamples = 0;
    this.droppedPackets = 0;

    // Max duration timer.
    if (this.maxDurationSec > 0) {
      this.maxDurationTimer = setTimeout(() => {
        if (this.state !== 'recording') return;
        this.log(`[recorder] max duration reached (${this.maxDurationSec}s)`);
        this.autoStop('max-duration');
      }, this.maxDurationSec * 1000);
    }

    this.state = 'recording';
    this.stopReason = 'manual';
    this.log(`[recorder] started → ${this.filePath}`);
  }

  /**
   * Feed an RTP packet. Strips the RTP header (including any CSRC list,
   * header extension and trailing padding), transcodes the payload to PCMU
   * via the Rust bridge when needed, decodes to PCM, and writes to WAV.
   * Skips telephone-event (DTMF) and comfort noise packets.
   *
   * Returns immediately; decoding happens asynchronously in arrival order.
   * Packets that are malformed or fail to decode are dropped.
   */
  processRtp(data: Buffer): void {
    if (this.state !== 'recording') return;

    const payload = AudioRecorder.extractRtpPayload(data);
    if (!payload) return; // too short, malformed, or no audio bytes

    const pt = data.readUInt8(1) & 0x7f; // mask off the marker bit
    // Skip DTMF (telephone-event) and comfort noise.
    if (pt === 101 || pt === 13) return;

    // Queue processing to avoid concurrent transcodes corrupting codec state.
    // The catch keeps one bad packet from rejecting the chain, which would
    // silently skip every later packet and make stop() throw.
    this.processQueue = this.processQueue
      .then(() => this.decodeAndWrite(payload, pt))
      .catch((err: unknown) => this.noteDroppedPacket(err));
  }

  /**
   * Locate the payload inside an RTP packet per RFC 3550 §5.1.
   *
   * @returns a view (no copy) of the payload bytes, or null when the packet
   *   is truncated, has an invalid padding count, or carries no payload
   */
  private static extractRtpPayload(packet: Buffer): Buffer | null {
    const fixed = AudioRecorder.RTP_FIXED_HEADER_BYTES;
    if (packet.length <= fixed) return null;

    const flags = packet.readUInt8(0);
    // CC (low 4 bits): number of 32-bit CSRC identifiers after the fixed header.
    let start = fixed + (flags & 0x0f) * 4;

    // X bit: a header extension follows — 16-bit profile id, 16-bit length
    // in 32-bit words (not counting this 4-byte extension header).
    if ((flags & 0x10) !== 0) {
      if (packet.length < start + 4) return null;
      start += 4 + packet.readUInt16BE(start + 2) * 4;
    }

    // P bit: the last octet holds the padding length, itself included.
    let end = packet.length;
    if ((flags & 0x20) !== 0) {
      const padding = packet.readUInt8(end - 1);
      if (padding === 0) return null;
      end -= padding;
    }

    return end > start ? packet.subarray(start, end) : null;
  }

  /** Decode a single RTP payload to PCM and write to WAV. */
  private async decodeAndWrite(payload: Buffer, pt: number): Promise<void> {
    const writer = this.wavWriter;
    if (this.state !== 'recording' || !writer) return;

    let pcm: Buffer;
    if (pt === 0) {
      // PCMU: decode directly in TypeScript (no Rust round-trip needed).
      pcm = decodeMulaw(payload);
    } else {
      // All other codecs: transcode to PCMU via Rust, then decode mu-law.
      const mulaw = await transcode(payload, pt, 0, this.codecSessionId ?? undefined);
      if (!mulaw) return;
      pcm = decodeMulaw(mulaw);
    }

    // Silence detection.
    if (this.silenceSampleThreshold > 0) {
      if (isSilent(pcm, this.silenceThreshold)) {
        this.consecutiveSilentSamples += pcm.length >> 1; // 2 bytes per sample
        if (this.consecutiveSilentSamples >= this.silenceSampleThreshold) {
          this.log(`[recorder] silence detected (${this.silenceTimeoutSec}s)`);
          // Not awaited: stop() waits for processQueue, which includes this
          // very call, so awaiting here would deadlock.
          this.autoStop('silence');
          return;
        }
      } else {
        this.consecutiveSilentSamples = 0;
      }
    }

    writer.write(pcm);
  }

  /**
   * Stop recording and finalize the WAV file.
   *
   * Waits for queued packets to finish, closes the WAV writer and destroys
   * the codec session (the session is released even if closing the writer
   * throws). When the recorder is not currently recording, returns a result
   * with zero duration and size without touching anything.
   */
  async stop(): Promise<IRecordingResult> {
    if (this.state !== 'recording') {
      return {
        filePath: this.filePath,
        durationMs: 0,
        sampleRate: this.sampleRate,
        fileSize: 0,
        stopReason: this.stopReason,
      };
    }
    this.state = 'stopped';

    // Clear timers.
    if (this.maxDurationTimer) {
      clearTimeout(this.maxDurationTimer);
      this.maxDurationTimer = null;
    }

    // Wait for pending decode operations to finish (never rejects).
    await this.processQueue;

    let wavResult: IWavWriterResult | null = null;
    try {
      // Finalize WAV.
      if (this.wavWriter) {
        wavResult = this.wavWriter.close();
      }
    } finally {
      this.wavWriter = null;
      // Destroy codec session.
      const sessionId = this.codecSessionId;
      if (sessionId) {
        this.codecSessionId = null;
        await destroySession(sessionId);
      }
    }

    const result: IRecordingResult = {
      filePath: this.filePath,
      durationMs: wavResult?.durationMs ?? 0,
      sampleRate: this.sampleRate,
      fileSize: wavResult?.fileSize ?? 0,
      stopReason: this.stopReason,
    };
    if (this.droppedPackets > 0) {
      this.log(`[recorder] ${this.droppedPackets} packet(s) dropped due to errors`);
    }
    this.log(`[recorder] stopped (${result.stopReason}): ${result.durationMs}ms → ${this.filePath}`);
    return result;
  }

  /** Cancel recording — stops and deletes the WAV file. */
  async cancel(): Promise<void> {
    this.stopReason = 'cancelled';
    await this.stop();

    // Delete the incomplete file (best effort: failure is logged, not thrown).
    try {
      if (fs.existsSync(this.filePath)) {
        fs.unlinkSync(this.filePath);
        this.log(`[recorder] cancelled — deleted ${this.filePath}`);
      }
    } catch (err: unknown) {
      this.log(`[recorder] could not delete ${this.filePath}: ${AudioRecorder.describeError(err)}`);
    }
  }

  /** Clean up all resources. Cancels an active recording in the background. */
  destroy(): void {
    if (this.state === 'recording') {
      // Fire-and-forget, but never leave the rejection unhandled.
      void this.cancel().catch((err: unknown) => {
        this.log(`[recorder] cancel during destroy failed: ${AudioRecorder.describeError(err)}`);
      });
    }
    this.onStopped = null;
  }

  /** Stop on behalf of the recorder itself and notify `onStopped`; failures are logged. */
  private autoStop(reason: TRecordingStopReason): void {
    this.stopReason = reason;
    this.stop()
      .then((result) => this.onStopped?.(result))
      .catch((err: unknown) => {
        this.log(`[recorder] automatic stop (${reason}) failed: ${AudioRecorder.describeError(err)}`);
      });
  }

  /** Count a dropped packet; only the first is logged to avoid packet-rate log spam. */
  private noteDroppedPacket(err: unknown): void {
    this.droppedPackets++;
    if (this.droppedPackets === 1) {
      this.log(`[recorder] packet dropped: ${AudioRecorder.describeError(err)}`);
    }
  }

  /** Render an unknown thrown value as a log-friendly string. */
  private static describeError(err: unknown): string {
    return err instanceof Error ? err.message : String(err);
  }
}
// ---------------------------------------------------------------------------
// Helpers
// ---------------------------------------------------------------------------
/**
 * Report whether a 16-bit LE PCM buffer is "silent", i.e. no sample reaches
 * the given absolute amplitude. A trailing odd byte is ignored; an empty
 * buffer counts as silent.
 */
function isSilent(pcm: Buffer, threshold: number): boolean {
  let offset = 0;
  while (offset + 1 < pcm.length) {
    // Bail out on the first sample that is loud enough.
    if (Math.abs(pcm.readInt16LE(offset)) >= threshold) {
      return false;
    }
    offset += 2;
  }
  return true;
}