324 lines
10 KiB
TypeScript
324 lines
10 KiB
TypeScript
/**
 * Audio recorder — captures RTP packets from a single direction,
 * decodes them to PCM, and writes a WAV file.
 *
 * PCMU payloads are decoded from mu-law to 16-bit PCM directly in TypeScript.
 * Other codecs (G.722, Opus, PCMA) are first transcoded to PCMU by the Rust
 * codec bridge and then decoded the same way.
 * Output: 8kHz 16-bit mono WAV (standard telephony quality).
 *
 * Supports:
 * - Max recording duration limit
 * - Silence detection (stop after N seconds of silence)
 * - Manual stop
 * - DTMF (PT 101) and comfort-noise (PT 13) packets are automatically skipped
 */
|
|
|
|
import { Buffer } from 'node:buffer';
|
|
import fs from 'node:fs';
|
|
import path from 'node:path';
|
|
import { WavWriter } from './wav-writer.ts';
|
|
import type { IWavWriterResult } from './wav-writer.ts';
|
|
import { transcode, createSession, destroySession } from '../opusbridge.ts';
|
|
|
|
// ---------------------------------------------------------------------------
// Types
// ---------------------------------------------------------------------------
|
|
|
|
/** Configuration accepted by the `AudioRecorder` constructor. */
export interface IRecordingOptions {
  /** Directory the WAV file is written into. */
  outputDir: string;

  /** Sample rate, in Hz, handed to the WAV writer. Defaults to 8000. */
  sampleRate?: number;

  /**
   * Upper bound on the recording length, in seconds.
   * `0` disables the limit. Defaults to 120.
   */
  maxDurationSec?: number;

  /**
   * Number of consecutive seconds of silence after which the recording
   * stops on its own. `0` disables silence detection. Defaults to 5.
   */
  silenceTimeoutSec?: number;

  /**
   * Amplitude cut-off for silence detection: a frame counts as silent when
   * every 16-bit sample has an absolute value below this. Defaults to 200.
   */
  silenceThreshold?: number;

  /** Sink for the recorder's log lines. */
  log: (msg: string) => void;
}
|
|
|
|
/** Summary of a recording, returned when it is stopped. */
export interface IRecordingResult {
  /** Full path to the WAV file. */
  filePath: string;

  /** Length of the recorded audio, in milliseconds. */
  durationMs: number;

  /** Sample rate of the WAV file, in Hz. */
  sampleRate: number;

  /** Size of the WAV file, in bytes. */
  fileSize: number;

  /** What caused the recording to end. */
  stopReason: TRecordingStopReason;
}

/**
 * Why a recording ended:
 * - `manual`       — `stop()` was called by the owner
 * - `max-duration` — the configured maximum duration elapsed
 * - `silence`      — the silence timeout was reached
 * - `cancelled`    — `cancel()` was called
 */
export type TRecordingStopReason = 'manual' | 'max-duration' | 'silence' | 'cancelled';
|
|
|
|
// ---------------------------------------------------------------------------
// Mu-law decode table (ITU-T G.711)
// ---------------------------------------------------------------------------
|
|
|
|
/** Pre-computed mu-law → 16-bit linear PCM lookup table (256 entries). */
const MULAW_DECODE: Int16Array = buildMulawDecodeTable();

/**
 * Build the mu-law expansion table: entry `i` holds the 16-bit linear PCM
 * value for the mu-law code byte `i`.
 *
 * @returns A 256-entry table indexed by the raw (still bit-inverted) code byte.
 */
function buildMulawDecodeTable(): Int16Array {
  const table = new Int16Array(256);
  for (let code = 0; code < table.length; code++) {
    // Mu-law bytes are stored with every bit inverted; undo that first.
    const mu = ~code & 0xff;
    const isNegative = (mu & 0x80) !== 0;
    const exponent = (mu >> 4) & 0x07;
    const mantissa = mu & 0x0f;

    // ((mantissa << 1) + 33) << 2 equals (mantissa << 3) + 0x84: the
    // biased step value, which is then scaled by the segment exponent.
    const biased = ((mantissa << 1) + 33) << (exponent + 2);
    const magnitude = biased - 0x84; // remove the encoder bias again

    table[code] = isNegative ? -magnitude : magnitude;
  }
  return table;
}
|
|
|
|
/**
 * Decode a PCMU payload to 16-bit LE PCM.
 *
 * Every input byte is expanded through `MULAW_DECODE` into one signed
 * 16-bit little-endian sample, so the output is exactly twice as long as
 * the input.
 *
 * @param mulaw - Mu-law encoded bytes, one sample per byte.
 * @returns A new buffer of `mulaw.length * 2` bytes; empty input gives an empty buffer.
 */
function decodeMulaw(mulaw: Buffer): Buffer {
  const pcm = Buffer.alloc(mulaw.length * 2);
  for (let index = 0; index < mulaw.length; index++) {
    const sample = MULAW_DECODE[mulaw[index]];
    pcm.writeInt16LE(sample, index * 2);
  }
  return pcm;
}
|
|
|
|
// ---------------------------------------------------------------------------
// AudioRecorder
// ---------------------------------------------------------------------------
|
|
|
|
/**
 * Records one direction of an RTP audio stream into a WAV file.
 *
 * Lifecycle: `idle` → `recording` (after `start()`) → `stopped` (after
 * `stop()`, `cancel()`, or an automatic stop). `start()` only acts on an
 * idle recorder, so an instance records at most once.
 *
 * NOTE(review): decoded audio is produced one sample per mu-law byte and
 * `sampleRate` is only forwarded to the WAV writer — presumably callers
 * keep it at the 8 kHz PCMU rate; confirm before using other values.
 */
export class AudioRecorder {
  /** Nominal audio duration of one RTP packet, used to size the silence window. */
  private static readonly FRAME_MS = 20;

  /** Current state. */
  state: 'idle' | 'recording' | 'stopped' = 'idle';

  /** Called when recording stops automatically (silence or max duration). */
  onStopped: ((result: IRecordingResult) => void) | null = null;

  private outputDir: string;
  private sampleRate: number;
  private maxDurationSec: number;
  private silenceTimeoutSec: number;
  private silenceThreshold: number;
  private log: (msg: string) => void;

  private wavWriter: WavWriter | null = null;
  private filePath: string = '';
  private codecSessionId: string | null = null;
  private stopReason: TRecordingStopReason = 'manual';

  /** True while `start()` is between its state check and its completion. */
  private starting = false;

  // Silence detection.
  private consecutiveSilentFrames = 0;
  /** Number of 20ms frames that constitute silence timeout. */
  private silenceFrameThreshold = 0;

  // Max duration timer.
  private maxDurationTimer: ReturnType<typeof setTimeout> | null = null;

  // Processing queue to avoid concurrent transcodes.
  private processQueue: Promise<void> = Promise.resolve();

  /**
   * @param options - Recorder configuration; optional fields fall back to
   *   8000 Hz, 120 s max duration, 5 s silence timeout and threshold 200.
   */
  constructor(options: IRecordingOptions) {
    this.outputDir = options.outputDir;
    this.sampleRate = options.sampleRate ?? 8000;
    this.maxDurationSec = options.maxDurationSec ?? 120;
    this.silenceTimeoutSec = options.silenceTimeoutSec ?? 5;
    this.silenceThreshold = options.silenceThreshold ?? 200;
    this.log = options.log;
  }

  /**
   * Start recording. Creates the output directory, WAV file, and codec session.
   *
   * Does nothing unless the recorder is idle and no other `start()` call is
   * in flight. If the WAV file cannot be opened, the codec session created
   * for this recording is destroyed again before the error is rethrown.
   *
   * @param fileId - unique ID for the recording file name; a timestamp-based
   *   ID is generated when omitted
   */
  async start(fileId?: string): Promise<void> {
    if (this.state !== 'idle' || this.starting) return;
    this.starting = true;

    try {
      // Recursive mkdir is a no-op when the directory already exists.
      fs.mkdirSync(this.outputDir, { recursive: true });

      // Generate file path.
      const id = fileId ?? `rec-${Date.now()}-${Math.random().toString(36).slice(2, 8)}`;
      this.filePath = path.join(this.outputDir, `${id}.wav`);

      // Create a codec session for isolated decoding. The ID is stored only
      // once the session exists, so a failed create leaves nothing to destroy.
      const sessionId = `recorder-${id}`;
      await createSession(sessionId);
      this.codecSessionId = sessionId;

      // Open WAV writer.
      try {
        const writer = new WavWriter({
          filePath: this.filePath,
          sampleRate: this.sampleRate,
        });
        writer.open();
        this.wavWriter = writer;
      } catch (err: unknown) {
        try {
          await this.releaseCodecSession();
        } catch (cleanupErr: unknown) {
          this.log(
            `[recorder] failed to release codec session: ${AudioRecorder.describeError(cleanupErr)}`,
          );
        }
        throw err;
      }

      // Silence detection threshold: frames in timeout period.
      this.silenceFrameThreshold =
        this.silenceTimeoutSec > 0
          ? Math.ceil((this.silenceTimeoutSec * 1000) / AudioRecorder.FRAME_MS)
          : 0;
      this.consecutiveSilentFrames = 0;

      // Max duration timer.
      if (this.maxDurationSec > 0) {
        this.maxDurationTimer = setTimeout(() => {
          if (this.state !== 'recording') return;
          this.log(`[recorder] max duration reached (${this.maxDurationSec}s)`);
          this.autoStop('max-duration');
        }, this.maxDurationSec * 1000);
      }

      this.state = 'recording';
      this.stopReason = 'manual';
      this.log(`[recorder] started → ${this.filePath}`);
    } finally {
      this.starting = false;
    }
  }

  /**
   * Feed an RTP packet. Strips the RTP header (including any CSRC entries
   * and header extension) and trailing padding, then queues the payload to
   * be decoded to PCM and written to the WAV file.
   * Skips telephone-event (DTMF) and comfort noise packets.
   *
   * A packet that fails to decode is logged and dropped; it does not stop
   * later packets from being processed.
   */
  processRtp(data: Buffer): void {
    if (this.state !== 'recording') return;
    if (data.length < 13) return; // too short

    const pt = data[1] & 0x7f;

    // Skip DTMF (telephone-event) and comfort noise.
    if (pt === 101 || pt === 13) return;

    const payload = AudioRecorder.extractPayload(data);
    if (payload === null) return;

    // Queue processing to avoid concurrent transcodes corrupting codec state.
    // The catch keeps one failed packet from rejecting the whole chain.
    this.processQueue = this.processQueue
      .then(() => this.decodeAndWrite(payload, pt))
      .catch((err: unknown) => {
        this.log(`[recorder] failed to process RTP packet: ${AudioRecorder.describeError(err)}`);
      });
  }

  /**
   * Locate the media payload inside an RTP packet (RFC 3550 §5.1 layout).
   *
   * @returns A view onto the payload bytes, or `null` when the packet is
   *   truncated, has an invalid padding count, or carries no payload.
   */
  private static extractPayload(data: Buffer): Buffer | null {
    const flags = data[0];
    const csrcCount = flags & 0x0f;
    const hasExtension = (flags & 0x10) !== 0;
    const hasPadding = (flags & 0x20) !== 0;

    // Fixed 12-byte header followed by 4 bytes per CSRC identifier.
    let start = 12 + csrcCount * 4;

    if (hasExtension) {
      // Extension header: 16-bit profile, 16-bit length in 32-bit words.
      if (data.length < start + 4) return null;
      start += 4 + data.readUInt16BE(start + 2) * 4;
    }

    let end = data.length;
    if (hasPadding) {
      // The final octet is the padding count, which includes itself.
      const padding = data[end - 1];
      if (padding === 0 || padding > end - start) return null;
      end -= padding;
    }

    return end > start ? data.subarray(start, end) : null;
  }

  /** Decode a single RTP payload to PCM and write to WAV. */
  private async decodeAndWrite(payload: Buffer, pt: number): Promise<void> {
    const writer = this.wavWriter;
    if (this.state !== 'recording' || !writer) return;

    let pcm: Buffer;
    if (pt === 0) {
      // PCMU: decode directly in TypeScript (no Rust round-trip needed).
      pcm = decodeMulaw(payload);
    } else {
      // All other codecs: transcode to PCMU via Rust, then decode mu-law.
      const mulaw = await transcode(payload, pt, 0, this.codecSessionId ?? undefined);
      if (!mulaw) return;
      pcm = decodeMulaw(mulaw);
    }

    // Silence detection. Skipped when stop() was called while the transcode
    // was in flight, so a late frame cannot overwrite the stop reason or
    // fire onStopped a second time.
    if (this.state === 'recording' && this.silenceFrameThreshold > 0) {
      if (isSilent(pcm, this.silenceThreshold)) {
        this.consecutiveSilentFrames++;
        if (this.consecutiveSilentFrames >= this.silenceFrameThreshold) {
          this.log(`[recorder] silence detected (${this.silenceTimeoutSec}s)`);
          this.autoStop('silence');
          return;
        }
      } else {
        this.consecutiveSilentFrames = 0;
      }
    }

    writer.write(pcm);
  }

  /**
   * Stop on the recorder's own initiative and hand the result to `onStopped`.
   * Failures are logged instead of being left as unhandled rejections.
   */
  private autoStop(reason: TRecordingStopReason): void {
    this.stopReason = reason;
    this.stop()
      .then((result) => this.onStopped?.(result))
      .catch((err: unknown) => {
        this.log(`[recorder] auto-stop (${reason}) failed: ${AudioRecorder.describeError(err)}`);
      });
  }

  /**
   * Stop recording and finalize the WAV file.
   *
   * Waits for queued packets to be written, closes the WAV writer and
   * destroys the codec session (the session is destroyed even if closing
   * the writer throws).
   *
   * @returns The recording summary. When the recorder is not currently
   *   recording (never started, or already stopped) the summary has
   *   `durationMs` and `fileSize` set to 0.
   */
  async stop(): Promise<IRecordingResult> {
    if (this.state !== 'recording') {
      return {
        filePath: this.filePath,
        durationMs: 0,
        sampleRate: this.sampleRate,
        fileSize: 0,
        stopReason: this.stopReason,
      };
    }

    // Set synchronously so no further packets are accepted or decoded.
    this.state = 'stopped';

    // Clear timers.
    if (this.maxDurationTimer) {
      clearTimeout(this.maxDurationTimer);
      this.maxDurationTimer = null;
    }

    // Wait for pending decode operations to finish.
    await this.processQueue;

    // Finalize WAV, then destroy the codec session regardless of the outcome.
    let wavResult: IWavWriterResult | null = null;
    try {
      if (this.wavWriter) {
        wavResult = this.wavWriter.close();
      }
    } finally {
      this.wavWriter = null;
      await this.releaseCodecSession();
    }

    const result: IRecordingResult = {
      filePath: this.filePath,
      durationMs: wavResult?.durationMs ?? 0,
      sampleRate: this.sampleRate,
      fileSize: wavResult?.fileSize ?? 0,
      stopReason: this.stopReason,
    };

    this.log(`[recorder] stopped (${result.stopReason}): ${result.durationMs}ms → ${this.filePath}`);
    return result;
  }

  /** Destroy this recorder's codec session, if it has one. */
  private async releaseCodecSession(): Promise<void> {
    const sessionId = this.codecSessionId;
    if (!sessionId) return;
    this.codecSessionId = null;
    await destroySession(sessionId);
  }

  /**
   * Cancel recording — stops and deletes the WAV file.
   * The file is removed even if stopping fails; that failure is rethrown.
   */
  async cancel(): Promise<void> {
    this.stopReason = 'cancelled';
    try {
      await this.stop();
    } finally {
      this.deleteOutputFile();
    }
  }

  /** Best-effort removal of the output file; failures are logged, not thrown. */
  private deleteOutputFile(): void {
    try {
      if (fs.existsSync(this.filePath)) {
        fs.unlinkSync(this.filePath);
        this.log(`[recorder] cancelled — deleted ${this.filePath}`);
      }
    } catch (err: unknown) {
      this.log(
        `[recorder] could not delete ${this.filePath}: ${AudioRecorder.describeError(err)}`,
      );
    }
  }

  /**
   * Clean up all resources. An active recording is cancelled in the
   * background (its file is deleted) and `onStopped` is detached.
   */
  destroy(): void {
    if (this.state === 'recording') {
      this.cancel().catch((err: unknown) => {
        this.log(`[recorder] cancel during destroy failed: ${AudioRecorder.describeError(err)}`);
      });
    }
    this.onStopped = null;
  }

  /** Render a caught value as a short message for the log. */
  private static describeError(err: unknown): string {
    return err instanceof Error ? err.message : String(err);
  }
}
|
|
|
|
// ---------------------------------------------------------------------------
// Helpers
// ---------------------------------------------------------------------------
|
|
|
|
/**
 * Check if a PCM buffer is "silent" (max amplitude below threshold).
 *
 * The buffer is read as consecutive signed 16-bit little-endian samples; a
 * trailing odd byte is ignored. Scanning stops at the first sample whose
 * absolute value reaches the threshold.
 *
 * @param pcm - 16-bit LE PCM samples.
 * @param threshold - Amplitude at or above which a sample counts as sound.
 * @returns `true` when no sample reaches `threshold` (including for an
 *   empty buffer), `false` otherwise.
 */
function isSilent(pcm: Buffer, threshold: number): boolean {
  const lastSampleOffset = pcm.length - 2;
  for (let offset = 0; offset <= lastSampleOffset; offset += 2) {
    if (Math.abs(pcm.readInt16LE(offset)) >= threshold) return false;
  }
  return true;
}
|