// smartai/ts/provider.elevenlabs.ts

import * as plugins from './plugins.js';

import { MultiModalModel } from './abstract.classes.multimodal.js';
import type {
  ChatOptions,
  ChatResponse,
  ResearchOptions,
  ResearchResponse,
  ImageGenerateOptions,
  ImageEditOptions,
  ImageResponse
} from './abstract.classes.multimodal.js';

/**
 * Construction options for {@link ElevenLabsProvider}.
 */
export interface IElevenLabsProviderOptions {
  /** API key; sent as the `xi-api-key` request header on text-to-speech calls. */
  elevenlabsToken: string;
  /**
   * Voice used by `audio()` when the call itself does not supply a `voiceId`.
   * When this is also unset, the provider falls back to a hard-coded voice id.
   */
  defaultVoiceId?: string;
  /**
   * Model used by `audio()` when the call itself does not supply a `modelId`.
   * When this is also unset, the provider falls back to `'eleven_v3'`.
   */
  defaultModelId?: string;
}

/**
 * Optional per-request voice tuning.
 *
 * `ElevenLabsProvider.audio()` forwards this object unchanged as the
 * `voice_settings` field of the request body; no validation or defaulting is
 * done locally.
 *
 * NOTE(review): the meaning and valid range of each field are defined by the
 * ElevenLabs API, not by this file — confirm against the API reference.
 */
export interface IElevenLabsVoiceSettings {
  stability?: number;
  similarity_boost?: number;
  style?: number;
  use_speaker_boost?: boolean;
}

/**
 * Text-to-speech provider backed by the ElevenLabs HTTP API.
 *
 * Only {@link ElevenLabsProvider.audio} is implemented. Every other capability
 * method inherited from `MultiModalModel` rejects with an explanatory `Error`,
 * because this provider is specialized for text-to-speech.
 */
export class ElevenLabsProvider extends MultiModalModel {
  /** Options captured at construction time (token and optional defaults). */
  private readonly options: IElevenLabsProviderOptions;
  /** Root URL that all request paths are appended to. */
  private readonly baseUrl = 'https://api.elevenlabs.io/v1';

  /**
   * @param optionsArg API token plus optional default voice/model ids.
   */
  constructor(optionsArg: IElevenLabsProviderOptions) {
    super();
    this.options = optionsArg;
  }

  /** Starts the provider; delegates entirely to the base class. */
  public async start() {
    await super.start();
  }

  /** Stops the provider; delegates entirely to the base class. */
  public async stop() {
    await super.stop();
  }

  /**
   * Not supported by this provider.
   * @throws Error always (surfaced as a rejected promise).
   */
  public async chat(optionsArg: ChatOptions): Promise<ChatResponse> {
    throw new Error('ElevenLabs does not support chat functionality. This provider is specialized for text-to-speech only.');
  }

  /**
   * Not supported by this provider.
   * @throws Error always (surfaced as a rejected promise).
   */
  public async chatStream(input: ReadableStream<Uint8Array>): Promise<ReadableStream<string>> {
    throw new Error('ElevenLabs does not support chat streaming functionality. This provider is specialized for text-to-speech only.');
  }

  /**
   * Converts text to speech and returns the response body as a Node stream.
   *
   * Voice and model are resolved in this order: per-call argument, then the
   * provider-level default from the constructor options, then a hard-coded
   * fallback.
   *
   * @param optionsArg.message Text to synthesize.
   * @param optionsArg.voiceId Voice to use for this call (optional).
   * @param optionsArg.modelId Model to use for this call (optional).
   * @param optionsArg.voiceSettings Sent as `voice_settings` when provided.
   * @returns The un-drained response body; the caller is responsible for
   *   consuming it.
   * @throws Error when the API responds with a non-OK status. The message
   *   contains the status code, status text and response body.
   */
  public async audio(optionsArg: {
    message: string;
    voiceId?: string;
    modelId?: string;
    voiceSettings?: IElevenLabsVoiceSettings;
  }): Promise<NodeJS.ReadableStream> {
    // `||` (not `??`) is deliberate: an empty-string id must also fall through
    // to the next candidate, otherwise the request path would be malformed.
    // Use Samara voice as default fallback
    const voiceId = optionsArg.voiceId || this.options.defaultVoiceId || '19STyYD15bswVz51nqLf';
    const modelId = optionsArg.modelId || this.options.defaultModelId || 'eleven_v3';

    // Escape the id so characters such as `/`, `?` or `#` cannot alter the
    // request path or query. Plain alphanumeric ids are left unchanged.
    const url = `${this.baseUrl}/text-to-speech/${encodeURIComponent(voiceId)}`;

    const requestBody: {
      text: string;
      model_id: string;
      voice_settings?: IElevenLabsVoiceSettings;
    } = {
      text: optionsArg.message,
      model_id: modelId,
    };

    // Only include voice settings when the caller supplied them.
    if (optionsArg.voiceSettings) {
      requestBody.voice_settings = optionsArg.voiceSettings;
    }

    // autoDrain(false): keep the body unread so it can be handed to the
    // caller as a stream (or read as text on the error path below).
    const response = await plugins.smartrequest.SmartRequest.create()
      .url(url)
      .header('xi-api-key', this.options.elevenlabsToken)
      .json(requestBody)
      .autoDrain(false)
      .post();

    if (!response.ok) {
      const errorText = await response.text();
      throw new Error(`ElevenLabs API error: ${response.status} ${response.statusText} - ${errorText}`);
    }

    return response.streamNode();
  }

  /**
   * Not supported by this provider.
   * @throws Error always (surfaced as a rejected promise).
   */
  public async vision(optionsArg: { image: Buffer; prompt: string }): Promise<string> {
    throw new Error('ElevenLabs does not support vision functionality. This provider is specialized for text-to-speech only.');
  }

  /**
   * Not supported by this provider.
   * @throws Error always (surfaced as a rejected promise).
   */
  public async document(optionsArg: {
    systemMessage: string;
    userMessage: string;
    pdfDocuments: Uint8Array[];
    messageHistory: any[];
  }): Promise<{ message: any }> {
    throw new Error('ElevenLabs does not support document processing. This provider is specialized for text-to-speech only.');
  }

  /**
   * Not supported by this provider.
   * @throws Error always (surfaced as a rejected promise).
   */
  public async research(optionsArg: ResearchOptions): Promise<ResearchResponse> {
    throw new Error('ElevenLabs does not support research capabilities. This provider is specialized for text-to-speech only.');
  }

  /**
   * Not supported by this provider.
   * @throws Error always (surfaced as a rejected promise).
   */
  public async imageGenerate(optionsArg: ImageGenerateOptions): Promise<ImageResponse> {
    throw new Error('ElevenLabs does not support image generation. This provider is specialized for text-to-speech only.');
  }

  /**
   * Not supported by this provider.
   * @throws Error always (surfaced as a rejected promise).
   */
  public async imageEdit(optionsArg: ImageEditOptions): Promise<ImageResponse> {
    throw new Error('ElevenLabs does not support image editing. This provider is specialized for text-to-speech only.');
  }
}