update

2025-10-08 22:49:08 +00:00
parent 3a24c2c4bd
commit 28bb13dc0c
8 changed files with 2155 additions and 695 deletions
--- a/package.json
+++ b/package.json
@@ -15,22 +15,23 @@
    "buildDocs": "(tsdoc)"
  },
  "devDependencies": {
-    "@git.zone/tsbuild": "^2.6.4",
+    "@git.zone/tsbuild": "^2.6.8",
    "@git.zone/tsbundle": "^2.5.1",
    "@git.zone/tsrun": "^1.3.3",
-    "@git.zone/tstest": "^2.3.2",
-    "@push.rocks/qenv": "^6.1.0",
+    "@git.zone/tstest": "^2.3.8",
+    "@push.rocks/qenv": "^6.1.3",
    "@push.rocks/tapbundle": "^6.0.3",
-    "@types/node": "^22.15.17"
+    "@types/node": "^22.15.17",
+    "typescript": "^5.9.3"
  },
  "dependencies": {
-    "@anthropic-ai/sdk": "^0.59.0",
+    "@anthropic-ai/sdk": "^0.65.0",
    "@push.rocks/smartarray": "^1.1.0",
-    "@push.rocks/smartfile": "^11.2.5",
+    "@push.rocks/smartfile": "^11.2.7",
    "@push.rocks/smartpath": "^6.0.0",
    "@push.rocks/smartpdf": "^4.1.1",
    "@push.rocks/smartpromise": "^4.2.3",
-    "@push.rocks/smartrequest": "^4.2.1",
+    "@push.rocks/smartrequest": "^4.3.1",
    "@push.rocks/webstream": "^1.0.10",
    "openai": "^5.12.2"
  },
--- a/pnpm-lock.yaml
+++ b/pnpm-lock.yaml
--- a/readme.md
+++ b/readme.md
@@ -5,7 +5,7 @@
 [![TypeScript](https://img.shields.io/badge/TypeScript-5.x-blue.svg)](https://www.typescriptlang.org/)
 [![License: MIT](https://img.shields.io/badge/License-MIT-yellow.svg)](https://opensource.org/licenses/MIT)

-SmartAI unifies the world's leading AI providers - OpenAI, Anthropic, Perplexity, Ollama, Groq, XAI, and Exo - under a single, elegant TypeScript interface. Build AI applications at lightning speed without vendor lock-in. 
+SmartAI unifies the world's leading AI providers - OpenAI, Anthropic, Perplexity, Ollama, Groq, XAI, Exo, and ElevenLabs - under a single, elegant TypeScript interface. Build AI applications at lightning speed without vendor lock-in. 

 ## 🎯 Why SmartAI?

@@ -28,7 +28,11 @@ import { SmartAi } from '@push.rocks/smartai';
 // Initialize with your favorite providers
 const ai = new SmartAi({
  openaiToken: 'sk-...',
-  anthropicToken: 'sk-ant-...'
+  anthropicToken: 'sk-ant-...',
+  elevenlabsToken: 'sk-...',
+  elevenlabs: {
+    defaultVoiceId: '19STyYD15bswVz51nqLf' // Optional: Samara voice
+  }
 });

 await ai.start();
@@ -49,6 +53,7 @@ Choose the right provider for your use case:
 |----------|:----:|:---------:|:---:|:------:|:---------:|:--------:|:------:|------------|
 | **OpenAI** | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | • gpt-image-1<br>• DALL-E 3<br>• Deep research API |
 | **Anthropic** | ✅ | ✅ | ❌ | ✅ | ✅ | ✅ | ❌ | • Claude Sonnet 4.5<br>• Superior reasoning<br>• Web search API |
+| **ElevenLabs** | ❌ | ❌ | ✅ | ❌ | ❌ | ❌ | ❌ | • Premium TTS<br>• 32 languages<br>• Natural voices |
 | **Ollama** | ✅ | ✅ | ❌ | ✅ | ✅ | ❌ | ❌ | • 100% local<br>• Privacy-first<br>• No API costs |
 | **XAI** | ✅ | ✅ | ❌ | ❌ | ✅ | ❌ | ❌ | • Grok models<br>• Real-time data<br>• Uncensored |
 | **Perplexity** | ✅ | ✅ | ❌ | ❌ | ❌ | ✅ | ❌ | • Web-aware<br>• Research-focused<br>• Sonar Pro models |
@@ -105,13 +110,27 @@ while (true) {

 ### 🎙️ Text-to-Speech

-Generate natural voices with OpenAI:
+Generate natural voices with OpenAI or ElevenLabs:

 ```typescript
+// OpenAI TTS
 const audioStream = await ai.openaiProvider.audio({
  message: 'Welcome to the future of AI development!'
 });

+// ElevenLabs TTS - Premium quality, natural voices
+const elevenLabsAudio = await ai.elevenlabsProvider.audio({
+  message: 'Experience the most lifelike text to speech technology.',
+  voiceId: '19STyYD15bswVz51nqLf', // Optional if defaultVoiceId is configured (Samara voice)
+  modelId: 'eleven_multilingual_v2', // Optional: defaults to eleven_multilingual_v2
+  voiceSettings: { // Optional: fine-tune voice characteristics
+    stability: 0.5,        // 0-1: Speech consistency
+    similarity_boost: 0.8, // 0-1: Voice similarity to original
+    style: 0.0,            // 0-1: Expressiveness (higher = more expressive)
+    use_speaker_boost: true // Enhanced clarity
+  }
+});
+
 // Stream directly to speakers
 audioStream.pipe(speakerOutput);

@@ -548,6 +567,7 @@ npm install @push.rocks/smartai
 export OPENAI_API_KEY=sk-...
 export ANTHROPIC_API_KEY=sk-ant-...
 export PERPLEXITY_API_KEY=pplx-...
+export ELEVENLABS_API_KEY=sk-...
 # ... etc
 ```

@@ -574,6 +594,7 @@ export PERPLEXITY_API_KEY=pplx-...
 | **Complex Reasoning** | Anthropic | Superior logical thinking, safer outputs |
 | **Research & Facts** | Perplexity | Web-aware, provides citations |
 | **Deep Research** | OpenAI | Deep Research API with comprehensive analysis |
+| **Premium TTS** | ElevenLabs | Most natural voices, 32 languages, superior quality |
 | **Speed Critical** | Groq | 10x faster inference, sub-second responses |
 | **Privacy Critical** | Ollama | 100% local, no data leaves your servers |
 | **Real-time Data** | XAI | Access to current information |
--- a/test/test.audio.elevenlabs.ts
+++ b/test/test.audio.elevenlabs.ts
@@ -0,0 +1,54 @@
+import { expect, tap } from '@push.rocks/tapbundle';
+import * as qenv from '@push.rocks/qenv';
+import * as smartfile from '@push.rocks/smartfile';
+
+const testQenv = new qenv.Qenv('./', './.nogit/');
+
+import * as smartai from '../ts/index.js';
+
+// Shared SmartAi instance; created in the first test and stopped in the last one.
+let testSmartai: smartai.SmartAi;
+
+/**
+ * Drains a Node.js readable stream into a single Buffer.
+ *
+ * String chunks are converted to Buffers first so that Buffer.concat never
+ * receives a non-binary value (a readable stream may yield `string | Buffer`).
+ *
+ * @param streamArg the stream returned by the provider's audio() call
+ * @returns the concatenated stream contents
+ */
+const collectStream = async (streamArg: NodeJS.ReadableStream): Promise<Buffer> => {
+  const chunks: Buffer[] = [];
+  for await (const chunk of streamArg) {
+    chunks.push(typeof chunk === 'string' ? Buffer.from(chunk) : chunk);
+  }
+  return Buffer.concat(chunks);
+};
+
+tap.test('ElevenLabs Audio: should create a smartai instance with ElevenLabs provider', async () => {
+  testSmartai = new smartai.SmartAi({
+    elevenlabsToken: await testQenv.getEnvVarOnDemand('ELEVENLABS_TOKEN'),
+    elevenlabs: {
+      defaultVoiceId: '19STyYD15bswVz51nqLf',
+    },
+  });
+  await testSmartai.start();
+});
+
+tap.test('ElevenLabs Audio: should create audio response', async () => {
+  // No voiceId is passed here, so the defaultVoiceId configured above is used.
+  const audioStream = await testSmartai.elevenlabsProvider.audio({
+    message: 'Welcome to SmartAI, the unified interface for the world\'s leading artificial intelligence providers. SmartAI brings together OpenAI, Anthropic, Perplexity, and ElevenLabs under a single elegant TypeScript API. Whether you need text generation, vision analysis, document processing, or premium text-to-speech capabilities, SmartAI provides a consistent and powerful interface for all your AI needs. Build intelligent applications at lightning speed without vendor lock-in.',
+  });
+  const audioBuffer = await collectStream(audioStream);
+  await smartfile.fs.toFs(audioBuffer, './.nogit/testoutput_elevenlabs.mp3');
+  console.log(`Audio Buffer length: ${audioBuffer.length}`);
+  expect(audioBuffer.length).toBeGreaterThan(0);
+});
+
+tap.test('ElevenLabs Audio: should create audio with custom voice', async () => {
+  // An explicit voiceId takes precedence over the configured default.
+  const audioStream = await testSmartai.elevenlabsProvider.audio({
+    message: 'Testing with a different voice.',
+    voiceId: 'JBFqnCBsd6RMkjVDRZzb',
+  });
+  const audioBuffer = await collectStream(audioStream);
+  await smartfile.fs.toFs(audioBuffer, './.nogit/testoutput_elevenlabs_custom.mp3');
+  console.log(`Audio Buffer length (custom voice): ${audioBuffer.length}`);
+  expect(audioBuffer.length).toBeGreaterThan(0);
+});
+
+tap.test('ElevenLabs Audio: should stop the smartai instance', async () => {
+  await testSmartai.stop();
+});
+
+export default tap.start();
--- a/ts/classes.conversation.ts
+++ b/ts/classes.conversation.ts
@@ -96,6 +96,18 @@ export class Conversation {
    return conversation;
  }

+  /**
+   * Builds a Conversation tied to the ElevenLabs provider of the given SmartAi instance.
+   *
+   * @param smartaiRefArg the SmartAi instance whose `elevenlabsProvider` must be set
+   * @returns a new Conversation whose process function currently resolves to an empty string
+   * @throws Error when `smartaiRefArg.elevenlabsProvider` is not set
+   */
+  public static async createWithElevenlabs(smartaiRefArg: SmartAi) {
+    const providerIsAvailable = Boolean(smartaiRefArg.elevenlabsProvider);
+    if (!providerIsAvailable) {
+      throw new Error('ElevenLabs provider not available');
+    }
+    return new Conversation(smartaiRefArg, {
+      // TODO implement proper streaming
+      processFunction: async (input) => '',
+    });
+  }
+
  // INSTANCE
  smartaiRef: SmartAi
  private systemMessage: string;
--- a/ts/classes.smartai.ts
+++ b/ts/classes.smartai.ts
@@ -1,6 +1,7 @@
 import { Conversation } from './classes.conversation.js';
 import * as plugins from './plugins.js';
 import { AnthropicProvider } from './provider.anthropic.js';
+import { ElevenLabsProvider } from './provider.elevenlabs.js';
 import { OllamaProvider } from './provider.ollama.js';
 import { OpenAiProvider } from './provider.openai.js';
 import { PerplexityProvider } from './provider.perplexity.js';
@@ -15,6 +16,7 @@ export interface ISmartAiOptions {
  perplexityToken?: string;
  groqToken?: string;
  xaiToken?: string;
+  elevenlabsToken?: string;
  exo?: {
    baseUrl?: string;
    apiKey?: string;
@@ -24,9 +26,13 @@ export interface ISmartAiOptions {
    model?: string;
    visionModel?: string;
  };
+  elevenlabs?: {
+    defaultVoiceId?: string;
+    defaultModelId?: string;
+  };
 }

-export type TProvider = 'openai' | 'anthropic' | 'perplexity' | 'ollama' | 'exo' | 'groq' | 'xai';
+export type TProvider = 'openai' | 'anthropic' | 'perplexity' | 'ollama' | 'exo' | 'groq' | 'xai' | 'elevenlabs';

 export class SmartAi {
  public options: ISmartAiOptions;
@@ -38,6 +44,7 @@ export class SmartAi {
  public exoProvider: ExoProvider;
  public groqProvider: GroqProvider;
  public xaiProvider: XAIProvider;
+  public elevenlabsProvider: ElevenLabsProvider;

  constructor(optionsArg: ISmartAiOptions) {
    this.options = optionsArg;
@@ -74,6 +81,14 @@ export class SmartAi {
      });
      await this.xaiProvider.start();
    }
+    if (this.options.elevenlabsToken) {
+      this.elevenlabsProvider = new ElevenLabsProvider({
+        elevenlabsToken: this.options.elevenlabsToken,
+        defaultVoiceId: this.options.elevenlabs?.defaultVoiceId,
+        defaultModelId: this.options.elevenlabs?.defaultModelId,
+      });
+      await this.elevenlabsProvider.start();
+    }
    if (this.options.ollama) {
      this.ollamaProvider = new OllamaProvider({
        baseUrl: this.options.ollama.baseUrl,
@@ -107,6 +122,9 @@ export class SmartAi {
    if (this.xaiProvider) {
      await this.xaiProvider.stop();
    }
+    if (this.elevenlabsProvider) {
+      await this.elevenlabsProvider.stop();
+    }
    if (this.ollamaProvider) {
      await this.ollamaProvider.stop();
    }
@@ -134,6 +152,8 @@ export class SmartAi {
        return Conversation.createWithGroq(this);
      case 'xai':
        return Conversation.createWithXai(this);
+      case 'elevenlabs':
+        return Conversation.createWithElevenlabs(this);
      default:
        throw new Error('Provider not available');
    }
--- a/ts/index.ts
+++ b/ts/index.ts
@@ -7,3 +7,4 @@ export * from './provider.groq.js';
 export * from './provider.ollama.js';
 export * from './provider.xai.js';
 export * from './provider.exo.js';
+export * from './provider.elevenlabs.js';
--- a/ts/provider.elevenlabs.ts
+++ b/ts/provider.elevenlabs.ts
@@ -0,0 +1,117 @@
+import * as plugins from './plugins.js';
+
+import { MultiModalModel } from './abstract.classes.multimodal.js';
+import type {
+  ChatOptions,
+  ChatResponse,
+  ResearchOptions,
+  ResearchResponse,
+  ImageGenerateOptions,
+  ImageEditOptions,
+  ImageResponse
+} from './abstract.classes.multimodal.js';
+
+/**
+ * Construction options for the ElevenLabs provider.
+ */
+export interface IElevenLabsProviderOptions {
+  /** API token; sent as the `xi-api-key` request header. */
+  elevenlabsToken: string;
+  /** Voice used by `audio()` when the call does not pass its own `voiceId`. */
+  defaultVoiceId?: string;
+  /** Model used by `audio()` when the call does not pass its own `modelId`; if unset, 'eleven_multilingual_v2' is used. */
+  defaultModelId?: string;
+}
+
+/**
+ * Optional voice tuning values; forwarded unchanged as `voice_settings` in the
+ * text-to-speech request body. Field descriptions follow the project README.
+ */
+export interface IElevenLabsVoiceSettings {
+  /** 0-1: speech consistency. */
+  stability?: number;
+  /** 0-1: voice similarity to the original. */
+  similarity_boost?: number;
+  /** 0-1: expressiveness (higher = more expressive). */
+  style?: number;
+  /** Enhanced clarity. */
+  use_speaker_boost?: boolean;
+}
+
+/**
+ * Text-to-speech provider backed by the ElevenLabs HTTP API.
+ *
+ * Only `audio()` is implemented. Every other capability inherited from
+ * MultiModalModel rejects with an explanatory error, because this provider is
+ * specialized for text-to-speech.
+ */
+export class ElevenLabsProvider extends MultiModalModel {
+  private options: IElevenLabsProviderOptions;
+  private baseUrl: string = 'https://api.elevenlabs.io/v1';
+
+  /**
+   * @param optionsArg API token plus optional default voice and model ids
+   */
+  constructor(optionsArg: IElevenLabsProviderOptions) {
+    super();
+    this.options = optionsArg;
+  }
+
+  /** Starts the provider; delegates to the base class. */
+  public async start() {
+    await super.start();
+  }
+
+  /** Stops the provider; delegates to the base class. */
+  public async stop() {
+    await super.stop();
+  }
+
+  /**
+   * Not supported by this provider.
+   * @throws Error always
+   */
+  public async chat(optionsArg: ChatOptions): Promise<ChatResponse> {
+    throw new Error('ElevenLabs does not support chat functionality. This provider is specialized for text-to-speech only.');
+  }
+
+  /**
+   * Not supported by this provider.
+   * @throws Error always
+   */
+  public async chatStream(input: ReadableStream<Uint8Array>): Promise<ReadableStream<string>> {
+    throw new Error('ElevenLabs does not support chat streaming functionality. This provider is specialized for text-to-speech only.');
+  }
+
+  /**
+   * Converts text to speech and returns the response body as a Node.js stream.
+   *
+   * @param optionsArg.message the text to synthesize
+   * @param optionsArg.voiceId voice to use; falls back to `defaultVoiceId` from the provider options
+   * @param optionsArg.modelId model to use; falls back to `defaultModelId`, then 'eleven_multilingual_v2'
+   * @param optionsArg.voiceSettings optional tuning values, sent as `voice_settings`
+   * @returns the response body stream of the text-to-speech request
+   * @throws Error when no voice id is available, or when the API responds with a non-ok status
+   */
+  public async audio(optionsArg: {
+    message: string;
+    voiceId?: string;
+    modelId?: string;
+    voiceSettings?: IElevenLabsVoiceSettings;
+  }): Promise<NodeJS.ReadableStream> {
+    // `||` (not `??`) is deliberate: an empty string should also fall through to the default.
+    const voiceId = optionsArg.voiceId || this.options.defaultVoiceId;
+
+    if (!voiceId) {
+      throw new Error('Voice ID is required for ElevenLabs TTS. Please provide voiceId in the method call or set defaultVoiceId in provider options.');
+    }
+
+    const modelId = optionsArg.modelId || this.options.defaultModelId || 'eleven_multilingual_v2';
+
+    // The voice id becomes a URL path segment; encode it so characters such as
+    // '/', '?' or '#' cannot alter the request path.
+    const url = `${this.baseUrl}/text-to-speech/${encodeURIComponent(voiceId)}`;
+
+    const requestBody: {
+      text: string;
+      model_id: string;
+      voice_settings?: IElevenLabsVoiceSettings;
+    } = {
+      text: optionsArg.message,
+      model_id: modelId,
+    };
+
+    if (optionsArg.voiceSettings) {
+      requestBody.voice_settings = optionsArg.voiceSettings;
+    }
+
+    // autoDrain(false) keeps the response body unread so it can be returned as a stream below.
+    const response = await plugins.smartrequest.SmartRequest.create()
+      .url(url)
+      .header('xi-api-key', this.options.elevenlabsToken)
+      .json(requestBody)
+      .autoDrain(false)
+      .post();
+
+    if (!response.ok) {
+      const errorText = await response.text();
+      throw new Error(`ElevenLabs API error: ${response.status} ${response.statusText} - ${errorText}`);
+    }
+
+    return response.streamNode();
+  }
+
+  /**
+   * Not supported by this provider.
+   * @throws Error always
+   */
+  public async vision(optionsArg: { image: Buffer; prompt: string }): Promise<string> {
+    throw new Error('ElevenLabs does not support vision functionality. This provider is specialized for text-to-speech only.');
+  }
+
+  /**
+   * Not supported by this provider.
+   * @throws Error always
+   */
+  public async document(optionsArg: {
+    systemMessage: string;
+    userMessage: string;
+    pdfDocuments: Uint8Array[];
+    messageHistory: any[];
+  }): Promise<{ message: any }> {
+    throw new Error('ElevenLabs does not support document processing. This provider is specialized for text-to-speech only.');
+  }
+
+  /**
+   * Not supported by this provider.
+   * @throws Error always
+   */
+  public async research(optionsArg: ResearchOptions): Promise<ResearchResponse> {
+    throw new Error('ElevenLabs does not support research capabilities. This provider is specialized for text-to-speech only.');
+  }
+
+  /**
+   * Not supported by this provider.
+   * @throws Error always
+   */
+  public async imageGenerate(optionsArg: ImageGenerateOptions): Promise<ImageResponse> {
+    throw new Error('ElevenLabs does not support image generation. This provider is specialized for text-to-speech only.');
+  }
+
+  /**
+   * Not supported by this provider.
+   * @throws Error always
+   */
+  public async imageEdit(optionsArg: ImageEditOptions): Promise<ImageResponse> {
+    throw new Error('ElevenLabs does not support image editing. This provider is specialized for text-to-speech only.');
+  }
+}