feat(integration-xai): Add support for X.AI provider with chat and document processing capabilities.

Philipp Kunz 2025-02-05 14:09:06 +01:00
parent d0a4151a2b
commit 4fc1e029e4
5 changed files with 219 additions and 1 deletion

@@ -1,5 +1,12 @@
# Changelog
## 2025-02-05 - 0.3.0 - feat(integration-xai)
Add support for X.AI provider with chat and document processing capabilities.
- Introduced the XAIProvider class for integrating X.AI features.
- Implemented both streaming and synchronous chat for X.AI.
- Enabled document processing with PDF-to-image conversion in the X.AI provider.
## 2025-02-03 - 0.2.0 - feat(provider.anthropic)
Add support for vision and document processing in Anthropic provider

license Normal file
@@ -0,0 +1,19 @@
Copyright (c) 2024 Task Venture Capital GmbH (hello@task.vc)

Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:

The above copyright notice and this permission notice shall be included in all
copies or substantial portions of the Software.

THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
SOFTWARE.

@@ -24,6 +24,14 @@ This command installs the package and adds it to your project's dependencies.
openaiToken: 'your-openai-token'
```
### X.AI
- Models: Grok-2-latest
- Features: Chat, Streaming, Document Processing
- Configuration:
```typescript
xaiToken: 'your-xai-token'
```
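For orientation, a minimal sketch of using the new provider directly (this assumes `XAIProvider` is exported from the package root; the constructor options and `chat()` signature come from the provider source added in this commit):
```typescript
import { XAIProvider } from '@push.rocks/smartai';

const xai = new XAIProvider({ xaiToken: 'your-xai-token' });
await xai.start(); // initializes the OpenAI-compatible client against https://api.x.ai/v1

const response = await xai.chat({
  systemMessage: 'You are a concise assistant.',
  userMessage: 'Hello, Grok!',
  messageHistory: [],
});
console.log(response.message);
```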
### Anthropic
- Models: Claude-3-opus-20240229
- Features: Chat, Streaming, Vision, Document Processing
@@ -75,6 +83,7 @@ import { SmartAi } from '@push.rocks/smartai';
const smartAi = new SmartAi({
openaiToken: 'your-openai-token',
xaiToken: 'your-xai-token',
anthropicToken: 'your-anthropic-token',
perplexityToken: 'your-perplexity-token',
groqToken: 'your-groq-token',

@@ -3,6 +3,6 @@
*/
export const commitinfo = {
name: '@push.rocks/smartai',
version: '0.2.0',
version: '0.3.0',
description: 'A TypeScript library for integrating and interacting with multiple AI models, offering capabilities for chat and potentially audio responses.'
}

ts/provider.xai.ts Normal file
@@ -0,0 +1,183 @@
import * as plugins from './plugins.js';
import * as paths from './paths.js';
import { MultiModalModel } from './abstract.classes.multimodal.js';
import type { ChatOptions, ChatResponse, ChatMessage } from './abstract.classes.multimodal.js';
import type { ChatCompletionMessageParam } from 'openai/resources/chat/completions';

export interface IXAIProviderOptions {
  xaiToken: string;
}

export class XAIProvider extends MultiModalModel {
  private options: IXAIProviderOptions;
  public openAiApiClient: plugins.openai.default;
  public smartpdfInstance: plugins.smartpdf.SmartPdf;

  constructor(optionsArg: IXAIProviderOptions) {
    super();
    this.options = optionsArg;
  }

  public async start() {
    // X.AI exposes an OpenAI-compatible API, so the OpenAI client is
    // simply pointed at the X.AI base URL.
    this.openAiApiClient = new plugins.openai.default({
      apiKey: this.options.xaiToken,
      baseURL: 'https://api.x.ai/v1',
    });
    this.smartpdfInstance = new plugins.smartpdf.SmartPdf();
  }

  public async stop() {}

  public async chatStream(input: ReadableStream<Uint8Array>): Promise<ReadableStream<string>> {
    // Create a TextDecoder to handle incoming chunks
    const decoder = new TextDecoder();
    let buffer = '';
    let currentMessage: { role: string; content: string } | null = null;

    // Capture the API client: inside the TransformStream callbacks, `this`
    // refers to the transformer object, not to this provider instance.
    const openAiApiClient = this.openAiApiClient;

    // Create a TransformStream to process the input
    const transform = new TransformStream<Uint8Array, string>({
      async transform(chunk, controller) {
        buffer += decoder.decode(chunk, { stream: true });

        // Try to parse complete newline-delimited JSON messages from the buffer
        while (true) {
          const newlineIndex = buffer.indexOf('\n');
          if (newlineIndex === -1) break;

          const line = buffer.slice(0, newlineIndex);
          buffer = buffer.slice(newlineIndex + 1);

          if (line.trim()) {
            try {
              const message = JSON.parse(line);
              currentMessage = {
                role: message.role || 'user',
                content: message.content || '',
              };
            } catch (e) {
              console.error('Failed to parse message:', e);
            }
          }
        }

        // If we have a complete message, send it to X.AI
        if (currentMessage) {
          const stream = await openAiApiClient.chat.completions.create({
            model: 'grok-2-latest',
            messages: [{
              role: currentMessage.role as 'system' | 'user' | 'assistant',
              content: currentMessage.content,
            }],
            stream: true,
          });

          // Forward each token from X.AI to the output stream
          for await (const chunk of stream) {
            const content = chunk.choices[0]?.delta?.content;
            if (content) {
              controller.enqueue(content);
            }
          }

          currentMessage = null;
        }
      },

      flush(controller) {
        // Emit whatever remains in the buffer as a final message
        if (buffer) {
          try {
            const message = JSON.parse(buffer);
            controller.enqueue(message.content || '');
          } catch (e) {
            console.error('Failed to parse remaining buffer:', e);
          }
        }
      },
    });

    // Connect the input to our transform stream
    return input.pipeThrough(transform);
  }

  public async chat(optionsArg: {
    systemMessage: string;
    userMessage: string;
    messageHistory: { role: string; content: string }[];
  }): Promise<{ role: 'assistant'; message: string }> {
    // Prepare messages array with system message, history, and user message
    const messages: ChatCompletionMessageParam[] = [
      { role: 'system', content: optionsArg.systemMessage },
      ...optionsArg.messageHistory.map(msg => ({
        role: msg.role as 'system' | 'user' | 'assistant',
        content: msg.content,
      })),
      { role: 'user', content: optionsArg.userMessage },
    ];

    // Call X.AI's chat completion API
    const completion = await this.openAiApiClient.chat.completions.create({
      model: 'grok-2-latest',
      messages: messages,
      stream: false,
    });

    // Return the assistant's response
    return {
      role: 'assistant',
      message: completion.choices[0]?.message?.content || '',
    };
  }

  public async audio(optionsArg: { message: string }): Promise<NodeJS.ReadableStream> {
    throw new Error('Audio generation is not supported by X.AI');
  }

  public async vision(optionsArg: { image: Buffer; prompt: string }): Promise<string> {
    throw new Error('Vision tasks are not supported by X.AI');
  }

  public async document(optionsArg: {
    systemMessage: string;
    userMessage: string;
    pdfDocuments: Uint8Array[];
    messageHistory: { role: string; content: string }[];
  }): Promise<{ message: any }> {
    // First convert PDF documents to images
    let pdfDocumentImageBytesArray: Uint8Array[] = [];
    for (const pdfDocument of optionsArg.pdfDocuments) {
      const documentImageArray = await this.smartpdfInstance.convertPDFToPngBytes(pdfDocument);
      pdfDocumentImageBytesArray = pdfDocumentImageBytesArray.concat(documentImageArray);
    }

    // Convert images to base64
    const imageBase64Array = pdfDocumentImageBytesArray.map(bytes =>
      Buffer.from(bytes).toString('base64')
    );

    // Reference the document images from the user message. Note that the
    // text-only chat endpoint used below receives numbered placeholders
    // rather than the base64 image data itself.
    const enhancedUserMessage = `
      ${optionsArg.userMessage}

      Document contents (as images):
      ${imageBase64Array.map((img, i) => `Image ${i + 1}: <image data>`).join('\n')}
    `;

    // Use chat completion to analyze the documents
    const messages: ChatCompletionMessageParam[] = [
      { role: 'system', content: optionsArg.systemMessage },
      ...optionsArg.messageHistory.map(msg => ({
        role: msg.role as 'system' | 'user' | 'assistant',
        content: msg.content,
      })),
      { role: 'user', content: enhancedUserMessage },
    ];

    const completion = await this.openAiApiClient.chat.completions.create({
      model: 'grok-2-latest',
      messages: messages,
      stream: false,
    });

    return {
      message: completion.choices[0]?.message?.content || '',
    };
  }
}
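
As a closing illustration, here is a hypothetical wiring of the streaming path (not part of this commit): `chatStream` consumes newline-delimited JSON messages of the shape `{ role, content }` and emits plain-text response tokens.
```typescript
// Sketch only: assumes Node 18+ web streams and an XAI_TOKEN environment variable.
const provider = new XAIProvider({ xaiToken: process.env.XAI_TOKEN! });
await provider.start();

// Encode a single chat message as one newline-terminated JSON line.
const encoder = new TextEncoder();
const input = new ReadableStream<Uint8Array>({
  start(controller) {
    controller.enqueue(
      encoder.encode(JSON.stringify({ role: 'user', content: 'Hello, Grok!' }) + '\n'),
    );
    controller.close();
  },
});

// chatStream resolves to a ReadableStream<string> of response tokens.
const output = await provider.chatStream(input);
const reader = output.getReader();
while (true) {
  const { done, value } = await reader.read();
  if (done) break;
  process.stdout.write(value);
}
```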