// smartai/ts/provider.xai.ts

import * as plugins from './plugins.js';
import * as paths from './paths.js';
import { MultiModalModel } from './abstract.classes.multimodal.js';
import type { ChatOptions, ChatResponse, ChatMessage } from './abstract.classes.multimodal.js';
import type { ChatCompletionMessageParam } from 'openai/resources/chat/completions';

export interface IXAIProviderOptions {
  xaiToken: string;
}
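
/**
 * Multi-modal provider backed by X.AI's Grok models. X.AI exposes an
 * OpenAI-compatible API, so the official openai client is reused here
 * with a custom baseURL.
 */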
export class XAIProvider extends MultiModalModel {
  private options: IXAIProviderOptions;
  public openAiApiClient: plugins.openai.default;
  public smartpdfInstance: plugins.smartpdf.SmartPdf;

  constructor(optionsArg: IXAIProviderOptions) {
    super();
    this.options = optionsArg;
  }

  public async start() {
    // Point the OpenAI client at X.AI's OpenAI-compatible endpoint
    this.openAiApiClient = new plugins.openai.default({
      apiKey: this.options.xaiToken,
      baseURL: 'https://api.x.ai/v1',
    });
    this.smartpdfInstance = new plugins.smartpdf.SmartPdf();
  }

  public async stop() {}
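
  /**
   * Streams a chat exchange: the input is expected to carry newline-delimited
   * JSON messages ({ role, content } per line); the assistant's reply is
   * emitted as a stream of text chunks.
   */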
  public async chatStream(input: ReadableStream<Uint8Array>): Promise<ReadableStream<string>> {
    // Create a TextDecoder to handle incoming chunks
    const decoder = new TextDecoder();
    let buffer = '';
    let currentMessage: { role: string; content: string } | null = null;

    // Create a TransformStream to process the input. Arrow functions are used
    // so that `this` inside the callbacks still refers to the provider
    // instance rather than to the transformer object.
    const transform = new TransformStream<Uint8Array, string>({
      transform: async (chunk, controller) => {
        buffer += decoder.decode(chunk, { stream: true });

        // Parse complete newline-delimited JSON messages from the buffer
        while (true) {
          const newlineIndex = buffer.indexOf('\n');
          if (newlineIndex === -1) break;
          const line = buffer.slice(0, newlineIndex);
          buffer = buffer.slice(newlineIndex + 1);
          if (line.trim()) {
            try {
              const message = JSON.parse(line);
              currentMessage = {
                role: message.role || 'user',
                content: message.content || '',
              };
            } catch (e) {
              console.error('Failed to parse message:', e);
            }
          }
        }

        // If we have a complete message, send it to X.AI
        if (currentMessage) {
          const stream = await this.openAiApiClient.chat.completions.create({
            model: 'grok-2-latest',
            messages: [{
              role: currentMessage.role as 'system' | 'user' | 'assistant',
              content: currentMessage.content,
            }],
            stream: true,
          });

          // Forward each content delta from X.AI to the output stream
          for await (const chunk of stream) {
            const content = chunk.choices[0]?.delta?.content;
            if (content) {
              controller.enqueue(content);
            }
          }
          currentMessage = null;
        }
      },
      flush: (controller) => {
        // Handle a trailing message that was not newline-terminated
        if (buffer) {
          try {
            const message = JSON.parse(buffer);
            controller.enqueue(message.content || '');
          } catch (e) {
            console.error('Failed to parse remaining buffer:', e);
          }
        }
      },
    });

    // Connect the input to our transform stream
    return input.pipeThrough(transform);
  }
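
  /**
   * Sends a system message, prior history, and user message to X.AI and
   * returns the assistant's complete (non-streaming) reply.
   */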
  public async chat(optionsArg: {
    systemMessage: string;
    userMessage: string;
    messageHistory: { role: string; content: string }[];
  }): Promise<{ role: 'assistant'; message: string }> {
    // Prepare messages array with system message, history, and user message
    const messages: ChatCompletionMessageParam[] = [
      { role: 'system', content: optionsArg.systemMessage },
      ...optionsArg.messageHistory.map((msg) => ({
        role: msg.role as 'system' | 'user' | 'assistant',
        content: msg.content,
      })),
      { role: 'user', content: optionsArg.userMessage },
    ];

    // Call X.AI's chat completion API
    const completion = await this.openAiApiClient.chat.completions.create({
      model: 'grok-2-latest',
      messages: messages,
      stream: false,
    });

    // Return the assistant's response
    return {
      role: 'assistant',
      message: completion.choices[0]?.message?.content || '',
    };
  }

  public async audio(optionsArg: { message: string }): Promise<NodeJS.ReadableStream> {
    throw new Error('Audio generation is not supported by X.AI');
  }

  public async vision(optionsArg: { image: Buffer; prompt: string }): Promise<string> {
    throw new Error('Vision tasks are not supported by X.AI');
  }
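
  /**
   * Analyzes PDF documents: each PDF is rendered to PNG pages via SmartPdf,
   * and the pages are referenced in a chat completion request.
   */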
  public async document(optionsArg: {
    systemMessage: string;
    userMessage: string;
    pdfDocuments: Uint8Array[];
    messageHistory: { role: string; content: string }[];
  }): Promise<{ message: any }> {
    // First convert PDF documents to images
    let pdfDocumentImageBytesArray: Uint8Array[] = [];
    for (const pdfDocument of optionsArg.pdfDocuments) {
      const documentImageArray = await this.smartpdfInstance.convertPDFToPngBytes(pdfDocument);
      pdfDocumentImageBytesArray = pdfDocumentImageBytesArray.concat(documentImageArray);
    }

    // Convert images to base64 for inclusion in the message
    const imageBase64Array = pdfDocumentImageBytesArray.map((bytes) =>
      Buffer.from(bytes).toString('base64'),
    );

    // Reference the document images in the user message. Note that only
    // textual placeholders are inserted here; the base64 image data itself
    // is never sent to the model.
    const enhancedUserMessage = `
${optionsArg.userMessage}

Document contents (as images):
${imageBase64Array.map((_img, i) => `Image ${i + 1}: <image data>`).join('\n')}
`;

    // Use chat completion to analyze the documents
    const messages: ChatCompletionMessageParam[] = [
      { role: 'system', content: optionsArg.systemMessage },
      ...optionsArg.messageHistory.map((msg) => ({
        role: msg.role as 'system' | 'user' | 'assistant',
        content: msg.content,
      })),
      { role: 'user', content: enhancedUserMessage },
    ];

    const completion = await this.openAiApiClient.chat.completions.create({
      model: 'grok-2-latest',
      messages: messages,
      stream: false,
    });

    return {
      message: completion.choices[0]?.message?.content || '',
    };
  }
}
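
// Example usage (a sketch, not part of the module; the XAI_TOKEN environment
// variable name is illustrative only):
//
//   const provider = new XAIProvider({ xaiToken: process.env.XAI_TOKEN! });
//   await provider.start();
//   const response = await provider.chat({
//     systemMessage: 'You are a helpful assistant.',
//     userMessage: 'Hello, Grok!',
//     messageHistory: [],
//   });
//   console.log(response.message);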