import * as plugins from './plugins.js';
import * as paths from './paths.js';
import { Readable } from 'stream';

// Custom type definition for chat completion messages
export type TChatCompletionRequestMessage = {
  role: "system" | "user" | "assistant";
  content: string;
};

import { MultiModalModel } from './abstract.classes.multimodal.js';
import type {
  ResearchOptions,
  ResearchResponse,
  ImageGenerateOptions,
  ImageEditOptions,
  ImageResponse
} from './abstract.classes.multimodal.js';

export interface IOpenaiProviderOptions {
  openaiToken: string;
  chatModel?: string;
  audioModel?: string;
  visionModel?: string;
  researchModel?: string;
  imageModel?: string;
  enableWebSearch?: boolean;
}

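/**
 * OpenAI provider built on top of the MultiModalModel base class.
 * Wraps the official OpenAI SDK client and exposes chat, streaming chat,
 * text-to-speech, document analysis, vision, research, and image
 * generation/editing helpers.
 *
 * Usage sketch (the token source is an assumption; adapt to your setup):
 *
 *   const provider = new OpenAiProvider({ openaiToken: process.env.OPENAI_TOKEN });
 *   await provider.start();
 *   const reply = await provider.chat({
 *     systemMessage: 'You are a helpful assistant.',
 *     userMessage: 'Hello!',
 *     messageHistory: [],
 *   });
 */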
export class OpenAiProvider extends MultiModalModel {
  private options: IOpenaiProviderOptions;
  public openAiApiClient: plugins.openai.default;

  constructor(optionsArg: IOpenaiProviderOptions) {
    super();
    this.options = optionsArg;
  }

  public async start() {
    await super.start();
    this.openAiApiClient = new plugins.openai.default({
      apiKey: this.options.openaiToken,
      dangerouslyAllowBrowser: true,
    });
  }

  public async stop() {
    await super.stop();
  }

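  /**
   * Streaming chat: consumes a stream of newline-delimited JSON messages
   * ({ role, content }) and returns a stream of assistant response text chunks.
   */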
  public async chatStream(input: ReadableStream<Uint8Array>): Promise<ReadableStream<string>> {
    // Create a TextDecoder to handle incoming chunks
    const decoder = new TextDecoder();
    let buffer = '';
    let currentMessage: {
      role: "function" | "user" | "system" | "assistant" | "tool" | "developer";
      content: string;
    } | null = null;

    // Create a TransformStream to process the input
    const transform = new TransformStream<Uint8Array, string>({
      transform: async (chunk, controller) => {
        buffer += decoder.decode(chunk, { stream: true });

        // Try to parse complete JSON messages from the buffer
        while (true) {
          const newlineIndex = buffer.indexOf('\n');
          if (newlineIndex === -1) break;

          const line = buffer.slice(0, newlineIndex);
          buffer = buffer.slice(newlineIndex + 1);

          if (line.trim()) {
            try {
              const message = JSON.parse(line);
              currentMessage = {
                role: (message.role || 'user') as "function" | "user" | "system" | "assistant" | "tool" | "developer",
                content: message.content || '',
              };
            } catch (e) {
              console.error('Failed to parse message:', e);
            }
          }
        }

        // If we have a complete message, send it to OpenAI
        if (currentMessage) {
          const messageToSend = { role: "user" as const, content: currentMessage.content };
          const chatModel = this.options.chatModel ?? 'gpt-5-mini';
          const requestParams: any = {
            model: chatModel,
            messages: [messageToSend],
            stream: true,
          };
          // Temperature is omitted since the model does not support it.
          const stream = await this.openAiApiClient.chat.completions.create(requestParams);
          // Explicitly cast the stream as an async iterable to satisfy TypeScript.
          const streamAsyncIterable = stream as unknown as AsyncIterableIterator<any>;
          // Process each chunk from OpenAI
          for await (const chunk of streamAsyncIterable) {
            const content = chunk.choices[0]?.delta?.content;
            if (content) {
              controller.enqueue(content);
            }
          }
          currentMessage = null;
        }
      },

      flush(controller) {
        if (buffer) {
          try {
            const message = JSON.parse(buffer);
            controller.enqueue(message.content || '');
          } catch (e) {
            console.error('Failed to parse remaining buffer:', e);
          }
        }
      }
    });

    // Connect the input to our transform stream
    return input.pipeThrough(transform);
  }

  // Implementing the synchronous chat interaction
  public async chat(optionsArg: {
    systemMessage: string;
    userMessage: string;
    messageHistory: {
      role: 'assistant' | 'user';
      content: string;
    }[];
  }) {
    const chatModel = this.options.chatModel ?? 'gpt-5-mini';
    const requestParams: any = {
      model: chatModel,
      messages: [
        { role: 'system', content: optionsArg.systemMessage },
        ...optionsArg.messageHistory,
        { role: 'user', content: optionsArg.userMessage },
      ],
    };
    // Temperature parameter removed to avoid unsupported error.
    const result = await this.openAiApiClient.chat.completions.create(requestParams);
    return {
      role: result.choices[0].message.role as 'assistant',
      message: result.choices[0].message.content,
    };
  }

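  /**
   * Text-to-speech: converts the given message into an MP3 audio stream
   * using the configured audio model (defaults to 'tts-1-hd').
   */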
  public async audio(optionsArg: { message: string }): Promise<NodeJS.ReadableStream> {
    const done = plugins.smartpromise.defer<NodeJS.ReadableStream>();
    const result = await this.openAiApiClient.audio.speech.create({
      model: this.options.audioModel ?? 'tts-1-hd',
      input: optionsArg.message,
      voice: 'nova',
      response_format: 'mp3',
      speed: 1,
    });
    const stream = result.body;
    const nodeStream = Readable.fromWeb(stream as any);
    done.resolve(nodeStream);
    return done.promise;
  }

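  /**
   * Document analysis: converts the provided PDF documents to PNG images,
   * attaches them to the user message, and asks the chat model about them.
   */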
  public async document(optionsArg: {
    systemMessage: string;
    userMessage: string;
    pdfDocuments: Uint8Array[];
    messageHistory: {
      role: 'assistant' | 'user';
      content: any;
    }[];
  }) {
    let pdfDocumentImageBytesArray: Uint8Array[] = [];

    // Convert each PDF into one or more image byte arrays.
    for (const pdfDocument of optionsArg.pdfDocuments) {
      const documentImageArray = await this.smartpdfInstance.convertPDFToPngBytes(pdfDocument);
      pdfDocumentImageBytesArray = pdfDocumentImageBytesArray.concat(documentImageArray);
    }

    console.log(`pdf page image byte lengths`);
    console.log(pdfDocumentImageBytesArray.map((imageBytes) => imageBytes.length));

    // Filter out any empty buffers to avoid sending invalid image URLs.
    const validImageBytesArray = pdfDocumentImageBytesArray.filter(imageBytes => imageBytes && imageBytes.length > 0);
    const imageAttachments = validImageBytesArray.map(imageBytes => ({
      type: 'image_url',
      image_url: {
        url: 'data:image/png;base64,' + Buffer.from(imageBytes).toString('base64'),
      },
    }));

    const chatModel = this.options.chatModel ?? 'gpt-5-mini';
    const requestParams: any = {
      model: chatModel,
      messages: [
        { role: 'system', content: optionsArg.systemMessage },
        ...optionsArg.messageHistory,
        {
          role: 'user',
          content: [
            { type: 'text', text: optionsArg.userMessage },
            ...imageAttachments,
          ],
        },
      ],
    };
    // Temperature parameter removed.
    const result = await this.openAiApiClient.chat.completions.create(requestParams);
    return {
      message: result.choices[0].message,
    };
  }

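  /**
   * Vision: sends an image plus a text prompt to the configured vision model
   * and returns the model's textual answer.
   */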
  public async vision(optionsArg: { image: Buffer; prompt: string }): Promise<string> {
    const visionModel = this.options.visionModel ?? 'o4-mini';
    const requestParams: any = {
      model: visionModel,
      messages: [
        {
          role: 'user',
          content: [
            { type: 'text', text: optionsArg.prompt },
            {
              type: 'image_url',
              image_url: {
                url: `data:image/jpeg;base64,${optionsArg.image.toString('base64')}`
              }
            }
          ]
        }
      ],
      max_tokens: 300
    };
    const result = await this.openAiApiClient.chat.completions.create(requestParams);
    return result.choices[0].message.content || '';
  }

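  /**
   * Research: runs a query through OpenAI's Responses API, optionally with the
   * web search preview tool, and extracts the answer, cited sources, and the
   * search queries the model issued.
   */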
  public async research(optionsArg: ResearchOptions): Promise<ResearchResponse> {
    // Determine which model to use - Deep Research API requires specific models
    let model: string;
    if (optionsArg.searchDepth === 'deep') {
      model = this.options.researchModel || 'o4-mini-deep-research-2025-06-26';
    } else {
      // For basic/advanced, still use deep research models if web search is needed
      if (optionsArg.includeWebSearch) {
        model = this.options.researchModel || 'o4-mini-deep-research-2025-06-26';
      } else {
        model = this.options.chatModel || 'gpt-5-mini';
      }
    }

    const systemMessage = 'You are a research assistant. Provide comprehensive answers with citations and sources when available.';

    // Prepare request parameters using Deep Research API format
    const requestParams: any = {
      model,
      instructions: systemMessage,
      input: optionsArg.query
    };

    // Add web search tool if requested
    if (optionsArg.includeWebSearch || optionsArg.searchDepth === 'deep') {
      requestParams.tools = [
        {
          type: 'web_search_preview',
          search_context_size: optionsArg.searchDepth === 'deep' ? 'high' :
                               optionsArg.searchDepth === 'advanced' ? 'medium' : 'low'
        }
      ];
    }

    // Add background flag for deep research
    if (optionsArg.background && optionsArg.searchDepth === 'deep') {
      requestParams.background = true;
    }

    try {
      // Execute the research request using Deep Research API
      const result = await this.openAiApiClient.responses.create(requestParams);

      // Extract the answer from output items
      let answer = '';
      const sources: Array<{ url: string; title: string; snippet: string }> = [];
      const searchQueries: string[] = [];

      // Process output items
      for (const item of result.output || []) {
        // Extract message content
        if (item.type === 'message' && 'content' in item) {
          const messageItem = item as any;
          for (const contentItem of messageItem.content || []) {
            if (contentItem.type === 'output_text' && 'text' in contentItem) {
              answer += contentItem.text;
            }
          }
        }

        // Extract web search queries
        if (item.type === 'web_search_call' && 'action' in item) {
          const searchItem = item as any;
          if (searchItem.action && searchItem.action.type === 'search' && 'query' in searchItem.action) {
            searchQueries.push(searchItem.action.query);
          }
        }
      }

      // Parse sources from markdown links in the answer
      const urlRegex = /\[([^\]]+)\]\(([^)]+)\)/g;
      let match: RegExpExecArray | null;

      while ((match = urlRegex.exec(answer)) !== null) {
        sources.push({
          title: match[1],
          url: match[2],
          snippet: ''
        });
      }

      return {
        answer,
        sources,
        searchQueries: searchQueries.length > 0 ? searchQueries : undefined,
        metadata: {
          model,
          searchDepth: optionsArg.searchDepth || 'basic',
          tokensUsed: result.usage?.total_tokens
        }
      };
    } catch (error) {
      console.error('Research API error:', error);
      throw new Error(`Failed to perform research: ${error.message}`);
    }
  }

  /**
   * Image generation using OpenAI's gpt-image-1 or DALL-E models
   */
  public async imageGenerate(optionsArg: ImageGenerateOptions): Promise<ImageResponse> {
    const model = optionsArg.model || this.options.imageModel || 'gpt-image-1';

    try {
      const requestParams: any = {
        model,
        prompt: optionsArg.prompt,
        n: optionsArg.n || 1,
      };

      // Add gpt-image-1 specific parameters
      if (model === 'gpt-image-1') {
        if (optionsArg.quality) requestParams.quality = optionsArg.quality;
        if (optionsArg.size) requestParams.size = optionsArg.size;
        if (optionsArg.background) requestParams.background = optionsArg.background;
        if (optionsArg.outputFormat) requestParams.output_format = optionsArg.outputFormat;
        if (optionsArg.outputCompression !== undefined) requestParams.output_compression = optionsArg.outputCompression;
        if (optionsArg.moderation) requestParams.moderation = optionsArg.moderation;
        if (optionsArg.stream !== undefined) requestParams.stream = optionsArg.stream;
        if (optionsArg.partialImages !== undefined) requestParams.partial_images = optionsArg.partialImages;
      } else if (model === 'dall-e-3') {
        // DALL-E 3 specific parameters
        if (optionsArg.quality) requestParams.quality = optionsArg.quality;
        if (optionsArg.size) requestParams.size = optionsArg.size;
        if (optionsArg.style) requestParams.style = optionsArg.style;
        requestParams.response_format = 'b64_json'; // Always use base64 for consistency
      } else if (model === 'dall-e-2') {
        // DALL-E 2 specific parameters
        if (optionsArg.size) requestParams.size = optionsArg.size;
        requestParams.response_format = 'b64_json';
      }

      const result = await this.openAiApiClient.images.generate(requestParams);

      const images = (result.data || []).map(img => ({
        b64_json: img.b64_json,
        url: img.url,
        revisedPrompt: img.revised_prompt
      }));

      return {
        images,
        metadata: {
          model,
          quality: result.quality,
          size: result.size,
          outputFormat: result.output_format,
          tokensUsed: result.usage?.total_tokens
        }
      };
    } catch (error) {
      console.error('Image generation error:', error);
      throw new Error(`Failed to generate image: ${error.message}`);
    }
  }

  /**
   * Image editing using OpenAI's gpt-image-1 or DALL-E 2 models
   */
  public async imageEdit(optionsArg: ImageEditOptions): Promise<ImageResponse> {
    const model = optionsArg.model || this.options.imageModel || 'gpt-image-1';

    try {
      const requestParams: any = {
        model,
        image: optionsArg.image,
        prompt: optionsArg.prompt,
        n: optionsArg.n || 1,
      };

      // Add mask if provided
      if (optionsArg.mask) {
        requestParams.mask = optionsArg.mask;
      }

      // Add gpt-image-1 specific parameters
      if (model === 'gpt-image-1') {
        if (optionsArg.quality) requestParams.quality = optionsArg.quality;
        if (optionsArg.size) requestParams.size = optionsArg.size;
        if (optionsArg.background) requestParams.background = optionsArg.background;
        if (optionsArg.outputFormat) requestParams.output_format = optionsArg.outputFormat;
        if (optionsArg.outputCompression !== undefined) requestParams.output_compression = optionsArg.outputCompression;
        if (optionsArg.stream !== undefined) requestParams.stream = optionsArg.stream;
        if (optionsArg.partialImages !== undefined) requestParams.partial_images = optionsArg.partialImages;
      } else if (model === 'dall-e-2') {
        // DALL-E 2 specific parameters
        if (optionsArg.size) requestParams.size = optionsArg.size;
        requestParams.response_format = 'b64_json';
      }

      const result = await this.openAiApiClient.images.edit(requestParams);

      const images = (result.data || []).map(img => ({
        b64_json: img.b64_json,
        url: img.url,
        revisedPrompt: img.revised_prompt
      }));

      return {
        images,
        metadata: {
          model,
          quality: result.quality,
          size: result.size,
          outputFormat: result.output_format,
          tokensUsed: result.usage?.total_tokens
        }
      };
    } catch (error) {
      console.error('Image edit error:', error);
      throw new Error(`Failed to edit image: ${error.message}`);
    }
  }
}