0.4.2

fix(core): Fix OpenAI chat streaming and PDF document processing logic.
0.4.1
2025-02-25 18:23:28 +00:00 · 2025-02-25 18:23:28 +00:00 · 2025-02-25 13:01:23 +00:00 · 2025-02-25 13:01:23 +00:00
8 changed files with 4772 additions and 1477 deletions
--- a/changelog.md
+++ b/changelog.md
@ -1,5 +1,19 @@
 # Changelog

+## 2025-02-25 - 0.4.2 - fix(core)
+Fix OpenAI chat streaming and PDF document processing logic.
+
+- Updated OpenAI chat streaming to handle new async iterable format.
+- Improved PDF document processing by filtering out empty image buffers.
+- Removed unsupported temperature options from OpenAI requests.
+
+## 2025-02-25 - 0.4.1 - fix(provider)
+Fix provider modules for consistency.
+
+- Updated TypeScript interfaces and options in provider modules for better type safety.
+- Modified transform stream handlers in Exo, Groq, and Ollama providers for consistency.
+- Added optional model options to OpenAI provider for custom model usage.
+
 ## 2025-02-08 - 0.4.0 - feat(core)
 Added support for Exo AI provider

--- a/package.json
+++ b/package.json
@ -1,6 +1,6 @@
 {
  "name": "@push.rocks/smartai",
-  "version": "0.4.0",
+  "version": "0.4.2",
  "private": false,
  "description": "A TypeScript library for integrating and interacting with multiple AI models, offering capabilities for chat and potentially audio responses.",
  "main": "dist_ts/index.js",
@ -14,24 +14,24 @@
    "buildDocs": "(tsdoc)"
  },
  "devDependencies": {
-    "@git.zone/tsbuild": "^2.1.84",
-    "@git.zone/tsbundle": "^2.0.5",
-    "@git.zone/tsrun": "^1.2.49",
-    "@git.zone/tstest": "^1.0.90",
-    "@push.rocks/qenv": "^6.0.5",
-    "@push.rocks/tapbundle": "^5.3.0",
-    "@types/node": "^22.5.5"
+    "@git.zone/tsbuild": "^2.2.1",
+    "@git.zone/tsbundle": "^2.2.5",
+    "@git.zone/tsrun": "^1.3.3",
+    "@git.zone/tstest": "^1.0.96",
+    "@push.rocks/qenv": "^6.1.0",
+    "@push.rocks/tapbundle": "^5.5.6",
+    "@types/node": "^22.13.5"
  },
  "dependencies": {
-    "@anthropic-ai/sdk": "^0.27.3",
-    "@push.rocks/smartarray": "^1.0.8",
-    "@push.rocks/smartfile": "^11.0.21",
+    "@anthropic-ai/sdk": "^0.37.0",
+    "@push.rocks/smartarray": "^1.1.0",
+    "@push.rocks/smartfile": "^11.2.0",
    "@push.rocks/smartpath": "^5.0.18",
-    "@push.rocks/smartpdf": "^3.1.6",
-    "@push.rocks/smartpromise": "^4.0.4",
-    "@push.rocks/smartrequest": "^2.0.22",
+    "@push.rocks/smartpdf": "^3.2.2",
+    "@push.rocks/smartpromise": "^4.2.3",
+    "@push.rocks/smartrequest": "^2.0.23",
    "@push.rocks/webstream": "^1.0.10",
-    "openai": "^4.62.1"
+    "openai": "^4.85.4"
  },
  "repository": {
    "type": "git",
@ -66,5 +66,10 @@
    "audio responses",
    "text-to-speech",
    "streaming chat"
-  ]
+  ],
+  "pnpm": {
+    "onlyBuiltDependencies": [
+      "puppeteer"
+    ]
+  }
 }
--- a/pnpm-lock.yaml
+++ b/pnpm-lock.yaml
--- a/ts/00_commitinfo_data.ts
+++ b/ts/00_commitinfo_data.ts
@ -3,6 +3,6 @@
 */
 export const commitinfo = {
  name: '@push.rocks/smartai',
-  version: '0.4.0',
+  version: '0.4.2',
  description: 'A TypeScript library for integrating and interacting with multiple AI models, offering capabilities for chat and potentially audio responses.'
 }
--- a/ts/provider.exo.ts
+++ b/ts/provider.exo.ts
@ -38,7 +38,7 @@ export class ExoProvider extends MultiModalModel {

    // Create a TransformStream to process the input
    const transform = new TransformStream<Uint8Array, string>({
-      async transform(chunk, controller) {
+      transform: async (chunk, controller) => {
        buffer += decoder.decode(chunk, { stream: true });

        // Try to parse complete JSON messages from the buffer
--- a/ts/provider.groq.ts
+++ b/ts/provider.groq.ts
@ -32,7 +32,7 @@ export class GroqProvider extends MultiModalModel {

    // Create a TransformStream to process the input
    const transform = new TransformStream<Uint8Array, string>({
-      async transform(chunk, controller) {
+      transform: async (chunk, controller) => {
        buffer += decoder.decode(chunk, { stream: true });

        // Try to parse complete JSON messages from the buffer
--- a/ts/provider.ollama.ts
+++ b/ts/provider.ollama.ts
@ -45,7 +45,7 @@ export class OllamaProvider extends MultiModalModel {

    // Create a TransformStream to process the input
    const transform = new TransformStream<Uint8Array, string>({
-      async transform(chunk, controller) {
+      transform: async (chunk, controller) => {
        buffer += decoder.decode(chunk, { stream: true });

        // Try to parse complete JSON messages from the buffer
--- a/ts/provider.openai.ts
+++ b/ts/provider.openai.ts
@ -1,10 +1,20 @@
 import * as plugins from './plugins.js';
 import * as paths from './paths.js';

+// Custom type definition for chat completion messages
+export type TChatCompletionRequestMessage = { 
+  role: "system" | "user" | "assistant"; 
+  content: string; 
+};
+
 import { MultiModalModel } from './abstract.classes.multimodal.js';

 export interface IOpenaiProviderOptions {
  openaiToken: string;
+  chatModel?: string;
+  audioModel?: string;
+  visionModel?: string;
+  // Optionally add more model options (e.g., documentModel) if needed.
 }

 export class OpenAiProvider extends MultiModalModel {
@ -31,11 +41,14 @@ export class OpenAiProvider extends MultiModalModel {
    // Create a TextDecoder to handle incoming chunks
    const decoder = new TextDecoder();
    let buffer = '';
-    let currentMessage: { role: string; content: string; } | null = null;
+    let currentMessage: { 
+      role: "function" | "user" | "system" | "assistant" | "tool" | "developer"; 
+      content: string; 
+    } | null = null;

    // Create a TransformStream to process the input
    const transform = new TransformStream<Uint8Array, string>({
-      async transform(chunk, controller) {
+      transform: async (chunk, controller) => {
        buffer += decoder.decode(chunk, { stream: true });

        // Try to parse complete JSON messages from the buffer
@ -50,7 +63,7 @@ export class OpenAiProvider extends MultiModalModel {
            try {
              const message = JSON.parse(line);
              currentMessage = {
-                role: message.role || 'user',
+                role: (message.role || 'user') as "function" | "user" | "system" | "assistant" | "tool" | "developer",
                content: message.content || '',
              };
            } catch (e) {
@ -61,20 +74,24 @@ export class OpenAiProvider extends MultiModalModel {

        // If we have a complete message, send it to OpenAI
        if (currentMessage) {
-          const stream = await this.openAiApiClient.chat.completions.create({
-            model: 'gpt-4',
-            messages: [{ role: currentMessage.role, content: currentMessage.content }],
+          const messageToSend = { role: "user" as const, content: currentMessage.content };
+          const chatModel = this.options.chatModel ?? 'o3-mini';
+          const requestParams: any = {
+            model: chatModel,
+            messages: [messageToSend],
            stream: true,
-          });
-
+          };
+          // Temperature is omitted since the model does not support it.
+          const stream = await this.openAiApiClient.chat.completions.create(requestParams);
+          // Explicitly cast the stream as an async iterable to satisfy TypeScript.
+          const streamAsyncIterable = stream as unknown as AsyncIterableIterator<any>;
          // Process each chunk from OpenAI
-          for await (const chunk of stream) {
+          for await (const chunk of streamAsyncIterable) {
            const content = chunk.choices[0]?.delta?.content;
            if (content) {
              controller.enqueue(content);
            }
          }
-
          currentMessage = null;
        }
      },
@ -104,15 +121,17 @@ export class OpenAiProvider extends MultiModalModel {
      content: string;
    }[];
  }) {
-    const result = await this.openAiApiClient.chat.completions.create({
-      model: 'gpt-4o',
-
+    const chatModel = this.options.chatModel ?? 'o3-mini';
+    const requestParams: any = {
+      model: chatModel,
      messages: [
        { role: 'system', content: optionsArg.systemMessage },
        ...optionsArg.messageHistory,
        { role: 'user', content: optionsArg.userMessage },
      ],
-    });
+    };
+    // Temperature parameter omitted to avoid an "unsupported parameter" error.
+    const result = await this.openAiApiClient.chat.completions.create(requestParams);
    return {
      role: result.choices[0].message.role as 'assistant',
      message: result.choices[0].message.content,
@ -122,7 +141,7 @@ export class OpenAiProvider extends MultiModalModel {
  public async audio(optionsArg: { message: string }): Promise<NodeJS.ReadableStream> {
    const done = plugins.smartpromise.defer<NodeJS.ReadableStream>();
    const result = await this.openAiApiClient.audio.speech.create({
-      model: 'tts-1-hd',
+      model: this.options.audioModel ?? 'o3-mini',
      input: optionsArg.message,
      voice: 'nova',
      response_format: 'mp3',
@ -144,27 +163,30 @@ export class OpenAiProvider extends MultiModalModel {
  }) {
    let pdfDocumentImageBytesArray: Uint8Array[] = [];

+    // Convert each PDF into one or more image byte arrays.
+    const smartpdfInstance = new plugins.smartpdf.SmartPdf();
+    await smartpdfInstance.start();
    for (const pdfDocument of optionsArg.pdfDocuments) {
-      const documentImageArray = await this.smartpdfInstance.convertPDFToPngBytes(pdfDocument);
+      const documentImageArray = await smartpdfInstance.convertPDFToPngBytes(pdfDocument);
      pdfDocumentImageBytesArray = pdfDocumentImageBytesArray.concat(documentImageArray);
    }
+    await smartpdfInstance.stop();

    console.log(`image smartfile array`);
    console.log(pdfDocumentImageBytesArray.map((smartfile) => smartfile.length));

-    const smartfileArray = await plugins.smartarray.map(
-      pdfDocumentImageBytesArray,
-      async (pdfDocumentImageBytes) => {
-        return plugins.smartfile.SmartFile.fromBuffer(
-          'pdfDocumentImage.jpg',
-          Buffer.from(pdfDocumentImageBytes)
-        );
-      }
-    );
+    // Filter out any empty buffers to avoid sending invalid image URLs.
+    const validImageBytesArray = pdfDocumentImageBytesArray.filter(imageBytes => imageBytes && imageBytes.length > 0);
+    const imageAttachments = validImageBytesArray.map(imageBytes => ({
+      type: 'image_url',
+      image_url: {
+        url: 'data:image/png;base64,' + Buffer.from(imageBytes).toString('base64'),
+      },
+    }));

-    const result = await this.openAiApiClient.chat.completions.create({
-      model: 'gpt-4o',
-      // response_format: { type: "json_object" }, // not supported for now
+    const chatModel = this.options.chatModel ?? 'gpt-4o';
+    const requestParams: any = {
+      model: chatModel,
      messages: [
        { role: 'system', content: optionsArg.systemMessage },
        ...optionsArg.messageHistory,
@ -172,30 +194,22 @@ export class OpenAiProvider extends MultiModalModel {
          role: 'user',
          content: [
            { type: 'text', text: optionsArg.userMessage },
-            ...(() => {
-              const returnArray = [];
-              for (const imageBytes of pdfDocumentImageBytesArray) {
-                returnArray.push({
-                  type: 'image_url',
-                  image_url: {
-                    url: 'data:image/png;base64,' + Buffer.from(imageBytes).toString('base64'),
-                  },
-                });
-              }
-              return returnArray;
-            })(),
+            ...imageAttachments,
          ],
        },
      ],
-    });
+    };
+    // Temperature parameter removed.
+    const result = await this.openAiApiClient.chat.completions.create(requestParams);
    return {
      message: result.choices[0].message,
    };
  }

  public async vision(optionsArg: { image: Buffer; prompt: string }): Promise<string> {
-    const result = await this.openAiApiClient.chat.completions.create({
-      model: 'gpt-4-vision-preview',
+    const visionModel = this.options.visionModel ?? 'gpt-4o';
+    const requestParams: any = {
+      model: visionModel,
      messages: [
        {
          role: 'user',
@ -211,8 +225,8 @@ export class OpenAiProvider extends MultiModalModel {
        }
      ],
      max_tokens: 300
-    });
-
+    };
+    const result = await this.openAiApiClient.chat.completions.create(requestParams);
    return result.choices[0].message.content || '';
  }
-}
+}
Author	SHA1	Message	Date
Philipp Kunz	0a80ac0a8a	0.4.2	2025-02-25 18:23:28 +00:00
Philipp Kunz	6ce442354e	fix(core): Fix OpenAI chat streaming and PDF document processing logic.	2025-02-25 18:23:28 +00:00
Philipp Kunz	9b38a3c06e	0.4.1	2025-02-25 13:01:23 +00:00
Philipp Kunz	5dead05324	fix(provider): Fix provider modules for consistency	2025-02-25 13:01:23 +00:00