0.5.4

fix(provider.openai): Update dependency versions, clean test imports, and adjust default OpenAI model configurations
0.5.3
2025-05-13 18:39:58 +00:00 · 2025-05-13 18:39:57 +00:00 · 2025-04-03 21:46:40 +00:00 · 2025-04-03 21:46:40 +00:00 · 2025-04-03 21:46:15 +00:00 · 2025-04-03 21:46:14 +00:00
8 changed files with 1914 additions and 759 deletions
--- a/changelog.md
+++ b/changelog.md
@ -1,5 +1,44 @@
 # Changelog

+## 2025-05-13 - 0.5.4 - fix(provider.openai)
+Update dependency versions, clean test imports, and adjust default OpenAI model configurations
+
+- Bump dependency versions in package.json (@git.zone/tsbuild, @push.rocks/tapbundle, openai, etc.)
+- Change default chatModel from 'gpt-4o' to 'o4-mini' and visionModel from 'gpt-4o' to 'o4-mini' in provider.openai.ts
+- Remove unused 'expectAsync' import from test file
+
+## 2025-04-03 - 0.5.3 - fix(package.json)
+Add explicit packageManager field to package.json
+
+- Include the packageManager property to specify the pnpm version and checksum.
+- Align package metadata with current standards.
+
+## 2025-04-03 - 0.5.2 - fix(readme)
+Remove redundant conclusion section from README to streamline documentation.
+
+- Eliminated the conclusion block describing SmartAi's capabilities and documentation pointers.
+
+## 2025-02-25 - 0.5.1 - fix(OpenAiProvider)
+Corrected audio model ID in OpenAiProvider
+
+- Fixed audio model identifier from 'o3-mini' to 'tts-1-hd' in the OpenAiProvider's audio method.
+- Addressed minor code formatting issues in test suite for better readability.
+- Corrected spelling errors in test documentation and comments.
+
+## 2025-02-25 - 0.5.0 - feat(documentation and configuration)
+Enhanced package and README documentation
+
+- Expanded the package description to better reflect the library's capabilities.
+- Improved README with detailed usage examples for initialization, chat interactions, streaming chat, audio generation, document analysis, and vision processing.
+- Provided error handling strategies and advanced streaming customization examples.
+
+## 2025-02-25 - 0.4.2 - fix(core)
+Fix OpenAI chat streaming and PDF document processing logic.
+
+- Updated OpenAI chat streaming to handle new async iterable format.
+- Improved PDF document processing by filtering out empty image buffers.
+- Removed unsupported temperature options from OpenAI requests.
+
 ## 2025-02-25 - 0.4.1 - fix(provider)
 Fix provider modules for consistency

--- a/npmextra.json
+++ b/npmextra.json
@ -5,20 +5,33 @@
      "githost": "code.foss.global",
      "gitscope": "push.rocks",
      "gitrepo": "smartai",
-      "description": "A TypeScript library for integrating and interacting with multiple AI models, offering capabilities for chat and potentially audio responses.",
+      "description": "SmartAi is a versatile TypeScript library designed to facilitate integration and interaction with various AI models, offering functionalities for chat, audio generation, document processing, and vision tasks.",
      "npmPackagename": "@push.rocks/smartai",
      "license": "MIT",
      "projectDomain": "push.rocks",
      "keywords": [
        "AI integration",
-        "chatbot",
        "TypeScript",
+        "chatbot",
        "OpenAI",
        "Anthropic",
-        "multi-model support",
-        "audio responses",
+        "multi-model",
+        "audio generation",
        "text-to-speech",
-        "streaming chat"
+        "document processing",
+        "vision processing",
+        "streaming chat",
+        "API",
+        "multiple providers",
+        "AI models",
+        "synchronous chat",
+        "asynchronous chat",
+        "real-time interaction",
+        "content analysis",
+        "image description",
+        "document classification",
+        "AI toolkit",
+        "provider switching"
      ]
    }
  },
--- a/package.json
+++ b/package.json
@ -1,8 +1,8 @@
 {
  "name": "@push.rocks/smartai",
-  "version": "0.4.1",
+  "version": "0.5.4",
  "private": false,
-  "description": "A TypeScript library for integrating and interacting with multiple AI models, offering capabilities for chat and potentially audio responses.",
+  "description": "SmartAi is a versatile TypeScript library designed to facilitate integration and interaction with various AI models, offering functionalities for chat, audio generation, document processing, and vision tasks.",
  "main": "dist_ts/index.js",
  "typings": "dist_ts/index.d.ts",
  "type": "module",
@ -14,24 +14,24 @@
    "buildDocs": "(tsdoc)"
  },
  "devDependencies": {
-    "@git.zone/tsbuild": "^2.2.1",
+    "@git.zone/tsbuild": "^2.3.2",
    "@git.zone/tsbundle": "^2.2.5",
    "@git.zone/tsrun": "^1.3.3",
    "@git.zone/tstest": "^1.0.96",
    "@push.rocks/qenv": "^6.1.0",
-    "@push.rocks/tapbundle": "^5.5.6",
-    "@types/node": "^22.13.5"
+    "@push.rocks/tapbundle": "^6.0.3",
+    "@types/node": "^22.15.17"
  },
  "dependencies": {
-    "@anthropic-ai/sdk": "^0.37.0",
+    "@anthropic-ai/sdk": "^0.50.4",
    "@push.rocks/smartarray": "^1.1.0",
    "@push.rocks/smartfile": "^11.2.0",
    "@push.rocks/smartpath": "^5.0.18",
-    "@push.rocks/smartpdf": "^3.1.8",
+    "@push.rocks/smartpdf": "^3.2.2",
    "@push.rocks/smartpromise": "^4.2.3",
-    "@push.rocks/smartrequest": "^2.0.23",
+    "@push.rocks/smartrequest": "^2.1.0",
    "@push.rocks/webstream": "^1.0.10",
-    "openai": "^4.85.4"
+    "openai": "^4.98.0"
  },
  "repository": {
    "type": "git",
@ -58,13 +58,32 @@
  ],
  "keywords": [
    "AI integration",
-    "chatbot",
    "TypeScript",
+    "chatbot",
    "OpenAI",
    "Anthropic",
-    "multi-model support",
-    "audio responses",
+    "multi-model",
+    "audio generation",
    "text-to-speech",
-    "streaming chat"
-  ]
+    "document processing",
+    "vision processing",
+    "streaming chat",
+    "API",
+    "multiple providers",
+    "AI models",
+    "synchronous chat",
+    "asynchronous chat",
+    "real-time interaction",
+    "content analysis",
+    "image description",
+    "document classification",
+    "AI toolkit",
+    "provider switching"
+  ],
+  "pnpm": {
+    "onlyBuiltDependencies": [
+      "puppeteer"
+    ]
+  },
+  "packageManager": "pnpm@10.7.0+sha512.6b865ad4b62a1d9842b61d674a393903b871d9244954f652b8842c2b553c72176b278f64c463e52d40fff8aba385c235c8c9ecf5cc7de4fd78b8bb6d49633ab6"
 }
--- a/pnpm-lock.yaml
+++ b/pnpm-lock.yaml
--- a/readme.md
+++ b/readme.md
@ -1,144 +1,38 @@
 # @push.rocks/smartai

-[![npm version](https://badge.fury.io/js/%40push.rocks%2Fsmartai.svg)](https://www.npmjs.com/package/@push.rocks/smartai)
+SmartAi is a TypeScript library providing a unified interface for integrating and interacting with multiple AI models, supporting chat interactions, audio and document processing, and vision tasks.

-SmartAi is a comprehensive TypeScript library that provides a standardized interface for integrating and interacting with multiple AI models. It supports a range of operations from synchronous and streaming chat to audio generation, document processing, and vision tasks.
+## Install

-## Table of Contents
-
- [Features](#features)
- [Installation](#installation)
- [Supported AI Providers](#supported-ai-providers)
- [Quick Start](#quick-start)
- [Usage Examples](#usage-examples)
-  - [Chat Interactions](#chat-interactions)
-  - [Streaming Chat](#streaming-chat)
-  - [Audio Generation](#audio-generation)
-  - [Document Processing](#document-processing)
-  - [Vision Processing](#vision-processing)
- [Error Handling](#error-handling)
- [Development](#development)
-  - [Running Tests](#running-tests)
-  - [Building the Project](#building-the-project)
- [Contributing](#contributing)
- [License](#license)
- [Legal Information](#legal-information)
-
-## Features
-
- **Unified API:** Seamlessly integrate multiple AI providers with a consistent interface.
- **Chat & Streaming:** Support for both synchronous and real-time streaming chat interactions.
- **Audio & Vision:** Generate audio responses and perform detailed image analysis.
- **Document Processing:** Analyze PDFs and other documents using vision models.
- **Extensible:** Easily extend the library to support additional AI providers.
-
-## Installation
-
-To install SmartAi, run the following command:
+To install SmartAi into your project, you need to run the following command in your terminal:

 ```bash
 npm install @push.rocks/smartai
 ```

-This will add the package to your project’s dependencies.
+This command will add the SmartAi library to your project's dependencies, making it available for use in your TypeScript application.

-## Supported AI Providers
+## Usage

-SmartAi supports multiple AI providers. Configure each provider with its corresponding token or settings:
+SmartAi is designed to provide a comprehensive and unified API for working seamlessly with multiple AI providers like OpenAI, Anthropic, Perplexity, and others. Below we will delve into how to make the most out of this library, illustrating the setup and functionality with in-depth examples. Our scenarios will explore synchronous and streaming interactions, audio generation, document handling, and vision tasks with different AI providers.

-### OpenAI
+### Initialization

- **Models:** GPT-4, GPT-3.5-turbo, GPT-4-vision-preview
- **Features:** Chat, Streaming, Audio Generation, Vision, Document Processing
- **Configuration Example:**
-  
-  ```typescript
-  openaiToken: 'your-openai-token'
-  ```
-
-### X.AI
-
- **Models:** Grok-2-latest
- **Features:** Chat, Streaming, Document Processing
- **Configuration Example:**
-  
-  ```typescript
-  xaiToken: 'your-xai-token'
-  ```
-
-### Anthropic
-
- **Models:** Claude-3-opus-20240229
- **Features:** Chat, Streaming, Vision, Document Processing
- **Configuration Example:**
-  
-  ```typescript
-  anthropicToken: 'your-anthropic-token'
-  ```
-
-### Perplexity
-
- **Models:** Mixtral-8x7b-instruct
- **Features:** Chat, Streaming
- **Configuration Example:**
-  
-  ```typescript
-  perplexityToken: 'your-perplexity-token'
-  ```
-
-### Groq
-
- **Models:** Llama-3.3-70b-versatile
- **Features:** Chat, Streaming
- **Configuration Example:**
-  
-  ```typescript
-  groqToken: 'your-groq-token'
-  ```
-
-### Ollama
-
- **Models:** Configurable (default: llama2; use llava for vision/document tasks)
- **Features:** Chat, Streaming, Vision, Document Processing
- **Configuration Example:**
-  
-  ```typescript
-  ollama: {
-    baseUrl: 'http://localhost:11434', // Optional
-    model: 'llama2',                  // Optional
-    visionModel: 'llava'               // Optional for vision and document tasks
-  }
-  ```
-
-### Exo
-
- **Models:** Configurable (supports LLaMA, Mistral, LlaVA, Qwen, and Deepseek)
- **Features:** Chat, Streaming
- **Configuration Example:**
-  
-  ```typescript
-  exo: {
-    baseUrl: 'http://localhost:8080/v1', // Optional
-    apiKey: 'your-api-key'               // Optional for local deployments
-  }
-  ```
-
-## Quick Start
-
-Initialize SmartAi with the provider configurations you plan to use:
+Initialization is the first step before using any AI functionalities. You should provide API tokens for each provider you plan to utilize.

 ```typescript
 import { SmartAi } from '@push.rocks/smartai';

 const smartAi = new SmartAi({
  openaiToken: 'your-openai-token',
-  xaiToken: 'your-xai-token',
  anthropicToken: 'your-anthropic-token',
  perplexityToken: 'your-perplexity-token',
+  xaiToken: 'your-xai-token',
  groqToken: 'your-groq-token',
  ollama: {
    baseUrl: 'http://localhost:11434',
-    model: 'llama2'
+    model: 'llama2',
+    visionModel: 'llava'
  },
  exo: {
    baseUrl: 'http://localhost:8080/v1',
@ -149,31 +43,33 @@ const smartAi = new SmartAi({
 await smartAi.start();
 ```

-## Usage Examples
-
 ### Chat Interactions

-**Synchronous Chat:**
+Interaction through chat is a key feature. SmartAi caters to both synchronous and asynchronous (streaming) chats across several AI models.
+
+#### Regular Synchronous Chat
+
+Connect with AI models via straightforward request-response interactions.

 ```typescript
-const response = await smartAi.openaiProvider.chat({
+const syncResponse = await smartAi.openaiProvider.chat({
  systemMessage: 'You are a helpful assistant.',
  userMessage: 'What is the capital of France?',
-  messageHistory: [] // Include previous conversation messages if applicable
+  messageHistory: [] // Could include context or preceding messages
 });

-console.log(response.message);
+console.log(syncResponse.message); // Outputs: "The capital of France is Paris."
 ```

-### Streaming Chat
+#### Real-Time Streaming Chat

-**Real-Time Streaming:**
+For continuous interaction and lower latency, engage in streaming chat.

 ```typescript
 const textEncoder = new TextEncoder();
 const textDecoder = new TextDecoder();

-// Create a transform stream for sending and receiving data
+// Establish a transform stream
 const { writable, readable } = new TransformStream();
 const writer = writable.getWriter();

@ -184,7 +80,7 @@ const message = {

 writer.write(textEncoder.encode(JSON.stringify(message) + '\n'));

-// Start streaming the response
+// Initiate streaming
 const stream = await smartAi.openaiProvider.chatStream(readable);
 const reader = stream.getReader();

@ -197,133 +93,130 @@ while (true) {

 ### Audio Generation

-Generate audio (supported by providers like OpenAI):
+Audio generation from textual input is possible using providers like OpenAI.

 ```typescript
 const audioStream = await smartAi.openaiProvider.audio({
-  message: 'Hello, this is a test of text-to-speech'
+  message: 'This is a test message for generating speech.'
 });

-// Process the audio stream, for example, play it or save to a file.
+// Use the audioStream, e.g., play it back or save it to a file.
 ```

-### Document Processing
+### Document Analysis

-Analyze and extract key information from documents:
+SmartAi can ingest and process documents, extracting meaningful information or performing classifications.

 ```typescript
-// Example using OpenAI
-const documentResult = await smartAi.openaiProvider.document({
-  systemMessage: 'Classify the document type',
-  userMessage: 'What type of document is this?',
-  messageHistory: [],
-  pdfDocuments: [pdfBuffer] // Uint8Array containing the PDF content
-});
-```
-
-Other providers (e.g., Ollama and Anthropic) follow a similar pattern:
-
-```typescript
-// Using Ollama for document processing
-const ollamaResult = await smartAi.ollamaProvider.document({
-  systemMessage: 'You are a document analysis assistant',
-  userMessage: 'Extract key information from this document',
+const pdfBuffer = await fetchPdf('https://example.com/document.pdf');
+const documentRes = await smartAi.openaiProvider.document({
+  systemMessage: 'Determine the nature of the document.',
+  userMessage: 'Classify this document.',
  messageHistory: [],
  pdfDocuments: [pdfBuffer]
 });
+
+console.log(documentRes.message); // Outputs: classified document type
 ```

+SmartAi allows easy switching between providers, thus giving developers flexibility:
+
 ```typescript
-// Using Anthropic for document processing
-const anthropicResult = await smartAi.anthropicProvider.document({
-  systemMessage: 'Analyze the document',
-  userMessage: 'Please extract the main points',
+const anthropicRes = await smartAi.anthropicProvider.document({
+  systemMessage: 'Analyze this document.',
+  userMessage: 'Extract core points.',
  messageHistory: [],
  pdfDocuments: [pdfBuffer]
 });
+
+console.log(anthropicRes.message); // Outputs: summarized core points
 ```

 ### Vision Processing

-Analyze images with vision capabilities:
+Engage AI models in analyzing and describing images:

 ```typescript
-// Using OpenAI GPT-4 Vision
-const imageDescription = await smartAi.openaiProvider.vision({
-  image: imageBuffer, // Uint8Array containing image data
-  prompt: 'What do you see in this image?'
+const imageBuffer = await fetchImage('path/to/image.jpg');
+
+// Using OpenAI's vision capabilities
+const visionOutput = await smartAi.openaiProvider.vision({
+  image: imageBuffer,
+  prompt: 'Describe the image.'
 });

-// Using Ollama for vision tasks
-const ollamaImageAnalysis = await smartAi.ollamaProvider.vision({
-  image: imageBuffer,
-  prompt: 'Analyze this image in detail'
-});
-
-// Using Anthropic for vision analysis
-const anthropicImageAnalysis = await smartAi.anthropicProvider.vision({
-  image: imageBuffer,
-  prompt: 'Describe the contents of this image'
-});
+console.log(visionOutput); // Outputs: image description
 ```

-## Error Handling
+Use other providers for more varied analysis:

-Always wrap API calls in try-catch blocks to manage errors effectively:
+```typescript
+const ollamaOutput = await smartAi.ollamaProvider.vision({
+  image: imageBuffer,
+  prompt: 'Detailed analysis required.'
+});
+
+console.log(ollamaOutput); // Outputs: detailed analysis results
+```
+
+### Error Handling
+
+Because calls to external AI providers can fail (network errors, invalid tokens, rate limits), wrap AI calls in try-catch blocks.

 ```typescript
 try {
-  const response = await smartAi.openaiProvider.chat({
-    systemMessage: 'You are a helpful assistant.',
-    userMessage: 'Hello!',
+  const response = await smartAi.anthropicProvider.chat({
+    systemMessage: 'Hello!',
+    userMessage: 'Help me out.',
    messageHistory: []
  });
  console.log(response.message);
 } catch (error: any) {
-  console.error('AI provider error:', error.message);
+  console.error('Encountered an error:', error.message);
 }
 ```

-## Development
+### Providers and Customization

-### Running Tests
+The library supports provider-specific customization, enabling tailored interactions:

-To run the test suite, use the following command:
+```typescript
+const smartAi = new SmartAi({
+  openaiToken: 'your-openai-token',
+  anthropicToken: 'your-anthropic-token',
+  ollama: {
+    baseUrl: 'http://localhost:11434',
+    model: 'llama2',
+    visionModel: 'llava'
+  }
+});

-```bash
-npm run test
+await smartAi.start();
 ```

-Ensure your environment is configured with the appropriate tokens and settings for the providers you are testing.
+### Advanced Streaming Customization

-### Building the Project
+Developers can implement real-time processing pipelines with custom transformations:

-Compile the TypeScript code and build the package using:
+```typescript
+const customProcessingStream = new TransformStream({
+  transform(chunk, controller) {
+    const processed = chunk.toUpperCase(); // Example transformation
+    controller.enqueue(processed);
+  }
+});

-```bash
-npm run build
+const processedStream = stream.pipeThrough(customProcessingStream);
+const processedReader = processedStream.getReader();
+
+while (true) {
+  const { done, value } = await processedReader.read();
+  if (done) break;
+  console.log('Processed Output:', value);
+}
 ```

-This command prepares the library for distribution.
-
-## Contributing
-
-Contributions are welcome! Please follow these steps:
-
-1. Fork the repository.
-2. Create a feature branch:  
-   ```bash
-   git checkout -b feature/my-feature
-   ```
-3. Commit your changes with clear messages:  
-   ```bash
-   git commit -m 'Add new feature'
-   ```
-4. Push your branch to your fork:  
-   ```bash
-   git push origin feature/my-feature
-   ```
-5. Open a Pull Request with a detailed description of your changes.
+This approach can facilitate adaptive content processing workflows.

 ## License and Legal Information

@ -342,4 +235,4 @@ Registered at District court Bremen HRB 35230 HB, Germany

 For any legal inquiries or if you require further information, please contact us via email at hello@task.vc.

-By using this repository, you acknowledge that you have read this section, agree to comply with its terms, and understand that the licensing of the code does not imply endorsement by Task Venture Capital GmbH of any derivative works.
+By using this repository, you acknowledge that you have read this section, agree to comply with its terms, and understand that the licensing of the code does not imply endorsement by Task Venture Capital GmbH of any derivative works.
--- a/test/test.ts
+++ b/test/test.ts
@ -1,4 +1,4 @@
-import { expect, expectAsync, tap } from '@push.rocks/tapbundle';
+import { expect, tap } from '@push.rocks/tapbundle';
 import * as qenv from '@push.rocks/qenv';
 import * as smartrequest from '@push.rocks/smartrequest';
 import * as smartfile from '@push.rocks/smartfile';
@ -21,8 +21,7 @@ tap.test('should create chat response with openai', async () => {
  const response = await testSmartai.openaiProvider.chat({
    systemMessage: 'Hello',
    userMessage: userMessage,
-    messageHistory: [
-    ],
+    messageHistory: [],
  });
  console.log(`userMessage: ${userMessage}`);
  console.log(response.message);
@ -55,7 +54,7 @@ tap.test('should recognize companies in a pdf', async () => {
            address: string;
            city: string;
            country: string;
-            EU: boolean; // wether the entity is within EU
+            EU: boolean; // whether the entity is within EU
          };
          entityReceiver: {
            type: 'official state entity' | 'company' | 'person';
@ -63,7 +62,7 @@ tap.test('should recognize companies in a pdf', async () => {
            address: string;
            city: string;
            country: string;
-            EU: boolean; // wether the entity is within EU
+            EU: boolean; // whether the entity is within EU
          };
          date: string; // the date of the document as YYYY-MM-DD
          title: string; // a short title, suitable for a filename
@ -75,10 +74,27 @@ tap.test('should recognize companies in a pdf', async () => {
    pdfDocuments: [pdfBuffer],
  });
  console.log(result);
-})
+});
+
+tap.test('should create audio response with openai', async () => {
+  // Call the audio method with a sample message.
+  const audioStream = await testSmartai.openaiProvider.audio({
+    message: 'This is a test of audio generation.',
+  });
+  // Read all chunks from the stream.
+  const chunks: Uint8Array[] = [];
+  for await (const chunk of audioStream) {
+    chunks.push(chunk as Uint8Array);
+  }
+  const audioBuffer = Buffer.concat(chunks);
+  await smartfile.fs.toFs(audioBuffer, './.nogit/testoutput.mp3');
+  console.log(`Audio Buffer length: ${audioBuffer.length}`);
+  // Assert that the resulting buffer is not empty.
+  expect(audioBuffer.length).toBeGreaterThan(0);
+});

 tap.test('should stop the smartai instance', async () => {
  await testSmartai.stop();
 });

-export default tap.start();
+export default tap.start();
--- a/ts/00_commitinfo_data.ts
+++ b/ts/00_commitinfo_data.ts
@ -3,6 +3,6 @@
 */
 export const commitinfo = {
  name: '@push.rocks/smartai',
-  version: '0.4.1',
-  description: 'A TypeScript library for integrating and interacting with multiple AI models, offering capabilities for chat and potentially audio responses.'
+  version: '0.5.4',
+  description: 'SmartAi is a versatile TypeScript library designed to facilitate integration and interaction with various AI models, offering functionalities for chat, audio generation, document processing, and vision tasks.'
 }
--- a/ts/provider.openai.ts
+++ b/ts/provider.openai.ts
@ -75,21 +75,23 @@ export class OpenAiProvider extends MultiModalModel {
        // If we have a complete message, send it to OpenAI
        if (currentMessage) {
          const messageToSend = { role: "user" as const, content: currentMessage.content };
-          const stream = await this.openAiApiClient.chat.completions.create({
-            model: this.options.chatModel ?? 'o3-mini',
-            temperature: 0,
+          const chatModel = this.options.chatModel ?? 'o3-mini';
+          const requestParams: any = {
+            model: chatModel,
            messages: [messageToSend],
            stream: true,
-          });
-
+          };
+          // Temperature is omitted since the model does not support it.
+          const stream = await this.openAiApiClient.chat.completions.create(requestParams);
+          // Explicitly cast the stream as an async iterable to satisfy TypeScript.
+          const streamAsyncIterable = stream as unknown as AsyncIterableIterator<any>;
          // Process each chunk from OpenAI
-          for await (const chunk of stream) {
+          for await (const chunk of streamAsyncIterable) {
            const content = chunk.choices[0]?.delta?.content;
            if (content) {
              controller.enqueue(content);
            }
          }
-
          currentMessage = null;
        }
      },
@ -119,15 +121,17 @@ export class OpenAiProvider extends MultiModalModel {
      content: string;
    }[];
  }) {
-    const result = await this.openAiApiClient.chat.completions.create({
-      model: this.options.chatModel ?? 'o3-mini',
-      temperature: 0,
+    const chatModel = this.options.chatModel ?? 'o3-mini';
+    const requestParams: any = {
+      model: chatModel,
      messages: [
        { role: 'system', content: optionsArg.systemMessage },
        ...optionsArg.messageHistory,
        { role: 'user', content: optionsArg.userMessage },
      ],
-    });
+    };
+    // Temperature parameter removed to avoid unsupported error.
+    const result = await this.openAiApiClient.chat.completions.create(requestParams);
    return {
      role: result.choices[0].message.role as 'assistant',
      message: result.choices[0].message.content,
@ -137,7 +141,7 @@ export class OpenAiProvider extends MultiModalModel {
  public async audio(optionsArg: { message: string }): Promise<NodeJS.ReadableStream> {
    const done = plugins.smartpromise.defer<NodeJS.ReadableStream>();
    const result = await this.openAiApiClient.audio.speech.create({
-      model: this.options.audioModel ?? 'o3-mini',
+      model: this.options.audioModel ?? 'tts-1-hd',
      input: optionsArg.message,
      voice: 'nova',
      response_format: 'mp3',
@ -159,27 +163,30 @@ export class OpenAiProvider extends MultiModalModel {
  }) {
    let pdfDocumentImageBytesArray: Uint8Array[] = [];

+    // Convert each PDF into one or more image byte arrays.
+    const smartpdfInstance = new plugins.smartpdf.SmartPdf();
+    await smartpdfInstance.start();
    for (const pdfDocument of optionsArg.pdfDocuments) {
-      const documentImageArray = await this.smartpdfInstance.convertPDFToPngBytes(pdfDocument);
+      const documentImageArray = await smartpdfInstance.convertPDFToPngBytes(pdfDocument);
      pdfDocumentImageBytesArray = pdfDocumentImageBytesArray.concat(documentImageArray);
    }
+    await smartpdfInstance.stop();

    console.log(`image smartfile array`);
    console.log(pdfDocumentImageBytesArray.map((smartfile) => smartfile.length));

-    const smartfileArray = await plugins.smartarray.map(
-      pdfDocumentImageBytesArray,
-      async (pdfDocumentImageBytes) => {
-        return plugins.smartfile.SmartFile.fromBuffer(
-          'pdfDocumentImage.jpg',
-          Buffer.from(pdfDocumentImageBytes)
-        );
-      }
-    );
+    // Filter out any empty buffers to avoid sending invalid image URLs.
+    const validImageBytesArray = pdfDocumentImageBytesArray.filter(imageBytes => imageBytes && imageBytes.length > 0);
+    const imageAttachments = validImageBytesArray.map(imageBytes => ({
+      type: 'image_url',
+      image_url: {
+        url: 'data:image/png;base64,' + Buffer.from(imageBytes).toString('base64'),
+      },
+    }));

-    const result = await this.openAiApiClient.chat.completions.create({
-      model: this.options.chatModel ?? 'o3-mini',
-      temperature: 0,
+    const chatModel = this.options.chatModel ?? 'o4-mini';
+    const requestParams: any = {
+      model: chatModel,
      messages: [
        { role: 'system', content: optionsArg.systemMessage },
        ...optionsArg.messageHistory,
@ -187,31 +194,22 @@ export class OpenAiProvider extends MultiModalModel {
          role: 'user',
          content: [
            { type: 'text', text: optionsArg.userMessage },
-            ...(() => {
-              const returnArray = [];
-              for (const imageBytes of pdfDocumentImageBytesArray) {
-                returnArray.push({
-                  type: 'image_url',
-                  image_url: {
-                    url: 'data:image/png;base64,' + Buffer.from(imageBytes).toString('base64'),
-                  },
-                });
-              }
-              return returnArray;
-            })(),
+            ...imageAttachments,
          ],
        },
      ],
-    });
+    };
+    // Temperature parameter removed.
+    const result = await this.openAiApiClient.chat.completions.create(requestParams);
    return {
      message: result.choices[0].message,
    };
  }

  public async vision(optionsArg: { image: Buffer; prompt: string }): Promise<string> {
-    const result = await this.openAiApiClient.chat.completions.create({
-      model: this.options.visionModel ?? 'o3-mini',
-      temperature: 0,
+    const visionModel = this.options.visionModel ?? '04-mini';
+    const requestParams: any = {
+      model: visionModel,
      messages: [
        {
          role: 'user',
@ -227,8 +225,8 @@ export class OpenAiProvider extends MultiModalModel {
        }
      ],
      max_tokens: 300
-    });
-
+    };
+    const result = await this.openAiApiClient.chat.completions.create(requestParams);
    return result.choices[0].message.content || '';
  }
 }
Author	SHA1	Message	Date
Philipp Kunz	6bdbeae144	0.5.4	2025-05-13 18:39:58 +00:00
Philipp Kunz	09c27379cb	fix(provider.openai): Update dependency versions, clean test imports, and adjust default OpenAI model configurations	2025-05-13 18:39:57 +00:00
Philipp Kunz	2bc6f7ee5e	0.5.3	2025-04-03 21:46:40 +00:00
Philipp Kunz	0ac50d647d	fix(package.json): Add explicit packageManager field to package.json	2025-04-03 21:46:40 +00:00
Philipp Kunz	5f9ffc7356	0.5.2	2025-04-03 21:46:15 +00:00
Philipp Kunz	502b665224	fix(readme): Remove redundant conclusion section from README to streamline documentation.	2025-04-03 21:46:14 +00:00
Philipp Kunz	bda0d7ed7e	0.5.1	2025-02-25 19:15:32 +00:00
Philipp Kunz	de2a60d12f	fix(OpenAiProvider): Corrected audio model ID in OpenAiProvider	2025-02-25 19:15:32 +00:00
Philipp Kunz	5b3a93a43a	0.5.0	2025-02-25 19:04:40 +00:00
Philipp Kunz	6b241f8889	feat(documentation and configuration): Enhanced package and README documentation	2025-02-25 19:04:40 +00:00
Philipp Kunz	0a80ac0a8a	0.4.2	2025-02-25 18:23:28 +00:00
Philipp Kunz	6ce442354e	fix(core): Fix OpenAI chat streaming and PDF document processing logic.	2025-02-25 18:23:28 +00:00