Compare commits
6 Commits
- de940dff75
- 4fc1e029e4
- d0a4151a2b
- ad5dd4799b
- 1c49af74ac
- eda8ce36df

changelog.md (22 changes)
```diff
@@ -1,5 +1,27 @@
 # Changelog
 
+## 2025-02-05 - 0.3.0 - feat(integration-xai)
+Add support for X.AI provider with chat and document processing capabilities.
+
+- Introduced XAIProvider class for integrating X.AI features.
+- Implemented chat streaming and synchronous chat for X.AI.
+- Enabled document processing capabilities with PDF conversion in X.AI.
+
+## 2025-02-03 - 0.2.0 - feat(provider.anthropic)
+Add support for vision and document processing in Anthropic provider
+
+- Implemented vision tasks for Anthropic provider using Claude-3-opus-20240229 model.
+- Implemented document processing for Anthropic provider, supporting conversion of PDF documents to images and analysis with Claude-3-opus-20240229 model.
+- Updated documentation to reflect the new capabilities of the Anthropic provider.
+
+## 2025-02-03 - 0.1.0 - feat(providers)
+Add vision and document processing capabilities to providers
+
+- OpenAI and Ollama providers now support vision tasks using GPT-4 Vision and Llava models respectively.
+- Document processing has been implemented for OpenAI and Ollama providers, converting PDFs to images for analysis.
+- Introduced abstract methods for vision and document processing in the MultiModalModel class.
+- Updated the readme file with examples for vision and document processing.
+
 ## 2025-02-03 - 0.0.19 - fix(core)
 Enhanced chat streaming and error handling across providers
 
```
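A minimal sketch of the synchronous chat added in 0.3.0, assuming `XAIProvider` is exported from the package root (this compare view does not show the package's export surface):

```typescript
// Sketch only: the import path is an assumption; constructor and chat() signature
// follow the XAIProvider code further down in this diff.
import { XAIProvider } from '@push.rocks/smartai';

const xai = new XAIProvider({ xaiToken: 'your-xai-token' });
await xai.start(); // creates the OpenAI-compatible client pointed at https://api.x.ai/v1

const reply = await xai.chat({
  systemMessage: 'You are a concise assistant.',
  userMessage: 'Summarize this changelog entry in one sentence.',
  messageHistory: [],
});
console.log(reply.message); // assistant text from grok-2-latest
```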
license (new file, 19 lines)

```
Copyright (c) 2024 Task Venture Capital GmbH (hello@task.vc)

Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:

The above copyright notice and this permission notice shall be included in all
copies or substantial portions of the Software.

THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
SOFTWARE.
```
package.json

```diff
@@ -1,6 +1,6 @@
 {
   "name": "@push.rocks/smartai",
-  "version": "0.0.19",
+  "version": "0.3.0",
   "private": false,
   "description": "A TypeScript library for integrating and interacting with multiple AI models, offering capabilities for chat and potentially audio responses.",
   "main": "dist_ts/index.js",
```
readme.md (68 changes)

````diff
@@ -17,16 +17,24 @@ This command installs the package and adds it to your project's dependencies.
 @push.rocks/smartai supports multiple AI providers, each with its own unique capabilities:
 
 ### OpenAI
-- Models: GPT-4, GPT-3.5-turbo
+- Models: GPT-4, GPT-3.5-turbo, GPT-4-vision-preview
-- Features: Chat, Streaming, Audio Generation
+- Features: Chat, Streaming, Audio Generation, Vision, Document Processing
 - Configuration:
 ```typescript
 openaiToken: 'your-openai-token'
 ```
 
+### X.AI
+- Models: Grok-2-latest
+- Features: Chat, Streaming, Document Processing
+- Configuration:
+```typescript
+xaiToken: 'your-xai-token'
+```
+
 ### Anthropic
 - Models: Claude-3-opus-20240229
-- Features: Chat, Streaming
+- Features: Chat, Streaming, Vision, Document Processing
 - Configuration:
 ```typescript
 anthropicToken: 'your-anthropic-token'
@@ -49,12 +57,13 @@ This command installs the package and adds it to your project's dependencies.
 ```
 
 ### Ollama
-- Models: Configurable (default: llama2)
+- Models: Configurable (default: llama2, llava for vision/documents)
-- Features: Chat, Streaming
+- Features: Chat, Streaming, Vision, Document Processing
 - Configuration:
 ```typescript
 baseUrl: 'http://localhost:11434' // Optional
 model: 'llama2' // Optional
+visionModel: 'llava' // Optional, for vision and document tasks
 ```
 
 ## Usage
@@ -74,6 +83,7 @@ import { SmartAi } from '@push.rocks/smartai';
 
 const smartAi = new SmartAi({
   openaiToken: 'your-openai-token',
+  xaiToken: 'your-xai-token',
   anthropicToken: 'your-anthropic-token',
   perplexityToken: 'your-perplexity-token',
   groqToken: 'your-groq-token',
@@ -147,15 +157,61 @@ const audioStream = await smartAi.openaiProvider.audio({
 
 ### Document Processing
 
-For providers that support document processing (currently OpenAI):
+For providers that support document processing (OpenAI, Ollama, and Anthropic):
 
 ```typescript
+// Using OpenAI
 const result = await smartAi.openaiProvider.document({
   systemMessage: 'Classify the document type',
   userMessage: 'What type of document is this?',
   messageHistory: [],
   pdfDocuments: [pdfBuffer] // Uint8Array of PDF content
 });
+
+// Using Ollama with llava
+const analysis = await smartAi.ollamaProvider.document({
+  systemMessage: 'You are a document analysis assistant',
+  userMessage: 'Extract the key information from this document',
+  messageHistory: [],
+  pdfDocuments: [pdfBuffer] // Uint8Array of PDF content
+});
+
+// Using Anthropic with Claude 3
+const anthropicAnalysis = await smartAi.anthropicProvider.document({
+  systemMessage: 'You are a document analysis assistant',
+  userMessage: 'Please analyze this document and extract key information',
+  messageHistory: [],
+  pdfDocuments: [pdfBuffer] // Uint8Array of PDF content
+});
+```
+
+These providers will:
+1. Convert PDF documents to images
+2. Process each page using their vision models
+3. Return a comprehensive analysis based on the system message and user query
+
+### Vision Processing
+
+For providers that support vision tasks (OpenAI, Ollama, and Anthropic):
+
+```typescript
+// Using OpenAI's GPT-4 Vision
+const description = await smartAi.openaiProvider.vision({
+  image: imageBuffer, // Buffer containing the image data
+  prompt: 'What do you see in this image?'
+});
+
+// Using Ollama's Llava model
+const analysis = await smartAi.ollamaProvider.vision({
+  image: imageBuffer,
+  prompt: 'Analyze this image in detail'
+});
+
+// Using Anthropic's Claude 3
+const anthropicAnalysis = await smartAi.anthropicProvider.vision({
+  image: imageBuffer,
+  prompt: 'Please analyze this image and describe what you see'
+});
 ```
 
 ## Error Handling
````
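The readme examples above reference a `pdfBuffer` without constructing it; a hedged sketch of producing one in Node.js and passing it to `document()` (the file path is a placeholder, and any additional SmartAi initialization the library requires is omitted):

```typescript
// Sketch: turning a PDF on disk into the Uint8Array the document() examples expect.
// './example.pdf' is a hypothetical path; openaiProvider.document() matches the readme above.
import { promises as fs } from 'fs';
import { SmartAi } from '@push.rocks/smartai';

const smartAi = new SmartAi({ openaiToken: 'your-openai-token' });

const pdfBuffer = new Uint8Array(await fs.readFile('./example.pdf'));

const result = await smartAi.openaiProvider.document({
  systemMessage: 'Classify the document type',
  userMessage: 'What type of document is this?',
  messageHistory: [],
  pdfDocuments: [pdfBuffer],
});
console.log(result.message);
```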
commitinfo

```diff
@@ -3,6 +3,6 @@
  */
 export const commitinfo = {
   name: '@push.rocks/smartai',
-  version: '0.0.19',
+  version: '0.3.0',
   description: 'A TypeScript library for integrating and interacting with multiple AI models, offering capabilities for chat and potentially audio responses.'
 }
```
MultiModalModel (abstract.classes.multimodal)

```diff
@@ -62,4 +62,25 @@ export abstract class MultiModalModel {
    * @throws Error if the provider doesn't support audio generation
    */
   public abstract audio(optionsArg: { message: string }): Promise<NodeJS.ReadableStream>;
+
+  /**
+   * Vision-language processing
+   * @param optionsArg Options containing the image and prompt for analysis
+   * @returns Promise resolving to the model's description or analysis of the image
+   * @throws Error if the provider doesn't support vision tasks
+   */
+  public abstract vision(optionsArg: { image: Buffer; prompt: string }): Promise<string>;
+
+  /**
+   * Document analysis and processing
+   * @param optionsArg Options containing system message, user message, PDF documents, and message history
+   * @returns Promise resolving to the model's analysis of the documents
+   * @throws Error if the provider doesn't support document processing
+   */
+  public abstract document(optionsArg: {
+    systemMessage: string;
+    userMessage: string;
+    pdfDocuments: Uint8Array[];
+    messageHistory: ChatMessage[];
+  }): Promise<{ message: any }>;
 }
```
AnthropicProvider

```diff
@@ -2,6 +2,9 @@ import * as plugins from './plugins.js';
 import * as paths from './paths.js';
 import { MultiModalModel } from './abstract.classes.multimodal.js';
 import type { ChatOptions, ChatResponse, ChatMessage } from './abstract.classes.multimodal.js';
+import type { ImageBlockParam, TextBlockParam } from '@anthropic-ai/sdk/resources/messages';
+
+type ContentBlock = ImageBlockParam | TextBlockParam;
 
 export interface IAnthropicProviderOptions {
   anthropicToken: string;
@@ -130,4 +133,108 @@ export class AnthropicProvider extends MultiModalModel {
     // Anthropic does not provide an audio API, so this method is not implemented.
     throw new Error('Audio generation is not yet supported by Anthropic.');
   }
+
+  public async vision(optionsArg: { image: Buffer; prompt: string }): Promise<string> {
+    const base64Image = optionsArg.image.toString('base64');
+
+    const content: ContentBlock[] = [
+      {
+        type: 'text',
+        text: optionsArg.prompt
+      },
+      {
+        type: 'image',
+        source: {
+          type: 'base64',
+          media_type: 'image/jpeg',
+          data: base64Image
+        }
+      }
+    ];
+
+    const result = await this.anthropicApiClient.messages.create({
+      model: 'claude-3-opus-20240229',
+      messages: [{
+        role: 'user',
+        content
+      }],
+      max_tokens: 1024
+    });
+
+    // Extract text content from the response
+    let message = '';
+    for (const block of result.content) {
+      if ('text' in block) {
+        message += block.text;
+      }
+    }
+    return message;
+  }
+
+  public async document(optionsArg: {
+    systemMessage: string;
+    userMessage: string;
+    pdfDocuments: Uint8Array[];
+    messageHistory: ChatMessage[];
+  }): Promise<{ message: any }> {
+    // Convert PDF documents to images using SmartPDF
+    const smartpdfInstance = new plugins.smartpdf.SmartPdf();
+    let documentImageBytesArray: Uint8Array[] = [];
+
+    for (const pdfDocument of optionsArg.pdfDocuments) {
+      const documentImageArray = await smartpdfInstance.convertPDFToPngBytes(pdfDocument);
+      documentImageBytesArray = documentImageBytesArray.concat(documentImageArray);
+    }
+
+    // Convert message history to Anthropic format
+    const messages = optionsArg.messageHistory.map(msg => ({
+      role: msg.role === 'assistant' ? 'assistant' as const : 'user' as const,
+      content: msg.content
+    }));
+
+    // Create content array with text and images
+    const content: ContentBlock[] = [
+      {
+        type: 'text',
+        text: optionsArg.userMessage
+      }
+    ];
+
+    // Add each document page as an image
+    for (const imageBytes of documentImageBytesArray) {
+      content.push({
+        type: 'image',
+        source: {
+          type: 'base64',
+          media_type: 'image/jpeg',
+          data: Buffer.from(imageBytes).toString('base64')
+        }
+      });
+    }
+
+    const result = await this.anthropicApiClient.messages.create({
+      model: 'claude-3-opus-20240229',
+      system: optionsArg.systemMessage,
+      messages: [
+        ...messages,
+        { role: 'user', content }
+      ],
+      max_tokens: 4096
+    });
+
+    // Extract text content from the response
+    let message = '';
+    for (const block of result.content) {
+      if ('text' in block) {
+        message += block.text;
+      }
+    }
+
+    return {
+      message: {
+        role: 'assistant',
+        content: message
+      }
+    };
+  }
 }
```
GroqProvider

```diff
@@ -176,4 +176,17 @@ export class GroqProvider extends MultiModalModel {
     // Groq does not provide an audio API, so this method is not implemented.
     throw new Error('Audio generation is not yet supported by Groq.');
   }
+
+  public async vision(optionsArg: { image: Buffer; prompt: string }): Promise<string> {
+    throw new Error('Vision tasks are not yet supported by Groq.');
+  }
+
+  public async document(optionsArg: {
+    systemMessage: string;
+    userMessage: string;
+    pdfDocuments: Uint8Array[];
+    messageHistory: ChatMessage[];
+  }): Promise<{ message: any }> {
+    throw new Error('Document processing is not yet supported by Groq.');
+  }
 }
```
OllamaProvider

```diff
@@ -6,18 +6,21 @@ import type { ChatOptions, ChatResponse, ChatMessage } from './abstract.classes.
 export interface IOllamaProviderOptions {
   baseUrl?: string;
   model?: string;
+  visionModel?: string; // Model to use for vision tasks (e.g. 'llava')
 }
 
 export class OllamaProvider extends MultiModalModel {
   private options: IOllamaProviderOptions;
   private baseUrl: string;
   private model: string;
+  private visionModel: string;
 
   constructor(optionsArg: IOllamaProviderOptions = {}) {
     super();
     this.options = optionsArg;
     this.baseUrl = optionsArg.baseUrl || 'http://localhost:11434';
     this.model = optionsArg.model || 'llama2';
+    this.visionModel = optionsArg.visionModel || 'llava';
   }
 
   async start() {
@@ -167,4 +170,83 @@ export class OllamaProvider extends MultiModalModel {
   public async audio(optionsArg: { message: string }): Promise<NodeJS.ReadableStream> {
     throw new Error('Audio generation is not supported by Ollama.');
   }
+
+  public async vision(optionsArg: { image: Buffer; prompt: string }): Promise<string> {
+    const base64Image = optionsArg.image.toString('base64');
+
+    const response = await fetch(`${this.baseUrl}/api/chat`, {
+      method: 'POST',
+      headers: {
+        'Content-Type': 'application/json',
+      },
+      body: JSON.stringify({
+        model: this.visionModel,
+        messages: [{
+          role: 'user',
+          content: optionsArg.prompt,
+          images: [base64Image]
+        }],
+        stream: false
+      }),
+    });
+
+    if (!response.ok) {
+      throw new Error(`Ollama API error: ${response.statusText}`);
+    }
+
+    const result = await response.json();
+    return result.message.content;
+  }
+
+  public async document(optionsArg: {
+    systemMessage: string;
+    userMessage: string;
+    pdfDocuments: Uint8Array[];
+    messageHistory: ChatMessage[];
+  }): Promise<{ message: any }> {
+    // Convert PDF documents to images using SmartPDF
+    const smartpdfInstance = new plugins.smartpdf.SmartPdf();
+    let documentImageBytesArray: Uint8Array[] = [];
+
+    for (const pdfDocument of optionsArg.pdfDocuments) {
+      const documentImageArray = await smartpdfInstance.convertPDFToPngBytes(pdfDocument);
+      documentImageBytesArray = documentImageBytesArray.concat(documentImageArray);
+    }
+
+    // Convert images to base64
+    const base64Images = documentImageBytesArray.map(bytes => Buffer.from(bytes).toString('base64'));
+
+    // Send request to Ollama with images
+    const response = await fetch(`${this.baseUrl}/api/chat`, {
+      method: 'POST',
+      headers: {
+        'Content-Type': 'application/json',
+      },
+      body: JSON.stringify({
+        model: this.visionModel,
+        messages: [
+          { role: 'system', content: optionsArg.systemMessage },
+          ...optionsArg.messageHistory,
+          {
+            role: 'user',
+            content: optionsArg.userMessage,
+            images: base64Images
+          }
+        ],
+        stream: false
+      }),
+    });
+
+    if (!response.ok) {
+      throw new Error(`Ollama API error: ${response.statusText}`);
+    }
+
+    const result = await response.json();
+    return {
+      message: {
+        role: 'assistant',
+        content: result.message.content
+      }
+    };
+  }
 }
```
OpenAiProvider

```diff
@@ -192,4 +192,27 @@ export class OpenAiProvider extends MultiModalModel {
       message: result.choices[0].message,
     };
   }
+
+  public async vision(optionsArg: { image: Buffer; prompt: string }): Promise<string> {
+    const result = await this.openAiApiClient.chat.completions.create({
+      model: 'gpt-4-vision-preview',
+      messages: [
+        {
+          role: 'user',
+          content: [
+            { type: 'text', text: optionsArg.prompt },
+            {
+              type: 'image_url',
+              image_url: {
+                url: `data:image/jpeg;base64,${optionsArg.image.toString('base64')}`
+              }
+            }
+          ]
+        }
+      ],
+      max_tokens: 300
+    });
+
+    return result.choices[0].message.content || '';
+  }
 }
```
PerplexityProvider

```diff
@@ -155,4 +155,17 @@ export class PerplexityProvider extends MultiModalModel {
   public async audio(optionsArg: { message: string }): Promise<NodeJS.ReadableStream> {
     throw new Error('Audio generation is not supported by Perplexity.');
   }
+
+  public async vision(optionsArg: { image: Buffer; prompt: string }): Promise<string> {
+    throw new Error('Vision tasks are not supported by Perplexity.');
+  }
+
+  public async document(optionsArg: {
+    systemMessage: string;
+    userMessage: string;
+    pdfDocuments: Uint8Array[];
+    messageHistory: ChatMessage[];
+  }): Promise<{ message: any }> {
+    throw new Error('Document processing is not supported by Perplexity.');
+  }
 }
```
ts/provider.xai.ts (new file, 183 lines)

```typescript
import * as plugins from './plugins.js';
import * as paths from './paths.js';
import { MultiModalModel } from './abstract.classes.multimodal.js';
import type { ChatOptions, ChatResponse, ChatMessage } from './abstract.classes.multimodal.js';
import type { ChatCompletionMessageParam } from 'openai/resources/chat/completions';

export interface IXAIProviderOptions {
  xaiToken: string;
}

export class XAIProvider extends MultiModalModel {
  private options: IXAIProviderOptions;
  public openAiApiClient: plugins.openai.default;
  public smartpdfInstance: plugins.smartpdf.SmartPdf;

  constructor(optionsArg: IXAIProviderOptions) {
    super();
    this.options = optionsArg;
  }

  public async start() {
    this.openAiApiClient = new plugins.openai.default({
      apiKey: this.options.xaiToken,
      baseURL: 'https://api.x.ai/v1',
    });
    this.smartpdfInstance = new plugins.smartpdf.SmartPdf();
  }

  public async stop() {}

  public async chatStream(input: ReadableStream<Uint8Array>): Promise<ReadableStream<string>> {
    // Create a TextDecoder to handle incoming chunks
    const decoder = new TextDecoder();
    let buffer = '';
    let currentMessage: { role: string; content: string; } | null = null;

    // Create a TransformStream to process the input
    const transform = new TransformStream<Uint8Array, string>({
      async transform(chunk, controller) {
        buffer += decoder.decode(chunk, { stream: true });

        // Try to parse complete JSON messages from the buffer
        while (true) {
          const newlineIndex = buffer.indexOf('\n');
          if (newlineIndex === -1) break;

          const line = buffer.slice(0, newlineIndex);
          buffer = buffer.slice(newlineIndex + 1);

          if (line.trim()) {
            try {
              const message = JSON.parse(line);
              currentMessage = {
                role: message.role || 'user',
                content: message.content || '',
              };
            } catch (e) {
              console.error('Failed to parse message:', e);
            }
          }
        }

        // If we have a complete message, send it to X.AI
        if (currentMessage) {
          const stream = await this.openAiApiClient.chat.completions.create({
            model: 'grok-2-latest',
            messages: [{ role: currentMessage.role, content: currentMessage.content }],
            stream: true,
          });

          // Process each chunk from X.AI
          for await (const chunk of stream) {
            const content = chunk.choices[0]?.delta?.content;
            if (content) {
              controller.enqueue(content);
            }
          }

          currentMessage = null;
        }
      },

      flush(controller) {
        if (buffer) {
          try {
            const message = JSON.parse(buffer);
            controller.enqueue(message.content || '');
          } catch (e) {
            console.error('Failed to parse remaining buffer:', e);
          }
        }
      }
    });

    // Connect the input to our transform stream
    return input.pipeThrough(transform);
  }

  public async chat(optionsArg: {
    systemMessage: string;
    userMessage: string;
    messageHistory: { role: string; content: string; }[];
  }): Promise<{ role: 'assistant'; message: string; }> {
    // Prepare messages array with system message, history, and user message
    const messages: ChatCompletionMessageParam[] = [
      { role: 'system', content: optionsArg.systemMessage },
      ...optionsArg.messageHistory.map(msg => ({
        role: msg.role as 'system' | 'user' | 'assistant',
        content: msg.content
      })),
      { role: 'user', content: optionsArg.userMessage }
    ];

    // Call X.AI's chat completion API
    const completion = await this.openAiApiClient.chat.completions.create({
      model: 'grok-2-latest',
      messages: messages,
      stream: false,
    });

    // Return the assistant's response
    return {
      role: 'assistant',
      message: completion.choices[0]?.message?.content || ''
    };
  }

  public async audio(optionsArg: { message: string }): Promise<NodeJS.ReadableStream> {
    throw new Error('Audio generation is not supported by X.AI');
  }

  public async vision(optionsArg: { image: Buffer; prompt: string }): Promise<string> {
    throw new Error('Vision tasks are not supported by X.AI');
  }

  public async document(optionsArg: {
    systemMessage: string;
    userMessage: string;
    pdfDocuments: Uint8Array[];
    messageHistory: { role: string; content: string; }[];
  }): Promise<{ message: any }> {
    // First convert PDF documents to images
    let pdfDocumentImageBytesArray: Uint8Array[] = [];

    for (const pdfDocument of optionsArg.pdfDocuments) {
      const documentImageArray = await this.smartpdfInstance.convertPDFToPngBytes(pdfDocument);
      pdfDocumentImageBytesArray = pdfDocumentImageBytesArray.concat(documentImageArray);
    }

    // Convert images to base64 for inclusion in the message
    const imageBase64Array = pdfDocumentImageBytesArray.map(bytes =>
      Buffer.from(bytes).toString('base64')
    );

    // Combine document images into the user message
    const enhancedUserMessage = `
${optionsArg.userMessage}

Document contents (as images):
${imageBase64Array.map((img, i) => `Image ${i + 1}: <image data>`).join('\n')}
`;

    // Use chat completion to analyze the documents
    const messages: ChatCompletionMessageParam[] = [
      { role: 'system', content: optionsArg.systemMessage },
      ...optionsArg.messageHistory.map(msg => ({
        role: msg.role as 'system' | 'user' | 'assistant',
        content: msg.content
      })),
      { role: 'user', content: enhancedUserMessage }
    ];

    const completion = await this.openAiApiClient.chat.completions.create({
      model: 'grok-2-latest',
      messages: messages,
      stream: false,
    });

    return {
      message: completion.choices[0]?.message?.content || ''
    };
  }
}
```
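A hedged sketch of how the `chatStream()` interface above is meant to be driven: it expects newline-delimited JSON messages as `Uint8Array` chunks and resolves to a stream of response text. The import path is an assumption, since the package's exports are not shown in this diff.

```typescript
// Sketch only: export path assumed; chatStream() parses { role, content } JSON lines.
import { XAIProvider } from '@push.rocks/smartai';

const provider = new XAIProvider({ xaiToken: 'your-xai-token' });
await provider.start();

// Encode one newline-terminated JSON message into a web ReadableStream.
const encoder = new TextEncoder();
const input = new ReadableStream<Uint8Array>({
  start(controller) {
    controller.enqueue(
      encoder.encode(JSON.stringify({ role: 'user', content: 'Hello, Grok!' }) + '\n')
    );
    controller.close();
  },
});

// The returned stream emits the assistant's reply chunk by chunk.
const reader = (await provider.chatStream(input)).getReader();
while (true) {
  const { value, done } = await reader.read();
  if (done) break;
  process.stdout.write(value);
}
```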