4 Commits

7 changed files with 340 additions and 7 deletions

View File

@ -1,5 +1,19 @@
# Changelog # Changelog
## 2025-02-05 - 0.3.0 - feat(integration-xai)
Add support for X.AI provider with chat and document processing capabilities.
- Introduced XAIProvider class for integrating X.AI features.
- Implemented chat streaming and synchronous chat for X.AI.
- Enabled document processing capabilities with PDF conversion in X.AI.
## 2025-02-03 - 0.2.0 - feat(provider.anthropic)
Add support for vision and document processing in Anthropic provider
- Implemented vision tasks for Anthropic provider using Claude-3-opus-20240229 model.
- Implemented document processing for Anthropic provider, supporting conversion of PDF documents to images and analysis with Claude-3-opus-20240229 model.
- Updated documentation to reflect the new capabilities of the Anthropic provider.
## 2025-02-03 - 0.1.0 - feat(providers) ## 2025-02-03 - 0.1.0 - feat(providers)
Add vision and document processing capabilities to providers Add vision and document processing capabilities to providers

19
license Normal file
View File

@ -0,0 +1,19 @@
Copyright (c) 2024 Task Venture Capital GmbH (hello@task.vc)
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:
The above copyright notice and this permission notice shall be included in all
copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
SOFTWARE.

View File

@ -1,6 +1,6 @@
{ {
"name": "@push.rocks/smartai", "name": "@push.rocks/smartai",
"version": "0.1.0", "version": "0.3.0",
"private": false, "private": false,
"description": "A TypeScript library for integrating and interacting with multiple AI models, offering capabilities for chat and potentially audio responses.", "description": "A TypeScript library for integrating and interacting with multiple AI models, offering capabilities for chat and potentially audio responses.",
"main": "dist_ts/index.js", "main": "dist_ts/index.js",

View File

@ -24,9 +24,17 @@ This command installs the package and adds it to your project's dependencies.
openaiToken: 'your-openai-token' openaiToken: 'your-openai-token'
``` ```
### X.AI
- Models: Grok-2-latest
- Features: Chat, Streaming, Document Processing
- Configuration:
```typescript
xaiToken: 'your-xai-token'
```
### Anthropic ### Anthropic
- Models: Claude-3-opus-20240229 - Models: Claude-3-opus-20240229
- Features: Chat, Streaming - Features: Chat, Streaming, Vision, Document Processing
- Configuration: - Configuration:
```typescript ```typescript
anthropicToken: 'your-anthropic-token' anthropicToken: 'your-anthropic-token'
@ -75,6 +83,7 @@ import { SmartAi } from '@push.rocks/smartai';
const smartAi = new SmartAi({ const smartAi = new SmartAi({
openaiToken: 'your-openai-token', openaiToken: 'your-openai-token',
xaiToken: 'your-xai-token',
anthropicToken: 'your-anthropic-token', anthropicToken: 'your-anthropic-token',
perplexityToken: 'your-perplexity-token', perplexityToken: 'your-perplexity-token',
groqToken: 'your-groq-token', groqToken: 'your-groq-token',
@ -148,7 +157,7 @@ const audioStream = await smartAi.openaiProvider.audio({
### Document Processing ### Document Processing
For providers that support document processing (OpenAI and Ollama): For providers that support document processing (OpenAI, Ollama, Anthropic, and X.AI):
```typescript ```typescript
// Using OpenAI // Using OpenAI
@ -166,6 +175,14 @@ const analysis = await smartAi.ollamaProvider.document({
messageHistory: [], messageHistory: [],
pdfDocuments: [pdfBuffer] // Uint8Array of PDF content pdfDocuments: [pdfBuffer] // Uint8Array of PDF content
}); });
// Using Anthropic with Claude 3
const anthropicAnalysis = await smartAi.anthropicProvider.document({
systemMessage: 'You are a document analysis assistant',
userMessage: 'Please analyze this document and extract key information',
messageHistory: [],
pdfDocuments: [pdfBuffer] // Uint8Array of PDF content
});
``` ```
Both providers will: All of these providers will:
@ -175,7 +192,7 @@ Both providers will:
### Vision Processing ### Vision Processing
For providers that support vision tasks (OpenAI and Ollama): For providers that support vision tasks (OpenAI, Ollama, and Anthropic):
```typescript ```typescript
// Using OpenAI's GPT-4 Vision // Using OpenAI's GPT-4 Vision
@ -189,6 +206,12 @@ const analysis = await smartAi.ollamaProvider.vision({
image: imageBuffer, image: imageBuffer,
prompt: 'Analyze this image in detail' prompt: 'Analyze this image in detail'
}); });
// Using Anthropic's Claude 3
const anthropicAnalysis = await smartAi.anthropicProvider.vision({
image: imageBuffer,
prompt: 'Please analyze this image and describe what you see'
});
``` ```
## Error Handling ## Error Handling

View File

@ -3,6 +3,6 @@
*/ */
export const commitinfo = { export const commitinfo = {
name: '@push.rocks/smartai', name: '@push.rocks/smartai',
version: '0.1.0', version: '0.3.0',
description: 'A TypeScript library for integrating and interacting with multiple AI models, offering capabilities for chat and potentially audio responses.' description: 'A TypeScript library for integrating and interacting with multiple AI models, offering capabilities for chat and potentially audio responses.'
} }

View File

@ -2,6 +2,9 @@ import * as plugins from './plugins.js';
import * as paths from './paths.js'; import * as paths from './paths.js';
import { MultiModalModel } from './abstract.classes.multimodal.js'; import { MultiModalModel } from './abstract.classes.multimodal.js';
import type { ChatOptions, ChatResponse, ChatMessage } from './abstract.classes.multimodal.js'; import type { ChatOptions, ChatResponse, ChatMessage } from './abstract.classes.multimodal.js';
import type { ImageBlockParam, TextBlockParam } from '@anthropic-ai/sdk/resources/messages';
type ContentBlock = ImageBlockParam | TextBlockParam;
export interface IAnthropicProviderOptions { export interface IAnthropicProviderOptions {
anthropicToken: string; anthropicToken: string;
@ -132,7 +135,40 @@ export class AnthropicProvider extends MultiModalModel {
} }
/**
 * Asks Claude to analyse a single image.
 *
 * The prompt and the base64-encoded image are sent together as one user
 * message to the `claude-3-opus-20240229` model.
 *
 * @param optionsArg.image - Raw bytes of the image file (JPEG, PNG, GIF or WebP).
 * @param optionsArg.prompt - Instruction or question about the image.
 * @returns The text of every text block in the reply, concatenated in order.
 */
public async vision(optionsArg: { image: Buffer; prompt: string }): Promise<string> {
  const imageBytes = optionsArg.image;

  // The request declares the image's media type explicitly, so it has to
  // describe the bytes that are actually sent. Sniff the file signature rather
  // than labelling every buffer as JPEG; unknown signatures keep the previous
  // JPEG default.
  let mediaType: 'image/jpeg' | 'image/png' | 'image/gif' | 'image/webp' = 'image/jpeg';
  if (imageBytes[0] === 0x89 && imageBytes.toString('latin1', 1, 4) === 'PNG') {
    mediaType = 'image/png';
  } else if (imageBytes.toString('latin1', 0, 3) === 'GIF') {
    mediaType = 'image/gif';
  } else if (
    imageBytes.toString('latin1', 0, 4) === 'RIFF' &&
    imageBytes.toString('latin1', 8, 12) === 'WEBP'
  ) {
    mediaType = 'image/webp';
  }

  const content: ContentBlock[] = [
    {
      type: 'text',
      text: optionsArg.prompt
    },
    {
      type: 'image',
      source: {
        type: 'base64',
        media_type: mediaType,
        data: imageBytes.toString('base64')
      }
    }
  ];

  const result = await this.anthropicApiClient.messages.create({
    model: 'claude-3-opus-20240229',
    messages: [{
      role: 'user',
      content
    }],
    max_tokens: 1024
  });

  // A reply may contain several content blocks; only text blocks carry the answer.
  let message = '';
  for (const block of result.content) {
    if ('text' in block) {
      message += block.text;
    }
  }
  return message;
}
/**
 * Analyses PDF documents with Claude by rendering every page to a PNG image
 * and attaching those images to the user message.
 *
 * @param optionsArg.systemMessage - System prompt passed as the request's `system` field.
 * @param optionsArg.userMessage - Text placed before the page images in the final user message.
 * @param optionsArg.pdfDocuments - PDF files as raw bytes; all pages of all files are attached in order.
 * @param optionsArg.messageHistory - Earlier turns, sent before the final user message.
 *   Any role other than `assistant` is sent as `user`.
 * @returns `{ message: { role: 'assistant', content } }` where `content` is the
 *   concatenated text of the reply's text blocks.
 */
public async document(optionsArg: {
  systemMessage: string;
  userMessage: string;
  pdfDocuments: Uint8Array[];
  messageHistory: ChatMessage[];
}): Promise<{ message: any }> {
  // NOTE(review): the SmartPdf instance is created per call and is neither
  // started nor stopped here — confirm convertPDFToPngBytes works that way.
  const smartpdfInstance = new plugins.smartpdf.SmartPdf();
  let documentImageBytesArray: Uint8Array[] = [];
  for (const pdfDocument of optionsArg.pdfDocuments) {
    const documentImageArray = await smartpdfInstance.convertPDFToPngBytes(pdfDocument);
    documentImageBytesArray = documentImageBytesArray.concat(documentImageArray);
  }

  // Convert message history to Anthropic format
  const messages = optionsArg.messageHistory.map(msg => ({
    role: msg.role === 'assistant' ? 'assistant' as const : 'user' as const,
    content: msg.content
  }));

  // The final user turn: the question first, then one image block per page.
  const content: ContentBlock[] = [
    {
      type: 'text',
      text: optionsArg.userMessage
    }
  ];
  for (const imageBytes of documentImageBytesArray) {
    content.push({
      type: 'image',
      source: {
        type: 'base64',
        // The pages come from convertPDFToPngBytes, so they are PNG, not JPEG.
        media_type: 'image/png',
        data: Buffer.from(imageBytes).toString('base64')
      }
    });
  }

  const result = await this.anthropicApiClient.messages.create({
    model: 'claude-3-opus-20240229',
    system: optionsArg.systemMessage,
    messages: [
      ...messages,
      { role: 'user', content }
    ],
    max_tokens: 4096
  });

  // A reply may contain several content blocks; only text blocks carry the answer.
  let message = '';
  for (const block of result.content) {
    if ('text' in block) {
      message += block.text;
    }
  }

  return {
    message: {
      role: 'assistant',
      content: message
    }
  };
}
} }

183
ts/provider.xai.ts Normal file
View File

@ -0,0 +1,183 @@
import * as plugins from './plugins.js';
import * as paths from './paths.js';
import { MultiModalModel } from './abstract.classes.multimodal.js';
import type { ChatOptions, ChatResponse, ChatMessage } from './abstract.classes.multimodal.js';
import type { ChatCompletionMessageParam } from 'openai/resources/chat/completions';
/**
 * Options accepted by the XAIProvider constructor.
 */
export interface IXAIProviderOptions {
/** X.AI API key; XAIProvider.start() passes it as `apiKey` to the client it creates for https://api.x.ai/v1. */
xaiToken: string;
}
/**
 * Provider for X.AI (Grok) models.
 *
 * X.AI is reached through the OpenAI SDK client configured with the base URL
 * `https://api.x.ai/v1`. Call {@link XAIProvider.start} before any other
 * method: it creates the API client and the PDF converter.
 */
export class XAIProvider extends MultiModalModel {
  /** Model used for text-only chat requests. */
  private static readonly chatModel = 'grok-2-latest';

  /**
   * Model used by document(), which attaches page images to the request and
   * therefore needs a model that accepts image input.
   */
  private static readonly documentModel = 'grok-2-vision-latest';

  private options: IXAIProviderOptions;
  public openAiApiClient: plugins.openai.default;
  public smartpdfInstance: plugins.smartpdf.SmartPdf;

  constructor(optionsArg: IXAIProviderOptions) {
    super();
    this.options = optionsArg;
  }

  /** Creates the API client (authenticated with `xaiToken`) and the PDF converter. */
  public async start() {
    this.openAiApiClient = new plugins.openai.default({
      apiKey: this.options.xaiToken,
      baseURL: 'https://api.x.ai/v1',
    });
    this.smartpdfInstance = new plugins.smartpdf.SmartPdf();
  }

  /** No-op; nothing is released here. */
  public async stop() {}

  /**
   * Turns a stream of newline-delimited JSON chat messages into a stream of
   * model output text.
   *
   * Each input line is expected to be a JSON object `{ role, content }`. Every
   * parsed message is sent to X.AI as its own single-message streaming request,
   * in input order, and the text deltas of the reply are enqueued on the
   * returned stream. Lines that are blank, not valid JSON, or not a JSON object
   * are logged and skipped. A final line without a trailing newline is
   * processed when the input closes.
   *
   * @param input - UTF-8 encoded, newline-delimited JSON messages.
   * @returns A stream of text fragments produced by the model.
   */
  public async chatStream(input: ReadableStream<Uint8Array>): Promise<ReadableStream<string>> {
    const decoder = new TextDecoder();
    let buffer = '';

    // Arrow functions are used throughout so that `this` stays the provider.
    // Inside a transformer *method*, `this` would be the transformer object,
    // which has no `openAiApiClient`.
    const forwardLine = async (
      line: string,
      controller: TransformStreamDefaultController<string>,
    ): Promise<void> => {
      if (!line.trim()) {
        return;
      }

      let parsed: unknown;
      try {
        parsed = JSON.parse(line);
      } catch (e) {
        console.error('Failed to parse message:', e);
        return;
      }
      if (typeof parsed !== 'object' || parsed === null) {
        console.error('Failed to parse message:', new Error('expected a JSON object'));
        return;
      }

      // Narrow the untrusted JSON to what the API accepts: a missing or unknown
      // role defaults to 'user', and non-string content becomes empty.
      const record = parsed as { role?: unknown; content?: unknown };
      const role = record.role === 'system' || record.role === 'assistant' ? record.role : 'user';
      const content = typeof record.content === 'string' ? record.content : '';

      const stream = await this.openAiApiClient.chat.completions.create({
        model: XAIProvider.chatModel,
        messages: [{ role, content }],
        stream: true,
      });

      for await (const chunk of stream) {
        const delta = chunk.choices[0]?.delta?.content;
        if (delta) {
          controller.enqueue(delta);
        }
      }
    };

    const transform = new TransformStream<Uint8Array, string>({
      transform: async (chunk, controller) => {
        buffer += decoder.decode(chunk, { stream: true });

        // Forward every complete line. A chunk may contain several messages,
        // and a message may span several chunks.
        let newlineIndex = buffer.indexOf('\n');
        while (newlineIndex !== -1) {
          const line = buffer.slice(0, newlineIndex);
          buffer = buffer.slice(newlineIndex + 1);
          await forwardLine(line, controller);
          newlineIndex = buffer.indexOf('\n');
        }
      },

      flush: async (controller) => {
        // Drain bytes the decoder is still holding, then treat the unterminated
        // last line like any other message.
        buffer += decoder.decode();
        const remaining = buffer;
        buffer = '';
        await forwardLine(remaining, controller);
      },
    });

    return input.pipeThrough(transform);
  }

  /**
   * Performs one non-streaming chat completion.
   *
   * @param optionsArg.systemMessage - System prompt, sent first.
   * @param optionsArg.userMessage - The new user turn, sent last.
   * @param optionsArg.messageHistory - Earlier turns, sent between the two.
   * @returns The assistant reply; `message` is an empty string when the API returns no content.
   */
  public async chat(optionsArg: {
    systemMessage: string;
    userMessage: string;
    messageHistory: { role: string; content: string; }[];
  }): Promise<{ role: 'assistant'; message: string; }> {
    const messages: ChatCompletionMessageParam[] = [
      { role: 'system', content: optionsArg.systemMessage },
      ...optionsArg.messageHistory.map(msg => ({
        role: msg.role as 'system' | 'user' | 'assistant',
        content: msg.content
      })),
      { role: 'user', content: optionsArg.userMessage }
    ];

    const completion = await this.openAiApiClient.chat.completions.create({
      model: XAIProvider.chatModel,
      messages: messages,
      stream: false,
    });

    return {
      role: 'assistant',
      message: completion.choices[0]?.message?.content ?? ''
    };
  }

  /** @throws Always — audio generation is not supported by X.AI. */
  public async audio(optionsArg: { message: string }): Promise<NodeJS.ReadableStream> {
    throw new Error('Audio generation is not supported by X.AI');
  }

  /** @throws Always — vision tasks are not supported by this provider. */
  public async vision(optionsArg: { image: Buffer; prompt: string }): Promise<string> {
    throw new Error('Vision tasks are not supported by X.AI');
  }

  /**
   * Analyses PDF documents by rendering every page to a PNG image and
   * attaching the images to the user message.
   *
   * @param optionsArg.systemMessage - System prompt, sent first.
   * @param optionsArg.userMessage - Question or instruction about the documents.
   * @param optionsArg.pdfDocuments - PDF files as raw bytes; all pages of all files are attached in order.
   * @param optionsArg.messageHistory - Earlier turns, sent before the final user message.
   * @returns `{ message }` with the reply text, or an empty string when the API returns no content.
   */
  public async document(optionsArg: {
    systemMessage: string;
    userMessage: string;
    pdfDocuments: Uint8Array[];
    messageHistory: { role: string; content: string; }[];
  }): Promise<{ message: any }> {
    // Render every page of every PDF to PNG bytes.
    let pdfDocumentImageBytesArray: Uint8Array[] = [];
    for (const pdfDocument of optionsArg.pdfDocuments) {
      const documentImageArray = await this.smartpdfInstance.convertPDFToPngBytes(pdfDocument);
      pdfDocumentImageBytesArray = pdfDocumentImageBytesArray.concat(documentImageArray);
    }

    // Attach the pages as real image parts (base64 data URLs). A textual
    // placeholder would leave the model with nothing to analyse.
    const userContent = [
      { type: 'text' as const, text: optionsArg.userMessage },
      ...pdfDocumentImageBytesArray.map(bytes => ({
        type: 'image_url' as const,
        image_url: {
          url: `data:image/png;base64,${Buffer.from(bytes).toString('base64')}`,
        },
      })),
    ];

    const messages: ChatCompletionMessageParam[] = [
      { role: 'system', content: optionsArg.systemMessage },
      ...optionsArg.messageHistory.map(msg => ({
        role: msg.role as 'system' | 'user' | 'assistant',
        content: msg.content
      })),
      { role: 'user', content: userContent }
    ];

    const completion = await this.openAiApiClient.chat.completions.create({
      model: XAIProvider.documentModel,
      messages: messages,
      stream: false,
    });

    return {
      message: completion.choices[0]?.message?.content ?? ''
    };
  }
}