BREAKING CHANGE(vercel-ai-sdk): migrate to Vercel AI SDK v6 and introduce provider registry (getModel) returning LanguageModelV3
This commit is contained in:
10
changelog.md
10
changelog.md
@@ -1,5 +1,15 @@
|
||||
# Changelog
|
||||
|
||||
## 2026-03-05 - 1.0.0 - BREAKING CHANGE(vercel-ai-sdk)
|
||||
migrate to Vercel AI SDK v6 and introduce provider registry (getModel) returning LanguageModelV3
|
||||
|
||||
- Major API rewrite and module reorganization; bump package version to 1.0.0
|
||||
- Replace many legacy provider implementations with @ai-sdk/* providers and a new Ollama adapter (LanguageModelV3-based)
|
||||
- Add subpath exports for capability packages: ./vision, ./audio, ./image, ./document, ./research
|
||||
- Introduce Anthropic prompt-caching middleware and provider-level promptCaching option
|
||||
- Split functionality into focused ts_* packages (ts_audio, ts_image, ts_document, ts_vision, ts_research) and adapt tests accordingly
|
||||
- Update dependencies and devDependencies to use ai SDK providers and newer package versions
|
||||
|
||||
## 2026-01-20 - 0.13.3 - fix()
|
||||
no changes detected
|
||||
|
||||
|
||||
72
package.json
72
package.json
@@ -1,39 +1,67 @@
|
||||
{
|
||||
"name": "@push.rocks/smartai",
|
||||
"version": "0.13.3",
|
||||
"version": "1.0.0",
|
||||
"private": false,
|
||||
"description": "SmartAi is a versatile TypeScript library designed to facilitate integration and interaction with various AI models, offering functionalities for chat, audio generation, document processing, and vision tasks.",
|
||||
"description": "Provider registry and capability utilities for ai-sdk (Vercel AI SDK). Core export returns LanguageModel; subpath exports provide vision, audio, image, document and research capabilities.",
|
||||
"main": "dist_ts/index.js",
|
||||
"typings": "dist_ts/index.d.ts",
|
||||
"type": "module",
|
||||
"exports": {
|
||||
".": {
|
||||
"import": "./dist_ts/index.js",
|
||||
"types": "./dist_ts/index.d.ts"
|
||||
},
|
||||
"./vision": {
|
||||
"import": "./dist_ts_vision/index.js",
|
||||
"types": "./dist_ts_vision/index.d.ts"
|
||||
},
|
||||
"./audio": {
|
||||
"import": "./dist_ts_audio/index.js",
|
||||
"types": "./dist_ts_audio/index.d.ts"
|
||||
},
|
||||
"./image": {
|
||||
"import": "./dist_ts_image/index.js",
|
||||
"types": "./dist_ts_image/index.d.ts"
|
||||
},
|
||||
"./document": {
|
||||
"import": "./dist_ts_document/index.js",
|
||||
"types": "./dist_ts_document/index.d.ts"
|
||||
},
|
||||
"./research": {
|
||||
"import": "./dist_ts_research/index.js",
|
||||
"types": "./dist_ts_research/index.d.ts"
|
||||
}
|
||||
},
|
||||
"author": "Task Venture Capital GmbH",
|
||||
"license": "MIT",
|
||||
"scripts": {
|
||||
"test": "(tstest test/ --web --verbose)",
|
||||
"test": "(tstest test/ --verbose --logfile)",
|
||||
"typecheck": "tsbuild check",
|
||||
"build": "(tsbuild tsfolders --allowimplicitany)",
|
||||
"buildDocs": "(tsdoc)"
|
||||
},
|
||||
"devDependencies": {
|
||||
"@git.zone/tsbuild": "^4.1.2",
|
||||
"@git.zone/tsbundle": "^2.8.1",
|
||||
"@git.zone/tsbuild": "^4.2.6",
|
||||
"@git.zone/tsbundle": "^2.9.1",
|
||||
"@git.zone/tsrun": "^2.0.1",
|
||||
"@git.zone/tstest": "^3.1.6",
|
||||
"@git.zone/tstest": "^3.2.0",
|
||||
"@push.rocks/qenv": "^6.1.3",
|
||||
"@types/node": "^25.0.9",
|
||||
"@types/node": "^25.3.3",
|
||||
"typescript": "^5.9.3"
|
||||
},
|
||||
"dependencies": {
|
||||
"@anthropic-ai/sdk": "^0.71.2",
|
||||
"@mistralai/mistralai": "^1.12.0",
|
||||
"@push.rocks/smartarray": "^1.1.0",
|
||||
"@push.rocks/smartfs": "^1.3.1",
|
||||
"@push.rocks/smartpath": "^6.0.0",
|
||||
"@push.rocks/smartpdf": "^4.1.1",
|
||||
"@push.rocks/smartpromise": "^4.2.3",
|
||||
"@push.rocks/smartrequest": "^5.0.1",
|
||||
"@push.rocks/webstream": "^1.0.10",
|
||||
"openai": "^6.16.0"
|
||||
"@ai-sdk/anthropic": "^3.0.58",
|
||||
"@ai-sdk/google": "^3.0.43",
|
||||
"@ai-sdk/groq": "^3.0.29",
|
||||
"@ai-sdk/mistral": "^3.0.24",
|
||||
"@ai-sdk/openai": "^3.0.41",
|
||||
"@ai-sdk/perplexity": "^3.0.23",
|
||||
"@ai-sdk/provider": "^3.0.8",
|
||||
"@ai-sdk/xai": "^3.0.67",
|
||||
"@anthropic-ai/sdk": "^0.78.0",
|
||||
"@push.rocks/smartpdf": "^4.1.3",
|
||||
"ai": "^6.0.116",
|
||||
"openai": "^6.26.0"
|
||||
},
|
||||
"repository": {
|
||||
"type": "git",
|
||||
@@ -48,13 +76,13 @@
|
||||
],
|
||||
"files": [
|
||||
"ts/**/*",
|
||||
"ts_web/**/*",
|
||||
"dist/**/*",
|
||||
"ts_vision/**/*",
|
||||
"ts_audio/**/*",
|
||||
"ts_image/**/*",
|
||||
"ts_document/**/*",
|
||||
"ts_research/**/*",
|
||||
"dist_*/**/*",
|
||||
"dist_ts/**/*",
|
||||
"dist_ts_web/**/*",
|
||||
"assets/**/*",
|
||||
"cli.js",
|
||||
"npmextra.json",
|
||||
"readme.md"
|
||||
],
|
||||
|
||||
5723
pnpm-lock.yaml
generated
5723
pnpm-lock.yaml
generated
File diff suppressed because it is too large
Load Diff
130
readme.hints.md
130
readme.hints.md
@@ -1,104 +1,50 @@
|
||||
# SmartAI Project Hints
|
||||
|
||||
## Architecture (v1.0.0 - Vercel AI SDK rewrite)
|
||||
|
||||
The package is a **provider registry** built on the Vercel AI SDK (`ai` v6). The core export returns a `LanguageModelV3` from `@ai-sdk/provider`. Specialized capabilities are in subpath exports.
|
||||
|
||||
### Core Entry (`ts/`)
|
||||
- `getModel(options)` → returns `LanguageModelV3` for any supported provider
|
||||
- Providers: anthropic, openai, google, groq, mistral, xai, perplexity, ollama
|
||||
- Anthropic prompt caching via `wrapLanguageModel` middleware (enabled by default)
|
||||
- Custom Ollama provider implementing `LanguageModelV3` directly (for think, num_ctx support)
|
||||
|
||||
### Subpath Exports
|
||||
- `@push.rocks/smartai/vision` — `analyzeImage()` using `generateText` with image content
|
||||
- `@push.rocks/smartai/audio` — `textToSpeech()` using OpenAI SDK directly
|
||||
- `@push.rocks/smartai/image` — `generateImage()`, `editImage()` using OpenAI SDK directly
|
||||
- `@push.rocks/smartai/document` — `analyzeDocuments()` using SmartPdf + `generateText`
|
||||
- `@push.rocks/smartai/research` — `research()` using `@anthropic-ai/sdk` web_search tool
|
||||
|
||||
## Dependencies
|
||||
|
||||
- Uses `@git.zone/tstest` v3.x for testing (import from `@git.zone/tstest/tapbundle`)
|
||||
- `@push.rocks/smartfs` v1.x for file system operations
|
||||
- `@anthropic-ai/sdk` v0.71.x with extended thinking support
|
||||
- `@mistralai/mistralai` v1.x for Mistral OCR and chat capabilities
|
||||
- `openai` v6.x for OpenAI API integration
|
||||
- `@push.rocks/smartrequest` v5.x - uses `response.stream()` + `Readable.fromWeb()` for streaming
|
||||
- `ai` ^6.0.116 — Vercel AI SDK core
|
||||
- `@ai-sdk/*` — Provider packages (anthropic, openai, google, groq, mistral, xai, perplexity)
|
||||
- `@ai-sdk/provider` ^3.0.8 — LanguageModelV3 types
|
||||
- `@anthropic-ai/sdk` ^0.78.0 — Direct SDK for research (web search tool)
|
||||
- `openai` ^6.26.0 — Direct SDK for audio TTS and image generation/editing
|
||||
- `@push.rocks/smartpdf` ^4.1.3 — PDF to PNG conversion for document analysis
|
||||
|
||||
## Build
|
||||
|
||||
- `pnpm build` → `tsbuild tsfolders --allowimplicitany`
|
||||
- Compiles: ts/, ts_vision/, ts_audio/, ts_image/, ts_document/, ts_research/
|
||||
|
||||
## Important Notes
|
||||
|
||||
- When extended thinking is enabled, temperature parameter must NOT be set (or set to 1)
|
||||
- The `streamNode()` method was removed in smartrequest v5, use `response.stream()` with `Readable.fromWeb()` instead
|
||||
|
||||
## Provider Capabilities Summary
|
||||
|
||||
| Provider | Chat | Stream | TTS | Vision | Documents | Research | Images |
|
||||
|--------------|------|--------|-----|--------|-----------|----------|--------|
|
||||
| OpenAI | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ |
|
||||
| Anthropic | ✅ | ✅ | ❌ | ✅ | ✅ | ✅ | ❌ |
|
||||
| Mistral | ✅ | ✅ | ❌ | ✅ | ✅ | ❌ | ❌ |
|
||||
| ElevenLabs | ❌ | ❌ | ✅ | ❌ | ❌ | ❌ | ❌ |
|
||||
| Ollama | ✅ | ✅ | ❌ | ✅ | ✅ | ❌ | ❌ |
|
||||
| XAI | ✅ | ✅ | ❌ | ❌ | ✅ | ❌ | ❌ |
|
||||
| Perplexity | ✅ | ✅ | ❌ | ❌ | ❌ | ✅ | ❌ |
|
||||
| Groq | ✅ | ✅ | ❌ | ❌ | ❌ | ❌ | ❌ |
|
||||
| Exo | ✅ | ✅ | ❌ | ❌ | ❌ | ❌ | ❌ |
|
||||
|
||||
## Mistral Provider Integration
|
||||
|
||||
### Overview
|
||||
|
||||
The Mistral provider supports:
|
||||
- **Document AI** via Mistral OCR (December 2025) - native PDF processing without image conversion
|
||||
- **Chat capabilities** using Mistral's chat models (`mistral-large-latest`, etc.)
|
||||
|
||||
### Key Advantage: Native PDF Support
|
||||
|
||||
Unlike other providers that require converting PDFs to images (using SmartPdf), Mistral OCR natively accepts PDF documents as base64-encoded data. This makes document processing potentially faster and more accurate for text extraction.
|
||||
|
||||
### Configuration
|
||||
|
||||
```typescript
|
||||
import * as smartai from '@push.rocks/smartai';
|
||||
|
||||
const provider = new smartai.MistralProvider({
|
||||
mistralToken: 'your-token-here',
|
||||
chatModel: 'mistral-large-latest', // default
|
||||
ocrModel: 'mistral-ocr-latest', // default
|
||||
tableFormat: 'markdown', // 'markdown' or 'html'
|
||||
});
|
||||
|
||||
await provider.start();
|
||||
```
|
||||
|
||||
### API Key
|
||||
|
||||
Tests require `MISTRAL_API_KEY` in `.nogit/env.json`.
|
||||
|
||||
## Anthropic Extended Thinking Feature
|
||||
|
||||
### Configuration
|
||||
|
||||
Extended thinking is configured at the provider level during instantiation:
|
||||
|
||||
```typescript
|
||||
import * as smartai from '@push.rocks/smartai';
|
||||
|
||||
const provider = new smartai.AnthropicProvider({
|
||||
anthropicToken: 'your-token-here',
|
||||
extendedThinking: 'normal', // Options: 'quick' | 'normal' | 'deep' | 'off'
|
||||
});
|
||||
```
|
||||
|
||||
### Thinking Modes
|
||||
|
||||
| Mode | Budget Tokens | Use Case |
|
||||
| ---------- | ------------- | ----------------------------------------------- |
|
||||
| `'quick'` | 2,048 | Lightweight reasoning for simple queries |
|
||||
| `'normal'` | 8,000 | **Default** - Balanced reasoning for most tasks |
|
||||
| `'deep'` | 16,000 | Complex reasoning for difficult problems |
|
||||
| `'off'` | 0 | Disable extended thinking |
|
||||
|
||||
### Implementation Details
|
||||
|
||||
- Extended thinking is implemented via `getThinkingConfig()` private method
|
||||
- When thinking is enabled, temperature must NOT be set
|
||||
- Uses `claude-sonnet-4-5-20250929` model
|
||||
- LanguageModelV3 uses `unified`/`raw` in FinishReason (not `type`/`rawType`)
|
||||
- LanguageModelV3 system messages have `content: string` (not array)
|
||||
- LanguageModelV3 file parts use `mediaType` (not `mimeType`)
|
||||
- LanguageModelV3FunctionTool uses `inputSchema` (not `parameters`)
|
||||
- Ollama `think` param goes at request body top level, not inside `options`
|
||||
- Qwen models get default temperature 0.55 in the custom Ollama provider
|
||||
- `qenv.getEnvVarOnDemand()` returns a Promise — must be awaited in tests
|
||||
|
||||
## Testing
|
||||
|
||||
Run tests with:
|
||||
|
||||
```bash
|
||||
pnpm test
|
||||
```
|
||||
|
||||
Run specific tests:
|
||||
|
||||
```bash
|
||||
npx tstest test/test.something.ts --verbose
|
||||
pnpm test # all tests
|
||||
tstest test/test.smartai.ts --verbose # core tests
|
||||
tstest test/test.ollama.ts --verbose # ollama provider tests (mocked, no API needed)
|
||||
```
|
||||
|
||||
861
readme.md
861
readme.md
@@ -1,12 +1,12 @@
|
||||
# @push.rocks/smartai
|
||||
|
||||
**One API to rule them all** 🚀
|
||||
**A unified provider registry for the Vercel AI SDK** 🧠⚡
|
||||
|
||||
[](https://www.npmjs.com/package/@push.rocks/smartai)
|
||||
[](https://www.typescriptlang.org/)
|
||||
[](https://opensource.org/licenses/MIT)
|
||||
|
||||
SmartAI unifies the world's leading AI providers — OpenAI, Anthropic, Mistral, Perplexity, Ollama, Groq, XAI, Exo, and ElevenLabs — under a single, elegant TypeScript interface. Build AI applications at lightning speed without vendor lock-in.
|
||||
SmartAI gives you a single `getModel()` function that returns a standard `LanguageModelV3` for **any** supported provider — Anthropic, OpenAI, Google, Groq, Mistral, XAI, Perplexity, or Ollama. Use the returned model with the Vercel AI SDK's `generateText()`, `streamText()`, and tool ecosystem. Specialized capabilities like vision, audio, image generation, document analysis, and web research are available as dedicated subpath imports.
|
||||
|
||||
## Issue Reporting and Security
|
||||
|
||||
@@ -14,679 +14,416 @@ For reporting bugs, issues, or security vulnerabilities, please visit [community
|
||||
|
||||
## 🎯 Why SmartAI?
|
||||
|
||||
- **🔌 Universal Interface**: Write once, run with any AI provider. Switch between GPT-5, Claude, Llama, or Grok with a single line change.
|
||||
- **🛡️ Type-Safe**: Full TypeScript support with comprehensive type definitions for all operations.
|
||||
- **🌊 Streaming First**: Built for real-time applications with native streaming support.
|
||||
- **🎨 Multi-Modal**: Seamlessly work with text, images, audio, and documents.
|
||||
- **🏠 Local & Cloud**: Support for both cloud providers and local models via Ollama/Exo.
|
||||
- **⚡ Zero Lock-In**: Your code remains portable across all AI providers.
|
||||
- **🔌 One function, eight providers** — `getModel()` returns a standard `LanguageModelV3`. Switch providers by changing a string.
|
||||
- **🧱 Built on Vercel AI SDK** — Uses `ai` v6 under the hood. Your model works with `generateText()`, `streamText()`, tool calling, structured output, and everything else in the AI SDK ecosystem.
|
||||
- **🏠 Custom Ollama provider** — A full `LanguageModelV3` implementation for Ollama with support for `think` mode, `num_ctx`, auto-tuned temperature for Qwen models, and native tool calling.
|
||||
- **💰 Anthropic prompt caching** — Automatic `cacheControl` middleware reduces cost and latency on repeated calls. Enabled by default, opt out with `promptCaching: false`.
|
||||
- **📦 Modular subpath exports** — Vision, audio, image, document, and research capabilities ship as separate imports. Only import what you need.
|
||||
- **⚡ Zero lock-in** — Your code uses standard AI SDK types. Swap providers without touching application logic.
|
||||
|
||||
## 📦 Installation
|
||||
|
||||
```bash
|
||||
npm install @push.rocks/smartai
|
||||
# or
|
||||
pnpm install @push.rocks/smartai
|
||||
```
|
||||
|
||||
## 🚀 Quick Start
|
||||
|
||||
```typescript
|
||||
import { SmartAi } from '@push.rocks/smartai';
|
||||
import { getModel, generateText, streamText } from '@push.rocks/smartai';
|
||||
|
||||
// Initialize with your favorite providers
|
||||
const ai = new SmartAi({
|
||||
openaiToken: 'sk-...',
|
||||
anthropicToken: 'sk-ant-...',
|
||||
elevenlabsToken: 'sk-...',
|
||||
elevenlabs: {
|
||||
defaultVoiceId: '19STyYD15bswVz51nqLf', // Optional: Samara voice
|
||||
},
|
||||
// Get a model for any provider
|
||||
const model = getModel({
|
||||
provider: 'anthropic',
|
||||
model: 'claude-sonnet-4-5-20250929',
|
||||
apiKey: process.env.ANTHROPIC_TOKEN,
|
||||
});
|
||||
|
||||
await ai.start();
|
||||
|
||||
// Same API, multiple providers
|
||||
const response = await ai.openaiProvider.chat({
|
||||
systemMessage: 'You are a helpful assistant.',
|
||||
userMessage: 'Explain quantum computing in simple terms',
|
||||
messageHistory: [],
|
||||
// Use it with the standard AI SDK functions
|
||||
const result = await generateText({
|
||||
model,
|
||||
prompt: 'Explain quantum computing in simple terms.',
|
||||
});
|
||||
|
||||
console.log(response.message);
|
||||
console.log(result.text);
|
||||
```
|
||||
|
||||
## 📊 Provider Capabilities Matrix
|
||||
That's it. Change `provider` to `'openai'` and `model` to `'gpt-4o'` and the rest of your code stays exactly the same.
|
||||
|
||||
Choose the right provider for your use case:
|
||||
## 🔧 Core API
|
||||
|
||||
| Provider | Chat | Streaming | TTS | Vision | Documents | Research | Images | Highlights |
|
||||
| -------------- | :--: | :-------: | :-: | :----: | :-------: | :------: | :----: | --------------------------------------------------------------- |
|
||||
| **OpenAI** | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | gpt-image-1 • DALL-E 3 • Deep Research API |
|
||||
| **Anthropic** | ✅ | ✅ | ❌ | ✅ | ✅ | ✅ | ❌ | Claude Sonnet 4.5 • Extended Thinking • Web Search API |
|
||||
| **Mistral** | ✅ | ✅ | ❌ | ✅ | ✅ | ❌ | ❌ | Native PDF OCR • mistral-large • Fast inference |
|
||||
| **ElevenLabs** | ❌ | ❌ | ✅ | ❌ | ❌ | ❌ | ❌ | Premium TTS • 70+ languages • v3 model |
|
||||
| **Ollama** | ✅ | ✅ | ❌ | ✅ | ✅ | ❌ | ❌ | 100% local • Privacy-first • No API costs |
|
||||
| **XAI** | ✅ | ✅ | ❌ | ❌ | ✅ | ❌ | ❌ | Grok 2 • Real-time data |
|
||||
| **Perplexity** | ✅ | ✅ | ❌ | ❌ | ❌ | ✅ | ❌ | Web-aware • Research-focused • Sonar Pro |
|
||||
| **Groq** | ✅ | ✅ | ❌ | ❌ | ❌ | ❌ | ❌ | 10x faster • LPU inference • Llama 3.3 |
|
||||
| **Exo** | ✅ | ✅ | ❌ | ❌ | ❌ | ❌ | ❌ | Distributed • P2P compute • Decentralized |
|
||||
### `getModel(options): LanguageModelV3`
|
||||
|
||||
## 🎮 Core Features
|
||||
|
||||
### 💬 Universal Chat Interface
|
||||
|
||||
Works identically across all providers:
|
||||
The primary export. Returns a standard `LanguageModelV3` you can use with any AI SDK function.
|
||||
|
||||
```typescript
|
||||
// Use GPT-5 for complex reasoning
|
||||
const gptResponse = await ai.openaiProvider.chat({
|
||||
systemMessage: 'You are an expert physicist.',
|
||||
userMessage: 'Explain the implications of quantum entanglement',
|
||||
messageHistory: [],
|
||||
});
|
||||
import { getModel } from '@push.rocks/smartai';
|
||||
import type { ISmartAiOptions } from '@push.rocks/smartai';
|
||||
|
||||
// Use Claude for safety-critical applications
|
||||
const claudeResponse = await ai.anthropicProvider.chat({
|
||||
systemMessage: 'You are a medical advisor.',
|
||||
userMessage: 'Review this patient data for concerns',
|
||||
messageHistory: [],
|
||||
});
|
||||
const options: ISmartAiOptions = {
|
||||
provider: 'anthropic', // 'anthropic' | 'openai' | 'google' | 'groq' | 'mistral' | 'xai' | 'perplexity' | 'ollama'
|
||||
model: 'claude-sonnet-4-5-20250929',
|
||||
apiKey: 'sk-ant-...',
|
||||
// Anthropic-only: prompt caching (default: true)
|
||||
promptCaching: true,
|
||||
// Ollama-only: base URL (default: http://localhost:11434)
|
||||
baseUrl: 'http://localhost:11434',
|
||||
// Ollama-only: model runtime options
|
||||
ollamaOptions: { think: true, num_ctx: 4096 },
|
||||
};
|
||||
|
||||
// Use Groq for lightning-fast responses
|
||||
const groqResponse = await ai.groqProvider.chat({
|
||||
systemMessage: 'You are a code reviewer.',
|
||||
userMessage: 'Quick! Find the bug in this code: ...',
|
||||
messageHistory: [],
|
||||
});
|
||||
const model = getModel(options);
|
||||
```
|
||||
|
||||
### 🌊 Real-Time Streaming
|
||||
### Re-exported AI SDK Functions
|
||||
|
||||
Build responsive chat interfaces with token-by-token streaming:
|
||||
SmartAI re-exports the most commonly used functions from `ai` for convenience:
|
||||
|
||||
```typescript
|
||||
// Create a chat stream
|
||||
const stream = await ai.openaiProvider.chatStream(inputStream);
|
||||
const reader = stream.getReader();
|
||||
import {
|
||||
getModel,
|
||||
generateText,
|
||||
streamText,
|
||||
tool,
|
||||
jsonSchema,
|
||||
} from '@push.rocks/smartai';
|
||||
|
||||
// Display responses as they arrive
|
||||
while (true) {
|
||||
const { done, value } = await reader.read();
|
||||
if (done) break;
|
||||
import type {
|
||||
ModelMessage,
|
||||
ToolSet,
|
||||
StreamTextResult,
|
||||
LanguageModelV3,
|
||||
} from '@push.rocks/smartai';
|
||||
```
|
||||
|
||||
// Update UI in real-time
|
||||
process.stdout.write(value);
|
||||
## 🤖 Supported Providers
|
||||
|
||||
| Provider | Package | Example Models |
|
||||
|----------|---------|----------------|
|
||||
| **Anthropic** | `@ai-sdk/anthropic` | `claude-sonnet-4-5-20250929`, `claude-opus-4-5-20250929` |
|
||||
| **OpenAI** | `@ai-sdk/openai` | `gpt-4o`, `gpt-4o-mini`, `o3-mini` |
|
||||
| **Google** | `@ai-sdk/google` | `gemini-2.0-flash`, `gemini-2.5-pro` |
|
||||
| **Groq** | `@ai-sdk/groq` | `llama-3.3-70b-versatile`, `mixtral-8x7b-32768` |
|
||||
| **Mistral** | `@ai-sdk/mistral` | `mistral-large-latest`, `mistral-small-latest` |
|
||||
| **XAI** | `@ai-sdk/xai` | `grok-3`, `grok-3-mini` |
|
||||
| **Perplexity** | `@ai-sdk/perplexity` | `sonar-pro`, `sonar` |
|
||||
| **Ollama** | Custom `LanguageModelV3` | `qwen3:8b`, `llama3:8b`, `deepseek-r1` |
|
||||
|
||||
## 💬 Text Generation
|
||||
|
||||
### Generate Text
|
||||
|
||||
```typescript
|
||||
import { getModel, generateText } from '@push.rocks/smartai';
|
||||
|
||||
const model = getModel({
|
||||
provider: 'openai',
|
||||
model: 'gpt-4o',
|
||||
apiKey: process.env.OPENAI_TOKEN,
|
||||
});
|
||||
|
||||
const result = await generateText({
|
||||
model,
|
||||
system: 'You are a helpful assistant.',
|
||||
prompt: 'What is 2 + 2?',
|
||||
});
|
||||
|
||||
console.log(result.text); // "4"
|
||||
```
|
||||
|
||||
### Stream Text
|
||||
|
||||
```typescript
|
||||
import { getModel, streamText } from '@push.rocks/smartai';
|
||||
|
||||
const model = getModel({
|
||||
provider: 'anthropic',
|
||||
model: 'claude-sonnet-4-5-20250929',
|
||||
apiKey: process.env.ANTHROPIC_TOKEN,
|
||||
});
|
||||
|
||||
const result = await streamText({
|
||||
model,
|
||||
prompt: 'Count from 1 to 10.',
|
||||
});
|
||||
|
||||
for await (const chunk of result.textStream) {
|
||||
process.stdout.write(chunk);
|
||||
}
|
||||
```
|
||||
|
||||
### 🎙️ Text-to-Speech
|
||||
|
||||
Generate natural voices with OpenAI or ElevenLabs:
|
||||
### Tool Calling
|
||||
|
||||
```typescript
|
||||
// OpenAI TTS
|
||||
const audioStream = await ai.openaiProvider.audio({
|
||||
message: 'Welcome to the future of AI development!',
|
||||
import { getModel, generateText, tool, jsonSchema } from '@push.rocks/smartai';
|
||||
|
||||
const model = getModel({
|
||||
provider: 'anthropic',
|
||||
model: 'claude-sonnet-4-5-20250929',
|
||||
apiKey: process.env.ANTHROPIC_TOKEN,
|
||||
});
|
||||
|
||||
// ElevenLabs TTS - Premium quality, natural voices (uses v3 by default)
|
||||
const elevenLabsAudio = await ai.elevenlabsProvider.audio({
|
||||
message: 'Experience the most lifelike text to speech technology.',
|
||||
voiceId: '19STyYD15bswVz51nqLf', // Optional: Samara voice
|
||||
modelId: 'eleven_v3', // Optional: defaults to eleven_v3 (70+ languages)
|
||||
voiceSettings: {
|
||||
// Optional: fine-tune voice characteristics
|
||||
stability: 0.5, // 0-1: Speech consistency
|
||||
similarity_boost: 0.8, // 0-1: Voice similarity to original
|
||||
style: 0.0, // 0-1: Expressiveness
|
||||
use_speaker_boost: true, // Enhanced clarity
|
||||
const result = await generateText({
|
||||
model,
|
||||
prompt: 'What is the weather in London?',
|
||||
tools: {
|
||||
getWeather: tool({
|
||||
description: 'Get weather for a location',
|
||||
inputSchema: jsonSchema({
|
||||
type: 'object',
|
||||
properties: {
|
||||
location: { type: 'string' },
|
||||
},
|
||||
required: ['location'],
|
||||
}),
|
||||
execute: async ({ location }) => {
|
||||
return { temperature: 18, condition: 'cloudy' };
|
||||
},
|
||||
}),
|
||||
},
|
||||
});
|
||||
```
|
||||
|
||||
## 🏠 Ollama (Local Models)
|
||||
|
||||
The custom Ollama provider implements `LanguageModelV3` directly, calling Ollama's native `/api/chat` endpoint. This gives you features that generic OpenAI-compatible wrappers miss:
|
||||
|
||||
```typescript
|
||||
import { getModel, generateText } from '@push.rocks/smartai';
|
||||
|
||||
const model = getModel({
|
||||
provider: 'ollama',
|
||||
model: 'qwen3:8b',
|
||||
baseUrl: 'http://localhost:11434', // default
|
||||
ollamaOptions: {
|
||||
think: true, // Enable thinking/reasoning mode
|
||||
num_ctx: 8192, // Context window size
|
||||
temperature: 0.7, // Override default (Qwen models auto-default to 0.55)
|
||||
},
|
||||
});
|
||||
|
||||
// Stream directly to speakers or save to file
|
||||
audioStream.pipe(fs.createWriteStream('welcome.mp3'));
|
||||
const result = await generateText({
|
||||
model,
|
||||
prompt: 'Solve this step by step: what is 15% of 340?',
|
||||
});
|
||||
|
||||
console.log(result.text);
|
||||
```
|
||||
|
||||
### 👁️ Vision Analysis
|
||||
### Ollama Features
|
||||
|
||||
Understand images with multiple providers:
|
||||
- **`think` mode** — Enables reasoning for models that support it (Qwen3, QwQ, DeepSeek-R1). The `think` parameter is sent at the top level of the request body as required by the Ollama API.
|
||||
- **Auto-tuned temperature** — Qwen models automatically get `temperature: 0.55` when no explicit temperature is set, matching the recommended inference setting.
|
||||
- **Native tool calling** — Full tool call support via Ollama's native format (not shimmed through OpenAI-compatible endpoints).
|
||||
- **Streaming with reasoning** — `doStream()` emits proper `reasoning-start`, `reasoning-delta`, `reasoning-end` parts alongside text.
|
||||
- **All Ollama options** — `num_ctx`, `top_k`, `top_p`, `repeat_penalty`, `num_predict`, `stop`, `seed`.
|
||||
|
||||
## 💰 Anthropic Prompt Caching
|
||||
|
||||
When using the Anthropic provider, SmartAI automatically wraps the model with caching middleware that adds `cacheControl: { type: 'ephemeral' }` to the last system message and last user message. This can significantly reduce cost and latency for repeated calls with the same system prompt.
|
||||
|
||||
```typescript
|
||||
const image = fs.readFileSync('product-photo.jpg');
|
||||
|
||||
// OpenAI: General purpose vision
|
||||
const gptVision = await ai.openaiProvider.vision({
|
||||
image,
|
||||
prompt: 'Describe this product and suggest marketing angles',
|
||||
// Caching enabled by default
|
||||
const model = getModel({
|
||||
provider: 'anthropic',
|
||||
model: 'claude-sonnet-4-5-20250929',
|
||||
apiKey: process.env.ANTHROPIC_TOKEN,
|
||||
});
|
||||
|
||||
// Anthropic: Detailed analysis with extended thinking
|
||||
const claudeVision = await ai.anthropicProvider.vision({
|
||||
image,
|
||||
prompt: 'Identify any safety concerns or defects',
|
||||
});
|
||||
|
||||
// Ollama: Private, local analysis
|
||||
const ollamaVision = await ai.ollamaProvider.vision({
|
||||
image,
|
||||
prompt: 'Extract all text and categorize the content',
|
||||
// Opt out of caching
|
||||
const modelNoCaching = getModel({
|
||||
provider: 'anthropic',
|
||||
model: 'claude-sonnet-4-5-20250929',
|
||||
apiKey: process.env.ANTHROPIC_TOKEN,
|
||||
promptCaching: false,
|
||||
});
|
||||
```
|
||||
|
||||
### 📄 Document Intelligence
|
||||
|
||||
Extract insights from PDFs with AI:
|
||||
You can also use the middleware directly:
|
||||
|
||||
```typescript
|
||||
const contract = fs.readFileSync('contract.pdf');
|
||||
const invoice = fs.readFileSync('invoice.pdf');
|
||||
import { createAnthropicCachingMiddleware } from '@push.rocks/smartai';
|
||||
import { wrapLanguageModel } from 'ai';
|
||||
|
||||
// Analyze documents with OpenAI
|
||||
const analysis = await ai.openaiProvider.document({
|
||||
systemMessage: 'You are a legal expert.',
|
||||
userMessage: 'Compare these documents and highlight key differences',
|
||||
messageHistory: [],
|
||||
pdfDocuments: [contract, invoice],
|
||||
});
|
||||
|
||||
// Multi-document analysis with Anthropic
|
||||
const taxDocs = [form1099, w2, receipts];
|
||||
const taxAnalysis = await ai.anthropicProvider.document({
|
||||
systemMessage: 'You are a tax advisor.',
|
||||
userMessage: 'Prepare a tax summary from these documents',
|
||||
messageHistory: [],
|
||||
pdfDocuments: taxDocs,
|
||||
});
|
||||
const middleware = createAnthropicCachingMiddleware();
|
||||
const cachedModel = wrapLanguageModel({ model: baseModel, middleware });
|
||||
```
|
||||
|
||||
### 🔬 Research & Web Search
|
||||
## 📦 Subpath Exports
|
||||
|
||||
Perform deep research with web search capabilities across multiple providers:
|
||||
SmartAI provides specialized capabilities as separate subpath imports. Each one is a focused utility that takes a model (or API key) and does one thing well.
|
||||
|
||||
### 👁️ Vision — `@push.rocks/smartai/vision`
|
||||
|
||||
Analyze images using any vision-capable model.
|
||||
|
||||
```typescript
|
||||
// OpenAI Deep Research - Comprehensive analysis
|
||||
const deepResearch = await ai.openaiProvider.research({
|
||||
query: 'What are the latest developments in quantum computing?',
|
||||
searchDepth: 'deep',
|
||||
includeWebSearch: true,
|
||||
import { analyzeImage } from '@push.rocks/smartai/vision';
|
||||
import { getModel } from '@push.rocks/smartai';
|
||||
import * as fs from 'fs';
|
||||
|
||||
const model = getModel({
|
||||
provider: 'anthropic',
|
||||
model: 'claude-sonnet-4-5-20250929',
|
||||
apiKey: process.env.ANTHROPIC_TOKEN,
|
||||
});
|
||||
|
||||
console.log(deepResearch.answer);
|
||||
console.log('Sources:', deepResearch.sources);
|
||||
|
||||
// Anthropic Web Search - Domain-filtered research
|
||||
import { AnthropicProvider } from '@push.rocks/smartai';
|
||||
|
||||
const anthropic = new AnthropicProvider({
|
||||
anthropicToken: 'sk-ant-...',
|
||||
enableWebSearch: true,
|
||||
searchDomainAllowList: ['nature.com', 'science.org'],
|
||||
const description = await analyzeImage({
|
||||
model,
|
||||
image: fs.readFileSync('photo.jpg'),
|
||||
prompt: 'Describe this image in detail.',
|
||||
mediaType: 'image/jpeg', // optional, defaults to 'image/jpeg'
|
||||
});
|
||||
|
||||
const scientificResearch = await anthropic.research({
|
||||
query: 'Latest breakthroughs in CRISPR gene editing',
|
||||
searchDepth: 'advanced',
|
||||
});
|
||||
|
||||
// Perplexity - Research-focused with citations
|
||||
const perplexityResearch = await ai.perplexityProvider.research({
|
||||
query: 'Current state of autonomous vehicle technology',
|
||||
searchDepth: 'deep', // Uses Sonar Pro model
|
||||
});
|
||||
console.log(description);
|
||||
```
|
||||
|
||||
**Research Options:**
|
||||
**`analyzeImage(options)`** accepts:
|
||||
- `model` — Any `LanguageModelV3` with vision support
|
||||
- `image` — `Buffer` or `Uint8Array`
|
||||
- `prompt` — What to ask about the image
|
||||
- `mediaType` — `'image/jpeg'` | `'image/png'` | `'image/webp'` | `'image/gif'`
|
||||
|
||||
- `searchDepth`: `'basic'` | `'advanced'` | `'deep'`
|
||||
- `maxSources`: Number of sources to include
|
||||
- `includeWebSearch`: Enable web search (OpenAI)
|
||||
- `background`: Run as background task (OpenAI)
|
||||
### 🎙️ Audio — `@push.rocks/smartai/audio`
|
||||
|
||||
**Supported Providers:**
|
||||
|
||||
- **OpenAI**: Deep Research API with specialized models (`o3-deep-research-*`, `o4-mini-deep-research-*`)
|
||||
- **Anthropic**: Web Search API with domain filtering
|
||||
- **Perplexity**: Sonar and Sonar Pro models with built-in citations
|
||||
|
||||
### 🧠 Extended Thinking (Anthropic)
|
||||
|
||||
Enable Claude to spend more time reasoning about complex problems before generating responses:
|
||||
Text-to-speech using OpenAI's TTS models.
|
||||
|
||||
```typescript
|
||||
import { AnthropicProvider } from '@push.rocks/smartai';
|
||||
import { textToSpeech } from '@push.rocks/smartai/audio';
|
||||
import * as fs from 'fs';
|
||||
|
||||
// Configure extended thinking mode at provider level
|
||||
const anthropic = new AnthropicProvider({
|
||||
anthropicToken: 'sk-ant-...',
|
||||
extendedThinking: 'normal', // Options: 'quick' | 'normal' | 'deep' | 'off'
|
||||
const stream = await textToSpeech({
|
||||
apiKey: process.env.OPENAI_TOKEN,
|
||||
text: 'Welcome to the future of AI development!',
|
||||
voice: 'nova', // 'alloy' | 'echo' | 'fable' | 'onyx' | 'nova' | 'shimmer'
|
||||
model: 'tts-1-hd', // 'tts-1' | 'tts-1-hd'
|
||||
responseFormat: 'mp3', // 'mp3' | 'opus' | 'aac' | 'flac'
|
||||
speed: 1.0, // 0.25 to 4.0
|
||||
});
|
||||
|
||||
await anthropic.start();
|
||||
|
||||
// Extended thinking is automatically applied to all methods
|
||||
const response = await anthropic.chat({
|
||||
systemMessage: 'You are an expert mathematician.',
|
||||
userMessage: 'Prove the Pythagorean theorem from first principles',
|
||||
messageHistory: [],
|
||||
});
|
||||
stream.pipe(fs.createWriteStream('welcome.mp3'));
|
||||
```
|
||||
|
||||
**Thinking Modes:**
|
||||
### 🎨 Image — `@push.rocks/smartai/image`
|
||||
|
||||
| Mode | Budget Tokens | Use Case |
|
||||
| ---------- | ------------- | ------------------------------------------------ |
|
||||
| `'quick'` | 2,048 | Lightweight reasoning for simple queries |
|
||||
| `'normal'` | 8,000 | **Default** — Balanced reasoning for most tasks |
|
||||
| `'deep'` | 16,000 | Complex reasoning for difficult problems |
|
||||
| `'off'` | 0 | Disable extended thinking |
|
||||
|
||||
**Best Practices:**
|
||||
|
||||
- Start with `'normal'` (default) for general usage
|
||||
- Use `'deep'` for complex analytical tasks, philosophy, mathematics, or research
|
||||
- Use `'quick'` for simple factual queries where deep reasoning isn't needed
|
||||
- Thinking budget counts against total token usage
|
||||
|
||||
### 📑 Native PDF OCR (Mistral)
|
||||
|
||||
Mistral provides native PDF document processing via their OCR API — no image conversion required:
|
||||
Generate and edit images using OpenAI's image models.
|
||||
|
||||
```typescript
|
||||
import { MistralProvider } from '@push.rocks/smartai';
|
||||
import { generateImage, editImage } from '@push.rocks/smartai/image';
|
||||
|
||||
const mistral = new MistralProvider({
|
||||
mistralToken: 'your-api-key',
|
||||
chatModel: 'mistral-large-latest', // Default
|
||||
ocrModel: 'mistral-ocr-latest', // Default
|
||||
tableFormat: 'markdown', // 'markdown' | 'html'
|
||||
});
|
||||
|
||||
await mistral.start();
|
||||
|
||||
// Direct PDF processing - no image conversion overhead
|
||||
const result = await mistral.document({
|
||||
systemMessage: 'You are a document analyst.',
|
||||
userMessage: 'Extract all invoice details and calculate the total.',
|
||||
pdfDocuments: [invoicePdfBuffer],
|
||||
messageHistory: [],
|
||||
});
|
||||
```
|
||||
|
||||
**Key Advantage**: Unlike other providers that convert PDFs to images first, Mistral's OCR API processes PDFs natively, potentially offering faster and more accurate text extraction for document-heavy workloads.
|
||||
|
||||
**Supported Formats:**
|
||||
|
||||
- Native PDF processing via Files API
|
||||
- Image OCR (JPEG, PNG, GIF, WebP) for vision tasks
|
||||
- Table extraction with markdown or HTML output
|
||||
|
||||
### 🎨 Image Generation & Editing
|
||||
|
||||
Generate and edit images with OpenAI's cutting-edge models:
|
||||
|
||||
```typescript
|
||||
// Basic image generation with gpt-image-1
|
||||
const image = await ai.openaiProvider.imageGenerate({
|
||||
prompt: 'A futuristic robot assistant in a modern office, digital art',
|
||||
model: 'gpt-image-1',
|
||||
quality: 'high',
|
||||
// Generate an image
|
||||
const result = await generateImage({
|
||||
apiKey: process.env.OPENAI_TOKEN,
|
||||
prompt: 'A futuristic cityscape at sunset, digital art',
|
||||
model: 'gpt-image-1', // 'gpt-image-1' | 'dall-e-3' | 'dall-e-2'
|
||||
quality: 'high', // 'low' | 'medium' | 'high' | 'auto'
|
||||
size: '1024x1024',
|
||||
background: 'transparent', // gpt-image-1 only
|
||||
outputFormat: 'png', // 'png' | 'jpeg' | 'webp'
|
||||
n: 1,
|
||||
});
|
||||
|
||||
// Save the generated image
|
||||
const imageBuffer = Buffer.from(image.images[0].b64_json!, 'base64');
|
||||
fs.writeFileSync('robot.png', imageBuffer);
|
||||
|
||||
// Advanced: Transparent background with custom format
|
||||
const logo = await ai.openaiProvider.imageGenerate({
|
||||
prompt: 'Minimalist mountain peak logo, geometric design',
|
||||
model: 'gpt-image-1',
|
||||
quality: 'high',
|
||||
size: '1024x1024',
|
||||
background: 'transparent',
|
||||
outputFormat: 'png',
|
||||
});
|
||||
|
||||
// WebP with compression for web use
|
||||
const webImage = await ai.openaiProvider.imageGenerate({
|
||||
prompt: 'Product showcase: sleek smartphone on marble surface',
|
||||
model: 'gpt-image-1',
|
||||
quality: 'high',
|
||||
size: '1536x1024',
|
||||
outputFormat: 'webp',
|
||||
outputCompression: 85,
|
||||
});
|
||||
|
||||
// Superior text rendering (gpt-image-1's strength)
|
||||
const signage = await ai.openaiProvider.imageGenerate({
|
||||
prompt:
|
||||
'Vintage cafe sign saying "COFFEE & CODE" in hand-lettered typography',
|
||||
model: 'gpt-image-1',
|
||||
quality: 'high',
|
||||
size: '1024x1024',
|
||||
});
|
||||
|
||||
// Generate multiple variations at once
|
||||
const variations = await ai.openaiProvider.imageGenerate({
|
||||
prompt: 'Abstract geometric pattern, colorful minimalist art',
|
||||
model: 'gpt-image-1',
|
||||
n: 3,
|
||||
quality: 'medium',
|
||||
size: '1024x1024',
|
||||
});
|
||||
// result.images[0].b64_json — base64-encoded image data
|
||||
const imageBuffer = Buffer.from(result.images[0].b64_json!, 'base64');
|
||||
|
||||
// Edit an existing image
|
||||
const editedImage = await ai.openaiProvider.imageEdit({
|
||||
image: originalImageBuffer,
|
||||
prompt: 'Add sunglasses and change the background to a beach sunset',
|
||||
const edited = await editImage({
|
||||
apiKey: process.env.OPENAI_TOKEN,
|
||||
image: imageBuffer,
|
||||
prompt: 'Add a rainbow in the sky',
|
||||
model: 'gpt-image-1',
|
||||
quality: 'high',
|
||||
});
|
||||
```
|
||||
|
||||
**Image Generation Options:**
|
||||
### 📄 Document — `@push.rocks/smartai/document`
|
||||
|
||||
- `model`: `'gpt-image-1'` | `'dall-e-3'` | `'dall-e-2'`
|
||||
- `quality`: `'low'` | `'medium'` | `'high'` | `'auto'`
|
||||
- `size`: Multiple aspect ratios up to 4096×4096
|
||||
- `background`: `'transparent'` | `'opaque'` | `'auto'`
|
||||
- `outputFormat`: `'png'` | `'jpeg'` | `'webp'`
|
||||
- `outputCompression`: 0–100 for webp/jpeg
|
||||
- `moderation`: `'low'` | `'auto'`
|
||||
- `n`: Number of images (1–10)
|
||||
|
||||
**gpt-image-1 Advantages:**
|
||||
|
||||
- Superior text rendering in images
|
||||
- Up to 4096×4096 resolution
|
||||
- Transparent background support
|
||||
- Advanced output formats (WebP with compression)
|
||||
- Better prompt understanding
|
||||
- Streaming support for progressive rendering
|
||||
|
||||
### 🔄 Persistent Conversations
|
||||
|
||||
Maintain context across interactions:
|
||||
Analyze PDF documents by converting them to images and using a vision model. Uses `@push.rocks/smartpdf` for PDF-to-PNG conversion (requires Chromium/Puppeteer).
|
||||
|
||||
```typescript
|
||||
// Create a coding assistant conversation
|
||||
const assistant = ai.createConversation('openai');
|
||||
await assistant.setSystemMessage('You are an expert TypeScript developer.');
|
||||
import { analyzeDocuments, stopSmartpdf } from '@push.rocks/smartai/document';
|
||||
import { getModel } from '@push.rocks/smartai';
|
||||
import * as fs from 'fs';
|
||||
|
||||
// First question
|
||||
const inputWriter = assistant.getInputStreamWriter();
|
||||
await inputWriter.write('How do I implement a singleton pattern?');
|
||||
|
||||
// Continue the conversation
|
||||
await inputWriter.write('Now show me how to make it thread-safe');
|
||||
|
||||
// The assistant remembers the entire context
|
||||
```
|
||||
|
||||
## 🚀 Real-World Examples
|
||||
|
||||
### Build a Customer Support Bot
|
||||
|
||||
```typescript
|
||||
const supportBot = new SmartAi({
|
||||
anthropicToken: process.env.ANTHROPIC_KEY, // Claude for empathetic responses
|
||||
const model = getModel({
|
||||
provider: 'anthropic',
|
||||
model: 'claude-sonnet-4-5-20250929',
|
||||
apiKey: process.env.ANTHROPIC_TOKEN,
|
||||
});
|
||||
|
||||
async function handleCustomerQuery(query: string, history: ChatMessage[]) {
|
||||
try {
|
||||
const response = await supportBot.anthropicProvider.chat({
|
||||
systemMessage: `You are a helpful customer support agent.
|
||||
Be empathetic, professional, and solution-oriented.`,
|
||||
userMessage: query,
|
||||
messageHistory: history,
|
||||
});
|
||||
|
||||
return response.message;
|
||||
} catch (error) {
|
||||
// Fallback to another provider if needed
|
||||
return await supportBot.openaiProvider.chat({ /* ... */ });
|
||||
}
|
||||
}
|
||||
```
|
||||
|
||||
### Create a Code Review Assistant
|
||||
|
||||
```typescript
|
||||
const codeReviewer = new SmartAi({
|
||||
groqToken: process.env.GROQ_KEY, // Groq for speed
|
||||
const analysis = await analyzeDocuments({
|
||||
model,
|
||||
systemMessage: 'You are a legal document analyst.',
|
||||
userMessage: 'Summarize the key terms and conditions.',
|
||||
pdfDocuments: [fs.readFileSync('contract.pdf')],
|
||||
messageHistory: [], // optional: prior conversation context
|
||||
});
|
||||
|
||||
async function reviewCode(code: string, language: string) {
|
||||
const review = await codeReviewer.groqProvider.chat({
|
||||
systemMessage: `You are a ${language} expert. Review code for:
|
||||
- Security vulnerabilities
|
||||
- Performance issues
|
||||
- Best practices
|
||||
- Potential bugs`,
|
||||
userMessage: `Review this code:\n\n${code}`,
|
||||
messageHistory: [],
|
||||
});
|
||||
console.log(analysis);
|
||||
|
||||
return review.message;
|
||||
}
|
||||
// Clean up the SmartPdf instance when done
|
||||
await stopSmartpdf();
|
||||
```
|
||||
|
||||
### Build a Research Assistant
|
||||
### 🔬 Research — `@push.rocks/smartai/research`
|
||||
|
||||
Perform web-search-powered research using Anthropic's `web_search_20250305` tool.
|
||||
|
||||
```typescript
|
||||
const researcher = new SmartAi({
|
||||
perplexityToken: process.env.PERPLEXITY_KEY,
|
||||
import { research } from '@push.rocks/smartai/research';
|
||||
|
||||
const result = await research({
|
||||
apiKey: process.env.ANTHROPIC_TOKEN,
|
||||
query: 'What are the latest developments in quantum computing?',
|
||||
searchDepth: 'basic', // 'basic' | 'advanced' | 'deep'
|
||||
maxSources: 10, // optional: limit number of search results
|
||||
allowedDomains: ['nature.com', 'arxiv.org'], // optional: restrict to domains
|
||||
blockedDomains: ['reddit.com'], // optional: exclude domains
|
||||
});
|
||||
|
||||
async function research(topic: string) {
|
||||
// Perplexity excels at web-aware research
|
||||
const findings = await researcher.perplexityProvider.research({
|
||||
query: `Research the latest developments in ${topic}`,
|
||||
searchDepth: 'deep',
|
||||
});
|
||||
|
||||
return {
|
||||
answer: findings.answer,
|
||||
sources: findings.sources,
|
||||
};
|
||||
}
|
||||
console.log(result.answer);
|
||||
console.log('Sources:', result.sources); // Array<{ url, title, snippet }>
|
||||
console.log('Queries:', result.searchQueries); // search queries the model used
|
||||
```
|
||||
|
||||
### Local AI for Sensitive Data
|
||||
## 🧪 Testing
|
||||
|
||||
```typescript
|
||||
const localAI = new SmartAi({
|
||||
ollama: {
|
||||
baseUrl: 'http://localhost:11434',
|
||||
model: 'llama2',
|
||||
visionModel: 'llava',
|
||||
},
|
||||
});
|
||||
```bash
|
||||
# All tests
|
||||
pnpm test
|
||||
|
||||
// Process sensitive documents without leaving your infrastructure
|
||||
async function analyzeSensitiveDoc(pdfBuffer: Buffer) {
|
||||
const analysis = await localAI.ollamaProvider.document({
|
||||
systemMessage: 'Extract and summarize key information.',
|
||||
userMessage: 'Analyze this confidential document',
|
||||
messageHistory: [],
|
||||
pdfDocuments: [pdfBuffer],
|
||||
});
|
||||
|
||||
// Data never leaves your servers
|
||||
return analysis.message;
|
||||
}
|
||||
# Individual test files
|
||||
tstest test/test.smartai.ts --verbose # Core getModel + generateText + streamText
|
||||
tstest test/test.ollama.ts --verbose # Ollama provider (mocked, no API needed)
|
||||
tstest test/test.vision.ts --verbose # Vision analysis
|
||||
tstest test/test.image.ts --verbose # Image generation
|
||||
tstest test/test.research.ts --verbose # Web research
|
||||
tstest test/test.audio.ts --verbose # Text-to-speech
|
||||
tstest test/test.document.ts --verbose # Document analysis (needs Chromium)
|
||||
```
|
||||
|
||||
## ⚡ Performance Tips
|
||||
Most tests skip gracefully when API keys are not set. The Ollama tests are fully mocked and require no external services.
|
||||
|
||||
### 1. Provider Selection Strategy
|
||||
## 📐 Architecture
|
||||
|
||||
```typescript
|
||||
class SmartAIRouter {
|
||||
constructor(private ai: SmartAi) {}
|
||||
|
||||
async query(
|
||||
message: string,
|
||||
requirements: {
|
||||
speed?: boolean;
|
||||
accuracy?: boolean;
|
||||
cost?: boolean;
|
||||
privacy?: boolean;
|
||||
}
|
||||
) {
|
||||
if (requirements.privacy) {
|
||||
return this.ai.ollamaProvider.chat({ /* ... */ }); // Local only
|
||||
}
|
||||
if (requirements.speed) {
|
||||
return this.ai.groqProvider.chat({ /* ... */ }); // 10x faster
|
||||
}
|
||||
if (requirements.accuracy) {
|
||||
return this.ai.anthropicProvider.chat({ /* ... */ }); // Best reasoning
|
||||
}
|
||||
// Default fallback
|
||||
return this.ai.openaiProvider.chat({ /* ... */ });
|
||||
}
|
||||
}
|
||||
```
|
||||
@push.rocks/smartai
|
||||
├── ts/ # Core package
|
||||
│ ├── index.ts # Re-exports getModel, AI SDK functions, types
|
||||
│ ├── smartai.classes.smartai.ts # getModel() — provider switch
|
||||
│ ├── smartai.interfaces.ts # ISmartAiOptions, TProvider, IOllamaModelOptions
|
||||
│ ├── smartai.provider.ollama.ts # Custom LanguageModelV3 for Ollama
|
||||
│ ├── smartai.middleware.anthropic.ts # Prompt caching middleware
|
||||
│ └── plugins.ts # AI SDK provider factories
|
||||
├── ts_vision/ # @push.rocks/smartai/vision
|
||||
├── ts_audio/ # @push.rocks/smartai/audio
|
||||
├── ts_image/ # @push.rocks/smartai/image
|
||||
├── ts_document/ # @push.rocks/smartai/document
|
||||
└── ts_research/ # @push.rocks/smartai/research
|
||||
```
|
||||
|
||||
### 2. Streaming for Large Responses
|
||||
The core package is a thin registry. `getModel()` creates the appropriate `@ai-sdk/*` provider, calls it with the model ID, and returns the resulting `LanguageModelV3`. For Anthropic, it optionally wraps the model with prompt caching middleware. For Ollama, it returns a custom `LanguageModelV3` implementation that talks directly to Ollama's `/api/chat` endpoint.
|
||||
|
||||
```typescript
|
||||
// Don't wait for the entire response
|
||||
async function streamResponse(userQuery: string) {
|
||||
const stream = await ai.openaiProvider.chatStream(
|
||||
createInputStream(userQuery)
|
||||
);
|
||||
|
||||
// Process tokens as they arrive
|
||||
for await (const chunk of stream) {
|
||||
updateUI(chunk); // Immediate feedback
|
||||
await processChunk(chunk); // Parallel processing
|
||||
}
|
||||
}
|
||||
```
|
||||
|
||||
### 3. Parallel Multi-Provider Queries
|
||||
|
||||
```typescript
|
||||
// Get the best answer from multiple AIs
|
||||
async function consensusQuery(question: string) {
|
||||
const providers = [
|
||||
ai.openaiProvider.chat({ /* ... */ }),
|
||||
ai.anthropicProvider.chat({ /* ... */ }),
|
||||
ai.perplexityProvider.chat({ /* ... */ }),
|
||||
];
|
||||
|
||||
const responses = await Promise.all(providers);
|
||||
return synthesizeResponses(responses);
|
||||
}
|
||||
```
|
||||
|
||||
## 🛠️ Advanced Configuration
|
||||
|
||||
### Provider-Specific Options
|
||||
|
||||
```typescript
|
||||
const ai = new SmartAi({
|
||||
// OpenAI
|
||||
openaiToken: 'sk-...',
|
||||
|
||||
// Anthropic with extended thinking
|
||||
anthropicToken: 'sk-ant-...',
|
||||
|
||||
// Perplexity for research
|
||||
perplexityToken: 'pplx-...',
|
||||
|
||||
// Groq for speed
|
||||
groqToken: 'gsk_...',
|
||||
|
||||
// Mistral with OCR settings
|
||||
mistralToken: 'your-key',
|
||||
mistral: {
|
||||
chatModel: 'mistral-large-latest',
|
||||
ocrModel: 'mistral-ocr-latest',
|
||||
tableFormat: 'markdown',
|
||||
},
|
||||
|
||||
// XAI (Grok)
|
||||
xaiToken: 'xai-...',
|
||||
|
||||
// ElevenLabs TTS
|
||||
elevenlabsToken: 'sk-...',
|
||||
elevenlabs: {
|
||||
defaultVoiceId: '19STyYD15bswVz51nqLf',
|
||||
defaultModelId: 'eleven_v3',
|
||||
},
|
||||
|
||||
// Ollama (local)
|
||||
ollama: {
|
||||
baseUrl: 'http://localhost:11434',
|
||||
model: 'llama2',
|
||||
visionModel: 'llava',
|
||||
defaultOptions: {
|
||||
num_ctx: 4096,
|
||||
temperature: 0.7,
|
||||
top_p: 0.9,
|
||||
},
|
||||
defaultTimeout: 120000,
|
||||
},
|
||||
|
||||
// Exo (distributed)
|
||||
exo: {
|
||||
baseUrl: 'http://localhost:8080/v1',
|
||||
apiKey: 'optional-key',
|
||||
},
|
||||
});
|
||||
```
|
||||
|
||||
### Error Handling & Fallbacks
|
||||
|
||||
```typescript
|
||||
class ResilientAI {
|
||||
private providers = ['openai', 'anthropic', 'groq'];
|
||||
|
||||
async query(opts: ChatOptions): Promise<ChatResponse> {
|
||||
for (const provider of this.providers) {
|
||||
try {
|
||||
return await this.ai[`${provider}Provider`].chat(opts);
|
||||
} catch (error) {
|
||||
console.warn(`${provider} failed, trying next...`);
|
||||
continue;
|
||||
}
|
||||
}
|
||||
throw new Error('All providers failed');
|
||||
}
|
||||
}
|
||||
```
|
||||
|
||||
## 🎯 Choosing the Right Provider
|
||||
|
||||
| Use Case | Recommended Provider | Why |
|
||||
| --------------------- | -------------------- | --------------------------------------------------------- |
|
||||
| **General Purpose** | OpenAI | Most features, stable, well-documented |
|
||||
| **Complex Reasoning** | Anthropic | Superior logical thinking, extended thinking, safer |
|
||||
| **Document OCR** | Mistral | Native PDF processing, no image conversion overhead |
|
||||
| **Research & Facts** | Perplexity | Web-aware, provides citations |
|
||||
| **Deep Research** | OpenAI | Deep Research API with comprehensive analysis |
|
||||
| **Premium TTS** | ElevenLabs | Most natural voices, 70+ languages, v3 model |
|
||||
| **Speed Critical** | Groq | 10x faster inference, sub-second responses |
|
||||
| **Privacy Critical** | Ollama | 100% local, no data leaves your servers |
|
||||
| **Real-time Data** | XAI | Grok with access to current information |
|
||||
| **Cost Sensitive** | Ollama/Exo | Free (local) or distributed compute |
|
||||
|
||||
## 📈 Roadmap
|
||||
|
||||
- [x] Research & Web Search API
|
||||
- [x] Image generation support (gpt-image-1, DALL-E 3, DALL-E 2)
|
||||
- [x] Extended thinking (Anthropic)
|
||||
- [x] Native PDF OCR (Mistral)
|
||||
- [ ] Streaming function calls
|
||||
- [ ] Voice input processing
|
||||
- [ ] Fine-tuning integration
|
||||
- [ ] Embedding support
|
||||
- [ ] Agent framework
|
||||
- [ ] More providers (Cohere, AI21, etc.)
|
||||
Subpath modules are independent — they import `ai` and provider SDKs directly, not through the core package. This keeps the dependency graph clean and allows tree-shaking.
|
||||
|
||||
## License and Legal Information
|
||||
|
||||
|
||||
@@ -1,55 +0,0 @@
|
||||
import { expect, tap } from '@git.zone/tstest/tapbundle';
|
||||
import * as qenv from '@push.rocks/qenv';
|
||||
import { SmartFs, SmartFsProviderNode } from '@push.rocks/smartfs';
|
||||
|
||||
const testQenv = new qenv.Qenv('./', './.nogit/');
|
||||
const smartfs = new SmartFs(new SmartFsProviderNode());
|
||||
|
||||
import * as smartai from '../ts/index.js';
|
||||
|
||||
let testSmartai: smartai.SmartAi;
|
||||
|
||||
tap.test('ElevenLabs Audio: should create a smartai instance with ElevenLabs provider', async () => {
|
||||
testSmartai = new smartai.SmartAi({
|
||||
elevenlabsToken: await testQenv.getEnvVarOnDemand('ELEVENLABS_TOKEN'),
|
||||
elevenlabs: {
|
||||
defaultVoiceId: '19STyYD15bswVz51nqLf',
|
||||
},
|
||||
});
|
||||
await testSmartai.start();
|
||||
});
|
||||
|
||||
tap.test('ElevenLabs Audio: should create audio response', async () => {
|
||||
const audioStream = await testSmartai.elevenlabsProvider.audio({
|
||||
message: 'Welcome to SmartAI, the unified interface for the world\'s leading artificial intelligence providers. SmartAI brings together OpenAI, Anthropic, Perplexity, and ElevenLabs under a single elegant TypeScript API. Whether you need text generation, vision analysis, document processing, or premium text-to-speech capabilities, SmartAI provides a consistent and powerful interface for all your AI needs. Build intelligent applications at lightning speed without vendor lock-in.',
|
||||
});
|
||||
const chunks: Uint8Array[] = [];
|
||||
for await (const chunk of audioStream) {
|
||||
chunks.push(chunk as Uint8Array);
|
||||
}
|
||||
const audioBuffer = Buffer.concat(chunks);
|
||||
await smartfs.file('./.nogit/testoutput_elevenlabs.mp3').write(audioBuffer);
|
||||
console.log(`Audio Buffer length: ${audioBuffer.length}`);
|
||||
expect(audioBuffer.length).toBeGreaterThan(0);
|
||||
});
|
||||
|
||||
tap.test('ElevenLabs Audio: should create audio with custom voice', async () => {
|
||||
const audioStream = await testSmartai.elevenlabsProvider.audio({
|
||||
message: 'Testing with a different voice.',
|
||||
voiceId: 'JBFqnCBsd6RMkjVDRZzb',
|
||||
});
|
||||
const chunks: Uint8Array[] = [];
|
||||
for await (const chunk of audioStream) {
|
||||
chunks.push(chunk as Uint8Array);
|
||||
}
|
||||
const audioBuffer = Buffer.concat(chunks);
|
||||
await smartfs.file('./.nogit/testoutput_elevenlabs_custom.mp3').write(audioBuffer);
|
||||
console.log(`Audio Buffer length (custom voice): ${audioBuffer.length}`);
|
||||
expect(audioBuffer.length).toBeGreaterThan(0);
|
||||
});
|
||||
|
||||
tap.test('ElevenLabs Audio: should stop the smartai instance', async () => {
|
||||
await testSmartai.stop();
|
||||
});
|
||||
|
||||
export default tap.start();
|
||||
@@ -1,40 +0,0 @@
|
||||
import { expect, tap } from '@git.zone/tstest/tapbundle';
|
||||
import * as qenv from '@push.rocks/qenv';
|
||||
import { SmartFs, SmartFsProviderNode } from '@push.rocks/smartfs';
|
||||
|
||||
const testQenv = new qenv.Qenv('./', './.nogit/');
|
||||
const smartfs = new SmartFs(new SmartFsProviderNode());
|
||||
|
||||
import * as smartai from '../ts/index.js';
|
||||
|
||||
let testSmartai: smartai.SmartAi;
|
||||
|
||||
tap.test('OpenAI Audio: should create a smartai instance with OpenAI provider', async () => {
|
||||
testSmartai = new smartai.SmartAi({
|
||||
openaiToken: await testQenv.getEnvVarOnDemand('OPENAI_TOKEN'),
|
||||
});
|
||||
await testSmartai.start();
|
||||
});
|
||||
|
||||
tap.test('OpenAI Audio: should create audio response', async () => {
|
||||
// Call the audio method with a sample message.
|
||||
const audioStream = await testSmartai.openaiProvider.audio({
|
||||
message: 'This is a test of audio generation.',
|
||||
});
|
||||
// Read all chunks from the stream.
|
||||
const chunks: Uint8Array[] = [];
|
||||
for await (const chunk of audioStream) {
|
||||
chunks.push(chunk as Uint8Array);
|
||||
}
|
||||
const audioBuffer = Buffer.concat(chunks);
|
||||
await smartfs.file('./.nogit/testoutput.mp3').write(audioBuffer);
|
||||
console.log(`Audio Buffer length: ${audioBuffer.length}`);
|
||||
// Assert that the resulting buffer is not empty.
|
||||
expect(audioBuffer.length).toBeGreaterThan(0);
|
||||
});
|
||||
|
||||
tap.test('OpenAI Audio: should stop the smartai instance', async () => {
|
||||
await testSmartai.stop();
|
||||
});
|
||||
|
||||
export default tap.start();
|
||||
@@ -1,36 +0,0 @@
|
||||
import { expect, tap } from '@git.zone/tstest/tapbundle';
|
||||
import * as qenv from '@push.rocks/qenv';
|
||||
|
||||
const testQenv = new qenv.Qenv('./', './.nogit/');
|
||||
|
||||
import * as smartai from '../ts/index.js';
|
||||
|
||||
let anthropicProvider: smartai.AnthropicProvider;
|
||||
|
||||
tap.test('Audio Stubs: should create Anthropic provider', async () => {
|
||||
anthropicProvider = new smartai.AnthropicProvider({
|
||||
anthropicToken: await testQenv.getEnvVarOnDemand('ANTHROPIC_TOKEN'),
|
||||
});
|
||||
await anthropicProvider.start();
|
||||
});
|
||||
|
||||
tap.test('Audio Stubs: Anthropic audio should throw not supported error', async () => {
|
||||
let errorCaught = false;
|
||||
|
||||
try {
|
||||
await anthropicProvider.audio({
|
||||
message: 'This should fail'
|
||||
});
|
||||
} catch (error) {
|
||||
errorCaught = true;
|
||||
expect(error.message).toInclude('not yet supported');
|
||||
}
|
||||
|
||||
expect(errorCaught).toBeTrue();
|
||||
});
|
||||
|
||||
tap.test('Audio Stubs: should stop Anthropic provider', async () => {
|
||||
await anthropicProvider.stop();
|
||||
});
|
||||
|
||||
export default tap.start();
|
||||
36
test/test.audio.ts
Normal file
36
test/test.audio.ts
Normal file
@@ -0,0 +1,36 @@
|
||||
import { tap, expect } from '@git.zone/tstest/tapbundle';
|
||||
import * as qenv from '@push.rocks/qenv';
|
||||
import { textToSpeech } from '../ts_audio/index.js';
|
||||
|
||||
const testQenv = new qenv.Qenv('./', './.nogit/');
|
||||
|
||||
tap.test('textToSpeech should return a readable stream', async () => {
|
||||
const apiKey = await testQenv.getEnvVarOnDemand('OPENAI_TOKEN');
|
||||
if (!apiKey) {
|
||||
console.log('OPENAI_TOKEN not set, skipping test');
|
||||
return;
|
||||
}
|
||||
|
||||
const stream = await textToSpeech({
|
||||
apiKey,
|
||||
text: 'Hello, this is a test of the text to speech system.',
|
||||
voice: 'alloy',
|
||||
model: 'tts-1',
|
||||
});
|
||||
|
||||
expect(stream).toBeTruthy();
|
||||
expect(stream.readable).toBeTrue();
|
||||
|
||||
// Read some bytes to verify it's actual audio data
|
||||
const chunks: Buffer[] = [];
|
||||
for await (const chunk of stream) {
|
||||
chunks.push(Buffer.from(chunk));
|
||||
if (chunks.length > 2) break; // Just read a few chunks to verify
|
||||
}
|
||||
|
||||
const totalBytes = chunks.reduce((sum, c) => sum + c.length, 0);
|
||||
console.log(`Audio stream produced ${totalBytes} bytes in ${chunks.length} chunks`);
|
||||
expect(totalBytes).toBeGreaterThan(0);
|
||||
});
|
||||
|
||||
export default tap.start();
|
||||
@@ -1,93 +0,0 @@
|
||||
import { tap, expect } from '@git.zone/tstest/tapbundle';
|
||||
import * as smartai from '../ts/index.js';
|
||||
|
||||
// Basic instantiation tests that don't require API tokens
|
||||
// These tests can run in CI/CD environments without credentials
|
||||
|
||||
tap.test('Basic: should create SmartAi instance', async () => {
|
||||
const testSmartai = new smartai.SmartAi({
|
||||
openaiToken: 'dummy-token-for-testing'
|
||||
});
|
||||
expect(testSmartai).toBeInstanceOf(smartai.SmartAi);
|
||||
// Provider is only created after calling start()
|
||||
expect(testSmartai.options.openaiToken).toEqual('dummy-token-for-testing');
|
||||
});
|
||||
|
||||
tap.test('Basic: should instantiate OpenAI provider', async () => {
|
||||
const openaiProvider = new smartai.OpenAiProvider({
|
||||
openaiToken: 'dummy-token'
|
||||
});
|
||||
expect(openaiProvider).toBeInstanceOf(smartai.OpenAiProvider);
|
||||
expect(typeof openaiProvider.chat).toEqual('function');
|
||||
expect(typeof openaiProvider.audio).toEqual('function');
|
||||
expect(typeof openaiProvider.vision).toEqual('function');
|
||||
expect(typeof openaiProvider.document).toEqual('function');
|
||||
expect(typeof openaiProvider.research).toEqual('function');
|
||||
});
|
||||
|
||||
tap.test('Basic: should instantiate Anthropic provider', async () => {
|
||||
const anthropicProvider = new smartai.AnthropicProvider({
|
||||
anthropicToken: 'dummy-token'
|
||||
});
|
||||
expect(anthropicProvider).toBeInstanceOf(smartai.AnthropicProvider);
|
||||
expect(typeof anthropicProvider.chat).toEqual('function');
|
||||
expect(typeof anthropicProvider.audio).toEqual('function');
|
||||
expect(typeof anthropicProvider.vision).toEqual('function');
|
||||
expect(typeof anthropicProvider.document).toEqual('function');
|
||||
expect(typeof anthropicProvider.research).toEqual('function');
|
||||
});
|
||||
|
||||
tap.test('Basic: should instantiate Perplexity provider', async () => {
|
||||
const perplexityProvider = new smartai.PerplexityProvider({
|
||||
perplexityToken: 'dummy-token'
|
||||
});
|
||||
expect(perplexityProvider).toBeInstanceOf(smartai.PerplexityProvider);
|
||||
expect(typeof perplexityProvider.chat).toEqual('function');
|
||||
expect(typeof perplexityProvider.research).toEqual('function');
|
||||
});
|
||||
|
||||
tap.test('Basic: should instantiate Groq provider', async () => {
|
||||
const groqProvider = new smartai.GroqProvider({
|
||||
groqToken: 'dummy-token'
|
||||
});
|
||||
expect(groqProvider).toBeInstanceOf(smartai.GroqProvider);
|
||||
expect(typeof groqProvider.chat).toEqual('function');
|
||||
expect(typeof groqProvider.research).toEqual('function');
|
||||
});
|
||||
|
||||
tap.test('Basic: should instantiate Ollama provider', async () => {
|
||||
const ollamaProvider = new smartai.OllamaProvider({
|
||||
baseUrl: 'http://localhost:11434'
|
||||
});
|
||||
expect(ollamaProvider).toBeInstanceOf(smartai.OllamaProvider);
|
||||
expect(typeof ollamaProvider.chat).toEqual('function');
|
||||
expect(typeof ollamaProvider.research).toEqual('function');
|
||||
});
|
||||
|
||||
tap.test('Basic: should instantiate xAI provider', async () => {
|
||||
const xaiProvider = new smartai.XAIProvider({
|
||||
xaiToken: 'dummy-token'
|
||||
});
|
||||
expect(xaiProvider).toBeInstanceOf(smartai.XAIProvider);
|
||||
expect(typeof xaiProvider.chat).toEqual('function');
|
||||
expect(typeof xaiProvider.research).toEqual('function');
|
||||
});
|
||||
|
||||
tap.test('Basic: should instantiate Exo provider', async () => {
|
||||
const exoProvider = new smartai.ExoProvider({
|
||||
exoBaseUrl: 'http://localhost:8000'
|
||||
});
|
||||
expect(exoProvider).toBeInstanceOf(smartai.ExoProvider);
|
||||
expect(typeof exoProvider.chat).toEqual('function');
|
||||
expect(typeof exoProvider.research).toEqual('function');
|
||||
});
|
||||
|
||||
tap.test('Basic: all providers should extend MultiModalModel', async () => {
|
||||
const openai = new smartai.OpenAiProvider({ openaiToken: 'test' });
|
||||
const anthropic = new smartai.AnthropicProvider({ anthropicToken: 'test' });
|
||||
|
||||
expect(openai).toBeInstanceOf(smartai.MultiModalModel);
|
||||
expect(anthropic).toBeInstanceOf(smartai.MultiModalModel);
|
||||
});
|
||||
|
||||
export default tap.start();
|
||||
@@ -1,72 +0,0 @@
|
||||
import { expect, tap } from '@git.zone/tstest/tapbundle';
|
||||
import * as qenv from '@push.rocks/qenv';
|
||||
|
||||
const testQenv = new qenv.Qenv('./', './.nogit/');
|
||||
|
||||
import * as smartai from '../ts/index.js';
|
||||
|
||||
let anthropicProvider: smartai.AnthropicProvider;
|
||||
|
||||
tap.test('Anthropic Chat: should create and start Anthropic provider', async () => {
|
||||
anthropicProvider = new smartai.AnthropicProvider({
|
||||
anthropicToken: await testQenv.getEnvVarOnDemand('ANTHROPIC_TOKEN'),
|
||||
});
|
||||
await anthropicProvider.start();
|
||||
expect(anthropicProvider).toBeInstanceOf(smartai.AnthropicProvider);
|
||||
});
|
||||
|
||||
tap.test('Anthropic Chat: should create chat response', async () => {
|
||||
const userMessage = 'What is the capital of France? Answer in one word.';
|
||||
const response = await anthropicProvider.chat({
|
||||
systemMessage: 'You are a helpful assistant. Be concise.',
|
||||
userMessage: userMessage,
|
||||
messageHistory: [],
|
||||
});
|
||||
console.log(`Anthropic Chat - User: ${userMessage}`);
|
||||
console.log(`Anthropic Chat - Response: ${response.message}`);
|
||||
|
||||
expect(response.role).toEqual('assistant');
|
||||
expect(response.message).toBeTruthy();
|
||||
expect(response.message.toLowerCase()).toInclude('paris');
|
||||
});
|
||||
|
||||
tap.test('Anthropic Chat: should handle message history', async () => {
|
||||
const messageHistory: smartai.ChatMessage[] = [
|
||||
{ role: 'user', content: 'My name is Claude Test' },
|
||||
{ role: 'assistant', content: 'Nice to meet you, Claude Test!' }
|
||||
];
|
||||
|
||||
const response = await anthropicProvider.chat({
|
||||
systemMessage: 'You are a helpful assistant with good memory.',
|
||||
userMessage: 'What is my name?',
|
||||
messageHistory: messageHistory,
|
||||
});
|
||||
|
||||
console.log(`Anthropic Memory Test - Response: ${response.message}`);
|
||||
expect(response.message.toLowerCase()).toInclude('claude test');
|
||||
});
|
||||
|
||||
tap.test('Anthropic Chat: should handle errors gracefully', async () => {
|
||||
// Test with invalid message (empty)
|
||||
let errorCaught = false;
|
||||
|
||||
try {
|
||||
await anthropicProvider.chat({
|
||||
systemMessage: '',
|
||||
userMessage: '',
|
||||
messageHistory: [],
|
||||
});
|
||||
} catch (error) {
|
||||
errorCaught = true;
|
||||
console.log('Expected error caught:', error.message);
|
||||
}
|
||||
|
||||
// Anthropic might handle empty messages, so we don't assert error
|
||||
console.log(`Error handling test - Error caught: ${errorCaught}`);
|
||||
});
|
||||
|
||||
tap.test('Anthropic Chat: should stop the provider', async () => {
|
||||
await anthropicProvider.stop();
|
||||
});
|
||||
|
||||
export default tap.start();
|
||||
@@ -1,66 +0,0 @@
|
||||
import { expect, tap } from '@git.zone/tstest/tapbundle';
|
||||
import * as qenv from '@push.rocks/qenv';
|
||||
|
||||
const testQenv = new qenv.Qenv('./', './.nogit/');
|
||||
|
||||
import * as smartai from '../ts/index.js';
|
||||
|
||||
let mistralProvider: smartai.MistralProvider;
|
||||
|
||||
tap.test('Mistral Chat: should create and start Mistral provider', async () => {
|
||||
mistralProvider = new smartai.MistralProvider({
|
||||
mistralToken: await testQenv.getEnvVarOnDemand('MISTRAL_API_KEY'),
|
||||
});
|
||||
await mistralProvider.start();
|
||||
expect(mistralProvider).toBeInstanceOf(smartai.MistralProvider);
|
||||
});
|
||||
|
||||
tap.test('Mistral Chat: should create chat response', async () => {
|
||||
const userMessage = 'What is the capital of France? Answer in one word.';
|
||||
const response = await mistralProvider.chat({
|
||||
systemMessage: 'You are a helpful assistant. Be concise.',
|
||||
userMessage: userMessage,
|
||||
messageHistory: [],
|
||||
});
|
||||
console.log(`Mistral Chat - User: ${userMessage}`);
|
||||
console.log(`Mistral Chat - Response: ${response.message}`);
|
||||
|
||||
expect(response.role).toEqual('assistant');
|
||||
expect(response.message).toBeTruthy();
|
||||
expect(response.message.toLowerCase()).toInclude('paris');
|
||||
});
|
||||
|
||||
tap.test('Mistral Chat: should handle message history', async () => {
|
||||
const messageHistory: smartai.ChatMessage[] = [
|
||||
{ role: 'user', content: 'My name is Claude Test' },
|
||||
{ role: 'assistant', content: 'Nice to meet you, Claude Test!' }
|
||||
];
|
||||
|
||||
const response = await mistralProvider.chat({
|
||||
systemMessage: 'You are a helpful assistant with good memory.',
|
||||
userMessage: 'What is my name?',
|
||||
messageHistory: messageHistory,
|
||||
});
|
||||
|
||||
console.log(`Mistral Memory Test - Response: ${response.message}`);
|
||||
expect(response.message.toLowerCase()).toInclude('claude test');
|
||||
});
|
||||
|
||||
tap.test('Mistral Chat: should handle longer conversations', async () => {
|
||||
const response = await mistralProvider.chat({
|
||||
systemMessage: 'You are a helpful coding assistant.',
|
||||
userMessage: 'Write a simple hello world function in TypeScript. Keep it brief.',
|
||||
messageHistory: [],
|
||||
});
|
||||
|
||||
console.log(`Mistral Coding Test - Response: ${response.message}`);
|
||||
expect(response.message).toBeTruthy();
|
||||
// Should contain some TypeScript/function code
|
||||
expect(response.message).toInclude('function');
|
||||
});
|
||||
|
||||
tap.test('Mistral Chat: should stop the provider', async () => {
|
||||
await mistralProvider.stop();
|
||||
});
|
||||
|
||||
export default tap.start();
|
||||
@@ -1,34 +0,0 @@
|
||||
import { expect, tap } from '@git.zone/tstest/tapbundle';
|
||||
import * as qenv from '@push.rocks/qenv';
|
||||
|
||||
const testQenv = new qenv.Qenv('./', './.nogit/');
|
||||
|
||||
import * as smartai from '../ts/index.js';
|
||||
|
||||
let testSmartai: smartai.SmartAi;
|
||||
|
||||
tap.test('OpenAI Chat: should create a smartai instance with OpenAI provider', async () => {
|
||||
testSmartai = new smartai.SmartAi({
|
||||
openaiToken: await testQenv.getEnvVarOnDemand('OPENAI_TOKEN'),
|
||||
});
|
||||
await testSmartai.start();
|
||||
});
|
||||
|
||||
tap.test('OpenAI Chat: should create chat response', async () => {
|
||||
const userMessage = 'How are you?';
|
||||
const response = await testSmartai.openaiProvider.chat({
|
||||
systemMessage: 'Hello',
|
||||
userMessage: userMessage,
|
||||
messageHistory: [],
|
||||
});
|
||||
console.log(`userMessage: ${userMessage}`);
|
||||
console.log(response.message);
|
||||
expect(response.role).toEqual('assistant');
|
||||
expect(response.message).toBeTruthy();
|
||||
});
|
||||
|
||||
tap.test('OpenAI Chat: should stop the smartai instance', async () => {
|
||||
await testSmartai.stop();
|
||||
});
|
||||
|
||||
export default tap.start();
|
||||
@@ -1,79 +0,0 @@
|
||||
import { expect, tap } from '@git.zone/tstest/tapbundle';
|
||||
import * as qenv from '@push.rocks/qenv';
|
||||
import * as smartrequest from '@push.rocks/smartrequest';
|
||||
import { SmartFs, SmartFsProviderNode } from '@push.rocks/smartfs';
|
||||
|
||||
const testQenv = new qenv.Qenv('./', './.nogit/');
|
||||
const smartfs = new SmartFs(new SmartFsProviderNode());
|
||||
|
||||
import * as smartai from '../ts/index.js';
|
||||
|
||||
let anthropicProvider: smartai.AnthropicProvider;
|
||||
|
||||
tap.test('Anthropic Document: should create and start Anthropic provider', async () => {
|
||||
anthropicProvider = new smartai.AnthropicProvider({
|
||||
anthropicToken: await testQenv.getEnvVarOnDemand('ANTHROPIC_TOKEN'),
|
||||
});
|
||||
await anthropicProvider.start();
|
||||
expect(anthropicProvider).toBeInstanceOf(smartai.AnthropicProvider);
|
||||
});
|
||||
|
||||
tap.test('Anthropic Document: should document a PDF', async () => {
|
||||
const pdfUrl = 'https://www.w3.org/WAI/ER/tests/xhtml/testfiles/resources/pdf/dummy.pdf';
|
||||
const pdfResponse = await smartrequest.SmartRequest.create()
|
||||
.url(pdfUrl)
|
||||
.get();
|
||||
|
||||
const result = await anthropicProvider.document({
|
||||
systemMessage: 'Classify the document. Only the following answers are allowed: "invoice", "bank account statement", "contract", "test document", "other". The answer should only contain the keyword for machine use.',
|
||||
userMessage: 'Classify this document.',
|
||||
messageHistory: [],
|
||||
pdfDocuments: [Buffer.from(await pdfResponse.arrayBuffer())],
|
||||
});
|
||||
|
||||
console.log(`Anthropic Document - Result:`, result);
|
||||
expect(result).toBeTruthy();
|
||||
expect(result.message).toBeTruthy();
|
||||
});
|
||||
|
||||
tap.test('Anthropic Document: should handle complex document analysis', async () => {
|
||||
// Test with the demo PDF if it exists
|
||||
const pdfPath = './.nogit/demo_without_textlayer.pdf';
|
||||
let pdfBuffer: Uint8Array;
|
||||
|
||||
try {
|
||||
pdfBuffer = await smartfs.file(pdfPath).read();
|
||||
} catch (error) {
|
||||
// If the file doesn't exist, use the dummy PDF
|
||||
console.log('Demo PDF not found, using dummy PDF instead');
|
||||
const pdfUrl = 'https://www.w3.org/WAI/ER/tests/xhtml/testfiles/resources/pdf/dummy.pdf';
|
||||
const pdfResponse = await smartrequest.SmartRequest.create()
|
||||
.url(pdfUrl)
|
||||
.get();
|
||||
pdfBuffer = Buffer.from(await pdfResponse.arrayBuffer());
|
||||
}
|
||||
|
||||
const result = await anthropicProvider.document({
|
||||
systemMessage: `
|
||||
Analyze this document and provide a JSON response with the following structure:
|
||||
{
|
||||
"documentType": "string",
|
||||
"hasText": boolean,
|
||||
"summary": "string"
|
||||
}
|
||||
`,
|
||||
userMessage: 'Analyze this document.',
|
||||
messageHistory: [],
|
||||
pdfDocuments: [pdfBuffer],
|
||||
});
|
||||
|
||||
console.log(`Anthropic Complex Document Analysis:`, result);
|
||||
expect(result).toBeTruthy();
|
||||
expect(result.message).toBeTruthy();
|
||||
});
|
||||
|
||||
tap.test('Anthropic Document: should stop the provider', async () => {
|
||||
await anthropicProvider.stop();
|
||||
});
|
||||
|
||||
export default tap.start();
|
||||
@@ -1,100 +0,0 @@
|
||||
import { expect, tap } from '@git.zone/tstest/tapbundle';
|
||||
import * as qenv from '@push.rocks/qenv';
|
||||
import * as smartrequest from '@push.rocks/smartrequest';
|
||||
import { SmartFs, SmartFsProviderNode } from '@push.rocks/smartfs';
|
||||
|
||||
const testQenv = new qenv.Qenv('./', './.nogit/');
|
||||
const smartfs = new SmartFs(new SmartFsProviderNode());
|
||||
|
||||
import * as smartai from '../ts/index.js';
|
||||
|
||||
let mistralProvider: smartai.MistralProvider;
|
||||
|
||||
tap.test('Mistral Document: should create and start Mistral provider', async () => {
|
||||
mistralProvider = new smartai.MistralProvider({
|
||||
mistralToken: await testQenv.getEnvVarOnDemand('MISTRAL_API_KEY'),
|
||||
tableFormat: 'markdown',
|
||||
});
|
||||
await mistralProvider.start();
|
||||
expect(mistralProvider).toBeInstanceOf(smartai.MistralProvider);
|
||||
});
|
||||
|
||||
tap.test('Mistral Document: should process a PDF document', async () => {
|
||||
const pdfUrl = 'https://www.w3.org/WAI/ER/tests/xhtml/testfiles/resources/pdf/dummy.pdf';
|
||||
const pdfResponse = await smartrequest.SmartRequest.create()
|
||||
.url(pdfUrl)
|
||||
.get();
|
||||
|
||||
const result = await mistralProvider.document({
|
||||
systemMessage: 'Classify the document. Only the following answers are allowed: "invoice", "bank account statement", "contract", "test document", "other". The answer should only contain the keyword for machine use.',
|
||||
userMessage: 'Classify this document.',
|
||||
messageHistory: [],
|
||||
pdfDocuments: [Buffer.from(await pdfResponse.arrayBuffer())],
|
||||
});
|
||||
|
||||
console.log(`Mistral Document - Result:`, result);
|
||||
expect(result).toBeTruthy();
|
||||
expect(result.message).toBeTruthy();
|
||||
});
|
||||
|
||||
tap.test('Mistral Document: should handle complex document analysis', async () => {
|
||||
// Test with the demo PDF if it exists
|
||||
const pdfPath = './.nogit/demo_without_textlayer.pdf';
|
||||
let pdfBuffer: Uint8Array;
|
||||
|
||||
try {
|
||||
pdfBuffer = await smartfs.file(pdfPath).read();
|
||||
} catch (error) {
|
||||
// If the file doesn't exist, use the dummy PDF
|
||||
console.log('Demo PDF not found, using dummy PDF instead');
|
||||
const pdfUrl = 'https://www.w3.org/WAI/ER/tests/xhtml/testfiles/resources/pdf/dummy.pdf';
|
||||
const pdfResponse = await smartrequest.SmartRequest.create()
|
||||
.url(pdfUrl)
|
||||
.get();
|
||||
pdfBuffer = Buffer.from(await pdfResponse.arrayBuffer());
|
||||
}
|
||||
|
||||
const result = await mistralProvider.document({
|
||||
systemMessage: `
|
||||
Analyze this document and provide a JSON response with the following structure:
|
||||
{
|
||||
"documentType": "string",
|
||||
"hasText": boolean,
|
||||
"summary": "string"
|
||||
}
|
||||
`,
|
||||
userMessage: 'Analyze this document.',
|
||||
messageHistory: [],
|
||||
pdfDocuments: [pdfBuffer],
|
||||
});
|
||||
|
||||
console.log(`Mistral Complex Document Analysis:`, result);
|
||||
expect(result).toBeTruthy();
|
||||
expect(result.message).toBeTruthy();
|
||||
});
|
||||
|
||||
tap.test('Mistral Document: should process multiple PDF documents', async () => {
|
||||
const pdfUrl = 'https://www.w3.org/WAI/ER/tests/xhtml/testfiles/resources/pdf/dummy.pdf';
|
||||
const pdfResponse = await smartrequest.SmartRequest.create()
|
||||
.url(pdfUrl)
|
||||
.get();
|
||||
|
||||
const pdfBuffer = Buffer.from(await pdfResponse.arrayBuffer());
|
||||
|
||||
const result = await mistralProvider.document({
|
||||
systemMessage: 'You are a document comparison assistant.',
|
||||
userMessage: 'Are these two documents the same? Answer yes or no.',
|
||||
messageHistory: [],
|
||||
pdfDocuments: [pdfBuffer, pdfBuffer], // Same document twice for test
|
||||
});
|
||||
|
||||
console.log(`Mistral Multi-Document - Result:`, result);
|
||||
expect(result).toBeTruthy();
|
||||
expect(result.message).toBeTruthy();
|
||||
});
|
||||
|
||||
tap.test('Mistral Document: should stop the provider', async () => {
|
||||
await mistralProvider.stop();
|
||||
});
|
||||
|
||||
export default tap.start();
|
||||
@@ -1,77 +0,0 @@
|
||||
import { expect, tap } from '@git.zone/tstest/tapbundle';
|
||||
import * as qenv from '@push.rocks/qenv';
|
||||
import * as smartrequest from '@push.rocks/smartrequest';
|
||||
import { SmartFs, SmartFsProviderNode } from '@push.rocks/smartfs';
|
||||
|
||||
const testQenv = new qenv.Qenv('./', './.nogit/');
|
||||
const smartfs = new SmartFs(new SmartFsProviderNode());
|
||||
|
||||
import * as smartai from '../ts/index.js';
|
||||
|
||||
let testSmartai: smartai.SmartAi;
|
||||
|
||||
tap.test('OpenAI Document: should create a smartai instance with OpenAI provider', async () => {
|
||||
testSmartai = new smartai.SmartAi({
|
||||
openaiToken: await testQenv.getEnvVarOnDemand('OPENAI_TOKEN'),
|
||||
});
|
||||
await testSmartai.start();
|
||||
});
|
||||
|
||||
tap.test('OpenAI Document: should document a pdf', async () => {
|
||||
const pdfUrl = 'https://www.w3.org/WAI/ER/tests/xhtml/testfiles/resources/pdf/dummy.pdf';
|
||||
const pdfResponse = await smartrequest.SmartRequest.create()
|
||||
.url(pdfUrl)
|
||||
.get();
|
||||
const result = await testSmartai.openaiProvider.document({
|
||||
systemMessage: 'Classify the document. Only the following answers are allowed: "invoice", "bank account statement", "contract", "other". The answer should only contain the keyword for machine use.',
|
||||
userMessage: "Classify the document.",
|
||||
messageHistory: [],
|
||||
pdfDocuments: [Buffer.from(await pdfResponse.arrayBuffer())],
|
||||
});
|
||||
console.log(result);
|
||||
expect(result.message).toBeTruthy();
|
||||
});
|
||||
|
||||
tap.test('OpenAI Document: should recognize companies in a pdf', async () => {
|
||||
const pdfBuffer = await smartfs.file('./.nogit/demo_without_textlayer.pdf').read();
|
||||
const result = await testSmartai.openaiProvider.document({
|
||||
systemMessage: `
|
||||
summarize the document.
|
||||
|
||||
answer in JSON format, adhering to the following schema:
|
||||
\`\`\`typescript
|
||||
type TAnswer = {
|
||||
entitySender: {
|
||||
type: 'official state entity' | 'company' | 'person';
|
||||
name: string;
|
||||
address: string;
|
||||
city: string;
|
||||
country: string;
|
||||
EU: boolean; // whether the entity is within EU
|
||||
};
|
||||
entityReceiver: {
|
||||
type: 'official state entity' | 'company' | 'person';
|
||||
name: string;
|
||||
address: string;
|
||||
city: string;
|
||||
country: string;
|
||||
EU: boolean; // whether the entity is within EU
|
||||
};
|
||||
date: string; // the date of the document as YYYY-MM-DD
|
||||
title: string; // a short title, suitable for a filename
|
||||
}
|
||||
\`\`\`
|
||||
`,
|
||||
userMessage: "Classify the document.",
|
||||
messageHistory: [],
|
||||
pdfDocuments: [pdfBuffer],
|
||||
});
|
||||
console.log(result);
|
||||
expect(result.message).toBeTruthy();
|
||||
});
|
||||
|
||||
tap.test('OpenAI Document: should stop the smartai instance', async () => {
|
||||
await testSmartai.stop();
|
||||
});
|
||||
|
||||
export default tap.start();
|
||||
50
test/test.document.ts
Normal file
50
test/test.document.ts
Normal file
@@ -0,0 +1,50 @@
|
||||
import { tap, expect } from '@git.zone/tstest/tapbundle';
|
||||
import * as qenv from '@push.rocks/qenv';
|
||||
import { getModel } from '../ts/index.js';
|
||||
import { analyzeDocuments, stopSmartpdf } from '../ts_document/index.js';
|
||||
|
||||
const testQenv = new qenv.Qenv('./', './.nogit/');
|
||||
|
||||
tap.test('analyzeDocuments should analyze a PDF', async () => {
|
||||
const apiKey = await testQenv.getEnvVarOnDemand('ANTHROPIC_TOKEN');
|
||||
if (!apiKey) {
|
||||
console.log('ANTHROPIC_TOKEN not set, skipping test');
|
||||
return;
|
||||
}
|
||||
|
||||
// Create a minimal test PDF (this is a valid minimal PDF)
|
||||
const minimalPdf = Buffer.from(
|
||||
'%PDF-1.0\n1 0 obj<</Type/Catalog/Pages 2 0 R>>endobj\n' +
|
||||
'2 0 obj<</Type/Pages/Kids[3 0 R]/Count 1>>endobj\n' +
|
||||
'3 0 obj<</Type/Page/MediaBox[0 0 612 792]/Parent 2 0 R/Contents 4 0 R/Resources<</Font<</F1 5 0 R>>>>>>endobj\n' +
|
||||
'4 0 obj<</Length 44>>stream\nBT /F1 12 Tf 100 700 Td (Hello World) Tj ET\nendstream\nendobj\n' +
|
||||
'5 0 obj<</Type/Font/Subtype/Type1/BaseFont/Helvetica>>endobj\n' +
|
||||
'xref\n0 6\n0000000000 65535 f \n0000000009 00000 n \n0000000058 00000 n \n0000000115 00000 n \n0000000266 00000 n \n0000000360 00000 n \n' +
|
||||
'trailer<</Size 6/Root 1 0 R>>\nstartxref\n434\n%%EOF'
|
||||
);
|
||||
|
||||
const model = getModel({
|
||||
provider: 'anthropic',
|
||||
model: 'claude-sonnet-4-5-20250929',
|
||||
apiKey,
|
||||
promptCaching: false,
|
||||
});
|
||||
|
||||
try {
|
||||
const result = await analyzeDocuments({
|
||||
model,
|
||||
systemMessage: 'You are a document analysis assistant.',
|
||||
userMessage: 'What text is visible in this document?',
|
||||
pdfDocuments: [minimalPdf],
|
||||
});
|
||||
|
||||
console.log('Document analysis result:', result);
|
||||
expect(result).toBeTruthy();
|
||||
} catch (error) {
|
||||
console.log('Document test failed (may need puppeteer):', error.message);
|
||||
} finally {
|
||||
await stopSmartpdf();
|
||||
}
|
||||
});
|
||||
|
||||
export default tap.start();
|
||||
@@ -1,203 +0,0 @@
|
||||
import { expect, tap } from '@git.zone/tstest/tapbundle';
|
||||
import * as qenv from '@push.rocks/qenv';
|
||||
import * as smartai from '../ts/index.js';
|
||||
import * as path from 'path';
|
||||
import { promises as fs } from 'fs';
|
||||
|
||||
const testQenv = new qenv.Qenv('./', './.nogit/');
|
||||
|
||||
let openaiProvider: smartai.OpenAiProvider;
|
||||
|
||||
// Helper function to save image results
|
||||
async function saveImageResult(testName: string, result: any) {
|
||||
const sanitizedName = testName.replace(/[^a-z0-9]/gi, '_').toLowerCase();
|
||||
const timestamp = new Date().toISOString().replace(/[:.]/g, '-');
|
||||
const filename = `openai_${sanitizedName}_${timestamp}.json`;
|
||||
const filepath = path.join('.nogit', 'testresults', 'images', filename);
|
||||
|
||||
await fs.mkdir(path.dirname(filepath), { recursive: true });
|
||||
await fs.writeFile(filepath, JSON.stringify(result, null, 2), 'utf-8');
|
||||
|
||||
console.log(` 💾 Saved to: ${filepath}`);
|
||||
|
||||
// Also save the actual image if b64_json is present
|
||||
if (result.images && result.images[0]?.b64_json) {
|
||||
const imageFilename = `openai_${sanitizedName}_${timestamp}.png`;
|
||||
const imageFilepath = path.join('.nogit', 'testresults', 'images', imageFilename);
|
||||
await fs.writeFile(imageFilepath, Buffer.from(result.images[0].b64_json, 'base64'));
|
||||
console.log(` 🖼️ Image saved to: ${imageFilepath}`);
|
||||
}
|
||||
}
|
||||
|
||||
tap.test('OpenAI Image Generation: should initialize provider', async () => {
|
||||
const openaiToken = await testQenv.getEnvVarOnDemand('OPENAI_TOKEN');
|
||||
expect(openaiToken).toBeTruthy();
|
||||
|
||||
openaiProvider = new smartai.OpenAiProvider({
|
||||
openaiToken,
|
||||
imageModel: 'gpt-image-1'
|
||||
});
|
||||
|
||||
await openaiProvider.start();
|
||||
expect(openaiProvider).toBeInstanceOf(smartai.OpenAiProvider);
|
||||
});
|
||||
|
||||
tap.test('OpenAI Image: Basic generation with gpt-image-1', async () => {
|
||||
const result = await openaiProvider.imageGenerate({
|
||||
prompt: 'A cute robot reading a book in a cozy library, digital art style',
|
||||
model: 'gpt-image-1',
|
||||
quality: 'medium',
|
||||
size: '1024x1024'
|
||||
});
|
||||
|
||||
console.log('Basic gpt-image-1 Generation:');
|
||||
console.log('- Images generated:', result.images.length);
|
||||
console.log('- Model used:', result.metadata?.model);
|
||||
console.log('- Quality:', result.metadata?.quality);
|
||||
console.log('- Size:', result.metadata?.size);
|
||||
console.log('- Tokens used:', result.metadata?.tokensUsed);
|
||||
|
||||
await saveImageResult('basic_generation_gptimage1', result);
|
||||
|
||||
expect(result.images).toBeTruthy();
|
||||
expect(result.images.length).toEqual(1);
|
||||
expect(result.images[0].b64_json).toBeTruthy();
|
||||
expect(result.metadata?.model).toEqual('gpt-image-1');
|
||||
});
|
||||
|
||||
tap.test('OpenAI Image: High quality with transparent background', async () => {
|
||||
const result = await openaiProvider.imageGenerate({
|
||||
prompt: 'A simple geometric logo of a mountain peak, minimal design, clean lines',
|
||||
model: 'gpt-image-1',
|
||||
quality: 'high',
|
||||
size: '1024x1024',
|
||||
background: 'transparent',
|
||||
outputFormat: 'png'
|
||||
});
|
||||
|
||||
console.log('High Quality Transparent:');
|
||||
console.log('- Quality:', result.metadata?.quality);
|
||||
console.log('- Background: transparent');
|
||||
console.log('- Format:', result.metadata?.outputFormat);
|
||||
console.log('- Tokens used:', result.metadata?.tokensUsed);
|
||||
|
||||
await saveImageResult('high_quality_transparent', result);
|
||||
|
||||
expect(result.images.length).toEqual(1);
|
||||
expect(result.images[0].b64_json).toBeTruthy();
|
||||
});
|
||||
|
||||
tap.test('OpenAI Image: WebP format with compression', async () => {
|
||||
const result = await openaiProvider.imageGenerate({
|
||||
prompt: 'A futuristic cityscape at sunset with flying cars, photorealistic',
|
||||
model: 'gpt-image-1',
|
||||
quality: 'high',
|
||||
size: '1536x1024',
|
||||
outputFormat: 'webp',
|
||||
outputCompression: 85
|
||||
});
|
||||
|
||||
console.log('WebP with Compression:');
|
||||
console.log('- Format:', result.metadata?.outputFormat);
|
||||
console.log('- Compression: 85%');
|
||||
console.log('- Size:', result.metadata?.size);
|
||||
|
||||
await saveImageResult('webp_compression', result);
|
||||
|
||||
expect(result.images.length).toEqual(1);
|
||||
expect(result.images[0].b64_json).toBeTruthy();
|
||||
});
|
||||
|
||||
tap.test('OpenAI Image: Text rendering with gpt-image-1', async () => {
|
||||
const result = await openaiProvider.imageGenerate({
|
||||
prompt: 'A vintage cafe sign that says "COFFEE & CODE" in elegant hand-lettered typography, warm colors',
|
||||
model: 'gpt-image-1',
|
||||
quality: 'high',
|
||||
size: '1024x1024'
|
||||
});
|
||||
|
||||
console.log('Text Rendering:');
|
||||
console.log('- Prompt includes text: "COFFEE & CODE"');
|
||||
console.log('- gpt-image-1 has superior text rendering');
|
||||
console.log('- Tokens used:', result.metadata?.tokensUsed);
|
||||
|
||||
await saveImageResult('text_rendering', result);
|
||||
|
||||
expect(result.images.length).toEqual(1);
|
||||
expect(result.images[0].b64_json).toBeTruthy();
|
||||
});
|
||||
|
||||
tap.test('OpenAI Image: Multiple images generation', async () => {
|
||||
const result = await openaiProvider.imageGenerate({
|
||||
prompt: 'Abstract colorful geometric patterns, modern minimalist art',
|
||||
model: 'gpt-image-1',
|
||||
n: 2,
|
||||
quality: 'medium',
|
||||
size: '1024x1024'
|
||||
});
|
||||
|
||||
console.log('Multiple Images:');
|
||||
console.log('- Images requested: 2');
|
||||
console.log('- Images generated:', result.images.length);
|
||||
|
||||
await saveImageResult('multiple_images', result);
|
||||
|
||||
expect(result.images.length).toEqual(2);
|
||||
expect(result.images[0].b64_json).toBeTruthy();
|
||||
expect(result.images[1].b64_json).toBeTruthy();
|
||||
});
|
||||
|
||||
tap.test('OpenAI Image: Low moderation setting', async () => {
|
||||
const result = await openaiProvider.imageGenerate({
|
||||
prompt: 'A fantasy battle scene with warriors and dragons',
|
||||
model: 'gpt-image-1',
|
||||
moderation: 'low',
|
||||
quality: 'medium'
|
||||
});
|
||||
|
||||
console.log('Low Moderation:');
|
||||
console.log('- Moderation: low (less restrictive filtering)');
|
||||
console.log('- Tokens used:', result.metadata?.tokensUsed);
|
||||
|
||||
await saveImageResult('low_moderation', result);
|
||||
|
||||
expect(result.images.length).toEqual(1);
|
||||
expect(result.images[0].b64_json).toBeTruthy();
|
||||
});
|
||||
|
||||
tap.test('OpenAI Image Editing: edit with gpt-image-1', async () => {
|
||||
// First, generate a base image
|
||||
const baseResult = await openaiProvider.imageGenerate({
|
||||
prompt: 'A simple white cat sitting on a red cushion',
|
||||
model: 'gpt-image-1',
|
||||
quality: 'low',
|
||||
size: '1024x1024'
|
||||
});
|
||||
|
||||
const baseImageBuffer = Buffer.from(baseResult.images[0].b64_json!, 'base64');
|
||||
|
||||
// Now edit it
|
||||
const editResult = await openaiProvider.imageEdit({
|
||||
image: baseImageBuffer,
|
||||
prompt: 'Change the cat to orange and add stylish sunglasses',
|
||||
model: 'gpt-image-1',
|
||||
quality: 'medium'
|
||||
});
|
||||
|
||||
console.log('Image Editing:');
|
||||
console.log('- Base image created');
|
||||
console.log('- Edit: change color and add sunglasses');
|
||||
console.log('- Result images:', editResult.images.length);
|
||||
|
||||
await saveImageResult('image_edit', editResult);
|
||||
|
||||
expect(editResult.images.length).toEqual(1);
|
||||
expect(editResult.images[0].b64_json).toBeTruthy();
|
||||
});
|
||||
|
||||
tap.test('OpenAI Image: should clean up provider', async () => {
|
||||
await openaiProvider.stop();
|
||||
console.log('OpenAI image provider stopped successfully');
|
||||
});
|
||||
|
||||
export default tap.start();
|
||||
35
test/test.image.ts
Normal file
35
test/test.image.ts
Normal file
@@ -0,0 +1,35 @@
|
||||
import { tap, expect } from '@git.zone/tstest/tapbundle';
|
||||
import * as qenv from '@push.rocks/qenv';
|
||||
import { generateImage } from '../ts_image/index.js';
|
||||
|
||||
const testQenv = new qenv.Qenv('./', './.nogit/');
|
||||
|
||||
tap.test('generateImage should return an image response', async () => {
|
||||
const apiKey = await testQenv.getEnvVarOnDemand('OPENAI_TOKEN');
|
||||
if (!apiKey) {
|
||||
console.log('OPENAI_TOKEN not set, skipping test');
|
||||
return;
|
||||
}
|
||||
|
||||
const result = await generateImage({
|
||||
apiKey,
|
||||
prompt: 'A simple red circle on a white background',
|
||||
model: 'gpt-image-1',
|
||||
size: '1024x1024',
|
||||
quality: 'low',
|
||||
n: 1,
|
||||
});
|
||||
|
||||
console.log('Image generation result: images count =', result.images.length);
|
||||
expect(result.images).toBeArray();
|
||||
expect(result.images.length).toBeGreaterThan(0);
|
||||
|
||||
const firstImage = result.images[0];
|
||||
// gpt-image-1 returns b64_json by default
|
||||
expect(firstImage.b64_json || firstImage.url).toBeTruthy();
|
||||
|
||||
expect(result.metadata).toBeTruthy();
|
||||
expect(result.metadata!.model).toEqual('gpt-image-1');
|
||||
});
|
||||
|
||||
export default tap.start();
|
||||
@@ -1,140 +0,0 @@
|
||||
import { tap, expect } from '@git.zone/tstest/tapbundle';
|
||||
import * as smartai from '../ts/index.js';
|
||||
|
||||
// Test interface exports and type checking
|
||||
// These tests verify that all interfaces are properly exported and usable
|
||||
|
||||
tap.test('Interfaces: ResearchOptions should be properly typed', async () => {
|
||||
const testOptions: smartai.ResearchOptions = {
|
||||
query: 'test query',
|
||||
searchDepth: 'basic',
|
||||
maxSources: 10,
|
||||
includeWebSearch: true,
|
||||
background: false
|
||||
};
|
||||
|
||||
expect(testOptions).toBeInstanceOf(Object);
|
||||
expect(testOptions.query).toEqual('test query');
|
||||
expect(testOptions.searchDepth).toEqual('basic');
|
||||
});
|
||||
|
||||
tap.test('Interfaces: ResearchResponse should be properly typed', async () => {
|
||||
const testResponse: smartai.ResearchResponse = {
|
||||
answer: 'test answer',
|
||||
sources: [
|
||||
{
|
||||
url: 'https://example.com',
|
||||
title: 'Example Source',
|
||||
snippet: 'This is a snippet'
|
||||
}
|
||||
],
|
||||
searchQueries: ['query1', 'query2'],
|
||||
metadata: {
|
||||
model: 'test-model',
|
||||
tokensUsed: 100
|
||||
}
|
||||
};
|
||||
|
||||
expect(testResponse).toBeInstanceOf(Object);
|
||||
expect(testResponse.answer).toEqual('test answer');
|
||||
expect(testResponse.sources).toBeArray();
|
||||
expect(testResponse.sources[0].url).toEqual('https://example.com');
|
||||
});
|
||||
|
||||
tap.test('Interfaces: ChatOptions should be properly typed', async () => {
|
||||
const testChatOptions: smartai.ChatOptions = {
|
||||
systemMessage: 'You are a helpful assistant',
|
||||
userMessage: 'Hello',
|
||||
messageHistory: [
|
||||
{ role: 'user', content: 'Previous message' },
|
||||
{ role: 'assistant', content: 'Previous response' }
|
||||
]
|
||||
};
|
||||
|
||||
expect(testChatOptions).toBeInstanceOf(Object);
|
||||
expect(testChatOptions.systemMessage).toBeTruthy();
|
||||
expect(testChatOptions.messageHistory).toBeArray();
|
||||
});
|
||||
|
||||
tap.test('Interfaces: ChatResponse should be properly typed', async () => {
|
||||
const testChatResponse: smartai.ChatResponse = {
|
||||
role: 'assistant',
|
||||
message: 'This is a response'
|
||||
};
|
||||
|
||||
expect(testChatResponse).toBeInstanceOf(Object);
|
||||
expect(testChatResponse.role).toEqual('assistant');
|
||||
expect(testChatResponse.message).toBeTruthy();
|
||||
});
|
||||
|
||||
tap.test('Interfaces: ChatMessage should be properly typed', async () => {
|
||||
const testMessage: smartai.ChatMessage = {
|
||||
role: 'user',
|
||||
content: 'Test message'
|
||||
};
|
||||
|
||||
expect(testMessage).toBeInstanceOf(Object);
|
||||
expect(testMessage.role).toBeOneOf(['user', 'assistant', 'system']);
|
||||
expect(testMessage.content).toBeTruthy();
|
||||
});
|
||||
|
||||
tap.test('Interfaces: Provider options should be properly typed', async () => {
|
||||
// OpenAI options
|
||||
const openaiOptions: smartai.IOpenaiProviderOptions = {
|
||||
openaiToken: 'test-token',
|
||||
chatModel: 'gpt-5-mini',
|
||||
audioModel: 'tts-1-hd',
|
||||
visionModel: '04-mini',
|
||||
researchModel: 'o4-mini-deep-research-2025-06-26',
|
||||
enableWebSearch: true
|
||||
};
|
||||
|
||||
expect(openaiOptions).toBeInstanceOf(Object);
|
||||
expect(openaiOptions.openaiToken).toBeTruthy();
|
||||
|
||||
// Anthropic options
|
||||
const anthropicOptions: smartai.IAnthropicProviderOptions = {
|
||||
anthropicToken: 'test-token',
|
||||
enableWebSearch: true,
|
||||
searchDomainAllowList: ['example.com'],
|
||||
searchDomainBlockList: ['blocked.com']
|
||||
};
|
||||
|
||||
expect(anthropicOptions).toBeInstanceOf(Object);
|
||||
expect(anthropicOptions.anthropicToken).toBeTruthy();
|
||||
});
|
||||
|
||||
tap.test('Interfaces: Search depth values should be valid', async () => {
|
||||
const validDepths: smartai.ResearchOptions['searchDepth'][] = ['basic', 'advanced', 'deep'];
|
||||
|
||||
for (const depth of validDepths) {
|
||||
const options: smartai.ResearchOptions = {
|
||||
query: 'test',
|
||||
searchDepth: depth
|
||||
};
|
||||
expect(options.searchDepth).toBeOneOf(['basic', 'advanced', 'deep', undefined]);
|
||||
}
|
||||
});
|
||||
|
||||
tap.test('Interfaces: Optional properties should work correctly', async () => {
|
||||
// Minimal ResearchOptions
|
||||
const minimalOptions: smartai.ResearchOptions = {
|
||||
query: 'test query'
|
||||
};
|
||||
|
||||
expect(minimalOptions.query).toBeTruthy();
|
||||
expect(minimalOptions.searchDepth).toBeUndefined();
|
||||
expect(minimalOptions.maxSources).toBeUndefined();
|
||||
|
||||
// Minimal ChatOptions
|
||||
const minimalChat: smartai.ChatOptions = {
|
||||
systemMessage: 'system',
|
||||
userMessage: 'user',
|
||||
messageHistory: []
|
||||
};
|
||||
|
||||
expect(minimalChat.messageHistory).toBeArray();
|
||||
expect(minimalChat.messageHistory.length).toEqual(0);
|
||||
});
|
||||
|
||||
export default tap.start();
|
||||
390
test/test.ollama.ts
Normal file
390
test/test.ollama.ts
Normal file
@@ -0,0 +1,390 @@
|
||||
import { tap, expect } from '@git.zone/tstest/tapbundle';
|
||||
import { createOllamaModel } from '../ts/smartai.provider.ollama.js';
|
||||
import type { ISmartAiOptions } from '../ts/smartai.interfaces.js';
|
||||
|
||||
tap.test('createOllamaModel returns valid LanguageModelV3', async () => {
|
||||
const model = createOllamaModel({
|
||||
provider: 'ollama',
|
||||
model: 'qwen3:8b',
|
||||
ollamaOptions: { think: true, num_ctx: 4096 },
|
||||
});
|
||||
|
||||
expect(model.specificationVersion).toEqual('v3');
|
||||
expect(model.provider).toEqual('ollama');
|
||||
expect(model.modelId).toEqual('qwen3:8b');
|
||||
expect(model).toHaveProperty('doGenerate');
|
||||
expect(model).toHaveProperty('doStream');
|
||||
});
|
||||
|
||||
tap.test('Qwen models get default temperature 0.55', async () => {
|
||||
// Mock fetch to capture the request body
|
||||
const originalFetch = globalThis.fetch;
|
||||
let capturedBody: Record<string, unknown> | undefined;
|
||||
|
||||
globalThis.fetch = async (input: RequestInfo | URL, init?: RequestInit) => {
|
||||
capturedBody = JSON.parse(init?.body as string);
|
||||
return new Response(JSON.stringify({
|
||||
message: { content: 'test response', role: 'assistant' },
|
||||
done: true,
|
||||
prompt_eval_count: 10,
|
||||
eval_count: 5,
|
||||
}), { status: 200 });
|
||||
};
|
||||
|
||||
try {
|
||||
const model = createOllamaModel({
|
||||
provider: 'ollama',
|
||||
model: 'qwen3:8b',
|
||||
});
|
||||
|
||||
await model.doGenerate({
|
||||
prompt: [{ role: 'user', content: [{ type: 'text', text: 'hello' }] }],
|
||||
inputFormat: 'prompt',
|
||||
} as any);
|
||||
|
||||
expect(capturedBody).toBeTruthy();
|
||||
// Temperature 0.55 should be in the options
|
||||
expect((capturedBody!.options as Record<string, unknown>).temperature).toEqual(0.55);
|
||||
} finally {
|
||||
globalThis.fetch = originalFetch;
|
||||
}
|
||||
});
|
||||
|
||||
tap.test('think option is passed at top level of request body', async () => {
|
||||
const originalFetch = globalThis.fetch;
|
||||
let capturedBody: Record<string, unknown> | undefined;
|
||||
|
||||
globalThis.fetch = async (input: RequestInfo | URL, init?: RequestInit) => {
|
||||
capturedBody = JSON.parse(init?.body as string);
|
||||
return new Response(JSON.stringify({
|
||||
message: { content: 'test', role: 'assistant', thinking: 'let me think...' },
|
||||
done: true,
|
||||
prompt_eval_count: 10,
|
||||
eval_count: 5,
|
||||
}), { status: 200 });
|
||||
};
|
||||
|
||||
try {
|
||||
const model = createOllamaModel({
|
||||
provider: 'ollama',
|
||||
model: 'qwen3:8b',
|
||||
ollamaOptions: { think: true, num_ctx: 4096 },
|
||||
});
|
||||
|
||||
await model.doGenerate({
|
||||
prompt: [{ role: 'user', content: [{ type: 'text', text: 'hello' }] }],
|
||||
inputFormat: 'prompt',
|
||||
} as any);
|
||||
|
||||
expect(capturedBody).toBeTruthy();
|
||||
// think should be at top level, not inside options
|
||||
expect(capturedBody!.think).toEqual(true);
|
||||
// num_ctx should be in options
|
||||
expect((capturedBody!.options as Record<string, unknown>).num_ctx).toEqual(4096);
|
||||
} finally {
|
||||
globalThis.fetch = originalFetch;
|
||||
}
|
||||
});
|
||||
|
||||
tap.test('Non-qwen models do not get default temperature', async () => {
|
||||
const originalFetch = globalThis.fetch;
|
||||
let capturedBody: Record<string, unknown> | undefined;
|
||||
|
||||
globalThis.fetch = async (input: RequestInfo | URL, init?: RequestInit) => {
|
||||
capturedBody = JSON.parse(init?.body as string);
|
||||
return new Response(JSON.stringify({
|
||||
message: { content: 'test', role: 'assistant' },
|
||||
done: true,
|
||||
}), { status: 200 });
|
||||
};
|
||||
|
||||
try {
|
||||
const model = createOllamaModel({
|
||||
provider: 'ollama',
|
||||
model: 'llama3:8b',
|
||||
});
|
||||
|
||||
await model.doGenerate({
|
||||
prompt: [{ role: 'user', content: [{ type: 'text', text: 'hello' }] }],
|
||||
inputFormat: 'prompt',
|
||||
} as any);
|
||||
|
||||
expect(capturedBody).toBeTruthy();
|
||||
// No temperature should be set
|
||||
expect((capturedBody!.options as Record<string, unknown>).temperature).toBeUndefined();
|
||||
} finally {
|
||||
globalThis.fetch = originalFetch;
|
||||
}
|
||||
});
|
||||
|
||||
tap.test('doGenerate parses reasoning/thinking from response', async () => {
|
||||
const originalFetch = globalThis.fetch;
|
||||
|
||||
globalThis.fetch = async (input: RequestInfo | URL, init?: RequestInit) => {
|
||||
return new Response(JSON.stringify({
|
||||
message: {
|
||||
content: 'The answer is 42.',
|
||||
role: 'assistant',
|
||||
thinking: 'Let me reason about this carefully...',
|
||||
},
|
||||
done: true,
|
||||
prompt_eval_count: 20,
|
||||
eval_count: 15,
|
||||
}), { status: 200 });
|
||||
};
|
||||
|
||||
try {
|
||||
const model = createOllamaModel({
|
||||
provider: 'ollama',
|
||||
model: 'qwen3:8b',
|
||||
ollamaOptions: { think: true },
|
||||
});
|
||||
|
||||
const result = await model.doGenerate({
|
||||
prompt: [{ role: 'user', content: [{ type: 'text', text: 'What is the meaning of life?' }] }],
|
||||
} as any);
|
||||
|
||||
// Should have both reasoning and text content
|
||||
const reasoningParts = result.content.filter(c => c.type === 'reasoning');
|
||||
const textParts = result.content.filter(c => c.type === 'text');
|
||||
|
||||
expect(reasoningParts.length).toEqual(1);
|
||||
expect((reasoningParts[0] as any).text).toEqual('Let me reason about this carefully...');
|
||||
expect(textParts.length).toEqual(1);
|
||||
expect((textParts[0] as any).text).toEqual('The answer is 42.');
|
||||
expect(result.finishReason.unified).toEqual('stop');
|
||||
} finally {
|
||||
globalThis.fetch = originalFetch;
|
||||
}
|
||||
});
|
||||
|
||||
tap.test('doGenerate parses tool calls from response', async () => {
|
||||
const originalFetch = globalThis.fetch;
|
||||
|
||||
globalThis.fetch = async (input: RequestInfo | URL, init?: RequestInit) => {
|
||||
return new Response(JSON.stringify({
|
||||
message: {
|
||||
content: '',
|
||||
role: 'assistant',
|
||||
tool_calls: [
|
||||
{
|
||||
function: {
|
||||
name: 'get_weather',
|
||||
arguments: { location: 'London', unit: 'celsius' },
|
||||
},
|
||||
},
|
||||
],
|
||||
},
|
||||
done: true,
|
||||
prompt_eval_count: 30,
|
||||
eval_count: 10,
|
||||
}), { status: 200 });
|
||||
};
|
||||
|
||||
try {
|
||||
const model = createOllamaModel({
|
||||
provider: 'ollama',
|
||||
model: 'qwen3:8b',
|
||||
});
|
||||
|
||||
const result = await model.doGenerate({
|
||||
prompt: [{ role: 'user', content: [{ type: 'text', text: 'What is the weather in London?' }] }],
|
||||
tools: [{
|
||||
type: 'function' as const,
|
||||
name: 'get_weather',
|
||||
description: 'Get weather for a location',
|
||||
inputSchema: {
|
||||
type: 'object',
|
||||
properties: {
|
||||
location: { type: 'string' },
|
||||
unit: { type: 'string' },
|
||||
},
|
||||
},
|
||||
}],
|
||||
} as any);
|
||||
|
||||
const toolCalls = result.content.filter(c => c.type === 'tool-call');
|
||||
expect(toolCalls.length).toEqual(1);
|
||||
expect((toolCalls[0] as any).toolName).toEqual('get_weather');
|
||||
expect(JSON.parse((toolCalls[0] as any).input)).toEqual({ location: 'London', unit: 'celsius' });
|
||||
expect(result.finishReason.unified).toEqual('tool-calls');
|
||||
} finally {
|
||||
globalThis.fetch = originalFetch;
|
||||
}
|
||||
});
|
||||
|
||||
tap.test('doStream produces correct stream parts', async () => {
|
||||
const originalFetch = globalThis.fetch;
|
||||
|
||||
// Simulate Ollama's newline-delimited JSON streaming
|
||||
const chunks = [
|
||||
JSON.stringify({ message: { content: 'Hello', role: 'assistant' }, done: false }) + '\n',
|
||||
JSON.stringify({ message: { content: ' world', role: 'assistant' }, done: false }) + '\n',
|
||||
JSON.stringify({ message: { content: '!', role: 'assistant' }, done: true, prompt_eval_count: 5, eval_count: 3 }) + '\n',
|
||||
];
|
||||
|
||||
globalThis.fetch = async (input: RequestInfo | URL, init?: RequestInit) => {
|
||||
const encoder = new TextEncoder();
|
||||
const stream = new ReadableStream({
|
||||
start(controller) {
|
||||
for (const chunk of chunks) {
|
||||
controller.enqueue(encoder.encode(chunk));
|
||||
}
|
||||
controller.close();
|
||||
},
|
||||
});
|
||||
return new Response(stream, { status: 200 });
|
||||
};
|
||||
|
||||
try {
|
||||
const model = createOllamaModel({
|
||||
provider: 'ollama',
|
||||
model: 'llama3:8b',
|
||||
});
|
||||
|
||||
const result = await model.doStream({
|
||||
prompt: [{ role: 'user', content: [{ type: 'text', text: 'hello' }] }],
|
||||
} as any);
|
||||
|
||||
const parts: any[] = [];
|
||||
const reader = result.stream.getReader();
|
||||
while (true) {
|
||||
const { done, value } = await reader.read();
|
||||
if (done) break;
|
||||
parts.push(value);
|
||||
}
|
||||
|
||||
// Should have: text-start, text-delta x3, text-end, finish
|
||||
const textDeltas = parts.filter(p => p.type === 'text-delta');
|
||||
const finishParts = parts.filter(p => p.type === 'finish');
|
||||
const textStarts = parts.filter(p => p.type === 'text-start');
|
||||
const textEnds = parts.filter(p => p.type === 'text-end');
|
||||
|
||||
expect(textStarts.length).toEqual(1);
|
||||
expect(textDeltas.length).toEqual(3);
|
||||
expect(textDeltas.map((d: any) => d.delta).join('')).toEqual('Hello world!');
|
||||
expect(textEnds.length).toEqual(1);
|
||||
expect(finishParts.length).toEqual(1);
|
||||
expect(finishParts[0].finishReason.unified).toEqual('stop');
|
||||
} finally {
|
||||
globalThis.fetch = originalFetch;
|
||||
}
|
||||
});
|
||||
|
||||
tap.test('doStream handles thinking/reasoning in stream', async () => {
|
||||
const originalFetch = globalThis.fetch;
|
||||
|
||||
const chunks = [
|
||||
JSON.stringify({ message: { thinking: 'Let me think...', content: '', role: 'assistant' }, done: false }) + '\n',
|
||||
JSON.stringify({ message: { thinking: ' about this.', content: '', role: 'assistant' }, done: false }) + '\n',
|
||||
JSON.stringify({ message: { content: 'The answer.', role: 'assistant' }, done: false }) + '\n',
|
||||
JSON.stringify({ message: { content: '', role: 'assistant' }, done: true, prompt_eval_count: 10, eval_count: 8 }) + '\n',
|
||||
];
|
||||
|
||||
globalThis.fetch = async (input: RequestInfo | URL, init?: RequestInit) => {
|
||||
const encoder = new TextEncoder();
|
||||
const stream = new ReadableStream({
|
||||
start(controller) {
|
||||
for (const chunk of chunks) {
|
||||
controller.enqueue(encoder.encode(chunk));
|
||||
}
|
||||
controller.close();
|
||||
},
|
||||
});
|
||||
return new Response(stream, { status: 200 });
|
||||
};
|
||||
|
||||
try {
|
||||
const model = createOllamaModel({
|
||||
provider: 'ollama',
|
||||
model: 'qwen3:8b',
|
||||
ollamaOptions: { think: true },
|
||||
});
|
||||
|
||||
const result = await model.doStream({
|
||||
prompt: [{ role: 'user', content: [{ type: 'text', text: 'think about this' }] }],
|
||||
} as any);
|
||||
|
||||
const parts: any[] = [];
|
||||
const reader = result.stream.getReader();
|
||||
while (true) {
|
||||
const { done, value } = await reader.read();
|
||||
if (done) break;
|
||||
parts.push(value);
|
||||
}
|
||||
|
||||
const reasoningStarts = parts.filter(p => p.type === 'reasoning-start');
|
||||
const reasoningDeltas = parts.filter(p => p.type === 'reasoning-delta');
|
||||
const reasoningEnds = parts.filter(p => p.type === 'reasoning-end');
|
||||
const textDeltas = parts.filter(p => p.type === 'text-delta');
|
||||
|
||||
expect(reasoningStarts.length).toEqual(1);
|
||||
expect(reasoningDeltas.length).toEqual(2);
|
||||
expect(reasoningDeltas.map((d: any) => d.delta).join('')).toEqual('Let me think... about this.');
|
||||
expect(reasoningEnds.length).toEqual(1);
|
||||
expect(textDeltas.length).toEqual(1);
|
||||
expect(textDeltas[0].delta).toEqual('The answer.');
|
||||
} finally {
|
||||
globalThis.fetch = originalFetch;
|
||||
}
|
||||
});
|
||||
|
||||
tap.test('message conversion handles system, assistant, and tool messages', async () => {
|
||||
const originalFetch = globalThis.fetch;
|
||||
let capturedBody: Record<string, unknown> | undefined;
|
||||
|
||||
globalThis.fetch = async (input: RequestInfo | URL, init?: RequestInit) => {
|
||||
capturedBody = JSON.parse(init?.body as string);
|
||||
return new Response(JSON.stringify({
|
||||
message: { content: 'response', role: 'assistant' },
|
||||
done: true,
|
||||
}), { status: 200 });
|
||||
};
|
||||
|
||||
try {
|
||||
const model = createOllamaModel({
|
||||
provider: 'ollama',
|
||||
model: 'llama3:8b',
|
||||
});
|
||||
|
||||
await model.doGenerate({
|
||||
prompt: [
|
||||
{ role: 'system', content: 'You are helpful.' },
|
||||
{ role: 'user', content: [{ type: 'text', text: 'Hi' }] },
|
||||
{
|
||||
role: 'assistant',
|
||||
content: [
|
||||
{ type: 'text', text: 'Let me check.' },
|
||||
{ type: 'tool-call', toolCallId: 'tc1', toolName: 'search', input: '{"q":"test"}' },
|
||||
],
|
||||
},
|
||||
{
|
||||
role: 'tool',
|
||||
content: [
|
||||
{ type: 'tool-result', toolCallId: 'tc1', output: { type: 'text', value: 'result data' } },
|
||||
],
|
||||
},
|
||||
{ role: 'user', content: [{ type: 'text', text: 'What did you find?' }] },
|
||||
],
|
||||
} as any);
|
||||
|
||||
const messages = capturedBody!.messages as Array<Record<string, unknown>>;
|
||||
expect(messages.length).toEqual(5);
|
||||
expect(messages[0].role).toEqual('system');
|
||||
expect(messages[0].content).toEqual('You are helpful.');
|
||||
expect(messages[1].role).toEqual('user');
|
||||
expect(messages[1].content).toEqual('Hi');
|
||||
expect(messages[2].role).toEqual('assistant');
|
||||
expect(messages[2].content).toEqual('Let me check.');
|
||||
expect((messages[2].tool_calls as any[]).length).toEqual(1);
|
||||
expect((messages[2].tool_calls as any[])[0].function.name).toEqual('search');
|
||||
expect(messages[3].role).toEqual('tool');
|
||||
expect(messages[3].content).toEqual('result data');
|
||||
expect(messages[4].role).toEqual('user');
|
||||
expect(messages[4].content).toEqual('What did you find?');
|
||||
} finally {
|
||||
globalThis.fetch = originalFetch;
|
||||
}
|
||||
});
|
||||
|
||||
export default tap.start();
|
||||
@@ -1,223 +0,0 @@
|
||||
import { expect, tap } from '@git.zone/tstest/tapbundle';
|
||||
import * as qenv from '@push.rocks/qenv';
|
||||
import * as smartai from '../ts/index.js';
|
||||
import * as path from 'path';
|
||||
import { promises as fs } from 'fs';
|
||||
|
||||
const testQenv = new qenv.Qenv('./', './.nogit/');
|
||||
|
||||
// Helper function to save research results
|
||||
async function saveResearchResult(testName: string, result: any) {
|
||||
const sanitizedName = testName.replace(/[^a-z0-9]/gi, '_').toLowerCase();
|
||||
const timestamp = new Date().toISOString().replace(/[:.]/g, '-');
|
||||
const filename = `${sanitizedName}_${timestamp}.json`;
|
||||
const filepath = path.join('.nogit', 'testresults', 'research', filename);
|
||||
|
||||
await fs.mkdir(path.dirname(filepath), { recursive: true });
|
||||
await fs.writeFile(filepath, JSON.stringify(result, null, 2), 'utf-8');
|
||||
|
||||
console.log(` 💾 Saved to: ${filepath}`);
|
||||
}
|
||||
|
||||
let anthropicProvider: smartai.AnthropicProvider;
|
||||
|
||||
tap.test('Anthropic Research: should initialize provider with web search', async () => {
|
||||
anthropicProvider = new smartai.AnthropicProvider({
|
||||
anthropicToken: await testQenv.getEnvVarOnDemand('ANTHROPIC_TOKEN'),
|
||||
enableWebSearch: true
|
||||
});
|
||||
|
||||
await anthropicProvider.start();
|
||||
expect(anthropicProvider).toBeInstanceOf(smartai.AnthropicProvider);
|
||||
expect(typeof anthropicProvider.research).toEqual('function');
|
||||
});
|
||||
|
||||
tap.test('Anthropic Research: should perform basic research query', async () => {
|
||||
const result = await anthropicProvider.research({
|
||||
query: 'What is machine learning and its main applications?',
|
||||
searchDepth: 'basic'
|
||||
});
|
||||
|
||||
console.log('Anthropic Basic Research:');
|
||||
console.log('- Answer length:', result.answer.length);
|
||||
console.log('- Sources found:', result.sources.length);
|
||||
console.log('- First 200 chars:', result.answer.substring(0, 200));
|
||||
|
||||
await saveResearchResult('basic_research_machine_learning', result);
|
||||
|
||||
expect(result).toBeTruthy();
|
||||
expect(result.answer).toBeTruthy();
|
||||
expect(result.answer.toLowerCase()).toInclude('machine learning');
|
||||
expect(result.sources).toBeArray();
|
||||
expect(result.metadata).toBeTruthy();
|
||||
});
|
||||
|
||||
tap.test('Anthropic Research: should perform research with web search', async () => {
|
||||
const result = await anthropicProvider.research({
|
||||
query: 'What are the latest developments in renewable energy technology?',
|
||||
searchDepth: 'advanced',
|
||||
includeWebSearch: true,
|
||||
maxSources: 5
|
||||
});
|
||||
|
||||
console.log('Anthropic Web Search Research:');
|
||||
console.log('- Answer length:', result.answer.length);
|
||||
console.log('- Sources:', result.sources.length);
|
||||
if (result.searchQueries) {
|
||||
console.log('- Search queries:', result.searchQueries);
|
||||
}
|
||||
|
||||
await saveResearchResult('web_search_renewable_energy', result);
|
||||
|
||||
expect(result.answer).toBeTruthy();
|
||||
expect(result.answer.toLowerCase()).toInclude('renewable');
|
||||
|
||||
// Check if sources were extracted
|
||||
if (result.sources.length > 0) {
|
||||
console.log('- Example source:', result.sources[0]);
|
||||
expect(result.sources[0]).toHaveProperty('url');
|
||||
}
|
||||
});
|
||||
|
||||
tap.test('Anthropic Research: should handle deep research queries', async () => {
|
||||
const result = await anthropicProvider.research({
|
||||
query: 'Explain the differences between REST and GraphQL APIs',
|
||||
searchDepth: 'deep'
|
||||
});
|
||||
|
||||
console.log('Anthropic Deep Research:');
|
||||
console.log('- Answer length:', result.answer.length);
|
||||
console.log('- Token usage:', result.metadata?.tokensUsed);
|
||||
|
||||
await saveResearchResult('deep_research_rest_vs_graphql', result);
|
||||
|
||||
expect(result.answer).toBeTruthy();
|
||||
expect(result.answer.length).toBeGreaterThan(300);
|
||||
expect(result.answer.toLowerCase()).toInclude('rest');
|
||||
expect(result.answer.toLowerCase()).toInclude('graphql');
|
||||
});
|
||||
|
||||
tap.test('Anthropic Research: should extract citations from response', async () => {
|
||||
const result = await anthropicProvider.research({
|
||||
query: 'What is Docker and how does containerization work?',
|
||||
searchDepth: 'basic',
|
||||
maxSources: 3
|
||||
});
|
||||
|
||||
console.log('Anthropic Citation Extraction:');
|
||||
console.log('- Sources found:', result.sources.length);
|
||||
console.log('- Answer includes Docker:', result.answer.toLowerCase().includes('docker'));
|
||||
|
||||
await saveResearchResult('citation_extraction_docker', result);
|
||||
|
||||
expect(result.answer).toInclude('Docker');
|
||||
|
||||
// Check for URL extraction (both markdown and plain URLs)
|
||||
const hasUrls = result.answer.includes('http') || result.sources.length > 0;
|
||||
console.log('- Contains URLs or sources:', hasUrls);
|
||||
});
|
||||
|
||||
tap.test('Anthropic Research: should use domain filtering when configured', async () => {
|
||||
// Create a new provider with domain restrictions
|
||||
const filteredProvider = new smartai.AnthropicProvider({
|
||||
anthropicToken: await testQenv.getEnvVarOnDemand('ANTHROPIC_TOKEN'),
|
||||
enableWebSearch: true,
|
||||
searchDomainAllowList: ['wikipedia.org', 'docs.microsoft.com'],
|
||||
searchDomainBlockList: ['reddit.com']
|
||||
});
|
||||
|
||||
await filteredProvider.start();
|
||||
|
||||
const result = await filteredProvider.research({
|
||||
query: 'What is JavaScript?',
|
||||
searchDepth: 'basic'
|
||||
});
|
||||
|
||||
console.log('Anthropic Domain Filtering Test:');
|
||||
console.log('- Answer length:', result.answer.length);
|
||||
console.log('- Applied domain filters (allow: wikipedia, docs.microsoft)');
|
||||
|
||||
await saveResearchResult('domain_filtering_javascript', result);
|
||||
|
||||
expect(result.answer).toBeTruthy();
|
||||
expect(result.answer.toLowerCase()).toInclude('javascript');
|
||||
|
||||
await filteredProvider.stop();
|
||||
});
|
||||
|
||||
tap.test('Anthropic Research: should handle errors gracefully', async () => {
|
||||
let errorCaught = false;
|
||||
|
||||
try {
|
||||
await anthropicProvider.research({
|
||||
query: '', // Empty query
|
||||
searchDepth: 'basic'
|
||||
});
|
||||
} catch (error) {
|
||||
errorCaught = true;
|
||||
console.log('Expected error for empty query:', error.message.substring(0, 100));
|
||||
}
|
||||
|
||||
// Anthropic might handle empty queries differently
|
||||
console.log(`Empty query error test - Error caught: ${errorCaught}`);
|
||||
});
|
||||
|
||||
tap.test('Anthropic Research: should handle different search depths', async () => {
|
||||
// Test basic search depth
|
||||
const basicResult = await anthropicProvider.research({
|
||||
query: 'What is Python?',
|
||||
searchDepth: 'basic'
|
||||
});
|
||||
|
||||
// Test advanced search depth
|
||||
const advancedResult = await anthropicProvider.research({
|
||||
query: 'What is Python?',
|
||||
searchDepth: 'advanced'
|
||||
});
|
||||
|
||||
console.log('Anthropic Search Depth Comparison:');
|
||||
console.log('- Basic answer length:', basicResult.answer.length);
|
||||
console.log('- Advanced answer length:', advancedResult.answer.length);
|
||||
console.log('- Basic tokens:', basicResult.metadata?.tokensUsed);
|
||||
console.log('- Advanced tokens:', advancedResult.metadata?.tokensUsed);
|
||||
|
||||
await saveResearchResult('search_depth_python_basic', basicResult);
|
||||
await saveResearchResult('search_depth_python_advanced', advancedResult);
|
||||
|
||||
expect(basicResult.answer).toBeTruthy();
|
||||
expect(advancedResult.answer).toBeTruthy();
|
||||
|
||||
// Advanced search typically produces longer answers
|
||||
// But this isn't guaranteed, so we just check they exist
|
||||
expect(basicResult.answer.toLowerCase()).toInclude('python');
|
||||
expect(advancedResult.answer.toLowerCase()).toInclude('python');
|
||||
});
|
||||
|
||||
tap.test('Anthropic Research: ARM vs. Qualcomm comparison', async () => {
|
||||
const result = await anthropicProvider.research({
|
||||
query: 'Compare ARM and Qualcomm: their technologies, market positions, and recent developments in the mobile and computing sectors',
|
||||
searchDepth: 'advanced',
|
||||
includeWebSearch: true,
|
||||
maxSources: 10
|
||||
});
|
||||
|
||||
console.log('ARM vs. Qualcomm Research:');
|
||||
console.log('- Answer length:', result.answer.length);
|
||||
console.log('- Sources found:', result.sources.length);
|
||||
console.log('- First 300 chars:', result.answer.substring(0, 300));
|
||||
|
||||
await saveResearchResult('arm_vs_qualcomm_comparison', result);
|
||||
|
||||
expect(result.answer).toBeTruthy();
|
||||
expect(result.answer.length).toBeGreaterThan(500);
|
||||
expect(result.answer.toLowerCase()).toInclude('arm');
|
||||
expect(result.answer.toLowerCase()).toInclude('qualcomm');
|
||||
expect(result.sources.length).toBeGreaterThan(0);
|
||||
});
|
||||
|
||||
tap.test('Anthropic Research: should clean up provider', async () => {
|
||||
await anthropicProvider.stop();
|
||||
console.log('Anthropic research provider stopped successfully');
|
||||
});
|
||||
|
||||
export default tap.start();
|
||||
@@ -1,172 +0,0 @@
|
||||
import { expect, tap } from '@git.zone/tstest/tapbundle';
|
||||
import * as qenv from '@push.rocks/qenv';
|
||||
import * as smartai from '../ts/index.js';
|
||||
import * as path from 'path';
|
||||
import { promises as fs } from 'fs';
|
||||
|
||||
const testQenv = new qenv.Qenv('./', './.nogit/');
|
||||
|
||||
// Helper function to save research results
|
||||
async function saveResearchResult(testName: string, result: any) {
|
||||
const sanitizedName = testName.replace(/[^a-z0-9]/gi, '_').toLowerCase();
|
||||
const timestamp = new Date().toISOString().replace(/[:.]/g, '-');
|
||||
const filename = `openai_${sanitizedName}_${timestamp}.json`;
|
||||
const filepath = path.join('.nogit', 'testresults', 'research', filename);
|
||||
|
||||
await fs.mkdir(path.dirname(filepath), { recursive: true });
|
||||
await fs.writeFile(filepath, JSON.stringify(result, null, 2), 'utf-8');
|
||||
|
||||
console.log(` 💾 Saved to: ${filepath}`);
|
||||
}
|
||||
|
||||
let openaiProvider: smartai.OpenAiProvider;
|
||||
|
||||
tap.test('OpenAI Research: should initialize provider with research capabilities', async () => {
|
||||
openaiProvider = new smartai.OpenAiProvider({
|
||||
openaiToken: await testQenv.getEnvVarOnDemand('OPENAI_TOKEN'),
|
||||
researchModel: 'o4-mini-deep-research-2025-06-26',
|
||||
enableWebSearch: true
|
||||
});
|
||||
|
||||
await openaiProvider.start();
|
||||
expect(openaiProvider).toBeInstanceOf(smartai.OpenAiProvider);
|
||||
expect(typeof openaiProvider.research).toEqual('function');
|
||||
});
|
||||
|
||||
tap.test('OpenAI Research: should perform basic research query', async () => {
|
||||
const result = await openaiProvider.research({
|
||||
query: 'What is TypeScript and why is it useful for web development?',
|
||||
searchDepth: 'basic'
|
||||
});
|
||||
|
||||
console.log('OpenAI Basic Research:');
|
||||
console.log('- Answer length:', result.answer.length);
|
||||
console.log('- Sources found:', result.sources.length);
|
||||
console.log('- First 200 chars:', result.answer.substring(0, 200));
|
||||
|
||||
await saveResearchResult('basic_research_typescript', result);
|
||||
|
||||
expect(result).toBeTruthy();
|
||||
expect(result.answer).toBeTruthy();
|
||||
expect(result.answer.toLowerCase()).toInclude('typescript');
|
||||
expect(result.sources).toBeArray();
|
||||
expect(result.metadata).toBeTruthy();
|
||||
expect(result.metadata.model).toBeTruthy();
|
||||
});
|
||||
|
||||
tap.test('OpenAI Research: should perform research with web search enabled', async () => {
|
||||
const result = await openaiProvider.research({
|
||||
query: 'What are the latest features in ECMAScript 2024?',
|
||||
searchDepth: 'advanced',
|
||||
includeWebSearch: true,
|
||||
maxSources: 5
|
||||
});
|
||||
|
||||
console.log('OpenAI Web Search Research:');
|
||||
console.log('- Answer length:', result.answer.length);
|
||||
console.log('- Sources:', result.sources.length);
|
||||
if (result.searchQueries) {
|
||||
console.log('- Search queries used:', result.searchQueries);
|
||||
}
|
||||
|
||||
await saveResearchResult('web_search_ecmascript', result);
|
||||
|
||||
expect(result.answer).toBeTruthy();
|
||||
expect(result.answer.toLowerCase()).toInclude('ecmascript');
|
||||
|
||||
// The model might include sources or search queries
|
||||
if (result.sources.length > 0) {
|
||||
expect(result.sources[0]).toHaveProperty('url');
|
||||
expect(result.sources[0]).toHaveProperty('title');
|
||||
}
|
||||
});
|
||||
|
||||
tap.test('OpenAI Research: should handle deep research for complex topics', async () => {
|
||||
// Skip this test if it takes too long or costs too much
|
||||
// You can enable it for thorough testing
|
||||
const skipDeepResearch = true;
|
||||
|
||||
if (skipDeepResearch) {
|
||||
console.log('Skipping deep research test to save API costs');
|
||||
return;
|
||||
}
|
||||
|
||||
const result = await openaiProvider.research({
|
||||
query: 'Compare the pros and cons of microservices vs monolithic architecture',
|
||||
searchDepth: 'deep',
|
||||
includeWebSearch: true
|
||||
});
|
||||
|
||||
console.log('OpenAI Deep Research:');
|
||||
console.log('- Answer length:', result.answer.length);
|
||||
console.log('- Token usage:', result.metadata?.tokensUsed);
|
||||
|
||||
expect(result.answer).toBeTruthy();
|
||||
expect(result.answer.length).toBeGreaterThan(500);
|
||||
expect(result.answer.toLowerCase()).toInclude('microservices');
|
||||
expect(result.answer.toLowerCase()).toInclude('monolithic');
|
||||
});
|
||||
|
||||
tap.test('OpenAI Research: should extract sources from markdown links', async () => {
|
||||
const result = await openaiProvider.research({
|
||||
query: 'What is Node.js and provide some official documentation links?',
|
||||
searchDepth: 'basic',
|
||||
maxSources: 3
|
||||
});
|
||||
|
||||
console.log('OpenAI Source Extraction:');
|
||||
console.log('- Sources found:', result.sources.length);
|
||||
|
||||
await saveResearchResult('source_extraction_nodejs', result);
|
||||
|
||||
if (result.sources.length > 0) {
|
||||
console.log('- Example source:', result.sources[0]);
|
||||
expect(result.sources[0].url).toBeTruthy();
|
||||
expect(result.sources[0].title).toBeTruthy();
|
||||
}
|
||||
|
||||
expect(result.answer).toInclude('Node.js');
|
||||
});
|
||||
|
||||
tap.test('OpenAI Research: should handle research errors gracefully', async () => {
|
||||
// Test with an extremely long query that might cause issues
|
||||
const longQuery = 'a'.repeat(10000);
|
||||
|
||||
let errorCaught = false;
|
||||
try {
|
||||
await openaiProvider.research({
|
||||
query: longQuery,
|
||||
searchDepth: 'basic'
|
||||
});
|
||||
} catch (error) {
|
||||
errorCaught = true;
|
||||
console.log('Expected error for long query:', error.message.substring(0, 100));
|
||||
expect(error.message).toBeTruthy();
|
||||
}
|
||||
|
||||
// OpenAI might handle long queries, so we don't assert the error
|
||||
console.log(`Long query error test - Error caught: ${errorCaught}`);
|
||||
});
|
||||
|
||||
tap.test('OpenAI Research: should respect maxSources parameter', async () => {
|
||||
const maxSources = 3;
|
||||
const result = await openaiProvider.research({
|
||||
query: 'List popular JavaScript frameworks',
|
||||
searchDepth: 'basic',
|
||||
maxSources: maxSources
|
||||
});
|
||||
|
||||
console.log(`OpenAI Max Sources Test - Requested: ${maxSources}, Found: ${result.sources.length}`);
|
||||
|
||||
// The API might not always return exactly maxSources, but should respect it as a limit
|
||||
if (result.sources.length > 0) {
|
||||
expect(result.sources.length).toBeLessThanOrEqual(maxSources * 2); // Allow some flexibility
|
||||
}
|
||||
});
|
||||
|
||||
tap.test('OpenAI Research: should clean up provider', async () => {
|
||||
await openaiProvider.stop();
|
||||
console.log('OpenAI research provider stopped successfully');
|
||||
});
|
||||
|
||||
export default tap.start();
|
||||
@@ -1,80 +0,0 @@
|
||||
import { tap, expect } from '@git.zone/tstest/tapbundle';
|
||||
import * as smartai from '../ts/index.js';
|
||||
|
||||
// Test research method stubs for providers without full implementation
|
||||
// These providers have research methods that throw "not yet supported" errors
|
||||
|
||||
tap.test('Research Stubs: Perplexity provider should have research method', async () => {
|
||||
const perplexityProvider = new smartai.PerplexityProvider({
|
||||
perplexityToken: 'test-token'
|
||||
});
|
||||
|
||||
// Perplexity has a basic implementation with Sonar models
|
||||
expect(typeof perplexityProvider.research).toEqual('function');
|
||||
});
|
||||
|
||||
tap.test('Research Stubs: Groq provider should throw not supported error', async () => {
|
||||
const groqProvider = new smartai.GroqProvider({
|
||||
groqToken: 'test-token'
|
||||
});
|
||||
|
||||
expect(typeof groqProvider.research).toEqual('function');
|
||||
|
||||
let errorCaught = false;
|
||||
try {
|
||||
await groqProvider.research({ query: 'test' });
|
||||
} catch (error) {
|
||||
errorCaught = true;
|
||||
expect(error.message).toInclude('not yet supported');
|
||||
}
|
||||
expect(errorCaught).toBeTrue();
|
||||
});
|
||||
|
||||
tap.test('Research Stubs: Ollama provider should throw not supported error', async () => {
|
||||
const ollamaProvider = new smartai.OllamaProvider({});
|
||||
|
||||
expect(typeof ollamaProvider.research).toEqual('function');
|
||||
|
||||
let errorCaught = false;
|
||||
try {
|
||||
await ollamaProvider.research({ query: 'test' });
|
||||
} catch (error) {
|
||||
errorCaught = true;
|
||||
expect(error.message).toInclude('not yet supported');
|
||||
}
|
||||
expect(errorCaught).toBeTrue();
|
||||
});
|
||||
|
||||
tap.test('Research Stubs: xAI provider should throw not supported error', async () => {
|
||||
const xaiProvider = new smartai.XAIProvider({
|
||||
xaiToken: 'test-token'
|
||||
});
|
||||
|
||||
expect(typeof xaiProvider.research).toEqual('function');
|
||||
|
||||
let errorCaught = false;
|
||||
try {
|
||||
await xaiProvider.research({ query: 'test' });
|
||||
} catch (error) {
|
||||
errorCaught = true;
|
||||
expect(error.message).toInclude('not yet supported');
|
||||
}
|
||||
expect(errorCaught).toBeTrue();
|
||||
});
|
||||
|
||||
tap.test('Research Stubs: Exo provider should throw not supported error', async () => {
|
||||
const exoProvider = new smartai.ExoProvider({});
|
||||
|
||||
expect(typeof exoProvider.research).toEqual('function');
|
||||
|
||||
let errorCaught = false;
|
||||
try {
|
||||
await exoProvider.research({ query: 'test' });
|
||||
} catch (error) {
|
||||
errorCaught = true;
|
||||
expect(error.message).toInclude('not yet supported');
|
||||
}
|
||||
expect(errorCaught).toBeTrue();
|
||||
});
|
||||
|
||||
export default tap.start();
|
||||
test/test.research.ts — new file, 31 lines
@@ -0,0 +1,31 @@
|
||||
import { tap, expect } from '@git.zone/tstest/tapbundle';
|
||||
import * as qenv from '@push.rocks/qenv';
|
||||
import { research } from '../ts_research/index.js';
|
||||
|
||||
const testQenv = new qenv.Qenv('./', './.nogit/');
|
||||
|
||||
tap.test('research should return answer and sources', async () => {
|
||||
const apiKey = await testQenv.getEnvVarOnDemand('ANTHROPIC_TOKEN');
|
||||
if (!apiKey) {
|
||||
console.log('ANTHROPIC_TOKEN not set, skipping test');
|
||||
return;
|
||||
}
|
||||
|
||||
const result = await research({
|
||||
apiKey,
|
||||
query: 'What is the current version of Node.js?',
|
||||
searchDepth: 'basic',
|
||||
});
|
||||
|
||||
console.log('Research answer:', result.answer.substring(0, 200));
|
||||
console.log('Research sources:', result.sources.length);
|
||||
if (result.searchQueries) {
|
||||
console.log('Search queries:', result.searchQueries);
|
||||
}
|
||||
|
||||
expect(result.answer).toBeTruthy();
|
||||
expect(result.answer.length).toBeGreaterThan(10);
|
||||
expect(result.sources).toBeArray();
|
||||
});
|
||||
|
||||
export default tap.start();
|
||||
test/test.smartai.ts — new file, 161 lines
@@ -0,0 +1,161 @@
|
||||
import { tap, expect } from '@git.zone/tstest/tapbundle';
|
||||
import * as qenv from '@push.rocks/qenv';
|
||||
import * as smartai from '../ts/index.js';
|
||||
|
||||
const testQenv = new qenv.Qenv('./', './.nogit/');
|
||||
|
||||
tap.test('getModel should return a LanguageModelV3 for anthropic', async () => {
|
||||
const apiKey = await testQenv.getEnvVarOnDemand('ANTHROPIC_TOKEN');
|
||||
if (!apiKey) {
|
||||
console.log('ANTHROPIC_TOKEN not set, skipping test');
|
||||
return;
|
||||
}
|
||||
|
||||
const model = smartai.getModel({
|
||||
provider: 'anthropic',
|
||||
model: 'claude-sonnet-4-5-20250929',
|
||||
apiKey,
|
||||
});
|
||||
|
||||
expect(model).toHaveProperty('specificationVersion');
|
||||
expect(model).toHaveProperty('provider');
|
||||
expect(model).toHaveProperty('modelId');
|
||||
expect(model).toHaveProperty('doGenerate');
|
||||
expect(model).toHaveProperty('doStream');
|
||||
});
|
||||
|
||||
tap.test('getModel with anthropic prompt caching returns wrapped model', async () => {
|
||||
const apiKey = await testQenv.getEnvVarOnDemand('ANTHROPIC_TOKEN');
|
||||
if (!apiKey) {
|
||||
console.log('ANTHROPIC_TOKEN not set, skipping test');
|
||||
return;
|
||||
}
|
||||
|
||||
// Default: prompt caching enabled
|
||||
const model = smartai.getModel({
|
||||
provider: 'anthropic',
|
||||
model: 'claude-sonnet-4-5-20250929',
|
||||
apiKey,
|
||||
});
|
||||
|
||||
// With caching disabled
|
||||
const modelNoCaching = smartai.getModel({
|
||||
provider: 'anthropic',
|
||||
model: 'claude-sonnet-4-5-20250929',
|
||||
apiKey,
|
||||
promptCaching: false,
|
||||
});
|
||||
|
||||
// Both should be valid models
|
||||
expect(model).toHaveProperty('doGenerate');
|
||||
expect(modelNoCaching).toHaveProperty('doGenerate');
|
||||
});
|
||||
|
||||
tap.test('generateText with anthropic model', async () => {
|
||||
const apiKey = await testQenv.getEnvVarOnDemand('ANTHROPIC_TOKEN');
|
||||
if (!apiKey) {
|
||||
console.log('ANTHROPIC_TOKEN not set, skipping test');
|
||||
return;
|
||||
}
|
||||
|
||||
const model = smartai.getModel({
|
||||
provider: 'anthropic',
|
||||
model: 'claude-sonnet-4-5-20250929',
|
||||
apiKey,
|
||||
});
|
||||
|
||||
const result = await smartai.generateText({
|
||||
model,
|
||||
prompt: 'Say hello in exactly 3 words.',
|
||||
});
|
||||
|
||||
console.log('Anthropic response:', result.text);
|
||||
expect(result.text).toBeTruthy();
|
||||
expect(result.text.length).toBeGreaterThan(0);
|
||||
});
|
||||
|
||||
tap.test('getModel should return a LanguageModelV3 for openai', async () => {
|
||||
const apiKey = await testQenv.getEnvVarOnDemand('OPENAI_TOKEN');
|
||||
if (!apiKey) {
|
||||
console.log('OPENAI_TOKEN not set, skipping test');
|
||||
return;
|
||||
}
|
||||
|
||||
const model = smartai.getModel({
|
||||
provider: 'openai',
|
||||
model: 'gpt-4o-mini',
|
||||
apiKey,
|
||||
});
|
||||
|
||||
expect(model).toHaveProperty('doGenerate');
|
||||
expect(model).toHaveProperty('doStream');
|
||||
});
|
||||
|
||||
tap.test('streamText with anthropic model', async () => {
|
||||
const apiKey = await testQenv.getEnvVarOnDemand('ANTHROPIC_TOKEN');
|
||||
if (!apiKey) {
|
||||
console.log('ANTHROPIC_TOKEN not set, skipping test');
|
||||
return;
|
||||
}
|
||||
|
||||
const model = smartai.getModel({
|
||||
provider: 'anthropic',
|
||||
model: 'claude-sonnet-4-5-20250929',
|
||||
apiKey,
|
||||
});
|
||||
|
||||
const result = await smartai.streamText({
|
||||
model,
|
||||
prompt: 'Count from 1 to 5.',
|
||||
});
|
||||
|
||||
const tokens: string[] = [];
|
||||
for await (const chunk of result.textStream) {
|
||||
tokens.push(chunk);
|
||||
}
|
||||
|
||||
const fullText = tokens.join('');
|
||||
console.log('Streamed text:', fullText);
|
||||
expect(fullText).toBeTruthy();
|
||||
expect(fullText.length).toBeGreaterThan(0);
|
||||
expect(tokens.length).toBeGreaterThan(1); // Should have multiple chunks
|
||||
});
|
||||
|
||||
tap.test('generateText with openai model', async () => {
|
||||
const apiKey = await testQenv.getEnvVarOnDemand('OPENAI_TOKEN');
|
||||
if (!apiKey) {
|
||||
console.log('OPENAI_TOKEN not set, skipping test');
|
||||
return;
|
||||
}
|
||||
|
||||
const model = smartai.getModel({
|
||||
provider: 'openai',
|
||||
model: 'gpt-4o-mini',
|
||||
apiKey,
|
||||
});
|
||||
|
||||
const result = await smartai.generateText({
|
||||
model,
|
||||
prompt: 'What is 2+2? Reply with just the number.',
|
||||
});
|
||||
|
||||
console.log('OpenAI response:', result.text);
|
||||
expect(result.text).toBeTruthy();
|
||||
expect(result.text).toInclude('4');
|
||||
});
|
||||
|
||||
tap.test('getModel should throw for unknown provider', async () => {
|
||||
let threw = false;
|
||||
try {
|
||||
smartai.getModel({
|
||||
provider: 'nonexistent' as any,
|
||||
model: 'test',
|
||||
});
|
||||
} catch (e) {
|
||||
threw = true;
|
||||
expect(e.message).toInclude('Unknown provider');
|
||||
}
|
||||
expect(threw).toBeTrue();
|
||||
});
|
||||
|
||||
export default tap.start();
|
||||
@@ -1,151 +0,0 @@
|
||||
import { expect, tap } from '@git.zone/tstest/tapbundle';
|
||||
import * as qenv from '@push.rocks/qenv';
|
||||
|
||||
const testQenv = new qenv.Qenv('./', './.nogit/');
|
||||
|
||||
import * as smartai from '../ts/index.js';
|
||||
|
||||
let anthropicProviderQuick: smartai.AnthropicProvider;
|
||||
let anthropicProviderNormal: smartai.AnthropicProvider;
|
||||
let anthropicProviderDeep: smartai.AnthropicProvider;
|
||||
let anthropicProviderOff: smartai.AnthropicProvider;
|
||||
|
||||
// Test 'quick' mode
|
||||
tap.test('Extended Thinking: should create Anthropic provider with quick mode', async () => {
|
||||
anthropicProviderQuick = new smartai.AnthropicProvider({
|
||||
anthropicToken: await testQenv.getEnvVarOnDemand('ANTHROPIC_TOKEN'),
|
||||
extendedThinking: 'quick',
|
||||
});
|
||||
await anthropicProviderQuick.start();
|
||||
expect(anthropicProviderQuick).toBeInstanceOf(smartai.AnthropicProvider);
|
||||
});
|
||||
|
||||
tap.test('Extended Thinking: should chat with quick mode (2048 tokens)', async () => {
|
||||
const userMessage = 'Explain quantum entanglement in simple terms.';
|
||||
const response = await anthropicProviderQuick.chat({
|
||||
systemMessage: 'You are a helpful physics teacher.',
|
||||
userMessage: userMessage,
|
||||
messageHistory: [],
|
||||
});
|
||||
console.log(`Quick Mode - User: ${userMessage}`);
|
||||
console.log(`Quick Mode - Response length: ${response.message.length} chars`);
|
||||
expect(response.role).toEqual('assistant');
|
||||
expect(response.message).toBeTruthy();
|
||||
expect(response.message.toLowerCase()).toInclude('quantum');
|
||||
});
|
||||
|
||||
tap.test('Extended Thinking: should stop quick mode provider', async () => {
|
||||
await anthropicProviderQuick.stop();
|
||||
});
|
||||
|
||||
// Test 'normal' mode (default)
|
||||
tap.test('Extended Thinking: should create Anthropic provider with normal mode (default)', async () => {
|
||||
anthropicProviderNormal = new smartai.AnthropicProvider({
|
||||
anthropicToken: await testQenv.getEnvVarOnDemand('ANTHROPIC_TOKEN'),
|
||||
// extendedThinking not specified, should default to 'normal'
|
||||
});
|
||||
await anthropicProviderNormal.start();
|
||||
expect(anthropicProviderNormal).toBeInstanceOf(smartai.AnthropicProvider);
|
||||
});
|
||||
|
||||
tap.test('Extended Thinking: should chat with normal mode (8000 tokens default)', async () => {
|
||||
const userMessage = 'What are the implications of the P vs NP problem?';
|
||||
const response = await anthropicProviderNormal.chat({
|
||||
systemMessage: 'You are a helpful computer science expert.',
|
||||
userMessage: userMessage,
|
||||
messageHistory: [],
|
||||
});
|
||||
console.log(`Normal Mode - User: ${userMessage}`);
|
||||
console.log(`Normal Mode - Response length: ${response.message.length} chars`);
|
||||
expect(response.role).toEqual('assistant');
|
||||
expect(response.message).toBeTruthy();
|
||||
expect(response.message.length).toBeGreaterThan(50);
|
||||
});
|
||||
|
||||
tap.test('Extended Thinking: should stop normal mode provider', async () => {
|
||||
await anthropicProviderNormal.stop();
|
||||
});
|
||||
|
||||
// Test 'deep' mode
|
||||
tap.test('Extended Thinking: should create Anthropic provider with deep mode', async () => {
|
||||
anthropicProviderDeep = new smartai.AnthropicProvider({
|
||||
anthropicToken: await testQenv.getEnvVarOnDemand('ANTHROPIC_TOKEN'),
|
||||
extendedThinking: 'deep',
|
||||
});
|
||||
await anthropicProviderDeep.start();
|
||||
expect(anthropicProviderDeep).toBeInstanceOf(smartai.AnthropicProvider);
|
||||
});
|
||||
|
||||
tap.test('Extended Thinking: should chat with deep mode (16000 tokens)', async () => {
|
||||
const userMessage = 'Analyze the philosophical implications of artificial consciousness.';
|
||||
const response = await anthropicProviderDeep.chat({
|
||||
systemMessage: 'You are a philosopher and cognitive scientist.',
|
||||
userMessage: userMessage,
|
||||
messageHistory: [],
|
||||
});
|
||||
console.log(`Deep Mode - User: ${userMessage}`);
|
||||
console.log(`Deep Mode - Response length: ${response.message.length} chars`);
|
||||
expect(response.role).toEqual('assistant');
|
||||
expect(response.message).toBeTruthy();
|
||||
expect(response.message.length).toBeGreaterThan(100);
|
||||
});
|
||||
|
||||
tap.test('Extended Thinking: should stop deep mode provider', async () => {
|
||||
await anthropicProviderDeep.stop();
|
||||
});
|
||||
|
||||
// Test 'off' mode
|
||||
tap.test('Extended Thinking: should create Anthropic provider with thinking disabled', async () => {
|
||||
anthropicProviderOff = new smartai.AnthropicProvider({
|
||||
anthropicToken: await testQenv.getEnvVarOnDemand('ANTHROPIC_TOKEN'),
|
||||
extendedThinking: 'off',
|
||||
});
|
||||
await anthropicProviderOff.start();
|
||||
expect(anthropicProviderOff).toBeInstanceOf(smartai.AnthropicProvider);
|
||||
});
|
||||
|
||||
tap.test('Extended Thinking: should chat with thinking disabled', async () => {
|
||||
const userMessage = 'What is 2 + 2?';
|
||||
const response = await anthropicProviderOff.chat({
|
||||
systemMessage: 'You are a helpful assistant.',
|
||||
userMessage: userMessage,
|
||||
messageHistory: [],
|
||||
});
|
||||
console.log(`Thinking Off - User: ${userMessage}`);
|
||||
console.log(`Thinking Off - Response: ${response.message}`);
|
||||
expect(response.role).toEqual('assistant');
|
||||
expect(response.message).toBeTruthy();
|
||||
expect(response.message).toInclude('4');
|
||||
});
|
||||
|
||||
tap.test('Extended Thinking: should stop off mode provider', async () => {
|
||||
await anthropicProviderOff.stop();
|
||||
});
|
||||
|
||||
// Test with vision method
|
||||
tap.test('Extended Thinking: should work with vision method', async () => {
|
||||
const provider = new smartai.AnthropicProvider({
|
||||
anthropicToken: await testQenv.getEnvVarOnDemand('ANTHROPIC_TOKEN'),
|
||||
extendedThinking: 'normal',
|
||||
});
|
||||
await provider.start();
|
||||
|
||||
// Create a simple test image (1x1 red pixel PNG)
|
||||
const redPixelPng = Buffer.from(
|
||||
'iVBORw0KGgoAAAANSUhEUgAAAAEAAAABCAYAAAAfFcSJAAAADUlEQVR42mP8z8DwHwAFBQIAX8jx0gAAAABJRU5ErkJggg==',
|
||||
'base64'
|
||||
);
|
||||
|
||||
const response = await provider.vision({
|
||||
image: redPixelPng,
|
||||
prompt: 'What color is this image?',
|
||||
});
|
||||
|
||||
console.log(`Vision with Thinking - Response: ${response}`);
|
||||
expect(response).toBeTruthy();
|
||||
expect(response.toLowerCase()).toInclude('red');
|
||||
|
||||
await provider.stop();
|
||||
});
|
||||
|
||||
export default tap.start();
|
||||
@@ -1,96 +0,0 @@
|
||||
import { expect, tap } from '@git.zone/tstest/tapbundle';
|
||||
import * as qenv from '@push.rocks/qenv';
|
||||
import { SmartFs, SmartFsProviderNode } from '@push.rocks/smartfs';
|
||||
|
||||
const testQenv = new qenv.Qenv('./', './.nogit/');
|
||||
const smartfs = new SmartFs(new SmartFsProviderNode());
|
||||
|
||||
import * as smartai from '../ts/index.js';
|
||||
|
||||
let anthropicProvider: smartai.AnthropicProvider;
|
||||
|
||||
tap.test('Anthropic Vision: should create and start Anthropic provider', async () => {
|
||||
anthropicProvider = new smartai.AnthropicProvider({
|
||||
anthropicToken: await testQenv.getEnvVarOnDemand('ANTHROPIC_TOKEN'),
|
||||
});
|
||||
await anthropicProvider.start();
|
||||
expect(anthropicProvider).toBeInstanceOf(smartai.AnthropicProvider);
|
||||
});
|
||||
|
||||
tap.test('Anthropic Vision: should analyze coffee image with latte art', async () => {
|
||||
// Test 1: Coffee image from Unsplash by Dani
|
||||
const imagePath = './test/testimages/coffee-dani/coffee.jpg';
|
||||
console.log(`Loading coffee image from: ${imagePath}`);
|
||||
|
||||
const imageBuffer = await smartfs.file(imagePath).read();
|
||||
console.log(`Image loaded, size: ${imageBuffer.length} bytes`);
|
||||
|
||||
const result = await anthropicProvider.vision({
|
||||
image: imageBuffer,
|
||||
prompt: 'Describe this coffee image. What do you see in terms of the cup, foam pattern, and overall composition?'
|
||||
});
|
||||
|
||||
console.log(`Anthropic Vision (Coffee) - Result: ${result}`);
|
||||
expect(result).toBeTruthy();
|
||||
expect(typeof result).toEqual('string');
|
||||
expect(result.toLowerCase()).toInclude('coffee');
|
||||
// The image has a heart pattern in the latte art
|
||||
const mentionsLatte = result.toLowerCase().includes('heart') ||
|
||||
result.toLowerCase().includes('latte') ||
|
||||
result.toLowerCase().includes('foam');
|
||||
expect(mentionsLatte).toBeTrue();
|
||||
});
|
||||
|
||||
tap.test('Anthropic Vision: should analyze laptop/workspace image', async () => {
|
||||
// Test 2: Laptop image from Unsplash by Nicolas Bichon
|
||||
const imagePath = './test/testimages/laptop-nicolas/laptop.jpg';
|
||||
console.log(`Loading laptop image from: ${imagePath}`);
|
||||
|
||||
const imageBuffer = await smartfs.file(imagePath).read();
|
||||
console.log(`Image loaded, size: ${imageBuffer.length} bytes`);
|
||||
|
||||
const result = await anthropicProvider.vision({
|
||||
image: imageBuffer,
|
||||
prompt: 'Describe the technology and workspace setup in this image. What devices and equipment can you see?'
|
||||
});
|
||||
|
||||
console.log(`Anthropic Vision (Laptop) - Result: ${result}`);
|
||||
expect(result).toBeTruthy();
|
||||
expect(typeof result).toEqual('string');
|
||||
// Should mention laptop, computer, keyboard, or desk
|
||||
const mentionsTech = result.toLowerCase().includes('laptop') ||
|
||||
result.toLowerCase().includes('computer') ||
|
||||
result.toLowerCase().includes('keyboard') ||
|
||||
result.toLowerCase().includes('desk');
|
||||
expect(mentionsTech).toBeTrue();
|
||||
});
|
||||
|
||||
tap.test('Anthropic Vision: should analyze receipt/document image', async () => {
|
||||
// Test 3: Receipt image from Unsplash by Annie Spratt
|
||||
const imagePath = './test/testimages/receipt-annie/receipt.jpg';
|
||||
console.log(`Loading receipt image from: ${imagePath}`);
|
||||
|
||||
const imageBuffer = await smartfs.file(imagePath).read();
|
||||
console.log(`Image loaded, size: ${imageBuffer.length} bytes`);
|
||||
|
||||
const result = await anthropicProvider.vision({
|
||||
image: imageBuffer,
|
||||
prompt: 'What type of document is this? Can you identify any text or numbers visible in the image?'
|
||||
});
|
||||
|
||||
console.log(`Anthropic Vision (Receipt) - Result: ${result}`);
|
||||
expect(result).toBeTruthy();
|
||||
expect(typeof result).toEqual('string');
|
||||
// Should mention receipt, document, text, or paper
|
||||
const mentionsDocument = result.toLowerCase().includes('receipt') ||
|
||||
result.toLowerCase().includes('document') ||
|
||||
result.toLowerCase().includes('text') ||
|
||||
result.toLowerCase().includes('paper');
|
||||
expect(mentionsDocument).toBeTrue();
|
||||
});
|
||||
|
||||
tap.test('Anthropic Vision: should stop the provider', async () => {
|
||||
await anthropicProvider.stop();
|
||||
});
|
||||
|
||||
export default tap.start();
|
||||
test/test.vision.ts — new file, 66 lines
@@ -0,0 +1,66 @@
|
||||
import { tap, expect } from '@git.zone/tstest/tapbundle';
|
||||
import * as qenv from '@push.rocks/qenv';
|
||||
import * as fs from 'fs';
|
||||
import * as path from 'path';
|
||||
import { getModel } from '../ts/index.js';
|
||||
import { analyzeImage } from '../ts_vision/index.js';
|
||||
|
||||
const testQenv = new qenv.Qenv('./', './.nogit/');
|
||||
|
||||
tap.test('analyzeImage should describe a test image', async () => {
|
||||
const apiKey = await testQenv.getEnvVarOnDemand('ANTHROPIC_TOKEN');
|
||||
if (!apiKey) {
|
||||
console.log('ANTHROPIC_TOKEN not set, skipping test');
|
||||
return;
|
||||
}
|
||||
|
||||
// Find an image file recursively in testimages/
|
||||
const testImageDir = path.join(process.cwd(), 'test', 'testimages');
|
||||
if (!fs.existsSync(testImageDir)) {
|
||||
console.log('No test images directory found, skipping test');
|
||||
return;
|
||||
}
|
||||
|
||||
const findImage = (dir: string): string | null => {
|
||||
for (const entry of fs.readdirSync(dir, { withFileTypes: true })) {
|
||||
const fullPath = path.join(dir, entry.name);
|
||||
if (entry.isDirectory()) {
|
||||
const found = findImage(fullPath);
|
||||
if (found) return found;
|
||||
} else if (/\.(jpg|jpeg|png)$/i.test(entry.name)) {
|
||||
return fullPath;
|
||||
}
|
||||
}
|
||||
return null;
|
||||
};
|
||||
|
||||
const imagePath = findImage(testImageDir);
|
||||
if (!imagePath) {
|
||||
console.log('No test images found, skipping test');
|
||||
return;
|
||||
}
|
||||
|
||||
const imageBuffer = fs.readFileSync(imagePath);
|
||||
const ext = path.extname(imagePath).toLowerCase();
|
||||
const mediaType = ext === '.png' ? 'image/png' : 'image/jpeg';
|
||||
|
||||
const model = getModel({
|
||||
provider: 'anthropic',
|
||||
model: 'claude-sonnet-4-5-20250929',
|
||||
apiKey,
|
||||
promptCaching: false,
|
||||
});
|
||||
|
||||
const result = await analyzeImage({
|
||||
model,
|
||||
image: imageBuffer,
|
||||
prompt: 'Describe this image briefly.',
|
||||
mediaType: mediaType as 'image/jpeg' | 'image/png',
|
||||
});
|
||||
|
||||
console.log('Vision result:', result);
|
||||
expect(result).toBeTruthy();
|
||||
expect(result.length).toBeGreaterThan(10);
|
||||
});
|
||||
|
||||
export default tap.start();
|
||||
@@ -3,6 +3,6 @@
|
||||
*/
|
||||
export const commitinfo = {
|
||||
name: '@push.rocks/smartai',
|
||||
version: '0.13.3',
|
||||
description: 'SmartAi is a versatile TypeScript library designed to facilitate integration and interaction with various AI models, offering functionalities for chat, audio generation, document processing, and vision tasks.'
|
||||
version: '2.0.0',
|
||||
description: 'Provider registry and capability utilities for ai-sdk (Vercel AI SDK). Core export returns LanguageModel; subpath exports provide vision, audio, image, document and research capabilities.'
|
||||
}
|
||||
|
||||
@@ -1,240 +0,0 @@
|
||||
import * as plugins from './plugins.js';

/**
 * Message format for chat interactions
 */
export interface ChatMessage {
  role: 'assistant' | 'user' | 'system';
  content: string;
  /** Base64-encoded images for vision-capable models */
  images?: string[];
  /** Chain-of-thought reasoning for GPT-OSS models (e.g., Ollama) */
  reasoning?: string;
}

/**
 * Options for chat interactions
 */
export interface ChatOptions {
  systemMessage: string;
  userMessage: string;
  messageHistory: ChatMessage[];
  /** Base64-encoded images for the current message (vision-capable models) */
  images?: string[];
}

/**
 * Options for streaming chat interactions
 */
export interface StreamingChatOptions extends ChatOptions {
  /** Callback fired for each token during generation */
  onToken?: (token: string) => void;
}

/**
 * Response format for chat interactions
 */
export interface ChatResponse {
  role: 'assistant';
  message: string;
  /** Chain-of-thought reasoning from reasoning models */
  reasoning?: string;
}

/**
 * Options for research interactions
 */
export interface ResearchOptions {
  // The research question or topic to investigate
  query: string;
  // How thorough the search should be; provider-specific interpretation
  searchDepth?: 'basic' | 'advanced' | 'deep';
  // Upper bound on the number of sources to return
  maxSources?: number;
  includeWebSearch?: boolean;
  background?: boolean;
}

/**
 * Response format for research interactions
 */
export interface ResearchResponse {
  // Synthesized answer text
  answer: string;
  // Citations backing the answer
  sources: Array<{
    url: string;
    title: string;
    snippet: string;
  }>;
  // Queries the provider issued while researching, when reported
  searchQueries?: string[];
  // Provider-specific extra data; shape varies by provider
  metadata?: any;
}

/**
 * Options for image generation
 */
export interface ImageGenerateOptions {
  prompt: string;
  model?: 'gpt-image-1' | 'dall-e-3' | 'dall-e-2';
  quality?: 'low' | 'medium' | 'high' | 'standard' | 'hd' | 'auto';
  size?: '256x256' | '512x512' | '1024x1024' | '1536x1024' | '1024x1536' | '1792x1024' | '1024x1792' | 'auto';
  style?: 'vivid' | 'natural';
  background?: 'transparent' | 'opaque' | 'auto';
  outputFormat?: 'png' | 'jpeg' | 'webp';
  outputCompression?: number; // 0-100 for webp/jpeg
  moderation?: 'low' | 'auto';
  n?: number; // Number of images to generate
  stream?: boolean;
  partialImages?: number; // 0-3 for streaming
}

/**
 * Options for image editing
 */
export interface ImageEditOptions {
  // Source image to edit
  image: Buffer;
  prompt: string;
  // Optional mask restricting which region is edited
  mask?: Buffer;
  model?: 'gpt-image-1' | 'dall-e-2';
  quality?: 'low' | 'medium' | 'high' | 'standard' | 'auto';
  size?: '256x256' | '512x512' | '1024x1024' | '1536x1024' | '1024x1536' | 'auto';
  background?: 'transparent' | 'opaque' | 'auto';
  outputFormat?: 'png' | 'jpeg' | 'webp';
  outputCompression?: number;
  n?: number;
  stream?: boolean;
  partialImages?: number;
}

/**
 * Response format for image operations
 */
export interface ImageResponse {
  images: Array<{
    // Base64-encoded image payload, when returned inline
    b64_json?: string;
    // Hosted URL alternative to the inline payload
    url?: string;
    revisedPrompt?: string;
  }>;
  metadata?: {
    model: string;
    quality?: string;
    size?: string;
    outputFormat?: string;
    tokensUsed?: number;
  };
}

/**
 * Abstract base class for multi-modal AI models.
 * Provides a common interface for different AI providers (OpenAI, Anthropic, Perplexity, Ollama)
 */
export abstract class MultiModalModel {
  /**
   * SmartPdf instance for document processing
   * Lazy-loaded only when PDF processing is needed to avoid starting browser unnecessarily
   */
  protected smartpdfInstance: plugins.smartpdf.SmartPdf | null = null;

  /**
   * Ensures SmartPdf instance is initialized and ready
   * Call this before using smartpdfInstance in document processing methods
   */
  protected async ensureSmartpdfReady(): Promise<void> {
    if (!this.smartpdfInstance) {
      this.smartpdfInstance = new plugins.smartpdf.SmartPdf();
      await this.smartpdfInstance.start();
    }
  }

  /**
   * Initializes the model and any necessary resources
   * Should be called before using any other methods
   */
  public async start(): Promise<void> {
    // SmartPdf is now lazy-loaded only when needed for PDF processing
    // This avoids starting a browser unless document() method is actually used
  }

  /**
   * Cleans up any resources used by the model
   * Should be called when the model is no longer needed
   */
  public async stop(): Promise<void> {
    // Tear down the lazily-created SmartPdf instance (and its browser), if any.
    if (this.smartpdfInstance) {
      await this.smartpdfInstance.stop();
      this.smartpdfInstance = null;
    }
  }

  /**
   * Synchronous chat interaction with the model
   * @param optionsArg Options containing system message, user message, and message history
   * @returns Promise resolving to the assistant's response
   */
  public abstract chat(optionsArg: ChatOptions): Promise<ChatResponse>;

  /**
   * Streaming interface for chat interactions
   * Allows for real-time responses from the model
   * @param input Stream of user messages
   * @returns Stream of model responses
   */
  public abstract chatStream(input: ReadableStream<Uint8Array>): Promise<ReadableStream<string>>;

  /**
   * Streaming chat with token callback
   * Calls onToken for each token generated, returns final response
   * Optional: not every provider implements token-level streaming.
   * @param optionsArg Options containing system message, user message, message history, and onToken callback
   * @returns Promise resolving to the assistant's response
   */
  public chatStreaming?(optionsArg: StreamingChatOptions): Promise<ChatResponse>;

  /**
   * Text-to-speech conversion
   * @param optionsArg Options containing the message to convert to speech
   * @returns Promise resolving to a readable stream of audio data
   * @throws Error if the provider doesn't support audio generation
   */
  public abstract audio(optionsArg: { message: string }): Promise<NodeJS.ReadableStream>;

  /**
   * Vision-language processing
   * @param optionsArg Options containing the image and prompt for analysis
   * @returns Promise resolving to the model's description or analysis of the image
   * @throws Error if the provider doesn't support vision tasks
   */
  public abstract vision(optionsArg: { image: Buffer; prompt: string }): Promise<string>;

  /**
   * Document analysis and processing
   * @param optionsArg Options containing system message, user message, PDF documents, and message history
   * @returns Promise resolving to the model's analysis of the documents
   * @throws Error if the provider doesn't support document processing
   */
  public abstract document(optionsArg: {
    systemMessage: string;
    userMessage: string;
    pdfDocuments: Uint8Array[];
    messageHistory: ChatMessage[];
  }): Promise<{ message: any }>;

  /**
   * Research and web search capabilities
   * @param optionsArg Options containing the research query and configuration
   * @returns Promise resolving to the research results with sources
   * @throws Error if the provider doesn't support research capabilities
   */
  public abstract research(optionsArg: ResearchOptions): Promise<ResearchResponse>;

  /**
   * Image generation from text prompts
   * @param optionsArg Options containing the prompt and generation parameters
   * @returns Promise resolving to the generated image(s)
   * @throws Error if the provider doesn't support image generation
   */
  public abstract imageGenerate(optionsArg: ImageGenerateOptions): Promise<ImageResponse>;

  /**
   * Image editing and inpainting
   * @param optionsArg Options containing the image, prompt, and editing parameters
   * @returns Promise resolving to the edited image(s)
   * @throws Error if the provider doesn't support image editing
   */
  public abstract imageEdit(optionsArg: ImageEditOptions): Promise<ImageResponse>;
}
|
||||
@@ -1,176 +0,0 @@
|
||||
import type { SmartAi } from "./classes.smartai.js";
|
||||
import { OpenAiProvider } from "./provider.openai.js";
|
||||
|
||||
type TProcessFunction = (input: string) => Promise<string>;
|
||||
|
||||
export interface IConversationOptions {
|
||||
processFunction: TProcessFunction;
|
||||
}
|
||||
|
||||
/**
|
||||
* a conversation
|
||||
*/
|
||||
export class Conversation {
|
||||
// STATIC
|
||||
public static async createWithOpenAi(smartaiRefArg: SmartAi) {
|
||||
if (!smartaiRefArg.openaiProvider) {
|
||||
throw new Error('OpenAI provider not available');
|
||||
}
|
||||
const conversation = new Conversation(smartaiRefArg, {
|
||||
processFunction: async (input) => {
|
||||
return '' // TODO implement proper streaming
|
||||
}
|
||||
});
|
||||
return conversation;
|
||||
}
|
||||
|
||||
public static async createWithAnthropic(smartaiRefArg: SmartAi) {
|
||||
if (!smartaiRefArg.anthropicProvider) {
|
||||
throw new Error('Anthropic provider not available');
|
||||
}
|
||||
const conversation = new Conversation(smartaiRefArg, {
|
||||
processFunction: async (input) => {
|
||||
return '' // TODO implement proper streaming
|
||||
}
|
||||
});
|
||||
return conversation;
|
||||
}
|
||||
|
||||
public static async createWithPerplexity(smartaiRefArg: SmartAi) {
|
||||
if (!smartaiRefArg.perplexityProvider) {
|
||||
throw new Error('Perplexity provider not available');
|
||||
}
|
||||
const conversation = new Conversation(smartaiRefArg, {
|
||||
processFunction: async (input) => {
|
||||
return '' // TODO implement proper streaming
|
||||
}
|
||||
});
|
||||
return conversation;
|
||||
}
|
||||
|
||||
public static async createWithExo(smartaiRefArg: SmartAi) {
|
||||
if (!smartaiRefArg.exoProvider) {
|
||||
throw new Error('Exo provider not available');
|
||||
}
|
||||
const conversation = new Conversation(smartaiRefArg, {
|
||||
processFunction: async (input) => {
|
||||
return '' // TODO implement proper streaming
|
||||
}
|
||||
});
|
||||
return conversation;
|
||||
}
|
||||
|
||||
public static async createWithOllama(smartaiRefArg: SmartAi) {
|
||||
if (!smartaiRefArg.ollamaProvider) {
|
||||
throw new Error('Ollama provider not available');
|
||||
}
|
||||
const conversation = new Conversation(smartaiRefArg, {
|
||||
processFunction: async (input) => {
|
||||
return '' // TODO implement proper streaming
|
||||
}
|
||||
});
|
||||
return conversation;
|
||||
}
|
||||
|
||||
public static async createWithGroq(smartaiRefArg: SmartAi) {
|
||||
if (!smartaiRefArg.groqProvider) {
|
||||
throw new Error('Groq provider not available');
|
||||
}
|
||||
const conversation = new Conversation(smartaiRefArg, {
|
||||
processFunction: async (input) => {
|
||||
return '' // TODO implement proper streaming
|
||||
}
|
||||
});
|
||||
return conversation;
|
||||
}
|
||||
|
||||
public static async createWithMistral(smartaiRefArg: SmartAi) {
|
||||
if (!smartaiRefArg.mistralProvider) {
|
||||
throw new Error('Mistral provider not available');
|
||||
}
|
||||
const conversation = new Conversation(smartaiRefArg, {
|
||||
processFunction: async (input) => {
|
||||
return '' // TODO implement proper streaming
|
||||
}
|
||||
});
|
||||
return conversation;
|
||||
}
|
||||
|
||||
public static async createWithXai(smartaiRefArg: SmartAi) {
|
||||
if (!smartaiRefArg.xaiProvider) {
|
||||
throw new Error('XAI provider not available');
|
||||
}
|
||||
const conversation = new Conversation(smartaiRefArg, {
|
||||
processFunction: async (input) => {
|
||||
return '' // TODO implement proper streaming
|
||||
}
|
||||
});
|
||||
return conversation;
|
||||
}
|
||||
|
||||
public static async createWithElevenlabs(smartaiRefArg: SmartAi) {
|
||||
if (!smartaiRefArg.elevenlabsProvider) {
|
||||
throw new Error('ElevenLabs provider not available');
|
||||
}
|
||||
const conversation = new Conversation(smartaiRefArg, {
|
||||
processFunction: async (input) => {
|
||||
return '' // TODO implement proper streaming
|
||||
}
|
||||
});
|
||||
return conversation;
|
||||
}
|
||||
|
||||
// INSTANCE
|
||||
smartaiRef: SmartAi
|
||||
private systemMessage: string;
|
||||
private processFunction: TProcessFunction;
|
||||
private inputStreamWriter: WritableStreamDefaultWriter<string> | null = null;
|
||||
private outputStreamController: ReadableStreamDefaultController<string> | null = null;
|
||||
|
||||
constructor(smartairefArg: SmartAi, options: IConversationOptions) {
|
||||
this.processFunction = options.processFunction;
|
||||
}
|
||||
|
||||
public async setSystemMessage(systemMessageArg: string) {
|
||||
this.systemMessage = systemMessageArg;
|
||||
}
|
||||
|
||||
private setupOutputStream(): ReadableStream<string> {
|
||||
return new ReadableStream<string>({
|
||||
start: (controller) => {
|
||||
this.outputStreamController = controller;
|
||||
}
|
||||
});
|
||||
}
|
||||
|
||||
private setupInputStream(): WritableStream<string> {
|
||||
const writableStream = new WritableStream<string>({
|
||||
write: async (chunk) => {
|
||||
const processedData = await this.processFunction(chunk);
|
||||
if (this.outputStreamController) {
|
||||
this.outputStreamController.enqueue(processedData);
|
||||
}
|
||||
},
|
||||
close: () => {
|
||||
this.outputStreamController?.close();
|
||||
},
|
||||
abort: (err) => {
|
||||
console.error('Stream aborted', err);
|
||||
this.outputStreamController?.error(err);
|
||||
}
|
||||
});
|
||||
return writableStream;
|
||||
}
|
||||
|
||||
public getInputStreamWriter(): WritableStreamDefaultWriter<string> {
|
||||
if (!this.inputStreamWriter) {
|
||||
const inputStream = this.setupInputStream();
|
||||
this.inputStreamWriter = inputStream.getWriter();
|
||||
}
|
||||
return this.inputStreamWriter;
|
||||
}
|
||||
|
||||
public getOutputStream(): ReadableStream<string> {
|
||||
return this.setupOutputStream();
|
||||
}
|
||||
}
|
||||
@@ -1,187 +0,0 @@
|
||||
import { Conversation } from './classes.conversation.js';
|
||||
import * as plugins from './plugins.js';
|
||||
import { AnthropicProvider } from './provider.anthropic.js';
|
||||
import { ElevenLabsProvider } from './provider.elevenlabs.js';
|
||||
import { MistralProvider } from './provider.mistral.js';
|
||||
import { OllamaProvider, type IOllamaModelOptions } from './provider.ollama.js';
|
||||
import { OpenAiProvider } from './provider.openai.js';
|
||||
import { PerplexityProvider } from './provider.perplexity.js';
|
||||
import { ExoProvider } from './provider.exo.js';
|
||||
import { GroqProvider } from './provider.groq.js';
|
||||
import { XAIProvider } from './provider.xai.js';
|
||||
|
||||
|
||||
/**
 * Construction options for SmartAi. A provider is only initialized in
 * `start()` when its token (or options object) is present here.
 */
export interface ISmartAiOptions {
  openaiToken?: string;
  anthropicToken?: string;
  perplexityToken?: string;
  groqToken?: string;
  mistralToken?: string;
  xaiToken?: string;
  elevenlabsToken?: string;
  // Exo is self-hosted: configured by URL (plus optional key) instead of a token.
  exo?: {
    baseUrl?: string;
    apiKey?: string;
  };
  // Optional Mistral tuning; only applied when mistralToken is also set.
  mistral?: {
    chatModel?: string;
    ocrModel?: string;
    tableFormat?: 'markdown' | 'html';
  };
  // Ollama runs locally: presence of this object alone enables the provider.
  ollama?: {
    baseUrl?: string;
    model?: string;
    visionModel?: string;
    defaultOptions?: IOllamaModelOptions;
    defaultTimeout?: number;
  };
  // Optional ElevenLabs defaults; only applied when elevenlabsToken is also set.
  elevenlabs?: {
    defaultVoiceId?: string;
    defaultModelId?: string;
  };
}

// Provider identifiers accepted by SmartAi.createConversation().
export type TProvider = 'openai' | 'anthropic' | 'perplexity' | 'ollama' | 'exo' | 'groq' | 'mistral' | 'xai' | 'elevenlabs';
|
||||
|
||||
/**
 * Facade over all supported AI providers. Construct with tokens/options,
 * call `start()` to initialize the configured providers, `stop()` to tear
 * them down, and `createConversation()` to open a provider-backed chat.
 */
export class SmartAi {
  public options: ISmartAiOptions;

  // Each provider stays undefined unless its option was supplied AND start()
  // has been called.
  public openaiProvider: OpenAiProvider;
  public anthropicProvider: AnthropicProvider;
  public perplexityProvider: PerplexityProvider;
  public ollamaProvider: OllamaProvider;
  public exoProvider: ExoProvider;
  public groqProvider: GroqProvider;
  public mistralProvider: MistralProvider;
  public xaiProvider: XAIProvider;
  public elevenlabsProvider: ElevenLabsProvider;

  constructor(optionsArg: ISmartAiOptions) {
    this.options = optionsArg;
  }

  /**
   * Initializes every provider whose token/options were supplied.
   * Providers are started sequentially; a failure in one aborts the rest.
   */
  public async start() {
    if (this.options.openaiToken) {
      this.openaiProvider = new OpenAiProvider({
        openaiToken: this.options.openaiToken,
      });
      await this.openaiProvider.start();
    }
    if (this.options.anthropicToken) {
      this.anthropicProvider = new AnthropicProvider({
        anthropicToken: this.options.anthropicToken,
      });
      await this.anthropicProvider.start();
    }
    if (this.options.perplexityToken) {
      this.perplexityProvider = new PerplexityProvider({
        perplexityToken: this.options.perplexityToken,
      });
      await this.perplexityProvider.start();
    }
    if (this.options.groqToken) {
      this.groqProvider = new GroqProvider({
        groqToken: this.options.groqToken,
      });
      await this.groqProvider.start();
    }
    if (this.options.mistralToken) {
      // Optional model/format tuning is forwarded from options.mistral.
      this.mistralProvider = new MistralProvider({
        mistralToken: this.options.mistralToken,
        chatModel: this.options.mistral?.chatModel,
        ocrModel: this.options.mistral?.ocrModel,
        tableFormat: this.options.mistral?.tableFormat,
      });
      await this.mistralProvider.start();
    }
    if (this.options.xaiToken) {
      this.xaiProvider = new XAIProvider({
        xaiToken: this.options.xaiToken,
      });
      await this.xaiProvider.start();
    }
    if (this.options.elevenlabsToken) {
      this.elevenlabsProvider = new ElevenLabsProvider({
        elevenlabsToken: this.options.elevenlabsToken,
        defaultVoiceId: this.options.elevenlabs?.defaultVoiceId,
        defaultModelId: this.options.elevenlabs?.defaultModelId,
      });
      await this.elevenlabsProvider.start();
    }
    // Ollama and Exo are keyed on their options object, not a token.
    if (this.options.ollama) {
      this.ollamaProvider = new OllamaProvider({
        baseUrl: this.options.ollama.baseUrl,
        model: this.options.ollama.model,
        visionModel: this.options.ollama.visionModel,
        defaultOptions: this.options.ollama.defaultOptions,
        defaultTimeout: this.options.ollama.defaultTimeout,
      });
      await this.ollamaProvider.start();
    }
    if (this.options.exo) {
      this.exoProvider = new ExoProvider({
        exoBaseUrl: this.options.exo.baseUrl,
        apiKey: this.options.exo.apiKey,
      });
      await this.exoProvider.start();
    }
  }

  /**
   * Stops every provider that was initialized. Safe to call when start()
   * was never invoked or only some providers were configured.
   */
  public async stop() {
    if (this.openaiProvider) {
      await this.openaiProvider.stop();
    }
    if (this.anthropicProvider) {
      await this.anthropicProvider.stop();
    }
    if (this.perplexityProvider) {
      await this.perplexityProvider.stop();
    }
    if (this.groqProvider) {
      await this.groqProvider.stop();
    }
    if (this.mistralProvider) {
      await this.mistralProvider.stop();
    }
    if (this.xaiProvider) {
      await this.xaiProvider.stop();
    }
    if (this.elevenlabsProvider) {
      await this.elevenlabsProvider.stop();
    }
    if (this.ollamaProvider) {
      await this.ollamaProvider.stop();
    }
    if (this.exoProvider) {
      await this.exoProvider.stop();
    }
  }

  /**
   * create a new conversation
   *
   * @param provider identifier of the provider backing the conversation
   * @returns a Promise resolving to the new Conversation
   * @throws Error when an unknown provider id is passed (the Conversation
   *   factory additionally throws if that provider was never started)
   */
  createConversation(provider: TProvider) {
    switch (provider) {
      case 'exo':
        return Conversation.createWithExo(this);
      case 'openai':
        return Conversation.createWithOpenAi(this);
      case 'anthropic':
        return Conversation.createWithAnthropic(this);
      case 'perplexity':
        return Conversation.createWithPerplexity(this);
      case 'ollama':
        return Conversation.createWithOllama(this);
      case 'groq':
        return Conversation.createWithGroq(this);
      case 'mistral':
        return Conversation.createWithMistral(this);
      case 'xai':
        return Conversation.createWithXai(this);
      case 'elevenlabs':
        return Conversation.createWithElevenlabs(this);
      default:
        throw new Error('Provider not available');
    }
  }
}
|
||||
@@ -1,15 +0,0 @@
|
||||
import type { SmartAi } from './classes.smartai.js';
|
||||
import * as plugins from './plugins.js';
|
||||
|
||||
export class TTS {
|
||||
public static async createWithOpenAi(smartaiRef: SmartAi): Promise<TTS> {
|
||||
return new TTS(smartaiRef);
|
||||
}
|
||||
|
||||
// INSTANCE
|
||||
smartaiRef: SmartAi;
|
||||
|
||||
constructor(smartairefArg: SmartAi) {
|
||||
this.smartaiRef = smartairefArg;
|
||||
}
|
||||
}
|
||||
19
ts/index.ts
19
ts/index.ts
@@ -1,11 +1,8 @@
|
||||
export * from './classes.smartai.js';
|
||||
export * from './abstract.classes.multimodal.js';
|
||||
export * from './provider.openai.js';
|
||||
export * from './provider.anthropic.js';
|
||||
export * from './provider.perplexity.js';
|
||||
export * from './provider.groq.js';
|
||||
export * from './provider.mistral.js';
|
||||
export * from './provider.ollama.js';
|
||||
export * from './provider.xai.js';
|
||||
export * from './provider.exo.js';
|
||||
export * from './provider.elevenlabs.js';
|
||||
export { getModel } from './smartai.classes.smartai.js';
|
||||
export type { ISmartAiOptions, TProvider, IOllamaModelOptions, LanguageModelV3 } from './smartai.interfaces.js';
|
||||
export { createAnthropicCachingMiddleware } from './smartai.middleware.anthropic.js';
|
||||
export { createOllamaModel } from './smartai.provider.ollama.js';
|
||||
|
||||
// Re-export commonly used ai-sdk functions for consumer convenience
|
||||
export { generateText, streamText, tool, jsonSchema } from 'ai';
|
||||
export type { ModelMessage, ToolSet, StreamTextResult } from 'ai';
|
||||
|
||||
@@ -1,4 +0,0 @@
|
||||
import * as plugins from './plugins.js';
|
||||
|
||||
// Absolute path to the package root (one level above this compiled module's directory).
export const packageDir = plugins.path.join(plugins.smartpath.get.dirnameFromImportMetaUrl(import.meta.url), '../');
// Directory for runtime artifacts kept out of version control.
export const nogitDir = plugins.path.join(packageDir, './.nogit');
|
||||
@@ -1,38 +1,22 @@
|
||||
// node native
|
||||
import * as path from 'path';
|
||||
// ai sdk core
|
||||
import { generateText, streamText, wrapLanguageModel, tool, jsonSchema } from 'ai';
|
||||
export { generateText, streamText, wrapLanguageModel, tool, jsonSchema };
|
||||
|
||||
// ai sdk providers
|
||||
import { createAnthropic } from '@ai-sdk/anthropic';
|
||||
import { createOpenAI } from '@ai-sdk/openai';
|
||||
import { createGoogleGenerativeAI } from '@ai-sdk/google';
|
||||
import { createGroq } from '@ai-sdk/groq';
|
||||
import { createMistral } from '@ai-sdk/mistral';
|
||||
import { createXai } from '@ai-sdk/xai';
|
||||
import { createPerplexity } from '@ai-sdk/perplexity';
|
||||
|
||||
export {
|
||||
path,
|
||||
}
|
||||
|
||||
// @push.rocks scope
|
||||
import * as qenv from '@push.rocks/qenv';
|
||||
import * as smartarray from '@push.rocks/smartarray';
|
||||
import * as smartfs from '@push.rocks/smartfs';
|
||||
import * as smartpath from '@push.rocks/smartpath';
|
||||
import * as smartpdf from '@push.rocks/smartpdf';
|
||||
import * as smartpromise from '@push.rocks/smartpromise';
|
||||
import * as smartrequest from '@push.rocks/smartrequest';
|
||||
import * as webstream from '@push.rocks/webstream';
|
||||
|
||||
export {
|
||||
smartarray,
|
||||
qenv,
|
||||
smartfs,
|
||||
smartpath,
|
||||
smartpdf,
|
||||
smartpromise,
|
||||
smartrequest,
|
||||
webstream,
|
||||
}
|
||||
|
||||
// third party
|
||||
import * as anthropic from '@anthropic-ai/sdk';
|
||||
import * as mistralai from '@mistralai/mistralai';
|
||||
import * as openai from 'openai';
|
||||
|
||||
export {
|
||||
anthropic,
|
||||
mistralai,
|
||||
openai,
|
||||
}
|
||||
createAnthropic,
|
||||
createOpenAI,
|
||||
createGoogleGenerativeAI,
|
||||
createGroq,
|
||||
createMistral,
|
||||
createXai,
|
||||
createPerplexity,
|
||||
};
|
||||
|
||||
@@ -1,446 +0,0 @@
|
||||
import * as plugins from './plugins.js';
|
||||
import * as paths from './paths.js';
|
||||
import { MultiModalModel } from './abstract.classes.multimodal.js';
|
||||
import type {
|
||||
ChatOptions,
|
||||
ChatResponse,
|
||||
ChatMessage,
|
||||
ResearchOptions,
|
||||
ResearchResponse,
|
||||
ImageGenerateOptions,
|
||||
ImageEditOptions,
|
||||
ImageResponse
|
||||
} from './abstract.classes.multimodal.js';
|
||||
import type { ImageBlockParam, TextBlockParam } from '@anthropic-ai/sdk/resources/messages';
|
||||
|
||||
type ContentBlock = ImageBlockParam | TextBlockParam;
|
||||
|
||||
/**
 * Construction options for AnthropicProvider.
 */
export interface IAnthropicProviderOptions {
  // API key used to authenticate against the Anthropic API.
  anthropicToken: string;
  // Enables the server-side web_search tool in research().
  enableWebSearch?: boolean;
  // When set, web search is restricted to these domains.
  searchDomainAllowList?: string[];
  // When set (and no allow list is given), these domains are excluded from search.
  searchDomainBlockList?: string[];
  // Extended-thinking budget preset; 'off' disables thinking. Defaults to 'normal'.
  extendedThinking?: 'quick' | 'normal' | 'deep' | 'off';
}
|
||||
|
||||
export class AnthropicProvider extends MultiModalModel {
|
||||
private options: IAnthropicProviderOptions;
|
||||
public anthropicApiClient: plugins.anthropic.default;
|
||||
|
||||
constructor(optionsArg: IAnthropicProviderOptions) {
|
||||
super();
|
||||
this.options = optionsArg // Ensure the token is stored
|
||||
}
|
||||
|
||||
async start() {
|
||||
await super.start();
|
||||
this.anthropicApiClient = new plugins.anthropic.default({
|
||||
apiKey: this.options.anthropicToken,
|
||||
});
|
||||
}
|
||||
|
||||
async stop() {
|
||||
await super.stop();
|
||||
}
|
||||
|
||||
/**
|
||||
* Returns the thinking configuration based on provider options.
|
||||
* Defaults to 'normal' mode (8000 tokens) if not specified.
|
||||
*/
|
||||
private getThinkingConfig(): { type: 'enabled'; budget_tokens: number } | undefined {
|
||||
const mode = this.options.extendedThinking ?? 'normal';
|
||||
|
||||
const budgetMap = {
|
||||
quick: 2048,
|
||||
normal: 8000,
|
||||
deep: 16000,
|
||||
off: 0,
|
||||
};
|
||||
|
||||
const budget = budgetMap[mode];
|
||||
|
||||
return budget > 0 ? { type: 'enabled', budget_tokens: budget } : undefined;
|
||||
}
|
||||
|
||||
public async chatStream(input: ReadableStream<Uint8Array>): Promise<ReadableStream<string>> {
|
||||
// Create a TextDecoder to handle incoming chunks
|
||||
const decoder = new TextDecoder();
|
||||
let buffer = '';
|
||||
let currentMessage: { role: string; content: string; } | null = null;
|
||||
|
||||
// Create a TransformStream to process the input
|
||||
const transform = new TransformStream<Uint8Array, string>({
|
||||
async transform(chunk, controller) {
|
||||
buffer += decoder.decode(chunk, { stream: true });
|
||||
|
||||
// Try to parse complete JSON messages from the buffer
|
||||
while (true) {
|
||||
const newlineIndex = buffer.indexOf('\n');
|
||||
if (newlineIndex === -1) break;
|
||||
|
||||
const line = buffer.slice(0, newlineIndex);
|
||||
buffer = buffer.slice(newlineIndex + 1);
|
||||
|
||||
if (line.trim()) {
|
||||
try {
|
||||
const message = JSON.parse(line);
|
||||
currentMessage = {
|
||||
role: message.role || 'user',
|
||||
content: message.content || '',
|
||||
};
|
||||
} catch (e) {
|
||||
console.error('Failed to parse message:', e);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// If we have a complete message, send it to Anthropic
|
||||
if (currentMessage) {
|
||||
const thinkingConfig = this.getThinkingConfig();
|
||||
const stream = await this.anthropicApiClient.messages.create({
|
||||
model: 'claude-sonnet-4-5-20250929',
|
||||
messages: [{ role: currentMessage.role, content: currentMessage.content }],
|
||||
system: '',
|
||||
stream: true,
|
||||
max_tokens: 20000,
|
||||
...(thinkingConfig && { thinking: thinkingConfig }),
|
||||
});
|
||||
|
||||
// Process each chunk from Anthropic
|
||||
for await (const chunk of stream) {
|
||||
const content = chunk.delta?.text;
|
||||
if (content) {
|
||||
controller.enqueue(content);
|
||||
}
|
||||
}
|
||||
|
||||
currentMessage = null;
|
||||
}
|
||||
},
|
||||
|
||||
flush(controller) {
|
||||
if (buffer) {
|
||||
try {
|
||||
const message = JSON.parse(buffer);
|
||||
controller.enqueue(message.content || '');
|
||||
} catch (e) {
|
||||
console.error('Failed to parse remaining buffer:', e);
|
||||
}
|
||||
}
|
||||
}
|
||||
});
|
||||
|
||||
// Connect the input to our transform stream
|
||||
return input.pipeThrough(transform);
|
||||
}
|
||||
|
||||
// Implementing the synchronous chat interaction
|
||||
public async chat(optionsArg: ChatOptions): Promise<ChatResponse> {
|
||||
// Convert message history to Anthropic format
|
||||
const messages = optionsArg.messageHistory.map(msg => ({
|
||||
role: msg.role === 'assistant' ? 'assistant' as const : 'user' as const,
|
||||
content: msg.content
|
||||
}));
|
||||
|
||||
const thinkingConfig = this.getThinkingConfig();
|
||||
const result = await this.anthropicApiClient.messages.create({
|
||||
model: 'claude-sonnet-4-5-20250929',
|
||||
system: optionsArg.systemMessage,
|
||||
messages: [
|
||||
...messages,
|
||||
{ role: 'user' as const, content: optionsArg.userMessage }
|
||||
],
|
||||
max_tokens: 20000,
|
||||
...(thinkingConfig && { thinking: thinkingConfig }),
|
||||
});
|
||||
|
||||
// Extract text content from the response
|
||||
let message = '';
|
||||
for (const block of result.content) {
|
||||
if ('text' in block) {
|
||||
message += block.text;
|
||||
}
|
||||
}
|
||||
|
||||
return {
|
||||
role: 'assistant' as const,
|
||||
message,
|
||||
};
|
||||
}
|
||||
|
||||
public async audio(optionsArg: { message: string }): Promise<NodeJS.ReadableStream> {
|
||||
// Anthropic does not provide an audio API, so this method is not implemented.
|
||||
throw new Error('Audio generation is not yet supported by Anthropic.');
|
||||
}
|
||||
|
||||
public async vision(optionsArg: { image: Buffer; prompt: string }): Promise<string> {
|
||||
const base64Image = optionsArg.image.toString('base64');
|
||||
|
||||
const content: ContentBlock[] = [
|
||||
{
|
||||
type: 'text',
|
||||
text: optionsArg.prompt
|
||||
},
|
||||
{
|
||||
type: 'image',
|
||||
source: {
|
||||
type: 'base64',
|
||||
media_type: 'image/jpeg',
|
||||
data: base64Image
|
||||
}
|
||||
}
|
||||
];
|
||||
|
||||
const thinkingConfig = this.getThinkingConfig();
|
||||
const result = await this.anthropicApiClient.messages.create({
|
||||
model: 'claude-sonnet-4-5-20250929',
|
||||
messages: [{
|
||||
role: 'user',
|
||||
content
|
||||
}],
|
||||
max_tokens: 10000,
|
||||
...(thinkingConfig && { thinking: thinkingConfig }),
|
||||
});
|
||||
|
||||
// Extract text content from the response
|
||||
let message = '';
|
||||
for (const block of result.content) {
|
||||
if ('text' in block) {
|
||||
message += block.text;
|
||||
}
|
||||
}
|
||||
return message;
|
||||
}
|
||||
|
||||
public async document(optionsArg: {
|
||||
systemMessage: string;
|
||||
userMessage: string;
|
||||
pdfDocuments: Uint8Array[];
|
||||
messageHistory: ChatMessage[];
|
||||
}): Promise<{ message: any }> {
|
||||
// Ensure SmartPdf is initialized before processing documents
|
||||
await this.ensureSmartpdfReady();
|
||||
|
||||
// Convert PDF documents to images using SmartPDF
|
||||
let documentImageBytesArray: Uint8Array[] = [];
|
||||
|
||||
for (const pdfDocument of optionsArg.pdfDocuments) {
|
||||
const documentImageArray = await this.smartpdfInstance!.convertPDFToPngBytes(pdfDocument);
|
||||
documentImageBytesArray = documentImageBytesArray.concat(documentImageArray);
|
||||
}
|
||||
|
||||
// Convert message history to Anthropic format
|
||||
const messages = optionsArg.messageHistory.map(msg => ({
|
||||
role: msg.role === 'assistant' ? 'assistant' as const : 'user' as const,
|
||||
content: msg.content
|
||||
}));
|
||||
|
||||
// Create content array with text and images
|
||||
const content: ContentBlock[] = [
|
||||
{
|
||||
type: 'text',
|
||||
text: optionsArg.userMessage
|
||||
}
|
||||
];
|
||||
|
||||
// Add each document page as an image
|
||||
for (const imageBytes of documentImageBytesArray) {
|
||||
content.push({
|
||||
type: 'image',
|
||||
source: {
|
||||
type: 'base64',
|
||||
media_type: 'image/png',
|
||||
data: Buffer.from(imageBytes).toString('base64')
|
||||
}
|
||||
});
|
||||
}
|
||||
|
||||
const thinkingConfig = this.getThinkingConfig();
|
||||
const result = await this.anthropicApiClient.messages.create({
|
||||
model: 'claude-sonnet-4-5-20250929',
|
||||
system: optionsArg.systemMessage,
|
||||
messages: [
|
||||
...messages,
|
||||
{ role: 'user', content }
|
||||
],
|
||||
max_tokens: 20000,
|
||||
...(thinkingConfig && { thinking: thinkingConfig }),
|
||||
});
|
||||
|
||||
// Extract text content from the response
|
||||
let message = '';
|
||||
for (const block of result.content) {
|
||||
if ('text' in block) {
|
||||
message += block.text;
|
||||
}
|
||||
}
|
||||
|
||||
return {
|
||||
message: {
|
||||
role: 'assistant',
|
||||
content: message
|
||||
}
|
||||
};
|
||||
}
|
||||
|
||||
public async research(optionsArg: ResearchOptions): Promise<ResearchResponse> {
|
||||
// Prepare the messages for the research request
|
||||
const systemMessage = `You are a research assistant with web search capabilities.
|
||||
Provide comprehensive, well-researched answers with citations and sources.
|
||||
When searching the web, be thorough and cite your sources accurately.`;
|
||||
|
||||
try {
|
||||
// Build the tool configuration for web search
|
||||
const tools: any[] = [];
|
||||
|
||||
if (this.options.enableWebSearch) {
|
||||
const webSearchTool: any = {
|
||||
type: 'web_search_20250305',
|
||||
name: 'web_search'
|
||||
};
|
||||
|
||||
// Add optional parameters
|
||||
if (optionsArg.maxSources) {
|
||||
webSearchTool.max_uses = optionsArg.maxSources;
|
||||
}
|
||||
|
||||
if (this.options.searchDomainAllowList?.length) {
|
||||
webSearchTool.allowed_domains = this.options.searchDomainAllowList;
|
||||
} else if (this.options.searchDomainBlockList?.length) {
|
||||
webSearchTool.blocked_domains = this.options.searchDomainBlockList;
|
||||
}
|
||||
|
||||
tools.push(webSearchTool);
|
||||
}
|
||||
|
||||
// Configure the request based on search depth
|
||||
const maxTokens = optionsArg.searchDepth === 'deep' ? 20000 :
|
||||
optionsArg.searchDepth === 'advanced' ? 20000 : 20000;
|
||||
|
||||
// Add thinking configuration if enabled
|
||||
const thinkingConfig = this.getThinkingConfig();
|
||||
|
||||
// Create the research request
|
||||
// Note: When thinking is enabled, temperature must be 1 (or omitted)
|
||||
const requestParams: any = {
|
||||
model: 'claude-sonnet-4-5-20250929',
|
||||
system: systemMessage,
|
||||
messages: [
|
||||
{
|
||||
role: 'user' as const,
|
||||
content: optionsArg.query
|
||||
}
|
||||
],
|
||||
max_tokens: maxTokens,
|
||||
// Only set temperature when thinking is NOT enabled
|
||||
...(thinkingConfig ? {} : { temperature: 0.7 })
|
||||
};
|
||||
|
||||
// Add tools if web search is enabled
|
||||
if (tools.length > 0) {
|
||||
requestParams.tools = tools;
|
||||
}
|
||||
|
||||
// Add thinking configuration if enabled
|
||||
if (thinkingConfig) {
|
||||
requestParams.thinking = thinkingConfig;
|
||||
}
|
||||
|
||||
// Execute the research request
|
||||
const result = await this.anthropicApiClient.messages.create(requestParams);
|
||||
|
||||
// Extract the answer from content blocks
|
||||
let answer = '';
|
||||
const sources: Array<{ url: string; title: string; snippet: string }> = [];
|
||||
const searchQueries: string[] = [];
|
||||
|
||||
// Process content blocks
|
||||
for (const block of result.content) {
|
||||
if ('text' in block) {
|
||||
// Accumulate text content
|
||||
answer += block.text;
|
||||
|
||||
// Extract citations if present
|
||||
if ('citations' in block && Array.isArray(block.citations)) {
|
||||
for (const citation of block.citations) {
|
||||
if (citation.type === 'web_search_result_location') {
|
||||
sources.push({
|
||||
title: citation.title || '',
|
||||
url: citation.url || '',
|
||||
snippet: citation.cited_text || ''
|
||||
});
|
||||
}
|
||||
}
|
||||
}
|
||||
} else if ('type' in block && block.type === 'server_tool_use') {
|
||||
// Extract search queries from server tool use
|
||||
if (block.name === 'web_search' && block.input && typeof block.input === 'object' && 'query' in block.input) {
|
||||
searchQueries.push((block.input as any).query);
|
||||
}
|
||||
} else if ('type' in block && block.type === 'web_search_tool_result') {
|
||||
// Extract sources from web search results
|
||||
if (Array.isArray(block.content)) {
|
||||
for (const result of block.content) {
|
||||
if (result.type === 'web_search_result') {
|
||||
// Only add if not already in sources (avoid duplicates from citations)
|
||||
if (!sources.some(s => s.url === result.url)) {
|
||||
sources.push({
|
||||
title: result.title || '',
|
||||
url: result.url || '',
|
||||
snippet: '' // Search results don't include snippets, only citations do
|
||||
});
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Fallback: Parse markdown-style links if no citations found
|
||||
if (sources.length === 0) {
|
||||
const urlRegex = /\[([^\]]+)\]\(([^)]+)\)/g;
|
||||
let match: RegExpExecArray | null;
|
||||
|
||||
while ((match = urlRegex.exec(answer)) !== null) {
|
||||
sources.push({
|
||||
title: match[1],
|
||||
url: match[2],
|
||||
snippet: ''
|
||||
});
|
||||
}
|
||||
}
|
||||
|
||||
// Check if web search was used based on usage info
|
||||
const webSearchCount = result.usage?.server_tool_use?.web_search_requests || 0;
|
||||
|
||||
return {
|
||||
answer,
|
||||
sources,
|
||||
searchQueries: searchQueries.length > 0 ? searchQueries : undefined,
|
||||
metadata: {
|
||||
model: 'claude-sonnet-4-5-20250929',
|
||||
searchDepth: optionsArg.searchDepth || 'basic',
|
||||
tokensUsed: result.usage?.output_tokens,
|
||||
webSearchesPerformed: webSearchCount
|
||||
}
|
||||
};
|
||||
} catch (error) {
|
||||
console.error('Anthropic research error:', error);
|
||||
throw new Error(`Failed to perform research: ${error.message}`);
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Image generation is not supported by Anthropic
|
||||
*/
|
||||
public async imageGenerate(optionsArg: ImageGenerateOptions): Promise<ImageResponse> {
|
||||
throw new Error('Image generation is not supported by Anthropic. Claude can only analyze images, not generate them. Please use OpenAI provider for image generation.');
|
||||
}
|
||||
|
||||
/**
|
||||
* Image editing is not supported by Anthropic
|
||||
*/
|
||||
public async imageEdit(optionsArg: ImageEditOptions): Promise<ImageResponse> {
|
||||
throw new Error('Image editing is not supported by Anthropic. Claude can only analyze images, not edit them. Please use OpenAI provider for image editing.');
|
||||
}
|
||||
}
|
||||
@@ -1,116 +0,0 @@
|
||||
import * as plugins from './plugins.js';
|
||||
import { Readable } from 'stream';
|
||||
|
||||
import { MultiModalModel } from './abstract.classes.multimodal.js';
|
||||
import type {
|
||||
ChatOptions,
|
||||
ChatResponse,
|
||||
ResearchOptions,
|
||||
ResearchResponse,
|
||||
ImageGenerateOptions,
|
||||
ImageEditOptions,
|
||||
ImageResponse
|
||||
} from './abstract.classes.multimodal.js';
|
||||
|
||||
/**
 * Construction options for ElevenLabsProvider.
 */
export interface IElevenLabsProviderOptions {
  // API key sent as the `xi-api-key` header on every request.
  elevenlabsToken: string;
  // Voice used when audio() is called without an explicit voiceId.
  defaultVoiceId?: string;
  // Model used when audio() is called without an explicit modelId.
  defaultModelId?: string;
}

/**
 * Per-request voice tuning forwarded to the ElevenLabs API as
 * `voice_settings` (snake_case mirrors the wire format).
 */
export interface IElevenLabsVoiceSettings {
  stability?: number;
  similarity_boost?: number;
  style?: number;
  use_speaker_boost?: boolean;
}
||||
|
||||
/**
 * Text-to-speech–only provider backed by the ElevenLabs HTTP API.
 * Only audio() is functional; every other MultiModalModel capability
 * throws by design.
 */
export class ElevenLabsProvider extends MultiModalModel {
  private options: IElevenLabsProviderOptions;
  // REST endpoint root for all ElevenLabs calls.
  private baseUrl: string = 'https://api.elevenlabs.io/v1';

  constructor(optionsArg: IElevenLabsProviderOptions) {
    super();
    this.options = optionsArg;
  }

  public async start() {
    await super.start();
  }

  public async stop() {
    await super.stop();
  }

  /** @throws always — chat is not supported by this provider. */
  public async chat(optionsArg: ChatOptions): Promise<ChatResponse> {
    throw new Error('ElevenLabs does not support chat functionality. This provider is specialized for text-to-speech only.');
  }

  /** @throws always — chat streaming is not supported by this provider. */
  public async chatStream(input: ReadableStream<Uint8Array>): Promise<ReadableStream<string>> {
    throw new Error('ElevenLabs does not support chat streaming functionality. This provider is specialized for text-to-speech only.');
  }

  /**
   * Synthesizes speech for `message` and returns the audio as a Node stream.
   *
   * @param optionsArg.message text to synthesize
   * @param optionsArg.voiceId overrides the configured/default voice
   * @param optionsArg.modelId overrides the configured/default model
   * @param optionsArg.voiceSettings optional per-request voice tuning
   * @throws Error with status and body text when the API responds non-2xx
   */
  public async audio(optionsArg: {
    message: string;
    voiceId?: string;
    modelId?: string;
    voiceSettings?: IElevenLabsVoiceSettings;
  }): Promise<NodeJS.ReadableStream> {
    // Use Samara voice as default fallback
    const voiceId = optionsArg.voiceId || this.options.defaultVoiceId || '19STyYD15bswVz51nqLf';

    const modelId = optionsArg.modelId || this.options.defaultModelId || 'eleven_v3';

    const url = `${this.baseUrl}/text-to-speech/${voiceId}`;

    const requestBody: any = {
      text: optionsArg.message,
      model_id: modelId,
    };

    // voice_settings is only sent when explicitly provided.
    if (optionsArg.voiceSettings) {
      requestBody.voice_settings = optionsArg.voiceSettings;
    }

    // autoDrain(false) keeps the response body available for streaming below.
    const response = await plugins.smartrequest.SmartRequest.create()
      .url(url)
      .header('xi-api-key', this.options.elevenlabsToken)
      .json(requestBody)
      .autoDrain(false)
      .post();

    if (!response.ok) {
      const errorText = await response.text();
      throw new Error(`ElevenLabs API error: ${response.status} ${response.statusText} - ${errorText}`);
    }

    // Bridge the web ReadableStream onto a Node-style readable for callers.
    const webStream = response.stream();
    const nodeStream = Readable.fromWeb(webStream as any);
    return nodeStream;
  }

  /** @throws always — vision is not supported by this provider. */
  public async vision(optionsArg: { image: Buffer; prompt: string }): Promise<string> {
    throw new Error('ElevenLabs does not support vision functionality. This provider is specialized for text-to-speech only.');
  }

  /** @throws always — document processing is not supported by this provider. */
  public async document(optionsArg: {
    systemMessage: string;
    userMessage: string;
    pdfDocuments: Uint8Array[];
    messageHistory: any[];
  }): Promise<{ message: any }> {
    throw new Error('ElevenLabs does not support document processing. This provider is specialized for text-to-speech only.');
  }

  /** @throws always — research is not supported by this provider. */
  public async research(optionsArg: ResearchOptions): Promise<ResearchResponse> {
    throw new Error('ElevenLabs does not support research capabilities. This provider is specialized for text-to-speech only.');
  }

  /** @throws always — image generation is not supported by this provider. */
  public async imageGenerate(optionsArg: ImageGenerateOptions): Promise<ImageResponse> {
    throw new Error('ElevenLabs does not support image generation. This provider is specialized for text-to-speech only.');
  }

  /** @throws always — image editing is not supported by this provider. */
  public async imageEdit(optionsArg: ImageEditOptions): Promise<ImageResponse> {
    throw new Error('ElevenLabs does not support image editing. This provider is specialized for text-to-speech only.');
  }
}
|
||||
@@ -1,155 +0,0 @@
|
||||
import * as plugins from './plugins.js';
|
||||
import * as paths from './paths.js';
|
||||
import { MultiModalModel } from './abstract.classes.multimodal.js';
|
||||
import type {
|
||||
ChatOptions,
|
||||
ChatResponse,
|
||||
ChatMessage,
|
||||
ResearchOptions,
|
||||
ResearchResponse,
|
||||
ImageGenerateOptions,
|
||||
ImageEditOptions,
|
||||
ImageResponse
|
||||
} from './abstract.classes.multimodal.js';
|
||||
import type { ChatCompletionMessageParam } from 'openai/resources/chat/completions';
|
||||
|
||||
/**
 * Options for the ExoProvider.
 */
export interface IExoProviderOptions {
  /** Base URL of the Exo OpenAI-compatible endpoint (default: http://localhost:8080/v1). */
  exoBaseUrl?: string;
  /** API key; optional, since local Exo deployments may not require one. */
  apiKey?: string;
}
|
||||
|
||||
export class ExoProvider extends MultiModalModel {
|
||||
private options: IExoProviderOptions;
|
||||
public openAiApiClient: plugins.openai.default;
|
||||
|
||||
constructor(optionsArg: IExoProviderOptions = {}) {
|
||||
super();
|
||||
this.options = {
|
||||
exoBaseUrl: 'http://localhost:8080/v1', // Default Exo API endpoint
|
||||
...optionsArg
|
||||
};
|
||||
}
|
||||
|
||||
public async start() {
|
||||
this.openAiApiClient = new plugins.openai.default({
|
||||
apiKey: this.options.apiKey || 'not-needed', // Exo might not require an API key for local deployment
|
||||
baseURL: this.options.exoBaseUrl,
|
||||
});
|
||||
}
|
||||
|
||||
public async stop() {}
|
||||
|
||||
public async chatStream(input: ReadableStream<Uint8Array>): Promise<ReadableStream<string>> {
|
||||
// Create a TextDecoder to handle incoming chunks
|
||||
const decoder = new TextDecoder();
|
||||
let buffer = '';
|
||||
let currentMessage: { role: string; content: string; } | null = null;
|
||||
|
||||
// Create a TransformStream to process the input
|
||||
const transform = new TransformStream<Uint8Array, string>({
|
||||
transform: async (chunk, controller) => {
|
||||
buffer += decoder.decode(chunk, { stream: true });
|
||||
|
||||
// Try to parse complete JSON messages from the buffer
|
||||
while (true) {
|
||||
const newlineIndex = buffer.indexOf('\n');
|
||||
if (newlineIndex === -1) break;
|
||||
|
||||
const line = buffer.slice(0, newlineIndex);
|
||||
buffer = buffer.slice(newlineIndex + 1);
|
||||
|
||||
if (line.trim()) {
|
||||
try {
|
||||
const message = JSON.parse(line);
|
||||
currentMessage = message;
|
||||
|
||||
// Process the message based on its type
|
||||
if (message.type === 'message') {
|
||||
const response = await this.chat({
|
||||
systemMessage: '',
|
||||
userMessage: message.content,
|
||||
messageHistory: [{ role: message.role as 'user' | 'assistant' | 'system', content: message.content }]
|
||||
});
|
||||
|
||||
controller.enqueue(JSON.stringify(response) + '\n');
|
||||
}
|
||||
} catch (error) {
|
||||
console.error('Error processing message:', error);
|
||||
}
|
||||
}
|
||||
}
|
||||
},
|
||||
flush(controller) {
|
||||
if (buffer) {
|
||||
try {
|
||||
const message = JSON.parse(buffer);
|
||||
currentMessage = message;
|
||||
} catch (error) {
|
||||
console.error('Error processing remaining buffer:', error);
|
||||
}
|
||||
}
|
||||
}
|
||||
});
|
||||
|
||||
return input.pipeThrough(transform);
|
||||
}
|
||||
|
||||
public async chat(options: ChatOptions): Promise<ChatResponse> {
|
||||
const messages: ChatCompletionMessageParam[] = [
|
||||
{ role: 'system', content: options.systemMessage },
|
||||
...options.messageHistory,
|
||||
{ role: 'user', content: options.userMessage }
|
||||
];
|
||||
|
||||
try {
|
||||
const response = await this.openAiApiClient.chat.completions.create({
|
||||
model: 'local-model', // Exo uses local models
|
||||
messages: messages,
|
||||
stream: false
|
||||
});
|
||||
|
||||
return {
|
||||
role: 'assistant',
|
||||
message: response.choices[0]?.message?.content || ''
|
||||
};
|
||||
} catch (error) {
|
||||
console.error('Error in chat completion:', error);
|
||||
throw error;
|
||||
}
|
||||
}
|
||||
|
||||
public async audio(optionsArg: { message: string }): Promise<NodeJS.ReadableStream> {
|
||||
throw new Error('Audio generation is not supported by Exo provider');
|
||||
}
|
||||
|
||||
public async vision(optionsArg: { image: Buffer; prompt: string }): Promise<string> {
|
||||
throw new Error('Vision processing is not supported by Exo provider');
|
||||
}
|
||||
|
||||
public async document(optionsArg: {
|
||||
systemMessage: string;
|
||||
userMessage: string;
|
||||
pdfDocuments: Uint8Array[];
|
||||
messageHistory: ChatMessage[];
|
||||
}): Promise<{ message: any }> {
|
||||
throw new Error('Document processing is not supported by Exo provider');
|
||||
}
|
||||
|
||||
public async research(optionsArg: ResearchOptions): Promise<ResearchResponse> {
|
||||
throw new Error('Research capabilities are not yet supported by Exo provider.');
|
||||
}
|
||||
|
||||
/**
|
||||
* Image generation is not supported by Exo
|
||||
*/
|
||||
public async imageGenerate(optionsArg: ImageGenerateOptions): Promise<ImageResponse> {
|
||||
throw new Error('Image generation is not supported by Exo. Please use OpenAI provider for image generation.');
|
||||
}
|
||||
|
||||
/**
|
||||
* Image editing is not supported by Exo
|
||||
*/
|
||||
public async imageEdit(optionsArg: ImageEditOptions): Promise<ImageResponse> {
|
||||
throw new Error('Image editing is not supported by Exo. Please use OpenAI provider for image editing.');
|
||||
}
|
||||
}
|
||||
@@ -1,219 +0,0 @@
|
||||
import * as plugins from './plugins.js';
|
||||
import * as paths from './paths.js';
|
||||
import { MultiModalModel } from './abstract.classes.multimodal.js';
|
||||
import type {
|
||||
ChatOptions,
|
||||
ChatResponse,
|
||||
ChatMessage,
|
||||
ResearchOptions,
|
||||
ResearchResponse,
|
||||
ImageGenerateOptions,
|
||||
ImageEditOptions,
|
||||
ImageResponse
|
||||
} from './abstract.classes.multimodal.js';
|
||||
|
||||
/**
 * Options for the GroqProvider.
 */
export interface IGroqProviderOptions {
  /** Groq API token, sent as a Bearer authorization header. */
  groqToken: string;
  /** Chat model id; defaults to 'llama-3.3-70b-versatile' when omitted. */
  model?: string;
}
|
||||
|
||||
export class GroqProvider extends MultiModalModel {
|
||||
private options: IGroqProviderOptions;
|
||||
private baseUrl = 'https://api.groq.com/v1';
|
||||
|
||||
constructor(optionsArg: IGroqProviderOptions) {
|
||||
super();
|
||||
this.options = {
|
||||
...optionsArg,
|
||||
model: optionsArg.model || 'llama-3.3-70b-versatile', // Default model
|
||||
};
|
||||
}
|
||||
|
||||
async start() {}
|
||||
|
||||
async stop() {}
|
||||
|
||||
public async chatStream(input: ReadableStream<Uint8Array>): Promise<ReadableStream<string>> {
|
||||
// Create a TextDecoder to handle incoming chunks
|
||||
const decoder = new TextDecoder();
|
||||
let buffer = '';
|
||||
let currentMessage: { role: string; content: string; } | null = null;
|
||||
|
||||
// Create a TransformStream to process the input
|
||||
const transform = new TransformStream<Uint8Array, string>({
|
||||
transform: async (chunk, controller) => {
|
||||
buffer += decoder.decode(chunk, { stream: true });
|
||||
|
||||
// Try to parse complete JSON messages from the buffer
|
||||
while (true) {
|
||||
const newlineIndex = buffer.indexOf('\n');
|
||||
if (newlineIndex === -1) break;
|
||||
|
||||
const line = buffer.slice(0, newlineIndex);
|
||||
buffer = buffer.slice(newlineIndex + 1);
|
||||
|
||||
if (line.trim()) {
|
||||
try {
|
||||
const message = JSON.parse(line);
|
||||
currentMessage = {
|
||||
role: message.role || 'user',
|
||||
content: message.content || '',
|
||||
};
|
||||
} catch (e) {
|
||||
console.error('Failed to parse message:', e);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// If we have a complete message, send it to Groq
|
||||
if (currentMessage) {
|
||||
const response = await fetch(`${this.baseUrl}/chat/completions`, {
|
||||
method: 'POST',
|
||||
headers: {
|
||||
'Authorization': `Bearer ${this.options.groqToken}`,
|
||||
'Content-Type': 'application/json',
|
||||
},
|
||||
body: JSON.stringify({
|
||||
model: this.options.model,
|
||||
messages: [{ role: currentMessage.role, content: currentMessage.content }],
|
||||
stream: true,
|
||||
}),
|
||||
});
|
||||
|
||||
// Process each chunk from Groq
|
||||
const reader = response.body?.getReader();
|
||||
if (reader) {
|
||||
try {
|
||||
while (true) {
|
||||
const { done, value } = await reader.read();
|
||||
if (done) break;
|
||||
|
||||
const chunk = new TextDecoder().decode(value);
|
||||
const lines = chunk.split('\n');
|
||||
|
||||
for (const line of lines) {
|
||||
if (line.startsWith('data: ')) {
|
||||
const data = line.slice(6);
|
||||
if (data === '[DONE]') break;
|
||||
|
||||
try {
|
||||
const parsed = JSON.parse(data);
|
||||
const content = parsed.choices[0]?.delta?.content;
|
||||
if (content) {
|
||||
controller.enqueue(content);
|
||||
}
|
||||
} catch (e) {
|
||||
console.error('Failed to parse SSE data:', e);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
} finally {
|
||||
reader.releaseLock();
|
||||
}
|
||||
}
|
||||
|
||||
currentMessage = null;
|
||||
}
|
||||
},
|
||||
|
||||
flush(controller) {
|
||||
if (buffer) {
|
||||
try {
|
||||
const message = JSON.parse(buffer);
|
||||
controller.enqueue(message.content || '');
|
||||
} catch (e) {
|
||||
console.error('Failed to parse remaining buffer:', e);
|
||||
}
|
||||
}
|
||||
}
|
||||
});
|
||||
|
||||
// Connect the input to our transform stream
|
||||
return input.pipeThrough(transform);
|
||||
}
|
||||
|
||||
// Implementing the synchronous chat interaction
|
||||
public async chat(optionsArg: ChatOptions): Promise<ChatResponse> {
|
||||
const messages = [
|
||||
// System message
|
||||
{
|
||||
role: 'system',
|
||||
content: optionsArg.systemMessage,
|
||||
},
|
||||
// Message history
|
||||
...optionsArg.messageHistory.map(msg => ({
|
||||
role: msg.role,
|
||||
content: msg.content,
|
||||
})),
|
||||
// User message
|
||||
{
|
||||
role: 'user',
|
||||
content: optionsArg.userMessage,
|
||||
},
|
||||
];
|
||||
|
||||
const response = await fetch(`${this.baseUrl}/chat/completions`, {
|
||||
method: 'POST',
|
||||
headers: {
|
||||
'Authorization': `Bearer ${this.options.groqToken}`,
|
||||
'Content-Type': 'application/json',
|
||||
},
|
||||
body: JSON.stringify({
|
||||
model: this.options.model,
|
||||
messages,
|
||||
temperature: 0.7,
|
||||
max_completion_tokens: 1024,
|
||||
stream: false,
|
||||
}),
|
||||
});
|
||||
|
||||
if (!response.ok) {
|
||||
const error = await response.json();
|
||||
throw new Error(`Groq API error: ${error.message || response.statusText}`);
|
||||
}
|
||||
|
||||
const result = await response.json();
|
||||
|
||||
return {
|
||||
role: 'assistant',
|
||||
message: result.choices[0].message.content,
|
||||
};
|
||||
}
|
||||
|
||||
public async audio(optionsArg: { message: string }): Promise<NodeJS.ReadableStream> {
|
||||
// Groq does not provide an audio API, so this method is not implemented.
|
||||
throw new Error('Audio generation is not yet supported by Groq.');
|
||||
}
|
||||
|
||||
public async vision(optionsArg: { image: Buffer; prompt: string }): Promise<string> {
|
||||
throw new Error('Vision tasks are not yet supported by Groq.');
|
||||
}
|
||||
|
||||
public async document(optionsArg: {
|
||||
systemMessage: string;
|
||||
userMessage: string;
|
||||
pdfDocuments: Uint8Array[];
|
||||
messageHistory: ChatMessage[];
|
||||
}): Promise<{ message: any }> {
|
||||
throw new Error('Document processing is not yet supported by Groq.');
|
||||
}
|
||||
|
||||
public async research(optionsArg: ResearchOptions): Promise<ResearchResponse> {
|
||||
throw new Error('Research capabilities are not yet supported by Groq provider.');
|
||||
}
|
||||
|
||||
/**
|
||||
* Image generation is not supported by Groq
|
||||
*/
|
||||
public async imageGenerate(optionsArg: ImageGenerateOptions): Promise<ImageResponse> {
|
||||
throw new Error('Image generation is not supported by Groq. Please use OpenAI provider for image generation.');
|
||||
}
|
||||
|
||||
/**
|
||||
* Image editing is not supported by Groq
|
||||
*/
|
||||
public async imageEdit(optionsArg: ImageEditOptions): Promise<ImageResponse> {
|
||||
throw new Error('Image editing is not supported by Groq. Please use OpenAI provider for image editing.');
|
||||
}
|
||||
}
|
||||
@@ -1,352 +0,0 @@
|
||||
import * as plugins from './plugins.js';
|
||||
import { MultiModalModel } from './abstract.classes.multimodal.js';
|
||||
import type {
|
||||
ChatOptions,
|
||||
ChatResponse,
|
||||
ChatMessage,
|
||||
ResearchOptions,
|
||||
ResearchResponse,
|
||||
ImageGenerateOptions,
|
||||
ImageEditOptions,
|
||||
ImageResponse
|
||||
} from './abstract.classes.multimodal.js';
|
||||
|
||||
/**
 * Options for the MistralProvider.
 */
export interface IMistralProviderOptions {
  /** Mistral API key. */
  mistralToken: string;
  chatModel?: string; // default: 'mistral-large-latest'
  ocrModel?: string; // default: 'mistral-ocr-latest'
  /** Table rendering format for OCR output (default: 'markdown'). */
  tableFormat?: 'markdown' | 'html';
}
|
||||
|
||||
/**
 * Provider backed by the Mistral SDK. Supports chat (sync and streaming),
 * and implements vision/document via Mistral's OCR API followed by a chat
 * pass over the extracted text. Audio, research and image generation/editing
 * are unsupported and throw.
 */
export class MistralProvider extends MultiModalModel {
  private options: IMistralProviderOptions;
  public mistralClient: plugins.mistralai.Mistral;

  constructor(optionsArg: IMistralProviderOptions) {
    super();
    this.options = optionsArg;
  }

  /** Starts the provider and creates the Mistral SDK client. */
  async start() {
    await super.start();
    this.mistralClient = new plugins.mistralai.Mistral({
      apiKey: this.options.mistralToken,
    });
  }

  async stop() {
    await super.stop();
  }

  /**
   * Synchronous chat interaction using Mistral's chat API.
   * Builds a system + history + user messages array, then normalizes the
   * response content (which may be a string or an array of content chunks)
   * into a single string.
   */
  public async chat(optionsArg: ChatOptions): Promise<ChatResponse> {
    // Convert message history to Mistral format
    const messages: Array<{
      role: 'system' | 'user' | 'assistant';
      content: string;
    }> = [];

    // Add system message first
    if (optionsArg.systemMessage) {
      messages.push({
        role: 'system',
        content: optionsArg.systemMessage
      });
    }

    // Add message history (any role other than system/assistant maps to user)
    for (const msg of optionsArg.messageHistory) {
      messages.push({
        role: msg.role === 'system' ? 'system' : msg.role === 'assistant' ? 'assistant' : 'user',
        content: msg.content
      });
    }

    // Add current user message
    messages.push({
      role: 'user',
      content: optionsArg.userMessage
    });

    const result = await this.mistralClient.chat.complete({
      model: this.options.chatModel || 'mistral-large-latest',
      messages: messages,
    });

    // Extract content from response
    const choice = result.choices?.[0];
    let content = '';

    if (choice?.message?.content) {
      if (typeof choice.message.content === 'string') {
        content = choice.message.content;
      } else if (Array.isArray(choice.message.content)) {
        // Handle array of content chunks (plain strings or { text } objects)
        content = choice.message.content
          .map((chunk: any) => {
            if (typeof chunk === 'string') return chunk;
            if (chunk && typeof chunk === 'object' && 'text' in chunk) return chunk.text;
            return '';
          })
          .join('');
      }
    }

    return {
      role: 'assistant',
      message: content,
    };
  }

  /**
   * Streaming chat using Mistral's streaming API.
   * `input` carries newline-delimited JSON messages; each complete line is
   * turned into a streaming chat request, and the token deltas are emitted
   * as plain strings.
   */
  public async chatStream(input: ReadableStream<Uint8Array>): Promise<ReadableStream<string>> {
    const decoder = new TextDecoder();
    let buffer = '';
    // Captured locally because the TransformStream callbacks below are not
    // arrow functions and therefore have their own `this`.
    const mistralClient = this.mistralClient;
    const chatModel = this.options.chatModel || 'mistral-large-latest';

    const transform = new TransformStream<Uint8Array, string>({
      async transform(chunk, controller) {
        buffer += decoder.decode(chunk, { stream: true });

        // Try to parse complete JSON messages from the buffer
        while (true) {
          const newlineIndex = buffer.indexOf('\n');
          if (newlineIndex === -1) break;

          const line = buffer.slice(0, newlineIndex);
          buffer = buffer.slice(newlineIndex + 1);

          if (line.trim()) {
            try {
              const message = JSON.parse(line);

              // Build messages array
              const messages: Array<{
                role: 'system' | 'user' | 'assistant';
                content: string;
              }> = [];

              if (message.systemMessage) {
                messages.push({
                  role: 'system',
                  content: message.systemMessage
                });
              }

              messages.push({
                role: message.role === 'assistant' ? 'assistant' : 'user',
                content: message.content
              });

              // Use Mistral streaming
              const stream = await mistralClient.chat.stream({
                model: chatModel,
                messages: messages,
              });

              // Process streaming events; delta content may be a string or
              // an array of chunks, mirroring the sync chat() handling
              for await (const event of stream) {
                const delta = event.data?.choices?.[0]?.delta;
                if (delta?.content) {
                  if (typeof delta.content === 'string') {
                    controller.enqueue(delta.content);
                  } else if (Array.isArray(delta.content)) {
                    for (const chunk of delta.content) {
                      if (typeof chunk === 'string') {
                        controller.enqueue(chunk);
                      } else if (chunk && typeof chunk === 'object' && 'text' in chunk) {
                        controller.enqueue((chunk as any).text);
                      }
                    }
                  }
                }
              }
            } catch (e) {
              console.error('Failed to parse message:', e);
            }
          }
        }
      },

      flush(controller) {
        // Emit trailing (non newline-terminated) content, if any
        if (buffer.trim()) {
          try {
            const message = JSON.parse(buffer);
            controller.enqueue(message.content || '');
          } catch (e) {
            console.error('Failed to parse remaining buffer:', e);
          }
        }
      }
    });

    return input.pipeThrough(transform);
  }

  /**
   * Audio generation is not supported by Mistral
   */
  public async audio(optionsArg: { message: string }): Promise<NodeJS.ReadableStream> {
    throw new Error('Audio generation is not supported by Mistral. Please use ElevenLabs or OpenAI provider for audio generation.');
  }

  /**
   * Vision using Mistral's OCR API for image analysis.
   * Extracts text/markdown from the image; when a prompt is supplied, runs a
   * follow-up chat over the extracted text and returns that answer instead.
   */
  public async vision(optionsArg: { image: Buffer; prompt: string }): Promise<string> {
    const base64Image = optionsArg.image.toString('base64');

    // Detect image type from buffer header (magic bytes); jpeg is the fallback
    let mimeType = 'image/jpeg';
    if (optionsArg.image[0] === 0x89 && optionsArg.image[1] === 0x50) {
      mimeType = 'image/png';
    } else if (optionsArg.image[0] === 0x47 && optionsArg.image[1] === 0x49) {
      mimeType = 'image/gif';
    } else if (optionsArg.image[0] === 0x52 && optionsArg.image[1] === 0x49) {
      // NOTE(review): 0x52 0x49 is the generic RIFF magic (also WAV/AVI);
      // a stricter WebP check would also verify bytes 8-11 == 'WEBP'.
      mimeType = 'image/webp';
    }

    // Use OCR API with image data URL
    const ocrResult = await this.mistralClient.ocr.process({
      model: this.options.ocrModel || 'mistral-ocr-latest',
      document: {
        imageUrl: `data:${mimeType};base64,${base64Image}`,
        type: 'image_url',
      },
    });

    // Combine markdown from all pages
    const extractedText = ocrResult.pages.map(page => page.markdown).join('\n\n');

    // If a prompt is provided, use chat to analyze the extracted text
    if (optionsArg.prompt && optionsArg.prompt.trim()) {
      const chatResponse = await this.chat({
        systemMessage: 'You are an assistant analyzing image content. The following is text extracted from an image using OCR.',
        userMessage: `${optionsArg.prompt}\n\nExtracted content:\n${extractedText}`,
        messageHistory: [],
      });
      return chatResponse.message;
    }

    return extractedText;
  }

  /**
   * Document processing using Mistral's OCR API.
   * PDFs are uploaded via Files API first, then processed with OCR; the
   * combined extracted text is fed to chat() with the caller's query.
   * Uploaded files are deleted in a finally block (best-effort).
   */
  public async document(optionsArg: {
    systemMessage: string;
    userMessage: string;
    pdfDocuments: Uint8Array[];
    messageHistory: ChatMessage[];
  }): Promise<{ message: any }> {
    const extractedTexts: string[] = [];
    const uploadedFileIds: string[] = [];

    try {
      // Process each PDF document using Mistral OCR
      for (let i = 0; i < optionsArg.pdfDocuments.length; i++) {
        const pdfDocument = optionsArg.pdfDocuments[i];

        // Upload the PDF to Mistral's Files API first
        const uploadResult = await this.mistralClient.files.upload({
          file: {
            fileName: `document_${i + 1}.pdf`,
            content: pdfDocument,
          },
          purpose: 'ocr',
        });

        uploadedFileIds.push(uploadResult.id);

        // Now use OCR with the uploaded file
        const ocrResult = await this.mistralClient.ocr.process({
          model: this.options.ocrModel || 'mistral-ocr-latest',
          document: {
            type: 'file',
            fileId: uploadResult.id,
          },
          tableFormat: this.options.tableFormat || 'markdown',
        });

        // Combine all page markdown with page separators
        const pageTexts = ocrResult.pages.map((page, index) => {
          let pageContent = `--- Page ${index + 1} ---\n${page.markdown}`;

          // Include tables if present
          if (page.tables && page.tables.length > 0) {
            pageContent += '\n\n**Tables:**\n' + page.tables.map((t: any) => t.markdown || t.html || '').join('\n');
          }

          // Include header/footer if present
          if (page.header) {
            pageContent = `Header: ${page.header}\n${pageContent}`;
          }
          if (page.footer) {
            pageContent += `\nFooter: ${page.footer}`;
          }

          return pageContent;
        }).join('\n\n');

        extractedTexts.push(pageTexts);
      }

      // Combine all document texts
      const allDocumentText = extractedTexts.length === 1
        ? extractedTexts[0]
        : extractedTexts.map((text, i) => `=== Document ${i + 1} ===\n${text}`).join('\n\n');

      // Use chat API to process the extracted text with the user's query
      const chatResponse = await this.chat({
        systemMessage: optionsArg.systemMessage || 'You are a helpful assistant analyzing document content.',
        userMessage: `${optionsArg.userMessage}\n\n---\nDocument Content:\n${allDocumentText}`,
        messageHistory: optionsArg.messageHistory,
      });

      return {
        message: {
          role: 'assistant',
          content: chatResponse.message
        }
      };
    } finally {
      // Clean up uploaded files
      for (const fileId of uploadedFileIds) {
        try {
          await this.mistralClient.files.delete({ fileId });
        } catch (cleanupError) {
          // Ignore cleanup errors - files may have already been auto-deleted
          console.warn(`Failed to delete temporary file ${fileId}:`, cleanupError);
        }
      }
    }
  }

  /**
   * Research is not natively supported by Mistral
   */
  public async research(optionsArg: ResearchOptions): Promise<ResearchResponse> {
    throw new Error('Research/web search is not supported by Mistral. Please use Perplexity or Anthropic provider for research capabilities.');
  }

  /**
   * Image generation is not supported by Mistral
   */
  public async imageGenerate(optionsArg: ImageGenerateOptions): Promise<ImageResponse> {
    throw new Error('Image generation is not supported by Mistral. Please use OpenAI provider for image generation.');
  }

  /**
   * Image editing is not supported by Mistral
   */
  public async imageEdit(optionsArg: ImageEditOptions): Promise<ImageResponse> {
    throw new Error('Image editing is not supported by Mistral. Please use OpenAI provider for image editing.');
  }
}
|
||||
@@ -1,705 +0,0 @@
|
||||
import * as plugins from './plugins.js';
|
||||
import * as paths from './paths.js';
|
||||
import { MultiModalModel } from './abstract.classes.multimodal.js';
|
||||
import type {
|
||||
ChatOptions,
|
||||
ChatResponse,
|
||||
ChatMessage,
|
||||
ResearchOptions,
|
||||
ResearchResponse,
|
||||
ImageGenerateOptions,
|
||||
ImageEditOptions,
|
||||
ImageResponse,
|
||||
StreamingChatOptions
|
||||
} from './abstract.classes.multimodal.js';
|
||||
|
||||
/**
 * Ollama model runtime options, passed through to the server per request.
 * The defaults noted below are Ollama's own server-side defaults.
 * @see https://github.com/ollama/ollama/blob/main/docs/modelfile.md
 */
export interface IOllamaModelOptions {
  num_ctx?: number; // Context window (default: 2048)
  temperature?: number; // 0 = deterministic (default: 0.8)
  top_k?: number; // Top-k sampling (default: 40)
  top_p?: number; // Nucleus sampling (default: 0.9)
  repeat_penalty?: number;// Repeat penalty (default: 1.1)
  num_predict?: number; // Max tokens to predict
  stop?: string[]; // Stop sequences
  seed?: number; // Random seed for reproducibility
  think?: boolean; // Enable thinking/reasoning mode (for GPT-OSS, QwQ, etc.)
}
|
||||
|
||||
/**
 * JSON Schema tool definition for Ollama native tool calling.
 * Mirrors the OpenAI-style function-tool shape.
 * @see https://docs.ollama.com/capabilities/tool-calling
 */
export interface IOllamaTool {
  type: 'function';
  function: {
    name: string;
    description: string;
    parameters: {
      type: 'object';
      properties: Record<string, {
        type: string;
        description?: string; // Human-readable hint for the model
        enum?: string[]; // Allowed values, when restricted
      }>;
      required?: string[]; // Names of mandatory properties
    };
  };
}
|
||||
|
||||
/**
 * Tool call returned by the model when native tool calling is enabled.
 */
export interface IOllamaToolCall {
  function: {
    name: string; // Name of the tool the model wants to invoke
    arguments: Record<string, unknown>; // Structured arguments for the call
    index?: number; // Position when multiple calls are emitted
  };
}
|
||||
|
||||
/**
 * Constructor options for the OllamaProvider.
 */
export interface IOllamaProviderOptions {
  baseUrl?: string; // Ollama server URL (default: http://localhost:11434)
  model?: string; // Default chat model (default: 'llama2')
  visionModel?: string; // Model to use for vision tasks (e.g. 'llava')
  defaultOptions?: IOllamaModelOptions; // Default model options
  defaultTimeout?: number; // Default timeout in ms (default: 120000)
}
|
||||
|
||||
/**
 * Extended chat options with Ollama-specific settings.
 * Per-request values take precedence over the provider-level defaults.
 */
export interface IOllamaChatOptions extends ChatOptions {
  options?: IOllamaModelOptions; // Per-request model options
  timeout?: number; // Per-request timeout in ms
  model?: string; // Per-request model override
  tools?: IOllamaTool[]; // Available tools for native function calling
  // images is inherited from ChatOptions
}
|
||||
|
||||
/**
 * Chunk emitted during streaming.
 */
export interface IOllamaStreamChunk {
  content: string; // Incremental response text
  thinking?: string; // For models with extended thinking
  toolCalls?: IOllamaToolCall[]; // Tool calls in streaming mode
  done: boolean; // True on the final chunk
  // NOTE(review): stats presumably only populated on the final chunk — confirm
  stats?: {
    totalDuration?: number;
    evalCount?: number;
  };
}
|
||||
|
||||
/**
 * Extended chat response with Ollama-specific fields.
 */
export interface IOllamaChatResponse extends ChatResponse {
  thinking?: string; // Reasoning text, when thinking mode is enabled
  toolCalls?: IOllamaToolCall[]; // Tool calls from model (native tool calling)
  stats?: {
    totalDuration?: number; // presumably nanoseconds, per Ollama API — TODO confirm
    evalCount?: number; // presumably generated-token count — TODO confirm
  };
}
|
||||
|
||||
export class OllamaProvider extends MultiModalModel {
|
||||
  private options: IOllamaProviderOptions;
  // Resolved configuration (constructor defaults applied)
  private baseUrl: string;
  private model: string;
  private visionModel: string;
  private defaultOptions: IOllamaModelOptions;
  private defaultTimeout: number;

  /**
   * @param optionsArg provider configuration; every field is optional —
   *   sensible local defaults are applied (server on localhost:11434,
   *   'llama2' chat model, 'llava' vision model, 120s timeout).
   */
  constructor(optionsArg: IOllamaProviderOptions = {}) {
    super();
    this.options = optionsArg;
    this.baseUrl = optionsArg.baseUrl || 'http://localhost:11434';
    this.model = optionsArg.model || 'llama2';
    this.visionModel = optionsArg.visionModel || 'llava';
    this.defaultOptions = optionsArg.defaultOptions || {};
    this.defaultTimeout = optionsArg.defaultTimeout || 120000;
  }
|
||||
|
||||
  /**
   * Starts the provider and verifies the Ollama server is reachable via a
   * cheap GET to /api/tags.
   * NOTE: an HTTP-level failure (non-ok response) is thrown inside the try
   * block and intentionally re-wrapped by the catch below, so callers always
   * see a message that includes the base URL.
   */
  async start() {
    await super.start();
    // Verify Ollama is running
    try {
      const response = await fetch(`${this.baseUrl}/api/tags`);
      if (!response.ok) {
        throw new Error('Failed to connect to Ollama server');
      }
    } catch (error) {
      throw new Error(`Failed to connect to Ollama server at ${this.baseUrl}: ${error.message}`);
    }
  }
|
||||
|
||||
  /** Stops the provider; no Ollama-specific teardown is required. */
  async stop() {
    await super.stop();
  }
|
||||
|
||||
  /**
   * Streaming chat over an NDJSON protocol: each line of `input` is a JSON
   * object ({ role, content }); the last complete message is forwarded to
   * Ollama's /api/chat with stream: true, and the model's incremental
   * message.content values are emitted as plain strings.
   */
  public async chatStream(input: ReadableStream<Uint8Array>): Promise<ReadableStream<string>> {
    // Create a TextDecoder to handle incoming chunks
    const decoder = new TextDecoder();
    let buffer = '';
    let currentMessage: { role: string; content: string; } | null = null;

    // Create a TransformStream to process the input
    const transform = new TransformStream<Uint8Array, string>({
      transform: async (chunk, controller) => {
        buffer += decoder.decode(chunk, { stream: true });

        // Try to parse complete JSON messages from the buffer
        while (true) {
          const newlineIndex = buffer.indexOf('\n');
          if (newlineIndex === -1) break;

          const line = buffer.slice(0, newlineIndex);
          buffer = buffer.slice(newlineIndex + 1);

          if (line.trim()) {
            try {
              const message = JSON.parse(line);
              currentMessage = {
                role: message.role || 'user',
                content: message.content || '',
              };
            } catch (e) {
              console.error('Failed to parse message:', e);
            }
          }
        }

        // If we have a complete message, send it to Ollama
        if (currentMessage) {
          const response = await fetch(`${this.baseUrl}/api/chat`, {
            method: 'POST',
            headers: {
              'Content-Type': 'application/json',
            },
            body: JSON.stringify({
              model: this.model,
              messages: [{ role: currentMessage.role, content: currentMessage.content }],
              stream: true,
            }),
          });

          // Process each chunk from Ollama (NDJSON: one JSON object per line)
          const reader = response.body?.getReader();
          if (reader) {
            try {
              while (true) {
                const { done, value } = await reader.read();
                if (done) break;

                const chunk = new TextDecoder().decode(value);
                const lines = chunk.split('\n');

                for (const line of lines) {
                  if (line.trim()) {
                    try {
                      const parsed = JSON.parse(line);
                      const content = parsed.message?.content;
                      if (content) {
                        controller.enqueue(content);
                      }
                    } catch (e) {
                      // NOTE(review): a JSON object split across read chunks
                      // fails to parse here and is dropped — confirm whether
                      // Ollama guarantees line-aligned chunks.
                      console.error('Failed to parse Ollama response:', e);
                    }
                  }
                }
              }
            } finally {
              reader.releaseLock();
            }
          }

          currentMessage = null;
        }
      },

      flush(controller) {
        // Emit trailing (non newline-terminated) content, if any
        if (buffer) {
          try {
            const message = JSON.parse(buffer);
            controller.enqueue(message.content || '');
          } catch (e) {
            console.error('Failed to parse remaining buffer:', e);
          }
        }
      }
    });

    // Connect the input to our transform stream
    return input.pipeThrough(transform);
  }
|
||||
|
||||
// Implementing the synchronous chat interaction
|
||||
public async chat(optionsArg: ChatOptions): Promise<ChatResponse> {
|
||||
// Format messages for Ollama
|
||||
const historyMessages = optionsArg.messageHistory.map((msg) => {
|
||||
const formatted: { role: string; content: string; images?: string[]; reasoning?: string } = {
|
||||
role: msg.role,
|
||||
content: msg.content,
|
||||
};
|
||||
if (msg.images && msg.images.length > 0) {
|
||||
formatted.images = msg.images;
|
||||
}
|
||||
if (msg.reasoning) {
|
||||
formatted.reasoning = msg.reasoning;
|
||||
}
|
||||
return formatted;
|
||||
});
|
||||
|
||||
// Build user message with optional images
|
||||
const userMessage: { role: string; content: string; images?: string[] } = {
|
||||
role: 'user',
|
||||
content: optionsArg.userMessage,
|
||||
};
|
||||
if (optionsArg.images && optionsArg.images.length > 0) {
|
||||
userMessage.images = optionsArg.images;
|
||||
}
|
||||
|
||||
const messages = [
|
||||
{ role: 'system', content: optionsArg.systemMessage },
|
||||
...historyMessages,
|
||||
userMessage,
|
||||
];
|
||||
|
||||
// Build request body - include think parameter if set
|
||||
const requestBody: Record<string, unknown> = {
|
||||
model: this.model,
|
||||
messages: messages,
|
||||
stream: false,
|
||||
options: this.defaultOptions,
|
||||
};
|
||||
|
||||
// Add think parameter for reasoning models (GPT-OSS, QwQ, etc.)
|
||||
if (this.defaultOptions.think !== undefined) {
|
||||
requestBody.think = this.defaultOptions.think;
|
||||
}
|
||||
|
||||
// Make API call to Ollama with defaultOptions and timeout
|
||||
const response = await fetch(`${this.baseUrl}/api/chat`, {
|
||||
method: 'POST',
|
||||
headers: {
|
||||
'Content-Type': 'application/json',
|
||||
},
|
||||
body: JSON.stringify(requestBody),
|
||||
signal: AbortSignal.timeout(this.defaultTimeout),
|
||||
});
|
||||
|
||||
if (!response.ok) {
|
||||
throw new Error(`Ollama API error: ${response.statusText}`);
|
||||
}
|
||||
|
||||
const result = await response.json();
|
||||
|
||||
return {
|
||||
role: 'assistant' as const,
|
||||
message: result.message.content,
|
||||
reasoning: result.message.thinking || result.message.reasoning,
|
||||
};
|
||||
}
|
||||
|
||||
/**
|
||||
* Streaming chat with token callback (implements MultiModalModel interface)
|
||||
* Calls onToken for each token generated during the response
|
||||
*/
|
||||
public async chatStreaming(optionsArg: StreamingChatOptions): Promise<ChatResponse> {
|
||||
const onToken = optionsArg.onToken;
|
||||
|
||||
// Use existing collectStreamResponse with callback, including images
|
||||
const response = await this.collectStreamResponse(
|
||||
{
|
||||
systemMessage: optionsArg.systemMessage,
|
||||
userMessage: optionsArg.userMessage,
|
||||
messageHistory: optionsArg.messageHistory,
|
||||
images: optionsArg.images,
|
||||
},
|
||||
(chunk) => {
|
||||
if (onToken) {
|
||||
if (chunk.thinking) onToken(chunk.thinking);
|
||||
if (chunk.content) onToken(chunk.content);
|
||||
}
|
||||
}
|
||||
);
|
||||
|
||||
return {
|
||||
role: 'assistant' as const,
|
||||
message: response.message,
|
||||
reasoning: response.thinking,
|
||||
};
|
||||
}
|
||||
|
||||
  /**
   * Streaming chat with async iteration and options support.
   * Sends one /api/chat request with `stream: true` and yields each parsed
   * newline-delimited JSON chunk as an IOllamaStreamChunk (content, thinking,
   * tool calls, and — on the final chunk — timing stats).
   *
   * @param optionsArg - chat options; model/timeout/options fall back to provider defaults
   * @returns an async iterable of stream chunks
   * @throws Error when the HTTP response is not ok
   */
  public async chatStreamResponse(
    optionsArg: IOllamaChatOptions
  ): Promise<AsyncIterable<IOllamaStreamChunk>> {
    // Per-call overrides fall back to provider-level defaults.
    const model = optionsArg.model || this.model;
    const timeout = optionsArg.timeout || this.defaultTimeout;
    const modelOptions = { ...this.defaultOptions, ...optionsArg.options };

    // Format history messages with optional images, reasoning, and tool_calls
    const historyMessages = optionsArg.messageHistory.map((msg) => {
      const formatted: { role: string; content: string; images?: string[]; reasoning?: string; tool_calls?: any[] } = {
        role: msg.role,
        content: msg.content,
      };
      if (msg.images && msg.images.length > 0) {
        formatted.images = msg.images;
      }
      if (msg.reasoning) {
        formatted.reasoning = msg.reasoning;
      }
      // CRITICAL: Include tool_calls in history for native tool calling
      // Without this, the model doesn't know it already called a tool and may call it again
      if ((msg as any).tool_calls && Array.isArray((msg as any).tool_calls)) {
        formatted.tool_calls = (msg as any).tool_calls;
      }
      return formatted;
    });

    // Build user message with optional images
    const userMessage: { role: string; content: string; images?: string[] } = {
      role: 'user',
      content: optionsArg.userMessage,
    };
    if (optionsArg.images && optionsArg.images.length > 0) {
      userMessage.images = optionsArg.images;
    }

    const messages = [
      { role: 'system', content: optionsArg.systemMessage },
      ...historyMessages,
      userMessage,
    ];

    // Build request body with optional tools and think parameters
    const requestBody: Record<string, unknown> = {
      model,
      messages,
      stream: true,
      options: modelOptions,
    };

    // Add think parameter for reasoning models (GPT-OSS, QwQ, etc.)
    if (modelOptions.think !== undefined) {
      requestBody.think = modelOptions.think;
    }

    // Add tools for native function calling
    if (optionsArg.tools && optionsArg.tools.length > 0) {
      requestBody.tools = optionsArg.tools;
    }

    const response = await fetch(`${this.baseUrl}/api/chat`, {
      method: 'POST',
      headers: { 'Content-Type': 'application/json' },
      body: JSON.stringify(requestBody),
      signal: AbortSignal.timeout(timeout),
    });

    if (!response.ok) {
      throw new Error(`Ollama API error: ${response.status}`);
    }

    const reader = response.body!.getReader();
    const decoder = new TextDecoder();

    return {
      [Symbol.asyncIterator]: async function* () {
        // Buffer carries any trailing partial line between reads.
        let buffer = '';
        try {
          while (true) {
            const { done, value } = await reader.read();
            if (done) break;
            buffer += decoder.decode(value, { stream: true });
            const lines = buffer.split('\n');
            // The last split element may be an incomplete line; keep it buffered.
            buffer = lines.pop() || '';
            for (const line of lines) {
              if (!line.trim()) continue;
              try {
                const json = JSON.parse(line);

                // Parse tool_calls from response
                let toolCalls: IOllamaToolCall[] | undefined;
                if (json.message?.tool_calls && Array.isArray(json.message.tool_calls)) {
                  toolCalls = json.message.tool_calls.map((tc: any) => ({
                    function: {
                      name: tc.function?.name || '',
                      // Arguments may arrive as a JSON string or a ready object.
                      arguments: typeof tc.function?.arguments === 'string'
                        ? JSON.parse(tc.function.arguments)
                        : tc.function?.arguments || {},
                      index: tc.index,
                    },
                  }));
                }

                yield {
                  content: json.message?.content || '',
                  thinking: json.message?.thinking,
                  toolCalls,
                  done: json.done || false,
                  // Timing stats are only present on the terminal chunk.
                  stats: json.done ? {
                    totalDuration: json.total_duration,
                    evalCount: json.eval_count,
                  } : undefined,
                } as IOllamaStreamChunk;
              } catch { /* skip malformed */ }
            }
          }
        } finally {
          reader.releaseLock();
        }
      }
    };
  }
|
||||
|
||||
/**
|
||||
* Stream and collect full response with optional progress callback
|
||||
*/
|
||||
public async collectStreamResponse(
|
||||
optionsArg: IOllamaChatOptions,
|
||||
onChunk?: (chunk: IOllamaStreamChunk) => void
|
||||
): Promise<IOllamaChatResponse> {
|
||||
const stream = await this.chatStreamResponse(optionsArg);
|
||||
let content = '';
|
||||
let thinking = '';
|
||||
let toolCalls: IOllamaToolCall[] = [];
|
||||
let stats: IOllamaChatResponse['stats'];
|
||||
|
||||
for await (const chunk of stream) {
|
||||
if (chunk.content) content += chunk.content;
|
||||
if (chunk.thinking) thinking += chunk.thinking;
|
||||
if (chunk.toolCalls) toolCalls = toolCalls.concat(chunk.toolCalls);
|
||||
if (chunk.stats) stats = chunk.stats;
|
||||
if (onChunk) onChunk(chunk);
|
||||
}
|
||||
|
||||
return {
|
||||
role: 'assistant' as const,
|
||||
message: content,
|
||||
thinking: thinking || undefined,
|
||||
toolCalls: toolCalls.length > 0 ? toolCalls : undefined,
|
||||
stats,
|
||||
};
|
||||
}
|
||||
|
||||
  /**
   * Non-streaming chat with full options support.
   * Handles per-call model/timeout overrides, native tool calling, reasoning
   * ("think") models, image attachments, and tool-result history entries.
   *
   * @param optionsArg - chat options; model/timeout/options fall back to provider defaults
   * @returns assistant response with optional thinking, tool calls, and stats
   * @throws Error when the HTTP response is not ok
   */
  public async chatWithOptions(optionsArg: IOllamaChatOptions): Promise<IOllamaChatResponse> {
    // Per-call overrides fall back to provider-level defaults.
    const model = optionsArg.model || this.model;
    const timeout = optionsArg.timeout || this.defaultTimeout;
    const modelOptions = { ...this.defaultOptions, ...optionsArg.options };

    // Format history messages with optional images, reasoning, tool_calls, and tool role
    const historyMessages = optionsArg.messageHistory.map((msg) => {
      // Handle tool result messages
      if ((msg as any).role === 'tool') {
        return {
          role: 'tool',
          content: msg.content,
          tool_name: (msg as any).toolName,
        };
      }

      const formatted: { role: string; content: string; images?: string[]; reasoning?: string; tool_calls?: any[] } = {
        role: msg.role,
        content: msg.content,
      };
      if (msg.images && msg.images.length > 0) {
        formatted.images = msg.images;
      }
      if (msg.reasoning) {
        formatted.reasoning = msg.reasoning;
      }
      // CRITICAL: Include tool_calls in history for native tool calling
      // Without this, the model doesn't know it already called a tool and may call it again
      if ((msg as any).tool_calls && Array.isArray((msg as any).tool_calls)) {
        formatted.tool_calls = (msg as any).tool_calls;
      }
      return formatted;
    });

    // Build user message with optional images
    const userMessage: { role: string; content: string; images?: string[] } = {
      role: 'user',
      content: optionsArg.userMessage,
    };
    if (optionsArg.images && optionsArg.images.length > 0) {
      userMessage.images = optionsArg.images;
    }

    const messages = [
      { role: 'system', content: optionsArg.systemMessage },
      ...historyMessages,
      userMessage,
    ];

    // Build request body with optional tools and think parameters
    const requestBody: Record<string, unknown> = {
      model,
      messages,
      stream: false,
      options: modelOptions,
    };

    // Add think parameter for reasoning models (GPT-OSS, QwQ, etc.)
    if (modelOptions.think !== undefined) {
      requestBody.think = modelOptions.think;
    }

    // Add tools for native function calling
    if (optionsArg.tools && optionsArg.tools.length > 0) {
      requestBody.tools = optionsArg.tools;
    }

    const response = await fetch(`${this.baseUrl}/api/chat`, {
      method: 'POST',
      headers: { 'Content-Type': 'application/json' },
      body: JSON.stringify(requestBody),
      signal: AbortSignal.timeout(timeout),
    });

    if (!response.ok) {
      throw new Error(`Ollama API error: ${response.statusText}`);
    }

    const result = await response.json();

    // Parse tool_calls from response
    let toolCalls: IOllamaToolCall[] | undefined;
    if (result.message?.tool_calls && Array.isArray(result.message.tool_calls)) {
      toolCalls = result.message.tool_calls.map((tc: any) => ({
        function: {
          name: tc.function?.name || '',
          // Arguments may arrive as a JSON string or a ready object.
          arguments: typeof tc.function?.arguments === 'string'
            ? JSON.parse(tc.function.arguments)
            : tc.function?.arguments || {},
          index: tc.index,
        },
      }));
    }

    return {
      role: 'assistant' as const,
      message: result.message.content || '',
      thinking: result.message.thinking,
      toolCalls,
      stats: {
        totalDuration: result.total_duration,
        evalCount: result.eval_count,
      },
    };
  }
|
||||
|
||||
  /**
   * Audio generation is not supported by Ollama.
   * @throws Error always
   */
  public async audio(optionsArg: { message: string }): Promise<NodeJS.ReadableStream> {
    throw new Error('Audio generation is not supported by Ollama.');
  }
|
||||
|
||||
public async vision(optionsArg: { image: Buffer; prompt: string }): Promise<string> {
|
||||
const base64Image = optionsArg.image.toString('base64');
|
||||
|
||||
const response = await fetch(`${this.baseUrl}/api/chat`, {
|
||||
method: 'POST',
|
||||
headers: {
|
||||
'Content-Type': 'application/json',
|
||||
},
|
||||
body: JSON.stringify({
|
||||
model: this.visionModel,
|
||||
messages: [{
|
||||
role: 'user',
|
||||
content: optionsArg.prompt,
|
||||
images: [base64Image]
|
||||
}],
|
||||
stream: false
|
||||
}),
|
||||
});
|
||||
|
||||
if (!response.ok) {
|
||||
throw new Error(`Ollama API error: ${response.statusText}`);
|
||||
}
|
||||
|
||||
const result = await response.json();
|
||||
return result.message.content;
|
||||
}
|
||||
|
||||
  /**
   * Document Q&A: rasterizes each PDF to PNG pages via SmartPdf and sends the
   * pages, together with the prompt and history, to the vision model.
   *
   * @param optionsArg.pdfDocuments - raw PDF bytes; each is converted page-by-page
   * @returns an object wrapping the assistant message with the model's answer
   * @throws Error when the HTTP response is not ok
   */
  public async document(optionsArg: {
    systemMessage: string;
    userMessage: string;
    pdfDocuments: Uint8Array[];
    messageHistory: ChatMessage[];
  }): Promise<{ message: any }> {
    // Ensure SmartPdf is initialized before processing documents
    await this.ensureSmartpdfReady();

    // Convert PDF documents to images using SmartPDF
    let documentImageBytesArray: Uint8Array[] = [];

    for (const pdfDocument of optionsArg.pdfDocuments) {
      const documentImageArray = await this.smartpdfInstance!.convertPDFToPngBytes(pdfDocument);
      documentImageBytesArray = documentImageBytesArray.concat(documentImageArray);
    }

    // Convert images to base64
    const base64Images = documentImageBytesArray.map(bytes => Buffer.from(bytes).toString('base64'));

    // Send request to Ollama with images
    const response = await fetch(`${this.baseUrl}/api/chat`, {
      method: 'POST',
      headers: {
        'Content-Type': 'application/json',
      },
      body: JSON.stringify({
        model: this.visionModel,
        messages: [
          { role: 'system', content: optionsArg.systemMessage },
          ...optionsArg.messageHistory,
          {
            role: 'user',
            content: optionsArg.userMessage,
            images: base64Images
          }
        ],
        stream: false
      }),
    });

    if (!response.ok) {
      throw new Error(`Ollama API error: ${response.statusText}`);
    }

    const result = await response.json();
    return {
      message: {
        role: 'assistant',
        content: result.message.content
      }
    };
  }
|
||||
|
||||
  /**
   * Research capabilities are not yet supported by the Ollama provider.
   * @throws Error always
   */
  public async research(optionsArg: ResearchOptions): Promise<ResearchResponse> {
    throw new Error('Research capabilities are not yet supported by Ollama provider.');
  }
|
||||
|
||||
  /**
   * Image generation is not supported by Ollama.
   * @throws Error always — use the OpenAI provider for image generation
   */
  public async imageGenerate(optionsArg: ImageGenerateOptions): Promise<ImageResponse> {
    throw new Error('Image generation is not supported by Ollama. Please use OpenAI provider for image generation.');
  }
|
||||
|
||||
  /**
   * Image editing is not supported by Ollama.
   * @throws Error always — use the OpenAI provider for image editing
   */
  public async imageEdit(optionsArg: ImageEditOptions): Promise<ImageResponse> {
    throw new Error('Image editing is not supported by Ollama. Please use OpenAI provider for image editing.');
  }
|
||||
}
|
||||
@@ -1,462 +0,0 @@
|
||||
import * as plugins from './plugins.js';
|
||||
import * as paths from './paths.js';
|
||||
import { Readable } from 'stream';
|
||||
import { toFile } from 'openai';
|
||||
|
||||
// Custom type definition for chat completion messages
|
||||
export type TChatCompletionRequestMessage = {
|
||||
role: "system" | "user" | "assistant";
|
||||
content: string;
|
||||
};
|
||||
|
||||
import { MultiModalModel } from './abstract.classes.multimodal.js';
|
||||
import type {
|
||||
ResearchOptions,
|
||||
ResearchResponse,
|
||||
ImageGenerateOptions,
|
||||
ImageEditOptions,
|
||||
ImageResponse
|
||||
} from './abstract.classes.multimodal.js';
|
||||
|
||||
export interface IOpenaiProviderOptions {
|
||||
openaiToken: string;
|
||||
chatModel?: string;
|
||||
audioModel?: string;
|
||||
visionModel?: string;
|
||||
researchModel?: string;
|
||||
imageModel?: string;
|
||||
enableWebSearch?: boolean;
|
||||
}
|
||||
|
||||
export class OpenAiProvider extends MultiModalModel {
|
||||
private options: IOpenaiProviderOptions;
|
||||
public openAiApiClient: plugins.openai.default;
|
||||
|
||||
constructor(optionsArg: IOpenaiProviderOptions) {
|
||||
super();
|
||||
this.options = optionsArg;
|
||||
}
|
||||
|
||||
public async start() {
|
||||
await super.start();
|
||||
this.openAiApiClient = new plugins.openai.default({
|
||||
apiKey: this.options.openaiToken,
|
||||
dangerouslyAllowBrowser: true,
|
||||
});
|
||||
}
|
||||
|
||||
  /**
   * Releases provider resources via the base class; the OpenAI client itself
   * holds no connections that need explicit teardown.
   */
  public async stop() {
    await super.stop();
  }
|
||||
|
||||
public async chatStream(input: ReadableStream<Uint8Array>): Promise<ReadableStream<string>> {
|
||||
// Create a TextDecoder to handle incoming chunks
|
||||
const decoder = new TextDecoder();
|
||||
let buffer = '';
|
||||
let currentMessage: {
|
||||
role: "function" | "user" | "system" | "assistant" | "tool" | "developer";
|
||||
content: string;
|
||||
} | null = null;
|
||||
|
||||
// Create a TransformStream to process the input
|
||||
const transform = new TransformStream<Uint8Array, string>({
|
||||
transform: async (chunk, controller) => {
|
||||
buffer += decoder.decode(chunk, { stream: true });
|
||||
|
||||
// Try to parse complete JSON messages from the buffer
|
||||
while (true) {
|
||||
const newlineIndex = buffer.indexOf('\n');
|
||||
if (newlineIndex === -1) break;
|
||||
|
||||
const line = buffer.slice(0, newlineIndex);
|
||||
buffer = buffer.slice(newlineIndex + 1);
|
||||
|
||||
if (line.trim()) {
|
||||
try {
|
||||
const message = JSON.parse(line);
|
||||
currentMessage = {
|
||||
role: (message.role || 'user') as "function" | "user" | "system" | "assistant" | "tool" | "developer",
|
||||
content: message.content || '',
|
||||
};
|
||||
} catch (e) {
|
||||
console.error('Failed to parse message:', e);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// If we have a complete message, send it to OpenAI
|
||||
if (currentMessage) {
|
||||
const messageToSend = { role: "user" as const, content: currentMessage.content };
|
||||
const chatModel = this.options.chatModel ?? 'gpt-5-mini';
|
||||
const requestParams: any = {
|
||||
model: chatModel,
|
||||
messages: [messageToSend],
|
||||
stream: true,
|
||||
};
|
||||
// Temperature is omitted since the model does not support it.
|
||||
const stream = await this.openAiApiClient.chat.completions.create(requestParams);
|
||||
// Explicitly cast the stream as an async iterable to satisfy TypeScript.
|
||||
const streamAsyncIterable = stream as unknown as AsyncIterableIterator<any>;
|
||||
// Process each chunk from OpenAI
|
||||
for await (const chunk of streamAsyncIterable) {
|
||||
const content = chunk.choices[0]?.delta?.content;
|
||||
if (content) {
|
||||
controller.enqueue(content);
|
||||
}
|
||||
}
|
||||
currentMessage = null;
|
||||
}
|
||||
},
|
||||
|
||||
flush(controller) {
|
||||
if (buffer) {
|
||||
try {
|
||||
const message = JSON.parse(buffer);
|
||||
controller.enqueue(message.content || '');
|
||||
} catch (e) {
|
||||
console.error('Failed to parse remaining buffer:', e);
|
||||
}
|
||||
}
|
||||
}
|
||||
});
|
||||
|
||||
// Connect the input to our transform stream
|
||||
return input.pipeThrough(transform);
|
||||
}
|
||||
|
||||
// Implementing the synchronous chat interaction
|
||||
public async chat(optionsArg: {
|
||||
systemMessage: string;
|
||||
userMessage: string;
|
||||
messageHistory: {
|
||||
role: 'assistant' | 'user';
|
||||
content: string;
|
||||
}[];
|
||||
}) {
|
||||
const chatModel = this.options.chatModel ?? 'gpt-5-mini';
|
||||
const requestParams: any = {
|
||||
model: chatModel,
|
||||
messages: [
|
||||
{ role: 'system', content: optionsArg.systemMessage },
|
||||
...optionsArg.messageHistory,
|
||||
{ role: 'user', content: optionsArg.userMessage },
|
||||
],
|
||||
};
|
||||
// Temperature parameter removed to avoid unsupported error.
|
||||
const result = await this.openAiApiClient.chat.completions.create(requestParams);
|
||||
return {
|
||||
role: result.choices[0].message.role as 'assistant',
|
||||
message: result.choices[0].message.content,
|
||||
};
|
||||
}
|
||||
|
||||
public async audio(optionsArg: { message: string }): Promise<NodeJS.ReadableStream> {
|
||||
const done = plugins.smartpromise.defer<NodeJS.ReadableStream>();
|
||||
const result = await this.openAiApiClient.audio.speech.create({
|
||||
model: this.options.audioModel ?? 'tts-1-hd',
|
||||
input: optionsArg.message,
|
||||
voice: 'nova',
|
||||
response_format: 'mp3',
|
||||
speed: 1,
|
||||
});
|
||||
const stream = result.body;
|
||||
const nodeStream = Readable.fromWeb(stream as any);
|
||||
done.resolve(nodeStream);
|
||||
return done.promise;
|
||||
}
|
||||
|
||||
public async document(optionsArg: {
|
||||
systemMessage: string;
|
||||
userMessage: string;
|
||||
pdfDocuments: Uint8Array[];
|
||||
messageHistory: {
|
||||
role: 'assistant' | 'user';
|
||||
content: any;
|
||||
}[];
|
||||
}) {
|
||||
// Ensure SmartPdf is initialized before processing documents
|
||||
await this.ensureSmartpdfReady();
|
||||
|
||||
let pdfDocumentImageBytesArray: Uint8Array[] = [];
|
||||
|
||||
// Convert each PDF into one or more image byte arrays.
|
||||
for (const pdfDocument of optionsArg.pdfDocuments) {
|
||||
const documentImageArray = await this.smartpdfInstance!.convertPDFToPngBytes(pdfDocument);
|
||||
pdfDocumentImageBytesArray = pdfDocumentImageBytesArray.concat(documentImageArray);
|
||||
}
|
||||
|
||||
console.log(`image smartfile array`);
|
||||
console.log(pdfDocumentImageBytesArray.map((smartfile) => smartfile.length));
|
||||
|
||||
// Filter out any empty buffers to avoid sending invalid image URLs.
|
||||
const validImageBytesArray = pdfDocumentImageBytesArray.filter(imageBytes => imageBytes && imageBytes.length > 0);
|
||||
const imageAttachments = validImageBytesArray.map(imageBytes => ({
|
||||
type: 'image_url',
|
||||
image_url: {
|
||||
url: 'data:image/png;base64,' + Buffer.from(imageBytes).toString('base64'),
|
||||
},
|
||||
}));
|
||||
|
||||
const chatModel = this.options.chatModel ?? 'gpt-5-mini';
|
||||
const requestParams: any = {
|
||||
model: chatModel,
|
||||
messages: [
|
||||
{ role: 'system', content: optionsArg.systemMessage },
|
||||
...optionsArg.messageHistory,
|
||||
{
|
||||
role: 'user',
|
||||
content: [
|
||||
{ type: 'text', text: optionsArg.userMessage },
|
||||
...imageAttachments,
|
||||
],
|
||||
},
|
||||
],
|
||||
};
|
||||
// Temperature parameter removed.
|
||||
const result = await this.openAiApiClient.chat.completions.create(requestParams);
|
||||
return {
|
||||
message: result.choices[0].message,
|
||||
};
|
||||
}
|
||||
|
||||
public async vision(optionsArg: { image: Buffer; prompt: string }): Promise<string> {
|
||||
const visionModel = this.options.visionModel ?? '04-mini';
|
||||
const requestParams: any = {
|
||||
model: visionModel,
|
||||
messages: [
|
||||
{
|
||||
role: 'user',
|
||||
content: [
|
||||
{ type: 'text', text: optionsArg.prompt },
|
||||
{
|
||||
type: 'image_url',
|
||||
image_url: {
|
||||
url: `data:image/jpeg;base64,${optionsArg.image.toString('base64')}`
|
||||
}
|
||||
}
|
||||
]
|
||||
}
|
||||
],
|
||||
max_tokens: 300
|
||||
};
|
||||
const result = await this.openAiApiClient.chat.completions.create(requestParams);
|
||||
return result.choices[0].message.content || '';
|
||||
}
|
||||
|
||||
public async research(optionsArg: ResearchOptions): Promise<ResearchResponse> {
|
||||
// Determine which model to use - Deep Research API requires specific models
|
||||
let model: string;
|
||||
if (optionsArg.searchDepth === 'deep') {
|
||||
model = this.options.researchModel || 'o4-mini-deep-research-2025-06-26';
|
||||
} else {
|
||||
// For basic/advanced, still use deep research models if web search is needed
|
||||
if (optionsArg.includeWebSearch) {
|
||||
model = this.options.researchModel || 'o4-mini-deep-research-2025-06-26';
|
||||
} else {
|
||||
model = this.options.chatModel || 'gpt-5-mini';
|
||||
}
|
||||
}
|
||||
|
||||
const systemMessage = 'You are a research assistant. Provide comprehensive answers with citations and sources when available.';
|
||||
|
||||
// Prepare request parameters using Deep Research API format
|
||||
const requestParams: any = {
|
||||
model,
|
||||
instructions: systemMessage,
|
||||
input: optionsArg.query
|
||||
};
|
||||
|
||||
// Add web search tool if requested
|
||||
if (optionsArg.includeWebSearch || optionsArg.searchDepth === 'deep') {
|
||||
requestParams.tools = [
|
||||
{
|
||||
type: 'web_search_preview',
|
||||
search_context_size: optionsArg.searchDepth === 'deep' ? 'high' :
|
||||
optionsArg.searchDepth === 'advanced' ? 'medium' : 'low'
|
||||
}
|
||||
];
|
||||
}
|
||||
|
||||
// Add background flag for deep research
|
||||
if (optionsArg.background && optionsArg.searchDepth === 'deep') {
|
||||
requestParams.background = true;
|
||||
}
|
||||
|
||||
try {
|
||||
// Execute the research request using Deep Research API
|
||||
const result = await this.openAiApiClient.responses.create(requestParams);
|
||||
|
||||
// Extract the answer from output items
|
||||
let answer = '';
|
||||
const sources: Array<{ url: string; title: string; snippet: string }> = [];
|
||||
const searchQueries: string[] = [];
|
||||
|
||||
// Process output items
|
||||
for (const item of result.output || []) {
|
||||
// Extract message content
|
||||
if (item.type === 'message' && 'content' in item) {
|
||||
const messageItem = item as any;
|
||||
for (const contentItem of messageItem.content || []) {
|
||||
if (contentItem.type === 'output_text' && 'text' in contentItem) {
|
||||
answer += contentItem.text;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Extract web search queries
|
||||
if (item.type === 'web_search_call' && 'action' in item) {
|
||||
const searchItem = item as any;
|
||||
if (searchItem.action && searchItem.action.type === 'search' && 'query' in searchItem.action) {
|
||||
searchQueries.push(searchItem.action.query);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Parse sources from markdown links in the answer
|
||||
const urlRegex = /\[([^\]]+)\]\(([^)]+)\)/g;
|
||||
let match: RegExpExecArray | null;
|
||||
|
||||
while ((match = urlRegex.exec(answer)) !== null) {
|
||||
sources.push({
|
||||
title: match[1],
|
||||
url: match[2],
|
||||
snippet: ''
|
||||
});
|
||||
}
|
||||
|
||||
return {
|
||||
answer,
|
||||
sources,
|
||||
searchQueries: searchQueries.length > 0 ? searchQueries : undefined,
|
||||
metadata: {
|
||||
model,
|
||||
searchDepth: optionsArg.searchDepth || 'basic',
|
||||
tokensUsed: result.usage?.total_tokens
|
||||
}
|
||||
};
|
||||
} catch (error) {
|
||||
console.error('Research API error:', error);
|
||||
throw new Error(`Failed to perform research: ${error.message}`);
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Image generation using OpenAI's gpt-image-1 or DALL-E models
|
||||
*/
|
||||
public async imageGenerate(optionsArg: ImageGenerateOptions): Promise<ImageResponse> {
|
||||
const model = optionsArg.model || this.options.imageModel || 'gpt-image-1';
|
||||
|
||||
try {
|
||||
const requestParams: any = {
|
||||
model,
|
||||
prompt: optionsArg.prompt,
|
||||
n: optionsArg.n || 1,
|
||||
};
|
||||
|
||||
// Add gpt-image-1 specific parameters
|
||||
if (model === 'gpt-image-1') {
|
||||
if (optionsArg.quality) requestParams.quality = optionsArg.quality;
|
||||
if (optionsArg.size) requestParams.size = optionsArg.size;
|
||||
if (optionsArg.background) requestParams.background = optionsArg.background;
|
||||
if (optionsArg.outputFormat) requestParams.output_format = optionsArg.outputFormat;
|
||||
if (optionsArg.outputCompression !== undefined) requestParams.output_compression = optionsArg.outputCompression;
|
||||
if (optionsArg.moderation) requestParams.moderation = optionsArg.moderation;
|
||||
if (optionsArg.stream !== undefined) requestParams.stream = optionsArg.stream;
|
||||
if (optionsArg.partialImages !== undefined) requestParams.partial_images = optionsArg.partialImages;
|
||||
} else if (model === 'dall-e-3') {
|
||||
// DALL-E 3 specific parameters
|
||||
if (optionsArg.quality) requestParams.quality = optionsArg.quality;
|
||||
if (optionsArg.size) requestParams.size = optionsArg.size;
|
||||
if (optionsArg.style) requestParams.style = optionsArg.style;
|
||||
requestParams.response_format = 'b64_json'; // Always use base64 for consistency
|
||||
} else if (model === 'dall-e-2') {
|
||||
// DALL-E 2 specific parameters
|
||||
if (optionsArg.size) requestParams.size = optionsArg.size;
|
||||
requestParams.response_format = 'b64_json';
|
||||
}
|
||||
|
||||
const result = await this.openAiApiClient.images.generate(requestParams);
|
||||
|
||||
const images = (result.data || []).map(img => ({
|
||||
b64_json: img.b64_json,
|
||||
url: img.url,
|
||||
revisedPrompt: img.revised_prompt
|
||||
}));
|
||||
|
||||
return {
|
||||
images,
|
||||
metadata: {
|
||||
model,
|
||||
quality: result.quality,
|
||||
size: result.size,
|
||||
outputFormat: result.output_format,
|
||||
tokensUsed: result.usage?.total_tokens
|
||||
}
|
||||
};
|
||||
} catch (error) {
|
||||
console.error('Image generation error:', error);
|
||||
throw new Error(`Failed to generate image: ${error.message}`);
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Image editing using OpenAI's gpt-image-1 or DALL-E 2 models
|
||||
*/
|
||||
public async imageEdit(optionsArg: ImageEditOptions): Promise<ImageResponse> {
|
||||
const model = optionsArg.model || this.options.imageModel || 'gpt-image-1';
|
||||
|
||||
try {
|
||||
// Convert Buffer to uploadable file format for OpenAI API
|
||||
const imageFile = await toFile(optionsArg.image, 'image.png', { type: 'image/png' });
|
||||
|
||||
const requestParams: any = {
|
||||
model,
|
||||
image: imageFile,
|
||||
prompt: optionsArg.prompt,
|
||||
n: optionsArg.n || 1,
|
||||
};
|
||||
|
||||
// Add mask if provided (also convert to file format)
|
||||
if (optionsArg.mask) {
|
||||
requestParams.mask = await toFile(optionsArg.mask, 'mask.png', { type: 'image/png' });
|
||||
}
|
||||
|
||||
// Add gpt-image-1 specific parameters
|
||||
if (model === 'gpt-image-1') {
|
||||
if (optionsArg.quality) requestParams.quality = optionsArg.quality;
|
||||
if (optionsArg.size) requestParams.size = optionsArg.size;
|
||||
if (optionsArg.background) requestParams.background = optionsArg.background;
|
||||
if (optionsArg.outputFormat) requestParams.output_format = optionsArg.outputFormat;
|
||||
if (optionsArg.outputCompression !== undefined) requestParams.output_compression = optionsArg.outputCompression;
|
||||
if (optionsArg.stream !== undefined) requestParams.stream = optionsArg.stream;
|
||||
if (optionsArg.partialImages !== undefined) requestParams.partial_images = optionsArg.partialImages;
|
||||
} else if (model === 'dall-e-2') {
|
||||
// DALL-E 2 specific parameters
|
||||
if (optionsArg.size) requestParams.size = optionsArg.size;
|
||||
requestParams.response_format = 'b64_json';
|
||||
}
|
||||
|
||||
const result = await this.openAiApiClient.images.edit(requestParams);
|
||||
|
||||
const images = (result.data || []).map(img => ({
|
||||
b64_json: img.b64_json,
|
||||
url: img.url,
|
||||
revisedPrompt: img.revised_prompt
|
||||
}));
|
||||
|
||||
return {
|
||||
images,
|
||||
metadata: {
|
||||
model,
|
||||
quality: result.quality,
|
||||
size: result.size,
|
||||
outputFormat: result.output_format,
|
||||
tokensUsed: result.usage?.total_tokens
|
||||
}
|
||||
};
|
||||
} catch (error) {
|
||||
console.error('Image edit error:', error);
|
||||
throw new Error(`Failed to edit image: ${error.message}`);
|
||||
}
|
||||
}
|
||||
}
|
||||
@@ -1,259 +0,0 @@
|
||||
import * as plugins from './plugins.js';
|
||||
import * as paths from './paths.js';
|
||||
import { MultiModalModel } from './abstract.classes.multimodal.js';
|
||||
import type {
|
||||
ChatOptions,
|
||||
ChatResponse,
|
||||
ChatMessage,
|
||||
ResearchOptions,
|
||||
ResearchResponse,
|
||||
ImageGenerateOptions,
|
||||
ImageEditOptions,
|
||||
ImageResponse
|
||||
} from './abstract.classes.multimodal.js';
|
||||
|
||||
/** Construction options for {@link PerplexityProvider}. */
export interface IPerplexityProviderOptions {
  // API token used as the Bearer credential for api.perplexity.ai.
  perplexityToken: string;
}
|
||||
|
||||
export class PerplexityProvider extends MultiModalModel {
|
||||
private options: IPerplexityProviderOptions;
|
||||
|
||||
constructor(optionsArg: IPerplexityProviderOptions) {
|
||||
super();
|
||||
this.options = optionsArg;
|
||||
}
|
||||
|
||||
async start() {
|
||||
// Initialize any necessary clients or resources
|
||||
}
|
||||
|
||||
async stop() {}
|
||||
|
||||
public async chatStream(input: ReadableStream<Uint8Array>): Promise<ReadableStream<string>> {
|
||||
// Create a TextDecoder to handle incoming chunks
|
||||
const decoder = new TextDecoder();
|
||||
let buffer = '';
|
||||
let currentMessage: { role: string; content: string; } | null = null;
|
||||
|
||||
// Create a TransformStream to process the input
|
||||
const transform = new TransformStream<Uint8Array, string>({
|
||||
async transform(chunk, controller) {
|
||||
buffer += decoder.decode(chunk, { stream: true });
|
||||
|
||||
// Try to parse complete JSON messages from the buffer
|
||||
while (true) {
|
||||
const newlineIndex = buffer.indexOf('\n');
|
||||
if (newlineIndex === -1) break;
|
||||
|
||||
const line = buffer.slice(0, newlineIndex);
|
||||
buffer = buffer.slice(newlineIndex + 1);
|
||||
|
||||
if (line.trim()) {
|
||||
try {
|
||||
const message = JSON.parse(line);
|
||||
currentMessage = {
|
||||
role: message.role || 'user',
|
||||
content: message.content || '',
|
||||
};
|
||||
} catch (e) {
|
||||
console.error('Failed to parse message:', e);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// If we have a complete message, send it to Perplexity
|
||||
if (currentMessage) {
|
||||
const response = await fetch('https://api.perplexity.ai/chat/completions', {
|
||||
method: 'POST',
|
||||
headers: {
|
||||
'Authorization': `Bearer ${this.options.perplexityToken}`,
|
||||
'Content-Type': 'application/json',
|
||||
},
|
||||
body: JSON.stringify({
|
||||
model: 'mixtral-8x7b-instruct',
|
||||
messages: [{ role: currentMessage.role, content: currentMessage.content }],
|
||||
stream: true,
|
||||
}),
|
||||
});
|
||||
|
||||
// Process each chunk from Perplexity
|
||||
const reader = response.body?.getReader();
|
||||
if (reader) {
|
||||
try {
|
||||
while (true) {
|
||||
const { done, value } = await reader.read();
|
||||
if (done) break;
|
||||
|
||||
const chunk = new TextDecoder().decode(value);
|
||||
const lines = chunk.split('\n');
|
||||
|
||||
for (const line of lines) {
|
||||
if (line.startsWith('data: ')) {
|
||||
const data = line.slice(6);
|
||||
if (data === '[DONE]') break;
|
||||
|
||||
try {
|
||||
const parsed = JSON.parse(data);
|
||||
const content = parsed.choices[0]?.delta?.content;
|
||||
if (content) {
|
||||
controller.enqueue(content);
|
||||
}
|
||||
} catch (e) {
|
||||
console.error('Failed to parse SSE data:', e);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
} finally {
|
||||
reader.releaseLock();
|
||||
}
|
||||
}
|
||||
|
||||
currentMessage = null;
|
||||
}
|
||||
},
|
||||
|
||||
flush(controller) {
|
||||
if (buffer) {
|
||||
try {
|
||||
const message = JSON.parse(buffer);
|
||||
controller.enqueue(message.content || '');
|
||||
} catch (e) {
|
||||
console.error('Failed to parse remaining buffer:', e);
|
||||
}
|
||||
}
|
||||
}
|
||||
});
|
||||
|
||||
// Connect the input to our transform stream
|
||||
return input.pipeThrough(transform);
|
||||
}
|
||||
|
||||
// Implementing the synchronous chat interaction
|
||||
public async chat(optionsArg: ChatOptions): Promise<ChatResponse> {
|
||||
// Make API call to Perplexity
|
||||
const response = await fetch('https://api.perplexity.ai/chat/completions', {
|
||||
method: 'POST',
|
||||
headers: {
|
||||
'Authorization': `Bearer ${this.options.perplexityToken}`,
|
||||
'Content-Type': 'application/json',
|
||||
},
|
||||
body: JSON.stringify({
|
||||
model: 'mixtral-8x7b-instruct', // Using Mixtral model
|
||||
messages: [
|
||||
{ role: 'system', content: optionsArg.systemMessage },
|
||||
...optionsArg.messageHistory,
|
||||
{ role: 'user', content: optionsArg.userMessage }
|
||||
],
|
||||
}),
|
||||
});
|
||||
|
||||
if (!response.ok) {
|
||||
throw new Error(`Perplexity API error: ${response.statusText}`);
|
||||
}
|
||||
|
||||
const result = await response.json();
|
||||
|
||||
return {
|
||||
role: 'assistant' as const,
|
||||
message: result.choices[0].message.content,
|
||||
};
|
||||
}
|
||||
|
||||
public async audio(optionsArg: { message: string }): Promise<NodeJS.ReadableStream> {
|
||||
throw new Error('Audio generation is not supported by Perplexity.');
|
||||
}
|
||||
|
||||
public async vision(optionsArg: { image: Buffer; prompt: string }): Promise<string> {
|
||||
throw new Error('Vision tasks are not supported by Perplexity.');
|
||||
}
|
||||
|
||||
public async document(optionsArg: {
|
||||
systemMessage: string;
|
||||
userMessage: string;
|
||||
pdfDocuments: Uint8Array[];
|
||||
messageHistory: ChatMessage[];
|
||||
}): Promise<{ message: any }> {
|
||||
throw new Error('Document processing is not supported by Perplexity.');
|
||||
}
|
||||
|
||||
public async research(optionsArg: ResearchOptions): Promise<ResearchResponse> {
|
||||
// Perplexity has Sonar models that are optimized for search
|
||||
// sonar models: sonar, sonar-pro
|
||||
const model = optionsArg.searchDepth === 'deep' ? 'sonar-pro' : 'sonar';
|
||||
|
||||
try {
|
||||
const response = await fetch('https://api.perplexity.ai/chat/completions', {
|
||||
method: 'POST',
|
||||
headers: {
|
||||
'Authorization': `Bearer ${this.options.perplexityToken}`,
|
||||
'Content-Type': 'application/json',
|
||||
},
|
||||
body: JSON.stringify({
|
||||
model,
|
||||
messages: [
|
||||
{
|
||||
role: 'system',
|
||||
content: 'You are a helpful research assistant. Provide accurate information with sources.'
|
||||
},
|
||||
{
|
||||
role: 'user',
|
||||
content: optionsArg.query
|
||||
}
|
||||
],
|
||||
temperature: 0.7,
|
||||
max_tokens: 4000
|
||||
}),
|
||||
});
|
||||
|
||||
if (!response.ok) {
|
||||
throw new Error(`Perplexity API error: ${response.statusText}`);
|
||||
}
|
||||
|
||||
const result = await response.json();
|
||||
const answer = result.choices[0].message.content;
|
||||
|
||||
// Parse citations from the response
|
||||
const sources: Array<{ url: string; title: string; snippet: string }> = [];
|
||||
|
||||
// Perplexity includes citations in the format [1], [2], etc. with sources listed
|
||||
// This is a simplified parser - could be enhanced based on actual Perplexity response format
|
||||
if (result.citations) {
|
||||
for (const citation of result.citations) {
|
||||
sources.push({
|
||||
url: citation.url || '',
|
||||
title: citation.title || '',
|
||||
snippet: citation.snippet || ''
|
||||
});
|
||||
}
|
||||
}
|
||||
|
||||
return {
|
||||
answer,
|
||||
sources,
|
||||
metadata: {
|
||||
model,
|
||||
searchDepth: optionsArg.searchDepth || 'basic'
|
||||
}
|
||||
};
|
||||
} catch (error) {
|
||||
console.error('Perplexity research error:', error);
|
||||
throw new Error(`Failed to perform research: ${error.message}`);
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Image generation is not supported by Perplexity
|
||||
*/
|
||||
public async imageGenerate(optionsArg: ImageGenerateOptions): Promise<ImageResponse> {
|
||||
throw new Error('Image generation is not supported by Perplexity. Please use OpenAI provider for image generation.');
|
||||
}
|
||||
|
||||
/**
|
||||
* Image editing is not supported by Perplexity
|
||||
*/
|
||||
public async imageEdit(optionsArg: ImageEditOptions): Promise<ImageResponse> {
|
||||
throw new Error('Image editing is not supported by Perplexity. Please use OpenAI provider for image editing.');
|
||||
}
|
||||
}
|
||||
@@ -1,214 +0,0 @@
|
||||
import * as plugins from './plugins.js';
|
||||
import * as paths from './paths.js';
|
||||
import { MultiModalModel } from './abstract.classes.multimodal.js';
|
||||
import type {
|
||||
ChatOptions,
|
||||
ChatResponse,
|
||||
ChatMessage,
|
||||
ResearchOptions,
|
||||
ResearchResponse,
|
||||
ImageGenerateOptions,
|
||||
ImageEditOptions,
|
||||
ImageResponse
|
||||
} from './abstract.classes.multimodal.js';
|
||||
import type { ChatCompletionMessageParam } from 'openai/resources/chat/completions';
|
||||
|
||||
/** Construction options for {@link XAIProvider}. */
export interface IXAIProviderOptions {
  // API token for api.x.ai (passed as the OpenAI-compatible apiKey).
  xaiToken: string;
}
|
||||
|
||||
export class XAIProvider extends MultiModalModel {
|
||||
private options: IXAIProviderOptions;
|
||||
public openAiApiClient: plugins.openai.default;
|
||||
|
||||
constructor(optionsArg: IXAIProviderOptions) {
|
||||
super();
|
||||
this.options = optionsArg;
|
||||
}
|
||||
|
||||
public async start() {
|
||||
await super.start();
|
||||
this.openAiApiClient = new plugins.openai.default({
|
||||
apiKey: this.options.xaiToken,
|
||||
baseURL: 'https://api.x.ai/v1',
|
||||
});
|
||||
}
|
||||
|
||||
public async stop() {
|
||||
await super.stop();
|
||||
}
|
||||
|
||||
public async chatStream(input: ReadableStream<Uint8Array>): Promise<ReadableStream<string>> {
|
||||
// Create a TextDecoder to handle incoming chunks
|
||||
const decoder = new TextDecoder();
|
||||
let buffer = '';
|
||||
let currentMessage: { role: string; content: string; } | null = null;
|
||||
|
||||
// Create a TransformStream to process the input
|
||||
const transform = new TransformStream<Uint8Array, string>({
|
||||
async transform(chunk, controller) {
|
||||
buffer += decoder.decode(chunk, { stream: true });
|
||||
|
||||
// Try to parse complete JSON messages from the buffer
|
||||
while (true) {
|
||||
const newlineIndex = buffer.indexOf('\n');
|
||||
if (newlineIndex === -1) break;
|
||||
|
||||
const line = buffer.slice(0, newlineIndex);
|
||||
buffer = buffer.slice(newlineIndex + 1);
|
||||
|
||||
if (line.trim()) {
|
||||
try {
|
||||
const message = JSON.parse(line);
|
||||
currentMessage = {
|
||||
role: message.role || 'user',
|
||||
content: message.content || '',
|
||||
};
|
||||
} catch (e) {
|
||||
console.error('Failed to parse message:', e);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// If we have a complete message, send it to X.AI
|
||||
if (currentMessage) {
|
||||
const stream = await this.openAiApiClient.chat.completions.create({
|
||||
model: 'grok-2-latest',
|
||||
messages: [{ role: currentMessage.role, content: currentMessage.content }],
|
||||
stream: true,
|
||||
});
|
||||
|
||||
// Process each chunk from X.AI
|
||||
for await (const chunk of stream) {
|
||||
const content = chunk.choices[0]?.delta?.content;
|
||||
if (content) {
|
||||
controller.enqueue(content);
|
||||
}
|
||||
}
|
||||
|
||||
currentMessage = null;
|
||||
}
|
||||
},
|
||||
|
||||
flush(controller) {
|
||||
if (buffer) {
|
||||
try {
|
||||
const message = JSON.parse(buffer);
|
||||
controller.enqueue(message.content || '');
|
||||
} catch (e) {
|
||||
console.error('Failed to parse remaining buffer:', e);
|
||||
}
|
||||
}
|
||||
}
|
||||
});
|
||||
|
||||
// Connect the input to our transform stream
|
||||
return input.pipeThrough(transform);
|
||||
}
|
||||
|
||||
public async chat(optionsArg: {
|
||||
systemMessage: string;
|
||||
userMessage: string;
|
||||
messageHistory: { role: string; content: string; }[];
|
||||
}): Promise<{ role: 'assistant'; message: string; }> {
|
||||
// Prepare messages array with system message, history, and user message
|
||||
const messages: ChatCompletionMessageParam[] = [
|
||||
{ role: 'system', content: optionsArg.systemMessage },
|
||||
...optionsArg.messageHistory.map(msg => ({
|
||||
role: msg.role as 'system' | 'user' | 'assistant',
|
||||
content: msg.content
|
||||
})),
|
||||
{ role: 'user', content: optionsArg.userMessage }
|
||||
];
|
||||
|
||||
// Call X.AI's chat completion API
|
||||
const completion = await this.openAiApiClient.chat.completions.create({
|
||||
model: 'grok-2-latest',
|
||||
messages: messages,
|
||||
stream: false,
|
||||
});
|
||||
|
||||
// Return the assistant's response
|
||||
return {
|
||||
role: 'assistant',
|
||||
message: completion.choices[0]?.message?.content || ''
|
||||
};
|
||||
}
|
||||
|
||||
public async audio(optionsArg: { message: string }): Promise<NodeJS.ReadableStream> {
|
||||
throw new Error('Audio generation is not supported by X.AI');
|
||||
}
|
||||
|
||||
public async vision(optionsArg: { image: Buffer; prompt: string }): Promise<string> {
|
||||
throw new Error('Vision tasks are not supported by X.AI');
|
||||
}
|
||||
|
||||
public async document(optionsArg: {
|
||||
systemMessage: string;
|
||||
userMessage: string;
|
||||
pdfDocuments: Uint8Array[];
|
||||
messageHistory: { role: string; content: string; }[];
|
||||
}): Promise<{ message: any }> {
|
||||
// Ensure SmartPdf is initialized before processing documents
|
||||
await this.ensureSmartpdfReady();
|
||||
|
||||
// First convert PDF documents to images
|
||||
let pdfDocumentImageBytesArray: Uint8Array[] = [];
|
||||
|
||||
for (const pdfDocument of optionsArg.pdfDocuments) {
|
||||
const documentImageArray = await this.smartpdfInstance!.convertPDFToPngBytes(pdfDocument);
|
||||
pdfDocumentImageBytesArray = pdfDocumentImageBytesArray.concat(documentImageArray);
|
||||
}
|
||||
|
||||
// Convert images to base64 for inclusion in the message
|
||||
const imageBase64Array = pdfDocumentImageBytesArray.map(bytes =>
|
||||
Buffer.from(bytes).toString('base64')
|
||||
);
|
||||
|
||||
// Combine document images into the user message
|
||||
const enhancedUserMessage = `
|
||||
${optionsArg.userMessage}
|
||||
|
||||
Document contents (as images):
|
||||
${imageBase64Array.map((img, i) => `Image ${i + 1}: <image data>`).join('\n')}
|
||||
`;
|
||||
|
||||
// Use chat completion to analyze the documents
|
||||
const messages: ChatCompletionMessageParam[] = [
|
||||
{ role: 'system', content: optionsArg.systemMessage },
|
||||
...optionsArg.messageHistory.map(msg => ({
|
||||
role: msg.role as 'system' | 'user' | 'assistant',
|
||||
content: msg.content
|
||||
})),
|
||||
{ role: 'user', content: enhancedUserMessage }
|
||||
];
|
||||
|
||||
const completion = await this.openAiApiClient.chat.completions.create({
|
||||
model: 'grok-2-latest',
|
||||
messages: messages,
|
||||
stream: false,
|
||||
});
|
||||
|
||||
return {
|
||||
message: completion.choices[0]?.message?.content || ''
|
||||
};
|
||||
}
|
||||
|
||||
public async research(optionsArg: ResearchOptions): Promise<ResearchResponse> {
|
||||
throw new Error('Research capabilities are not yet supported by xAI provider.');
|
||||
}
|
||||
|
||||
/**
|
||||
* Image generation is not supported by xAI
|
||||
*/
|
||||
public async imageGenerate(optionsArg: ImageGenerateOptions): Promise<ImageResponse> {
|
||||
throw new Error('Image generation is not supported by xAI. Please use OpenAI provider for image generation.');
|
||||
}
|
||||
|
||||
/**
|
||||
* Image editing is not supported by xAI
|
||||
*/
|
||||
public async imageEdit(optionsArg: ImageEditOptions): Promise<ImageResponse> {
|
||||
throw new Error('Image editing is not supported by xAI. Please use OpenAI provider for image editing.');
|
||||
}
|
||||
}
|
||||
51
ts/smartai.classes.smartai.ts
Normal file
51
ts/smartai.classes.smartai.ts
Normal file
@@ -0,0 +1,51 @@
|
||||
import * as plugins from './plugins.js';
|
||||
import type { ISmartAiOptions, LanguageModelV3 } from './smartai.interfaces.js';
|
||||
import { createOllamaModel } from './smartai.provider.ollama.js';
|
||||
import { createAnthropicCachingMiddleware } from './smartai.middleware.anthropic.js';
|
||||
|
||||
/**
|
||||
* Returns a LanguageModelV3 for the given provider and model.
|
||||
* This is the primary API — consumers use the returned model with AI SDK's
|
||||
* generateText(), streamText(), etc.
|
||||
*/
|
||||
export function getModel(options: ISmartAiOptions): LanguageModelV3 {
|
||||
switch (options.provider) {
|
||||
case 'anthropic': {
|
||||
const p = plugins.createAnthropic({ apiKey: options.apiKey });
|
||||
const base = p(options.model) as LanguageModelV3;
|
||||
if (options.promptCaching === false) return base;
|
||||
return plugins.wrapLanguageModel({
|
||||
model: base,
|
||||
middleware: createAnthropicCachingMiddleware(),
|
||||
}) as unknown as LanguageModelV3;
|
||||
}
|
||||
case 'openai': {
|
||||
const p = plugins.createOpenAI({ apiKey: options.apiKey });
|
||||
return p(options.model) as LanguageModelV3;
|
||||
}
|
||||
case 'google': {
|
||||
const p = plugins.createGoogleGenerativeAI({ apiKey: options.apiKey });
|
||||
return p(options.model) as LanguageModelV3;
|
||||
}
|
||||
case 'groq': {
|
||||
const p = plugins.createGroq({ apiKey: options.apiKey });
|
||||
return p(options.model) as LanguageModelV3;
|
||||
}
|
||||
case 'mistral': {
|
||||
const p = plugins.createMistral({ apiKey: options.apiKey });
|
||||
return p(options.model) as LanguageModelV3;
|
||||
}
|
||||
case 'xai': {
|
||||
const p = plugins.createXai({ apiKey: options.apiKey });
|
||||
return p(options.model) as LanguageModelV3;
|
||||
}
|
||||
case 'perplexity': {
|
||||
const p = plugins.createPerplexity({ apiKey: options.apiKey });
|
||||
return p(options.model) as LanguageModelV3;
|
||||
}
|
||||
case 'ollama':
|
||||
return createOllamaModel(options);
|
||||
default:
|
||||
throw new Error(`Unknown provider: ${(options as ISmartAiOptions).provider}`);
|
||||
}
|
||||
}
|
||||
53
ts/smartai.interfaces.ts
Normal file
53
ts/smartai.interfaces.ts
Normal file
@@ -0,0 +1,53 @@
|
||||
import type { LanguageModelV3 } from '@ai-sdk/provider';
|
||||
|
||||
// Union of all provider identifiers accepted by getModel().
export type TProvider =
  | 'anthropic'
  | 'openai'
  | 'google'
  | 'groq'
  | 'mistral'
  | 'xai'
  | 'perplexity'
  | 'ollama';

/** Options accepted by getModel() to resolve a LanguageModelV3. */
export interface ISmartAiOptions {
  // Which backend to use; selects the branch taken in getModel().
  provider: TProvider;
  // Provider-specific model identifier (e.g. a model name string).
  model: string;
  // API credential forwarded to the provider factory; optional because
  // some providers (e.g. a local Ollama server) need no key.
  apiKey?: string;
  /** For Ollama: base URL of the local server. Default: http://localhost:11434 */
  baseUrl?: string;
  /**
   * Ollama-specific model runtime options.
   * Only used when provider === 'ollama'.
   */
  ollamaOptions?: IOllamaModelOptions;
  /**
   * Enable Anthropic prompt caching on system + recent messages.
   * Only used when provider === 'anthropic'. Default: true.
   */
  promptCaching?: boolean;
}

/**
 * Ollama model runtime options passed in the request body `options` field.
 * @see https://github.com/ollama/ollama/blob/main/docs/modelfile.md
 */
export interface IOllamaModelOptions {
  /** Context window size. Default: 2048. */
  num_ctx?: number;
  /** 0 = deterministic. Default: 0.8. For Qwen models use 0.55. */
  temperature?: number;
  top_k?: number;
  top_p?: number;
  repeat_penalty?: number;
  num_predict?: number;
  stop?: string[];
  seed?: number;
  /**
   * Enable thinking/reasoning mode (Qwen3, QwQ, DeepSeek-R1 etc.).
   * The custom Ollama provider handles this directly.
   */
  think?: boolean;
}

// Re-exported so consumers can type the return value of getModel()
// without depending on @ai-sdk/provider directly.
export type { LanguageModelV3 };
|
||||
38
ts/smartai.middleware.anthropic.ts
Normal file
38
ts/smartai.middleware.anthropic.ts
Normal file
@@ -0,0 +1,38 @@
|
||||
import type { LanguageModelV3Middleware, LanguageModelV3Prompt } from '@ai-sdk/provider';
|
||||
|
||||
/**
|
||||
* Creates middleware that adds Anthropic prompt caching directives.
|
||||
* Marks the last system message and last user message with ephemeral cache control,
|
||||
* reducing input token cost and latency on repeated calls.
|
||||
*/
|
||||
export function createAnthropicCachingMiddleware(): LanguageModelV3Middleware {
|
||||
return {
|
||||
specificationVersion: 'v3',
|
||||
transformParams: async ({ params }) => {
|
||||
const messages = [...params.prompt] as Array<Record<string, unknown>>;
|
||||
|
||||
// Find the last system message and last user message
|
||||
let lastSystemIdx = -1;
|
||||
let lastUserIdx = -1;
|
||||
for (let i = 0; i < messages.length; i++) {
|
||||
if (messages[i].role === 'system') lastSystemIdx = i;
|
||||
if (messages[i].role === 'user') lastUserIdx = i;
|
||||
}
|
||||
|
||||
const targets = [lastSystemIdx, lastUserIdx].filter(i => i >= 0);
|
||||
for (const idx of targets) {
|
||||
const msg = { ...messages[idx] };
|
||||
msg.providerOptions = {
|
||||
...(msg.providerOptions as Record<string, unknown> || {}),
|
||||
anthropic: {
|
||||
...((msg.providerOptions as Record<string, unknown>)?.anthropic as Record<string, unknown> || {}),
|
||||
cacheControl: { type: 'ephemeral' },
|
||||
},
|
||||
};
|
||||
messages[idx] = msg;
|
||||
}
|
||||
|
||||
return { ...params, prompt: messages as unknown as LanguageModelV3Prompt };
|
||||
},
|
||||
};
|
||||
}
|
||||
426
ts/smartai.provider.ollama.ts
Normal file
426
ts/smartai.provider.ollama.ts
Normal file
@@ -0,0 +1,426 @@
|
||||
import type {
|
||||
LanguageModelV3,
|
||||
LanguageModelV3CallOptions,
|
||||
LanguageModelV3GenerateResult,
|
||||
LanguageModelV3StreamResult,
|
||||
LanguageModelV3StreamPart,
|
||||
LanguageModelV3Prompt,
|
||||
LanguageModelV3Content,
|
||||
LanguageModelV3Usage,
|
||||
LanguageModelV3FinishReason,
|
||||
} from '@ai-sdk/provider';
|
||||
import type { ISmartAiOptions, IOllamaModelOptions } from './smartai.interfaces.js';
|
||||
|
||||
/** One message in Ollama's native /api/chat request/response format. */
interface IOllamaMessage {
  // 'system' | 'user' | 'assistant' | 'tool' — kept as string to mirror the wire format.
  role: string;
  content: string;
  // Base64-encoded images attached to a user message (vision models).
  images?: string[];
  // Tool invocations emitted by an assistant message.
  tool_calls?: Array<{
    function: { name: string; arguments: Record<string, unknown> };
  }>;
  // Reasoning/thinking text for models that support it.
  thinking?: string;
}

/** Ollama's function-tool definition shape for the request `tools` field. */
interface IOllamaTool {
  type: 'function';
  function: {
    name: string;
    description: string;
    // JSON Schema describing the tool's input parameters.
    parameters: Record<string, unknown>;
  };
}
|
||||
|
||||
/**
|
||||
* Convert AI SDK V3 prompt messages to Ollama's message format.
|
||||
*/
|
||||
function convertPromptToOllamaMessages(prompt: LanguageModelV3Prompt): IOllamaMessage[] {
|
||||
const messages: IOllamaMessage[] = [];
|
||||
|
||||
for (const msg of prompt) {
|
||||
if (msg.role === 'system') {
|
||||
// System message content is a plain string in V3
|
||||
messages.push({ role: 'system', content: msg.content });
|
||||
} else if (msg.role === 'user') {
|
||||
let text = '';
|
||||
const images: string[] = [];
|
||||
for (const part of msg.content) {
|
||||
if (part.type === 'text') {
|
||||
text += part.text;
|
||||
} else if (part.type === 'file' && part.mediaType?.startsWith('image/')) {
|
||||
// Handle image files — Ollama expects base64 images
|
||||
if (typeof part.data === 'string') {
|
||||
images.push(part.data);
|
||||
} else if (part.data instanceof Uint8Array) {
|
||||
images.push(Buffer.from(part.data).toString('base64'));
|
||||
}
|
||||
}
|
||||
}
|
||||
const m: IOllamaMessage = { role: 'user', content: text };
|
||||
if (images.length > 0) m.images = images;
|
||||
messages.push(m);
|
||||
} else if (msg.role === 'assistant') {
|
||||
let text = '';
|
||||
let thinking = '';
|
||||
const toolCalls: IOllamaMessage['tool_calls'] = [];
|
||||
for (const part of msg.content) {
|
||||
if (part.type === 'text') {
|
||||
text += part.text;
|
||||
} else if (part.type === 'reasoning') {
|
||||
thinking += part.text;
|
||||
} else if (part.type === 'tool-call') {
|
||||
const args = typeof part.input === 'string'
|
||||
? JSON.parse(part.input as string)
|
||||
: (part.input as Record<string, unknown>);
|
||||
toolCalls.push({
|
||||
function: {
|
||||
name: part.toolName,
|
||||
arguments: args,
|
||||
},
|
||||
});
|
||||
}
|
||||
}
|
||||
const m: IOllamaMessage = { role: 'assistant', content: text };
|
||||
if (toolCalls.length > 0) m.tool_calls = toolCalls;
|
||||
if (thinking) m.thinking = thinking;
|
||||
messages.push(m);
|
||||
} else if (msg.role === 'tool') {
|
||||
for (const part of msg.content) {
|
||||
if (part.type === 'tool-result') {
|
||||
let resultContent = '';
|
||||
if (part.output) {
|
||||
if (part.output.type === 'text') {
|
||||
resultContent = part.output.value;
|
||||
} else if (part.output.type === 'json') {
|
||||
resultContent = JSON.stringify(part.output.value);
|
||||
}
|
||||
}
|
||||
messages.push({ role: 'tool', content: resultContent });
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
return messages;
|
||||
}
|
||||
|
||||
/**
|
||||
* Convert AI SDK V3 tools to Ollama's tool format.
|
||||
*/
|
||||
function convertToolsToOllamaTools(tools: LanguageModelV3CallOptions['tools']): IOllamaTool[] | undefined {
|
||||
if (!tools || tools.length === 0) return undefined;
|
||||
|
||||
return tools
|
||||
.filter((t): t is Extract<typeof t, { type: 'function' }> => t.type === 'function')
|
||||
.map(t => ({
|
||||
type: 'function' as const,
|
||||
function: {
|
||||
name: t.name,
|
||||
description: t.description ?? '',
|
||||
parameters: t.inputSchema as Record<string, unknown>,
|
||||
},
|
||||
}));
|
||||
}
|
||||
|
||||
function makeUsage(promptTokens?: number, completionTokens?: number): LanguageModelV3Usage {
|
||||
return {
|
||||
inputTokens: {
|
||||
total: promptTokens,
|
||||
noCache: undefined,
|
||||
cacheRead: undefined,
|
||||
cacheWrite: undefined,
|
||||
},
|
||||
outputTokens: {
|
||||
total: completionTokens,
|
||||
text: completionTokens,
|
||||
reasoning: undefined,
|
||||
},
|
||||
};
|
||||
}
|
||||
|
||||
function makeFinishReason(reason?: string): LanguageModelV3FinishReason {
|
||||
if (reason === 'tool_calls' || reason === 'tool-calls') {
|
||||
return { unified: 'tool-calls', raw: reason };
|
||||
}
|
||||
return { unified: 'stop', raw: reason ?? 'stop' };
|
||||
}
|
||||
|
||||
let idCounter = 0;
|
||||
function generateId(): string {
|
||||
return `ollama-${Date.now()}-${idCounter++}`;
|
||||
}
|
||||
|
||||
/**
|
||||
* Custom LanguageModelV3 implementation for Ollama.
|
||||
* Calls Ollama's native /api/chat endpoint directly to support
|
||||
* think, num_ctx, temperature, and other model options.
|
||||
*/
|
||||
export function createOllamaModel(options: ISmartAiOptions): LanguageModelV3 {
|
||||
const baseUrl = options.baseUrl ?? 'http://localhost:11434';
|
||||
const modelId = options.model;
|
||||
const ollamaOpts: IOllamaModelOptions = { ...options.ollamaOptions };
|
||||
|
||||
// Apply default temperature of 0.55 for Qwen models
|
||||
if (modelId.toLowerCase().includes('qwen') && ollamaOpts.temperature === undefined) {
|
||||
ollamaOpts.temperature = 0.55;
|
||||
}
|
||||
|
||||
const model: LanguageModelV3 = {
|
||||
specificationVersion: 'v3',
|
||||
provider: 'ollama',
|
||||
modelId,
|
||||
supportedUrls: {},
|
||||
|
||||
async doGenerate(callOptions: LanguageModelV3CallOptions): Promise<LanguageModelV3GenerateResult> {
|
||||
const messages = convertPromptToOllamaMessages(callOptions.prompt);
|
||||
const tools = convertToolsToOllamaTools(callOptions.tools);
|
||||
|
||||
const ollamaModelOptions: Record<string, unknown> = { ...ollamaOpts };
|
||||
// Override with call-level options if provided
|
||||
if (callOptions.temperature !== undefined) ollamaModelOptions.temperature = callOptions.temperature;
|
||||
if (callOptions.topP !== undefined) ollamaModelOptions.top_p = callOptions.topP;
|
||||
if (callOptions.topK !== undefined) ollamaModelOptions.top_k = callOptions.topK;
|
||||
if (callOptions.maxOutputTokens !== undefined) ollamaModelOptions.num_predict = callOptions.maxOutputTokens;
|
||||
if (callOptions.seed !== undefined) ollamaModelOptions.seed = callOptions.seed;
|
||||
if (callOptions.stopSequences) ollamaModelOptions.stop = callOptions.stopSequences;
|
||||
// Remove think from options — it goes at the top level
|
||||
const { think, ...modelOpts } = ollamaModelOptions;
|
||||
|
||||
const requestBody: Record<string, unknown> = {
|
||||
model: modelId,
|
||||
messages,
|
||||
stream: false,
|
||||
options: modelOpts,
|
||||
};
|
||||
|
||||
// Add think parameter at the top level (Ollama API requirement)
|
||||
if (ollamaOpts.think !== undefined) {
|
||||
requestBody.think = ollamaOpts.think;
|
||||
}
|
||||
|
||||
if (tools) requestBody.tools = tools;
|
||||
|
||||
const response = await fetch(`${baseUrl}/api/chat`, {
|
||||
method: 'POST',
|
||||
headers: { 'Content-Type': 'application/json' },
|
||||
body: JSON.stringify(requestBody),
|
||||
signal: callOptions.abortSignal,
|
||||
});
|
||||
|
||||
if (!response.ok) {
|
||||
const body = await response.text();
|
||||
throw new Error(`Ollama API error ${response.status}: ${body}`);
|
||||
}
|
||||
|
||||
const result = await response.json() as Record<string, unknown>;
|
||||
const message = result.message as Record<string, unknown>;
|
||||
|
||||
// Build content array
|
||||
const content: LanguageModelV3Content[] = [];
|
||||
|
||||
// Add reasoning if present
|
||||
if (message.thinking && typeof message.thinking === 'string') {
|
||||
content.push({ type: 'reasoning', text: message.thinking });
|
||||
}
|
||||
|
||||
// Add text content
|
||||
if (message.content && typeof message.content === 'string') {
|
||||
content.push({ type: 'text', text: message.content });
|
||||
}
|
||||
|
||||
// Add tool calls if present
|
||||
if (Array.isArray(message.tool_calls)) {
|
||||
for (const tc of message.tool_calls as Array<Record<string, unknown>>) {
|
||||
const fn = tc.function as Record<string, unknown>;
|
||||
content.push({
|
||||
type: 'tool-call',
|
||||
toolCallId: generateId(),
|
||||
toolName: fn.name as string,
|
||||
input: JSON.stringify(fn.arguments),
|
||||
});
|
||||
}
|
||||
}
|
||||
|
||||
const finishReason = Array.isArray(message.tool_calls) && (message.tool_calls as unknown[]).length > 0
|
||||
? makeFinishReason('tool_calls')
|
||||
: makeFinishReason('stop');
|
||||
|
||||
return {
|
||||
content,
|
||||
finishReason,
|
||||
usage: makeUsage(
|
||||
(result.prompt_eval_count as number) ?? undefined,
|
||||
(result.eval_count as number) ?? undefined,
|
||||
),
|
||||
warnings: [],
|
||||
request: { body: requestBody },
|
||||
};
|
||||
},
|
||||
|
||||
/**
 * Streams a chat completion from the Ollama /api/chat endpoint, translating
 * Ollama's newline-delimited JSON chunks into LanguageModelV3 stream parts
 * (reasoning-*, text-*, tool-call, finish).
 */
async doStream(callOptions: LanguageModelV3CallOptions): Promise<LanguageModelV3StreamResult> {
  const messages = convertPromptToOllamaMessages(callOptions.prompt);
  const tools = convertToolsToOllamaTools(callOptions.tools);

  // Merge provider-level Ollama options with per-call sampling settings,
  // mapping ai-sdk names onto Ollama's option keys.
  const ollamaModelOptions: Record<string, unknown> = { ...ollamaOpts };
  if (callOptions.temperature !== undefined) ollamaModelOptions.temperature = callOptions.temperature;
  if (callOptions.topP !== undefined) ollamaModelOptions.top_p = callOptions.topP;
  if (callOptions.topK !== undefined) ollamaModelOptions.top_k = callOptions.topK;
  if (callOptions.maxOutputTokens !== undefined) ollamaModelOptions.num_predict = callOptions.maxOutputTokens;
  if (callOptions.seed !== undefined) ollamaModelOptions.seed = callOptions.seed;
  if (callOptions.stopSequences) ollamaModelOptions.stop = callOptions.stopSequences;
  // `think` is a top-level request field for Ollama, not a model option,
  // so it is split off before building the options payload.
  const { think, ...modelOpts } = ollamaModelOptions;

  const requestBody: Record<string, unknown> = {
    model: modelId,
    messages,
    stream: true,
    options: modelOpts,
  };

  if (ollamaOpts.think !== undefined) {
    requestBody.think = ollamaOpts.think;
  }

  if (tools) requestBody.tools = tools;

  const response = await fetch(`${baseUrl}/api/chat`, {
    method: 'POST',
    headers: { 'Content-Type': 'application/json' },
    body: JSON.stringify(requestBody),
    signal: callOptions.abortSignal,
  });

  if (!response.ok) {
    const body = await response.text();
    throw new Error(`Ollama API error ${response.status}: ${body}`);
  }

  const reader = response.body!.getReader();
  const decoder = new TextDecoder();

  // Stable part ids for the single reasoning/text streams of this response.
  const textId = generateId();
  const reasoningId = generateId();
  let textStarted = false;
  let reasoningStarted = false;
  let hasToolCalls = false;
  let closed = false;

  const stream = new ReadableStream<LanguageModelV3StreamPart>({
    async pull(controller) {
      if (closed) return;

      // Parse one NDJSON line from Ollama and enqueue the corresponding
      // stream parts. Unparseable or blank lines are silently skipped.
      const processLine = (line: string) => {
        if (!line.trim()) return;
        let json: Record<string, unknown>;
        try {
          json = JSON.parse(line);
        } catch {
          return;
        }

        const msg = json.message as Record<string, unknown> | undefined;

        // Handle thinking/reasoning content
        if (msg?.thinking && typeof msg.thinking === 'string') {
          if (!reasoningStarted) {
            reasoningStarted = true;
            controller.enqueue({ type: 'reasoning-start', id: reasoningId });
          }
          controller.enqueue({ type: 'reasoning-delta', id: reasoningId, delta: msg.thinking });
        }

        // Handle text content. The first text delta also ends an open
        // reasoning stream (thinking is emitted before the answer).
        if (msg?.content && typeof msg.content === 'string') {
          if (reasoningStarted && !textStarted) {
            controller.enqueue({ type: 'reasoning-end', id: reasoningId });
          }
          if (!textStarted) {
            textStarted = true;
            controller.enqueue({ type: 'text-start', id: textId });
          }
          controller.enqueue({ type: 'text-delta', id: textId, delta: msg.content });
        }

        // Handle tool calls. Ollama supplies no call ids, so one is
        // generated per call.
        if (Array.isArray(msg?.tool_calls)) {
          hasToolCalls = true;
          for (const tc of msg!.tool_calls as Array<Record<string, unknown>>) {
            const fn = tc.function as Record<string, unknown>;
            const callId = generateId();
            controller.enqueue({
              type: 'tool-call',
              toolCallId: callId,
              toolName: fn.name as string,
              input: JSON.stringify(fn.arguments),
            });
          }
        }

        // Handle done: close any open reasoning/text streams, then emit the
        // terminal finish part with usage from the final chunk.
        if (json.done) {
          if (reasoningStarted && !textStarted) {
            controller.enqueue({ type: 'reasoning-end', id: reasoningId });
          }
          if (textStarted) {
            controller.enqueue({ type: 'text-end', id: textId });
          }
          controller.enqueue({
            type: 'finish',
            finishReason: hasToolCalls
              ? makeFinishReason('tool_calls')
              : makeFinishReason('stop'),
            usage: makeUsage(
              (json.prompt_eval_count as number) ?? undefined,
              (json.eval_count as number) ?? undefined,
            ),
          });
          closed = true;
          controller.close();
        }
      };

      try {
        // NOTE(review): `buffer` is local to this pull invocation; the entire
        // read loop runs to completion inside a single pull and later pulls
        // bail out via the `closed` guard, so no data is lost — confirm if
        // the pull contract ever changes.
        let buffer = '';
        while (true) {
          const { done, value } = await reader.read();
          if (done) {
            if (buffer.trim()) processLine(buffer);
            // Synthesize a finish part if the connection ended without a
            // `done` chunk, so consumers always observe a terminal part.
            if (!closed) {
              controller.enqueue({
                type: 'finish',
                finishReason: makeFinishReason('stop'),
                usage: makeUsage(undefined, undefined),
              });
              closed = true;
              controller.close();
            }
            return;
          }

          // Split buffered bytes into complete lines; keep the trailing
          // partial line for the next chunk.
          // NOTE(review): the decoder is never flushed with a final decode();
          // a multi-byte sequence split exactly at stream end would be
          // dropped — in practice Ollama emits ASCII JSON lines. Confirm.
          buffer += decoder.decode(value, { stream: true });
          const lines = buffer.split('\n');
          buffer = lines.pop() || '';
          for (const line of lines) {
            processLine(line);
            if (closed) return;
          }
        }
      } catch (error) {
        if (!closed) {
          controller.error(error);
          closed = true;
        }
      } finally {
        reader.releaseLock();
      }
    },
  });

  return {
    stream,
    request: { body: requestBody },
  };
},
|
||||
};
|
||||
|
||||
return model;
|
||||
}
|
||||
24
ts_audio/index.ts
Normal file
24
ts_audio/index.ts
Normal file
@@ -0,0 +1,24 @@
|
||||
import * as plugins from './plugins.js';
|
||||
import { Readable } from 'stream';
|
||||
|
||||
/**
 * Options for {@link textToSpeech} (OpenAI `audio.speech` API).
 */
export interface IOpenAiTtsOptions {
  /** OpenAI API key. */
  apiKey: string;
  /** Text to synthesize into speech. */
  text: string;
  /** Voice preset; textToSpeech defaults to 'alloy'. */
  voice?: 'alloy' | 'echo' | 'fable' | 'onyx' | 'nova' | 'shimmer';
  /** TTS model; textToSpeech defaults to 'tts-1'. */
  model?: 'tts-1' | 'tts-1-hd';
  /** Audio encoding of the returned stream; textToSpeech defaults to 'mp3'. */
  responseFormat?: 'mp3' | 'opus' | 'aac' | 'flac';
  /** Playback speed multiplier; textToSpeech defaults to 1. */
  speed?: number;
}
|
||||
|
||||
export async function textToSpeech(options: IOpenAiTtsOptions): Promise<NodeJS.ReadableStream> {
|
||||
const client = new plugins.OpenAI({ apiKey: options.apiKey });
|
||||
const result = await client.audio.speech.create({
|
||||
model: options.model ?? 'tts-1',
|
||||
voice: options.voice ?? 'alloy',
|
||||
input: options.text,
|
||||
response_format: options.responseFormat ?? 'mp3',
|
||||
speed: options.speed ?? 1,
|
||||
});
|
||||
const stream = result.body;
|
||||
return Readable.fromWeb(stream as any);
|
||||
}
|
||||
2
ts_audio/plugins.ts
Normal file
2
ts_audio/plugins.ts
Normal file
@@ -0,0 +1,2 @@
|
||||
import OpenAI from 'openai';
|
||||
export { OpenAI };
|
||||
61
ts_document/index.ts
Normal file
61
ts_document/index.ts
Normal file
@@ -0,0 +1,61 @@
|
||||
import * as plugins from './plugins.js';
|
||||
import type { LanguageModelV3 } from '@ai-sdk/provider';
|
||||
import type { ModelMessage } from 'ai';
|
||||
|
||||
let smartpdfInstance: InstanceType<typeof plugins.smartpdf.SmartPdf> | null = null;
|
||||
|
||||
async function ensureSmartpdf(): Promise<InstanceType<typeof plugins.smartpdf.SmartPdf>> {
|
||||
if (!smartpdfInstance) {
|
||||
smartpdfInstance = new plugins.smartpdf.SmartPdf();
|
||||
await smartpdfInstance.start();
|
||||
}
|
||||
return smartpdfInstance;
|
||||
}
|
||||
|
||||
/**
 * Inputs for {@link analyzeDocuments}.
 */
export interface IDocumentOptions {
  /** ai-sdk language model used to analyze the rendered PDF pages. */
  model: LanguageModelV3;
  /** Optional system prompt passed through to the model. */
  systemMessage?: string;
  /** The user's question/instruction about the documents. */
  userMessage: string;
  /** Raw PDF files; each is rendered to PNG page images before analysis. */
  pdfDocuments: Uint8Array[];
  /** Prior conversation turns prepended before the new user message. */
  messageHistory?: ModelMessage[];
}
|
||||
|
||||
export async function analyzeDocuments(options: IDocumentOptions): Promise<string> {
|
||||
const pdf = await ensureSmartpdf();
|
||||
|
||||
const imagePages: Uint8Array[] = [];
|
||||
for (const doc of options.pdfDocuments) {
|
||||
const pages = await pdf.convertPDFToPngBytes(doc);
|
||||
imagePages.push(...pages);
|
||||
}
|
||||
|
||||
// Filter out empty buffers
|
||||
const validPages = imagePages.filter(page => page && page.length > 0);
|
||||
|
||||
const result = await plugins.generateText({
|
||||
model: options.model,
|
||||
system: options.systemMessage,
|
||||
messages: [
|
||||
...(options.messageHistory ?? []),
|
||||
{
|
||||
role: 'user',
|
||||
content: [
|
||||
{ type: 'text', text: options.userMessage },
|
||||
...validPages.map(page => ({
|
||||
type: 'image' as const,
|
||||
image: page,
|
||||
mimeType: 'image/png' as const,
|
||||
})),
|
||||
],
|
||||
},
|
||||
],
|
||||
});
|
||||
return result.text;
|
||||
}
|
||||
|
||||
export async function stopSmartpdf(): Promise<void> {
|
||||
if (smartpdfInstance) {
|
||||
await smartpdfInstance.stop();
|
||||
smartpdfInstance = null;
|
||||
}
|
||||
}
|
||||
3
ts_document/plugins.ts
Normal file
3
ts_document/plugins.ts
Normal file
@@ -0,0 +1,3 @@
|
||||
import { generateText } from 'ai';
|
||||
import * as smartpdf from '@push.rocks/smartpdf';
|
||||
export { generateText, smartpdf };
|
||||
147
ts_image/index.ts
Normal file
147
ts_image/index.ts
Normal file
@@ -0,0 +1,147 @@
|
||||
import * as plugins from './plugins.js';
|
||||
|
||||
/**
 * Options for {@link generateImage} (OpenAI Images API).
 * Which fields are sent depends on the chosen model: gpt-image-1, dall-e-3
 * and dall-e-2 each accept a different subset.
 */
export interface IImageGenerateOptions {
  /** OpenAI API key. */
  apiKey: string;
  /** Text prompt describing the image to generate. */
  prompt: string;
  /** Image model; generateImage defaults to 'gpt-image-1'. */
  model?: 'gpt-image-1' | 'dall-e-3' | 'dall-e-2';
  /** Quality tier (model-dependent vocabulary). */
  quality?: 'low' | 'medium' | 'high' | 'standard' | 'hd' | 'auto';
  /** Output resolution (model-dependent set of sizes). */
  size?: '256x256' | '512x512' | '1024x1024' | '1536x1024' | '1024x1536' | '1792x1024' | '1024x1792' | 'auto';
  /** Rendering style; only forwarded for dall-e-3. */
  style?: 'vivid' | 'natural';
  /** Background handling; only forwarded for gpt-image-1. */
  background?: 'transparent' | 'opaque' | 'auto';
  /** Output encoding; only forwarded for gpt-image-1. */
  outputFormat?: 'png' | 'jpeg' | 'webp';
  /** Compression level; only forwarded for gpt-image-1. */
  outputCompression?: number;
  /** Moderation strictness; only forwarded for gpt-image-1. */
  moderation?: 'low' | 'auto';
  /** Number of images to request; defaults to 1. */
  n?: number;
  /** Streaming flag; only forwarded for gpt-image-1. */
  stream?: boolean;
  /** Partial image count for streaming; only forwarded for gpt-image-1. */
  partialImages?: number;
}

/**
 * Options for {@link editImage} (OpenAI Images API edit endpoint).
 */
export interface IImageEditOptions {
  /** OpenAI API key. */
  apiKey: string;
  /** Source image to edit (uploaded as image/png). */
  image: Buffer;
  /** Text prompt describing the desired edit. */
  prompt: string;
  /** Optional mask marking editable regions (uploaded as image/png). */
  mask?: Buffer;
  /** Image model; editImage defaults to 'gpt-image-1'. */
  model?: 'gpt-image-1' | 'dall-e-2';
  /** Quality tier; only forwarded for gpt-image-1. */
  quality?: 'low' | 'medium' | 'high' | 'standard' | 'auto';
  /** Output resolution (model-dependent set of sizes). */
  size?: '256x256' | '512x512' | '1024x1024' | '1536x1024' | '1024x1536' | 'auto';
  /** Background handling; only forwarded for gpt-image-1. */
  background?: 'transparent' | 'opaque' | 'auto';
  /** Output encoding; only forwarded for gpt-image-1. */
  outputFormat?: 'png' | 'jpeg' | 'webp';
  /** Compression level; only forwarded for gpt-image-1. */
  outputCompression?: number;
  /** Number of images to request; defaults to 1. */
  n?: number;
  /** Streaming flag; only forwarded for gpt-image-1. */
  stream?: boolean;
  /** Partial image count for streaming; only forwarded for gpt-image-1. */
  partialImages?: number;
}

/**
 * Normalized result shape shared by generateImage and editImage.
 */
export interface IImageResponse {
  /** Generated/edited images; b64_json or url depending on model. */
  images: Array<{
    b64_json?: string;
    url?: string;
    revisedPrompt?: string;
  }>;
  /** Echoed request/response details (model, quality, size, token usage). */
  metadata?: {
    model: string;
    quality?: string;
    size?: string;
    outputFormat?: string;
    tokensUsed?: number;
  };
}
|
||||
|
||||
export async function generateImage(options: IImageGenerateOptions): Promise<IImageResponse> {
|
||||
const client = new plugins.OpenAI({ apiKey: options.apiKey });
|
||||
const model = options.model || 'gpt-image-1';
|
||||
|
||||
const requestParams: Record<string, unknown> = {
|
||||
model,
|
||||
prompt: options.prompt,
|
||||
n: options.n || 1,
|
||||
};
|
||||
|
||||
if (model === 'gpt-image-1') {
|
||||
if (options.quality) requestParams.quality = options.quality;
|
||||
if (options.size) requestParams.size = options.size;
|
||||
if (options.background) requestParams.background = options.background;
|
||||
if (options.outputFormat) requestParams.output_format = options.outputFormat;
|
||||
if (options.outputCompression !== undefined) requestParams.output_compression = options.outputCompression;
|
||||
if (options.moderation) requestParams.moderation = options.moderation;
|
||||
if (options.stream !== undefined) requestParams.stream = options.stream;
|
||||
if (options.partialImages !== undefined) requestParams.partial_images = options.partialImages;
|
||||
} else if (model === 'dall-e-3') {
|
||||
if (options.quality) requestParams.quality = options.quality;
|
||||
if (options.size) requestParams.size = options.size;
|
||||
if (options.style) requestParams.style = options.style;
|
||||
requestParams.response_format = 'b64_json';
|
||||
} else if (model === 'dall-e-2') {
|
||||
if (options.size) requestParams.size = options.size;
|
||||
requestParams.response_format = 'b64_json';
|
||||
}
|
||||
|
||||
const result: any = await client.images.generate(requestParams as any);
|
||||
|
||||
const images = (result.data || []).map((img: any) => ({
|
||||
b64_json: img.b64_json,
|
||||
url: img.url,
|
||||
revisedPrompt: img.revised_prompt,
|
||||
}));
|
||||
|
||||
return {
|
||||
images,
|
||||
metadata: {
|
||||
model,
|
||||
quality: result.quality,
|
||||
size: result.size,
|
||||
outputFormat: result.output_format,
|
||||
tokensUsed: result.usage?.total_tokens,
|
||||
},
|
||||
};
|
||||
}
|
||||
|
||||
export async function editImage(options: IImageEditOptions): Promise<IImageResponse> {
|
||||
const client = new plugins.OpenAI({ apiKey: options.apiKey });
|
||||
const model = options.model || 'gpt-image-1';
|
||||
|
||||
const imageFile = await plugins.toFile(options.image, 'image.png', { type: 'image/png' });
|
||||
|
||||
const requestParams: Record<string, unknown> = {
|
||||
model,
|
||||
image: imageFile,
|
||||
prompt: options.prompt,
|
||||
n: options.n || 1,
|
||||
};
|
||||
|
||||
if (options.mask) {
|
||||
requestParams.mask = await plugins.toFile(options.mask, 'mask.png', { type: 'image/png' });
|
||||
}
|
||||
|
||||
if (model === 'gpt-image-1') {
|
||||
if (options.quality) requestParams.quality = options.quality;
|
||||
if (options.size) requestParams.size = options.size;
|
||||
if (options.background) requestParams.background = options.background;
|
||||
if (options.outputFormat) requestParams.output_format = options.outputFormat;
|
||||
if (options.outputCompression !== undefined) requestParams.output_compression = options.outputCompression;
|
||||
if (options.stream !== undefined) requestParams.stream = options.stream;
|
||||
if (options.partialImages !== undefined) requestParams.partial_images = options.partialImages;
|
||||
} else if (model === 'dall-e-2') {
|
||||
if (options.size) requestParams.size = options.size;
|
||||
requestParams.response_format = 'b64_json';
|
||||
}
|
||||
|
||||
const result: any = await client.images.edit(requestParams as any);
|
||||
|
||||
const images = (result.data || []).map((img: any) => ({
|
||||
b64_json: img.b64_json,
|
||||
url: img.url,
|
||||
revisedPrompt: img.revised_prompt,
|
||||
}));
|
||||
|
||||
return {
|
||||
images,
|
||||
metadata: {
|
||||
model,
|
||||
quality: result.quality,
|
||||
size: result.size,
|
||||
outputFormat: result.output_format,
|
||||
tokensUsed: result.usage?.total_tokens,
|
||||
},
|
||||
};
|
||||
}
|
||||
3
ts_image/plugins.ts
Normal file
3
ts_image/plugins.ts
Normal file
@@ -0,0 +1,3 @@
|
||||
import OpenAI from 'openai';
|
||||
import { toFile } from 'openai';
|
||||
export { OpenAI, toFile };
|
||||
120
ts_research/index.ts
Normal file
120
ts_research/index.ts
Normal file
@@ -0,0 +1,120 @@
|
||||
import * as plugins from './plugins.js';
|
||||
|
||||
/**
 * Inputs for {@link research}.
 */
export interface IResearchOptions {
  /** Anthropic API key. */
  apiKey: string;
  /** The research question to answer. */
  query: string;
  /** Desired depth; currently only echoed into the response metadata. */
  searchDepth?: 'basic' | 'advanced' | 'deep';
  /** Forwarded as the web_search tool's max_uses limit. */
  maxSources?: number;
  /** Restrict web search to these domains (takes precedence over blockedDomains). */
  allowedDomains?: string[];
  /** Exclude these domains from search; ignored when allowedDomains is set. */
  blockedDomains?: string[];
}

/**
 * Result of {@link research}.
 */
export interface IResearchResponse {
  /** The synthesized answer text. */
  answer: string;
  /** Web sources gathered from citations, tool results or markdown links. */
  sources: Array<{ url: string; title: string; snippet: string }>;
  /** Queries the model issued to the web_search tool, if any. */
  searchQueries?: string[];
  /** Model/usage details (model id, searchDepth, token and search counts). */
  metadata?: Record<string, unknown>;
}
|
||||
|
||||
/**
 * Runs a web-search-backed research query against Anthropic's Messages API
 * and returns the synthesized answer together with the sources it used.
 *
 * Sources are collected from citation blocks and from web_search tool
 * results (tool results are added only when their URL was not already
 * captured), with a markdown-link fallback when the response carries no
 * structured citations at all.
 */
export async function research(options: IResearchOptions): Promise<IResearchResponse> {
  const client = new plugins.Anthropic({ apiKey: options.apiKey });

  const systemMessage = `You are a research assistant with web search capabilities.
Provide comprehensive, well-researched answers with citations and sources.
When searching the web, be thorough and cite your sources accurately.`;

  // Build web search tool config
  const webSearchTool: any = {
    type: 'web_search_20250305',
    name: 'web_search',
  };

  if (options.maxSources) {
    webSearchTool.max_uses = options.maxSources;
  }
  // allowed_domains and blocked_domains are mutually exclusive in this
  // request; allowedDomains wins when both are provided.
  if (options.allowedDomains?.length) {
    webSearchTool.allowed_domains = options.allowedDomains;
  } else if (options.blockedDomains?.length) {
    webSearchTool.blocked_domains = options.blockedDomains;
  }

  // NOTE(review): options.searchDepth is only echoed in the metadata below;
  // it does not influence the API request — confirm that is intentional.
  const result = await client.messages.create({
    model: 'claude-sonnet-4-5-20250929',
    system: systemMessage,
    messages: [
      { role: 'user' as const, content: options.query },
    ],
    max_tokens: 20000,
    temperature: 0.7,
    tools: [webSearchTool],
  });

  // Extract answer, sources, and search queries
  let answer = '';
  const sources: Array<{ url: string; title: string; snippet: string }> = [];
  const searchQueries: string[] = [];

  for (const block of result.content) {
    const b: any = block;
    if ('text' in b) {
      answer += b.text;

      // Extract citations if present
      if (b.citations && Array.isArray(b.citations)) {
        for (const citation of b.citations) {
          if (citation.type === 'web_search_result_location') {
            sources.push({
              title: citation.title || '',
              url: citation.url || '',
              snippet: citation.cited_text || '',
            });
          }
        }
      }
    } else if (b.type === 'server_tool_use') {
      // The model's own web_search invocations; record the queries it ran.
      if (b.name === 'web_search' && b.input?.query) {
        searchQueries.push(b.input.query);
      }
    } else if (b.type === 'web_search_tool_result') {
      // Raw search hits; add any URL not already captured via citations.
      if (Array.isArray(b.content)) {
        for (const item of b.content) {
          if (item.type === 'web_search_result') {
            if (!sources.some(s => s.url === item.url)) {
              sources.push({
                title: item.title || '',
                url: item.url || '',
                snippet: '',
              });
            }
          }
        }
      }
    }
  }

  // Fallback: parse markdown links if no citations found
  if (sources.length === 0) {
    const urlRegex = /\[([^\]]+)\]\(([^)]+)\)/g;
    let match: RegExpExecArray | null;
    while ((match = urlRegex.exec(answer)) !== null) {
      sources.push({
        title: match[1],
        url: match[2],
        snippet: '',
      });
    }
  }

  const usage: any = result.usage;
  return {
    answer,
    sources,
    searchQueries: searchQueries.length > 0 ? searchQueries : undefined,
    metadata: {
      model: 'claude-sonnet-4-5-20250929',
      searchDepth: options.searchDepth || 'basic',
      tokensUsed: usage?.output_tokens,
      webSearchesPerformed: usage?.server_tool_use?.web_search_requests ?? 0,
    },
  };
}
|
||||
2
ts_research/plugins.ts
Normal file
2
ts_research/plugins.ts
Normal file
@@ -0,0 +1,2 @@
|
||||
import Anthropic from '@anthropic-ai/sdk';
|
||||
export { Anthropic };
|
||||
29
ts_vision/index.ts
Normal file
29
ts_vision/index.ts
Normal file
@@ -0,0 +1,29 @@
|
||||
import * as plugins from './plugins.js';
|
||||
import type { LanguageModelV3 } from '@ai-sdk/provider';
|
||||
|
||||
/**
 * Inputs for {@link analyzeImage}.
 */
export interface IVisionOptions {
  /** Vision-capable ai-sdk language model. */
  model: LanguageModelV3;
  /** Raw image bytes to analyze. */
  image: Buffer | Uint8Array;
  /** Question/instruction about the image. */
  prompt: string;
  /** MIME type of the image; analyzeImage defaults to 'image/jpeg'. */
  mediaType?: 'image/jpeg' | 'image/png' | 'image/webp' | 'image/gif';
}
|
||||
|
||||
export async function analyzeImage(options: IVisionOptions): Promise<string> {
|
||||
const result = await plugins.generateText({
|
||||
model: options.model,
|
||||
messages: [
|
||||
{
|
||||
role: 'user',
|
||||
content: [
|
||||
{ type: 'text', text: options.prompt },
|
||||
{
|
||||
type: 'image',
|
||||
image: options.image,
|
||||
mediaType: options.mediaType ?? 'image/jpeg',
|
||||
},
|
||||
],
|
||||
},
|
||||
],
|
||||
});
|
||||
return result.text;
|
||||
}
|
||||
2
ts_vision/plugins.ts
Normal file
2
ts_vision/plugins.ts
Normal file
@@ -0,0 +1,2 @@
|
||||
import { generateText } from 'ai';
|
||||
export { generateText };
|
||||
Reference in New Issue
Block a user