Compare commits

..

9 Commits

Author SHA1 Message Date
e8a2a3ff1b 0.8.0
Some checks failed
Default (tags) / security (push) Failing after 24s
Default (tags) / test (push) Failing after 14s
Default (tags) / release (push) Has been skipped
Default (tags) / metadata (push) Has been skipped
2025-10-30 12:11:18 +00:00
cbc9d8d45b feat(provider.anthropic): Add extended thinking modes to AnthropicProvider and apply thinking budgets to API calls 2025-10-30 12:11:18 +00:00
d52e6ae67d 0.7.7
Some checks failed
Default (tags) / security (push) Failing after 23s
Default (tags) / test (push) Failing after 14s
Default (tags) / release (push) Has been skipped
Default (tags) / metadata (push) Has been skipped
2025-10-10 07:32:21 +00:00
b9745a1869 fix(MultiModalModel): Lazy-load SmartPdf and guard document processing across providers; ensure SmartPdf is initialized only when needed 2025-10-10 07:32:21 +00:00
af3b61cf74 0.7.6
Some checks failed
Default (tags) / security (push) Failing after 23s
Default (tags) / test (push) Failing after 14s
Default (tags) / release (push) Has been skipped
Default (tags) / metadata (push) Has been skipped
2025-10-09 07:00:15 +00:00
8666876879 fix(provider.elevenlabs): Provide default ElevenLabs TTS voice fallback and add local tool/project configs 2025-10-09 07:00:15 +00:00
b78168307b 0.7.5
Some checks failed
Default (tags) / security (push) Failing after 24s
Default (tags) / test (push) Failing after 15s
Default (tags) / release (push) Has been skipped
Default (tags) / metadata (push) Has been skipped
2025-10-08 22:56:53 +00:00
bbd8770205 fix(provider.elevenlabs): Update ElevenLabs default TTS model to eleven_v3 and add local Claude permissions file 2025-10-08 22:56:53 +00:00
28bb13dc0c update 2025-10-08 22:49:08 +00:00
19 changed files with 2663 additions and 714 deletions

1
.serena/.gitignore vendored Normal file
View File

@@ -0,0 +1 @@
/cache

67
.serena/project.yml Normal file
View File

@@ -0,0 +1,67 @@
# language of the project (csharp, python, rust, java, typescript, go, cpp, or ruby)
# * For C, use cpp
# * For JavaScript, use typescript
# Special requirements:
# * csharp: Requires the presence of a .sln file in the project folder.
language: typescript
# whether to use the project's gitignore file to ignore files
# Added on 2025-04-07
ignore_all_files_in_gitignore: true
# list of additional paths to ignore
# same syntax as gitignore, so you can use * and **
# Was previously called `ignored_dirs`, please update your config if you are using that.
# Added (renamed) on 2025-04-07
ignored_paths: []
# whether the project is in read-only mode
# If set to true, all editing tools will be disabled and attempts to use them will result in an error
# Added on 2025-04-18
read_only: false
# list of tool names to exclude. We recommend not excluding any tools, see the readme for more details.
# Below is the complete list of tools for convenience.
# To make sure you have the latest list of tools, and to view their descriptions,
# execute `uv run scripts/print_tool_overview.py`.
#
# * `activate_project`: Activates a project by name.
# * `check_onboarding_performed`: Checks whether project onboarding was already performed.
# * `create_text_file`: Creates/overwrites a file in the project directory.
# * `delete_lines`: Deletes a range of lines within a file.
# * `delete_memory`: Deletes a memory from Serena's project-specific memory store.
# * `execute_shell_command`: Executes a shell command.
# * `find_referencing_code_snippets`: Finds code snippets in which the symbol at the given location is referenced.
# * `find_referencing_symbols`: Finds symbols that reference the symbol at the given location (optionally filtered by type).
# * `find_symbol`: Performs a global (or local) search for symbols with/containing a given name/substring (optionally filtered by type).
# * `get_current_config`: Prints the current configuration of the agent, including the active and available projects, tools, contexts, and modes.
# * `get_symbols_overview`: Gets an overview of the top-level symbols defined in a given file.
# * `initial_instructions`: Gets the initial instructions for the current project.
# Should only be used in settings where the system prompt cannot be set,
# e.g. in clients you have no control over, like Claude Desktop.
# * `insert_after_symbol`: Inserts content after the end of the definition of a given symbol.
# * `insert_at_line`: Inserts content at a given line in a file.
# * `insert_before_symbol`: Inserts content before the beginning of the definition of a given symbol.
# * `list_dir`: Lists files and directories in the given directory (optionally with recursion).
# * `list_memories`: Lists memories in Serena's project-specific memory store.
# * `onboarding`: Performs onboarding (identifying the project structure and essential tasks, e.g. for testing or building).
# * `prepare_for_new_conversation`: Provides instructions for preparing for a new conversation (in order to continue with the necessary context).
# * `read_file`: Reads a file within the project directory.
# * `read_memory`: Reads the memory with the given name from Serena's project-specific memory store.
# * `remove_project`: Removes a project from the Serena configuration.
# * `replace_lines`: Replaces a range of lines within a file with new content.
# * `replace_symbol_body`: Replaces the full definition of a symbol.
# * `restart_language_server`: Restarts the language server, may be necessary when edits not through Serena happen.
# * `search_for_pattern`: Performs a search for a pattern in the project.
# * `summarize_changes`: Provides instructions for summarizing the changes made to the codebase.
# * `switch_modes`: Activates modes by providing a list of their names
# * `think_about_collected_information`: Thinking tool for pondering the completeness of collected information.
# * `think_about_task_adherence`: Thinking tool for determining whether the agent is still on track with the current task.
# * `think_about_whether_you_are_done`: Thinking tool for determining whether the task is truly completed.
# * `write_memory`: Writes a named memory (for future reference) to Serena's project-specific memory store.
excluded_tools: []
# initial prompt for the project. It will always be given to the LLM upon activating the project
# (contrary to the memories, which are loaded on demand).
initial_prompt: ""
project_name: "smartai"

View File

@@ -1,5 +1,38 @@
# Changelog
## 2025-10-30 - 0.8.0 - feat(provider.anthropic)
Add extended thinking modes to AnthropicProvider and apply thinking budgets to API calls
- Introduce IAnthropicProviderOptions.extendedThinking to configure thinking modes: 'quick' | 'normal' | 'deep' | 'off'.
- Add getThinkingConfig() helper mapping modes to token budgets (quick=2048, normal=8000, deep=16000, off=0).
- Apply thinking configuration to Anthropic API calls (chat, chatStream, vision, document, research) and increase max_tokens where appropriate (up to 20000).
- Add comprehensive tests (test/test.thinking.anthropic.ts) and update readme.hints.md with usage examples and recommendations.
- Add .claude/settings.local.json for local assistant permissions used in development/testing.
## 2025-10-10 - 0.7.7 - fix(MultiModalModel)
Lazy-load SmartPdf and guard document processing across providers; ensure SmartPdf is initialized only when needed
- Make SmartPdf lazy-loaded: smartpdfInstance is now nullable and no longer started automatically in start()
- Add ensureSmartpdfReady() to initialize and start SmartPdf on demand before document processing
- Providers updated (OpenAI, Anthropic, Ollama, xAI) to call ensureSmartpdfReady() and use the smartpdfInstance for PDF -> image conversion
- stop() now cleans up and nullifies smartpdfInstance to release resources
- Avoids starting a browser/process unless document() is actually used (reduces unnecessary resource usage)
- Add local Claude permissions file (.claude/settings.local.json) for tooling/configuration
## 2025-10-09 - 0.7.6 - fix(provider.elevenlabs)
Provide default ElevenLabs TTS voice fallback and add local tool/project configs
- ElevenLabsProvider: fallback to Samara voice id ('19STyYD15bswVz51nqLf') when no voiceId or defaultVoiceId is provided — avoids throwing an error on TTS calls.
- ElevenLabsProvider: continue to use 'eleven_v3' as the default model for TTS.
- Add .claude/settings.local.json with expanded allowed permissions for local tooling and web search.
- Add .serena/project.yml and .serena/.gitignore to include Serena project configuration and ignore cache.
## 2025-10-08 - 0.7.5 - fix(provider.elevenlabs)
Update ElevenLabs default TTS model to eleven_v3 and add local Claude permissions file
- Changed default ElevenLabs modelId from 'eleven_multilingual_v2' to 'eleven_v3' in ts/provider.elevenlabs.ts to use the newer/default TTS model.
- Added .claude/settings.local.json with a permissions allow-list for local Claude tooling and CI tasks.
## 2025-10-03 - 0.7.4 - fix(provider.anthropic)
Use image/png for embedded PDF images in Anthropic provider and add local Claude settings for development permissions

View File

@@ -1,6 +1,6 @@
{
"name": "@push.rocks/smartai",
"version": "0.7.4",
"version": "0.8.0",
"private": false,
"description": "SmartAi is a versatile TypeScript library designed to facilitate integration and interaction with various AI models, offering functionalities for chat, audio generation, document processing, and vision tasks.",
"main": "dist_ts/index.js",
@@ -15,22 +15,23 @@
"buildDocs": "(tsdoc)"
},
"devDependencies": {
"@git.zone/tsbuild": "^2.6.4",
"@git.zone/tsbuild": "^2.6.8",
"@git.zone/tsbundle": "^2.5.1",
"@git.zone/tsrun": "^1.3.3",
"@git.zone/tstest": "^2.3.2",
"@push.rocks/qenv": "^6.1.0",
"@git.zone/tstest": "^2.3.8",
"@push.rocks/qenv": "^6.1.3",
"@push.rocks/tapbundle": "^6.0.3",
"@types/node": "^22.15.17"
"@types/node": "^22.15.17",
"typescript": "^5.9.3"
},
"dependencies": {
"@anthropic-ai/sdk": "^0.59.0",
"@anthropic-ai/sdk": "^0.65.0",
"@push.rocks/smartarray": "^1.1.0",
"@push.rocks/smartfile": "^11.2.5",
"@push.rocks/smartfile": "^11.2.7",
"@push.rocks/smartpath": "^6.0.0",
"@push.rocks/smartpdf": "^4.1.1",
"@push.rocks/smartpromise": "^4.2.3",
"@push.rocks/smartrequest": "^4.2.1",
"@push.rocks/smartrequest": "^4.3.1",
"@push.rocks/webstream": "^1.0.10",
"openai": "^5.12.2"
},

2602
pnpm-lock.yaml generated

File diff suppressed because it is too large Load Diff

View File

@@ -1 +1,183 @@
# SmartAI Project Hints
## Anthropic Extended Thinking Feature
### Overview
The Anthropic provider now supports extended thinking by default across all methods. Extended thinking enables Claude to spend more time reasoning about complex problems before generating responses, leading to higher quality answers for difficult questions.
### Configuration
Extended thinking is configured at the provider level during instantiation:
```typescript
import * as smartai from '@push.rocks/smartai';
const provider = new smartai.AnthropicProvider({
anthropicToken: 'your-token-here',
extendedThinking: 'normal', // Options: 'quick' | 'normal' | 'deep' | 'off'
});
```
### Thinking Modes
The `extendedThinking` parameter accepts four modes:
| Mode | Budget Tokens | Use Case |
|------|---------------|----------|
| `'quick'` | 2,048 | Lightweight reasoning for simple queries |
| `'normal'` | 8,000 | **Default** - Balanced reasoning for most tasks |
| `'deep'` | 16,000 | Complex reasoning for difficult problems |
| `'off'` | 0 | Disable extended thinking |
**Default Behavior**: If `extendedThinking` is not specified, it defaults to `'normal'` mode (8,000 tokens).
### Supported Methods
Extended thinking is automatically applied to all Anthropic provider methods:
- `chat()` - Synchronous chat
- `chatStream()` - Streaming chat
- `vision()` - Image analysis
- `document()` - PDF document processing
- `research()` - Web research with citations
### Token Budget Constraints
**Important**: The thinking budget must be less than `max_tokens` for the API call. The current `max_tokens` values are:
- `chatStream()`: 20,000 tokens (sufficient for all modes ✓)
- `chat()`: 20,000 tokens (sufficient for all modes ✓)
- `vision()`: 10,000 tokens (sufficient for all modes ✓)
- `document()`: 20,000 tokens (sufficient for all modes ✓)
- `research()`: 20,000 tokens for all searchDepth levels (sufficient ✓)
### Performance and Cost Implications
**Token Usage**:
- You are charged for the **full thinking tokens** generated, not just the summary
- Higher thinking budgets may result in more thorough reasoning but increased costs
- The budget is a **target**, not a strict limit - actual usage may vary
**Response Quality**:
- `'quick'`: Fast responses, basic reasoning
- `'normal'`: Good balance between quality and speed (recommended for most use cases)
- `'deep'`: Highest quality reasoning for complex problems, slower responses
**Recommendations**:
- Start with `'normal'` (default) for general usage
- Use `'deep'` for complex analytical tasks, philosophy, mathematics, or research
- Use `'quick'` for simple factual queries where deep reasoning isn't needed
- Use `'off'` only if you want traditional Claude behavior without extended thinking
### Usage Examples
#### Example 1: Default (Normal Mode)
```typescript
const provider = new smartai.AnthropicProvider({
anthropicToken: process.env.ANTHROPIC_TOKEN,
// extendedThinking defaults to 'normal'
});
await provider.start();
const response = await provider.chat({
systemMessage: 'You are a helpful assistant.',
userMessage: 'Explain the implications of quantum computing.',
messageHistory: [],
});
```
#### Example 2: Deep Thinking for Complex Analysis
```typescript
const provider = new smartai.AnthropicProvider({
anthropicToken: process.env.ANTHROPIC_TOKEN,
extendedThinking: 'deep', // 16,000 token budget
});
await provider.start();
const response = await provider.chat({
systemMessage: 'You are a philosopher and ethicist.',
userMessage: 'Analyze the trolley problem from multiple ethical frameworks.',
messageHistory: [],
});
```
#### Example 3: Quick Mode for Simple Queries
```typescript
const provider = new smartai.AnthropicProvider({
anthropicToken: process.env.ANTHROPIC_TOKEN,
extendedThinking: 'quick', // 2,048 token budget
});
await provider.start();
const response = await provider.chat({
systemMessage: 'You are a helpful assistant.',
userMessage: 'What is the capital of France?',
messageHistory: [],
});
```
#### Example 4: Disable Thinking
```typescript
const provider = new smartai.AnthropicProvider({
anthropicToken: process.env.ANTHROPIC_TOKEN,
extendedThinking: 'off', // No extended thinking
});
await provider.start();
const response = await provider.chat({
systemMessage: 'You are a helpful assistant.',
userMessage: 'Tell me a joke.',
messageHistory: [],
});
```
#### Example 5: Extended Thinking with Vision
```typescript
const provider = new smartai.AnthropicProvider({
anthropicToken: process.env.ANTHROPIC_TOKEN,
extendedThinking: 'normal',
});
await provider.start();
const imageBuffer = await fs.promises.readFile('./image.jpg');
const analysis = await provider.vision({
image: imageBuffer,
prompt: 'Analyze this image in detail and explain what you see.',
});
```
### Testing
Comprehensive tests for extended thinking are available in:
- `test/test.thinking.anthropic.ts` - Tests all thinking modes
Run tests with:
```bash
pnpm test
```
Run specific thinking tests:
```bash
npx tstest test/test.thinking.anthropic.ts --verbose
```
### API Reference
According to Anthropic's documentation:
- Extended thinking is supported on Claude Sonnet 4.5, 4, 3.7, Haiku 4.5, and Opus 4.1, 4
- The current model used is `claude-sonnet-4-5-20250929`
- Minimum thinking budget is 1,024 tokens
- Thinking budget must be less than `max_tokens`
### Implementation Details
The extended thinking feature is implemented via:
1. **Interface**: `IAnthropicProviderOptions.extendedThinking` property
2. **Helper Method**: `getThinkingConfig()` private method that maps modes to token budgets
3. **API Parameter**: Adds `thinking: { type: 'enabled', budget_tokens: number }` to all API calls
The thinking configuration is applied automatically to all API calls when the provider is instantiated.

View File

@@ -5,7 +5,7 @@
[![TypeScript](https://img.shields.io/badge/TypeScript-5.x-blue.svg)](https://www.typescriptlang.org/)
[![License: MIT](https://img.shields.io/badge/License-MIT-yellow.svg)](https://opensource.org/licenses/MIT)
SmartAI unifies the world's leading AI providers - OpenAI, Anthropic, Perplexity, Ollama, Groq, XAI, and Exo - under a single, elegant TypeScript interface. Build AI applications at lightning speed without vendor lock-in.
SmartAI unifies the world's leading AI providers - OpenAI, Anthropic, Perplexity, Ollama, Groq, XAI, Exo, and ElevenLabs - under a single, elegant TypeScript interface. Build AI applications at lightning speed without vendor lock-in.
## 🎯 Why SmartAI?
@@ -28,7 +28,11 @@ import { SmartAi } from '@push.rocks/smartai';
// Initialize with your favorite providers
const ai = new SmartAi({
openaiToken: 'sk-...',
anthropicToken: 'sk-ant-...'
anthropicToken: 'sk-ant-...',
elevenlabsToken: 'sk-...',
elevenlabs: {
defaultVoiceId: '19STyYD15bswVz51nqLf' // Optional: Samara voice
}
});
await ai.start();
@@ -49,6 +53,7 @@ Choose the right provider for your use case:
|----------|:----:|:---------:|:---:|:------:|:---------:|:--------:|:------:|------------|
| **OpenAI** | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | • gpt-image-1<br>• DALL-E 3<br>• Deep research API |
| **Anthropic** | ✅ | ✅ | ❌ | ✅ | ✅ | ✅ | ❌ | • Claude Sonnet 4.5<br>• Superior reasoning<br>• Web search API |
| **ElevenLabs** | ❌ | ❌ | ✅ | ❌ | ❌ | ❌ | ❌ | • Premium TTS<br>• 70+ languages<br>• Natural voices |
| **Ollama** | ✅ | ✅ | ❌ | ✅ | ✅ | ❌ | ❌ | • 100% local<br>• Privacy-first<br>• No API costs |
| **XAI** | ✅ | ✅ | ❌ | ❌ | ✅ | ❌ | ❌ | • Grok models<br>• Real-time data<br>• Uncensored |
| **Perplexity** | ✅ | ✅ | ❌ | ❌ | ❌ | ✅ | ❌ | • Web-aware<br>• Research-focused<br>• Sonar Pro models |
@@ -105,13 +110,27 @@ while (true) {
### 🎙️ Text-to-Speech
Generate natural voices with OpenAI:
Generate natural voices with OpenAI or ElevenLabs:
```typescript
// OpenAI TTS
const audioStream = await ai.openaiProvider.audio({
message: 'Welcome to the future of AI development!'
});
// ElevenLabs TTS - Premium quality, natural voices (uses v3 by default)
const elevenLabsAudio = await ai.elevenlabsProvider.audio({
message: 'Experience the most lifelike text to speech technology.',
voiceId: '19STyYD15bswVz51nqLf', // Optional: Samara voice
modelId: 'eleven_v3', // Optional: defaults to eleven_v3 (70+ languages, most expressive)
voiceSettings: { // Optional: fine-tune voice characteristics
stability: 0.5, // 0-1: Speech consistency
similarity_boost: 0.8, // 0-1: Voice similarity to original
style: 0.0, // 0-1: Expressiveness (higher = more expressive)
use_speaker_boost: true // Enhanced clarity
}
});
// Stream directly to speakers
audioStream.pipe(speakerOutput);
@@ -548,6 +567,7 @@ npm install @push.rocks/smartai
export OPENAI_API_KEY=sk-...
export ANTHROPIC_API_KEY=sk-ant-...
export PERPLEXITY_API_KEY=pplx-...
export ELEVENLABS_API_KEY=sk-...
# ... etc
```
@@ -574,6 +594,7 @@ export PERPLEXITY_API_KEY=pplx-...
| **Complex Reasoning** | Anthropic | Superior logical thinking, safer outputs |
| **Research & Facts** | Perplexity | Web-aware, provides citations |
| **Deep Research** | OpenAI | Deep Research API with comprehensive analysis |
| **Premium TTS** | ElevenLabs | Most natural voices, 70+ languages, superior quality (v3) |
| **Speed Critical** | Groq | 10x faster inference, sub-second responses |
| **Privacy Critical** | Ollama | 100% local, no data leaves your servers |
| **Real-time Data** | XAI | Access to current information |

View File

@@ -0,0 +1,54 @@
import { expect, tap } from '@push.rocks/tapbundle';
import * as qenv from '@push.rocks/qenv';
import * as smartfile from '@push.rocks/smartfile';
const testQenv = new qenv.Qenv('./', './.nogit/');
import * as smartai from '../ts/index.js';
let testSmartai: smartai.SmartAi;
tap.test('ElevenLabs Audio: should create a smartai instance with ElevenLabs provider', async () => {
testSmartai = new smartai.SmartAi({
elevenlabsToken: await testQenv.getEnvVarOnDemand('ELEVENLABS_TOKEN'),
elevenlabs: {
defaultVoiceId: '19STyYD15bswVz51nqLf',
},
});
await testSmartai.start();
});
tap.test('ElevenLabs Audio: should create audio response', async () => {
const audioStream = await testSmartai.elevenlabsProvider.audio({
message: 'Welcome to SmartAI, the unified interface for the world\'s leading artificial intelligence providers. SmartAI brings together OpenAI, Anthropic, Perplexity, and ElevenLabs under a single elegant TypeScript API. Whether you need text generation, vision analysis, document processing, or premium text-to-speech capabilities, SmartAI provides a consistent and powerful interface for all your AI needs. Build intelligent applications at lightning speed without vendor lock-in.',
});
const chunks: Uint8Array[] = [];
for await (const chunk of audioStream) {
chunks.push(chunk as Uint8Array);
}
const audioBuffer = Buffer.concat(chunks);
await smartfile.fs.toFs(audioBuffer, './.nogit/testoutput_elevenlabs.mp3');
console.log(`Audio Buffer length: ${audioBuffer.length}`);
expect(audioBuffer.length).toBeGreaterThan(0);
});
tap.test('ElevenLabs Audio: should create audio with custom voice', async () => {
const audioStream = await testSmartai.elevenlabsProvider.audio({
message: 'Testing with a different voice.',
voiceId: 'JBFqnCBsd6RMkjVDRZzb',
});
const chunks: Uint8Array[] = [];
for await (const chunk of audioStream) {
chunks.push(chunk as Uint8Array);
}
const audioBuffer = Buffer.concat(chunks);
await smartfile.fs.toFs(audioBuffer, './.nogit/testoutput_elevenlabs_custom.mp3');
console.log(`Audio Buffer length (custom voice): ${audioBuffer.length}`);
expect(audioBuffer.length).toBeGreaterThan(0);
});
tap.test('ElevenLabs Audio: should stop the smartai instance', async () => {
await testSmartai.stop();
});
export default tap.start();

View File

@@ -0,0 +1,151 @@
import { expect, tap } from '@push.rocks/tapbundle';
import * as qenv from '@push.rocks/qenv';
const testQenv = new qenv.Qenv('./', './.nogit/');
import * as smartai from '../ts/index.js';
let anthropicProviderQuick: smartai.AnthropicProvider;
let anthropicProviderNormal: smartai.AnthropicProvider;
let anthropicProviderDeep: smartai.AnthropicProvider;
let anthropicProviderOff: smartai.AnthropicProvider;
// Test 'quick' mode
tap.test('Extended Thinking: should create Anthropic provider with quick mode', async () => {
anthropicProviderQuick = new smartai.AnthropicProvider({
anthropicToken: await testQenv.getEnvVarOnDemand('ANTHROPIC_TOKEN'),
extendedThinking: 'quick',
});
await anthropicProviderQuick.start();
expect(anthropicProviderQuick).toBeInstanceOf(smartai.AnthropicProvider);
});
tap.test('Extended Thinking: should chat with quick mode (2048 tokens)', async () => {
const userMessage = 'Explain quantum entanglement in simple terms.';
const response = await anthropicProviderQuick.chat({
systemMessage: 'You are a helpful physics teacher.',
userMessage: userMessage,
messageHistory: [],
});
console.log(`Quick Mode - User: ${userMessage}`);
console.log(`Quick Mode - Response length: ${response.message.length} chars`);
expect(response.role).toEqual('assistant');
expect(response.message).toBeTruthy();
expect(response.message.toLowerCase()).toInclude('quantum');
});
tap.test('Extended Thinking: should stop quick mode provider', async () => {
await anthropicProviderQuick.stop();
});
// Test 'normal' mode (default)
tap.test('Extended Thinking: should create Anthropic provider with normal mode (default)', async () => {
anthropicProviderNormal = new smartai.AnthropicProvider({
anthropicToken: await testQenv.getEnvVarOnDemand('ANTHROPIC_TOKEN'),
// extendedThinking not specified, should default to 'normal'
});
await anthropicProviderNormal.start();
expect(anthropicProviderNormal).toBeInstanceOf(smartai.AnthropicProvider);
});
tap.test('Extended Thinking: should chat with normal mode (8000 tokens default)', async () => {
const userMessage = 'What are the implications of the P vs NP problem?';
const response = await anthropicProviderNormal.chat({
systemMessage: 'You are a helpful computer science expert.',
userMessage: userMessage,
messageHistory: [],
});
console.log(`Normal Mode - User: ${userMessage}`);
console.log(`Normal Mode - Response length: ${response.message.length} chars`);
expect(response.role).toEqual('assistant');
expect(response.message).toBeTruthy();
expect(response.message.length).toBeGreaterThan(50);
});
tap.test('Extended Thinking: should stop normal mode provider', async () => {
await anthropicProviderNormal.stop();
});
// Test 'deep' mode
tap.test('Extended Thinking: should create Anthropic provider with deep mode', async () => {
anthropicProviderDeep = new smartai.AnthropicProvider({
anthropicToken: await testQenv.getEnvVarOnDemand('ANTHROPIC_TOKEN'),
extendedThinking: 'deep',
});
await anthropicProviderDeep.start();
expect(anthropicProviderDeep).toBeInstanceOf(smartai.AnthropicProvider);
});
tap.test('Extended Thinking: should chat with deep mode (16000 tokens)', async () => {
const userMessage = 'Analyze the philosophical implications of artificial consciousness.';
const response = await anthropicProviderDeep.chat({
systemMessage: 'You are a philosopher and cognitive scientist.',
userMessage: userMessage,
messageHistory: [],
});
console.log(`Deep Mode - User: ${userMessage}`);
console.log(`Deep Mode - Response length: ${response.message.length} chars`);
expect(response.role).toEqual('assistant');
expect(response.message).toBeTruthy();
expect(response.message.length).toBeGreaterThan(100);
});
tap.test('Extended Thinking: should stop deep mode provider', async () => {
await anthropicProviderDeep.stop();
});
// Test 'off' mode
tap.test('Extended Thinking: should create Anthropic provider with thinking disabled', async () => {
anthropicProviderOff = new smartai.AnthropicProvider({
anthropicToken: await testQenv.getEnvVarOnDemand('ANTHROPIC_TOKEN'),
extendedThinking: 'off',
});
await anthropicProviderOff.start();
expect(anthropicProviderOff).toBeInstanceOf(smartai.AnthropicProvider);
});
tap.test('Extended Thinking: should chat with thinking disabled', async () => {
const userMessage = 'What is 2 + 2?';
const response = await anthropicProviderOff.chat({
systemMessage: 'You are a helpful assistant.',
userMessage: userMessage,
messageHistory: [],
});
console.log(`Thinking Off - User: ${userMessage}`);
console.log(`Thinking Off - Response: ${response.message}`);
expect(response.role).toEqual('assistant');
expect(response.message).toBeTruthy();
expect(response.message).toInclude('4');
});
tap.test('Extended Thinking: should stop off mode provider', async () => {
await anthropicProviderOff.stop();
});
// Test with vision method
tap.test('Extended Thinking: should work with vision method', async () => {
const provider = new smartai.AnthropicProvider({
anthropicToken: await testQenv.getEnvVarOnDemand('ANTHROPIC_TOKEN'),
extendedThinking: 'normal',
});
await provider.start();
// Create a simple test image (1x1 red pixel PNG)
const redPixelPng = Buffer.from(
'iVBORw0KGgoAAAANSUhEUgAAAAEAAAABCAYAAAAfFcSJAAAADUlEQVR42mP8z8DwHwAFBQIAX8jx0gAAAABJRU5ErkJggg==',
'base64'
);
const response = await provider.vision({
image: redPixelPng,
prompt: 'What color is this image?',
});
console.log(`Vision with Thinking - Response: ${response}`);
expect(response).toBeTruthy();
expect(response.toLowerCase()).toInclude('red');
await provider.stop();
});
export default tap.start();

View File

@@ -3,6 +3,6 @@
*/
export const commitinfo = {
name: '@push.rocks/smartai',
version: '0.7.4',
version: '0.8.0',
description: 'SmartAi is a versatile TypeScript library designed to facilitate integration and interaction with various AI models, offering functionalities for chat, audio generation, document processing, and vision tasks.'
}

View File

@@ -111,19 +111,30 @@ export interface ImageResponse {
export abstract class MultiModalModel {
/**
* SmartPdf instance for document processing
* Shared across all methods that need PDF functionality
* Lazy-loaded only when PDF processing is needed to avoid starting browser unnecessarily
*/
protected smartpdfInstance: plugins.smartpdf.SmartPdf;
protected smartpdfInstance: plugins.smartpdf.SmartPdf | null = null;
/**
* Ensures SmartPdf instance is initialized and ready
* Call this before using smartpdfInstance in document processing methods
*/
protected async ensureSmartpdfReady(): Promise<void> {
if (!this.smartpdfInstance) {
this.smartpdfInstance = new plugins.smartpdf.SmartPdf();
await this.smartpdfInstance.start();
}
}
/**
* Initializes the model and any necessary resources
* Should be called before using any other methods
*/
public async start(): Promise<void> {
this.smartpdfInstance = new plugins.smartpdf.SmartPdf();
await this.smartpdfInstance.start();
// SmartPdf is now lazy-loaded only when needed for PDF processing
// This avoids starting a browser unless document() method is actually used
}
/**
* Cleans up any resources used by the model
* Should be called when the model is no longer needed
@@ -131,6 +142,7 @@ export abstract class MultiModalModel {
public async stop(): Promise<void> {
if (this.smartpdfInstance) {
await this.smartpdfInstance.stop();
this.smartpdfInstance = null;
}
}

View File

@@ -96,6 +96,18 @@ export class Conversation {
return conversation;
}
public static async createWithElevenlabs(smartaiRefArg: SmartAi) {
if (!smartaiRefArg.elevenlabsProvider) {
throw new Error('ElevenLabs provider not available');
}
const conversation = new Conversation(smartaiRefArg, {
processFunction: async (input) => {
return '' // TODO implement proper streaming
}
});
return conversation;
}
// INSTANCE
smartaiRef: SmartAi
private systemMessage: string;

View File

@@ -1,6 +1,7 @@
import { Conversation } from './classes.conversation.js';
import * as plugins from './plugins.js';
import { AnthropicProvider } from './provider.anthropic.js';
import { ElevenLabsProvider } from './provider.elevenlabs.js';
import { OllamaProvider } from './provider.ollama.js';
import { OpenAiProvider } from './provider.openai.js';
import { PerplexityProvider } from './provider.perplexity.js';
@@ -15,6 +16,7 @@ export interface ISmartAiOptions {
perplexityToken?: string;
groqToken?: string;
xaiToken?: string;
elevenlabsToken?: string;
exo?: {
baseUrl?: string;
apiKey?: string;
@@ -24,9 +26,13 @@ export interface ISmartAiOptions {
model?: string;
visionModel?: string;
};
elevenlabs?: {
defaultVoiceId?: string;
defaultModelId?: string;
};
}
export type TProvider = 'openai' | 'anthropic' | 'perplexity' | 'ollama' | 'exo' | 'groq' | 'xai';
export type TProvider = 'openai' | 'anthropic' | 'perplexity' | 'ollama' | 'exo' | 'groq' | 'xai' | 'elevenlabs';
export class SmartAi {
public options: ISmartAiOptions;
@@ -38,6 +44,7 @@ export class SmartAi {
public exoProvider: ExoProvider;
public groqProvider: GroqProvider;
public xaiProvider: XAIProvider;
public elevenlabsProvider: ElevenLabsProvider;
constructor(optionsArg: ISmartAiOptions) {
this.options = optionsArg;
@@ -74,6 +81,14 @@ export class SmartAi {
});
await this.xaiProvider.start();
}
if (this.options.elevenlabsToken) {
this.elevenlabsProvider = new ElevenLabsProvider({
elevenlabsToken: this.options.elevenlabsToken,
defaultVoiceId: this.options.elevenlabs?.defaultVoiceId,
defaultModelId: this.options.elevenlabs?.defaultModelId,
});
await this.elevenlabsProvider.start();
}
if (this.options.ollama) {
this.ollamaProvider = new OllamaProvider({
baseUrl: this.options.ollama.baseUrl,
@@ -107,6 +122,9 @@ export class SmartAi {
if (this.xaiProvider) {
await this.xaiProvider.stop();
}
if (this.elevenlabsProvider) {
await this.elevenlabsProvider.stop();
}
if (this.ollamaProvider) {
await this.ollamaProvider.stop();
}
@@ -134,6 +152,8 @@ export class SmartAi {
return Conversation.createWithGroq(this);
case 'xai':
return Conversation.createWithXai(this);
case 'elevenlabs':
return Conversation.createWithElevenlabs(this);
default:
throw new Error('Provider not available');
}

View File

@@ -7,3 +7,4 @@ export * from './provider.groq.js';
export * from './provider.ollama.js';
export * from './provider.xai.js';
export * from './provider.exo.js';
export * from './provider.elevenlabs.js';

View File

@@ -20,6 +20,7 @@ export interface IAnthropicProviderOptions {
enableWebSearch?: boolean;
searchDomainAllowList?: string[];
searchDomainBlockList?: string[];
extendedThinking?: 'quick' | 'normal' | 'deep' | 'off';
}
export class AnthropicProvider extends MultiModalModel {
@@ -42,6 +43,25 @@ export class AnthropicProvider extends MultiModalModel {
await super.stop();
}
/**
* Returns the thinking configuration based on provider options.
* Defaults to 'normal' mode (8000 tokens) if not specified.
*/
private getThinkingConfig(): { type: 'enabled'; budget_tokens: number } | undefined {
const mode = this.options.extendedThinking ?? 'normal';
const budgetMap = {
quick: 2048,
normal: 8000,
deep: 16000,
off: 0,
};
const budget = budgetMap[mode];
return budget > 0 ? { type: 'enabled', budget_tokens: budget } : undefined;
}
public async chatStream(input: ReadableStream<Uint8Array>): Promise<ReadableStream<string>> {
// Create a TextDecoder to handle incoming chunks
const decoder = new TextDecoder();
@@ -76,12 +96,14 @@ export class AnthropicProvider extends MultiModalModel {
// If we have a complete message, send it to Anthropic
if (currentMessage) {
const thinkingConfig = this.getThinkingConfig();
const stream = await this.anthropicApiClient.messages.create({
model: 'claude-sonnet-4-5-20250929',
messages: [{ role: currentMessage.role, content: currentMessage.content }],
system: '',
stream: true,
max_tokens: 4000,
max_tokens: 20000,
...(thinkingConfig && { thinking: thinkingConfig }),
});
// Process each chunk from Anthropic
@@ -120,6 +142,7 @@ export class AnthropicProvider extends MultiModalModel {
content: msg.content
}));
const thinkingConfig = this.getThinkingConfig();
const result = await this.anthropicApiClient.messages.create({
model: 'claude-sonnet-4-5-20250929',
system: optionsArg.systemMessage,
@@ -127,7 +150,8 @@ export class AnthropicProvider extends MultiModalModel {
...messages,
{ role: 'user' as const, content: optionsArg.userMessage }
],
max_tokens: 4000,
max_tokens: 20000,
...(thinkingConfig && { thinking: thinkingConfig }),
});
// Extract text content from the response
@@ -167,13 +191,15 @@ export class AnthropicProvider extends MultiModalModel {
}
];
const thinkingConfig = this.getThinkingConfig();
const result = await this.anthropicApiClient.messages.create({
model: 'claude-sonnet-4-5-20250929',
messages: [{
role: 'user',
content
}],
max_tokens: 1024
max_tokens: 10000,
...(thinkingConfig && { thinking: thinkingConfig }),
});
// Extract text content from the response
@@ -192,11 +218,14 @@ export class AnthropicProvider extends MultiModalModel {
pdfDocuments: Uint8Array[];
messageHistory: ChatMessage[];
}): Promise<{ message: any }> {
// Ensure SmartPdf is initialized before processing documents
await this.ensureSmartpdfReady();
// Convert PDF documents to images using SmartPDF
let documentImageBytesArray: Uint8Array[] = [];
for (const pdfDocument of optionsArg.pdfDocuments) {
const documentImageArray = await this.smartpdfInstance.convertPDFToPngBytes(pdfDocument);
const documentImageArray = await this.smartpdfInstance!.convertPDFToPngBytes(pdfDocument);
documentImageBytesArray = documentImageBytesArray.concat(documentImageArray);
}
@@ -226,6 +255,7 @@ export class AnthropicProvider extends MultiModalModel {
});
}
const thinkingConfig = this.getThinkingConfig();
const result = await this.anthropicApiClient.messages.create({
model: 'claude-sonnet-4-5-20250929',
system: optionsArg.systemMessage,
@@ -233,7 +263,8 @@ export class AnthropicProvider extends MultiModalModel {
...messages,
{ role: 'user', content }
],
max_tokens: 4096
max_tokens: 20000,
...(thinkingConfig && { thinking: thinkingConfig }),
});
// Extract text content from the response
@@ -283,8 +314,8 @@ export class AnthropicProvider extends MultiModalModel {
}
// Configure the request based on search depth
const maxTokens = optionsArg.searchDepth === 'deep' ? 8192 :
optionsArg.searchDepth === 'advanced' ? 6144 : 4096;
const maxTokens = optionsArg.searchDepth === 'deep' ? 20000 :
optionsArg.searchDepth === 'advanced' ? 20000 : 20000;
// Create the research request
const requestParams: any = {
@@ -305,6 +336,12 @@ export class AnthropicProvider extends MultiModalModel {
requestParams.tools = tools;
}
// Add thinking configuration if enabled
const thinkingConfig = this.getThinkingConfig();
if (thinkingConfig) {
requestParams.thinking = thinkingConfig;
}
// Execute the research request
const result = await this.anthropicApiClient.messages.create(requestParams);

114
ts/provider.elevenlabs.ts Normal file
View File

@@ -0,0 +1,114 @@
import * as plugins from './plugins.js';
import { MultiModalModel } from './abstract.classes.multimodal.js';
import type {
ChatOptions,
ChatResponse,
ResearchOptions,
ResearchResponse,
ImageGenerateOptions,
ImageEditOptions,
ImageResponse
} from './abstract.classes.multimodal.js';
export interface IElevenLabsProviderOptions {
elevenlabsToken: string;
defaultVoiceId?: string;
defaultModelId?: string;
}
export interface IElevenLabsVoiceSettings {
stability?: number;
similarity_boost?: number;
style?: number;
use_speaker_boost?: boolean;
}
export class ElevenLabsProvider extends MultiModalModel {
private options: IElevenLabsProviderOptions;
private baseUrl: string = 'https://api.elevenlabs.io/v1';
constructor(optionsArg: IElevenLabsProviderOptions) {
super();
this.options = optionsArg;
}
public async start() {
await super.start();
}
public async stop() {
await super.stop();
}
public async chat(optionsArg: ChatOptions): Promise<ChatResponse> {
throw new Error('ElevenLabs does not support chat functionality. This provider is specialized for text-to-speech only.');
}
public async chatStream(input: ReadableStream<Uint8Array>): Promise<ReadableStream<string>> {
throw new Error('ElevenLabs does not support chat streaming functionality. This provider is specialized for text-to-speech only.');
}
public async audio(optionsArg: {
message: string;
voiceId?: string;
modelId?: string;
voiceSettings?: IElevenLabsVoiceSettings;
}): Promise<NodeJS.ReadableStream> {
// Use Samara voice as default fallback
const voiceId = optionsArg.voiceId || this.options.defaultVoiceId || '19STyYD15bswVz51nqLf';
const modelId = optionsArg.modelId || this.options.defaultModelId || 'eleven_v3';
const url = `${this.baseUrl}/text-to-speech/${voiceId}`;
const requestBody: any = {
text: optionsArg.message,
model_id: modelId,
};
if (optionsArg.voiceSettings) {
requestBody.voice_settings = optionsArg.voiceSettings;
}
const response = await plugins.smartrequest.SmartRequest.create()
.url(url)
.header('xi-api-key', this.options.elevenlabsToken)
.json(requestBody)
.autoDrain(false)
.post();
if (!response.ok) {
const errorText = await response.text();
throw new Error(`ElevenLabs API error: ${response.status} ${response.statusText} - ${errorText}`);
}
const nodeStream = response.streamNode();
return nodeStream;
}
public async vision(optionsArg: { image: Buffer; prompt: string }): Promise<string> {
throw new Error('ElevenLabs does not support vision functionality. This provider is specialized for text-to-speech only.');
}
public async document(optionsArg: {
systemMessage: string;
userMessage: string;
pdfDocuments: Uint8Array[];
messageHistory: any[];
}): Promise<{ message: any }> {
throw new Error('ElevenLabs does not support document processing. This provider is specialized for text-to-speech only.');
}
public async research(optionsArg: ResearchOptions): Promise<ResearchResponse> {
throw new Error('ElevenLabs does not support research capabilities. This provider is specialized for text-to-speech only.');
}
public async imageGenerate(optionsArg: ImageGenerateOptions): Promise<ImageResponse> {
throw new Error('ElevenLabs does not support image generation. This provider is specialized for text-to-speech only.');
}
public async imageEdit(optionsArg: ImageEditOptions): Promise<ImageResponse> {
throw new Error('ElevenLabs does not support image editing. This provider is specialized for text-to-speech only.');
}
}

View File

@@ -216,11 +216,14 @@ export class OllamaProvider extends MultiModalModel {
pdfDocuments: Uint8Array[];
messageHistory: ChatMessage[];
}): Promise<{ message: any }> {
// Ensure SmartPdf is initialized before processing documents
await this.ensureSmartpdfReady();
// Convert PDF documents to images using SmartPDF
let documentImageBytesArray: Uint8Array[] = [];
for (const pdfDocument of optionsArg.pdfDocuments) {
const documentImageArray = await this.smartpdfInstance.convertPDFToPngBytes(pdfDocument);
const documentImageArray = await this.smartpdfInstance!.convertPDFToPngBytes(pdfDocument);
documentImageBytesArray = documentImageBytesArray.concat(documentImageArray);
}

View File

@@ -173,11 +173,14 @@ export class OpenAiProvider extends MultiModalModel {
content: any;
}[];
}) {
// Ensure SmartPdf is initialized before processing documents
await this.ensureSmartpdfReady();
let pdfDocumentImageBytesArray: Uint8Array[] = [];
// Convert each PDF into one or more image byte arrays.
for (const pdfDocument of optionsArg.pdfDocuments) {
const documentImageArray = await this.smartpdfInstance.convertPDFToPngBytes(pdfDocument);
const documentImageArray = await this.smartpdfInstance!.convertPDFToPngBytes(pdfDocument);
pdfDocumentImageBytesArray = pdfDocumentImageBytesArray.concat(documentImageArray);
}

View File

@@ -149,11 +149,14 @@ export class XAIProvider extends MultiModalModel {
pdfDocuments: Uint8Array[];
messageHistory: { role: string; content: string; }[];
}): Promise<{ message: any }> {
// Ensure SmartPdf is initialized before processing documents
await this.ensureSmartpdfReady();
// First convert PDF documents to images
let pdfDocumentImageBytesArray: Uint8Array[] = [];
for (const pdfDocument of optionsArg.pdfDocuments) {
const documentImageArray = await this.smartpdfInstance.convertPDFToPngBytes(pdfDocument);
const documentImageArray = await this.smartpdfInstance!.convertPDFToPngBytes(pdfDocument);
pdfDocumentImageBytesArray = pdfDocumentImageBytesArray.concat(documentImageArray);
}