Compare commits

...

27 Commits

Author SHA1 Message Date
d296a1b676 v0.13.2
Some checks failed
Default (tags) / security (push) Failing after 1s
Default (tags) / test (push) Failing after 1s
Default (tags) / release (push) Has been skipped
Default (tags) / metadata (push) Has been skipped
2026-01-20 02:50:46 +00:00
f74d1cf2ba fix(repo): no changes detected in diff; nothing to commit 2026-01-20 02:50:46 +00:00
b29d7f5df3 fix(classes.smartai): use IOllamaModelOptions type for defaultOptions instead of inline type 2026-01-20 02:50:32 +00:00
00b8312fa7 v0.13.1
Some checks failed
Default (tags) / security (push) Failing after 1s
Default (tags) / test (push) Failing after 1s
Default (tags) / release (push) Has been skipped
Default (tags) / metadata (push) Has been skipped
2026-01-20 02:40:29 +00:00
4be91d678a fix(): no changes detected; no release required 2026-01-20 02:40:29 +00:00
1156320546 feat(provider.ollama): add native tool calling support for Ollama API
- Add IOllamaTool and IOllamaToolCall types for native function calling
- Add think parameter to IOllamaModelOptions for reasoning models (GPT-OSS, QwQ)
- Add tools parameter to IOllamaChatOptions
- Add toolCalls to response interfaces (IOllamaStreamChunk, IOllamaChatResponse)
- Update chat(), chatStreamResponse(), collectStreamResponse(), chatWithOptions() to support native tools
- Parse tool_calls from Ollama API responses
- Add support for tool message role in conversation history
2026-01-20 02:39:28 +00:00
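A hedged sketch of how the tool-calling additions in this commit might be used. The IOllamaTool shape, the OllamaProvider constructor options, and the exact fields on the response are assumptions based on the commit description and the Ollama function-calling API; only chatWithOptions(), tools, and toolCalls are named in the commit itself.

```typescript
import * as smartai from '@push.rocks/smartai';

// Hypothetical tool definition following the Ollama function-calling schema.
const weatherTool = {
  type: 'function' as const,
  function: {
    name: 'get_weather',
    description: 'Look up the current weather for a city',
    parameters: {
      type: 'object',
      properties: { city: { type: 'string' } },
      required: ['city'],
    },
  },
};

// Constructor options are assumptions; any tool-capable local model works here.
const provider = new smartai.OllamaProvider({ model: 'qwen2.5' });
await provider.start();

// chatWithOptions() is named in the commit; the option names mirror IOllamaChatOptions.
const response = await provider.chatWithOptions({
  systemMessage: 'You are a weather assistant.',
  userMessage: 'What is the weather in Berlin right now?',
  messageHistory: [],
  tools: [weatherTool],
});

// Parsed tool calls are surfaced on the response when the model decides to call one.
for (const call of response.toolCalls ?? []) {
  console.log(call.function?.name, call.function?.arguments);
}
```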
7cb9bc24dc v0.13.0
Some checks failed
Default (tags) / security (push) Failing after 1s
Default (tags) / test (push) Failing after 1s
Default (tags) / release (push) Has been skipped
Default (tags) / metadata (push) Has been skipped
2026-01-20 02:03:20 +00:00
9ad039f77b feat(provider.ollama): add chain-of-thought reasoning support to chat messages and Ollama provider 2026-01-20 02:03:20 +00:00
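A minimal sketch of the reasoning support added here. The reasoning field comes from the commit description; the constructor options and the model name are assumptions.

```typescript
import * as smartai from '@push.rocks/smartai';

// A reasoning-capable local model (e.g. qwq) is assumed to be pulled in Ollama.
const provider = new smartai.OllamaProvider({ model: 'qwq' });
await provider.start();

const res = await provider.chat({
  systemMessage: 'You are a careful math tutor.',
  userMessage: 'What is 17 * 24? Show your working.',
  messageHistory: [],
});

console.log(res.message);   // final answer
console.log(res.reasoning); // chain-of-thought text, if the model emitted any
```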
6c6652d75d v0.12.1
Some checks failed
Default (tags) / security (push) Failing after 1s
Default (tags) / test (push) Failing after 1s
Default (tags) / release (push) Has been skipped
Default (tags) / metadata (push) Has been skipped
2026-01-20 01:27:52 +00:00
2040b3c629 fix(docs): update documentation: clarify provider capabilities, add provider capabilities summary, polish examples and formatting, and remove Serena project config 2026-01-20 01:27:52 +00:00
ae8d3ccf33 v0.12.0
Some checks failed
Default (tags) / security (push) Failing after 1s
Default (tags) / test (push) Failing after 1s
Default (tags) / release (push) Has been skipped
Default (tags) / metadata (push) Has been skipped
2026-01-20 01:10:27 +00:00
3b900d0ba9 feat(ollama): add support for base64-encoded images in chat messages and forward them to the Ollama provider 2026-01-20 01:10:27 +00:00
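A short sketch of the new images field described in this commit; everything beyond the optional images?: string[] array (constructor options, model name) is illustrative.

```typescript
import * as fs from 'fs';
import * as smartai from '@push.rocks/smartai';

// Constructor options are assumptions; llava is one example of a vision-capable model.
const provider = new smartai.OllamaProvider({ model: 'llava' });
await provider.start();

// Images are attached as base64-encoded strings.
const imageBase64 = fs.readFileSync('photo.jpg').toString('base64');

const res = await provider.chat({
  systemMessage: 'You are an image analyst.',
  userMessage: 'What is shown in this picture?',
  messageHistory: [],
  images: [imageBase64],
});
console.log(res.message);
```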
d49152390f v0.11.1 2026-01-20 00:37:59 +00:00
d615ec9227 feat(streaming): add chatStreaming method with token callback for real-time generation progress
- Add StreamingChatOptions interface with onToken callback
- Add optional chatStreaming method to MultiModalModel abstract class
- Implement chatStreaming in OllamaProvider using collectStreamResponse
2026-01-20 00:37:49 +00:00
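A hedged sketch of how chatStreaming() might be called, based only on the bullet points above; the return shape is assumed to match the regular chat response.

```typescript
import * as smartai from '@push.rocks/smartai';

const provider = new smartai.OllamaProvider({}); // constructor options omitted for brevity
await provider.start();

const result = await provider.chatStreaming({
  systemMessage: 'You are a storyteller.',
  userMessage: 'Tell a two-sentence story about a lighthouse.',
  messageHistory: [],
  onToken: (token: string) => process.stdout.write(token), // invoked for each generated token
});

console.log('\nFinal message:', result.message);
```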
dfa863ee7d v0.11.0
Some checks failed
Default (tags) / security (push) Failing after 1s
Default (tags) / test (push) Failing after 1s
Default (tags) / release (push) Has been skipped
Default (tags) / metadata (push) Has been skipped
2026-01-20 00:12:21 +00:00
c84ede1f1d feat(ollama): support defaultOptions and defaultTimeout for ollama provider 2026-01-20 00:12:21 +00:00
4937dbf6ab v0.10.1
Some checks failed
Default (tags) / security (push) Failing after 1s
Default (tags) / test (push) Failing after 1s
Default (tags) / release (push) Has been skipped
Default (tags) / metadata (push) Has been skipped
2026-01-20 00:03:06 +00:00
8cb052449e fix(): no changes detected — no release necessary 2026-01-20 00:03:06 +00:00
126e9b239b feat(OllamaProvider): add model options, streaming support, and thinking tokens
- Add IOllamaModelOptions interface for runtime options (num_ctx, temperature, etc.)
- Extend IOllamaProviderOptions with defaultOptions and defaultTimeout
- Add IOllamaChatOptions for per-request overrides
- Add IOllamaStreamChunk and IOllamaChatResponse interfaces
- Add chatStreamResponse() for async iteration with options
- Add collectStreamResponse() for streaming with progress callback
- Add chatWithOptions() for non-streaming with full options
- Update chat() to use defaultOptions and defaultTimeout
2026-01-20 00:02:45 +00:00
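Taken together, the options above suggest usage along these lines. This is a sketch: the field names follow the commit text (IOllamaProviderOptions, IOllamaChatOptions, chatStreamResponse), while the endpoint URL, the timeout unit, and the chunk shape are assumptions.

```typescript
import * as smartai from '@push.rocks/smartai';

const provider = new smartai.OllamaProvider({
  baseUrl: 'http://localhost:11434',                   // assumption: default local Ollama endpoint
  defaultOptions: { num_ctx: 8192, temperature: 0.2 }, // provider-level runtime defaults
  defaultTimeout: 120_000,                             // milliseconds (assumed unit)
});
await provider.start();

// Async iteration over the streaming response with a per-request override.
for await (const chunk of provider.chatStreamResponse({
  systemMessage: 'You are concise.',
  userMessage: 'Summarize what a vector database is.',
  messageHistory: [],
  options: { num_predict: 256 },                       // per-request IOllamaChatOptions override
})) {
  process.stdout.write(chunk.message ?? '');           // chunk field name is an assumption
}
```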
a556053510 v0.10.0
Some checks failed
Default (tags) / security (push) Failing after 1s
Default (tags) / test (push) Failing after 1s
Default (tags) / release (push) Has been skipped
Default (tags) / metadata (push) Has been skipped
2026-01-18 22:11:52 +00:00
e4dc81edc9 feat(mistral): add Mistral provider with native PDF OCR and chat integration 2026-01-18 22:11:52 +00:00
6f79dc3535 v0.9.0
Some checks failed
Default (tags) / security (push) Failing after 1s
Default (tags) / test (push) Failing after 1s
Default (tags) / release (push) Has been skipped
Default (tags) / metadata (push) Has been skipped
2026-01-18 16:26:16 +00:00
b4ced080f2 feat(providers): Add Anthropic extended thinking and adapt providers to new streaming/file APIs; bump dependencies and update docs, tests and configuration 2026-01-18 16:26:16 +00:00
e8a2a3ff1b 0.8.0
Some checks failed
Default (tags) / security (push) Failing after 24s
Default (tags) / test (push) Failing after 14s
Default (tags) / release (push) Has been skipped
Default (tags) / metadata (push) Has been skipped
2025-10-30 12:11:18 +00:00
cbc9d8d45b feat(provider.anthropic): Add extended thinking modes to AnthropicProvider and apply thinking budgets to API calls 2025-10-30 12:11:18 +00:00
d52e6ae67d 0.7.7
Some checks failed
Default (tags) / security (push) Failing after 23s
Default (tags) / test (push) Failing after 14s
Default (tags) / release (push) Has been skipped
Default (tags) / metadata (push) Has been skipped
2025-10-10 07:32:21 +00:00
b9745a1869 fix(MultiModalModel): Lazy-load SmartPdf and guard document processing across providers; ensure SmartPdf is initialized only when needed 2025-10-10 07:32:21 +00:00
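The lazy-loading described in this commit boils down to the pattern sketched below. This is illustrative only, not the actual MultiModalModel source, and the SmartPdf method names are assumed from the commit text.

```typescript
import { SmartPdf } from '@push.rocks/smartpdf';

class PdfCapableModel {
  private smartpdfInstance: SmartPdf | null = null;

  // Start SmartPdf only when document processing is actually requested.
  private async ensureSmartpdfReady(): Promise<SmartPdf> {
    if (!this.smartpdfInstance) {
      this.smartpdfInstance = new SmartPdf();
      await this.smartpdfInstance.start();
    }
    return this.smartpdfInstance;
  }

  public async document(pdfBuffer: Buffer): Promise<void> {
    const smartpdf = await this.ensureSmartpdfReady();
    // ...convert PDF pages to images with smartpdf, then hand them to the provider...
  }

  public async stop(): Promise<void> {
    await this.smartpdfInstance?.stop();
    this.smartpdfInstance = null; // release the browser/process held by SmartPdf
  }
}
```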
39 changed files with 3865 additions and 3593 deletions

.gitignore vendored (7 lines changed)

@@ -3,7 +3,6 @@
# artifacts
coverage/
public/
-pages/

# installs
node_modules/
@@ -17,4 +16,8 @@ node_modules/
dist/
dist_*/

-# custom
+# AI
+.claude/
+.serena/
+#------
+# custom

.serena/.gitignore vendored (1 line changed)

@@ -1 +0,0 @@
/cache

.serena/project.yml (deleted)

@@ -1,67 +0,0 @@
# language of the project (csharp, python, rust, java, typescript, go, cpp, or ruby)
# * For C, use cpp
# * For JavaScript, use typescript
# Special requirements:
# * csharp: Requires the presence of a .sln file in the project folder.
language: typescript
# whether to use the project's gitignore file to ignore files
# Added on 2025-04-07
ignore_all_files_in_gitignore: true
# list of additional paths to ignore
# same syntax as gitignore, so you can use * and **
# Was previously called `ignored_dirs`, please update your config if you are using that.
# Added (renamed) on 2025-04-07
ignored_paths: []
# whether the project is in read-only mode
# If set to true, all editing tools will be disabled and attempts to use them will result in an error
# Added on 2025-04-18
read_only: false
# list of tool names to exclude. We recommend not excluding any tools, see the readme for more details.
# Below is the complete list of tools for convenience.
# To make sure you have the latest list of tools, and to view their descriptions,
# execute `uv run scripts/print_tool_overview.py`.
#
# * `activate_project`: Activates a project by name.
# * `check_onboarding_performed`: Checks whether project onboarding was already performed.
# * `create_text_file`: Creates/overwrites a file in the project directory.
# * `delete_lines`: Deletes a range of lines within a file.
# * `delete_memory`: Deletes a memory from Serena's project-specific memory store.
# * `execute_shell_command`: Executes a shell command.
# * `find_referencing_code_snippets`: Finds code snippets in which the symbol at the given location is referenced.
# * `find_referencing_symbols`: Finds symbols that reference the symbol at the given location (optionally filtered by type).
# * `find_symbol`: Performs a global (or local) search for symbols with/containing a given name/substring (optionally filtered by type).
# * `get_current_config`: Prints the current configuration of the agent, including the active and available projects, tools, contexts, and modes.
# * `get_symbols_overview`: Gets an overview of the top-level symbols defined in a given file.
# * `initial_instructions`: Gets the initial instructions for the current project.
# Should only be used in settings where the system prompt cannot be set,
# e.g. in clients you have no control over, like Claude Desktop.
# * `insert_after_symbol`: Inserts content after the end of the definition of a given symbol.
# * `insert_at_line`: Inserts content at a given line in a file.
# * `insert_before_symbol`: Inserts content before the beginning of the definition of a given symbol.
# * `list_dir`: Lists files and directories in the given directory (optionally with recursion).
# * `list_memories`: Lists memories in Serena's project-specific memory store.
# * `onboarding`: Performs onboarding (identifying the project structure and essential tasks, e.g. for testing or building).
# * `prepare_for_new_conversation`: Provides instructions for preparing for a new conversation (in order to continue with the necessary context).
# * `read_file`: Reads a file within the project directory.
# * `read_memory`: Reads the memory with the given name from Serena's project-specific memory store.
# * `remove_project`: Removes a project from the Serena configuration.
# * `replace_lines`: Replaces a range of lines within a file with new content.
# * `replace_symbol_body`: Replaces the full definition of a symbol.
# * `restart_language_server`: Restarts the language server, may be necessary when edits not through Serena happen.
# * `search_for_pattern`: Performs a search for a pattern in the project.
# * `summarize_changes`: Provides instructions for summarizing the changes made to the codebase.
# * `switch_modes`: Activates modes by providing a list of their names
# * `think_about_collected_information`: Thinking tool for pondering the completeness of collected information.
# * `think_about_task_adherence`: Thinking tool for determining whether the agent is still on track with the current task.
# * `think_about_whether_you_are_done`: Thinking tool for determining whether the task is truly completed.
# * `write_memory`: Writes a named memory (for future reference) to Serena's project-specific memory store.
excluded_tools: []
# initial prompt for the project. It will always be given to the LLM upon activating the project
# (contrary to the memories, which are loaded on demand).
initial_prompt: ""
project_name: "smartai"

changelog.md

@@ -1,6 +1,97 @@
# Changelog
## 2026-01-20 - 0.13.2 - fix(repo)
no changes detected in diff; nothing to commit
- Git diff reported no changes — no files modified
- No code or dependency updates detected, so no version bump required
## 2026-01-20 - 0.13.1 - fix()
no changes detected; no release required
- No changes found in the provided git diff
- Current package version is 0.13.0
## 2026-01-20 - 0.13.0 - feat(provider.ollama)
add chain-of-thought reasoning support to chat messages and Ollama provider
- Added optional reasoning?: string to chat message and chat response interfaces to surface chain-of-thought data.
- Propagates reasoning from message history into formatted requests sent to Ollama.
- Maps Ollama response fields (thinking or reasoning) into ChatResponse.reasoning so downstream code can access model reasoning output.
## 2026-01-20 - 0.12.1 - fix(docs)
update documentation: clarify provider capabilities, add provider capabilities summary, polish examples and formatting, and remove Serena project config
- Removed .serena/project.yml and cleaned up .serena/.gitignore
- Added Provider Capabilities Summary and expanded/clarified provider tables in readme.md and readme.hints.md
- Clarified Anthropic extended thinking details and Mistral native PDF OCR notes
- Polished example code snippets and fixed minor typos/formatting (GPT-5 mention, ElevenLabs model note, consistent punctuation)
- Updated test command references and other README usage instructions
## 2026-01-20 - 0.12.0 - feat(ollama)
add support for base64-encoded images in chat messages and forward them to the Ollama provider
- Add optional images?: string[] to ChatMessage and ChatOptions interfaces (multimodal/vision support)
- Propagate images from messageHistory and ChatOptions to the Ollama API payload in chat, chatStreaming, and streaming handlers
- Changes are non-breaking: images are optional and existing behavior is preserved when absent
## 2026-01-20 - 0.11.0 - feat(ollama)
support defaultOptions and defaultTimeout for ollama provider
- Added ollama.defaultOptions object with fields: num_ctx, temperature, top_k, top_p, repeat_penalty, num_predict, stop, seed
- Added ollama.defaultTimeout option
- Pass defaultOptions and defaultTimeout into OllamaProvider constructor when initializing the provider
- Non-breaking change: existing behavior preserved if new fields are undefined
## 2026-01-20 - 0.10.1 - fix()
no changes detected — no release necessary
- No files changed in the provided diff; there are no code, documentation, or configuration modifications to release.
## 2026-01-18 - 0.10.0 - feat(mistral)
add Mistral provider with native PDF OCR and chat integration
- Adds dependency @mistralai/mistralai
- Implements ts/provider.mistral.ts providing chat() and document() (OCR) functionality
- Registers and exposes MistralProvider in SmartAi (options, lifecycle, conversation routing)
- Adds unit/integration tests: test.chat.mistral.ts and test.document.mistral.ts
- Updates readme.hints.md with Mistral usage, configuration and notes
## 2026-01-18 - 0.9.0 - feat(providers)
Add Anthropic extended thinking and adapt providers to new streaming/file APIs; bump dependencies and update docs, tests and configuration
- Add IAnthropicProviderOptions.extendedThinking with thinking modes (quick/normal/deep/off) and getThinkingConfig mapping budgets; apply thinking to Anthropic requests and omit temperature when thinking is enabled.
- Update Anthropic research flow to include thinking configuration and conditionally set temperature.
- OpenAI image editing: use openai.toFile to convert image/mask Buffers to uploadable files (image/png) before sending.
- ElevenLabs streaming: switch from response.streamNode() to response.stream() and convert web stream to Node stream using Readable.fromWeb().
- Upgrade dependencies and dev tools: @anthropic-ai/sdk ^0.71.2, @push.rocks/smartrequest ^5.0.1, @git.zone/tsbuild and related @git.zone packages, and other bumps in package.json.
- Tests and test imports updated to use @git.zone/tstest/tapbundle; many test files adjusted accordingly.
- Docs and hints updated: README and readme.hints.md include extended thinking docs, examples, formatting fixes, security/issue reporting guidance, and trademark/license clarifications.
- Project config tweaks: package build script changed, tsconfig baseUrl/paths added, npmextra.json reorganized (release registries added), .gitignore updated to ignore .claude/.serena local tooling files.
## 2025-10-30 - 0.8.0 - feat(provider.anthropic)
Add extended thinking modes to AnthropicProvider and apply thinking budgets to API calls
- Introduce IAnthropicProviderOptions.extendedThinking to configure thinking modes: 'quick' | 'normal' | 'deep' | 'off'.
- Add getThinkingConfig() helper mapping modes to token budgets (quick=2048, normal=8000, deep=16000, off=0).
- Apply thinking configuration to Anthropic API calls (chat, chatStream, vision, document, research) and increase max_tokens where appropriate (up to 20000).
- Add comprehensive tests (test/test.thinking.anthropic.ts) and update readme.hints.md with usage examples and recommendations.
- Add .claude/settings.local.json for local assistant permissions used in development/testing.
## 2025-10-10 - 0.7.7 - fix(MultiModalModel)
Lazy-load SmartPdf and guard document processing across providers; ensure SmartPdf is initialized only when needed
- Make SmartPdf lazy-loaded: smartpdfInstance is now nullable and no longer started automatically in start()
- Add ensureSmartpdfReady() to initialize and start SmartPdf on demand before document processing
- Providers updated (OpenAI, Anthropic, Ollama, xAI) to call ensureSmartpdfReady() and use the smartpdfInstance for PDF -> image conversion
- stop() now cleans up and nullifies smartpdfInstance to release resources
- Avoids starting a browser/process unless document() is actually used (reduces unnecessary resource usage)
- Add local Claude permissions file (.claude/settings.local.json) for tooling/configuration
## 2025-10-09 - 0.7.6 - fix(provider.elevenlabs)
Provide default ElevenLabs TTS voice fallback and add local tool/project configs
- ElevenLabsProvider: fallback to Samara voice id ('19STyYD15bswVz51nqLf') when no voiceId or defaultVoiceId is provided — avoids throwing an error on TTS calls.
@@ -9,18 +9,100 @@ Provide default ElevenLabs TTS voice fallback and add local tool/project configs
- Add .serena/project.yml and .serena/.gitignore to include Serena project configuration and ignore cache.
## 2025-10-08 - 0.7.5 - fix(provider.elevenlabs)
Update ElevenLabs default TTS model to eleven_v3 and add local Claude permissions file
- Changed default ElevenLabs modelId from 'eleven_multilingual_v2' to 'eleven_v3' in ts/provider.elevenlabs.ts to use the newer/default TTS model.
- Added .claude/settings.local.json with a permissions allow-list for local Claude tooling and CI tasks.
## 2025-10-03 - 0.7.4 - fix(provider.anthropic)
Use image/png for embedded PDF images in Anthropic provider and add local Claude settings for development permissions
- AnthropicProvider: change media_type from 'image/jpeg' to 'image/png' when embedding images extracted from PDFs to ensure correct format in Anthropic requests.
- Add .claude/settings.local.json with development/testing permissions for local Claude usage (shell commands, webfetch, websearch, test/run tasks).
## 2025-10-03 - 0.7.3 - fix(tests)
Add extensive provider/feature tests and local Claude CI permissions
- Add many focused test files covering providers and features: OpenAI, Anthropic, Perplexity, Groq, Ollama, Exo, XAI (chat, audio, vision, document, research, image generation, stubs, interfaces, basic)
@@ -29,12 +123,14 @@ Add extensive provider/feature tests and local Claude CI permissions
- No changes to library runtime code — this change adds tests and CI/local agent configuration only
## 2025-10-03 - 0.7.2 - fix(anthropic)
Update Anthropic provider branding to Claude Sonnet 4.5 and add local Claude permissions
- Docs: Replace 'Claude 3 Opus' with 'Claude Sonnet 4.5' in README provider capabilities matrix.
- Config: Add .claude/settings.local.json to define local Claude permissions for tests and development commands.
## 2025-10-03 - 0.7.1 - fix(docs)
Add README image generation docs and .claude local settings
- Add .claude/settings.local.json with permission allow-list for local assistant tooling and web search
@@ -43,6 +139,7 @@ Add README image generation docs and .claude local settings
- Mark image generation support as implemented in the roadmap and remove duplicate entry
## 2025-10-03 - 0.7.0 - feat(providers)
Add research API and image generation/editing support; extend providers and tests
- Introduce ResearchOptions and ResearchResponse to the MultiModalModel interface and implement research() where supported
@@ -56,14 +153,16 @@ Add research API and image generation/editing support; extend providers and test
- Add local Claude agent permissions file (.claude/settings.local.json) and various provider type/import updates
## 2025-09-28 - 0.6.1 - fix(provider.anthropic)
Fix Anthropic research tool identifier and add tests + local Claude permissions
- Replace Anthropic research tool type from 'computer_20241022' to 'web_search_20250305' to match the expected web-search tool schema.
- Add comprehensive test suites and fixtures for providers and research features (new/updated tests under test/ including anthropic, openai, research.\* and stubs).
- Fix test usage of XAI provider class name (use XAIProvider) and adjust basic provider test expectations (provider instantiation moved to start()).
- Add .claude/settings.local.json with local Claude permissions to allow common CI/dev commands and web search during testing.
## 2025-09-28 - 0.6.0 - feat(research)
Introduce research API with provider implementations, docs and tests
- Add ResearchOptions and ResearchResponse interfaces and a new abstract research() method to MultiModalModel
@@ -78,6 +177,7 @@ Introduce research API with provider implementations, docs and tests
- Add .claude/settings.local.json (local agent permissions for CI/dev tasks)
## 2025-08-12 - 0.5.11 - fix(openaiProvider)
Update default chat model to gpt-5-mini and bump dependency versions
- Changed default chat model in OpenAiProvider from 'o3-mini' and 'o4-mini' to 'gpt-5-mini'
@@ -86,6 +186,7 @@ Update default chat model to gpt-5-mini and bump dependency versions
- Added new local Claude settings configuration (.claude/settings.local.json)
## 2025-08-03 - 0.5.10 - fix(dependencies)
Update SmartPdf to v4.1.1 for enhanced PDF processing capabilities
- Updated @push.rocks/smartpdf from ^3.3.0 to ^4.1.1
@@ -93,12 +194,14 @@ Update SmartPdf to v4.1.1 for enhanced PDF processing capabilities
- Dependency updates for better performance and compatibility
## 2025-08-01 - 0.5.9 - fix(documentation)
Remove contribution section from readme
- Removed the contribution section from readme.md as requested
- Kept the roadmap section for future development plans
## 2025-08-01 - 0.5.8 - fix(core)
Fix SmartPdf lifecycle management and update dependencies
- Moved SmartPdf instance management to the MultiModalModel base class for better resource sharing
@@ -108,12 +211,14 @@ Fix SmartPdf lifecycle management and update dependencies
- Enhanced readme with professional documentation and feature matrix
## 2025-07-26 - 0.5.7 - fix(provider.openai)
Fix stream type mismatch in audio method
- Fixed type error where OpenAI SDK returns a web ReadableStream but the audio method needs to return a Node.js ReadableStream
- Added conversion using Node.js's built-in Readable.fromWeb() method
## 2025-07-25 - 0.5.5 - feat(documentation)
Comprehensive documentation enhancement and test improvements
- Completely rewrote readme.md with detailed provider comparisons, advanced usage examples, and performance tips
@@ -122,6 +227,7 @@ Comprehensive documentation enhancement and test improvements
- Added verbose flag to test script for better debugging
## 2025-05-13 - 0.5.4 - fix(provider.openai)
Update dependency versions, clean test imports, and adjust default OpenAI model configurations
- Bump dependency versions in package.json (@git.zone/tsbuild, @push.rocks/tapbundle, openai, etc.)
@@ -129,17 +235,20 @@ Update dependency versions, clean test imports, and adjust default OpenAI model
- Remove unused 'expectAsync' import from test file
## 2025-04-03 - 0.5.3 - fix(package.json)
Add explicit packageManager field to package.json
- Include the packageManager property to specify the pnpm version and checksum.
- Align package metadata with current standards.
## 2025-04-03 - 0.5.2 - fix(readme)
Remove redundant conclusion section from README to streamline documentation.
- Eliminated the conclusion block describing SmartAi's capabilities and documentation pointers.
## 2025-02-25 - 0.5.1 - fix(OpenAiProvider)
Corrected audio model ID in OpenAiProvider
- Fixed audio model identifier from 'o3-mini' to 'tts-1-hd' in the OpenAiProvider's audio method.
@@ -147,6 +256,7 @@ Corrected audio model ID in OpenAiProvider
- Corrected spelling errors in test documentation and comments.
## 2025-02-25 - 0.5.0 - feat(documentation and configuration)
Enhanced package and README documentation
- Expanded the package description to better reflect the library's capabilities.
@@ -154,6 +264,7 @@ Enhanced package and README documentation
- Provided error handling strategies and advanced streaming customization examples.
## 2025-02-25 - 0.4.2 - fix(core)
Fix OpenAI chat streaming and PDF document processing logic.
- Updated OpenAI chat streaming to handle new async iterable format.
@@ -161,6 +272,7 @@ Fix OpenAI chat streaming and PDF document processing logic.
- Removed unsupported temperature options from OpenAI requests.
## 2025-02-25 - 0.4.1 - fix(provider)
Fix provider modules for consistency
- Updated TypeScript interfaces and options in provider modules for better type safety.
@@ -168,6 +280,7 @@ Fix provider modules for consistency
- Added optional model options to OpenAI provider for custom model usage.
## 2025-02-08 - 0.4.0 - feat(core)
Added support for Exo AI provider
- Introduced ExoProvider with chat functionalities.
@@ -175,18 +288,21 @@ Added support for Exo AI provider
- Extended Conversation class to support ExoProvider.
## 2025-02-05 - 0.3.3 - fix(documentation)
Update readme with detailed license and legal information.
- Added explicit section on License and Legal Information in the README.
- Clarified the use of trademarks and company information.
## 2025-02-05 - 0.3.2 - fix(documentation)
Remove redundant badges from readme
- Removed Build Status badge from the readme file.
- Removed License badge from the readme file.
## 2025-02-05 - 0.3.1 - fix(documentation)
Updated README structure and added detailed usage examples
- Introduced a Table of Contents
@@ -195,6 +311,7 @@ Updated README structure and added detailed usage examples
- Clarified the development setup with instructions for running tests and building the project
## 2025-02-05 - 0.3.0 - feat(integration-xai)
Add support for X.AI provider with chat and document processing capabilities.
- Introduced XAIProvider class for integrating X.AI features.
@@ -202,6 +319,7 @@ Add support for X.AI provider with chat and document processing capabilities.
- Enabled document processing capabilities with PDF conversion in X.AI.
## 2025-02-03 - 0.2.0 - feat(provider.anthropic)
Add support for vision and document processing in Anthropic provider
- Implemented vision tasks for Anthropic provider using Claude-3-opus-20240229 model.
@@ -209,6 +327,7 @@ Add support for vision and document processing in Anthropic provider
- Updated documentation to reflect the new capabilities of the Anthropic provider.
## 2025-02-03 - 0.1.0 - feat(providers)
Add vision and document processing capabilities to providers
- OpenAI and Ollama providers now support vision tasks using GPT-4 Vision and Llava models respectively.
@@ -217,6 +336,7 @@ Add vision and document processing capabilities to providers
- Updated the readme file with examples for vision and document processing.
## 2025-02-03 - 0.0.19 - fix(core)
Enhanced chat streaming and error handling across providers
- Refactored chatStream method to properly handle input streams and processes in Perplexity, OpenAI, Ollama, and Anthropic providers.
@@ -225,6 +345,7 @@ Enhanced chat streaming and error handling across providers
- Adjusted the test logic in test/test.ts for the new classification response requirement.
## 2024-09-19 - 0.0.18 - fix(dependencies)
Update dependencies to the latest versions.
- Updated @git.zone/tsbuild from ^2.1.76 to ^2.1.84
@@ -238,46 +359,53 @@ Update dependencies to the latest versions.
- Updated openai from ^4.47.1 to ^4.62.1
## 2024-05-29 - 0.0.17 - Documentation
Updated project description.
- Improved project description for clarity and details.
## 2024-05-17 - 0.0.16 to 0.0.15 - Core
Fixes and updates.
- Various core updates and fixes for stability improvements.
## 2024-04-29 - 0.0.14 to 0.0.13 - Core
Fixes and updates.
- Multiple core updates and fixes for enhanced functionality.
## 2024-04-29 - 0.0.12 - Core
Fixes and updates.
- Core update and bug fixes.
## 2024-04-29 - 0.0.11 - Provider
Fix integration for anthropic provider.
- Correction in the integration process with anthropic provider for better compatibility.
## 2024-04-27 - 0.0.10 to 0.0.9 - Core
Fixes and updates.
- Updates and fixes to core components.
- Updated tsconfig for improved TypeScript configuration.
## 2024-04-01 - 0.0.8 to 0.0.7 - Core and npmextra
Core updates and npmextra configuration.
- Core fixes and updates.
- Updates to npmextra.json for githost configuration.
## 2024-03-31 - 0.0.6 to 0.0.2 - Core
Initial core updates and fixes.
- Multiple updates and fixes to core following initial versions.
This summarizes the relevant updates and changes based on the provided commit messages. The changelog excludes commits that are version tags without meaningful content or repeated entries.

npmextra.json

@@ -1,5 +1,5 @@
{
-  "gitzone": {
+  "@git.zone/cli": {
    "projectType": "npm",
    "module": {
      "githost": "code.foss.global",
@@ -33,13 +33,19 @@
"AI toolkit", "AI toolkit",
"provider switching" "provider switching"
] ]
},
"release": {
"accessLevel": "public",
"registries": [
"https://verdaccio.lossless.digital",
"https://registry.npmjs.org"
]
} }
}, },
"npmci": { "@git.zone/tsdoc": {
"npmGlobalTools": [],
"npmAccessLevel": "public"
},
"tsdoc": {
"legal": "\n## License and Legal Information\n\nThis repository contains open-source code that is licensed under the MIT License. A copy of the MIT License can be found in the [license](license) file within this repository. \n\n**Please note:** The MIT License does not grant permission to use the trade names, trademarks, service marks, or product names of the project, except as required for reasonable and customary use in describing the origin of the work and reproducing the content of the NOTICE file.\n\n### Trademarks\n\nThis project is owned and maintained by Task Venture Capital GmbH. The names and logos associated with Task Venture Capital GmbH and any related products or services are trademarks of Task Venture Capital GmbH and are not included within the scope of the MIT license granted herein. Use of these trademarks must comply with Task Venture Capital GmbH's Trademark Guidelines, and any usage must be approved in writing by Task Venture Capital GmbH.\n\n### Company Information\n\nTask Venture Capital GmbH \nRegistered at District court Bremen HRB 35230 HB, Germany\n\nFor any legal inquiries or if you require further information, please contact us via email at hello@task.vc.\n\nBy using this repository, you acknowledge that you have read this section, agree to comply with its terms, and understand that the licensing of the code does not imply endorsement by Task Venture Capital GmbH of any derivative works.\n" "legal": "\n## License and Legal Information\n\nThis repository contains open-source code that is licensed under the MIT License. A copy of the MIT License can be found in the [license](license) file within this repository. \n\n**Please note:** The MIT License does not grant permission to use the trade names, trademarks, service marks, or product names of the project, except as required for reasonable and customary use in describing the origin of the work and reproducing the content of the NOTICE file.\n\n### Trademarks\n\nThis project is owned and maintained by Task Venture Capital GmbH. The names and logos associated with Task Venture Capital GmbH and any related products or services are trademarks of Task Venture Capital GmbH and are not included within the scope of the MIT license granted herein. Use of these trademarks must comply with Task Venture Capital GmbH's Trademark Guidelines, and any usage must be approved in writing by Task Venture Capital GmbH.\n\n### Company Information\n\nTask Venture Capital GmbH \nRegistered at District court Bremen HRB 35230 HB, Germany\n\nFor any legal inquiries or if you require further information, please contact us via email at hello@task.vc.\n\nBy using this repository, you acknowledge that you have read this section, agree to comply with its terms, and understand that the licensing of the code does not imply endorsement by Task Venture Capital GmbH of any derivative works.\n"
},
"@ship.zone/szci": {
"npmGlobalTools": []
} }
} }

package.json

@@ -1,6 +1,6 @@
{
  "name": "@push.rocks/smartai",
-  "version": "0.7.6",
+  "version": "0.13.2",
  "private": false,
  "description": "SmartAi is a versatile TypeScript library designed to facilitate integration and interaction with various AI models, offering functionalities for chat, audio generation, document processing, and vision tasks.",
  "main": "dist_ts/index.js",
@@ -11,29 +11,29 @@
"scripts": { "scripts": {
"test": "(tstest test/ --web --verbose)", "test": "(tstest test/ --web --verbose)",
"typecheck": "tsbuild check", "typecheck": "tsbuild check",
"build": "(tsbuild --web --allowimplicitany)", "build": "(tsbuild tsfolders --allowimplicitany)",
"buildDocs": "(tsdoc)" "buildDocs": "(tsdoc)"
}, },
"devDependencies": { "devDependencies": {
"@git.zone/tsbuild": "^2.6.8", "@git.zone/tsbuild": "^4.1.2",
"@git.zone/tsbundle": "^2.5.1", "@git.zone/tsbundle": "^2.8.1",
"@git.zone/tsrun": "^1.3.3", "@git.zone/tsrun": "^2.0.1",
"@git.zone/tstest": "^2.3.8", "@git.zone/tstest": "^3.1.6",
"@push.rocks/qenv": "^6.1.3", "@push.rocks/qenv": "^6.1.3",
"@push.rocks/tapbundle": "^6.0.3", "@types/node": "^25.0.9",
"@types/node": "^22.15.17",
"typescript": "^5.9.3" "typescript": "^5.9.3"
}, },
"dependencies": { "dependencies": {
"@anthropic-ai/sdk": "^0.65.0", "@anthropic-ai/sdk": "^0.71.2",
"@mistralai/mistralai": "^1.12.0",
"@push.rocks/smartarray": "^1.1.0", "@push.rocks/smartarray": "^1.1.0",
"@push.rocks/smartfile": "^11.2.7", "@push.rocks/smartfs": "^1.3.1",
"@push.rocks/smartpath": "^6.0.0", "@push.rocks/smartpath": "^6.0.0",
"@push.rocks/smartpdf": "^4.1.1", "@push.rocks/smartpdf": "^4.1.1",
"@push.rocks/smartpromise": "^4.2.3", "@push.rocks/smartpromise": "^4.2.3",
"@push.rocks/smartrequest": "^4.3.1", "@push.rocks/smartrequest": "^5.0.1",
"@push.rocks/webstream": "^1.0.10", "@push.rocks/webstream": "^1.0.10",
"openai": "^5.12.2" "openai": "^6.16.0"
}, },
"repository": { "repository": {
"type": "git", "type": "git",
@@ -86,7 +86,8 @@
"onlyBuiltDependencies": [ "onlyBuiltDependencies": [
"esbuild", "esbuild",
"puppeteer" "puppeteer"
] ],
"overrides": {}
}, },
"packageManager": "pnpm@10.7.0+sha512.6b865ad4b62a1d9842b61d674a393903b871d9244954f652b8842c2b553c72176b278f64c463e52d40fff8aba385c235c8c9ecf5cc7de4fd78b8bb6d49633ab6" "packageManager": "pnpm@10.7.0+sha512.6b865ad4b62a1d9842b61d674a393903b871d9244954f652b8842c2b553c72176b278f64c463e52d40fff8aba385c235c8c9ecf5cc7de4fd78b8bb6d49633ab6"
} }

pnpm-lock.yaml (generated, 5278 lines changed)
File diff suppressed because it is too large.

readme.hints.md

@@ -1 +1,104 @@
# SmartAI Project Hints
## Dependencies
- Uses `@git.zone/tstest` v3.x for testing (import from `@git.zone/tstest/tapbundle`)
- `@push.rocks/smartfs` v1.x for file system operations
- `@anthropic-ai/sdk` v0.71.x with extended thinking support
- `@mistralai/mistralai` v1.x for Mistral OCR and chat capabilities
- `openai` v6.x for OpenAI API integration
- `@push.rocks/smartrequest` v5.x - uses `response.stream()` + `Readable.fromWeb()` for streaming
## Important Notes
- When extended thinking is enabled, temperature parameter must NOT be set (or set to 1)
- The `streamNode()` method was removed in smartrequest v5, use `response.stream()` with `Readable.fromWeb()` instead
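A minimal sketch of that conversion; the `response` object is a placeholder for whatever a provider receives back from smartrequest v5.

```typescript
import { Readable } from 'node:stream';
import type { ReadableStream as WebReadableStream } from 'node:stream/web';

// Placeholder for a smartrequest v5 response object.
declare const response: { stream(): WebReadableStream };

// Convert the web stream to a Node.js stream before piping it anywhere.
const nodeStream = Readable.fromWeb(response.stream());
nodeStream.pipe(process.stdout);
```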
## Provider Capabilities Summary
| Provider | Chat | Stream | TTS | Vision | Documents | Research | Images |
|--------------|------|--------|-----|--------|-----------|----------|--------|
| OpenAI | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ |
| Anthropic | ✅ | ✅ | ❌ | ✅ | ✅ | ✅ | ❌ |
| Mistral | ✅ | ✅ | ❌ | ✅ | ✅ | ❌ | ❌ |
| ElevenLabs | ❌ | ❌ | ✅ | ❌ | ❌ | ❌ | ❌ |
| Ollama | ✅ | ✅ | ❌ | ✅ | ✅ | ❌ | ❌ |
| XAI | ✅ | ✅ | ❌ | ❌ | ✅ | ❌ | ❌ |
| Perplexity | ✅ | ✅ | ❌ | ❌ | ❌ | ✅ | ❌ |
| Groq | ✅ | ✅ | ❌ | ❌ | ❌ | ❌ | ❌ |
| Exo | ✅ | ✅ | ❌ | ❌ | ❌ | ❌ | ❌ |
## Mistral Provider Integration
### Overview
The Mistral provider supports:
- **Document AI** via Mistral OCR (December 2025) - native PDF processing without image conversion
- **Chat capabilities** using Mistral's chat models (`mistral-large-latest`, etc.)
### Key Advantage: Native PDF Support
Unlike other providers that require converting PDFs to images (using SmartPdf), Mistral OCR natively accepts PDF documents as base64-encoded data. This makes document processing potentially faster and more accurate for text extraction.
### Configuration
```typescript
import * as smartai from '@push.rocks/smartai';
const provider = new smartai.MistralProvider({
mistralToken: 'your-token-here',
chatModel: 'mistral-large-latest', // default
ocrModel: 'mistral-ocr-latest', // default
tableFormat: 'markdown', // 'markdown' or 'html'
});
await provider.start();
```
### API Key
Tests require `MISTRAL_API_KEY` in `.nogit/env.json`.
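Hedged usage sketch for OCR-backed document processing with the provider configured above; the call shape mirrors the document() interface used by the other providers, and the file name and prompts are illustrative.

```typescript
import * as fs from 'fs';

const pdfBuffer = fs.readFileSync('invoice.pdf');

// PDFs are passed as Buffers; Mistral OCR consumes them natively (no image conversion).
const result = await provider.document({
  systemMessage: 'You are an accounting assistant.',
  userMessage: 'Extract the invoice total and the due date.',
  messageHistory: [],
  pdfDocuments: [pdfBuffer],
});
console.log(result.message);
```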
## Anthropic Extended Thinking Feature
### Configuration
Extended thinking is configured at the provider level during instantiation:
```typescript
import * as smartai from '@push.rocks/smartai';
const provider = new smartai.AnthropicProvider({
anthropicToken: 'your-token-here',
extendedThinking: 'normal', // Options: 'quick' | 'normal' | 'deep' | 'off'
});
```
### Thinking Modes
| Mode | Budget Tokens | Use Case |
| ---------- | ------------- | ----------------------------------------------- |
| `'quick'` | 2,048 | Lightweight reasoning for simple queries |
| `'normal'` | 8,000 | **Default** - Balanced reasoning for most tasks |
| `'deep'` | 16,000 | Complex reasoning for difficult problems |
| `'off'` | 0 | Disable extended thinking |
### Implementation Details
- Extended thinking is implemented via `getThinkingConfig()` private method
- When thinking is enabled, temperature must NOT be set
- Uses `claude-sonnet-4-5-20250929` model
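Hedged usage sketch with the provider configured above; note that no temperature is passed, since extended thinking and a custom temperature are mutually exclusive.

```typescript
await provider.start();

// Regular chat call; the thinking budget comes from the provider-level setting.
const answer = await provider.chat({
  systemMessage: 'You are a systems architect.',
  userMessage: 'Compare event sourcing with CRUD for an audit-heavy domain.',
  messageHistory: [],
});
console.log(answer.message);
```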
## Testing
Run tests with:
```bash
pnpm test
```
Run specific tests:
```bash
npx tstest test/test.something.ts --verbose
```

readme.md (467 lines changed)

@@ -1,27 +1,36 @@
# @push.rocks/smartai
**One API to rule them all** 🚀
[![npm version](https://img.shields.io/npm/v/@push.rocks/smartai.svg)](https://www.npmjs.com/package/@push.rocks/smartai)
[![TypeScript](https://img.shields.io/badge/TypeScript-5.x-blue.svg)](https://www.typescriptlang.org/)
[![License: MIT](https://img.shields.io/badge/License-MIT-yellow.svg)](https://opensource.org/licenses/MIT)
-SmartAI unifies the world's leading AI providers - OpenAI, Anthropic, Perplexity, Ollama, Groq, XAI, Exo, and ElevenLabs - under a single, elegant TypeScript interface. Build AI applications at lightning speed without vendor lock-in.
+SmartAI unifies the world's leading AI providers OpenAI, Anthropic, Mistral, Perplexity, Ollama, Groq, XAI, Exo, and ElevenLabs under a single, elegant TypeScript interface. Build AI applications at lightning speed without vendor lock-in.
+## Issue Reporting and Security
+For reporting bugs, issues, or security vulnerabilities, please visit [community.foss.global/](https://community.foss.global/). This is the central community hub for all issue reporting. Developers who sign and comply with our contribution agreement and go through identification can also get a [code.foss.global/](https://code.foss.global/) account to submit Pull Requests directly.
## 🎯 Why SmartAI?
-- **🔌 Universal Interface**: Write once, run with any AI provider. Switch between GPT-4, Claude, Llama, or Grok with a single line change.
+- **🔌 Universal Interface**: Write once, run with any AI provider. Switch between GPT-5, Claude, Llama, or Grok with a single line change.
-- **🛡️ Type-Safe**: Full TypeScript support with comprehensive type definitions for all operations
+- **🛡️ Type-Safe**: Full TypeScript support with comprehensive type definitions for all operations.
-- **🌊 Streaming First**: Built for real-time applications with native streaming support
+- **🌊 Streaming First**: Built for real-time applications with native streaming support.
-- **🎨 Multi-Modal**: Seamlessly work with text, images, audio, and documents
+- **🎨 Multi-Modal**: Seamlessly work with text, images, audio, and documents.
-- **🏠 Local & Cloud**: Support for both cloud providers and local models via Ollama
+- **🏠 Local & Cloud**: Support for both cloud providers and local models via Ollama/Exo.
-- **⚡ Zero Lock-In**: Your code remains portable across all AI providers
+- **⚡ Zero Lock-In**: Your code remains portable across all AI providers.
-## 🚀 Quick Start
+## 📦 Installation
```bash
npm install @push.rocks/smartai
+# or
+pnpm install @push.rocks/smartai
```
+## 🚀 Quick Start
```typescript
import { SmartAi } from '@push.rocks/smartai';
@@ -31,8 +40,8 @@ const ai = new SmartAi({
  anthropicToken: 'sk-ant-...',
  elevenlabsToken: 'sk-...',
  elevenlabs: {
-    defaultVoiceId: '19STyYD15bswVz51nqLf' // Optional: Samara voice
+    defaultVoiceId: '19STyYD15bswVz51nqLf', // Optional: Samara voice
-  }
+  },
});
await ai.start();
@@ -41,24 +50,27 @@ await ai.start();
const response = await ai.openaiProvider.chat({
  systemMessage: 'You are a helpful assistant.',
  userMessage: 'Explain quantum computing in simple terms',
-  messageHistory: []
+  messageHistory: [],
});
+console.log(response.message);
```
## 📊 Provider Capabilities Matrix
Choose the right provider for your use case:
| Provider | Chat | Streaming | TTS | Vision | Documents | Research | Images | Highlights |
-|----------|:----:|:---------:|:---:|:------:|:---------:|:--------:|:------:|------------|
+| -------------- | :--: | :-------: | :-: | :----: | :-------: | :------: | :----: | --------------------------------------------------------------- |
-| **OpenAI** | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | • gpt-image-1<br>• DALL-E 3<br>• Deep research API |
+| **OpenAI** | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | gpt-image-1 • DALL-E 3 • Deep Research API |
-| **Anthropic** | ✅ | ✅ | ❌ | ✅ | ✅ | ✅ | ❌ | • Claude Sonnet 4.5<br>• Superior reasoning<br>• Web search API |
+| **Anthropic** | ✅ | ✅ | ❌ | ✅ | ✅ | ✅ | ❌ | Claude Sonnet 4.5 • Extended Thinking • Web Search API |
-| **ElevenLabs** | ❌ | ❌ | ✅ | ❌ | ❌ | ❌ | ❌ | • Premium TTS<br>• 70+ languages<br>• Natural voices |
+| **Mistral** | ✅ | ✅ | ❌ | ✅ | ✅ | ❌ | ❌ | Native PDF OCR • mistral-large • Fast inference |
-| **Ollama** | ✅ | ✅ | ❌ | ✅ | ✅ | ❌ | ❌ | • 100% local<br>• Privacy-first<br>• No API costs |
+| **ElevenLabs** | ❌ | ❌ | ✅ | ❌ | ❌ | ❌ | ❌ | Premium TTS • 70+ languages • v3 model |
-| **XAI** | ✅ | ✅ | ❌ | ❌ | ✅ | ❌ | ❌ | • Grok models<br>• Real-time data<br>• Uncensored |
+| **Ollama** | ✅ | ✅ | ❌ | ✅ | ✅ | ❌ | ❌ | 100% local • Privacy-first • No API costs |
-| **Perplexity** | ✅ | ✅ | ❌ | ❌ | ❌ | ✅ | ❌ | • Web-aware<br>• Research-focused<br>• Sonar Pro models |
+| **XAI** | ✅ | ✅ | ❌ | ❌ | ✅ | ❌ | ❌ | Grok 2 • Real-time data |
-| **Groq** | ✅ | ✅ | ❌ | ❌ | ❌ | ❌ | ❌ | • 10x faster<br>• LPU inference<br>• Low latency |
+| **Perplexity** | ✅ | ✅ | ❌ | ❌ | ❌ | ✅ | ❌ | Web-aware • Research-focused • Sonar Pro |
-| **Exo** | ✅ | ✅ | ❌ | ❌ | ❌ | ❌ | ❌ | • Distributed<br>• P2P compute<br>• Decentralized |
+| **Groq** | ✅ | ✅ | ❌ | ❌ | ❌ | ❌ | ❌ | 10x faster • LPU inference • Llama 3.3 |
+| **Exo** | ✅ | ✅ | ❌ | ❌ | ❌ | ❌ | ❌ | Distributed • P2P compute • Decentralized |
## 🎮 Core Features
@@ -67,25 +79,25 @@ Choose the right provider for your use case:
Works identically across all providers:
```typescript
-// Use GPT-4 for complex reasoning
+// Use GPT-5 for complex reasoning
const gptResponse = await ai.openaiProvider.chat({
-  systemMessage: 'You are a expert physicist.',
+  systemMessage: 'You are an expert physicist.',
  userMessage: 'Explain the implications of quantum entanglement',
-  messageHistory: []
+  messageHistory: [],
});
// Use Claude for safety-critical applications
const claudeResponse = await ai.anthropicProvider.chat({
  systemMessage: 'You are a medical advisor.',
  userMessage: 'Review this patient data for concerns',
-  messageHistory: []
+  messageHistory: [],
});
// Use Groq for lightning-fast responses
const groqResponse = await ai.groqProvider.chat({
  systemMessage: 'You are a code reviewer.',
  userMessage: 'Quick! Find the bug in this code: ...',
-  messageHistory: []
+  messageHistory: [],
});
```
@@ -102,7 +114,7 @@ const reader = stream.getReader();
while (true) {
  const { done, value } = await reader.read();
  if (done) break;
  // Update UI in real-time
  process.stdout.write(value);
}
@@ -115,26 +127,24 @@ Generate natural voices with OpenAI or ElevenLabs:
```typescript
// OpenAI TTS
const audioStream = await ai.openaiProvider.audio({
  message: 'Welcome to the future of AI development!',
});

// ElevenLabs TTS - Premium quality, natural voices (uses v3 by default)
const elevenLabsAudio = await ai.elevenlabsProvider.audio({
  message: 'Experience the most lifelike text to speech technology.',
  voiceId: '19STyYD15bswVz51nqLf', // Optional: Samara voice
  modelId: 'eleven_v3', // Optional: defaults to eleven_v3 (70+ languages)
  voiceSettings: {
    // Optional: fine-tune voice characteristics
    stability: 0.5, // 0-1: Speech consistency
    similarity_boost: 0.8, // 0-1: Voice similarity to original
    style: 0.0, // 0-1: Expressiveness
    use_speaker_boost: true, // Enhanced clarity
  },
});

// Stream directly to speakers or save to file
audioStream.pipe(fs.createWriteStream('welcome.mp3'));
```
@@ -148,19 +158,19 @@ const image = fs.readFileSync('product-photo.jpg');
// OpenAI: General purpose vision
const gptVision = await ai.openaiProvider.vision({
  image,
  prompt: 'Describe this product and suggest marketing angles',
});

// Anthropic: Detailed analysis with extended thinking
const claudeVision = await ai.anthropicProvider.vision({
  image,
  prompt: 'Identify any safety concerns or defects',
});

// Ollama: Private, local analysis
const ollamaVision = await ai.ollamaProvider.vision({
  image,
  prompt: 'Extract all text and categorize the content',
});
```
@@ -172,21 +182,21 @@ Extract insights from PDFs with AI:
const contract = fs.readFileSync('contract.pdf');
const invoice = fs.readFileSync('invoice.pdf');

// Analyze documents with OpenAI
const analysis = await ai.openaiProvider.document({
  systemMessage: 'You are a legal expert.',
  userMessage: 'Compare these documents and highlight key differences',
  messageHistory: [],
  pdfDocuments: [contract, invoice],
});

// Multi-document analysis with Anthropic
const taxDocs = [form1099, w2, receipts];
const taxAnalysis = await ai.anthropicProvider.document({
  systemMessage: 'You are a tax advisor.',
  userMessage: 'Prepare a tax summary from these documents',
  messageHistory: [],
  pdfDocuments: taxDocs,
});
```
@@ -199,42 +209,118 @@ Perform deep research with web search capabilities across multiple providers:
const deepResearch = await ai.openaiProvider.research({
  query: 'What are the latest developments in quantum computing?',
  searchDepth: 'deep',
  includeWebSearch: true,
});

console.log(deepResearch.answer);
console.log('Sources:', deepResearch.sources);

// Anthropic Web Search - Domain-filtered research
import { AnthropicProvider } from '@push.rocks/smartai';

const anthropic = new AnthropicProvider({
  anthropicToken: 'sk-ant-...',
  enableWebSearch: true,
  searchDomainAllowList: ['nature.com', 'science.org'],
});

const scientificResearch = await anthropic.research({
  query: 'Latest breakthroughs in CRISPR gene editing',
  searchDepth: 'advanced',
});

// Perplexity - Research-focused with citations
const perplexityResearch = await ai.perplexityProvider.research({
  query: 'Current state of autonomous vehicle technology',
  searchDepth: 'deep', // Uses Sonar Pro model
});
```
**Research Options:**

- `searchDepth`: `'basic'` | `'advanced'` | `'deep'`
- `maxSources`: Number of sources to include
- `includeWebSearch`: Enable web search (OpenAI)
- `background`: Run as background task (OpenAI)
**Supported Providers:**

- **OpenAI**: Deep Research API with specialized models (`o3-deep-research-*`, `o4-mini-deep-research-*`)
- **Anthropic**: Web Search API with domain filtering
- **Perplexity**: Sonar and Sonar Pro models with built-in citations
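
Putting the options above together, here is a small sketch. It assumes an initialized `SmartAi` instance named `ai`, as in the earlier examples; which options each provider honors follows the list above.

```typescript
// Sketch: deep, web-backed research with a capped source count
const report = await ai.openaiProvider.research({
  query: 'Summarize recent progress in solid-state batteries',
  searchDepth: 'advanced',
  maxSources: 5,          // cap how many sources are pulled in
  includeWebSearch: true, // OpenAI-specific toggle
  background: false,      // OpenAI-specific: run inline rather than as a background task
});

console.log(report.answer);
for (const source of report.sources ?? []) {
  console.log('-', source);
}
```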
### 🧠 Extended Thinking (Anthropic)
Enable Claude to spend more time reasoning about complex problems before generating responses:
```typescript
import { AnthropicProvider } from '@push.rocks/smartai';

// Configure extended thinking mode at provider level
const anthropic = new AnthropicProvider({
  anthropicToken: 'sk-ant-...',
  extendedThinking: 'normal', // Options: 'quick' | 'normal' | 'deep' | 'off'
});

await anthropic.start();

// Extended thinking is automatically applied to all methods
const response = await anthropic.chat({
  systemMessage: 'You are an expert mathematician.',
  userMessage: 'Prove the Pythagorean theorem from first principles',
  messageHistory: [],
});
```
**Thinking Modes:**
| Mode | Budget Tokens | Use Case |
| ---------- | ------------- | ------------------------------------------------ |
| `'quick'` | 2,048 | Lightweight reasoning for simple queries |
| `'normal'` | 8,000 | **Default** — Balanced reasoning for most tasks |
| `'deep'` | 16,000 | Complex reasoning for difficult problems |
| `'off'` | 0 | Disable extended thinking |
**Best Practices:**
- Start with `'normal'` (default) for general usage
- Use `'deep'` for complex analytical tasks, philosophy, mathematics, or research
- Use `'quick'` for simple factual queries where deep reasoning isn't needed
- Thinking budget counts against total token usage
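
As a concrete illustration, the sketch below (reusing only the provider options shown above) pairs a deep-thinking instance for heavy analysis with a quick-thinking instance for cheap lookups:

```typescript
import { AnthropicProvider } from '@push.rocks/smartai';

// Deep reasoning for hard analytical work
const deepClaude = new AnthropicProvider({
  anthropicToken: 'sk-ant-...',
  extendedThinking: 'deep', // 16,000-token thinking budget
});

// Minimal reasoning for simple factual queries
const quickClaude = new AnthropicProvider({
  anthropicToken: 'sk-ant-...',
  extendedThinking: 'quick', // 2,048-token thinking budget
});

await Promise.all([deepClaude.start(), quickClaude.start()]);

const analysis = await deepClaude.chat({
  systemMessage: 'You are a rigorous research analyst.',
  userMessage: 'Compare the long-term trade-offs of these two architectures: ...',
  messageHistory: [],
});

const lookup = await quickClaude.chat({
  systemMessage: 'You are a concise assistant.',
  userMessage: 'What year was TypeScript first released?',
  messageHistory: [],
});
```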
### 📑 Native PDF OCR (Mistral)
Mistral provides native PDF document processing via their OCR API — no image conversion required:
```typescript
import { MistralProvider } from '@push.rocks/smartai';

const mistral = new MistralProvider({
  mistralToken: 'your-api-key',
  chatModel: 'mistral-large-latest', // Default
  ocrModel: 'mistral-ocr-latest', // Default
  tableFormat: 'markdown', // 'markdown' | 'html'
});

await mistral.start();

// Direct PDF processing - no image conversion overhead
const result = await mistral.document({
  systemMessage: 'You are a document analyst.',
  userMessage: 'Extract all invoice details and calculate the total.',
  pdfDocuments: [invoicePdfBuffer],
  messageHistory: [],
});
```
**Key Advantage**: Unlike other providers that convert PDFs to images first, Mistral's OCR API processes PDFs natively, potentially offering faster and more accurate text extraction for document-heavy workloads.
**Supported Formats:**
- Native PDF processing via Files API
- Image OCR (JPEG, PNG, GIF, WebP) for vision tasks
- Table extraction with markdown or HTML output
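
For the image OCR path in the list above, a minimal sketch follows. It assumes `MistralProvider` exposes the same `vision({ image, prompt })` signature as the other providers; the exact behavior of its image path is not shown in this README.

```typescript
import * as fs from 'fs';

// Illustrative only: OCR a scanned receipt through the shared vision() path
const receiptImage = fs.readFileSync('receipt.jpg');

const extracted = await mistral.vision({
  image: receiptImage,
  prompt: 'Extract the merchant, date, and total amount as plain text.',
});

console.log(extracted);
```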
### 🎨 Image Generation & Editing

Generate and edit images with OpenAI's cutting-edge models:
@@ -245,7 +331,7 @@ const image = await ai.openaiProvider.imageGenerate({
  prompt: 'A futuristic robot assistant in a modern office, digital art',
  model: 'gpt-image-1',
  quality: 'high',
  size: '1024x1024',
});

// Save the generated image
@@ -259,7 +345,7 @@ const logo = await ai.openaiProvider.imageGenerate({
  quality: 'high',
  size: '1024x1024',
  background: 'transparent',
  outputFormat: 'png',
});

// WebP with compression for web use
@@ -269,15 +355,16 @@ const webImage = await ai.openaiProvider.imageGenerate({
  quality: 'high',
  size: '1536x1024',
  outputFormat: 'webp',
  outputCompression: 85,
});

// Superior text rendering (gpt-image-1's strength)
const signage = await ai.openaiProvider.imageGenerate({
  prompt:
    'Vintage cafe sign saying "COFFEE & CODE" in hand-lettered typography',
  model: 'gpt-image-1',
  quality: 'high',
  size: '1024x1024',
});

// Generate multiple variations at once
@@ -286,7 +373,7 @@ const variations = await ai.openaiProvider.imageGenerate({
  model: 'gpt-image-1',
  n: 3,
  quality: 'medium',
  size: '1024x1024',
});

// Edit an existing image
@@ -294,21 +381,23 @@ const editedImage = await ai.openaiProvider.imageEdit({
  image: originalImageBuffer,
  prompt: 'Add sunglasses and change the background to a beach sunset',
  model: 'gpt-image-1',
  quality: 'high',
});
```
**Image Generation Options:**

- `model`: `'gpt-image-1'` | `'dall-e-3'` | `'dall-e-2'`
- `quality`: `'low'` | `'medium'` | `'high'` | `'auto'`
- `size`: Multiple aspect ratios up to 4096×4096
- `background`: `'transparent'` | `'opaque'` | `'auto'`
- `outputFormat`: `'png'` | `'jpeg'` | `'webp'`
- `outputCompression`: 0–100 for webp/jpeg
- `moderation`: `'low'` | `'auto'`
- `n`: Number of images (1–10)
**gpt-image-1 Advantages:**

- Superior text rendering in images
- Up to 4096×4096 resolution
- Transparent background support
@@ -341,22 +430,22 @@ await inputWriter.write('Now show me how to make it thread-safe');
```typescript
const supportBot = new SmartAi({
  anthropicToken: process.env.ANTHROPIC_KEY, // Claude for empathetic responses
});

async function handleCustomerQuery(query: string, history: ChatMessage[]) {
  try {
    const response = await supportBot.anthropicProvider.chat({
      systemMessage: `You are a helpful customer support agent.
        Be empathetic, professional, and solution-oriented.`,
      userMessage: query,
      messageHistory: history,
    });
    return response.message;
  } catch (error) {
    // Fallback to another provider if needed
    return await supportBot.openaiProvider.chat({ /* ... */ });
  }
}
```
@@ -365,23 +454,20 @@ async function handleCustomerQuery(query: string, history: ChatMessage[]) {
```typescript
const codeReviewer = new SmartAi({
  groqToken: process.env.GROQ_KEY, // Groq for speed
});

async function reviewCode(code: string, language: string) {
  const review = await codeReviewer.groqProvider.chat({
    systemMessage: `You are a ${language} expert. Review code for:
      - Security vulnerabilities
      - Performance issues
      - Best practices
      - Potential bugs`,
    userMessage: `Review this code:\n\n${code}`,
    messageHistory: [],
  });

  return review.message;
}
```
@@ -390,18 +476,20 @@ async function reviewCode(code: string, language: string) {
```typescript
const researcher = new SmartAi({
  perplexityToken: process.env.PERPLEXITY_KEY,
});

async function research(topic: string) {
  // Perplexity excels at web-aware research
  const findings = await researcher.perplexityProvider.research({
    query: `Research the latest developments in ${topic}`,
    searchDepth: 'deep',
  });

  return {
    answer: findings.answer,
    sources: findings.sources,
  };
}
```
@@ -412,8 +500,8 @@ const localAI = new SmartAi({
  ollama: {
    baseUrl: 'http://localhost:11434',
    model: 'llama2',
    visionModel: 'llava',
  },
});

// Process sensitive documents without leaving your infrastructure
@@ -422,9 +510,9 @@ async function analyzeSensitiveDoc(pdfBuffer: Buffer) {
    systemMessage: 'Extract and summarize key information.',
    userMessage: 'Analyze this confidential document',
    messageHistory: [],
    pdfDocuments: [pdfBuffer],
  });

  // Data never leaves your servers
  return analysis.message;
}
@@ -437,24 +525,27 @@ async function analyzeSensitiveDoc(pdfBuffer: Buffer) {
```typescript
class SmartAIRouter {
  constructor(private ai: SmartAi) {}

  async query(
    message: string,
    requirements: {
      speed?: boolean;
      accuracy?: boolean;
      cost?: boolean;
      privacy?: boolean;
    }
  ) {
    if (requirements.privacy) {
      return this.ai.ollamaProvider.chat({ /* ... */ }); // Local only
    }
    if (requirements.speed) {
      return this.ai.groqProvider.chat({ /* ... */ }); // 10x faster
    }
    if (requirements.accuracy) {
      return this.ai.anthropicProvider.chat({ /* ... */ }); // Best reasoning
    }
    // Default fallback
    return this.ai.openaiProvider.chat({ /* ... */ });
  }
}
```
@@ -464,8 +555,10 @@ class SmartAIRouter {
```typescript
// Don't wait for the entire response
async function streamResponse(userQuery: string) {
  const stream = await ai.openaiProvider.chatStream(
    createInputStream(userQuery)
  );

  // Process tokens as they arrive
  for await (const chunk of stream) {
    updateUI(chunk); // Immediate feedback
@@ -480,31 +573,71 @@ async function streamResponse(userQuery: string) {
// Get the best answer from multiple AIs
async function consensusQuery(question: string) {
  const providers = [
    ai.openaiProvider.chat({ /* ... */ }),
    ai.anthropicProvider.chat({ /* ... */ }),
    ai.perplexityProvider.chat({ /* ... */ }),
  ];

  const responses = await Promise.all(providers);
  return synthesizeResponses(responses);
}
```
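
`synthesizeResponses` is left open above; one possible sketch (purely illustrative, not part of the library) simply asks a single provider to merge the candidate answers:

```typescript
// Hypothetical helper: merge several ChatResponse candidates into one answer
async function synthesizeResponses(responses: { message: string }[]) {
  const merged = responses
    .map((r, i) => `Candidate ${i + 1}:\n${r.message}`)
    .join('\n\n');

  const synthesis = await ai.openaiProvider.chat({
    systemMessage: 'Combine the candidate answers into one concise, accurate reply.',
    userMessage: merged,
    messageHistory: [],
  });

  return synthesis.message;
}
```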
## 🛠️ Advanced Configuration

### Provider-Specific Options

```typescript
const ai = new SmartAi({
  // OpenAI
  openaiToken: 'sk-...',

  // Anthropic with extended thinking
  anthropicToken: 'sk-ant-...',

  // Perplexity for research
  perplexityToken: 'pplx-...',

  // Groq for speed
  groqToken: 'gsk_...',

  // Mistral with OCR settings
  mistralToken: 'your-key',
  mistral: {
    chatModel: 'mistral-large-latest',
    ocrModel: 'mistral-ocr-latest',
    tableFormat: 'markdown',
  },

  // XAI (Grok)
  xaiToken: 'xai-...',

  // ElevenLabs TTS
  elevenlabsToken: 'sk-...',
  elevenlabs: {
    defaultVoiceId: '19STyYD15bswVz51nqLf',
    defaultModelId: 'eleven_v3',
  },

  // Ollama (local)
  ollama: {
    baseUrl: 'http://localhost:11434',
    model: 'llama2',
    visionModel: 'llava',
    defaultOptions: {
      num_ctx: 4096,
      temperature: 0.7,
      top_p: 0.9,
    },
    defaultTimeout: 120000,
  },

  // Exo (distributed)
  exo: {
    baseUrl: 'http://localhost:8080/v1',
    apiKey: 'optional-key',
  },
});
```
### Error Handling & Fallbacks
@@ -512,7 +645,7 @@ const translatedStream = responseStream.pipeThrough(translationStream);
```typescript
class ResilientAI {
  private providers = ['openai', 'anthropic', 'groq'];

  async query(opts: ChatOptions): Promise<ChatResponse> {
    for (const provider of this.providers) {
      try {
@@ -527,83 +660,27 @@ class ResilientAI {
}
```
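
The hunk above cuts off the retry loop; a complete sketch of the same fallback pattern could look like the following. The dynamic provider lookup and error handling here are assumptions for illustration, not the library's own code.

```typescript
// Illustrative sketch only: iterate providers until one succeeds
class ResilientAI {
  private providers = ['openai', 'anthropic', 'groq'] as const;

  constructor(private ai: SmartAi) {}

  async query(opts: ChatOptions): Promise<ChatResponse> {
    let lastError: unknown;
    for (const provider of this.providers) {
      try {
        // resolves to ai.openaiProvider, ai.anthropicProvider, ai.groqProvider
        return await (this.ai as any)[`${provider}Provider`].chat(opts);
      } catch (error) {
        lastError = error;
        console.warn(`${provider} failed, falling back to the next provider...`);
      }
    }
    throw new Error(`All providers failed: ${String(lastError)}`);
  }
}
```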
### Token Counting & Cost Management
```typescript
// Track usage across providers
class UsageTracker {
async trackedChat(provider: string, options: ChatOptions) {
const start = Date.now();
const response = await ai[`${provider}Provider`].chat(options);
const usage = {
provider,
duration: Date.now() - start,
inputTokens: estimateTokens(options),
outputTokens: estimateTokens(response.message)
};
await this.logUsage(usage);
return response;
}
}
```
## 📦 Installation & Setup
### Prerequisites
- Node.js 16+
- TypeScript 4.5+
- API keys for your chosen providers
### Environment Setup
```bash
# Install
npm install @push.rocks/smartai
# Set up environment variables
export OPENAI_API_KEY=sk-...
export ANTHROPIC_API_KEY=sk-ant-...
export PERPLEXITY_API_KEY=pplx-...
export ELEVENLABS_API_KEY=sk-...
# ... etc
```
### TypeScript Configuration
```json
{
"compilerOptions": {
"target": "ES2022",
"module": "NodeNext",
"lib": ["ES2022"],
"strict": true,
"esModuleInterop": true,
"skipLibCheck": true
}
}
```
## 🎯 Choosing the Right Provider

| Use Case              | Recommended Provider | Why                                                  |
| --------------------- | -------------------- | ---------------------------------------------------- |
| **General Purpose**   | OpenAI               | Most features, stable, well-documented               |
| **Complex Reasoning** | Anthropic            | Superior logical thinking, extended thinking, safer  |
| **Document OCR**      | Mistral              | Native PDF processing, no image conversion overhead  |
| **Research & Facts**  | Perplexity           | Web-aware, provides citations                        |
| **Deep Research**     | OpenAI               | Deep Research API with comprehensive analysis        |
| **Premium TTS**       | ElevenLabs           | Most natural voices, 70+ languages, v3 model         |
| **Speed Critical**    | Groq                 | 10x faster inference, sub-second responses           |
| **Privacy Critical**  | Ollama               | 100% local, no data leaves your servers              |
| **Real-time Data**    | XAI                  | Grok with access to current information              |
| **Cost Sensitive** | Ollama/Exo | Free (local) or distributed compute |
## 📈 Roadmap

- [x] Research & Web Search API
- [x] Image generation support (gpt-image-1, DALL-E 3, DALL-E 2)
- [x] Extended thinking (Anthropic)
- [x] Native PDF OCR (Mistral)
- [ ] Streaming function calls
- [ ] Voice input processing
- [ ] Fine-tuning integration
@@ -613,19 +690,21 @@ export ELEVENLABS_API_KEY=sk-...
## License and Legal Information

This repository contains open-source code licensed under the MIT License. A copy of the license can be found in the [LICENSE](./LICENSE) file.

**Please note:** The MIT License does not grant permission to use the trade names, trademarks, service marks, or product names of the project, except as required for reasonable and customary use in describing the origin of the work and reproducing the content of the NOTICE file.

### Trademarks

This project is owned and maintained by Task Venture Capital GmbH. The names and logos associated with Task Venture Capital GmbH and any related products or services are trademarks of Task Venture Capital GmbH or third parties, and are not included within the scope of the MIT license granted herein.

Use of these trademarks must comply with Task Venture Capital GmbH's Trademark Guidelines or the guidelines of the respective third-party owners, and any usage must be approved in writing. Third-party trademarks used herein are the property of their respective owners and used only in a descriptive manner, e.g. for an implementation of an API or similar.

### Company Information

Task Venture Capital GmbH
Registered at District Court Bremen HRB 35230 HB, Germany

For any legal inquiries or further information, please contact us via email at hello@task.vc.

By using this repository, you acknowledge that you have read this section, agree to comply with its terms, and understand that the licensing of the code does not imply endorsement by Task Venture Capital GmbH of any derivative works.

View File

@@ -1,8 +1,9 @@
import { expect, tap } from '@git.zone/tstest/tapbundle';
import * as qenv from '@push.rocks/qenv';
import { SmartFs, SmartFsProviderNode } from '@push.rocks/smartfs';

const testQenv = new qenv.Qenv('./', './.nogit/');
const smartfs = new SmartFs(new SmartFsProviderNode());

import * as smartai from '../ts/index.js';
@@ -27,7 +28,7 @@ tap.test('ElevenLabs Audio: should create audio response', async () => {
    chunks.push(chunk as Uint8Array);
  }
  const audioBuffer = Buffer.concat(chunks);
  await smartfs.file('./.nogit/testoutput_elevenlabs.mp3').write(audioBuffer);
  console.log(`Audio Buffer length: ${audioBuffer.length}`);
  expect(audioBuffer.length).toBeGreaterThan(0);
});
@@ -42,7 +43,7 @@ tap.test('ElevenLabs Audio: should create audio with custom voice', async () =>
    chunks.push(chunk as Uint8Array);
  }
  const audioBuffer = Buffer.concat(chunks);
  await smartfs.file('./.nogit/testoutput_elevenlabs_custom.mp3').write(audioBuffer);
  console.log(`Audio Buffer length (custom voice): ${audioBuffer.length}`);
  expect(audioBuffer.length).toBeGreaterThan(0);
});

View File

@@ -1,8 +1,9 @@
import { expect, tap } from '@git.zone/tstest/tapbundle';
import * as qenv from '@push.rocks/qenv';
import { SmartFs, SmartFsProviderNode } from '@push.rocks/smartfs';

const testQenv = new qenv.Qenv('./', './.nogit/');
const smartfs = new SmartFs(new SmartFsProviderNode());

import * as smartai from '../ts/index.js';
@@ -26,7 +27,7 @@ tap.test('OpenAI Audio: should create audio response', async () => {
    chunks.push(chunk as Uint8Array);
  }
  const audioBuffer = Buffer.concat(chunks);
  await smartfs.file('./.nogit/testoutput.mp3').write(audioBuffer);
  console.log(`Audio Buffer length: ${audioBuffer.length}`);
  // Assert that the resulting buffer is not empty.
  expect(audioBuffer.length).toBeGreaterThan(0);

View File

@@ -1,4 +1,4 @@
import { expect, tap } from '@git.zone/tstest/tapbundle';
import * as qenv from '@push.rocks/qenv';
const testQenv = new qenv.Qenv('./', './.nogit/');

View File

@@ -1,4 +1,4 @@
import { tap, expect } from '@git.zone/tstest/tapbundle';
import * as smartai from '../ts/index.js';
// Basic instantiation tests that don't require API tokens

View File

@@ -1,4 +1,4 @@
import { expect, tap } from '@git.zone/tstest/tapbundle';
import * as qenv from '@push.rocks/qenv';
const testQenv = new qenv.Qenv('./', './.nogit/');

test/test.chat.mistral.ts Normal file
View File

@@ -0,0 +1,66 @@
import { expect, tap } from '@git.zone/tstest/tapbundle';
import * as qenv from '@push.rocks/qenv';
const testQenv = new qenv.Qenv('./', './.nogit/');
import * as smartai from '../ts/index.js';
let mistralProvider: smartai.MistralProvider;
tap.test('Mistral Chat: should create and start Mistral provider', async () => {
mistralProvider = new smartai.MistralProvider({
mistralToken: await testQenv.getEnvVarOnDemand('MISTRAL_API_KEY'),
});
await mistralProvider.start();
expect(mistralProvider).toBeInstanceOf(smartai.MistralProvider);
});
tap.test('Mistral Chat: should create chat response', async () => {
const userMessage = 'What is the capital of France? Answer in one word.';
const response = await mistralProvider.chat({
systemMessage: 'You are a helpful assistant. Be concise.',
userMessage: userMessage,
messageHistory: [],
});
console.log(`Mistral Chat - User: ${userMessage}`);
console.log(`Mistral Chat - Response: ${response.message}`);
expect(response.role).toEqual('assistant');
expect(response.message).toBeTruthy();
expect(response.message.toLowerCase()).toInclude('paris');
});
tap.test('Mistral Chat: should handle message history', async () => {
const messageHistory: smartai.ChatMessage[] = [
{ role: 'user', content: 'My name is Claude Test' },
{ role: 'assistant', content: 'Nice to meet you, Claude Test!' }
];
const response = await mistralProvider.chat({
systemMessage: 'You are a helpful assistant with good memory.',
userMessage: 'What is my name?',
messageHistory: messageHistory,
});
console.log(`Mistral Memory Test - Response: ${response.message}`);
expect(response.message.toLowerCase()).toInclude('claude test');
});
tap.test('Mistral Chat: should handle longer conversations', async () => {
const response = await mistralProvider.chat({
systemMessage: 'You are a helpful coding assistant.',
userMessage: 'Write a simple hello world function in TypeScript. Keep it brief.',
messageHistory: [],
});
console.log(`Mistral Coding Test - Response: ${response.message}`);
expect(response.message).toBeTruthy();
// Should contain some TypeScript/function code
expect(response.message).toInclude('function');
});
tap.test('Mistral Chat: should stop the provider', async () => {
await mistralProvider.stop();
});
export default tap.start();

View File

@@ -1,4 +1,4 @@
import { expect, tap } from '@git.zone/tstest/tapbundle';
import * as qenv from '@push.rocks/qenv';
const testQenv = new qenv.Qenv('./', './.nogit/');

View File

@@ -1,9 +1,10 @@
import { expect, tap } from '@git.zone/tstest/tapbundle';
import * as qenv from '@push.rocks/qenv';
import * as smartrequest from '@push.rocks/smartrequest';
import { SmartFs, SmartFsProviderNode } from '@push.rocks/smartfs';

const testQenv = new qenv.Qenv('./', './.nogit/');
const smartfs = new SmartFs(new SmartFsProviderNode());

import * as smartai from '../ts/index.js';
@@ -41,7 +42,7 @@ tap.test('Anthropic Document: should handle complex document analysis', async ()
  let pdfBuffer: Uint8Array;
  try {
    pdfBuffer = await smartfs.file(pdfPath).read();
  } catch (error) {
    // If the file doesn't exist, use the dummy PDF
    console.log('Demo PDF not found, using dummy PDF instead');

View File

@@ -0,0 +1,100 @@
import { expect, tap } from '@git.zone/tstest/tapbundle';
import * as qenv from '@push.rocks/qenv';
import * as smartrequest from '@push.rocks/smartrequest';
import { SmartFs, SmartFsProviderNode } from '@push.rocks/smartfs';
const testQenv = new qenv.Qenv('./', './.nogit/');
const smartfs = new SmartFs(new SmartFsProviderNode());
import * as smartai from '../ts/index.js';
let mistralProvider: smartai.MistralProvider;
tap.test('Mistral Document: should create and start Mistral provider', async () => {
mistralProvider = new smartai.MistralProvider({
mistralToken: await testQenv.getEnvVarOnDemand('MISTRAL_API_KEY'),
tableFormat: 'markdown',
});
await mistralProvider.start();
expect(mistralProvider).toBeInstanceOf(smartai.MistralProvider);
});
tap.test('Mistral Document: should process a PDF document', async () => {
const pdfUrl = 'https://www.w3.org/WAI/ER/tests/xhtml/testfiles/resources/pdf/dummy.pdf';
const pdfResponse = await smartrequest.SmartRequest.create()
.url(pdfUrl)
.get();
const result = await mistralProvider.document({
systemMessage: 'Classify the document. Only the following answers are allowed: "invoice", "bank account statement", "contract", "test document", "other". The answer should only contain the keyword for machine use.',
userMessage: 'Classify this document.',
messageHistory: [],
pdfDocuments: [Buffer.from(await pdfResponse.arrayBuffer())],
});
console.log(`Mistral Document - Result:`, result);
expect(result).toBeTruthy();
expect(result.message).toBeTruthy();
});
tap.test('Mistral Document: should handle complex document analysis', async () => {
// Test with the demo PDF if it exists
const pdfPath = './.nogit/demo_without_textlayer.pdf';
let pdfBuffer: Uint8Array;
try {
pdfBuffer = await smartfs.file(pdfPath).read();
} catch (error) {
// If the file doesn't exist, use the dummy PDF
console.log('Demo PDF not found, using dummy PDF instead');
const pdfUrl = 'https://www.w3.org/WAI/ER/tests/xhtml/testfiles/resources/pdf/dummy.pdf';
const pdfResponse = await smartrequest.SmartRequest.create()
.url(pdfUrl)
.get();
pdfBuffer = Buffer.from(await pdfResponse.arrayBuffer());
}
const result = await mistralProvider.document({
systemMessage: `
Analyze this document and provide a JSON response with the following structure:
{
"documentType": "string",
"hasText": boolean,
"summary": "string"
}
`,
userMessage: 'Analyze this document.',
messageHistory: [],
pdfDocuments: [pdfBuffer],
});
console.log(`Mistral Complex Document Analysis:`, result);
expect(result).toBeTruthy();
expect(result.message).toBeTruthy();
});
tap.test('Mistral Document: should process multiple PDF documents', async () => {
const pdfUrl = 'https://www.w3.org/WAI/ER/tests/xhtml/testfiles/resources/pdf/dummy.pdf';
const pdfResponse = await smartrequest.SmartRequest.create()
.url(pdfUrl)
.get();
const pdfBuffer = Buffer.from(await pdfResponse.arrayBuffer());
const result = await mistralProvider.document({
systemMessage: 'You are a document comparison assistant.',
userMessage: 'Are these two documents the same? Answer yes or no.',
messageHistory: [],
pdfDocuments: [pdfBuffer, pdfBuffer], // Same document twice for test
});
console.log(`Mistral Multi-Document - Result:`, result);
expect(result).toBeTruthy();
expect(result.message).toBeTruthy();
});
tap.test('Mistral Document: should stop the provider', async () => {
await mistralProvider.stop();
});
export default tap.start();

View File

@@ -1,9 +1,10 @@
import { expect, tap } from '@git.zone/tstest/tapbundle';
import * as qenv from '@push.rocks/qenv';
import * as smartrequest from '@push.rocks/smartrequest';
import { SmartFs, SmartFsProviderNode } from '@push.rocks/smartfs';

const testQenv = new qenv.Qenv('./', './.nogit/');
const smartfs = new SmartFs(new SmartFsProviderNode());

import * as smartai from '../ts/index.js';
@@ -32,7 +33,7 @@ tap.test('OpenAI Document: should document a pdf', async () => {
});

tap.test('OpenAI Document: should recognize companies in a pdf', async () => {
  const pdfBuffer = await smartfs.file('./.nogit/demo_without_textlayer.pdf').read();
  const result = await testSmartai.openaiProvider.document({
    systemMessage: `
summarize the document.

View File

@@ -1,4 +1,4 @@
import { expect, tap } from '@git.zone/tstest/tapbundle';
import * as qenv from '@push.rocks/qenv';
import * as smartai from '../ts/index.js';
import * as path from 'path';

View File

@@ -1,4 +1,4 @@
import { tap, expect } from '@git.zone/tstest/tapbundle';
import * as smartai from '../ts/index.js';
// Test interface exports and type checking

View File

@@ -1,4 +1,4 @@
import { expect, tap } from '@git.zone/tstest/tapbundle';
import * as qenv from '@push.rocks/qenv';
import * as smartai from '../ts/index.js';
import * as path from 'path';

View File

@@ -1,4 +1,4 @@
import { expect, tap } from '@git.zone/tstest/tapbundle';
import * as qenv from '@push.rocks/qenv';
import * as smartai from '../ts/index.js';
import * as path from 'path';

View File

@@ -1,4 +1,4 @@
import { tap, expect } from '@git.zone/tstest/tapbundle';
import * as smartai from '../ts/index.js';
// Test research method stubs for providers without full implementation

View File

@@ -0,0 +1,151 @@
import { expect, tap } from '@git.zone/tstest/tapbundle';
import * as qenv from '@push.rocks/qenv';
const testQenv = new qenv.Qenv('./', './.nogit/');
import * as smartai from '../ts/index.js';
let anthropicProviderQuick: smartai.AnthropicProvider;
let anthropicProviderNormal: smartai.AnthropicProvider;
let anthropicProviderDeep: smartai.AnthropicProvider;
let anthropicProviderOff: smartai.AnthropicProvider;
// Test 'quick' mode
tap.test('Extended Thinking: should create Anthropic provider with quick mode', async () => {
anthropicProviderQuick = new smartai.AnthropicProvider({
anthropicToken: await testQenv.getEnvVarOnDemand('ANTHROPIC_TOKEN'),
extendedThinking: 'quick',
});
await anthropicProviderQuick.start();
expect(anthropicProviderQuick).toBeInstanceOf(smartai.AnthropicProvider);
});
tap.test('Extended Thinking: should chat with quick mode (2048 tokens)', async () => {
const userMessage = 'Explain quantum entanglement in simple terms.';
const response = await anthropicProviderQuick.chat({
systemMessage: 'You are a helpful physics teacher.',
userMessage: userMessage,
messageHistory: [],
});
console.log(`Quick Mode - User: ${userMessage}`);
console.log(`Quick Mode - Response length: ${response.message.length} chars`);
expect(response.role).toEqual('assistant');
expect(response.message).toBeTruthy();
expect(response.message.toLowerCase()).toInclude('quantum');
});
tap.test('Extended Thinking: should stop quick mode provider', async () => {
await anthropicProviderQuick.stop();
});
// Test 'normal' mode (default)
tap.test('Extended Thinking: should create Anthropic provider with normal mode (default)', async () => {
anthropicProviderNormal = new smartai.AnthropicProvider({
anthropicToken: await testQenv.getEnvVarOnDemand('ANTHROPIC_TOKEN'),
// extendedThinking not specified, should default to 'normal'
});
await anthropicProviderNormal.start();
expect(anthropicProviderNormal).toBeInstanceOf(smartai.AnthropicProvider);
});
tap.test('Extended Thinking: should chat with normal mode (8000 tokens default)', async () => {
const userMessage = 'What are the implications of the P vs NP problem?';
const response = await anthropicProviderNormal.chat({
systemMessage: 'You are a helpful computer science expert.',
userMessage: userMessage,
messageHistory: [],
});
console.log(`Normal Mode - User: ${userMessage}`);
console.log(`Normal Mode - Response length: ${response.message.length} chars`);
expect(response.role).toEqual('assistant');
expect(response.message).toBeTruthy();
expect(response.message.length).toBeGreaterThan(50);
});
tap.test('Extended Thinking: should stop normal mode provider', async () => {
await anthropicProviderNormal.stop();
});
// Test 'deep' mode
tap.test('Extended Thinking: should create Anthropic provider with deep mode', async () => {
anthropicProviderDeep = new smartai.AnthropicProvider({
anthropicToken: await testQenv.getEnvVarOnDemand('ANTHROPIC_TOKEN'),
extendedThinking: 'deep',
});
await anthropicProviderDeep.start();
expect(anthropicProviderDeep).toBeInstanceOf(smartai.AnthropicProvider);
});
tap.test('Extended Thinking: should chat with deep mode (16000 tokens)', async () => {
const userMessage = 'Analyze the philosophical implications of artificial consciousness.';
const response = await anthropicProviderDeep.chat({
systemMessage: 'You are a philosopher and cognitive scientist.',
userMessage: userMessage,
messageHistory: [],
});
console.log(`Deep Mode - User: ${userMessage}`);
console.log(`Deep Mode - Response length: ${response.message.length} chars`);
expect(response.role).toEqual('assistant');
expect(response.message).toBeTruthy();
expect(response.message.length).toBeGreaterThan(100);
});
tap.test('Extended Thinking: should stop deep mode provider', async () => {
await anthropicProviderDeep.stop();
});
// Test 'off' mode
tap.test('Extended Thinking: should create Anthropic provider with thinking disabled', async () => {
anthropicProviderOff = new smartai.AnthropicProvider({
anthropicToken: await testQenv.getEnvVarOnDemand('ANTHROPIC_TOKEN'),
extendedThinking: 'off',
});
await anthropicProviderOff.start();
expect(anthropicProviderOff).toBeInstanceOf(smartai.AnthropicProvider);
});
tap.test('Extended Thinking: should chat with thinking disabled', async () => {
const userMessage = 'What is 2 + 2?';
const response = await anthropicProviderOff.chat({
systemMessage: 'You are a helpful assistant.',
userMessage: userMessage,
messageHistory: [],
});
console.log(`Thinking Off - User: ${userMessage}`);
console.log(`Thinking Off - Response: ${response.message}`);
expect(response.role).toEqual('assistant');
expect(response.message).toBeTruthy();
expect(response.message).toInclude('4');
});
tap.test('Extended Thinking: should stop off mode provider', async () => {
await anthropicProviderOff.stop();
});
// Test with vision method
tap.test('Extended Thinking: should work with vision method', async () => {
const provider = new smartai.AnthropicProvider({
anthropicToken: await testQenv.getEnvVarOnDemand('ANTHROPIC_TOKEN'),
extendedThinking: 'normal',
});
await provider.start();
// Create a simple test image (1x1 red pixel PNG)
const redPixelPng = Buffer.from(
'iVBORw0KGgoAAAANSUhEUgAAAAEAAAABCAYAAAAfFcSJAAAADUlEQVR42mP8z8DwHwAFBQIAX8jx0gAAAABJRU5ErkJggg==',
'base64'
);
const response = await provider.vision({
image: redPixelPng,
prompt: 'What color is this image?',
});
console.log(`Vision with Thinking - Response: ${response}`);
expect(response).toBeTruthy();
expect(response.toLowerCase()).toInclude('red');
await provider.stop();
});
export default tap.start();

View File

@@ -1,8 +1,9 @@
import { expect, tap } from '@git.zone/tstest/tapbundle';
import * as qenv from '@push.rocks/qenv';
import { SmartFs, SmartFsProviderNode } from '@push.rocks/smartfs';

const testQenv = new qenv.Qenv('./', './.nogit/');
const smartfs = new SmartFs(new SmartFsProviderNode());

import * as smartai from '../ts/index.js';
@@ -21,7 +22,7 @@ tap.test('Anthropic Vision: should analyze coffee image with latte art', async (
  const imagePath = './test/testimages/coffee-dani/coffee.jpg';
  console.log(`Loading coffee image from: ${imagePath}`);
  const imageBuffer = await smartfs.file(imagePath).read();
  console.log(`Image loaded, size: ${imageBuffer.length} bytes`);
  const result = await anthropicProvider.vision({
@@ -45,7 +46,7 @@ tap.test('Anthropic Vision: should analyze laptop/workspace image', async () =>
  const imagePath = './test/testimages/laptop-nicolas/laptop.jpg';
  console.log(`Loading laptop image from: ${imagePath}`);
  const imageBuffer = await smartfs.file(imagePath).read();
  console.log(`Image loaded, size: ${imageBuffer.length} bytes`);
  const result = await anthropicProvider.vision({
@@ -69,7 +70,7 @@ tap.test('Anthropic Vision: should analyze receipt/document image', async () =>
  const imagePath = './test/testimages/receipt-annie/receipt.jpg';
  console.log(`Loading receipt image from: ${imagePath}`);
  const imageBuffer = await smartfs.file(imagePath).read();
  console.log(`Image loaded, size: ${imageBuffer.length} bytes`);
  const result = await anthropicProvider.vision({

View File

@@ -3,6 +3,6 @@
 */
export const commitinfo = {
  name: '@push.rocks/smartai',
  version: '0.13.2',
  description: 'SmartAi is a versatile TypeScript library designed to facilitate integration and interaction with various AI models, offering functionalities for chat, audio generation, document processing, and vision tasks.'
}

View File

@@ -6,6 +6,10 @@ import * as plugins from './plugins.js';
export interface ChatMessage {
  role: 'assistant' | 'user' | 'system';
  content: string;
  /** Base64-encoded images for vision-capable models */
  images?: string[];
  /** Chain-of-thought reasoning for GPT-OSS models (e.g., Ollama) */
  reasoning?: string;
}
/**
@@ -15,6 +19,16 @@ export interface ChatOptions {
  systemMessage: string;
  userMessage: string;
  messageHistory: ChatMessage[];
  /** Base64-encoded images for the current message (vision-capable models) */
  images?: string[];
}

/**
 * Options for streaming chat interactions
 */
export interface StreamingChatOptions extends ChatOptions {
  /** Callback fired for each token during generation */
  onToken?: (token: string) => void;
}
/**
@@ -23,6 +37,8 @@ export interface ChatOptions {
export interface ChatResponse {
  role: 'assistant';
  message: string;
  /** Chain-of-thought reasoning from reasoning models */
  reasoning?: string;
}
/**
@@ -111,19 +127,30 @@ export interface ImageResponse {
export abstract class MultiModalModel {
  /**
   * SmartPdf instance for document processing
   * Lazy-loaded only when PDF processing is needed to avoid starting browser unnecessarily
   */
  protected smartpdfInstance: plugins.smartpdf.SmartPdf | null = null;

  /**
   * Ensures SmartPdf instance is initialized and ready
   * Call this before using smartpdfInstance in document processing methods
   */
  protected async ensureSmartpdfReady(): Promise<void> {
    if (!this.smartpdfInstance) {
      this.smartpdfInstance = new plugins.smartpdf.SmartPdf();
      await this.smartpdfInstance.start();
    }
  }

  /**
   * Initializes the model and any necessary resources
   * Should be called before using any other methods
   */
  public async start(): Promise<void> {
    // SmartPdf is now lazy-loaded only when needed for PDF processing
    // This avoids starting a browser unless document() method is actually used
  }

  /**
   * Cleans up any resources used by the model
   * Should be called when the model is no longer needed
@@ -131,6 +158,7 @@ export abstract class MultiModalModel {
  public async stop(): Promise<void> {
    if (this.smartpdfInstance) {
      await this.smartpdfInstance.stop();
      this.smartpdfInstance = null;
    }
  }
@@ -140,7 +168,7 @@ export abstract class MultiModalModel {
   * @returns Promise resolving to the assistant's response
   */
  public abstract chat(optionsArg: ChatOptions): Promise<ChatResponse>;

  /**
   * Streaming interface for chat interactions
   * Allows for real-time responses from the model
@@ -149,6 +177,14 @@ export abstract class MultiModalModel {
   */
  public abstract chatStream(input: ReadableStream<Uint8Array>): Promise<ReadableStream<string>>;

  /**
   * Streaming chat with token callback
   * Calls onToken for each token generated, returns final response
   * @param optionsArg Options containing system message, user message, message history, and onToken callback
   * @returns Promise resolving to the assistant's response
   */
  public chatStreaming?(optionsArg: StreamingChatOptions): Promise<ChatResponse>;

  /**
   * Text-to-speech conversion
   * @param optionsArg Options containing the message to convert to speech

View File

@@ -84,6 +84,18 @@ export class Conversation {
    return conversation;
  }

  public static async createWithMistral(smartaiRefArg: SmartAi) {
    if (!smartaiRefArg.mistralProvider) {
      throw new Error('Mistral provider not available');
    }
    const conversation = new Conversation(smartaiRefArg, {
      processFunction: async (input) => {
        return '' // TODO implement proper streaming
      }
    });
    return conversation;
  }

  public static async createWithXai(smartaiRefArg: SmartAi) {
    if (!smartaiRefArg.xaiProvider) {
      throw new Error('XAI provider not available');

View File

@@ -2,7 +2,8 @@ import { Conversation } from './classes.conversation.js';
import * as plugins from './plugins.js';
import { AnthropicProvider } from './provider.anthropic.js';
import { ElevenLabsProvider } from './provider.elevenlabs.js';
import { MistralProvider } from './provider.mistral.js';
import { OllamaProvider, type IOllamaModelOptions } from './provider.ollama.js';
import { OpenAiProvider } from './provider.openai.js';
import { PerplexityProvider } from './provider.perplexity.js';
import { ExoProvider } from './provider.exo.js';
@@ -15,16 +16,24 @@ export interface ISmartAiOptions {
anthropicToken?: string; anthropicToken?: string;
perplexityToken?: string; perplexityToken?: string;
groqToken?: string; groqToken?: string;
mistralToken?: string;
xaiToken?: string; xaiToken?: string;
elevenlabsToken?: string; elevenlabsToken?: string;
exo?: { exo?: {
baseUrl?: string; baseUrl?: string;
apiKey?: string; apiKey?: string;
}; };
mistral?: {
chatModel?: string;
ocrModel?: string;
tableFormat?: 'markdown' | 'html';
};
ollama?: { ollama?: {
baseUrl?: string; baseUrl?: string;
model?: string; model?: string;
visionModel?: string; visionModel?: string;
defaultOptions?: IOllamaModelOptions;
defaultTimeout?: number;
}; };
elevenlabs?: { elevenlabs?: {
defaultVoiceId?: string; defaultVoiceId?: string;
@@ -32,7 +41,7 @@ export interface ISmartAiOptions {
}; };
} }
export type TProvider = 'openai' | 'anthropic' | 'perplexity' | 'ollama' | 'exo' | 'groq' | 'xai' | 'elevenlabs'; export type TProvider = 'openai' | 'anthropic' | 'perplexity' | 'ollama' | 'exo' | 'groq' | 'mistral' | 'xai' | 'elevenlabs';
export class SmartAi { export class SmartAi {
public options: ISmartAiOptions; public options: ISmartAiOptions;
@@ -43,6 +52,7 @@ export class SmartAi {
public ollamaProvider: OllamaProvider; public ollamaProvider: OllamaProvider;
public exoProvider: ExoProvider; public exoProvider: ExoProvider;
public groqProvider: GroqProvider; public groqProvider: GroqProvider;
public mistralProvider: MistralProvider;
public xaiProvider: XAIProvider; public xaiProvider: XAIProvider;
public elevenlabsProvider: ElevenLabsProvider; public elevenlabsProvider: ElevenLabsProvider;
@@ -75,6 +85,15 @@ export class SmartAi {
}); });
await this.groqProvider.start(); await this.groqProvider.start();
} }
if (this.options.mistralToken) {
this.mistralProvider = new MistralProvider({
mistralToken: this.options.mistralToken,
chatModel: this.options.mistral?.chatModel,
ocrModel: this.options.mistral?.ocrModel,
tableFormat: this.options.mistral?.tableFormat,
});
await this.mistralProvider.start();
}
if (this.options.xaiToken) { if (this.options.xaiToken) {
this.xaiProvider = new XAIProvider({ this.xaiProvider = new XAIProvider({
xaiToken: this.options.xaiToken, xaiToken: this.options.xaiToken,
@@ -94,6 +113,8 @@ export class SmartAi {
baseUrl: this.options.ollama.baseUrl, baseUrl: this.options.ollama.baseUrl,
model: this.options.ollama.model, model: this.options.ollama.model,
visionModel: this.options.ollama.visionModel, visionModel: this.options.ollama.visionModel,
defaultOptions: this.options.ollama.defaultOptions,
defaultTimeout: this.options.ollama.defaultTimeout,
}); });
await this.ollamaProvider.start(); await this.ollamaProvider.start();
} }
@@ -119,6 +140,9 @@ export class SmartAi {
if (this.groqProvider) { if (this.groqProvider) {
await this.groqProvider.stop(); await this.groqProvider.stop();
} }
if (this.mistralProvider) {
await this.mistralProvider.stop();
}
if (this.xaiProvider) { if (this.xaiProvider) {
await this.xaiProvider.stop(); await this.xaiProvider.stop();
} }
@@ -150,6 +174,8 @@ export class SmartAi {
return Conversation.createWithOllama(this); return Conversation.createWithOllama(this);
case 'groq': case 'groq':
return Conversation.createWithGroq(this); return Conversation.createWithGroq(this);
case 'mistral':
return Conversation.createWithMistral(this);
case 'xai': case 'xai':
return Conversation.createWithXai(this); return Conversation.createWithXai(this);
case 'elevenlabs': case 'elevenlabs':
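
Putting the new wiring together, a sketch of constructing SmartAi with the Mistral token and the extended Ollama options (tokens and model names are placeholders):

import { SmartAi } from './classes.smartai.js';

const smartAi = new SmartAi({
  mistralToken: process.env.MISTRAL_TOKEN,               // enables the new MistralProvider
  mistral: { chatModel: 'mistral-large-latest', tableFormat: 'markdown' },
  ollama: {
    baseUrl: 'http://localhost:11434',
    model: 'llama3.2',                                    // illustrative model name
    defaultOptions: { temperature: 0.2, num_ctx: 8192 },  // forwarded to every Ollama request
    defaultTimeout: 60_000,
  },
});
await smartAi.start();

// Providers are exposed as public properties once started
const reply = await smartAi.mistralProvider.chat({
  systemMessage: 'You are terse.',
  userMessage: 'Say hello.',
  messageHistory: [],
});
console.log(reply.message);
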

View File

@@ -4,6 +4,7 @@ export * from './provider.openai.js';
export * from './provider.anthropic.js'; export * from './provider.anthropic.js';
export * from './provider.perplexity.js'; export * from './provider.perplexity.js';
export * from './provider.groq.js'; export * from './provider.groq.js';
export * from './provider.mistral.js';
export * from './provider.ollama.js'; export * from './provider.ollama.js';
export * from './provider.xai.js'; export * from './provider.xai.js';
export * from './provider.exo.js'; export * from './provider.exo.js';

View File

@@ -8,7 +8,7 @@ export {
// @push.rocks scope // @push.rocks scope
import * as qenv from '@push.rocks/qenv'; import * as qenv from '@push.rocks/qenv';
import * as smartarray from '@push.rocks/smartarray'; import * as smartarray from '@push.rocks/smartarray';
import * as smartfile from '@push.rocks/smartfile'; import * as smartfs from '@push.rocks/smartfs';
import * as smartpath from '@push.rocks/smartpath'; import * as smartpath from '@push.rocks/smartpath';
import * as smartpdf from '@push.rocks/smartpdf'; import * as smartpdf from '@push.rocks/smartpdf';
import * as smartpromise from '@push.rocks/smartpromise'; import * as smartpromise from '@push.rocks/smartpromise';
@@ -18,7 +18,7 @@ import * as webstream from '@push.rocks/webstream';
export { export {
smartarray, smartarray,
qenv, qenv,
smartfile, smartfs,
smartpath, smartpath,
smartpdf, smartpdf,
smartpromise, smartpromise,
@@ -28,9 +28,11 @@ export {
// third party // third party
import * as anthropic from '@anthropic-ai/sdk'; import * as anthropic from '@anthropic-ai/sdk';
import * as mistralai from '@mistralai/mistralai';
import * as openai from 'openai'; import * as openai from 'openai';
export { export {
anthropic, anthropic,
mistralai,
openai, openai,
} }

View File

@@ -20,6 +20,7 @@ export interface IAnthropicProviderOptions {
enableWebSearch?: boolean; enableWebSearch?: boolean;
searchDomainAllowList?: string[]; searchDomainAllowList?: string[];
searchDomainBlockList?: string[]; searchDomainBlockList?: string[];
extendedThinking?: 'quick' | 'normal' | 'deep' | 'off';
} }
export class AnthropicProvider extends MultiModalModel { export class AnthropicProvider extends MultiModalModel {
@@ -42,6 +43,25 @@ export class AnthropicProvider extends MultiModalModel {
await super.stop(); await super.stop();
} }
/**
* Returns the thinking configuration based on provider options.
* Defaults to 'normal' mode (8000 tokens) if not specified.
*/
private getThinkingConfig(): { type: 'enabled'; budget_tokens: number } | undefined {
const mode = this.options.extendedThinking ?? 'normal';
const budgetMap = {
quick: 2048,
normal: 8000,
deep: 16000,
off: 0,
};
const budget = budgetMap[mode];
return budget > 0 ? { type: 'enabled', budget_tokens: budget } : undefined;
}
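
A sketch of opting into a different thinking budget at construction time; the token option is assumed from the existing IAnthropicProviderOptions fields not shown in this hunk:

import { AnthropicProvider } from './provider.anthropic.js';

const anthropic = new AnthropicProvider({
  anthropicToken: 'your-anthropic-api-key', // assumed existing option, placeholder value
  extendedThinking: 'deep',                 // 16000-token thinking budget; 'off' disables thinking entirely
});

With 'off', getThinkingConfig() returns undefined, so the thinking block is simply omitted from the request.
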
public async chatStream(input: ReadableStream<Uint8Array>): Promise<ReadableStream<string>> { public async chatStream(input: ReadableStream<Uint8Array>): Promise<ReadableStream<string>> {
// Create a TextDecoder to handle incoming chunks // Create a TextDecoder to handle incoming chunks
const decoder = new TextDecoder(); const decoder = new TextDecoder();
@@ -76,12 +96,14 @@ export class AnthropicProvider extends MultiModalModel {
// If we have a complete message, send it to Anthropic // If we have a complete message, send it to Anthropic
if (currentMessage) { if (currentMessage) {
const thinkingConfig = this.getThinkingConfig();
const stream = await this.anthropicApiClient.messages.create({ const stream = await this.anthropicApiClient.messages.create({
model: 'claude-sonnet-4-5-20250929', model: 'claude-sonnet-4-5-20250929',
messages: [{ role: currentMessage.role, content: currentMessage.content }], messages: [{ role: currentMessage.role, content: currentMessage.content }],
system: '', system: '',
stream: true, stream: true,
max_tokens: 4000, max_tokens: 20000,
...(thinkingConfig && { thinking: thinkingConfig }),
}); });
// Process each chunk from Anthropic // Process each chunk from Anthropic
@@ -120,6 +142,7 @@ export class AnthropicProvider extends MultiModalModel {
content: msg.content content: msg.content
})); }));
const thinkingConfig = this.getThinkingConfig();
const result = await this.anthropicApiClient.messages.create({ const result = await this.anthropicApiClient.messages.create({
model: 'claude-sonnet-4-5-20250929', model: 'claude-sonnet-4-5-20250929',
system: optionsArg.systemMessage, system: optionsArg.systemMessage,
@@ -127,7 +150,8 @@ export class AnthropicProvider extends MultiModalModel {
...messages, ...messages,
{ role: 'user' as const, content: optionsArg.userMessage } { role: 'user' as const, content: optionsArg.userMessage }
], ],
max_tokens: 4000, max_tokens: 20000,
...(thinkingConfig && { thinking: thinkingConfig }),
}); });
// Extract text content from the response // Extract text content from the response
@@ -167,13 +191,15 @@ export class AnthropicProvider extends MultiModalModel {
} }
]; ];
const thinkingConfig = this.getThinkingConfig();
const result = await this.anthropicApiClient.messages.create({ const result = await this.anthropicApiClient.messages.create({
model: 'claude-sonnet-4-5-20250929', model: 'claude-sonnet-4-5-20250929',
messages: [{ messages: [{
role: 'user', role: 'user',
content content
}], }],
max_tokens: 1024 max_tokens: 10000,
...(thinkingConfig && { thinking: thinkingConfig }),
}); });
// Extract text content from the response // Extract text content from the response
@@ -192,11 +218,14 @@ export class AnthropicProvider extends MultiModalModel {
pdfDocuments: Uint8Array[]; pdfDocuments: Uint8Array[];
messageHistory: ChatMessage[]; messageHistory: ChatMessage[];
}): Promise<{ message: any }> { }): Promise<{ message: any }> {
// Ensure SmartPdf is initialized before processing documents
await this.ensureSmartpdfReady();
// Convert PDF documents to images using SmartPDF // Convert PDF documents to images using SmartPDF
let documentImageBytesArray: Uint8Array[] = []; let documentImageBytesArray: Uint8Array[] = [];
for (const pdfDocument of optionsArg.pdfDocuments) { for (const pdfDocument of optionsArg.pdfDocuments) {
const documentImageArray = await this.smartpdfInstance.convertPDFToPngBytes(pdfDocument); const documentImageArray = await this.smartpdfInstance!.convertPDFToPngBytes(pdfDocument);
documentImageBytesArray = documentImageBytesArray.concat(documentImageArray); documentImageBytesArray = documentImageBytesArray.concat(documentImageArray);
} }
@@ -226,6 +255,7 @@ export class AnthropicProvider extends MultiModalModel {
}); });
} }
const thinkingConfig = this.getThinkingConfig();
const result = await this.anthropicApiClient.messages.create({ const result = await this.anthropicApiClient.messages.create({
model: 'claude-sonnet-4-5-20250929', model: 'claude-sonnet-4-5-20250929',
system: optionsArg.systemMessage, system: optionsArg.systemMessage,
@@ -233,7 +263,8 @@ export class AnthropicProvider extends MultiModalModel {
...messages, ...messages,
{ role: 'user', content } { role: 'user', content }
], ],
max_tokens: 4096 max_tokens: 20000,
...(thinkingConfig && { thinking: thinkingConfig }),
}); });
// Extract text content from the response // Extract text content from the response
@@ -283,10 +314,14 @@ export class AnthropicProvider extends MultiModalModel {
} }
// Configure the request based on search depth // Configure the request based on search depth
const maxTokens = optionsArg.searchDepth === 'deep' ? 8192 : const maxTokens = optionsArg.searchDepth === 'deep' ? 20000 :
optionsArg.searchDepth === 'advanced' ? 6144 : 4096; optionsArg.searchDepth === 'advanced' ? 20000 : 20000;
// Add thinking configuration if enabled
const thinkingConfig = this.getThinkingConfig();
// Create the research request // Create the research request
// Note: When thinking is enabled, temperature must be 1 (or omitted)
const requestParams: any = { const requestParams: any = {
model: 'claude-sonnet-4-5-20250929', model: 'claude-sonnet-4-5-20250929',
system: systemMessage, system: systemMessage,
@@ -297,7 +332,8 @@ export class AnthropicProvider extends MultiModalModel {
} }
], ],
max_tokens: maxTokens, max_tokens: maxTokens,
temperature: 0.7 // Only set temperature when thinking is NOT enabled
...(thinkingConfig ? {} : { temperature: 0.7 })
}; };
// Add tools if web search is enabled // Add tools if web search is enabled
@@ -305,6 +341,11 @@ export class AnthropicProvider extends MultiModalModel {
requestParams.tools = tools; requestParams.tools = tools;
} }
// Add thinking configuration if enabled
if (thinkingConfig) {
requestParams.thinking = thinkingConfig;
}
// Execute the research request // Execute the research request
const result = await this.anthropicApiClient.messages.create(requestParams); const result = await this.anthropicApiClient.messages.create(requestParams);

View File

@@ -1,4 +1,5 @@
import * as plugins from './plugins.js'; import * as plugins from './plugins.js';
import { Readable } from 'stream';
import { MultiModalModel } from './abstract.classes.multimodal.js'; import { MultiModalModel } from './abstract.classes.multimodal.js';
import type { import type {
@@ -83,7 +84,8 @@ export class ElevenLabsProvider extends MultiModalModel {
throw new Error(`ElevenLabs API error: ${response.status} ${response.statusText} - ${errorText}`); throw new Error(`ElevenLabs API error: ${response.status} ${response.statusText} - ${errorText}`);
} }
const nodeStream = response.streamNode(); const webStream = response.stream();
const nodeStream = Readable.fromWeb(webStream as any);
return nodeStream; return nodeStream;
} }

ts/provider.mistral.ts (new file, 352 lines)
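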
View File

@@ -0,0 +1,352 @@
import * as plugins from './plugins.js';
import { MultiModalModel } from './abstract.classes.multimodal.js';
import type {
ChatOptions,
ChatResponse,
ChatMessage,
ResearchOptions,
ResearchResponse,
ImageGenerateOptions,
ImageEditOptions,
ImageResponse
} from './abstract.classes.multimodal.js';
export interface IMistralProviderOptions {
mistralToken: string;
chatModel?: string; // default: 'mistral-large-latest'
ocrModel?: string; // default: 'mistral-ocr-latest'
tableFormat?: 'markdown' | 'html';
}
export class MistralProvider extends MultiModalModel {
private options: IMistralProviderOptions;
public mistralClient: plugins.mistralai.Mistral;
constructor(optionsArg: IMistralProviderOptions) {
super();
this.options = optionsArg;
}
async start() {
await super.start();
this.mistralClient = new plugins.mistralai.Mistral({
apiKey: this.options.mistralToken,
});
}
async stop() {
await super.stop();
}
/**
* Synchronous chat interaction using Mistral's chat API
*/
public async chat(optionsArg: ChatOptions): Promise<ChatResponse> {
// Convert message history to Mistral format
const messages: Array<{
role: 'system' | 'user' | 'assistant';
content: string;
}> = [];
// Add system message first
if (optionsArg.systemMessage) {
messages.push({
role: 'system',
content: optionsArg.systemMessage
});
}
// Add message history
for (const msg of optionsArg.messageHistory) {
messages.push({
role: msg.role === 'system' ? 'system' : msg.role === 'assistant' ? 'assistant' : 'user',
content: msg.content
});
}
// Add current user message
messages.push({
role: 'user',
content: optionsArg.userMessage
});
const result = await this.mistralClient.chat.complete({
model: this.options.chatModel || 'mistral-large-latest',
messages: messages,
});
// Extract content from response
const choice = result.choices?.[0];
let content = '';
if (choice?.message?.content) {
if (typeof choice.message.content === 'string') {
content = choice.message.content;
} else if (Array.isArray(choice.message.content)) {
// Handle array of content chunks
content = choice.message.content
.map((chunk: any) => {
if (typeof chunk === 'string') return chunk;
if (chunk && typeof chunk === 'object' && 'text' in chunk) return chunk.text;
return '';
})
.join('');
}
}
return {
role: 'assistant',
message: content,
};
}
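
A sketch of a basic chat call against the new provider (the API key is a placeholder):

import { MistralProvider } from './provider.mistral.js';

const mistral = new MistralProvider({ mistralToken: 'your-mistral-api-key' });
await mistral.start();

const answer = await mistral.chat({
  systemMessage: 'Answer in one sentence.',
  userMessage: 'What does OCR stand for?',
  messageHistory: [],
});
console.log(answer.message);
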
/**
* Streaming chat using Mistral's streaming API
*/
public async chatStream(input: ReadableStream<Uint8Array>): Promise<ReadableStream<string>> {
const decoder = new TextDecoder();
let buffer = '';
const mistralClient = this.mistralClient;
const chatModel = this.options.chatModel || 'mistral-large-latest';
const transform = new TransformStream<Uint8Array, string>({
async transform(chunk, controller) {
buffer += decoder.decode(chunk, { stream: true });
// Try to parse complete JSON messages from the buffer
while (true) {
const newlineIndex = buffer.indexOf('\n');
if (newlineIndex === -1) break;
const line = buffer.slice(0, newlineIndex);
buffer = buffer.slice(newlineIndex + 1);
if (line.trim()) {
try {
const message = JSON.parse(line);
// Build messages array
const messages: Array<{
role: 'system' | 'user' | 'assistant';
content: string;
}> = [];
if (message.systemMessage) {
messages.push({
role: 'system',
content: message.systemMessage
});
}
messages.push({
role: message.role === 'assistant' ? 'assistant' : 'user',
content: message.content
});
// Use Mistral streaming
const stream = await mistralClient.chat.stream({
model: chatModel,
messages: messages,
});
// Process streaming events
for await (const event of stream) {
const delta = event.data?.choices?.[0]?.delta;
if (delta?.content) {
if (typeof delta.content === 'string') {
controller.enqueue(delta.content);
} else if (Array.isArray(delta.content)) {
for (const chunk of delta.content) {
if (typeof chunk === 'string') {
controller.enqueue(chunk);
} else if (chunk && typeof chunk === 'object' && 'text' in chunk) {
controller.enqueue((chunk as any).text);
}
}
}
}
}
} catch (e) {
console.error('Failed to parse message:', e);
}
}
}
},
flush(controller) {
if (buffer.trim()) {
try {
const message = JSON.parse(buffer);
controller.enqueue(message.content || '');
} catch (e) {
console.error('Failed to parse remaining buffer:', e);
}
}
}
});
return input.pipeThrough(transform);
}
/**
* Audio generation is not supported by Mistral
*/
public async audio(optionsArg: { message: string }): Promise<NodeJS.ReadableStream> {
throw new Error('Audio generation is not supported by Mistral. Please use ElevenLabs or OpenAI provider for audio generation.');
}
/**
* Vision using Mistral's OCR API for image analysis
*/
public async vision(optionsArg: { image: Buffer; prompt: string }): Promise<string> {
const base64Image = optionsArg.image.toString('base64');
// Detect image type from buffer header
let mimeType = 'image/jpeg';
if (optionsArg.image[0] === 0x89 && optionsArg.image[1] === 0x50) {
mimeType = 'image/png';
} else if (optionsArg.image[0] === 0x47 && optionsArg.image[1] === 0x49) {
mimeType = 'image/gif';
} else if (optionsArg.image[0] === 0x52 && optionsArg.image[1] === 0x49) {
mimeType = 'image/webp';
}
// Use OCR API with image data URL
const ocrResult = await this.mistralClient.ocr.process({
model: this.options.ocrModel || 'mistral-ocr-latest',
document: {
imageUrl: `data:${mimeType};base64,${base64Image}`,
type: 'image_url',
},
});
// Combine markdown from all pages
const extractedText = ocrResult.pages.map(page => page.markdown).join('\n\n');
// If a prompt is provided, use chat to analyze the extracted text
if (optionsArg.prompt && optionsArg.prompt.trim()) {
const chatResponse = await this.chat({
systemMessage: 'You are an assistant analyzing image content. The following is text extracted from an image using OCR.',
userMessage: `${optionsArg.prompt}\n\nExtracted content:\n${extractedText}`,
messageHistory: [],
});
return chatResponse.message;
}
return extractedText;
}
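
A sketch of OCR-backed vision: without a prompt the raw extracted markdown is returned, with a prompt the extracted text is analyzed through chat (file name and key are placeholders):

import { promises as fs } from 'fs';
import { MistralProvider } from './provider.mistral.js';

const mistral = new MistralProvider({ mistralToken: 'your-mistral-api-key' });
await mistral.start();

const imageBuffer = await fs.readFile('./invoice.png'); // PNG/JPEG/GIF/WebP are detected from the header bytes
const summary = await mistral.vision({
  image: imageBuffer,
  prompt: 'List the line items and their totals.',
});
console.log(summary);
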
/**
* Document processing using Mistral's OCR API
* PDFs are uploaded via Files API first, then processed with OCR
*/
public async document(optionsArg: {
systemMessage: string;
userMessage: string;
pdfDocuments: Uint8Array[];
messageHistory: ChatMessage[];
}): Promise<{ message: any }> {
const extractedTexts: string[] = [];
const uploadedFileIds: string[] = [];
try {
// Process each PDF document using Mistral OCR
for (let i = 0; i < optionsArg.pdfDocuments.length; i++) {
const pdfDocument = optionsArg.pdfDocuments[i];
// Upload the PDF to Mistral's Files API first
const uploadResult = await this.mistralClient.files.upload({
file: {
fileName: `document_${i + 1}.pdf`,
content: pdfDocument,
},
purpose: 'ocr',
});
uploadedFileIds.push(uploadResult.id);
// Now use OCR with the uploaded file
const ocrResult = await this.mistralClient.ocr.process({
model: this.options.ocrModel || 'mistral-ocr-latest',
document: {
type: 'file',
fileId: uploadResult.id,
},
tableFormat: this.options.tableFormat || 'markdown',
});
// Combine all page markdown with page separators
const pageTexts = ocrResult.pages.map((page, index) => {
let pageContent = `--- Page ${index + 1} ---\n${page.markdown}`;
// Include tables if present
if (page.tables && page.tables.length > 0) {
pageContent += '\n\n**Tables:**\n' + page.tables.map((t: any) => t.markdown || t.html || '').join('\n');
}
// Include header/footer if present
if (page.header) {
pageContent = `Header: ${page.header}\n${pageContent}`;
}
if (page.footer) {
pageContent += `\nFooter: ${page.footer}`;
}
return pageContent;
}).join('\n\n');
extractedTexts.push(pageTexts);
}
// Combine all document texts
const allDocumentText = extractedTexts.length === 1
? extractedTexts[0]
: extractedTexts.map((text, i) => `=== Document ${i + 1} ===\n${text}`).join('\n\n');
// Use chat API to process the extracted text with the user's query
const chatResponse = await this.chat({
systemMessage: optionsArg.systemMessage || 'You are a helpful assistant analyzing document content.',
userMessage: `${optionsArg.userMessage}\n\n---\nDocument Content:\n${allDocumentText}`,
messageHistory: optionsArg.messageHistory,
});
return {
message: {
role: 'assistant',
content: chatResponse.message
}
};
} finally {
// Clean up uploaded files
for (const fileId of uploadedFileIds) {
try {
await this.mistralClient.files.delete({ fileId });
} catch (cleanupError) {
// Ignore cleanup errors - files may have already been auto-deleted
console.warn(`Failed to delete temporary file ${fileId}:`, cleanupError);
}
}
}
}
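
A sketch of the upload-then-OCR document flow (file name and key are placeholders):

import { promises as fs } from 'fs';
import { MistralProvider } from './provider.mistral.js';

const mistral = new MistralProvider({
  mistralToken: 'your-mistral-api-key',
  tableFormat: 'markdown',
});
await mistral.start();

const pdfBytes = new Uint8Array(await fs.readFile('./contract.pdf'));
const result = await mistral.document({
  systemMessage: 'You are a contracts analyst.',
  userMessage: 'Summarize the termination clauses.',
  pdfDocuments: [pdfBytes],
  messageHistory: [],
});
console.log(result.message.content);
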
/**
* Research is not natively supported by Mistral
*/
public async research(optionsArg: ResearchOptions): Promise<ResearchResponse> {
throw new Error('Research/web search is not supported by Mistral. Please use Perplexity or Anthropic provider for research capabilities.');
}
/**
* Image generation is not supported by Mistral
*/
public async imageGenerate(optionsArg: ImageGenerateOptions): Promise<ImageResponse> {
throw new Error('Image generation is not supported by Mistral. Please use OpenAI provider for image generation.');
}
/**
* Image editing is not supported by Mistral
*/
public async imageEdit(optionsArg: ImageEditOptions): Promise<ImageResponse> {
throw new Error('Image editing is not supported by Mistral. Please use OpenAI provider for image editing.');
}
}

View File

@@ -9,13 +9,101 @@ import type {
ResearchResponse, ResearchResponse,
ImageGenerateOptions, ImageGenerateOptions,
ImageEditOptions, ImageEditOptions,
ImageResponse ImageResponse,
StreamingChatOptions
} from './abstract.classes.multimodal.js'; } from './abstract.classes.multimodal.js';
/**
* Ollama model runtime options
* @see https://github.com/ollama/ollama/blob/main/docs/modelfile.md
*/
export interface IOllamaModelOptions {
num_ctx?: number; // Context window (default: 2048)
temperature?: number; // 0 = deterministic (default: 0.8)
top_k?: number; // Top-k sampling (default: 40)
top_p?: number; // Nucleus sampling (default: 0.9)
repeat_penalty?: number;// Repeat penalty (default: 1.1)
num_predict?: number; // Max tokens to predict
stop?: string[]; // Stop sequences
seed?: number; // Random seed for reproducibility
think?: boolean; // Enable thinking/reasoning mode (for GPT-OSS, QwQ, etc.)
}
/**
* JSON Schema tool definition for Ollama native tool calling
* @see https://docs.ollama.com/capabilities/tool-calling
*/
export interface IOllamaTool {
type: 'function';
function: {
name: string;
description: string;
parameters: {
type: 'object';
properties: Record<string, {
type: string;
description?: string;
enum?: string[];
}>;
required?: string[];
};
};
}
/**
* Tool call returned by model in native tool calling mode
*/
export interface IOllamaToolCall {
function: {
name: string;
arguments: Record<string, unknown>;
index?: number;
};
}
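
A sketch of a tool definition that satisfies IOllamaTool (the tool name and schema are illustrative):

import type { IOllamaTool } from './provider.ollama.js';

const getWeatherTool: IOllamaTool = {
  type: 'function',
  function: {
    name: 'get_weather',
    description: 'Get the current weather for a city',
    parameters: {
      type: 'object',
      properties: {
        city: { type: 'string', description: 'City name' },
        unit: { type: 'string', enum: ['celsius', 'fahrenheit'] },
      },
      required: ['city'],
    },
  },
};
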
export interface IOllamaProviderOptions { export interface IOllamaProviderOptions {
baseUrl?: string; baseUrl?: string;
model?: string; model?: string;
visionModel?: string; // Model to use for vision tasks (e.g. 'llava') visionModel?: string; // Model to use for vision tasks (e.g. 'llava')
defaultOptions?: IOllamaModelOptions; // Default model options
defaultTimeout?: number; // Default timeout in ms (default: 120000)
}
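
A sketch of constructing the provider with defaults that every request inherits; 'qwq' stands in for any reasoning-capable model:

import { OllamaProvider } from './provider.ollama.js';

const provider = new OllamaProvider({
  baseUrl: 'http://localhost:11434',
  model: 'qwq',                                    // illustrative reasoning model
  defaultOptions: { think: true, temperature: 0 }, // think enables reasoning output where supported
  defaultTimeout: 300_000,                         // reasoning models can take a while
});
await provider.start();
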
/**
* Extended chat options with Ollama-specific settings
*/
export interface IOllamaChatOptions extends ChatOptions {
options?: IOllamaModelOptions; // Per-request model options
timeout?: number; // Per-request timeout in ms
model?: string; // Per-request model override
tools?: IOllamaTool[]; // Available tools for native function calling
// images is inherited from ChatOptions
}
/**
* Chunk emitted during streaming
*/
export interface IOllamaStreamChunk {
content: string;
thinking?: string; // For models with extended thinking
toolCalls?: IOllamaToolCall[]; // Tool calls in streaming mode
done: boolean;
stats?: {
totalDuration?: number;
evalCount?: number;
};
}
/**
* Extended chat response with Ollama-specific fields
*/
export interface IOllamaChatResponse extends ChatResponse {
thinking?: string;
toolCalls?: IOllamaToolCall[]; // Tool calls from model (native tool calling)
stats?: {
totalDuration?: number;
evalCount?: number;
};
} }
export class OllamaProvider extends MultiModalModel { export class OllamaProvider extends MultiModalModel {
@@ -23,6 +111,8 @@ export class OllamaProvider extends MultiModalModel {
private baseUrl: string; private baseUrl: string;
private model: string; private model: string;
private visionModel: string; private visionModel: string;
private defaultOptions: IOllamaModelOptions;
private defaultTimeout: number;
constructor(optionsArg: IOllamaProviderOptions = {}) { constructor(optionsArg: IOllamaProviderOptions = {}) {
super(); super();
@@ -30,6 +120,8 @@ export class OllamaProvider extends MultiModalModel {
this.baseUrl = optionsArg.baseUrl || 'http://localhost:11434'; this.baseUrl = optionsArg.baseUrl || 'http://localhost:11434';
this.model = optionsArg.model || 'llama2'; this.model = optionsArg.model || 'llama2';
this.visionModel = optionsArg.visionModel || 'llava'; this.visionModel = optionsArg.visionModel || 'llava';
this.defaultOptions = optionsArg.defaultOptions || {};
this.defaultTimeout = optionsArg.defaultTimeout || 120000;
} }
async start() { async start() {
@@ -148,23 +240,56 @@ export class OllamaProvider extends MultiModalModel {
// Implementing the synchronous chat interaction // Implementing the synchronous chat interaction
public async chat(optionsArg: ChatOptions): Promise<ChatResponse> { public async chat(optionsArg: ChatOptions): Promise<ChatResponse> {
// Format messages for Ollama // Format messages for Ollama
const historyMessages = optionsArg.messageHistory.map((msg) => {
const formatted: { role: string; content: string; images?: string[]; reasoning?: string } = {
role: msg.role,
content: msg.content,
};
if (msg.images && msg.images.length > 0) {
formatted.images = msg.images;
}
if (msg.reasoning) {
formatted.reasoning = msg.reasoning;
}
return formatted;
});
// Build user message with optional images
const userMessage: { role: string; content: string; images?: string[] } = {
role: 'user',
content: optionsArg.userMessage,
};
if (optionsArg.images && optionsArg.images.length > 0) {
userMessage.images = optionsArg.images;
}
const messages = [ const messages = [
{ role: 'system', content: optionsArg.systemMessage }, { role: 'system', content: optionsArg.systemMessage },
...optionsArg.messageHistory, ...historyMessages,
{ role: 'user', content: optionsArg.userMessage } userMessage,
]; ];
// Make API call to Ollama // Build request body - include think parameter if set
const requestBody: Record<string, unknown> = {
model: this.model,
messages: messages,
stream: false,
options: this.defaultOptions,
};
// Add think parameter for reasoning models (GPT-OSS, QwQ, etc.)
if (this.defaultOptions.think !== undefined) {
requestBody.think = this.defaultOptions.think;
}
// Make API call to Ollama with defaultOptions and timeout
const response = await fetch(`${this.baseUrl}/api/chat`, { const response = await fetch(`${this.baseUrl}/api/chat`, {
method: 'POST', method: 'POST',
headers: { headers: {
'Content-Type': 'application/json', 'Content-Type': 'application/json',
}, },
body: JSON.stringify({ body: JSON.stringify(requestBody),
model: this.model, signal: AbortSignal.timeout(this.defaultTimeout),
messages: messages,
stream: false
}),
}); });
if (!response.ok) { if (!response.ok) {
@@ -172,10 +297,296 @@ export class OllamaProvider extends MultiModalModel {
} }
const result = await response.json(); const result = await response.json();
return { return {
role: 'assistant' as const, role: 'assistant' as const,
message: result.message.content, message: result.message.content,
reasoning: result.message.thinking || result.message.reasoning,
};
}
/**
* Streaming chat with token callback (implements MultiModalModel interface)
* Calls onToken for each token generated during the response
*/
public async chatStreaming(optionsArg: StreamingChatOptions): Promise<ChatResponse> {
const onToken = optionsArg.onToken;
// Use existing collectStreamResponse with callback, including images
const response = await this.collectStreamResponse(
{
systemMessage: optionsArg.systemMessage,
userMessage: optionsArg.userMessage,
messageHistory: optionsArg.messageHistory,
images: optionsArg.images,
},
(chunk) => {
if (onToken) {
if (chunk.thinking) onToken(chunk.thinking);
if (chunk.content) onToken(chunk.content);
}
}
);
return {
role: 'assistant' as const,
message: response.message,
reasoning: response.thinking,
};
}
/**
* Streaming chat with async iteration and options support
*/
public async chatStreamResponse(
optionsArg: IOllamaChatOptions
): Promise<AsyncIterable<IOllamaStreamChunk>> {
const model = optionsArg.model || this.model;
const timeout = optionsArg.timeout || this.defaultTimeout;
const modelOptions = { ...this.defaultOptions, ...optionsArg.options };
// Format history messages with optional images and reasoning
const historyMessages = optionsArg.messageHistory.map((msg) => {
const formatted: { role: string; content: string; images?: string[]; reasoning?: string } = {
role: msg.role,
content: msg.content,
};
if (msg.images && msg.images.length > 0) {
formatted.images = msg.images;
}
if (msg.reasoning) {
formatted.reasoning = msg.reasoning;
}
return formatted;
});
// Build user message with optional images
const userMessage: { role: string; content: string; images?: string[] } = {
role: 'user',
content: optionsArg.userMessage,
};
if (optionsArg.images && optionsArg.images.length > 0) {
userMessage.images = optionsArg.images;
}
const messages = [
{ role: 'system', content: optionsArg.systemMessage },
...historyMessages,
userMessage,
];
// Build request body with optional tools and think parameters
const requestBody: Record<string, unknown> = {
model,
messages,
stream: true,
options: modelOptions,
};
// Add think parameter for reasoning models (GPT-OSS, QwQ, etc.)
if (modelOptions.think !== undefined) {
requestBody.think = modelOptions.think;
}
// Add tools for native function calling
if (optionsArg.tools && optionsArg.tools.length > 0) {
requestBody.tools = optionsArg.tools;
}
const response = await fetch(`${this.baseUrl}/api/chat`, {
method: 'POST',
headers: { 'Content-Type': 'application/json' },
body: JSON.stringify(requestBody),
signal: AbortSignal.timeout(timeout),
});
if (!response.ok) {
throw new Error(`Ollama API error: ${response.status}`);
}
const reader = response.body!.getReader();
const decoder = new TextDecoder();
return {
[Symbol.asyncIterator]: async function* () {
let buffer = '';
try {
while (true) {
const { done, value } = await reader.read();
if (done) break;
buffer += decoder.decode(value, { stream: true });
const lines = buffer.split('\n');
buffer = lines.pop() || '';
for (const line of lines) {
if (!line.trim()) continue;
try {
const json = JSON.parse(line);
// Parse tool_calls from response
let toolCalls: IOllamaToolCall[] | undefined;
if (json.message?.tool_calls && Array.isArray(json.message.tool_calls)) {
toolCalls = json.message.tool_calls.map((tc: any) => ({
function: {
name: tc.function?.name || '',
arguments: typeof tc.function?.arguments === 'string'
? JSON.parse(tc.function.arguments)
: tc.function?.arguments || {},
index: tc.index,
},
}));
}
yield {
content: json.message?.content || '',
thinking: json.message?.thinking,
toolCalls,
done: json.done || false,
stats: json.done ? {
totalDuration: json.total_duration,
evalCount: json.eval_count,
} : undefined,
} as IOllamaStreamChunk;
} catch { /* skip malformed */ }
}
}
} finally {
reader.releaseLock();
}
}
};
}
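
A sketch of iterating the stream; chunk.thinking only appears for models that emit reasoning tokens (model name illustrative):

import { OllamaProvider } from './provider.ollama.js';

const provider = new OllamaProvider({ model: 'llama3.2' }); // illustrative model name
await provider.start();

const stream = await provider.chatStreamResponse({
  systemMessage: 'You are helpful.',
  userMessage: 'Write a haiku about build pipelines.',
  messageHistory: [],
  options: { temperature: 0.7 },
});

for await (const chunk of stream) {
  if (chunk.thinking) process.stderr.write(chunk.thinking); // reasoning tokens, when present
  if (chunk.content) process.stdout.write(chunk.content);
  if (chunk.done && chunk.stats) console.log('\nstats:', chunk.stats);
}
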
/**
* Stream and collect full response with optional progress callback
*/
public async collectStreamResponse(
optionsArg: IOllamaChatOptions,
onChunk?: (chunk: IOllamaStreamChunk) => void
): Promise<IOllamaChatResponse> {
const stream = await this.chatStreamResponse(optionsArg);
let content = '';
let thinking = '';
let toolCalls: IOllamaToolCall[] = [];
let stats: IOllamaChatResponse['stats'];
for await (const chunk of stream) {
if (chunk.content) content += chunk.content;
if (chunk.thinking) thinking += chunk.thinking;
if (chunk.toolCalls) toolCalls = toolCalls.concat(chunk.toolCalls);
if (chunk.stats) stats = chunk.stats;
if (onChunk) onChunk(chunk);
}
return {
role: 'assistant' as const,
message: content,
thinking: thinking || undefined,
toolCalls: toolCalls.length > 0 ? toolCalls : undefined,
stats,
};
}
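
Reusing the provider from the sketch above, collectStreamResponse aggregates the stream while still reporting progress through the optional callback:

const full = await provider.collectStreamResponse(
  {
    systemMessage: 'You are helpful.',
    userMessage: 'Summarize this changelog in two sentences.',
    messageHistory: [],
  },
  (chunk) => {
    if (chunk.content) process.stdout.write(chunk.content); // live progress
  }
);
console.log('\ntotal length:', full.message.length);
if (full.thinking) console.log('captured reasoning:', full.thinking.length, 'chars');
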
/**
* Non-streaming chat with full options support
*/
public async chatWithOptions(optionsArg: IOllamaChatOptions): Promise<IOllamaChatResponse> {
const model = optionsArg.model || this.model;
const timeout = optionsArg.timeout || this.defaultTimeout;
const modelOptions = { ...this.defaultOptions, ...optionsArg.options };
// Format history messages with optional images, reasoning, and tool role
const historyMessages = optionsArg.messageHistory.map((msg) => {
// Handle tool result messages
if ((msg as any).role === 'tool') {
return {
role: 'tool',
content: msg.content,
tool_name: (msg as any).toolName,
};
}
const formatted: { role: string; content: string; images?: string[]; reasoning?: string } = {
role: msg.role,
content: msg.content,
};
if (msg.images && msg.images.length > 0) {
formatted.images = msg.images;
}
if (msg.reasoning) {
formatted.reasoning = msg.reasoning;
}
return formatted;
});
// Build user message with optional images
const userMessage: { role: string; content: string; images?: string[] } = {
role: 'user',
content: optionsArg.userMessage,
};
if (optionsArg.images && optionsArg.images.length > 0) {
userMessage.images = optionsArg.images;
}
const messages = [
{ role: 'system', content: optionsArg.systemMessage },
...historyMessages,
userMessage,
];
// Build request body with optional tools and think parameters
const requestBody: Record<string, unknown> = {
model,
messages,
stream: false,
options: modelOptions,
};
// Add think parameter for reasoning models (GPT-OSS, QwQ, etc.)
if (modelOptions.think !== undefined) {
requestBody.think = modelOptions.think;
}
// Add tools for native function calling
if (optionsArg.tools && optionsArg.tools.length > 0) {
requestBody.tools = optionsArg.tools;
}
const response = await fetch(`${this.baseUrl}/api/chat`, {
method: 'POST',
headers: { 'Content-Type': 'application/json' },
body: JSON.stringify(requestBody),
signal: AbortSignal.timeout(timeout),
});
if (!response.ok) {
throw new Error(`Ollama API error: ${response.statusText}`);
}
const result = await response.json();
// Parse tool_calls from response
let toolCalls: IOllamaToolCall[] | undefined;
if (result.message?.tool_calls && Array.isArray(result.message.tool_calls)) {
toolCalls = result.message.tool_calls.map((tc: any) => ({
function: {
name: tc.function?.name || '',
arguments: typeof tc.function?.arguments === 'string'
? JSON.parse(tc.function.arguments)
: tc.function?.arguments || {},
index: tc.index,
},
}));
}
return {
role: 'assistant' as const,
message: result.message.content || '',
thinking: result.message.thinking,
toolCalls,
stats: {
totalDuration: result.total_duration,
evalCount: result.eval_count,
},
}; };
} }
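
A sketch of a full native tool-calling round trip: the first call may return toolCalls instead of text, the caller executes the tool, and the result goes back as a tool-role history message per the mapping above. The model name and tool are illustrative, and the tool message is cast past the base ChatMessage type just as the provider does internally:

import { OllamaProvider, type IOllamaTool } from './provider.ollama.js';

const provider = new OllamaProvider({ model: 'llama3.1' }); // illustrative tool-capable model
await provider.start();

const tools: IOllamaTool[] = [{
  type: 'function',
  function: {
    name: 'get_weather',
    description: 'Get the current weather for a city',
    parameters: {
      type: 'object',
      properties: { city: { type: 'string', description: 'City name' } },
      required: ['city'],
    },
  },
}];

const question = 'What is the weather in Berlin right now?';
const first = await provider.chatWithOptions({
  systemMessage: 'Use tools when they help.',
  userMessage: question,
  messageHistory: [],
  tools,
});

if (first.toolCalls?.length) {
  const call = first.toolCalls[0];
  const toolResult = JSON.stringify({ city: call.function.arguments.city, tempC: 7 }); // stand-in for a real lookup

  const second = await provider.chatWithOptions({
    systemMessage: 'Use tools when they help.',
    userMessage: 'Answer the original question using the tool result.',
    messageHistory: [
      { role: 'user', content: question },
      { role: 'assistant', content: first.message },
      { role: 'tool', content: toolResult, toolName: call.function.name } as any,
    ],
    tools,
  });
  console.log(second.message);
} else {
  console.log(first.message);
}
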
@@ -216,11 +627,14 @@ export class OllamaProvider extends MultiModalModel {
pdfDocuments: Uint8Array[]; pdfDocuments: Uint8Array[];
messageHistory: ChatMessage[]; messageHistory: ChatMessage[];
}): Promise<{ message: any }> { }): Promise<{ message: any }> {
// Ensure SmartPdf is initialized before processing documents
await this.ensureSmartpdfReady();
// Convert PDF documents to images using SmartPDF // Convert PDF documents to images using SmartPDF
let documentImageBytesArray: Uint8Array[] = []; let documentImageBytesArray: Uint8Array[] = [];
for (const pdfDocument of optionsArg.pdfDocuments) { for (const pdfDocument of optionsArg.pdfDocuments) {
const documentImageArray = await this.smartpdfInstance.convertPDFToPngBytes(pdfDocument); const documentImageArray = await this.smartpdfInstance!.convertPDFToPngBytes(pdfDocument);
documentImageBytesArray = documentImageBytesArray.concat(documentImageArray); documentImageBytesArray = documentImageBytesArray.concat(documentImageArray);
} }

View File

@@ -1,6 +1,7 @@
import * as plugins from './plugins.js'; import * as plugins from './plugins.js';
import * as paths from './paths.js'; import * as paths from './paths.js';
import { Readable } from 'stream'; import { Readable } from 'stream';
import { toFile } from 'openai';
// Custom type definition for chat completion messages // Custom type definition for chat completion messages
export type TChatCompletionRequestMessage = { export type TChatCompletionRequestMessage = {
@@ -173,11 +174,14 @@ export class OpenAiProvider extends MultiModalModel {
content: any; content: any;
}[]; }[];
}) { }) {
// Ensure SmartPdf is initialized before processing documents
await this.ensureSmartpdfReady();
let pdfDocumentImageBytesArray: Uint8Array[] = []; let pdfDocumentImageBytesArray: Uint8Array[] = [];
// Convert each PDF into one or more image byte arrays. // Convert each PDF into one or more image byte arrays.
for (const pdfDocument of optionsArg.pdfDocuments) { for (const pdfDocument of optionsArg.pdfDocuments) {
const documentImageArray = await this.smartpdfInstance.convertPDFToPngBytes(pdfDocument); const documentImageArray = await this.smartpdfInstance!.convertPDFToPngBytes(pdfDocument);
pdfDocumentImageBytesArray = pdfDocumentImageBytesArray.concat(documentImageArray); pdfDocumentImageBytesArray = pdfDocumentImageBytesArray.concat(documentImageArray);
} }
@@ -402,16 +406,19 @@ export class OpenAiProvider extends MultiModalModel {
const model = optionsArg.model || this.options.imageModel || 'gpt-image-1'; const model = optionsArg.model || this.options.imageModel || 'gpt-image-1';
try { try {
// Convert Buffer to uploadable file format for OpenAI API
const imageFile = await toFile(optionsArg.image, 'image.png', { type: 'image/png' });
const requestParams: any = { const requestParams: any = {
model, model,
image: optionsArg.image, image: imageFile,
prompt: optionsArg.prompt, prompt: optionsArg.prompt,
n: optionsArg.n || 1, n: optionsArg.n || 1,
}; };
// Add mask if provided // Add mask if provided (also convert to file format)
if (optionsArg.mask) { if (optionsArg.mask) {
requestParams.mask = optionsArg.mask; requestParams.mask = await toFile(optionsArg.mask, 'mask.png', { type: 'image/png' });
} }
// Add gpt-image-1 specific parameters // Add gpt-image-1 specific parameters
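
With the toFile conversion in place, callers can keep passing plain Buffers. A sketch assuming an already-started provider instance (file names are placeholders):

import { promises as fs } from 'fs';
import { OpenAiProvider } from './provider.openai.js';

declare const openAiProvider: OpenAiProvider; // an already-started provider is assumed here

const image = await fs.readFile('./logo.png');
const mask = await fs.readFile('./logo-mask.png');

const edited = await openAiProvider.imageEdit({
  image,   // plain Buffer; wrapped with toFile() internally before hitting the API
  mask,
  prompt: 'Replace the background with a subtle gradient',
});
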

View File

@@ -149,11 +149,14 @@ export class XAIProvider extends MultiModalModel {
pdfDocuments: Uint8Array[]; pdfDocuments: Uint8Array[];
messageHistory: { role: string; content: string; }[]; messageHistory: { role: string; content: string; }[];
}): Promise<{ message: any }> { }): Promise<{ message: any }> {
// Ensure SmartPdf is initialized before processing documents
await this.ensureSmartpdfReady();
// First convert PDF documents to images // First convert PDF documents to images
let pdfDocumentImageBytesArray: Uint8Array[] = []; let pdfDocumentImageBytesArray: Uint8Array[] = [];
for (const pdfDocument of optionsArg.pdfDocuments) { for (const pdfDocument of optionsArg.pdfDocuments) {
const documentImageArray = await this.smartpdfInstance.convertPDFToPngBytes(pdfDocument); const documentImageArray = await this.smartpdfInstance!.convertPDFToPngBytes(pdfDocument);
pdfDocumentImageBytesArray = pdfDocumentImageBytesArray.concat(documentImageArray); pdfDocumentImageBytesArray = pdfDocumentImageBytesArray.concat(documentImageArray);
} }

View File

@@ -6,9 +6,9 @@
"module": "NodeNext", "module": "NodeNext",
"moduleResolution": "NodeNext", "moduleResolution": "NodeNext",
"esModuleInterop": true, "esModuleInterop": true,
"verbatimModuleSyntax": true "verbatimModuleSyntax": true,
"baseUrl": ".",
"paths": {}
}, },
"exclude": [ "exclude": ["dist_*/**/*.d.ts"]
"dist_*/**/*.d.ts"
]
} }