diff --git a/changelog.md b/changelog.md index b1e60b1..312776f 100644 --- a/changelog.md +++ b/changelog.md @@ -1,5 +1,14 @@ # Changelog +## 2026-01-18 - 0.10.0 - feat(mistral) +add Mistral provider with native PDF OCR and chat integration + +- Adds dependency @mistralai/mistralai +- Implements ts/provider.mistral.ts providing chat() and document() (OCR) functionality +- Registers and exposes MistralProvider in SmartAi (options, lifecycle, conversation routing) +- Adds unit/integration tests: test.chat.mistral.ts and test.document.mistral.ts +- Updates readme.hints.md with Mistral usage, configuration and notes + ## 2026-01-18 - 0.9.0 - feat(providers) Add Anthropic extended thinking and adapt providers to new streaming/file APIs; bump dependencies and update docs, tests and configuration diff --git a/package.json b/package.json index 47d50c3..bfd3abe 100644 --- a/package.json +++ b/package.json @@ -25,6 +25,7 @@ }, "dependencies": { "@anthropic-ai/sdk": "^0.71.2", + "@mistralai/mistralai": "^1.11.0", "@push.rocks/smartarray": "^1.1.0", "@push.rocks/smartfile": "^11.2.7", "@push.rocks/smartpath": "^6.0.0", diff --git a/pnpm-lock.yaml b/pnpm-lock.yaml index 7067821..02de80b 100644 --- a/pnpm-lock.yaml +++ b/pnpm-lock.yaml @@ -11,6 +11,9 @@ importers: '@anthropic-ai/sdk': specifier: ^0.71.2 version: 0.71.2(zod@3.25.76) + '@mistralai/mistralai': + specifier: ^1.11.0 + version: 1.11.0 '@push.rocks/smartarray': specifier: ^1.1.0 version: 1.1.0 @@ -1132,6 +1135,9 @@ packages: '@lit/reactive-element@2.1.1': resolution: {integrity: sha512-N+dm5PAYdQ8e6UlywyyrgI2t++wFGXfHx+dSJ1oBrg6FAxUj40jId++EaRm80MKX5JnlH1sBsyZ5h0bcZKemCg==} + '@mistralai/mistralai@1.11.0': + resolution: {integrity: sha512-6/BVj2mcaggYbpMzNSxtqtM2Tv/Jb5845XFd2CMYFO+O5VBkX70iLjtkBBTI4JFhh1l9vTCIMYXBVOjLoBVHGQ==} + '@mixmark-io/domino@2.2.0': resolution: {integrity: sha512-Y28PR25bHXUg88kCV7nivXrP2Nj2RueZ3/l/jdx6J9f8J4nsEGcgX0Qe6lt7Pa+J79+kPiJU3LguR6O/6zrLOw==} @@ -4895,6 +4901,11 @@ packages: resolution: 
{integrity: sha512-U/PBtDf35ff0D8X8D0jfdzHYEPFxAI7jJlxZXwCSez5M3190m+QobIfh+sWDWSHMCWWJN2AWamkegn6vr6YBTw==} engines: {node: '>=18'} + zod-to-json-schema@3.25.1: + resolution: {integrity: sha512-pM/SU9d3YAggzi6MtR4h7ruuQlqKtad8e9S0fmxcMi+ueAK5Korys/aWcV9LIIHTVbj01NdzxcnXSN+O74ZIVA==} + peerDependencies: + zod: ^3.25 || ^4 + zod@3.24.2: resolution: {integrity: sha512-lY7CDW43ECgW9u1TcT3IoXHflywfVqDYze4waEz812jR/bZ8FHDsl7pFQoSZTz5N+2NqRXs8GBwnAwo3ZNxqhQ==} @@ -7350,6 +7361,11 @@ snapshots: dependencies: '@lit-labs/ssr-dom-shim': 1.4.0 + '@mistralai/mistralai@1.11.0': + dependencies: + zod: 3.25.76 + zod-to-json-schema: 3.25.1(zod@3.25.76) + '@mixmark-io/domino@2.2.0': {} '@module-federation/error-codes@0.22.0': {} @@ -12506,6 +12522,10 @@ snapshots: yoctocolors-cjs@2.1.3: {} + zod-to-json-schema@3.25.1(zod@3.25.76): + dependencies: + zod: 3.25.76 + zod@3.24.2: {} zod@3.25.76: {} diff --git a/readme.hints.md b/readme.hints.md index b86d7df..2392bd5 100644 --- a/readme.hints.md +++ b/readme.hints.md @@ -5,6 +5,7 @@ - Uses `@git.zone/tstest` v3.x for testing (import from `@git.zone/tstest/tapbundle`) - `@push.rocks/smartfile` is kept at v11 to avoid migration to factory pattern - `@anthropic-ai/sdk` v0.71.x with extended thinking support +- `@mistralai/mistralai` v1.x for Mistral OCR and chat capabilities - `@push.rocks/smartrequest` v5.x - uses `response.stream()` + `Readable.fromWeb()` for streaming ## Important Notes @@ -12,6 +13,68 @@ - When extended thinking is enabled, temperature parameter must NOT be set (or set to 1) - The `streamNode()` method was removed in smartrequest v5, use `response.stream()` with `Readable.fromWeb()` instead +## Mistral Provider Integration + +### Overview + +The Mistral provider supports: +- **Document AI** via Mistral OCR 3 (December 2025) - native PDF processing without image conversion +- **Chat capabilities** using Mistral's chat models (`mistral-large-latest`, etc.) 
+ +### Key Advantage: Native PDF Support + +Unlike other providers that require converting PDFs to images (using SmartPdf), Mistral OCR natively accepts PDF documents as base64-encoded data. This makes document processing potentially faster and more accurate for text extraction. + +### Configuration + +```typescript +import * as smartai from '@push.rocks/smartai'; + +const provider = new smartai.MistralProvider({ + mistralToken: 'your-token-here', + chatModel: 'mistral-large-latest', // default + ocrModel: 'mistral-ocr-latest', // default + tableFormat: 'markdown', // 'markdown' or 'html' +}); + +await provider.start(); +``` + +### Supported Methods + +| Method | Support | Notes | +|--------|---------|-------| +| `chat()` | ✅ | Standard chat completion | +| `chatStream()` | ✅ | Streaming chat responses | +| `document()` | ✅ | Native PDF OCR - no image conversion needed | +| `vision()` | ✅ | Image OCR with optional chat analysis | +| `audio()` | ❌ | Not supported - use ElevenLabs | +| `research()` | ❌ | Not supported - use Perplexity | +| `imageGenerate()` | ❌ | Not supported - use OpenAI | +| `imageEdit()` | ❌ | Not supported - use OpenAI | + +### Document Processing + +The `document()` method uses Mistral OCR to extract text from PDFs, then uses Mistral chat to process the user's query with the extracted content. + +```typescript +const result = await provider.document({ + systemMessage: 'You are a document analyst.', + userMessage: 'Summarize this document.', + pdfDocuments: [pdfBuffer], + messageHistory: [], +}); +``` + +### API Key + +Tests require `MISTRAL_API_KEY` in `.nogit/env.json`. 
+ +### Pricing (as of December 2025) + +- OCR: $2 per 1,000 pages ($1 with Batch API) +- Chat: Varies by model (see Mistral pricing page) + ## Anthropic Extended Thinking Feature ### Overview diff --git a/test/test.chat.mistral.ts b/test/test.chat.mistral.ts new file mode 100644 index 0000000..6922272 --- /dev/null +++ b/test/test.chat.mistral.ts @@ -0,0 +1,66 @@ +import { expect, tap } from '@git.zone/tstest/tapbundle'; +import * as qenv from '@push.rocks/qenv'; + +const testQenv = new qenv.Qenv('./', './.nogit/'); + +import * as smartai from '../ts/index.js'; + +let mistralProvider: smartai.MistralProvider; + +tap.test('Mistral Chat: should create and start Mistral provider', async () => { + mistralProvider = new smartai.MistralProvider({ + mistralToken: await testQenv.getEnvVarOnDemand('MISTRAL_API_KEY'), + }); + await mistralProvider.start(); + expect(mistralProvider).toBeInstanceOf(smartai.MistralProvider); +}); + +tap.test('Mistral Chat: should create chat response', async () => { + const userMessage = 'What is the capital of France? Answer in one word.'; + const response = await mistralProvider.chat({ + systemMessage: 'You are a helpful assistant. Be concise.', + userMessage: userMessage, + messageHistory: [], + }); + console.log(`Mistral Chat - User: ${userMessage}`); + console.log(`Mistral Chat - Response: ${response.message}`); + + expect(response.role).toEqual('assistant'); + expect(response.message).toBeTruthy(); + expect(response.message.toLowerCase()).toInclude('paris'); +}); + +tap.test('Mistral Chat: should handle message history', async () => { + const messageHistory: smartai.ChatMessage[] = [ + { role: 'user', content: 'My name is Claude Test' }, + { role: 'assistant', content: 'Nice to meet you, Claude Test!' 
import { expect, tap } from '@git.zone/tstest/tapbundle';
import * as qenv from '@push.rocks/qenv';
import * as smartrequest from '@push.rocks/smartrequest';
import * as smartfile from '@push.rocks/smartfile';

import * as smartai from '../ts/index.js';

// Reads MISTRAL_API_KEY from the environment or from ./.nogit/.
const testQenv = new qenv.Qenv('./', './.nogit/');

// Small public sample PDF used whenever no local fixture is available.
const DUMMY_PDF_URL = 'https://www.w3.org/WAI/ER/tests/xhtml/testfiles/resources/pdf/dummy.pdf';

let mistralProvider: smartai.MistralProvider;

/**
 * Downloads the public dummy PDF and returns its bytes.
 * Every call performs a fresh request, exactly like the inlined downloads it replaces.
 */
const downloadDummyPdf = async (): Promise<Buffer> => {
  const response = await smartrequest.SmartRequest.create()
    .url(DUMMY_PDF_URL)
    .get();
  return Buffer.from(await response.arrayBuffer());
};

tap.test('Mistral Document: should create and start Mistral provider', async () => {
  mistralProvider = new smartai.MistralProvider({
    mistralToken: await testQenv.getEnvVarOnDemand('MISTRAL_API_KEY'),
    tableFormat: 'markdown',
  });
  await mistralProvider.start();
  expect(mistralProvider).toBeInstanceOf(smartai.MistralProvider);
});

tap.test('Mistral Document: should process a PDF document', async () => {
  const dummyPdf = await downloadDummyPdf();

  const result = await mistralProvider.document({
    systemMessage: 'Classify the document. Only the following answers are allowed: "invoice", "bank account statement", "contract", "test document", "other". The answer should only contain the keyword for machine use.',
    userMessage: 'Classify this document.',
    messageHistory: [],
    pdfDocuments: [dummyPdf],
  });

  console.log(`Mistral Document - Result:`, result);
  expect(result).toBeTruthy();
  expect(result.message).toBeTruthy();
});

tap.test('Mistral Document: should handle complex document analysis', async () => {
  // Prefer the local demo PDF (no text layer); fall back to the dummy PDF when it is missing.
  const localDemoPath = './.nogit/demo_without_textlayer.pdf';
  let documentBytes: Uint8Array;

  try {
    documentBytes = await smartfile.fs.toBuffer(localDemoPath);
  } catch {
    console.log('Demo PDF not found, using dummy PDF instead');
    documentBytes = await downloadDummyPdf();
  }

  const result = await mistralProvider.document({
    systemMessage: `
      Analyze this document and provide a JSON response with the following structure:
      {
        "documentType": "string",
        "hasText": boolean,
        "summary": "string"
      }
    `,
    userMessage: 'Analyze this document.',
    messageHistory: [],
    pdfDocuments: [documentBytes],
  });

  console.log(`Mistral Complex Document Analysis:`, result);
  expect(result).toBeTruthy();
  expect(result.message).toBeTruthy();
});

tap.test('Mistral Document: should process multiple PDF documents', async () => {
  const dummyPdf = await downloadDummyPdf();

  const result = await mistralProvider.document({
    systemMessage: 'You are a document comparison assistant.',
    userMessage: 'Are these two documents the same? Answer yes or no.',
    messageHistory: [],
    pdfDocuments: [dummyPdf, dummyPdf], // Same document twice for test
  });

  console.log(`Mistral Multi-Document - Result:`, result);
  expect(result).toBeTruthy();
  expect(result.message).toBeTruthy();
});

tap.test('Mistral Document: should stop the provider', async () => {
  await mistralProvider.stop();
});

export default tap.start();
/**
 * Builds a Conversation backed by the Mistral provider of the given SmartAi instance.
 *
 * @param smartaiRefArg SmartAi instance whose `mistralProvider` must already be set.
 * @returns a new Conversation; its process function is still a stub that yields an empty string.
 * @throws Error when the SmartAi instance has no Mistral provider.
 */
public static async createWithMistral(smartaiRefArg: SmartAi) {
  if (smartaiRefArg.mistralProvider) {
    return new Conversation(smartaiRefArg, {
      // TODO implement proper streaming
      processFunction: async (_input) => '',
    });
  }
  throw new Error('Mistral provider not available');
}
| 'xai' | 'elevenlabs'; export class SmartAi { public options: ISmartAiOptions; @@ -43,6 +50,7 @@ export class SmartAi { public ollamaProvider: OllamaProvider; public exoProvider: ExoProvider; public groqProvider: GroqProvider; + public mistralProvider: MistralProvider; public xaiProvider: XAIProvider; public elevenlabsProvider: ElevenLabsProvider; @@ -75,6 +83,15 @@ export class SmartAi { }); await this.groqProvider.start(); } + if (this.options.mistralToken) { + this.mistralProvider = new MistralProvider({ + mistralToken: this.options.mistralToken, + chatModel: this.options.mistral?.chatModel, + ocrModel: this.options.mistral?.ocrModel, + tableFormat: this.options.mistral?.tableFormat, + }); + await this.mistralProvider.start(); + } if (this.options.xaiToken) { this.xaiProvider = new XAIProvider({ xaiToken: this.options.xaiToken, @@ -119,6 +136,9 @@ export class SmartAi { if (this.groqProvider) { await this.groqProvider.stop(); } + if (this.mistralProvider) { + await this.mistralProvider.stop(); + } if (this.xaiProvider) { await this.xaiProvider.stop(); } @@ -150,6 +170,8 @@ export class SmartAi { return Conversation.createWithOllama(this); case 'groq': return Conversation.createWithGroq(this); + case 'mistral': + return Conversation.createWithMistral(this); case 'xai': return Conversation.createWithXai(this); case 'elevenlabs': diff --git a/ts/index.ts b/ts/index.ts index 23000d6..c57b8b1 100644 --- a/ts/index.ts +++ b/ts/index.ts @@ -4,6 +4,7 @@ export * from './provider.openai.js'; export * from './provider.anthropic.js'; export * from './provider.perplexity.js'; export * from './provider.groq.js'; +export * from './provider.mistral.js'; export * from './provider.ollama.js'; export * from './provider.xai.js'; export * from './provider.exo.js'; diff --git a/ts/plugins.ts b/ts/plugins.ts index 463e999..16c9d9d 100644 --- a/ts/plugins.ts +++ b/ts/plugins.ts @@ -28,9 +28,11 @@ export { // third party import * as anthropic from '@anthropic-ai/sdk'; +import * as 
import * as plugins from './plugins.js';
import { MultiModalModel } from './abstract.classes.multimodal.js';
import type {
  ChatOptions,
  ChatResponse,
  ChatMessage,
  ResearchOptions,
  ResearchResponse,
  ImageGenerateOptions,
  ImageEditOptions,
  ImageResponse
} from './abstract.classes.multimodal.js';

/** Construction options for {@link MistralProvider}. */
export interface IMistralProviderOptions {
  /** API key passed to the Mistral SDK client. */
  mistralToken: string;
  chatModel?: string; // default: 'mistral-large-latest'
  ocrModel?: string; // default: 'mistral-ocr-latest'
  /** Forwarded to the OCR request in document(); falls back to 'markdown'. */
  tableFormat?: 'markdown' | 'html';
}

/** Role/content pair as sent to Mistral's chat endpoints by this provider. */
type TMistralChatMessage = {
  role: 'system' | 'user' | 'assistant';
  content: string;
};

const DEFAULT_CHAT_MODEL = 'mistral-large-latest';
const DEFAULT_OCR_MODEL = 'mistral-ocr-latest';

/**
 * Normalises message content (a plain string, or an array of strings / `{ text }` chunks)
 * into the list of text pieces it contains. Anything else yields no pieces.
 */
function extractTextPieces(content: unknown): string[] {
  if (typeof content === 'string') {
    return content ? [content] : [];
  }
  if (!Array.isArray(content)) {
    return [];
  }
  const pieces: string[] = [];
  for (const chunk of content) {
    if (typeof chunk === 'string') {
      pieces.push(chunk);
    } else if (chunk && typeof chunk === 'object' && 'text' in chunk) {
      const text = (chunk as { text: unknown }).text;
      if (typeof text === 'string') {
        pieces.push(text);
      }
    }
  }
  return pieces;
}

/**
 * Guesses an image MIME type from the leading magic bytes.
 * Recognises PNG, GIF and WebP (RIFF container with a WEBP tag); everything else is
 * reported as JPEG, which was already the fallback before.
 */
function detectImageMimeType(image: Uint8Array): string {
  const hasBytes = (signature: number[], offset = 0): boolean =>
    signature.every((byte, i) => image[offset + i] === byte);

  if (hasBytes([0x89, 0x50, 0x4e, 0x47])) return 'image/png';
  if (hasBytes([0x47, 0x49, 0x46])) return 'image/gif';
  // 'RIFF' alone also matches WAV/AVI, so additionally require the 'WEBP' tag at offset 8.
  if (hasBytes([0x52, 0x49, 0x46, 0x46]) && hasBytes([0x57, 0x45, 0x42, 0x50], 8)) return 'image/webp';
  return 'image/jpeg';
}

/**
 * Returns the textual body of one OCR table object.
 * NOTE(review): the OCR response is believed to carry the table text in `content`;
 * `markdown` / `html` are kept as fallbacks because the previous code read those keys.
 * Confirm against the installed SDK's table type.
 */
function tableToText(table: unknown): string {
  if (!table || typeof table !== 'object') {
    return '';
  }
  const record = table as Record<string, unknown>;
  for (const key of ['content', 'markdown', 'html']) {
    const value = record[key];
    if (typeof value === 'string' && value) {
      return value;
    }
  }
  return '';
}

/**
 * Provider backed by the Mistral SDK.
 *
 * Implements chat(), chatStream(), vision() (OCR plus optional chat analysis) and
 * document() (Files API upload, OCR, then chat). audio(), research(), imageGenerate()
 * and imageEdit() always throw because this provider does not implement them.
 */
export class MistralProvider extends MultiModalModel {
  private options: IMistralProviderOptions;
  public mistralClient: plugins.mistralai.Mistral;

  constructor(optionsArg: IMistralProviderOptions) {
    super();
    this.options = optionsArg;
  }

  /** Runs the base start logic, then creates the SDK client from the configured token. */
  async start() {
    await super.start();
    this.mistralClient = new plugins.mistralai.Mistral({
      apiKey: this.options.mistralToken,
    });
  }

  /** Delegates to the base stop logic; the SDK client itself is not torn down here. */
  async stop() {
    await super.stop();
  }

  /**
   * Single-shot (non-streaming) chat completion.
   *
   * Message order sent to the API: optional system message, the message history,
   * then the current user message.
   *
   * @returns the assistant reply; `message` is '' when the response carries no text content.
   */
  public async chat(optionsArg: ChatOptions): Promise<ChatResponse> {
    const messages: TMistralChatMessage[] = [];

    if (optionsArg.systemMessage) {
      messages.push({ role: 'system', content: optionsArg.systemMessage });
    }

    for (const msg of optionsArg.messageHistory ?? []) {
      // Any role other than system/assistant is sent as a user message.
      const role = msg.role === 'system' ? 'system' : msg.role === 'assistant' ? 'assistant' : 'user';
      messages.push({ role, content: msg.content });
    }

    messages.push({ role: 'user', content: optionsArg.userMessage });

    const result = await this.mistralClient.chat.complete({
      model: this.options.chatModel || DEFAULT_CHAT_MODEL,
      messages: messages,
    });

    return {
      role: 'assistant',
      message: extractTextPieces(result.choices?.[0]?.message?.content).join(''),
    };
  }

  /**
   * Streaming chat.
   *
   * The input stream carries newline-delimited JSON objects of the form
   * `{ role?, content, systemMessage? }`. Each complete line triggers one streaming
   * request, and the text deltas of the reply are enqueued on the returned stream.
   *
   * A trailing message without a final newline is processed like any other message
   * when the input closes (previously its own `content` was echoed back instead of
   * being sent to the model).
   *
   * Malformed lines and failed requests are logged and skipped so that one bad
   * message does not tear down the whole stream.
   */
  public async chatStream(input: ReadableStream<Uint8Array>): Promise<ReadableStream<string>> {
    const decoder = new TextDecoder();
    let buffer = '';
    const mistralClient = this.mistralClient;
    const chatModel = this.options.chatModel || DEFAULT_CHAT_MODEL;

    const processLine = async (
      line: string,
      controller: TransformStreamDefaultController<string>
    ): Promise<void> => {
      if (!line.trim()) {
        return;
      }

      let parsed: unknown;
      try {
        parsed = JSON.parse(line);
      } catch (e) {
        console.error('Failed to parse message:', e);
        return;
      }

      // JSON.parse may legitimately return null, a primitive, or an object without content.
      if (!parsed || typeof parsed !== 'object' || typeof (parsed as { content?: unknown }).content !== 'string') {
        console.error('Ignoring chat stream message without string content:', line);
        return;
      }
      const message = parsed as { role?: unknown; content: string; systemMessage?: unknown };

      const messages: TMistralChatMessage[] = [];
      if (typeof message.systemMessage === 'string' && message.systemMessage) {
        messages.push({ role: 'system', content: message.systemMessage });
      }
      messages.push({
        role: message.role === 'assistant' ? 'assistant' : 'user',
        content: message.content,
      });

      try {
        const stream = await mistralClient.chat.stream({
          model: chatModel,
          messages: messages,
        });

        for await (const event of stream) {
          for (const piece of extractTextPieces(event.data?.choices?.[0]?.delta?.content)) {
            controller.enqueue(piece);
          }
        }
      } catch (e) {
        // Kept best-effort (log and continue), but no longer mislabelled as a parse failure.
        console.error('Mistral streaming request failed:', e);
      }
    };

    const transform = new TransformStream<Uint8Array, string>({
      async transform(chunk, controller) {
        buffer += decoder.decode(chunk, { stream: true });

        // Drain every complete line; a partial trailing line stays buffered.
        let newlineIndex = buffer.indexOf('\n');
        while (newlineIndex !== -1) {
          const line = buffer.slice(0, newlineIndex);
          buffer = buffer.slice(newlineIndex + 1);
          await processLine(line, controller);
          newlineIndex = buffer.indexOf('\n');
        }
      },

      async flush(controller) {
        // Flush bytes the decoder may still hold from an incomplete multi-byte sequence.
        buffer += decoder.decode();
        const remaining = buffer;
        buffer = '';
        await processLine(remaining, controller);
      },
    });

    return input.pipeThrough(transform);
  }

  /**
   * Audio generation is not supported by Mistral.
   * @throws Error always.
   */
  public async audio(optionsArg: { message: string }): Promise<NodeJS.ReadableStream> {
    throw new Error('Audio generation is not supported by Mistral. Please use ElevenLabs or OpenAI provider for audio generation.');
  }

  /**
   * Image analysis via the OCR API.
   *
   * The image is sent as a base64 data URL. When a non-blank prompt is given, the
   * extracted text is passed to chat() together with the prompt and the chat answer is
   * returned; otherwise the raw extracted markdown is returned.
   */
  public async vision(optionsArg: { image: Buffer; prompt: string }): Promise<string> {
    const base64Image = optionsArg.image.toString('base64');
    const mimeType = detectImageMimeType(optionsArg.image);

    const ocrResult = await this.mistralClient.ocr.process({
      model: this.options.ocrModel || DEFAULT_OCR_MODEL,
      document: {
        imageUrl: `data:${mimeType};base64,${base64Image}`,
        type: 'image_url',
      },
    });

    // Combine markdown from all pages
    const extractedText = ocrResult.pages.map(page => page.markdown).join('\n\n');

    if (optionsArg.prompt?.trim()) {
      const chatResponse = await this.chat({
        systemMessage: 'You are an assistant analyzing image content. The following is text extracted from an image using OCR.',
        userMessage: `${optionsArg.prompt}\n\nExtracted content:\n${extractedText}`,
        messageHistory: [],
      });
      return chatResponse.message;
    }

    return extractedText;
  }

  /**
   * Document processing using the OCR API.
   *
   * Each PDF is uploaded through the Files API (purpose 'ocr'), run through OCR, and
   * rendered to text with per-page separators, optional header/footer lines and a
   * Tables section. The combined text is appended to the user message and sent
   * through chat(). Uploaded files are deleted afterwards on a best-effort basis,
   * including when an earlier step throws.
   *
   * @returns `{ message: { role: 'assistant', content } }` holding the chat answer.
   */
  public async document(optionsArg: {
    systemMessage: string;
    userMessage: string;
    pdfDocuments: Uint8Array[];
    messageHistory: ChatMessage[];
  }): Promise<{ message: any }> {
    const extractedTexts: string[] = [];
    const uploadedFileIds: string[] = [];

    try {
      // Documents are processed one after another, in the order given.
      for (const [docIndex, pdfDocument] of optionsArg.pdfDocuments.entries()) {
        const uploadResult = await this.mistralClient.files.upload({
          file: {
            fileName: `document_${docIndex + 1}.pdf`,
            content: pdfDocument,
          },
          purpose: 'ocr',
        });

        // Record the id right away so the finally block can clean up even if OCR fails.
        uploadedFileIds.push(uploadResult.id);

        const ocrResult = await this.mistralClient.ocr.process({
          model: this.options.ocrModel || DEFAULT_OCR_MODEL,
          document: {
            type: 'file',
            fileId: uploadResult.id,
          },
          tableFormat: this.options.tableFormat || 'markdown',
        });

        const pageTexts = ocrResult.pages.map((page, pageIndex) => {
          let pageContent = `--- Page ${pageIndex + 1} ---\n${page.markdown}`;

          if (page.tables && page.tables.length > 0) {
            pageContent += '\n\n**Tables:**\n' + page.tables.map((table: unknown) => tableToText(table)).join('\n');
          }

          if (page.header) {
            pageContent = `Header: ${page.header}\n${pageContent}`;
          }
          if (page.footer) {
            pageContent += `\nFooter: ${page.footer}`;
          }

          return pageContent;
        }).join('\n\n');

        extractedTexts.push(pageTexts);
      }

      // A single document is passed through as-is; several get "=== Document N ===" headers.
      const allDocumentText = extractedTexts.length === 1
        ? extractedTexts[0]
        : extractedTexts.map((text, i) => `=== Document ${i + 1} ===\n${text}`).join('\n\n');

      const chatResponse = await this.chat({
        systemMessage: optionsArg.systemMessage || 'You are a helpful assistant analyzing document content.',
        userMessage: `${optionsArg.userMessage}\n\n---\nDocument Content:\n${allDocumentText}`,
        messageHistory: optionsArg.messageHistory,
      });

      return {
        message: {
          role: 'assistant',
          content: chatResponse.message
        }
      };
    } finally {
      // Clean up uploaded files
      for (const fileId of uploadedFileIds) {
        try {
          await this.mistralClient.files.delete({ fileId });
        } catch (cleanupError) {
          // Ignore cleanup errors - files may have already been auto-deleted
          console.warn(`Failed to delete temporary file ${fileId}:`, cleanupError);
        }
      }
    }
  }

  /**
   * Research is not natively supported by Mistral.
   * @throws Error always.
   */
  public async research(optionsArg: ResearchOptions): Promise<ResearchResponse> {
    throw new Error('Research/web search is not supported by Mistral. Please use Perplexity or Anthropic provider for research capabilities.');
  }

  /**
   * Image generation is not supported by Mistral.
   * @throws Error always.
   */
  public async imageGenerate(optionsArg: ImageGenerateOptions): Promise<ImageResponse> {
    throw new Error('Image generation is not supported by Mistral. Please use OpenAI provider for image generation.');
  }

  /**
   * Image editing is not supported by Mistral.
   * @throws Error always.
   */
  public async imageEdit(optionsArg: ImageEditOptions): Promise<ImageResponse> {
    throw new Error('Image editing is not supported by Mistral. Please use OpenAI provider for image editing.');
  }
}