/**
 * MIME type of an image handed to an OCR engine.
 *
 * The listed literals are the commonly used values; the trailing `string`
 * member means any other MIME type string is accepted as well.
 */
export type TSmartAiOcrImageMimeType =
  | 'image/png'
  | 'image/jpeg'
  | 'image/webp'
  | 'image/gif'
  | string;

/** Value sent as `table_format` in a Mistral OCR request. */
export type TSmartAiMistralOcrTableFormat = 'markdown' | 'html';

/** Value sent as `confidence_scores_granularity` in a Mistral OCR request. */
export type TSmartAiMistralOcrConfidenceScoresGranularity = 'page' | 'word';

/** A single image to recognize, given as base64 payload plus its MIME type. */
export interface ISmartAiOcrImageInput {
  /** Base64 payload only; the engine wraps it into a `data:` URL itself. */
  dataBase64: string;
  mimeType: TSmartAiOcrImageMimeType;
}

/** Engine-agnostic OCR result for one page. */
export interface ISmartAiOcrPageResult {
  index: number;
  text: string;
  /** Page confidence, when the engine reported one. */
  confidence?: number;
}

/**
 * Engine-agnostic OCR result.
 *
 * @typeParam TRaw - Type of the untouched engine response exposed as `raw`.
 */
export interface ISmartAiOcrResult<TRaw = unknown> {
  /** Text of all pages joined together. */
  text: string;
  /** Aggregate confidence, when at least one page reported one. */
  confidence?: number;
  pages: ISmartAiOcrPageResult[];
  raw: TRaw;
}

/**
 * Minimal OCR engine contract.
 *
 * @typeParam TRaw - Type of the raw engine response carried by the result.
 */
export interface ISmartAiOcrEngine<TRaw = unknown> {
  recognizeImage: (
    input: ISmartAiOcrImageInput,
    options?: ISmartAiMistralOcrRecognizeOptions
  ) => Promise<ISmartAiOcrResult<TRaw>>;
}

/**
 * Per-page confidence scores of a Mistral OCR response.
 *
 * Both snake_case and camelCase spellings are declared so that either
 * response shape can be read.
 */
export interface IMistralOcrPageConfidenceScores {
  average_page_confidence_score?: number;
  averagePageConfidenceScore?: number;
  minimum_page_confidence_score?: number;
  minimumPageConfidenceScore?: number;
}

/** One page of a Mistral OCR response. */
export interface IMistralOcrPageResponse {
  index: number;
  markdown: string;
  confidence_scores?: IMistralOcrPageConfidenceScores | null;
  confidenceScores?: IMistralOcrPageConfidenceScores | null;
}

/** Mistral OCR response body as consumed by this module. */
export interface IMistralOcrResponse {
  pages: IMistralOcrPageResponse[];
  model: string;
  document_annotation?: unknown;
  documentAnnotation?: unknown;
  usage_info?: unknown;
  usageInfo?: unknown;
}

/** Mistral OCR request body as produced by this module. */
export interface IMistralOcrRequest {
  model: string;
  document: {
    type: 'image_url';
    image_url: string;
  };
  include_image_base64?: boolean;
  table_format?: TSmartAiMistralOcrTableFormat;
  extract_header?: boolean;
  extract_footer?: boolean;
  confidence_scores_granularity?: TSmartAiMistralOcrConfidenceScoresGranularity;
}

/**
 * Pluggable transport that sends a request and resolves with the response.
 * Supplying one replaces the built-in HTTP transport (useful for tests or
 * for routing through a different client).
 */
export interface ISmartAiMistralOcrTransport {
  process: (request: IMistralOcrRequest) => Promise<IMistralOcrResponse>;
}

/** Engine-level configuration; the recognize-related fields act as defaults for every call. */
export interface ISmartAiMistralOcrOptions {
  /** Required only when no custom `transport` is provided. */
  apiKey?: string;
  /** Defaults to `mistral-ocr-latest`. */
  model?: string;
  /** Defaults to `https://api.mistral.ai/v1/ocr`. */
  endpointUrl?: string;
  transport?: ISmartAiMistralOcrTransport;
  includeImageBase64?: boolean;
  tableFormat?: TSmartAiMistralOcrTableFormat;
  extractHeader?: boolean;
  extractFooter?: boolean;
  confidenceScoresGranularity?: TSmartAiMistralOcrConfidenceScoresGranularity;
}

/** Per-call overrides; any field set here wins over the engine-level option. */
export interface ISmartAiMistralOcrRecognizeOptions {
  includeImageBase64?: boolean;
  tableFormat?: TSmartAiMistralOcrTableFormat;
  extractHeader?: boolean;
  extractFooter?: boolean;
  confidenceScoresGranularity?: TSmartAiMistralOcrConfidenceScoresGranularity;
}

const defaultMistralOcrModel = 'mistral-ocr-latest';
const defaultMistralOcrEndpointUrl = 'https://api.mistral.ai/v1/ocr';

/**
 * Builds the default transport, which POSTs the request as JSON using the
 * global `fetch` and a bearer token.
 *
 * The API key is checked lazily inside `process`, so constructing the
 * transport without a key does not throw; the first request does.
 */
const createMistralOcrHttpTransport = (options: {
  apiKey?: string;
  endpointUrl?: string;
}): ISmartAiMistralOcrTransport => {
  return {
    process: async (request) => {
      if (!options.apiKey) {
        throw new Error('Mistral OCR requires an apiKey when no custom transport is provided.');
      }

      const response = await fetch(options.endpointUrl ?? defaultMistralOcrEndpointUrl, {
        method: 'POST',
        headers: {
          Authorization: `Bearer ${options.apiKey}`,
          'Content-Type': 'application/json',
        },
        body: JSON.stringify(request),
      });

      if (!response.ok) {
        // Include the response body: it carries the server's error details.
        const errorBody = await response.text();
        throw new Error(`Mistral OCR request failed with status ${response.status}: ${errorBody}`);
      }

      // The JSON body is trusted to match the declared shape; the engine
      // validates the `pages` array before using it.
      return (await response.json()) as IMistralOcrResponse;
    },
  };
};

/**
 * Reads the average confidence of a page, accepting either the snake_case
 * or the camelCase spelling (snake_case is preferred when both are present).
 *
 * @returns The average page confidence, or `undefined` when none was reported.
 */
const getPageConfidence = (page: IMistralOcrPageResponse): number | undefined => {
  const confidenceScores = page.confidence_scores ?? page.confidenceScores;
  return (
    confidenceScores?.average_page_confidence_score ??
    confidenceScores?.averagePageConfidenceScore
  );
};

/**
 * Creates an OCR engine backed by the Mistral OCR endpoint.
 *
 * @param options - Engine configuration. When `transport` is omitted an HTTP
 *   transport is created from `apiKey` and `endpointUrl`.
 * @returns An engine whose `recognizeImage` resolves with the joined page
 *   text, the mean of the reported page confidences, the per-page results
 *   and the raw response.
 * @throws Error (from `recognizeImage`) when `dataBase64` or `mimeType` is
 *   empty, when the response has no `pages` array, or when the transport
 *   rejects.
 */
export const createMistralOcrEngine = (
  options: ISmartAiMistralOcrOptions = {}
): ISmartAiOcrEngine<IMistralOcrResponse> => {
  const transport =
    options.transport ??
    createMistralOcrHttpTransport({
      apiKey: options.apiKey,
      endpointUrl: options.endpointUrl,
    });
  const model = options.model ?? defaultMistralOcrModel;

  return {
    recognizeImage: async (input, recognizeOptions = {}) => {
      if (!input.dataBase64) {
        throw new Error('Mistral OCR image input requires dataBase64.');
      }
      if (!input.mimeType) {
        throw new Error('Mistral OCR image input requires mimeType.');
      }

      // Per-call options take precedence over engine-level options. Only
      // include_image_base64 has a hard default (false); the remaining
      // fields stay undefined when unset on both levels.
      const response = await transport.process({
        model,
        document: {
          type: 'image_url',
          image_url: `data:${input.mimeType};base64,${input.dataBase64}`,
        },
        include_image_base64:
          recognizeOptions.includeImageBase64 ?? options.includeImageBase64 ?? false,
        table_format: recognizeOptions.tableFormat ?? options.tableFormat,
        extract_header: recognizeOptions.extractHeader ?? options.extractHeader,
        extract_footer: recognizeOptions.extractFooter ?? options.extractFooter,
        confidence_scores_granularity:
          recognizeOptions.confidenceScoresGranularity ?? options.confidenceScoresGranularity,
      });

      // Transports return unvalidated data; fail with a clear message
      // instead of an opaque TypeError from `.map` on a missing field.
      if (!response || !Array.isArray(response.pages)) {
        throw new Error('Mistral OCR response did not contain a pages array.');
      }

      const pages = response.pages.map((page) => ({
        index: page.index,
        text: page.markdown,
        confidence: getPageConfidence(page),
      }));

      // Average only over pages that actually reported a confidence.
      const pageConfidences = pages
        .map((page) => page.confidence)
        .filter((confidence): confidence is number => typeof confidence === 'number');
      const confidence = pageConfidences.length
        ? pageConfidences.reduce((sum, value) => sum + value, 0) / pageConfidences.length
        : undefined;

      return {
        text: pages
          .map((page) => page.text)
          .join('\n\n')
          .trim(),
        confidence,
        pages,
        raw: response,
      };
    },
  };
};