Files
smartai/ts_ocr/index.ts
T

193 lines
5.6 KiB
TypeScript
Raw Permalink Normal View History

/**
 * MIME type of an image handed to an OCR engine.
 *
 * The literal members name common image formats. Because the union also
 * contains `string`, the type checker accepts any string value here; the
 * literals act as documentation rather than as a restriction.
 */
export type TSmartAiOcrImageMimeType =
| 'image/png'
| 'image/jpeg'
| 'image/webp'
| 'image/gif'
| string;
/** Values accepted for the `tableFormat` option / `table_format` request field. */
export type TSmartAiMistralOcrTableFormat = 'markdown' | 'html';
/**
 * Values accepted for the `confidenceScoresGranularity` option /
 * `confidence_scores_granularity` request field.
 */
export type TSmartAiMistralOcrConfidenceScoresGranularity = 'page' | 'word';
/** A single image to run OCR on, supplied as base64 data plus its MIME type. */
export interface ISmartAiOcrImageInput {
// Base64 payload only: the Mistral engine in this file prepends
// `data:<mimeType>;base64,` itself when building the request.
dataBase64: string;
// Inserted verbatim into the data URL sent to the OCR service.
mimeType: TSmartAiOcrImageMimeType;
}
/** Normalized OCR output for one page. */
export interface ISmartAiOcrPageResult {
// Page index as reported by the provider response.
index: number;
// Recognized text; the Mistral engine in this file fills it from the page's `markdown`.
text: string;
// Page-level confidence, absent when the provider returned no average score.
confidence?: number;
}
/**
 * Normalized OCR result for a whole input.
 *
 * `TRaw` is the type of the untouched provider response kept in `raw`.
 */
export interface ISmartAiOcrResult<TRaw = unknown> {
// Combined text of all pages; the Mistral engine joins page texts with a
// blank line between them and trims the result.
text: string;
// Overall confidence; the Mistral engine sets it to the arithmetic mean of
// the page confidences that are present, or leaves it undefined if none are.
confidence?: number;
// Per-page results in the order the provider returned them.
pages: ISmartAiOcrPageResult[];
// The provider response exactly as received from the transport.
raw: TRaw;
}
/**
 * An OCR engine that recognizes text in a single image.
 *
 * NOTE(review): the name is provider-neutral, but the signature is typed with
 * the Mistral-specific option and response types.
 */
export interface ISmartAiOcrEngine {
// Runs OCR on `input`; `options` are optional per-call settings.
recognizeImage: (
input: ISmartAiOcrImageInput,
options?: ISmartAiMistralOcrRecognizeOptions
) => Promise<ISmartAiOcrResult<IMistralOcrResponse>>;
}
/**
 * Page-level confidence scores of a Mistral OCR page.
 *
 * Each score is declared in both snake_case and camelCase spelling so that
 * either response shape type-checks. In the code visible here only the
 * average score is read; the minimum score is declared but not consumed.
 */
export interface IMistralOcrPageConfidenceScores {
average_page_confidence_score?: number;
averagePageConfidenceScore?: number;
minimum_page_confidence_score?: number;
minimumPageConfidenceScore?: number;
}
/** One page entry of a Mistral OCR response. */
export interface IMistralOcrPageResponse {
// Page index as reported by the service.
index: number;
// Recognized page content; used as the page `text` in the normalized result.
markdown: string;
// Confidence scores in snake_case spelling; may be null or absent.
confidence_scores?: IMistralOcrPageConfidenceScores | null;
// Same data in camelCase spelling; consulted when the snake_case field is null or absent.
confidenceScores?: IMistralOcrPageConfidenceScores | null;
}
/**
 * Response returned by a Mistral OCR transport.
 *
 * Only `pages` is read by the engine in this file; the remaining fields are
 * passed through untouched as part of the `raw` result. Annotation and usage
 * fields are declared in both snake_case and camelCase spelling and typed as
 * `unknown`, so consumers must narrow them before use.
 */
export interface IMistralOcrResponse {
pages: IMistralOcrPageResponse[];
model: string;
document_annotation?: unknown;
documentAnnotation?: unknown;
usage_info?: unknown;
usageInfo?: unknown;
}
/**
 * Request passed to a Mistral OCR transport.
 *
 * The built-in HTTP transport serializes this object with `JSON.stringify`
 * and sends it as the POST body, so the snake_case field names are the wire
 * format.
 */
export interface IMistralOcrRequest {
// Model identifier to run.
model: string;
// The document to process; this file only ever sends an image as a URL string
// (a `data:` URL built from the base64 input).
document: {
type: 'image_url';
image_url: string;
};
// The engine in this file always sends this field, defaulting it to `false`.
include_image_base64?: boolean;
// The remaining fields are forwarded from the engine/per-call options and are
// left undefined when not configured.
// NOTE(review): their exact service-side meaning is not visible here — confirm
// against the Mistral OCR API documentation.
table_format?: TSmartAiMistralOcrTableFormat;
extract_header?: boolean;
extract_footer?: boolean;
confidence_scores_granularity?: TSmartAiMistralOcrConfidenceScoresGranularity;
}
/**
 * Abstraction over how an OCR request reaches the Mistral service.
 *
 * Supplying a custom implementation via `ISmartAiMistralOcrOptions.transport`
 * replaces the built-in `fetch`-based transport.
 */
export interface ISmartAiMistralOcrTransport {
// Sends one request and resolves with the service response.
process: (request: IMistralOcrRequest) => Promise<IMistralOcrResponse>;
}
/** Engine-level configuration for `createMistralOcrEngine`. */
export interface ISmartAiMistralOcrOptions {
// Bearer token for the built-in HTTP transport; required only when no custom
// `transport` is provided (checked when a request is actually sent).
apiKey?: string;
// Model identifier; falls back to the module default when omitted.
model?: string;
// Endpoint for the built-in HTTP transport; falls back to the module default when omitted.
endpointUrl?: string;
// Custom transport; when set, `apiKey` and `endpointUrl` are not used by the engine.
transport?: ISmartAiMistralOcrTransport;
// Defaults for the per-call options of the same name; a value passed to
// `recognizeImage` takes precedence over the value configured here.
includeImageBase64?: boolean;
tableFormat?: TSmartAiMistralOcrTableFormat;
extractHeader?: boolean;
extractFooter?: boolean;
confidenceScoresGranularity?: TSmartAiMistralOcrConfidenceScoresGranularity;
}
/**
 * Per-call options for `recognizeImage`.
 *
 * Each field overrides the engine-level option of the same name for that one
 * call and is mapped to the corresponding snake_case request field.
 */
export interface ISmartAiMistralOcrRecognizeOptions {
// Sent as `include_image_base64`; `false` when set neither here nor on the engine.
includeImageBase64?: boolean;
// Sent as `table_format`.
tableFormat?: TSmartAiMistralOcrTableFormat;
// Sent as `extract_header`.
extractHeader?: boolean;
// Sent as `extract_footer`.
extractFooter?: boolean;
// Sent as `confidence_scores_granularity`.
confidenceScoresGranularity?: TSmartAiMistralOcrConfidenceScoresGranularity;
}
// Model identifier used when the engine options do not specify `model`.
const defaultMistralOcrModel = 'mistral-ocr-latest';
// Endpoint used by the built-in HTTP transport when `endpointUrl` is not specified.
const defaultMistralOcrEndpointUrl = 'https://api.mistral.ai/v1/ocr';
/**
 * Builds the default transport, which talks to the Mistral OCR HTTP API via `fetch`.
 *
 * Options are read lazily on every call, so a missing `apiKey` only surfaces
 * once a request is actually sent.
 *
 * @param options - `apiKey` is sent as a bearer token; `endpointUrl` overrides
 *   the module's default endpoint.
 * @returns A transport whose `process` POSTs the request as JSON and resolves
 *   with the parsed JSON response body.
 * @throws Error (from `process`) when no `apiKey` is configured, or when the
 *   HTTP response status is not OK; the latter includes status and body text.
 */
const createMistralOcrHttpTransport = (options: {
  apiKey?: string;
  endpointUrl?: string;
}): ISmartAiMistralOcrTransport => {
  return {
    async process(request) {
      const apiKey = options.apiKey;
      if (!apiKey) {
        throw new Error('Mistral OCR requires an apiKey when no custom transport is provided.');
      }

      const targetUrl = options.endpointUrl ?? defaultMistralOcrEndpointUrl;
      const httpResponse = await fetch(targetUrl, {
        method: 'POST',
        headers: {
          Authorization: `Bearer ${apiKey}`,
          'Content-Type': 'application/json',
        },
        body: JSON.stringify(request),
      });

      if (httpResponse.ok) {
        // The payload is trusted to match the declared shape; no runtime validation happens here.
        return (await httpResponse.json()) as IMistralOcrResponse;
      }

      // Surface the body text so callers can see the service's own error details.
      const failureDetails = await httpResponse.text();
      throw new Error(
        `Mistral OCR request failed with status ${httpResponse.status}: ${failureDetails}`
      );
    },
  };
};
/**
 * Extracts the average confidence score of a page, if the response carries one.
 *
 * The snake_case `confidence_scores` object is preferred; the camelCase
 * `confidenceScores` object is consulted only when the former is null or
 * absent. Within the chosen object the snake_case average wins over the
 * camelCase one.
 *
 * @param page - A page entry from a Mistral OCR response.
 * @returns The average page confidence, or `undefined` when none is available.
 */
const getPageConfidence = (page: IMistralOcrPageResponse): number | undefined => {
  let scores = page.confidence_scores;
  if (scores == null) {
    scores = page.confidenceScores;
  }
  if (scores == null) {
    return undefined;
  }

  const snakeCaseAverage = scores.average_page_confidence_score;
  return snakeCaseAverage != null ? snakeCaseAverage : scores.averagePageConfidenceScore;
};
/**
 * Creates an OCR engine backed by Mistral OCR.
 *
 * Requests go through `options.transport` when provided; otherwise a built-in
 * HTTP transport is created from `options.apiKey` and `options.endpointUrl`.
 *
 * @param options - Engine-level configuration. OCR flags set here act as
 *   defaults that individual `recognizeImage` calls may override.
 * @returns An engine whose `recognizeImage` resolves with the combined text,
 *   the mean of the available page confidences, the per-page results and the
 *   raw transport response.
 * @throws Error (from `recognizeImage`) when `dataBase64` or `mimeType` is
 *   empty, or when the transport response does not contain a `pages` array.
 *   Errors raised by the transport itself propagate unchanged.
 */
export const createMistralOcrEngine = (
  options: ISmartAiMistralOcrOptions = {}
): ISmartAiOcrEngine => {
  // Fall back to the built-in HTTP transport only when the caller did not inject one.
  const transport =
    options.transport ??
    createMistralOcrHttpTransport({
      apiKey: options.apiKey,
      endpointUrl: options.endpointUrl,
    });
  const model = options.model ?? defaultMistralOcrModel;

  return {
    recognizeImage: async (input, recognizeOptions = {}) => {
      if (!input.dataBase64) {
        throw new Error('Mistral OCR image input requires dataBase64.');
      }
      if (!input.mimeType) {
        throw new Error('Mistral OCR image input requires mimeType.');
      }

      // Per-call options take precedence over engine-level options.
      const response = await transport.process({
        model,
        document: {
          type: 'image_url',
          image_url: `data:${input.mimeType};base64,${input.dataBase64}`,
        },
        include_image_base64:
          recognizeOptions.includeImageBase64 ?? options.includeImageBase64 ?? false,
        table_format: recognizeOptions.tableFormat ?? options.tableFormat,
        extract_header: recognizeOptions.extractHeader ?? options.extractHeader,
        extract_footer: recognizeOptions.extractFooter ?? options.extractFooter,
        confidence_scores_granularity:
          recognizeOptions.confidenceScoresGranularity ?? options.confidenceScoresGranularity,
      });

      // The response type is only a compile-time promise: a custom transport or
      // an unexpected service payload may lack `pages`. Fail with a clear
      // message instead of an opaque TypeError from `.map`.
      if (!response || !Array.isArray(response.pages)) {
        throw new Error('Mistral OCR response did not contain a pages array.');
      }

      const pages = response.pages.map((page) => ({
        index: page.index,
        text: page.markdown,
        confidence: getPageConfidence(page),
      }));

      // Average only over pages that actually reported a confidence value.
      const pageConfidences = pages
        .map((page) => page.confidence)
        .filter((confidence): confidence is number => typeof confidence === 'number');
      const confidence = pageConfidences.length
        ? pageConfidences.reduce((sum, value) => sum + value, 0) / pageConfidences.length
        : undefined;

      return {
        text: pages.map((page) => page.text).join('\n\n').trim(),
        confidence,
        pages,
        raw: response,
      };
    },
  };
};