193 lines
5.6 KiB
TypeScript
193 lines
5.6 KiB
TypeScript
|
|
/**
 * MIME type of an image handed to an OCR engine.
 *
 * Four image formats are spelled out as literals; the trailing `string`
 * member means any other MIME type string is accepted as well.
 */
export type TSmartAiOcrImageMimeType =
  'image/png' | 'image/jpeg' | 'image/webp' | 'image/gif' | string;
|
||
|
|
|
||
|
|
/** Table output format sent to Mistral OCR as the `table_format` request field. */
export type TSmartAiMistralOcrTableFormat = 'html' | 'markdown';
|
||
|
|
|
||
|
|
/**
 * Granularity sent to Mistral OCR as the `confidence_scores_granularity`
 * request field.
 */
export type TSmartAiMistralOcrConfidenceScoresGranularity = 'word' | 'page';
|
||
|
|
|
||
|
|
/** A single image to run OCR on. */
export interface ISmartAiOcrImageInput {
  /** MIME type of the image; the Mistral engine rejects an empty value. */
  mimeType: TSmartAiOcrImageMimeType;
  /**
   * Base64-encoded image payload. The Mistral engine rejects an empty value
   * and places this string after `;base64,` in the data URL it builds.
   */
  dataBase64: string;
}
|
||
|
|
|
||
|
|
/** OCR output for one page of the recognized input. */
export interface ISmartAiOcrPageResult {
  /** Page index, copied from the provider response by the Mistral engine. */
  index: number;
  /** Recognized text; the Mistral engine fills this with the page's markdown. */
  text: string;
  /** Page-level confidence, present only when the provider reported one. */
  confidence?: number;
}
|
||
|
|
|
||
|
|
/**
 * Result of an OCR run.
 *
 * @typeParam TRaw - Type of the untouched provider response exposed as `raw`.
 */
export interface ISmartAiOcrResult<TRaw = unknown> {
  /** Per-page results. */
  pages: ISmartAiOcrPageResult[];
  /** Combined text of all pages. */
  text: string;
  /** Overall confidence; absent when no page reported a confidence. */
  confidence?: number;
  /** The provider response this result was derived from. */
  raw: TRaw;
}
|
||
|
|
|
||
|
|
/** An OCR engine capable of recognizing text in a single image. */
export interface ISmartAiOcrEngine {
  /**
   * Runs OCR on one image.
   *
   * @param input - The image payload and its MIME type.
   * @param options - Optional per-call settings.
   * @returns The recognized text, per-page results and the raw Mistral response.
   */
  recognizeImage: (input: ISmartAiOcrImageInput, options?: ISmartAiMistralOcrRecognizeOptions) => Promise<
    ISmartAiOcrResult<IMistralOcrResponse>
  >;
}
|
||
|
|
|
||
|
|
/**
 * Page-level confidence scores in a Mistral OCR page response.
 *
 * Each score is declared in both snake_case and camelCase spelling; every
 * member is optional.
 */
export interface IMistralOcrPageConfidenceScores {
  /** Average confidence for the page (snake_case spelling). */
  average_page_confidence_score?: number;
  /** Minimum confidence for the page (snake_case spelling). */
  minimum_page_confidence_score?: number;
  /** Average confidence for the page (camelCase spelling). */
  averagePageConfidenceScore?: number;
  /** Minimum confidence for the page (camelCase spelling). */
  minimumPageConfidenceScore?: number;
}
|
||
|
|
|
||
|
|
/** One page entry of a Mistral OCR response. */
export interface IMistralOcrPageResponse {
  /** Index of the page. */
  index: number;
  /** Recognized page content as markdown. */
  markdown: string;
  /** Confidence scores (camelCase spelling); may be absent or `null`. */
  confidenceScores?: IMistralOcrPageConfidenceScores | null;
  /** Confidence scores (snake_case spelling); may be absent or `null`. */
  confidence_scores?: IMistralOcrPageConfidenceScores | null;
}
|
||
|
|
|
||
|
|
/**
 * Shape of the response a Mistral OCR transport resolves with.
 *
 * Optional members are declared in both snake_case and camelCase spelling and
 * are typed `unknown`, so they must be narrowed before use.
 */
export interface IMistralOcrResponse {
  /** Name of the model reported in the response. */
  model: string;
  /** Recognized pages. */
  pages: IMistralOcrPageResponse[];
  /** Document annotation payload (snake_case spelling). */
  document_annotation?: unknown;
  /** Usage information payload (snake_case spelling). */
  usage_info?: unknown;
  /** Document annotation payload (camelCase spelling). */
  documentAnnotation?: unknown;
  /** Usage information payload (camelCase spelling). */
  usageInfo?: unknown;
}
|
||
|
|
|
||
|
|
/**
 * Request payload handed to a Mistral OCR transport.
 *
 * The built-in HTTP transport serializes this object with `JSON.stringify`
 * and sends it as the POST body.
 */
export interface IMistralOcrRequest {
  /** OCR model name. */
  model: string;
  /** The document to process; only image URLs are modelled here. */
  document: { type: 'image_url'; image_url: string };
  /** Requested granularity of confidence scores. */
  confidence_scores_granularity?: TSmartAiMistralOcrConfidenceScoresGranularity;
  /** Value of the `extract_footer` request flag. */
  extract_footer?: boolean;
  /** Value of the `extract_header` request flag. */
  extract_header?: boolean;
  /** Value of the `include_image_base64` request flag. */
  include_image_base64?: boolean;
  /** Requested table output format. */
  table_format?: TSmartAiMistralOcrTableFormat;
}
|
||
|
|
|
||
|
|
/**
 * Pluggable transport that delivers an OCR request and resolves with the
 * response. Passing one to the engine replaces the built-in HTTP transport.
 */
export interface ISmartAiMistralOcrTransport {
  /**
   * Sends one OCR request.
   *
   * @param request - The request payload to deliver.
   * @returns The OCR response.
   */
  process: (
    request: IMistralOcrRequest
  ) => Promise<IMistralOcrResponse>;
}
|
||
|
|
|
||
|
|
/**
 * Engine-level configuration for `createMistralOcrEngine`.
 *
 * The recognition settings act as defaults; a per-call option of the same
 * name takes precedence over them.
 */
export interface ISmartAiMistralOcrOptions {
  /** Custom transport; when set, the built-in HTTP transport is not created. */
  transport?: ISmartAiMistralOcrTransport;
  /**
   * API key sent as a Bearer token by the built-in HTTP transport, which
   * throws on its first request when the key is missing.
   */
  apiKey?: string;
  /** Endpoint for the built-in HTTP transport; defaults to the Mistral OCR URL. */
  endpointUrl?: string;
  /** Model name; defaults to `mistral-ocr-latest`. */
  model?: string;
  /** Default for the `include_image_base64` request flag (`false` if unset everywhere). */
  includeImageBase64?: boolean;
  /** Default for the `table_format` request field. */
  tableFormat?: TSmartAiMistralOcrTableFormat;
  /** Default for the `extract_header` request flag. */
  extractHeader?: boolean;
  /** Default for the `extract_footer` request flag. */
  extractFooter?: boolean;
  /** Default for the `confidence_scores_granularity` request field. */
  confidenceScoresGranularity?: TSmartAiMistralOcrConfidenceScoresGranularity;
}
|
||
|
|
|
||
|
|
/**
 * Per-call recognition settings for the Mistral engine's `recognizeImage`.
 *
 * Each member, when set, takes precedence over the engine-level option of the
 * same name.
 */
export interface ISmartAiMistralOcrRecognizeOptions {
  /** Sent as the `confidence_scores_granularity` request field. */
  confidenceScoresGranularity?: TSmartAiMistralOcrConfidenceScoresGranularity;
  /** Sent as the `extract_footer` request flag. */
  extractFooter?: boolean;
  /** Sent as the `extract_header` request flag. */
  extractHeader?: boolean;
  /** Sent as the `include_image_base64` request flag. */
  includeImageBase64?: boolean;
  /** Sent as the `table_format` request field. */
  tableFormat?: TSmartAiMistralOcrTableFormat;
}
|
||
|
|
|
||
|
|
/** Endpoint the built-in HTTP transport posts to when no `endpointUrl` is configured. */
const defaultMistralOcrEndpointUrl = 'https://api.mistral.ai/v1/ocr';

/** Model name used when the engine options do not specify one. */
const defaultMistralOcrModel = 'mistral-ocr-latest';
|
||
|
|
|
||
|
|
/**
 * Creates the built-in transport, which sends OCR requests over HTTP using
 * the global `fetch`.
 *
 * The API key is checked on every `process` call rather than at creation
 * time, so a transport can be created without a key and only fails once it
 * is actually used.
 *
 * @param options - `apiKey` is sent as a Bearer token; `endpointUrl`
 *   overrides the default Mistral OCR endpoint.
 * @returns A transport whose `process` POSTs the request as JSON and resolves
 *   with the parsed JSON response body.
 * @throws Error from `process` when no API key is configured, or when the
 *   HTTP response status is not OK (the message carries the status code and
 *   the response body text).
 */
const createMistralOcrHttpTransport = (options: {
  apiKey?: string;
  endpointUrl?: string;
}): ISmartAiMistralOcrTransport => {
  return {
    process: async (request) => {
      // Read the options at call time, exactly when the request is made.
      const { apiKey, endpointUrl } = options;
      if (!apiKey) {
        throw new Error('Mistral OCR requires an apiKey when no custom transport is provided.');
      }

      const targetUrl = endpointUrl ?? defaultMistralOcrEndpointUrl;
      const httpResponse = await fetch(targetUrl, {
        method: 'POST',
        headers: {
          Authorization: `Bearer ${apiKey}`,
          'Content-Type': 'application/json',
        },
        body: JSON.stringify(request),
      });

      if (httpResponse.ok) {
        // The body is trusted to match the response shape; it is not validated here.
        return (await httpResponse.json()) as IMistralOcrResponse;
      }

      const errorBody = await httpResponse.text();
      throw new Error(`Mistral OCR request failed with status ${httpResponse.status}: ${errorBody}`);
    },
  };
};
|
||
|
|
|
||
|
|
/**
 * Extracts the average page confidence from a Mistral OCR page.
 *
 * The snake_case `confidence_scores` object is preferred; the camelCase
 * `confidenceScores` object is used only when the former is `null` or
 * absent. Within the chosen object the snake_case average is preferred over
 * the camelCase one in the same way.
 *
 * @param page - A page entry from a Mistral OCR response.
 * @returns The average page confidence, or `undefined` when none is present.
 */
const getPageConfidence = (page: IMistralOcrPageResponse): number | undefined => {
  const scores = page.confidence_scores ?? page.confidenceScores;
  if (scores === null || scores === undefined) {
    return undefined;
  }

  const snakeCaseAverage = scores.average_page_confidence_score;
  return snakeCaseAverage ?? scores.averagePageConfidenceScore;
};
|
||
|
|
|
||
|
|
/**
 * Creates an OCR engine backed by Mistral OCR.
 *
 * Requests go through `options.transport` when provided; otherwise the
 * built-in HTTP transport is created from `options.apiKey` and
 * `options.endpointUrl`. The model defaults to `defaultMistralOcrModel`.
 *
 * @param options - Engine-level configuration; its recognition settings act
 *   as defaults that per-call options override.
 * @returns An engine whose `recognizeImage` sends the image as a base64 data
 *   URL and resolves with the combined text, per-page results, the mean page
 *   confidence (when any page reports one) and the raw response.
 * @throws Error from `recognizeImage` when `dataBase64` or `mimeType` is
 *   empty, or when the transport response has no `pages` array. Errors thrown
 *   by the transport propagate unchanged.
 */
export const createMistralOcrEngine = (
  options: ISmartAiMistralOcrOptions = {}
): ISmartAiOcrEngine => {
  const transport =
    options.transport ??
    createMistralOcrHttpTransport({
      apiKey: options.apiKey,
      endpointUrl: options.endpointUrl,
    });
  const model = options.model ?? defaultMistralOcrModel;

  return {
    recognizeImage: async (input, recognizeOptions = {}) => {
      if (!input.dataBase64) {
        throw new Error('Mistral OCR image input requires dataBase64.');
      }
      if (!input.mimeType) {
        throw new Error('Mistral OCR image input requires mimeType.');
      }

      // Per-call options win over engine-level options; only
      // include_image_base64 has a hard default (false).
      const response = await transport.process({
        model,
        document: {
          type: 'image_url',
          image_url: `data:${input.mimeType};base64,${input.dataBase64}`,
        },
        include_image_base64:
          recognizeOptions.includeImageBase64 ?? options.includeImageBase64 ?? false,
        table_format: recognizeOptions.tableFormat ?? options.tableFormat,
        extract_header: recognizeOptions.extractHeader ?? options.extractHeader,
        extract_footer: recognizeOptions.extractFooter ?? options.extractFooter,
        confidence_scores_granularity:
          recognizeOptions.confidenceScoresGranularity ?? options.confidenceScoresGranularity,
      });

      // Transport responses are not validated upstream (the HTTP transport
      // only casts the parsed JSON), so fail with a clear message instead of
      // an opaque TypeError when `pages` is missing or malformed.
      if (!Array.isArray(response?.pages)) {
        throw new Error('Mistral OCR response did not contain a pages array.');
      }

      const pages = response.pages.map((page) => ({
        index: page.index,
        text: page.markdown,
        confidence: getPageConfidence(page),
      }));

      // Only finite numbers take part in the mean: NaN or Infinity from a
      // custom transport would otherwise poison the aggregate.
      const pageConfidences = pages
        .map((page) => page.confidence)
        .filter(
          (confidence): confidence is number =>
            typeof confidence === 'number' && Number.isFinite(confidence)
        );
      const confidence = pageConfidences.length
        ? pageConfidences.reduce((sum, value) => sum + value, 0) / pageConfidences.length
        : undefined;

      return {
        text: pages.map((page) => page.text).join('\n\n').trim(),
        confidence,
        pages,
        raw: response,
      };
    },
  };
};
|