feat(providers): Add vision and document processing capabilities to providers

This commit is contained in:
2025-02-03 15:26:00 +01:00
parent e82c510094
commit eda8ce36df
9 changed files with 212 additions and 6 deletions

View File

@@ -192,4 +192,27 @@ export class OpenAiProvider extends MultiModalModel {
message: result.choices[0].message,
};
}
public async vision(optionsArg: { image: Buffer; prompt: string }): Promise<string> {
const result = await this.openAiApiClient.chat.completions.create({
model: 'gpt-4-vision-preview',
messages: [
{
role: 'user',
content: [
{ type: 'text', text: optionsArg.prompt },
{
type: 'image_url',
image_url: {
url: `data:image/jpeg;base64,${optionsArg.image.toString('base64')}`
}
}
]
}
],
max_tokens: 300
});
return result.choices[0].message.content || '';
}
}