feat(providers): Add vision and document processing capabilities to providers
This commit is contained in:
@@ -192,4 +192,27 @@ export class OpenAiProvider extends MultiModalModel {
|
||||
message: result.choices[0].message,
|
||||
};
|
||||
}
|
||||
|
||||
/**
 * Sends a single image together with a text prompt to the OpenAI chat
 * completions endpoint and returns the model's textual answer.
 *
 * The image is transmitted inline as a base64 data URL. The MIME type of that
 * URL is derived from the image's leading signature bytes (PNG, GIF, WebP);
 * anything unrecognised is labelled `image/jpeg`.
 *
 * @param optionsArg - `image`: raw image bytes; `prompt`: text instruction sent
 *   alongside the image.
 * @returns The text content of the first completion choice, or an empty string
 *   when the response contains no choice or no text content.
 */
public async vision(optionsArg: { image: Buffer; prompt: string }): Promise<string> {
  const { image, prompt } = optionsArg;

  // Sniff the container format so the data URL is not mislabelled as JPEG
  // when the caller passes a PNG, GIF or WebP buffer.
  let mimeType = 'image/jpeg';
  if (image.length >= 4 && image.readUInt32BE(0) === 0x89504e47) {
    mimeType = 'image/png';
  } else if (image.length >= 4 && image.toString('latin1', 0, 4) === 'GIF8') {
    mimeType = 'image/gif';
  } else if (
    image.length >= 12 &&
    image.toString('latin1', 0, 4) === 'RIFF' &&
    image.toString('latin1', 8, 12) === 'WEBP'
  ) {
    mimeType = 'image/webp';
  }

  const result = await this.openAiApiClient.chat.completions.create({
    model: 'gpt-4-vision-preview',
    messages: [
      {
        role: 'user',
        content: [
          { type: 'text', text: prompt },
          {
            type: 'image_url',
            image_url: {
              url: `data:${mimeType};base64,${image.toString('base64')}`,
            },
          },
        ],
      },
    ],
    max_tokens: 300,
  });

  // Guard against an empty `choices` array instead of throwing a TypeError;
  // a missing or null content falls back to the empty string.
  return result.choices[0]?.message?.content ?? '';
}
|
||||
}
|
||||
|
Reference in New Issue
Block a user