modelgrid/ts/containers/ollama.ts

/**
 * Ollama Container
 *
 * Manages Ollama containers for running local LLMs.
 */

import type {
  IContainerConfig,
  ILoadedModel,
  TContainerType,
} from '../interfaces/container.ts';
import type {
  IChatCompletionRequest,
  IChatCompletionResponse,
  IChatCompletionChoice,
  IChatMessage,
} from '../interfaces/api.ts';
import { CONTAINER_IMAGES, CONTAINER_PORTS } from '../constants.ts';
import { logger } from '../logger.ts';
import { BaseContainer, type TModelPullProgress } from './base-container.ts';

/**
 * Ollama API response types
 */
interface IOllamaTagsResponse {
  models: Array<{
    name: string;
    size: number;
    digest: string;
    modified_at: string;
  }>;
}

interface IOllamaChatRequest {
  model: string;
  messages: Array<{
    role: string;
    content: string;
  }>;
  stream?: boolean;
  options?: {
    temperature?: number;
    top_p?: number;
    num_predict?: number;
    stop?: string[];
  };
}

interface IOllamaChatResponse {
  model: string;
  created_at: string;
  message: {
    role: string;
    content: string;
  };
  done: boolean;
  total_duration?: number;
  load_duration?: number;
  prompt_eval_count?: number;
  eval_count?: number;
}

interface IOllamaPullResponse {
  status: string;
  digest?: string;
  total?: number;
  completed?: number;
}
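
/*
 * Illustrative sketch of the newline-delimited JSON that /api/chat streams back
 * and that IOllamaChatResponse models. The concrete values below are invented
 * for illustration only; the field names follow the interface above.
 *
 *   {"model":"llama3.2","created_at":"2024-01-01T00:00:00Z","message":{"role":"assistant","content":"Hel"},"done":false}
 *   {"model":"llama3.2","created_at":"2024-01-01T00:00:01Z","message":{"role":"assistant","content":""},"done":true,"prompt_eval_count":12,"eval_count":34}
 */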

/**
 * Ollama container implementation
 */
export class OllamaContainer extends BaseContainer {
  public readonly type: TContainerType = 'ollama';
  public readonly displayName = 'Ollama';
  public readonly defaultImage = CONTAINER_IMAGES.OLLAMA;
  public readonly defaultPort = CONTAINER_PORTS.OLLAMA;

  constructor(config: IContainerConfig) {
    super(config);
    // Set defaults if not provided
    if (!config.image) {
      config.image = this.defaultImage;
    }
    if (!config.port) {
      config.port = this.defaultPort;
    }
    // Add default volume for model storage
    if (!config.volumes || config.volumes.length === 0) {
      config.volumes = [`modelgrid-ollama-${config.id}:/root/.ollama`];
    }
  }

  /**
   * Create Ollama container configuration
   */
  public static createConfig(
    id: string,
    name: string,
    gpuIds: string[],
    options: Partial<IContainerConfig> = {},
  ): IContainerConfig {
    return {
      id,
      name,
      type: 'ollama',
      image: options.image || CONTAINER_IMAGES.OLLAMA,
      gpuIds,
      port: options.port || CONTAINER_PORTS.OLLAMA,
      externalPort: options.externalPort,
      models: options.models || [],
      env: options.env,
      volumes: options.volumes || [`modelgrid-ollama-${id}:/root/.ollama`],
      autoStart: options.autoStart ?? true,
      restartPolicy: options.restartPolicy || 'unless-stopped',
      memoryLimit: options.memoryLimit,
      cpuLimit: options.cpuLimit,
      command: options.command,
    };
  }
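
  /*
   * Example (illustrative only): building a config and wrapping it in a
   * container instance. The id, name, and GPU ids shown here are assumed
   * values, not anything defined in this module.
   *
   *   const config = OllamaContainer.createConfig('ollama-0', 'ollama-gpu0', ['0']);
   *   const container = new OllamaContainer(config);
   */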

  /**
   * Check if Ollama is healthy
   */
  public async isHealthy(): Promise<boolean> {
    try {
      const response = await this.fetch('/api/tags', { timeout: 5000 });
      return response.ok;
    } catch {
      return false;
    }
  }

  /**
   * List available models
   */
  public async listModels(): Promise<string[]> {
    try {
      const data = await this.fetchJson<IOllamaTagsResponse>('/api/tags');
      return (data.models || []).map((m) => m.name);
    } catch (error) {
      logger.warn(`Failed to list Ollama models: ${error instanceof Error ? error.message : String(error)}`);
      return [];
    }
  }

  /**
   * Get loaded models with details
   */
  public async getLoadedModels(): Promise<ILoadedModel[]> {
    try {
      const data = await this.fetchJson<IOllamaTagsResponse>('/api/tags');
      return (data.models || []).map((m) => ({
        name: m.name,
        size: m.size,
        format: m.digest.substring(0, 12),
        loaded: true, // /api/tags lists every downloaded model; they are all reported as loaded here
        requestCount: 0,
      }));
    } catch {
      return [];
    }
  }

  /**
   * Pull a model
   */
  public async pullModel(modelName: string, onProgress?: TModelPullProgress): Promise<boolean> {
    try {
      logger.info(`Pulling model: ${modelName}`);
      const response = await this.fetch('/api/pull', {
        method: 'POST',
        body: { name: modelName },
        timeout: 3600000, // 1 hour for large models
      });
      if (!response.ok) {
        throw new Error(`HTTP ${response.status}`);
      }
      // Read streaming response
      const reader = response.body?.getReader();
      if (!reader) {
        throw new Error('No response body');
      }
      const decoder = new TextDecoder();
      // /api/pull streams newline-delimited JSON progress objects. A chunk read
      // from the body may end in the middle of a line, so keep the partial tail
      // in a buffer and only parse complete lines.
      let buffer = '';
      let lastStatus = '';
      let lastPercent = -1;
      while (true) {
        const { done, value } = await reader.read();
        if (done) break;
        buffer += decoder.decode(value, { stream: true });
        const parts = buffer.split('\n');
        buffer = parts.pop() ?? '';
        const lines = parts.filter((l) => l.trim());
        for (const line of lines) {
          try {
            const data = JSON.parse(line) as IOllamaPullResponse;
            const status = data.status;
            let percent: number | undefined;
            if (data.total && data.completed !== undefined) {
              percent = Math.round((data.completed / data.total) * 100);
            }
            // Report every status or percentage change to the callback, but only
            // log on status transitions to avoid flooding the console.
            const statusChanged = status !== lastStatus;
            const percentChanged = percent !== undefined && percent !== lastPercent;
            if (statusChanged || percentChanged) {
              lastStatus = status;
              if (percent !== undefined) {
                lastPercent = percent;
              }
              if (onProgress) {
                onProgress({ model: modelName, status, percent });
              } else if (statusChanged) {
                const progressStr = percent !== undefined ? ` (${percent}%)` : '';
                logger.dim(` ${status}${progressStr}`);
              }
            }
          } catch {
            // Not a complete/valid JSON line, skip
          }
        }
      }
      logger.success(`Model ${modelName} pulled successfully`);
      return true;
    } catch (error) {
      logger.error(`Failed to pull model ${modelName}: ${error instanceof Error ? error.message : String(error)}`);
      return false;
    }
  }
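
  /*
   * Example (illustrative only): pulling a model with a progress callback. The
   * model name is an assumed value; the callback fields mirror the object that
   * pullModel passes to onProgress above.
   *
   *   await container.pullModel('llama3.2', ({ status, percent }) => {
   *     console.log(`${status}${percent !== undefined ? ` ${percent}%` : ''}`);
   *   });
   */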

  /**
   * Remove a model
   */
  public async removeModel(modelName: string): Promise<boolean> {
    try {
      const response = await this.fetch('/api/delete', {
        method: 'DELETE',
        body: { name: modelName },
      });
      if (response.ok) {
        logger.success(`Model ${modelName} removed`);
        return true;
      }
      throw new Error(`HTTP ${response.status}`);
    } catch (error) {
      logger.error(`Failed to remove model ${modelName}: ${error instanceof Error ? error.message : String(error)}`);
      return false;
    }
  }

  /**
   * Send a chat completion request
   */
  public async chatCompletion(request: IChatCompletionRequest): Promise<IChatCompletionResponse> {
    const ollamaRequest: IOllamaChatRequest = {
      model: request.model,
      messages: request.messages.map((m) => ({
        role: m.role,
        content: m.content,
      })),
      stream: false,
      options: {
        temperature: request.temperature,
        top_p: request.top_p,
        num_predict: request.max_tokens,
        stop: Array.isArray(request.stop) ? request.stop : request.stop ? [request.stop] : undefined,
      },
    };

    const response = await this.fetchJson<IOllamaChatResponse>('/api/chat', {
      method: 'POST',
      body: ollamaRequest,
      timeout: 300000, // 5 minutes
    });

    // Convert to OpenAI format
    const created = Math.floor(Date.now() / 1000);
    const choice: IChatCompletionChoice = {
      index: 0,
      message: {
        role: 'assistant',
        content: response.message.content,
      },
      finish_reason: response.done ? 'stop' : null,
    };

    return {
      id: this.generateRequestId(),
      object: 'chat.completion',
      created,
      model: request.model,
      choices: [choice],
      usage: {
        prompt_tokens: response.prompt_eval_count || 0,
        completion_tokens: response.eval_count || 0,
        total_tokens: (response.prompt_eval_count || 0) + (response.eval_count || 0),
      },
    };
  }

  /**
   * Stream a chat completion request
   */
  public async chatCompletionStream(
    request: IChatCompletionRequest,
    onChunk: (chunk: string) => void,
  ): Promise<void> {
    const ollamaRequest: IOllamaChatRequest = {
      model: request.model,
      messages: request.messages.map((m) => ({
        role: m.role,
        content: m.content,
      })),
      stream: true,
      options: {
        temperature: request.temperature,
        top_p: request.top_p,
        num_predict: request.max_tokens,
        stop: Array.isArray(request.stop) ? request.stop : request.stop ? [request.stop] : undefined,
      },
    };

    const response = await this.fetch('/api/chat', {
      method: 'POST',
      body: ollamaRequest,
      timeout: 300000,
    });
    if (!response.ok) {
      throw new Error(`HTTP ${response.status}`);
    }

    const reader = response.body?.getReader();
    if (!reader) {
      throw new Error('No response body');
    }
    const decoder = new TextDecoder();
    const requestId = this.generateRequestId();
    const created = Math.floor(Date.now() / 1000);
    // Ollama streams newline-delimited JSON; a read may end mid-line, so keep
    // the partial tail in a buffer and only parse complete lines.
    let buffer = '';
    while (true) {
      const { done, value } = await reader.read();
      if (done) break;
      buffer += decoder.decode(value, { stream: true });
      const parts = buffer.split('\n');
      buffer = parts.pop() ?? '';
      const lines = parts.filter((l) => l.trim());
      for (const line of lines) {
        try {
          const data = JSON.parse(line) as IOllamaChatResponse;
          // Convert each Ollama message fragment to an OpenAI-style SSE chunk
          const chunk = {
            id: requestId,
            object: 'chat.completion.chunk',
            created,
            model: request.model,
            choices: [
              {
                index: 0,
                delta: {
                  content: data.message.content,
                } as Partial<IChatMessage>,
                finish_reason: data.done ? 'stop' : null,
              },
            ],
          };
          onChunk(`data: ${JSON.stringify(chunk)}\n\n`);
          if (data.done) {
            onChunk('data: [DONE]\n\n');
          }
        } catch {
          // Not a complete/valid JSON line, skip
        }
      }
    }
  }
}
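
/*
 * Minimal usage sketch (illustrative only, not part of this module): given a
 * container built as in the createConfig example above, check health, make sure
 * a model is present, and request a completion. The model name is an assumed
 * value, and the request shown is just the subset of IChatCompletionRequest
 * that this class actually reads.
 *
 *   if (await container.isHealthy()) {
 *     await container.pullModel('llama3.2');
 *     const reply = await container.chatCompletion({
 *       model: 'llama3.2',
 *       messages: [{ role: 'user', content: 'Hello!' }],
 *     });
 *     console.log(reply.choices[0].message.content);
 *   }
 */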