/**
 * Ollama Container
 *
 * Manages Ollama containers for running local LLMs.
 */

import type {
  IContainerConfig,
  ILoadedModel,
  TContainerType,
} from '../interfaces/container.ts';
import type {
  IChatCompletionRequest,
  IChatCompletionResponse,
  IChatCompletionChoice,
  IChatMessage,
} from '../interfaces/api.ts';
import { CONTAINER_IMAGES, CONTAINER_PORTS } from '../constants.ts';
import { logger } from '../logger.ts';
import { BaseContainer, type TModelPullProgress } from './base-container.ts';

/**
 * Ollama API response types
 */
interface IOllamaTagsResponse {
  models: Array<{
    name: string;
    size: number;
    digest: string;
    modified_at: string;
  }>;
}

interface IOllamaChatRequest {
  model: string;
  messages: Array<{
    role: string;
    content: string;
  }>;
  stream?: boolean;
  options?: {
    temperature?: number;
    top_p?: number;
    num_predict?: number;
    stop?: string[];
  };
}

interface IOllamaChatResponse {
  model: string;
  created_at: string;
  message: {
    role: string;
    content: string;
  };
  done: boolean;
  total_duration?: number;
  load_duration?: number;
  prompt_eval_count?: number;
  eval_count?: number;
}

interface IOllamaPullResponse {
  status: string;
  digest?: string;
  total?: number;
  completed?: number;
}

/**
 * Ollama container implementation
 */
export class OllamaContainer extends BaseContainer {
  public readonly type: TContainerType = 'ollama';
  public readonly displayName = 'Ollama';
  public readonly defaultImage = CONTAINER_IMAGES.OLLAMA;
  public readonly defaultPort = CONTAINER_PORTS.OLLAMA;

  constructor(config: IContainerConfig) {
    super(config);

    // Set defaults if not provided
    if (!config.image) {
      config.image = this.defaultImage;
    }
    if (!config.port) {
      config.port = this.defaultPort;
    }

    // Add default volume for model storage
    if (!config.volumes || config.volumes.length === 0) {
      config.volumes = [`modelgrid-ollama-${config.id}:/root/.ollama`];
    }
  }

  /**
   * Create Ollama container configuration
   */
  public static createConfig(
    id: string,
    name: string,
    gpuIds: string[],
    options: Partial<IContainerConfig> = {},
  ): IContainerConfig {
    return {
      id,
      name,
      type: 'ollama',
      image: options.image || CONTAINER_IMAGES.OLLAMA,
      gpuIds,
      port: options.port || CONTAINER_PORTS.OLLAMA,
      externalPort: options.externalPort,
      models: options.models || [],
      env: options.env,
      volumes: options.volumes || [`modelgrid-ollama-${id}:/root/.ollama`],
      autoStart: options.autoStart ?? true,
      restartPolicy: options.restartPolicy || 'unless-stopped',
      memoryLimit: options.memoryLimit,
      cpuLimit: options.cpuLimit,
      command: options.command,
    };
  }

  /**
   * Check if Ollama is healthy
   */
  public async isHealthy(): Promise<boolean> {
    try {
      const response = await this.fetch('/api/tags', { timeout: 5000 });
      return response.ok;
    } catch {
      return false;
    }
  }

  /**
   * List available models
   */
  public async listModels(): Promise<string[]> {
    try {
      const data = await this.fetchJson<IOllamaTagsResponse>('/api/tags');
      return (data.models || []).map((m) => m.name);
    } catch (error) {
      logger.warn(
        `Failed to list Ollama models: ${error instanceof Error ? error.message : String(error)}`,
      );
      return [];
    }
  }
  /**
   * Get loaded models with details
   */
  public async getLoadedModels(): Promise<ILoadedModel[]> {
    try {
      const data = await this.fetchJson<IOllamaTagsResponse>('/api/tags');
      return (data.models || []).map((m) => ({
        name: m.name,
        size: m.size,
        format: m.digest.substring(0, 12),
        loaded: true, // Ollama doesn't distinguish loaded vs available
        requestCount: 0,
      }));
    } catch {
      return [];
    }
  }

  /**
   * Pull a model
   */
  public async pullModel(
    modelName: string,
    onProgress?: TModelPullProgress,
  ): Promise<boolean> {
    try {
      logger.info(`Pulling model: ${modelName}`);

      const response = await this.fetch('/api/pull', {
        method: 'POST',
        body: { name: modelName },
        timeout: 3600000, // 1 hour for large models
      });

      if (!response.ok) {
        throw new Error(`HTTP ${response.status}`);
      }

      // Read streaming response
      const reader = response.body?.getReader();
      if (!reader) {
        throw new Error('No response body');
      }

      const decoder = new TextDecoder();
      let lastStatus = '';

      while (true) {
        const { done, value } = await reader.read();
        if (done) break;

        const text = decoder.decode(value);
        const lines = text.split('\n').filter((l) => l.trim());

        for (const line of lines) {
          try {
            const data = JSON.parse(line) as IOllamaPullResponse;
            const status = data.status;

            if (status !== lastStatus) {
              lastStatus = status;

              let percent: number | undefined;
              if (data.total && data.completed) {
                percent = Math.round((data.completed / data.total) * 100);
              }

              if (onProgress) {
                onProgress({ model: modelName, status, percent });
              } else {
                const progressStr = percent !== undefined ? ` (${percent}%)` : '';
                logger.dim(` ${status}${progressStr}`);
              }
            }
          } catch {
            // Invalid JSON line, skip
          }
        }
      }

      logger.success(`Model ${modelName} pulled successfully`);
      return true;
    } catch (error) {
      logger.error(
        `Failed to pull model ${modelName}: ${error instanceof Error ? error.message : String(error)}`,
      );
      return false;
    }
  }

  /**
   * Remove a model
   */
  public async removeModel(modelName: string): Promise<boolean> {
    try {
      const response = await this.fetch('/api/delete', {
        method: 'DELETE',
        body: { name: modelName },
      });

      if (response.ok) {
        logger.success(`Model ${modelName} removed`);
        return true;
      }

      throw new Error(`HTTP ${response.status}`);
    } catch (error) {
      logger.error(
        `Failed to remove model ${modelName}: ${error instanceof Error ? error.message : String(error)}`,
      );
      return false;
    }
  }

  /**
   * Send a chat completion request
   */
  public async chatCompletion(
    request: IChatCompletionRequest,
  ): Promise<IChatCompletionResponse> {
    const ollamaRequest: IOllamaChatRequest = {
      model: request.model,
      messages: request.messages.map((m) => ({
        role: m.role,
        content: m.content,
      })),
      stream: false,
      options: {
        temperature: request.temperature,
        top_p: request.top_p,
        num_predict: request.max_tokens,
        stop: Array.isArray(request.stop)
          ? request.stop
          : request.stop
          ? [request.stop]
          : undefined,
      },
    };

    const response = await this.fetchJson<IOllamaChatResponse>('/api/chat', {
      method: 'POST',
      body: ollamaRequest,
      timeout: 300000, // 5 minutes
    });

    // Convert to OpenAI format
    const created = Math.floor(Date.now() / 1000);
    const choice: IChatCompletionChoice = {
      index: 0,
      message: {
        role: 'assistant',
        content: response.message.content,
      },
      finish_reason: response.done ? 'stop' : null,
    };

    return {
      id: this.generateRequestId(),
      object: 'chat.completion',
      created,
      model: request.model,
      choices: [choice],
      usage: {
        prompt_tokens: response.prompt_eval_count || 0,
        completion_tokens: response.eval_count || 0,
        total_tokens: (response.prompt_eval_count || 0) + (response.eval_count || 0),
      },
    };
  }
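  /*
   * Illustration of the OpenAI-compatible SSE framing produced by
   * chatCompletionStream() below. Field values here are illustrative only
   * (the id comes from generateRequestId(), the model from the request):
   *
   *   data: {"id":"<requestId>","object":"chat.completion.chunk","created":1700000000,
   *     "model":"llama3.2","choices":[{"index":0,"delta":{"content":"Hel"},"finish_reason":null}]}
   *
   *   data: [DONE]
   */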
  /**
   * Stream a chat completion request
   */
  public async chatCompletionStream(
    request: IChatCompletionRequest,
    onChunk: (chunk: string) => void,
  ): Promise<void> {
    const ollamaRequest: IOllamaChatRequest = {
      model: request.model,
      messages: request.messages.map((m) => ({
        role: m.role,
        content: m.content,
      })),
      stream: true,
      options: {
        temperature: request.temperature,
        top_p: request.top_p,
        num_predict: request.max_tokens,
        stop: Array.isArray(request.stop)
          ? request.stop
          : request.stop
          ? [request.stop]
          : undefined,
      },
    };

    const response = await this.fetch('/api/chat', {
      method: 'POST',
      body: ollamaRequest,
      timeout: 300000,
    });

    if (!response.ok) {
      throw new Error(`HTTP ${response.status}`);
    }

    const reader = response.body?.getReader();
    if (!reader) {
      throw new Error('No response body');
    }

    const decoder = new TextDecoder();
    const requestId = this.generateRequestId();
    const created = Math.floor(Date.now() / 1000);

    while (true) {
      const { done, value } = await reader.read();
      if (done) break;

      const text = decoder.decode(value);
      const lines = text.split('\n').filter((l) => l.trim());

      for (const line of lines) {
        try {
          const data = JSON.parse(line) as IOllamaChatResponse;

          // Convert to OpenAI streaming format
          const chunk = {
            id: requestId,
            object: 'chat.completion.chunk',
            created,
            model: request.model,
            choices: [
              {
                index: 0,
                delta: {
                  content: data.message.content,
                } as Partial<IChatMessage>,
                finish_reason: data.done ? 'stop' : null,
              },
            ],
          };

          onChunk(`data: ${JSON.stringify(chunk)}\n\n`);

          if (data.done) {
            onChunk('data: [DONE]\n\n');
          }
        } catch {
          // Invalid JSON, skip
        }
      }
    }
  }
}
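
/*
 * Usage sketch (illustrative only; assumes the Ollama container is already
 * running and that BaseContainer provides the fetch/fetchJson plumbing against
 * the configured port -- the model name 'llama3.2' is just an example):
 *
 *   const config = OllamaContainer.createConfig('ollama-0', 'local-llm', ['0']);
 *   const container = new OllamaContainer(config);
 *
 *   if (await container.isHealthy()) {
 *     await container.pullModel('llama3.2');
 *     const reply = await container.chatCompletion({
 *       model: 'llama3.2',
 *       messages: [{ role: 'user', content: 'Hello!' }],
 *     });
 *     console.log(reply.choices[0].message.content);
 *   }
 */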