/**
 * Ollama Container
 *
 * Manages Ollama containers for running local LLMs.
 */

import type {
  IContainerConfig,
  ILoadedModel,
  TContainerType,
} from '../interfaces/container.ts';
import type {
  IChatCompletionRequest,
  IChatCompletionResponse,
  IChatCompletionChoice,
  IChatMessage,
} from '../interfaces/api.ts';
import { CONTAINER_IMAGES, CONTAINER_PORTS } from '../constants.ts';
import { logger } from '../logger.ts';
import { BaseContainer, type TModelPullProgress } from './base-container.ts';

/**
 * Ollama API response types
 */
interface IOllamaTagsResponse {
  models: Array<{
    name: string;
    size: number;
    digest: string;
    modified_at: string;
  }>;
}

interface IOllamaChatRequest {
  model: string;
  messages: Array<{
    role: string;
    content: string;
  }>;
  stream?: boolean;
  options?: {
    temperature?: number;
    top_p?: number;
    num_predict?: number;
    stop?: string[];
  };
}

interface IOllamaChatResponse {
  model: string;
  created_at: string;
  message: {
    role: string;
    content: string;
  };
  done: boolean;
  total_duration?: number;
  load_duration?: number;
  prompt_eval_count?: number;
  eval_count?: number;
}

interface IOllamaPullResponse {
  status: string;
  digest?: string;
  total?: number;
  completed?: number;
}

/**
 * Ollama container implementation
 */
export class OllamaContainer extends BaseContainer {
  public readonly type: TContainerType = 'ollama';
  public readonly displayName = 'Ollama';
  public readonly defaultImage = CONTAINER_IMAGES.OLLAMA;
  public readonly defaultPort = CONTAINER_PORTS.OLLAMA;

  constructor(config: IContainerConfig) {
    super(config);

    // Set defaults if not provided
    if (!config.image) {
      config.image = this.defaultImage;
    }
    if (!config.port) {
      config.port = this.defaultPort;
    }

    // Add default volume for model storage
    if (!config.volumes || config.volumes.length === 0) {
      config.volumes = [`modelgrid-ollama-${config.id}:/root/.ollama`];
    }
  }

  /**
   * Create Ollama container configuration
   */
  public static createConfig(
    id: string,
    name: string,
    gpuIds: string[],
    options: Partial<IContainerConfig> = {},
  ): IContainerConfig {
    return {
      id,
      name,
      type: 'ollama',
      image: options.image || CONTAINER_IMAGES.OLLAMA,
      gpuIds,
      port: options.port || CONTAINER_PORTS.OLLAMA,
      externalPort: options.externalPort,
      models: options.models || [],
      env: options.env,
      volumes: options.volumes || [`modelgrid-ollama-${id}:/root/.ollama`],
      autoStart: options.autoStart ?? true,
      restartPolicy: options.restartPolicy || 'unless-stopped',
      memoryLimit: options.memoryLimit,
      cpuLimit: options.cpuLimit,
      command: options.command,
    };
  }
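
  // Usage sketch: build a config with the static helper, then hand it to the
  // constructor. The id, name, GPU ids, and external port below are placeholders.
  //
  //   const config = OllamaContainer.createConfig('ollama-0', 'local-ollama', ['0'], {
  //     externalPort: 11434,
  //   });
  //   const ollama = new OllamaContainer(config);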

  /**
   * Check if Ollama is healthy
   */
  public async isHealthy(): Promise<boolean> {
    try {
      const response = await this.fetch('/api/tags', { timeout: 5000 });
      return response.ok;
    } catch {
      return false;
    }
  }
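
  // Usage sketch: poll until the Ollama API responds, e.g. right after starting
  // the container. Assumes an OllamaContainer instance `ollama`; the retry count
  // and delay are arbitrary.
  //
  //   for (let i = 0; i < 30 && !(await ollama.isHealthy()); i++) {
  //     await new Promise((resolve) => setTimeout(resolve, 1000));
  //   }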

  /**
   * List available models
   */
  public async listModels(): Promise<string[]> {
    try {
      const data = await this.fetchJson<IOllamaTagsResponse>('/api/tags');
      return (data.models || []).map((m) => m.name);
    } catch (error) {
      logger.warn(`Failed to list Ollama models: ${error instanceof Error ? error.message : String(error)}`);
      return [];
    }
  }

  /**
   * Get loaded models with details
   */
  public async getLoadedModels(): Promise<ILoadedModel[]> {
    try {
      const data = await this.fetchJson<IOllamaTagsResponse>('/api/tags');
      return (data.models || []).map((m) => ({
        name: m.name,
        size: m.size,
        format: m.digest.substring(0, 12),
        loaded: true, // Ollama doesn't distinguish loaded vs available
        requestCount: 0,
      }));
    } catch {
      return [];
    }
  }

  /**
   * Pull a model
   */
  public async pullModel(modelName: string, onProgress?: TModelPullProgress): Promise<boolean> {
    try {
      logger.info(`Pulling model: ${modelName}`);

      const response = await this.fetch('/api/pull', {
        method: 'POST',
        body: { name: modelName },
        timeout: 3600000, // 1 hour for large models
      });

      if (!response.ok) {
        throw new Error(`HTTP ${response.status}`);
      }

      // Read streaming response
      const reader = response.body?.getReader();
      if (!reader) {
        throw new Error('No response body');
      }

      const decoder = new TextDecoder();
      let lastStatus = '';

      while (true) {
        const { done, value } = await reader.read();
        if (done) break;

        const text = decoder.decode(value);
        const lines = text.split('\n').filter((l) => l.trim());

        for (const line of lines) {
          try {
            const data = JSON.parse(line) as IOllamaPullResponse;
            const status = data.status;

            if (status !== lastStatus) {
              lastStatus = status;
              let percent: number | undefined;

              if (data.total && data.completed) {
                percent = Math.round((data.completed / data.total) * 100);
              }

              if (onProgress) {
                onProgress({ model: modelName, status, percent });
              } else {
                const progressStr = percent !== undefined ? ` (${percent}%)` : '';
                logger.dim(` ${status}${progressStr}`);
              }
            }
          } catch {
            // Invalid JSON line, skip
          }
        }
      }

      logger.success(`Model ${modelName} pulled successfully`);
      return true;
    } catch (error) {
      logger.error(`Failed to pull model ${modelName}: ${error instanceof Error ? error.message : String(error)}`);
      return false;
    }
  }
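
  // Usage sketch: pull a model and report progress through the optional callback.
  // Assumes an OllamaContainer instance `ollama`; the model tag is just an example.
  //
  //   const ok = await ollama.pullModel('llama3', ({ status, percent }) => {
  //     logger.info(percent !== undefined ? `${status} (${percent}%)` : status);
  //   });
  //   if (!ok) logger.error('Model pull failed');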

  /**
   * Remove a model
   */
  public async removeModel(modelName: string): Promise<boolean> {
    try {
      const response = await this.fetch('/api/delete', {
        method: 'DELETE',
        body: { name: modelName },
      });

      if (response.ok) {
        logger.success(`Model ${modelName} removed`);
        return true;
      }

      throw new Error(`HTTP ${response.status}`);
    } catch (error) {
      logger.error(`Failed to remove model ${modelName}: ${error instanceof Error ? error.message : String(error)}`);
      return false;
    }
  }

  /**
   * Send a chat completion request
   */
  public async chatCompletion(request: IChatCompletionRequest): Promise<IChatCompletionResponse> {
    const ollamaRequest: IOllamaChatRequest = {
      model: request.model,
      messages: request.messages.map((m) => ({
        role: m.role,
        content: m.content,
      })),
      stream: false,
      options: {
        temperature: request.temperature,
        top_p: request.top_p,
        num_predict: request.max_tokens,
        stop: Array.isArray(request.stop) ? request.stop : request.stop ? [request.stop] : undefined,
      },
    };

    const response = await this.fetchJson<IOllamaChatResponse>('/api/chat', {
      method: 'POST',
      body: ollamaRequest,
      timeout: 300000, // 5 minutes
    });

    // Convert to OpenAI format
    const created = Math.floor(Date.now() / 1000);

    const choice: IChatCompletionChoice = {
      index: 0,
      message: {
        role: 'assistant',
        content: response.message.content,
      },
      finish_reason: response.done ? 'stop' : null,
    };

    return {
      id: this.generateRequestId(),
      object: 'chat.completion',
      created,
      model: request.model,
      choices: [choice],
      usage: {
        prompt_tokens: response.prompt_eval_count || 0,
        completion_tokens: response.eval_count || 0,
        total_tokens: (response.prompt_eval_count || 0) + (response.eval_count || 0),
      },
    };
  }
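
  // Usage sketch: the request mirrors the OpenAI chat-completions shape; only the
  // fields this adapter maps (model, messages, temperature, top_p, max_tokens,
  // stop) affect the Ollama call. Assumes an OllamaContainer instance `ollama`.
  //
  //   const reply = await ollama.chatCompletion({
  //     model: 'llama3',
  //     messages: [{ role: 'user', content: 'Hello!' }],
  //     temperature: 0.7,
  //     max_tokens: 256,
  //   });
  //   logger.info(reply.choices[0].message.content);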

  /**
   * Stream a chat completion request
   */
  public async chatCompletionStream(
    request: IChatCompletionRequest,
    onChunk: (chunk: string) => void,
  ): Promise<void> {
    const ollamaRequest: IOllamaChatRequest = {
      model: request.model,
      messages: request.messages.map((m) => ({
        role: m.role,
        content: m.content,
      })),
      stream: true,
      options: {
        temperature: request.temperature,
        top_p: request.top_p,
        num_predict: request.max_tokens,
        stop: Array.isArray(request.stop) ? request.stop : request.stop ? [request.stop] : undefined,
      },
    };

    const response = await this.fetch('/api/chat', {
      method: 'POST',
      body: ollamaRequest,
      timeout: 300000,
    });

    if (!response.ok) {
      throw new Error(`HTTP ${response.status}`);
    }

    const reader = response.body?.getReader();
    if (!reader) {
      throw new Error('No response body');
    }

    const decoder = new TextDecoder();
    const requestId = this.generateRequestId();
    const created = Math.floor(Date.now() / 1000);

    while (true) {
      const { done, value } = await reader.read();
      if (done) break;

      const text = decoder.decode(value);
      const lines = text.split('\n').filter((l) => l.trim());

      for (const line of lines) {
        try {
          const data = JSON.parse(line) as IOllamaChatResponse;

          // Convert to OpenAI streaming format
          const chunk = {
            id: requestId,
            object: 'chat.completion.chunk',
            created,
            model: request.model,
            choices: [
              {
                index: 0,
                delta: {
                  content: data.message.content,
                } as Partial<IChatMessage>,
                finish_reason: data.done ? 'stop' : null,
              },
            ],
          };

          onChunk(`data: ${JSON.stringify(chunk)}\n\n`);

          if (data.done) {
            onChunk('data: [DONE]\n\n');
          }
        } catch {
          // Invalid JSON, skip
        }
      }
    }
  }
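
  // Usage sketch: chunks arrive already formatted as OpenAI-style SSE lines, so a
  // proxy can forward them to a client verbatim. Assumes an OllamaContainer
  // instance `ollama` and a writable HTTP response `res`.
  //
  //   await ollama.chatCompletionStream(
  //     { model: 'llama3', messages: [{ role: 'user', content: 'Hi' }] },
  //     (chunk) => res.write(chunk),
  //   );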
}