import { execSync } from 'child_process';
// Project container names (only manage these)
const PROJECT_CONTAINERS = [
  'minicpm-test',
  'nanonets-test',
];
// Image configurations
export interface IImageConfig {
  name: string;
  dockerfile: string;
  buildContext: string;
  containerName: string;
  ports: string[];
  volumes?: string[];
  gpus?: boolean;
  healthEndpoint?: string;
  healthTimeout?: number;
}
export const IMAGES = {
  minicpm: {
    name: 'minicpm45v',
    dockerfile: 'Dockerfile_minicpm45v_gpu',
    buildContext: '.',
    containerName: 'minicpm-test',
    ports: ['11434:11434'],
    volumes: ['ht-ollama-models:/root/.ollama'],
    gpus: true,
    healthEndpoint: 'http://localhost:11434/api/tags',
    healthTimeout: 120000,
  } as IImageConfig,
  // Nanonets-OCR2-3B - document OCR optimized VLM (Qwen2.5-VL-3B fine-tuned, Oct 2025)
  nanonetsOcr: {
    name: 'nanonets-ocr',
    dockerfile: 'Dockerfile_nanonets_vllm_gpu_VRAM10GB',
    buildContext: '.',
    containerName: 'nanonets-test',
    ports: ['8000:8000'],
    volumes: ['ht-huggingface-cache:/root/.cache/huggingface'],
    gpus: true,
    healthEndpoint: 'http://localhost:8000/health',
    healthTimeout: 300000, // 5 minutes for model loading
  } as IImageConfig,
};
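// To register another service, add an entry above and list its container in
// PROJECT_CONTAINERS. A minimal sketch; the 'my-service' names, port, and
// Dockerfile are hypothetical, not part of this project:
//
//   myService: {
//     name: 'my-service',
//     dockerfile: 'Dockerfile_my_service',
//     buildContext: '.',
//     containerName: 'my-service-test',
//     ports: ['9000:9000'],
//     healthEndpoint: 'http://localhost:9000/health',
//     healthTimeout: 60000,
//   } as IImageConfig,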
/**
 * Execute a shell command and return its captured output
 * (with silent=false, output streams to the parent's stdio and '' is returned)
 */
function exec(command: string, silent = false): string {
  try {
    // execSync returns null at runtime when stdio is 'inherit' (stdout is not
    // captured), so coerce to '' to satisfy the string return type.
    return (
      execSync(command, {
        encoding: 'utf-8',
        stdio: silent ? 'pipe' : 'inherit',
      }) ?? ''
    );
  } catch (err: unknown) {
    if (silent) return '';
    throw err;
  }
}
/**
* Check if a Docker image exists locally
*/
export function imageExists(imageName: string): boolean {
  const result = exec(`docker images -q ${imageName}`, true);
  return result.trim().length > 0;
}
/**
* Check if a container is running
*/
export function isContainerRunning(containerName: string): boolean {
  const result = exec(`docker ps --filter "name=^${containerName}$" --format "{{.Names}}"`, true);
  return result.trim() === containerName;
}
/**
* Check if a container exists (running or stopped)
*/
export function containerExists(containerName: string): boolean {
  const result = exec(`docker ps -a --filter "name=^${containerName}$" --format "{{.Names}}"`, true);
  return result.trim() === containerName;
}
/**
* Stop and remove a container
*/
export function removeContainer(containerName: string): void {
  if (containerExists(containerName)) {
    console.log(`[Docker] Removing container: ${containerName}`);
    exec(`docker rm -f ${containerName}`, true);
  }
}
/**
* Stop all project containers that conflict with the required one (port-based)
*/
export function stopConflictingContainers(requiredContainer: string, requiredPort: string): void {
  // Stop project containers using the same port
  for (const container of PROJECT_CONTAINERS) {
    if (container === requiredContainer) continue;
    if (isContainerRunning(container)) {
      // Check if this container uses the same port
      const ports = exec(`docker port ${container} 2>/dev/null || true`, true);
      if (ports.includes(requiredPort.split(':')[0])) {
        console.log(`[Docker] Stopping conflicting container: ${container}`);
        exec(`docker stop ${container}`, true);
      }
    }
  }
}
/**
* Stop all GPU-consuming project containers (for GPU memory management)
* This ensures GPU memory is freed before starting a new GPU service
*/
export function stopAllGpuContainers(exceptContainer?: string): void {
  for (const container of PROJECT_CONTAINERS) {
    if (container === exceptContainer) continue;
    if (isContainerRunning(container)) {
      console.log(`[Docker] Stopping GPU container: ${container}`);
      exec(`docker stop ${container}`, true);
    }
  }
  // Brief pause to give the GPU a moment to release memory
  execSync('sleep 2');
}
/**
* Build a Docker image
*/
export function buildImage(config: IImageConfig): void {
  console.log(`[Docker] Building image: ${config.name}`);
  const cmd = `docker build --load -f ${config.dockerfile} -t ${config.name} ${config.buildContext}`;
  exec(cmd);
}
/**
* Start a container from an image
*/
export function startContainer(config: IImageConfig): void {
  // Remove any existing container with the same name
  removeContainer(config.containerName);
  console.log(`[Docker] Starting container: ${config.containerName}`);
  const portArgs = config.ports.map((p) => `-p ${p}`).join(' ');
  const volumeArgs = config.volumes?.map((v) => `-v ${v}`).join(' ') || '';
  const gpuArgs = config.gpus ? '--gpus all' : '';
  const cmd = `docker run -d --name ${config.containerName} ${gpuArgs} ${portArgs} ${volumeArgs} ${config.name}`;
  exec(cmd);
}
/**
* Wait for a container to become healthy
*/
export async function waitForHealth(
  endpoint: string,
  timeoutMs: number = 120000,
  intervalMs: number = 5000
): Promise<boolean> {
  const startTime = Date.now();
  console.log(`[Docker] Waiting for health: ${endpoint}`);
  while (Date.now() - startTime < timeoutMs) {
    try {
      const response = await fetch(endpoint, {
        method: 'GET',
        signal: AbortSignal.timeout(5000),
      });
      if (response.ok) {
        console.log(`[Docker] Service healthy!`);
        return true;
      }
    } catch {
      // Service not ready yet
    }
    const elapsed = Math.round((Date.now() - startTime) / 1000);
    console.log(`[Docker] Waiting... (${elapsed}s)`);
    await new Promise((resolve) => setTimeout(resolve, intervalMs));
  }
  console.log(`[Docker] Health check timeout after ${timeoutMs / 1000}s`);
  return false;
}
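// Usage sketch (the URL and timeout below are illustrative, not fixed values
// from this project):
//
//   const healthy = await waitForHealth('http://localhost:8000/health', 60000);
//   if (!healthy) throw new Error('service did not become healthy in time');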
/**
* Ensure a service is running and healthy
* - Builds image if missing
* - Stops conflicting project containers
* - Starts container if not running
* - Waits for health check
*/
export async function ensureService(config: IImageConfig): Promise<boolean> {
  console.log(`\n[Docker] Ensuring service: ${config.name}`);
  // Build image if it doesn't exist
  if (!imageExists(config.name)) {
    console.log(`[Docker] Image not found, building...`);
    buildImage(config);
  }
  // For GPU services, stop ALL other GPU containers to free GPU memory
  if (config.gpus) {
    stopAllGpuContainers(config.containerName);
  }
  // Stop conflicting containers on the same port
  const mainPort = config.ports[0];
  stopConflictingContainers(config.containerName, mainPort);
  // Start container if not running
  if (!isContainerRunning(config.containerName)) {
    startContainer(config);
  } else {
    console.log(`[Docker] Container already running: ${config.containerName}`);
  }
  // Wait for health
  if (config.healthEndpoint) {
    return waitForHealth(config.healthEndpoint, config.healthTimeout);
  }
  return true;
}
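// Typical call site, as a sketch (how failures are handled is up to the caller):
//
//   const ok = await ensureService(IMAGES.nanonetsOcr);
//   if (!ok) {
//     console.error('nanonets-ocr did not start or failed its health check');
//     process.exit(1);
//   }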
/**
* Ensure MiniCPM service is running (Ollama with GPU)
*/
export async function ensureMiniCpm(): Promise<boolean> {
  return ensureService(IMAGES.minicpm);
}
/**
* Check if GPU is available
*/
export function isGpuAvailable(): boolean {
  try {
    const result = exec('nvidia-smi --query-gpu=name --format=csv,noheader 2>/dev/null', true);
    return result.trim().length > 0;
  } catch {
    return false;
  }
}
/**
* Ensure an Ollama model is pulled and available
* Uses the MiniCPM container (which runs Ollama) to pull the model
*/
export async function ensureOllamaModel(modelName: string): Promise<boolean> {
  const OLLAMA_URL = 'http://localhost:11434';
  console.log(`\n[Ollama] Ensuring model: ${modelName}`);
  // Check if model exists
  try {
    const response = await fetch(`${OLLAMA_URL}/api/tags`);
    if (response.ok) {
      const data = (await response.json()) as { models?: Array<{ name: string }> };
      const models = data.models || [];
      // Exact match required - don't match on prefix
      const exists = models.some((m) => m.name === modelName);
      if (exists) {
        console.log(`[Ollama] Model already available: ${modelName}`);
        return true;
      }
    }
  } catch {
    console.log(`[Ollama] Cannot check models; Ollama may not be running`);
    return false;
  }
  // Pull the model
  console.log(`[Ollama] Pulling model: ${modelName} (this may take a while)...`);
  try {
    const response = await fetch(`${OLLAMA_URL}/api/pull`, {
      method: 'POST',
      headers: { 'Content-Type': 'application/json' },
      body: JSON.stringify({ name: modelName, stream: false }),
    });
    if (response.ok) {
      console.log(`[Ollama] Model pulled successfully: ${modelName}`);
      return true;
    } else {
      console.log(`[Ollama] Failed to pull model: ${response.status}`);
      return false;
    }
  } catch (err) {
    console.log(`[Ollama] Error pulling model: ${err}`);
    return false;
  }
}
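// Usage sketch: the Ollama service must be up first, and the tag must match
// Ollama's exact model name (no prefix matching):
//
//   if (await ensureMiniCpm()) {
//     const pulled = await ensureOllamaModel('qwen2.5:7b');
//   }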
/**
* Ensure Qwen2.5 7B model is available (for text-only JSON extraction)
*/
export async function ensureQwen25(): Promise<boolean> {
  // First ensure the Ollama service (MiniCPM container) is running
  const ollamaOk = await ensureMiniCpm();
  if (!ollamaOk) return false;
  // Then ensure the Qwen2.5 model is pulled
  return ensureOllamaModel('qwen2.5:7b');
}
/**
* Ensure Ministral 3 8B model is available (for structured JSON extraction)
 * Ministral 3 supports native JSON output and OCR-style document extraction
 */
export async function ensureMinistral3(): Promise<boolean> {
  // First ensure the Ollama service (MiniCPM container) is running
  const ollamaOk = await ensureMiniCpm();
  if (!ollamaOk) return false;
  // Then ensure the Ministral 3 8B model is pulled
  return ensureOllamaModel('ministral-3:8b');
}
/**
* Ensure Qwen3-VL 8B model is available (vision-language model)
* Q4_K_M quantization (~5GB) - fits in 15GB VRAM with room to spare
*/
export async function ensureQwen3Vl(): Promise<boolean> {
  // First ensure the Ollama service is running
  const ollamaOk = await ensureMiniCpm();
  if (!ollamaOk) return false;
  // Then ensure Qwen3-VL 8B is pulled
  return ensureOllamaModel('qwen3-vl:8b');
}
/**
 * Ensure Nanonets-OCR2-3B service is running (via vLLM)
 * Document OCR optimized VLM based on Qwen2.5-VL-3B (Oct 2025 release)
 */
export async function ensureNanonetsOcr(): Promise<boolean> {
  if (!isGpuAvailable()) {
    console.log('[Docker] WARNING: Nanonets-OCR2-3B requires GPU, but none detected');
  }
  return ensureService(IMAGES.nanonetsOcr);
}
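// End-to-end sketch of how a test setup might use this module; the beforeAll
// hook and its timeout are hypothetical, not part of this file:
//
//   beforeAll(async () => {
//     if (!isGpuAvailable()) console.warn('no GPU detected; OCR tests may fail');
//     const ready = await ensureNanonetsOcr();
//     if (!ready) throw new Error('Nanonets OCR service unavailable');
//   }, 360000);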