feat(paddleocr-vl): add PaddleOCR-VL full pipeline Docker image and API server, plus integration tests and docker helpers
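The docker helpers in this commit are meant to be consumed from the new integration tests. The following is a minimal sketch of that wiring, not code from the commit itself: the test file location, the Vitest-style describe/beforeAll API, and the skip-on-unavailable behaviour are assumptions for illustration only.

// Hypothetical integration test using the helpers added in this commit.
// Assumes a Vitest/Jest-style runner and Node 18+ (global fetch); only
// ensurePaddleOcrVlFull comes from test/helpers/docker.ts.
import { beforeAll, describe, expect, it } from 'vitest';
import { ensurePaddleOcrVlFull } from '../helpers/docker';

describe('PaddleOCR-VL full pipeline', () => {
  let serviceReady = false;

  // Builds the image if missing, starts the container, and waits for /health.
  beforeAll(async () => {
    serviceReady = await ensurePaddleOcrVlFull();
  }, 700_000); // a little above the 10-minute health timeout

  it('responds on the health endpoint', async () => {
    if (!serviceReady) return; // e.g. no GPU available; skip the assertion
    const res = await fetch('http://localhost:8000/health');
    expect(res.ok).toBe(true);
  });
});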
test/helpers/docker.ts (Normal file, 297 lines)
@@ -0,0 +1,297 @@
import { execSync } from 'child_process';

// Project container names (only manage these)
const PROJECT_CONTAINERS = [
  'paddleocr-vl-test',
  'paddleocr-vl-gpu-test',
  'paddleocr-vl-cpu-test',
  'paddleocr-vl-full-test',
  'minicpm-test',
];

// Image configurations
export interface IImageConfig {
  name: string;
  dockerfile: string;
  buildContext: string;
  containerName: string;
  ports: string[];
  volumes?: string[];
  gpus?: boolean;
  healthEndpoint?: string;
  healthTimeout?: number;
}

export const IMAGES = {
  paddleocrVlGpu: {
    name: 'paddleocr-vl-gpu',
    dockerfile: 'Dockerfile_paddleocr_vl_gpu',
    buildContext: '.',
    containerName: 'paddleocr-vl-test',
    ports: ['8000:8000'],
    volumes: ['ht-huggingface-cache:/root/.cache/huggingface'],
    gpus: true,
    healthEndpoint: 'http://localhost:8000/health',
    healthTimeout: 300000, // 5 minutes for model loading
  } as IImageConfig,

  paddleocrVlCpu: {
    name: 'paddleocr-vl-cpu',
    dockerfile: 'Dockerfile_paddleocr_vl_cpu',
    buildContext: '.',
    containerName: 'paddleocr-vl-test',
    ports: ['8000:8000'],
    volumes: ['ht-huggingface-cache:/root/.cache/huggingface'],
    gpus: false,
    healthEndpoint: 'http://localhost:8000/health',
    healthTimeout: 300000,
  } as IImageConfig,

  minicpm: {
    name: 'minicpm45v',
    dockerfile: 'Dockerfile_minicpm45v',
    buildContext: '.',
    containerName: 'minicpm-test',
    ports: ['11434:11434'],
    volumes: ['ht-ollama-models:/root/.ollama'],
    gpus: true,
    healthEndpoint: 'http://localhost:11434/api/tags',
    healthTimeout: 120000,
  } as IImageConfig,

  // Full PaddleOCR-VL pipeline with PP-DocLayoutV2 + structured JSON output
  paddleocrVlFull: {
    name: 'paddleocr-vl-full',
    dockerfile: 'Dockerfile_paddleocr_vl_full',
    buildContext: '.',
    containerName: 'paddleocr-vl-full-test',
    ports: ['8000:8000'],
    volumes: [
      'ht-huggingface-cache:/root/.cache/huggingface',
      'ht-paddleocr-cache:/root/.paddleocr',
    ],
    gpus: true,
    healthEndpoint: 'http://localhost:8000/health',
    healthTimeout: 600000, // 10 minutes for model loading (vLLM + PP-DocLayoutV2)
  } as IImageConfig,
};

/**
 * Execute a shell command and return its output.
 * With `silent` set, output is captured (stdio 'pipe') and failures return an
 * empty string; otherwise output is streamed to the console and errors are rethrown.
 */
function exec(command: string, silent = false): string {
  try {
    return execSync(command, {
      encoding: 'utf-8',
      stdio: silent ? 'pipe' : 'inherit',
    });
  } catch (err: unknown) {
    if (silent) return '';
    throw err;
  }
}

/**
 * Check if a Docker image exists locally
 */
export function imageExists(imageName: string): boolean {
  const result = exec(`docker images -q ${imageName}`, true);
  return result.trim().length > 0;
}

/**
 * Check if a container is running
 */
export function isContainerRunning(containerName: string): boolean {
  const result = exec(`docker ps --filter "name=^${containerName}$" --format "{{.Names}}"`, true);
  return result.trim() === containerName;
}

/**
 * Check if a container exists (running or stopped)
 */
export function containerExists(containerName: string): boolean {
  const result = exec(`docker ps -a --filter "name=^${containerName}$" --format "{{.Names}}"`, true);
  return result.trim() === containerName;
}

/**
 * Stop and remove a container
 */
export function removeContainer(containerName: string): void {
  if (containerExists(containerName)) {
    console.log(`[Docker] Removing container: ${containerName}`);
    exec(`docker rm -f ${containerName}`, true);
  }
}

/**
 * Stop all project containers that conflict with the required one
 */
export function stopConflictingContainers(requiredContainer: string, requiredPort: string): void {
  // Stop project containers using the same port
  for (const container of PROJECT_CONTAINERS) {
    if (container === requiredContainer) continue;

    if (isContainerRunning(container)) {
      // Check if this container uses the same port
      const ports = exec(`docker port ${container} 2>/dev/null || true`, true);
      if (ports.includes(requiredPort.split(':')[0])) {
        console.log(`[Docker] Stopping conflicting container: ${container}`);
        exec(`docker stop ${container}`, true);
      }
    }
  }
}

/**
 * Build a Docker image
 */
export function buildImage(config: IImageConfig): void {
  console.log(`[Docker] Building image: ${config.name}`);
  const cmd = `docker build --load -f ${config.dockerfile} -t ${config.name} ${config.buildContext}`;
  exec(cmd);
}

/**
 * Start a container from an image
 */
export function startContainer(config: IImageConfig): void {
  // Remove existing container if it exists
  removeContainer(config.containerName);

  console.log(`[Docker] Starting container: ${config.containerName}`);

  const portArgs = config.ports.map((p) => `-p ${p}`).join(' ');
  const volumeArgs = config.volumes?.map((v) => `-v ${v}`).join(' ') || '';
  const gpuArgs = config.gpus ? '--gpus all' : '';

  const cmd = `docker run -d --name ${config.containerName} ${gpuArgs} ${portArgs} ${volumeArgs} ${config.name}`;
  exec(cmd);
}

/**
 * Wait for a container to become healthy
 */
export async function waitForHealth(
  endpoint: string,
  timeoutMs: number = 120000,
  intervalMs: number = 5000
): Promise<boolean> {
  const startTime = Date.now();
  console.log(`[Docker] Waiting for health: ${endpoint}`);

  while (Date.now() - startTime < timeoutMs) {
    try {
      const response = await fetch(endpoint, {
        method: 'GET',
        signal: AbortSignal.timeout(5000),
      });
      if (response.ok) {
        console.log(`[Docker] Service healthy!`);
        return true;
      }
    } catch {
      // Service not ready yet
    }

    const elapsed = Math.round((Date.now() - startTime) / 1000);
    console.log(`[Docker] Waiting... (${elapsed}s)`);
    await new Promise((resolve) => setTimeout(resolve, intervalMs));
  }

  console.log(`[Docker] Health check timeout after ${timeoutMs / 1000}s`);
  return false;
}

/**
 * Ensure a service is running and healthy
 * - Builds image if missing
 * - Stops conflicting project containers
 * - Starts container if not running
 * - Waits for health check
 */
export async function ensureService(config: IImageConfig): Promise<boolean> {
  console.log(`\n[Docker] Ensuring service: ${config.name}`);

  // Build image if it doesn't exist
  if (!imageExists(config.name)) {
    console.log(`[Docker] Image not found, building...`);
    buildImage(config);
  }

  // Stop conflicting containers on the same port
  const mainPort = config.ports[0];
  stopConflictingContainers(config.containerName, mainPort);

  // Start container if not running
  if (!isContainerRunning(config.containerName)) {
    startContainer(config);
  } else {
    console.log(`[Docker] Container already running: ${config.containerName}`);
  }

  // Wait for health
  if (config.healthEndpoint) {
    return waitForHealth(config.healthEndpoint, config.healthTimeout);
  }

  return true;
}

/**
 * Ensure PaddleOCR-VL GPU service is running
 */
export async function ensurePaddleOcrVlGpu(): Promise<boolean> {
  return ensureService(IMAGES.paddleocrVlGpu);
}

/**
 * Ensure PaddleOCR-VL CPU service is running
 */
export async function ensurePaddleOcrVlCpu(): Promise<boolean> {
  return ensureService(IMAGES.paddleocrVlCpu);
}

/**
 * Ensure MiniCPM service is running
 */
export async function ensureMiniCpm(): Promise<boolean> {
  return ensureService(IMAGES.minicpm);
}

/**
 * Check if GPU is available
 */
export function isGpuAvailable(): boolean {
  try {
    const result = exec('nvidia-smi --query-gpu=name --format=csv,noheader 2>/dev/null', true);
    return result.trim().length > 0;
  } catch {
    return false;
  }
}

/**
 * Ensure PaddleOCR-VL service (auto-detect GPU/CPU)
 */
export async function ensurePaddleOcrVl(): Promise<boolean> {
  if (isGpuAvailable()) {
    console.log('[Docker] GPU detected, using GPU image');
    return ensurePaddleOcrVlGpu();
  } else {
    console.log('[Docker] No GPU detected, using CPU image');
    return ensurePaddleOcrVlCpu();
  }
}

/**
 * Ensure the PaddleOCR-VL Full Pipeline service is running (PP-DocLayoutV2 + structured output).
 * This is the recommended service for production use; it outputs structured JSON/Markdown.
 */
export async function ensurePaddleOcrVlFull(): Promise<boolean> {
  if (!isGpuAvailable()) {
    console.log('[Docker] WARNING: Full pipeline requires GPU, but none detected');
  }
  return ensureService(IMAGES.paddleocrVlFull);
}
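Because ensureService is driven entirely by IImageConfig, additional backends can reuse the same build/start/health flow. Below is a minimal sketch of such an extension; the Dockerfile, ports, and names are hypothetical and not part of this commit, and the new container name would also need to be added to PROJECT_CONTAINERS inside docker.ts so port-conflict handling covers it.

// Hypothetical extra service config reusing ensureService; all names and ports are placeholders.
import { ensureService, IImageConfig } from './docker';

const newBackend: IImageConfig = {
  name: 'new-backend',
  dockerfile: 'Dockerfile_new_backend',
  buildContext: '.',
  containerName: 'new-backend-test', // would also need adding to PROJECT_CONTAINERS in docker.ts
  ports: ['9000:9000'],
  gpus: false,
  healthEndpoint: 'http://localhost:9000/health',
  healthTimeout: 180000, // 3 minutes
};

export async function ensureNewBackend(): Promise<boolean> {
  return ensureService(newBackend);
}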