import { assertEquals } from 'jsr:@std/assert@^1.0.0';

import { ApiServer } from '../ts/api/server.ts';
Deno.test('ApiServer serves health metrics and authenticated model listings', async () => {
|
|
const port = 18100 + Math.floor(Math.random() * 1000);
|
|
const server = new ApiServer(
|
|
{
|
|
host: '127.0.0.1',
|
|
port,
|
|
apiKeys: ['valid-key'],
|
|
cors: false,
|
|
corsOrigins: [],
|
|
},
|
|
{
|
|
async getAllStatus() {
|
|
return new Map([
|
|
['vllm-1', { running: true, health: 'healthy' }],
|
|
]);
|
|
},
|
|
async getAllAvailableModels() {
|
|
return new Map([
|
|
['meta-llama/Llama-3.1-8B-Instruct', [{ type: 'vllm' }]],
|
|
]);
|
|
},
|
|
} as never,
|
|
{
|
|
async getAllModels() {
|
|
return [
|
|
{
|
|
id: 'meta-llama/Llama-3.1-8B-Instruct',
|
|
engine: 'vllm',
|
|
source: { repo: 'meta-llama/Llama-3.1-8B-Instruct' },
|
|
capabilities: { chat: true },
|
|
requirements: { minVramGb: 18 },
|
|
},
|
|
];
|
|
},
|
|
} as never,
|
|
{} as never,
|
|
{
|
|
getStatus() {
|
|
return {
|
|
localNode: null,
|
|
nodes: [],
|
|
models: {},
|
|
desiredDeployments: [],
|
|
};
|
|
},
|
|
} as never,
|
|
{
|
|
gpuDetector: {
|
|
async detectGpus() {
|
|
return [{ id: 'nvidia-0' }];
|
|
},
|
|
} as never,
|
|
},
|
|
);
|
|
|
|
await server.start();
|
|
|
|
try {
|
|
const healthResponse = await fetch(`http://127.0.0.1:${port}/health`);
|
|
const healthBody = await healthResponse.json();
|
|
assertEquals(healthResponse.status, 200);
|
|
assertEquals(healthBody.status, 'ok');
|
|
assertEquals(healthBody.models, 1);
|
|
assertEquals(Array.isArray(healthBody.reasons), true);
|
|
assertEquals(healthBody.reasons.length, 0);
|
|
assertEquals(typeof healthResponse.headers.get('x-request-id'), 'string');
|
|
|
|
const metricsResponse = await fetch(`http://127.0.0.1:${port}/metrics`);
|
|
const metricsBody = await metricsResponse.text();
|
|
assertEquals(metricsResponse.status, 200);
|
|
assertEquals(metricsBody.includes('modelgrid_uptime_seconds'), true);
|
|
assertEquals(metricsBody.includes('modelgrid_models_available 1'), true);
|
|
|
|
const unauthenticatedModels = await fetch(`http://127.0.0.1:${port}/v1/models`);
|
|
const unauthenticatedBody = await unauthenticatedModels.json();
|
|
assertEquals(unauthenticatedModels.status, 401);
|
|
assertEquals(unauthenticatedBody.error.type, 'authentication_error');
|
|
|
|
const authenticatedModels = await fetch(`http://127.0.0.1:${port}/v1/models`, {
|
|
headers: {
|
|
Authorization: 'Bearer valid-key',
|
|
'X-Request-Id': 'req-test-models',
|
|
},
|
|
});
|
|
const authenticatedBody = await authenticatedModels.json();
|
|
assertEquals(authenticatedModels.status, 200);
|
|
assertEquals(authenticatedBody.object, 'list');
|
|
assertEquals(authenticatedBody.data[0].id, 'meta-llama/Llama-3.1-8B-Instruct');
|
|
assertEquals(authenticatedModels.headers.get('x-request-id'), 'req-test-models');
|
|
|
|
const metricsAfterRequests = await fetch(`http://127.0.0.1:${port}/metrics`);
|
|
const metricsAfterRequestsBody = await metricsAfterRequests.text();
|
|
assertEquals(
|
|
metricsAfterRequestsBody.includes('modelgrid_api_requests_total{path="/v1/models"} 2'),
|
|
true,
|
|
);
|
|
assertEquals(
|
|
metricsAfterRequestsBody.includes('modelgrid_api_auth_failures_total{path="/v1/models"} 1'),
|
|
true,
|
|
);
|
|
} finally {
|
|
await server.stop();
|
|
}
|
|
});
Deno.test('ApiServer metrics expose 5xx counts for failing endpoints', async () => {
|
|
const port = 19100 + Math.floor(Math.random() * 1000);
|
|
let failModelListing = true;
|
|
const server = new ApiServer(
|
|
{
|
|
host: '127.0.0.1',
|
|
port,
|
|
apiKeys: ['valid-key'],
|
|
cors: false,
|
|
corsOrigins: [],
|
|
},
|
|
{
|
|
async getAllStatus() {
|
|
return new Map();
|
|
},
|
|
async getAllAvailableModels() {
|
|
if (failModelListing) {
|
|
failModelListing = false;
|
|
throw new Error('models unavailable');
|
|
}
|
|
|
|
return new Map();
|
|
},
|
|
} as never,
|
|
{
|
|
async getAllModels() {
|
|
return [];
|
|
},
|
|
} as never,
|
|
{} as never,
|
|
{
|
|
getStatus() {
|
|
return {
|
|
localNode: null,
|
|
nodes: [],
|
|
models: {},
|
|
desiredDeployments: [],
|
|
};
|
|
},
|
|
} as never,
|
|
{
|
|
gpuDetector: {
|
|
async detectGpus() {
|
|
return [];
|
|
},
|
|
} as never,
|
|
},
|
|
);
|
|
|
|
await server.start();
|
|
|
|
try {
|
|
const failedModels = await fetch(`http://127.0.0.1:${port}/v1/models`, {
|
|
headers: {
|
|
Authorization: 'Bearer valid-key',
|
|
},
|
|
});
|
|
assertEquals(failedModels.status, 500);
|
|
await failedModels.text();
|
|
|
|
const metricsResponse = await fetch(`http://127.0.0.1:${port}/metrics`);
|
|
const metricsBody = await metricsResponse.text();
|
|
assertEquals(
|
|
metricsBody.includes('modelgrid_api_server_errors_total{path="/v1/models"} 1'),
|
|
true,
|
|
);
|
|
} finally {
|
|
await server.stop();
|
|
}
|
|
});
Deno.test('ApiServer health reports degraded reasons', async () => {
|
|
const port = 19300 + Math.floor(Math.random() * 1000);
|
|
const server = new ApiServer(
|
|
{
|
|
host: '127.0.0.1',
|
|
port,
|
|
apiKeys: ['valid-key'],
|
|
cors: false,
|
|
corsOrigins: [],
|
|
},
|
|
{
|
|
async getAllStatus() {
|
|
return new Map([
|
|
['vllm-1', { running: false, health: 'unhealthy' }],
|
|
]);
|
|
},
|
|
async getAllAvailableModels() {
|
|
return new Map();
|
|
},
|
|
} as never,
|
|
{
|
|
async getAllModels() {
|
|
return [];
|
|
},
|
|
} as never,
|
|
{} as never,
|
|
{
|
|
getStatus() {
|
|
return {
|
|
localNode: null,
|
|
nodes: [],
|
|
models: {},
|
|
desiredDeployments: [],
|
|
};
|
|
},
|
|
} as never,
|
|
{
|
|
gpuDetector: {
|
|
async detectGpus() {
|
|
return [{ id: 'nvidia-0' }];
|
|
},
|
|
} as never,
|
|
},
|
|
);
|
|
|
|
await server.start();
|
|
|
|
try {
|
|
const response = await fetch(`http://127.0.0.1:${port}/health`);
|
|
const body = await response.json();
|
|
|
|
assertEquals(response.status, 503);
|
|
assertEquals(body.status, 'degraded');
|
|
assertEquals(body.reasons.includes('unhealthy_container'), true);
|
|
assertEquals(body.reasons.includes('no_models_available'), true);
|
|
} finally {
|
|
await server.stop();
|
|
}
|
|
});
Deno.test('ApiServer enforces api rate limits while exempting health and metrics', async () => {
|
|
const port = 19200 + Math.floor(Math.random() * 1000);
|
|
const server = new ApiServer(
|
|
{
|
|
host: '127.0.0.1',
|
|
port,
|
|
apiKeys: ['valid-key'],
|
|
rateLimit: 2,
|
|
cors: false,
|
|
corsOrigins: [],
|
|
},
|
|
{
|
|
async getAllStatus() {
|
|
return new Map();
|
|
},
|
|
async getAllAvailableModels() {
|
|
return new Map([
|
|
['meta-llama/Llama-3.1-8B-Instruct', [{ type: 'vllm' }]],
|
|
]);
|
|
},
|
|
} as never,
|
|
{
|
|
async getAllModels() {
|
|
return [];
|
|
},
|
|
} as never,
|
|
{} as never,
|
|
{
|
|
getStatus() {
|
|
return {
|
|
localNode: null,
|
|
nodes: [],
|
|
models: {},
|
|
desiredDeployments: [],
|
|
};
|
|
},
|
|
} as never,
|
|
{
|
|
gpuDetector: {
|
|
async detectGpus() {
|
|
return [];
|
|
},
|
|
} as never,
|
|
},
|
|
);
|
|
|
|
await server.start();
|
|
|
|
try {
|
|
const requestHeaders = {
|
|
Authorization: 'Bearer valid-key',
|
|
};
|
|
|
|
const first = await fetch(`http://127.0.0.1:${port}/v1/models`, { headers: requestHeaders });
|
|
assertEquals(first.status, 200);
|
|
await first.text();
|
|
|
|
const second = await fetch(`http://127.0.0.1:${port}/v1/models`, { headers: requestHeaders });
|
|
assertEquals(second.status, 200);
|
|
await second.text();
|
|
|
|
const third = await fetch(`http://127.0.0.1:${port}/v1/models`, { headers: requestHeaders });
|
|
assertEquals(third.status, 429);
|
|
assertEquals((await third.json()).error.type, 'rate_limit_exceeded');
|
|
|
|
const health = await fetch(`http://127.0.0.1:${port}/health`);
|
|
assertEquals(health.status, 200);
|
|
await health.text();
|
|
|
|
const metrics = await fetch(`http://127.0.0.1:${port}/metrics`);
|
|
assertEquals(metrics.status, 200);
|
|
await metrics.text();
|
|
} finally {
|
|
await server.stop();
|
|
}
|
|
});
|