// modelgrid/test/api-server_test.ts
import { assertEquals } from 'jsr:@std/assert@^1.0.0';
import { ApiServer } from '../ts/api/server.ts';
Deno.test('ApiServer serves health metrics and authenticated model listings', async () => {
// Random high port so repeated runs don't collide with a lingering listener.
const port = 18100 + Math.floor(Math.random() * 1000);
const base = `http://127.0.0.1:${port}`;
// Minimal stand-ins for the server's collaborators: only the methods the
// exercised endpoints actually call are implemented, hence the `as never` casts.
const engineStub = {
async getAllStatus() {
return new Map([['vllm-1', { running: true, health: 'healthy' }]]);
},
async getAllAvailableModels() {
return new Map([['meta-llama/Llama-3.1-8B-Instruct', [{ type: 'vllm' }]]]);
},
} as never;
const registryStub = {
async getAllModels() {
return [
{
id: 'meta-llama/Llama-3.1-8B-Instruct',
engine: 'vllm',
source: { repo: 'meta-llama/Llama-3.1-8B-Instruct' },
capabilities: { chat: true },
requirements: { minVramGb: 18 },
},
];
},
} as never;
const clusterStub = {
getStatus() {
return { localNode: null, nodes: [], models: {}, desiredDeployments: [] };
},
} as never;
const server = new ApiServer(
{ host: '127.0.0.1', port, apiKeys: ['valid-key'], cors: false, corsOrigins: [] },
engineStub,
registryStub,
{} as never,
clusterStub,
{
gpuDetector: {
async detectGpus() {
return [{ id: 'nvidia-0' }];
},
} as never,
},
);
await server.start();
try {
// /health reports overall status plus the count of available models.
const health = await fetch(`${base}/health`);
const healthJson = await health.json();
assertEquals(health.status, 200);
assertEquals(healthJson.status, 'ok');
assertEquals(healthJson.models, 1);
// /metrics exposes Prometheus-style counters without authentication.
const metrics = await fetch(`${base}/metrics`);
const metricsText = await metrics.text();
assertEquals(metrics.status, 200);
assertEquals(metricsText.includes('modelgrid_uptime_seconds'), true);
assertEquals(metricsText.includes('modelgrid_models_available 1'), true);
// /v1/models rejects requests that carry no API key...
const denied = await fetch(`${base}/v1/models`);
const deniedJson = await denied.json();
assertEquals(denied.status, 401);
assertEquals(deniedJson.error.type, 'authentication_error');
// ...and serves the model listing when a valid bearer token is supplied.
const allowed = await fetch(`${base}/v1/models`, {
headers: {
Authorization: 'Bearer valid-key',
},
});
const allowedJson = await allowed.json();
assertEquals(allowed.status, 200);
assertEquals(allowedJson.object, 'list');
assertEquals(allowedJson.data[0].id, 'meta-llama/Llama-3.1-8B-Instruct');
// The per-path counters should now reflect both the denied and the
// successful /v1/models call made above.
const afterMetrics = await fetch(`${base}/metrics`);
const afterMetricsText = await afterMetrics.text();
assertEquals(
afterMetricsText.includes('modelgrid_api_requests_total{path="/v1/models"} 2'),
true,
);
assertEquals(
afterMetricsText.includes('modelgrid_api_auth_failures_total{path="/v1/models"} 1'),
true,
);
} finally {
await server.stop();
}
});
Deno.test('ApiServer metrics expose 5xx counts for failing endpoints', async () => {
// Random high port so repeated runs don't collide with a lingering listener.
const port = 19100 + Math.floor(Math.random() * 1000);
const base = `http://127.0.0.1:${port}`;
// One-shot failure switch: the first model listing throws, subsequent calls succeed.
let failModelListing = true;
// Empty stand-ins except for the deliberately failing model listing.
const engineStub = {
async getAllStatus() {
return new Map();
},
async getAllAvailableModels() {
if (failModelListing) {
failModelListing = false;
throw new Error('models unavailable');
}
return new Map();
},
} as never;
const registryStub = {
async getAllModels() {
return [];
},
} as never;
const clusterStub = {
getStatus() {
return { localNode: null, nodes: [], models: {}, desiredDeployments: [] };
},
} as never;
const server = new ApiServer(
{ host: '127.0.0.1', port, apiKeys: ['valid-key'], cors: false, corsOrigins: [] },
engineStub,
registryStub,
{} as never,
clusterStub,
{
gpuDetector: {
async detectGpus() {
return [];
},
} as never,
},
);
await server.start();
try {
// The injected failure should surface as a 500 on the authenticated route.
const failed = await fetch(`${base}/v1/models`, {
headers: {
Authorization: 'Bearer valid-key',
},
});
assertEquals(failed.status, 500);
// Drain the body so the connection is released before the next request.
await failed.text();
// The server-error counter for the failing path should have been incremented.
const metrics = await fetch(`${base}/metrics`);
const metricsText = await metrics.text();
assertEquals(
metricsText.includes('modelgrid_api_server_errors_total{path="/v1/models"} 1'),
true,
);
} finally {
await server.stop();
}
});
Deno.test('ApiServer enforces api rate limits while exempting health and metrics', async () => {
// Random high port so repeated runs don't collide with a lingering listener.
const port = 19200 + Math.floor(Math.random() * 1000);
const base = `http://127.0.0.1:${port}`;
// Empty stand-ins; this test only cares about the rateLimit of 2 in the config.
const engineStub = {
async getAllStatus() {
return new Map();
},
async getAllAvailableModels() {
return new Map();
},
} as never;
const registryStub = {
async getAllModels() {
return [];
},
} as never;
const clusterStub = {
getStatus() {
return { localNode: null, nodes: [], models: {}, desiredDeployments: [] };
},
} as never;
const server = new ApiServer(
{
host: '127.0.0.1',
port,
apiKeys: ['valid-key'],
rateLimit: 2,
cors: false,
corsOrigins: [],
},
engineStub,
registryStub,
{} as never,
clusterStub,
{
gpuDetector: {
async detectGpus() {
return [];
},
} as never,
},
);
await server.start();
try {
const authHeaders = {
Authorization: 'Bearer valid-key',
};
// The first two authenticated calls fit within the limit of 2...
const first = await fetch(`${base}/v1/models`, { headers: authHeaders });
assertEquals(first.status, 200);
await first.text();
const second = await fetch(`${base}/v1/models`, { headers: authHeaders });
assertEquals(second.status, 200);
await second.text();
// ...and the third is rejected with a rate-limit error.
const third = await fetch(`${base}/v1/models`, { headers: authHeaders });
assertEquals(third.status, 429);
assertEquals((await third.json()).error.type, 'rate_limit_exceeded');
// Health and metrics remain reachable even after the limit is exhausted.
const health = await fetch(`${base}/health`);
assertEquals(health.status, 200);
await health.text();
const metrics = await fetch(`${base}/metrics`);
assertEquals(metrics.status, 200);
await metrics.text();
} finally {
await server.stop();
}
});