fix(api): map upstream timeouts to 504 responses
This commit is contained in:
@@ -0,0 +1,120 @@
|
||||
import { assertEquals } from 'jsr:@std/assert@^1.0.0';
|
||||
import { ChatHandler } from '../ts/api/handlers/chat.ts';
|
||||
import { EmbeddingsHandler } from '../ts/api/handlers/embeddings.ts';
|
||||
import { UpstreamTimeoutError } from '../ts/containers/base-container.ts';
|
||||
|
||||
/**
 * Minimal in-memory stand-in for an HTTP server response, used by the tests
 * in this file to capture what a handler sends back.
 *
 * It records the status code and headers passed to `writeHead`, and collects
 * everything passed to `write` and `end` into `body` so assertions can inspect
 * the complete payload.
 */
class TestResponse {
  /** Last status code passed to `writeHead`; 200 until `writeHead` is called. */
  public statusCode = 200;

  /** Headers passed to the most recent `writeHead` call. */
  public headers: Record<string, string> = {};

  /** Concatenation of every chunk passed to `write` plus the `end` payload. */
  public body = '';

  // Streaming decoder so multi-byte characters split across binary chunks
  // are reassembled instead of being decoded as replacement characters.
  private readonly decoder = new TextDecoder();

  /**
   * Records the response status and headers.
   *
   * @param statusCode HTTP status code to record.
   * @param headers Response headers; defaults to an empty record so a
   *   status-only call never leaves `headers` undefined.
   * @returns This instance, to allow chaining.
   */
  public writeHead(statusCode: number, headers: Record<string, string> = {}): TestResponse {
    this.statusCode = statusCode;
    this.headers = headers;
    return this;
  }

  /**
   * Finishes the response, appending the optional final payload to whatever
   * was already written.
   *
   * @param body Final chunk of the response body; defaults to the empty string.
   * @returns This instance, to allow chaining.
   */
  public end(body = ''): TestResponse {
    // Calling decode() without arguments flushes any bytes still buffered
    // from a previous streaming decode before the final payload is appended.
    this.body += this.decoder.decode() + body;
    return this;
  }

  /**
   * Appends a chunk to the captured body.
   *
   * @param chunk Text or binary data; binary data is decoded as UTF-8.
   * @returns Always `true`.
   */
  public write(chunk: string | Uint8Array): boolean {
    this.body += typeof chunk === 'string' ? chunk : this.decoder.decode(chunk, { stream: true });
    return true;
  }
}
|
||||
|
||||
// Checks that ChatHandler.handleChatCompletion answers with HTTP 504 and an
// `upstream_timeout` error type when the container it is handed throws
// UpstreamTimeoutError from its chat-completion methods.
Deno.test('ChatHandler maps upstream timeouts to 504 responses', async () => {
  // Container whose chat methods both fail with an upstream timeout.
  const timingOutContainer = {
    chatCompletion: async () => {
      throw new UpstreamTimeoutError();
    },
    chatCompletionStream: async () => {
      throw new UpstreamTimeoutError();
    },
  };

  // Stub collaborators, in constructor-argument order. The names below are
  // inferred from the methods each stub exposes; their real roles are defined
  // by ChatHandler's constructor, which is not visible here.
  const containerLookup = {
    findContainerForModel: async () => timingOutContainer,
  };
  const modelCatalog = {
    getModel: async (modelName: string) => ({ id: modelName }),
  };
  const modelLoader = {
    loadModel: async () => ({ success: false }),
  };
  const deploymentPolicy = {
    shouldDeployLocallyFirst: () => false,
  };

  const handler = new ChatHandler(
    containerLookup as never,
    modelCatalog as never,
    modelLoader as never,
    deploymentPolicy as never,
  );

  const response = new TestResponse();
  const request = { headers: {} };
  const payload = {
    model: 'meta-llama/Llama-3.1-8B-Instruct',
    messages: [{ role: 'user', content: 'hi' }],
  };

  await handler.handleChatCompletion(request as never, response as never, payload);

  assertEquals(response.statusCode, 504);
  const parsedBody = JSON.parse(response.body);
  assertEquals(parsedBody.error.type, 'upstream_timeout');
});
|
||||
|
||||
// Checks that EmbeddingsHandler.handleEmbeddings answers with HTTP 504 and an
// `upstream_timeout` error type when the global fetch rejects with an error
// named 'AbortError'.
Deno.test('EmbeddingsHandler maps upstream timeouts to 504 responses', async () => {
  // Replacement fetch that always rejects the way an aborted request does.
  const abortingFetch = async () => {
    throw Object.assign(new Error('request aborted'), { name: 'AbortError' });
  };

  const originalFetch = globalThis.fetch;
  globalThis.fetch = abortingFetch;

  try {
    // Stub collaborators, in constructor-argument order. The names below are
    // inferred from the methods each stub exposes; their real roles are
    // defined by EmbeddingsHandler's constructor, which is not visible here.
    const containerLookup = {
      // No container is reported for the model.
      findContainerForModel: async () => null,
    };
    const modelCatalog = {
      getModel: async (modelName: string) => ({ id: modelName }),
    };
    const controlPlane = {
      // Reports the model as living on a node other than the local one.
      ensureModelViaControlPlane: async (modelName: string) => {
        const location = {
          modelId: modelName,
          nodeName: 'worker-a',
          endpoint: 'http://worker-a:8080',
          healthy: true,
          engine: 'vllm',
          containerId: 'remote',
        };
        return { location };
      },
      getLocalNodeName: () => 'control',
    };

    const handler = new EmbeddingsHandler(
      containerLookup as never,
      modelCatalog as never,
      controlPlane as never,
    );

    const response = new TestResponse();
    const request = { headers: {} };
    const payload = { model: 'BAAI/bge-m3', input: 'hello' };

    await handler.handleEmbeddings(request as never, response as never, payload);

    assertEquals(response.statusCode, 504);
    const parsedBody = JSON.parse(response.body);
    assertEquals(parsedBody.error.type, 'upstream_timeout');
  } finally {
    // Always put the real fetch back so other tests are unaffected.
    globalThis.fetch = originalFetch;
  }
});
|
||||
Reference in New Issue
Block a user