// File metadata: 532 lines, 18 KiB, TypeScript.
import { expect, tap } from '@git.zone/tstest/tapbundle';
|
|
import { MockLanguageModelV3, convertArrayToReadableStream } from 'ai/test';
|
|
import * as smartagent from '../ts/index.js';
|
|
import { createBrowserTools, createFilesystemTools, createShellTools, filesystemTool, shellTool, httpTool, jsonTool, truncateOutput } from '../ts_tools/index.js';
|
|
import { compactMessages } from '../ts_compaction/index.js';
|
|
|
|
/**
 * Builds the usage record attached to mocked stream `finish` events.
 *
 * @param inputTokens - Reported as both the total and the non-cached input token count.
 * @param outputTokens - Reported as both the total and the text output token count.
 * @returns Usage object whose cache read/write and reasoning counts are fixed at 0.
 */
const createUsage = (inputTokens: number, outputTokens: number) => ({
  inputTokens: {
    total: inputTokens,
    noCache: inputTokens,
    cacheRead: 0,
    cacheWrite: 0,
  },
  outputTokens: {
    total: outputTokens,
    text: outputTokens,
    reasoning: 0,
  },
});

/**
 * Builds a mocked `doStream` result that emits a single text part and then finishes.
 *
 * @param text - Emitted as the only `text-delta` of the stream.
 * @returns Object with a `stream` of parts: stream-start, response-metadata,
 *   text-start/delta/end and a `finish` part with finish reason `stop`.
 */
const createTextStreamResult = (text: string) => ({
  stream: convertArrayToReadableStream([
    { type: 'stream-start', warnings: [] },
    { type: 'response-metadata', id: 'response-1', timestamp: new Date(0), modelId: 'mock-model' },
    { type: 'text-start', id: 'text-1' },
    { type: 'text-delta', id: 'text-1', delta: text },
    { type: 'text-end', id: 'text-1' },
    {
      type: 'finish',
      finishReason: { unified: 'stop', raw: 'stop' },
      usage: createUsage(1, 1),
    },
  ] as any[]),
});

/**
 * Builds a mocked `doStream` result that emits a reasoning part followed by a text part.
 *
 * @param reasoning - Reasoning text; emitted as two deltas split after the first 7 characters.
 * @param text - Emitted as the only `text-delta` of the stream.
 * @returns Object with a `stream` of parts ending in a `finish` part with finish reason `stop`.
 */
const createReasoningStreamResult = (reasoning: string, text: string) => ({
  stream: convertArrayToReadableStream([
    { type: 'stream-start', warnings: [] },
    { type: 'response-metadata', id: 'response-1', timestamp: new Date(0), modelId: 'mock-model' },
    { type: 'reasoning-start', id: 'reasoning-1' },
    // Two deltas so consumers are exercised with chunked reasoning output.
    { type: 'reasoning-delta', id: 'reasoning-1', delta: reasoning.slice(0, 7) },
    { type: 'reasoning-delta', id: 'reasoning-1', delta: reasoning.slice(7) },
    { type: 'reasoning-end', id: 'reasoning-1' },
    { type: 'text-start', id: 'text-1' },
    { type: 'text-delta', id: 'text-1', delta: text },
    { type: 'text-end', id: 'text-1' },
    {
      type: 'finish',
      finishReason: { unified: 'stop', raw: 'stop' },
      usage: createUsage(2, 2),
    },
  ] as any[]),
});

/**
 * Builds a mocked `doStream` result that emits a single tool call and then finishes.
 *
 * @param toolName - Name placed on the emitted `tool-call` part.
 * @param input - Tool input; serialized with `JSON.stringify` into the part's `input` field.
 * @returns Object with a `stream` of parts ending in a `finish` part with finish reason `tool-calls`.
 */
const createToolCallStreamResult = (toolName: string, input: unknown) => ({
  stream: convertArrayToReadableStream([
    { type: 'stream-start', warnings: [] },
    { type: 'response-metadata', id: 'response-1', timestamp: new Date(0), modelId: 'mock-model' },
    {
      type: 'tool-call',
      toolCallId: 'tool-call-1',
      toolName,
      input: JSON.stringify(input),
    },
    {
      type: 'finish',
      finishReason: { unified: 'tool-calls', raw: 'tool-calls' },
      usage: createUsage(2, 1),
    },
  ] as any[]),
});

// ============================================================
// Core exports
// ============================================================

// Each test below only checks that a symbol is present on the package entry point.

tap.test('should export runAgent function', async () => {
  expect(smartagent.runAgent).toBeTypeOf('function');
});

tap.test('should export ToolRegistry class', async () => {
  expect(smartagent.ToolRegistry).toBeTypeOf('function');
});

tap.test('should export ContextOverflowError class', async () => {
  expect(smartagent.ContextOverflowError).toBeTypeOf('function');
});

tap.test('should export truncateOutput function', async () => {
  expect(smartagent.truncateOutput).toBeTypeOf('function');
});

tap.test('should re-export tool helper', async () => {
  expect(smartagent.tool).toBeTypeOf('function');
});

tap.test('should re-export z (zod)', async () => {
  expect(smartagent.z).toBeTruthy();
});

tap.test('should re-export stepCountIs', async () => {
  expect(smartagent.stepCountIs).toBeTypeOf('function');
});

// Verifies that caller-supplied providerOptions reach the model's doStream call.
tap.test('runAgent should forward providerOptions to streamText', async () => {
  const model = new MockLanguageModelV3({
    doStream: async () => createTextStreamResult('ok') as any,
  });

  const providerOptions = {
    openai: {
      reasoningEffort: 'xhigh',
    },
  } as const;

  const result = await smartagent.runAgent({
    model,
    prompt: 'hello',
    providerOptions,
  });

  expect(result.text).toEqual('ok');
  // The mock records every doStream invocation; inspect the first one.
  expect((model.doStreamCalls[0].providerOptions as any).openai.reasoningEffort).toEqual('xhigh');
});

// Verifies the OpenAI options seen by the model when a sessionId is supplied:
// cache-related defaults are present and the caller's own option is kept.
tap.test('runAgent should add OpenAI cache defaults when sessionId is provided', async () => {
  const model = new MockLanguageModelV3({
    provider: 'openai',
    modelId: 'gpt-5',
    doStream: async () => createTextStreamResult('ok') as any,
  });

  const result = await smartagent.runAgent({
    model,
    prompt: 'hello',
    sessionId: 'session-123',
    providerOptions: {
      openai: {
        reasoningEffort: 'high',
      },
    } as any,
  });

  const openaiOptions = (model.doStreamCalls[0].providerOptions as any).openai;

  expect(result.text).toEqual('ok');
  expect(openaiOptions.store).toEqual(false);
  expect(openaiOptions.promptCacheKey).toEqual('session-123');
  expect(openaiOptions.promptCacheRetention).toEqual('in_memory');
  // Caller-provided option must survive alongside the added defaults.
  expect(openaiOptions.reasoningEffort).toEqual('high');
});

// Verifies the order and payload of the reasoning callbacks and of onToken.
tap.test('runAgent should stream reasoning summary callbacks', async () => {
  const reasoningEvents: string[] = [];
  const tokenDeltas: string[] = [];
  const model = new MockLanguageModelV3({
    doStream: async () => createReasoningStreamResult('thinking through it', 'done') as any,
  });

  const result = await smartagent.runAgent({
    model,
    prompt: 'hello',
    onToken: (delta) => tokenDeltas.push(delta),
    onReasoningStart: (id) => reasoningEvents.push('start:' + id),
    onReasoningDelta: (id, delta) => reasoningEvents.push('delta:' + id + ':' + delta),
    onReasoningEnd: (id, text) => reasoningEvents.push('end:' + id + ':' + text),
  });

  expect(result.text).toEqual('done');
  expect(tokenDeltas.join('')).toEqual('done');
  // createReasoningStreamResult splits the reasoning text after 7 characters,
  // hence the two deltas 'thinkin' and 'g through it'.
  expect(reasoningEvents).toEqual([
    'start:reasoning-1',
    'delta:reasoning-1:thinkin',
    'delta:reasoning-1:g through it',
    'end:reasoning-1:thinking through it',
  ]);
});

// Verifies that, for an Anthropic model, the system and user messages passed to
// the model carry an ephemeral cacheControl marker without any cache option set.
tap.test('runAgent should mark Anthropic prompt cache breakpoints by default', async () => {
  const model = new MockLanguageModelV3({
    provider: 'anthropic',
    modelId: 'claude-sonnet-4-5-20250929',
    doStream: async () => createTextStreamResult('ok') as any,
  });

  const result = await smartagent.runAgent({
    model,
    system: 'stable system prompt',
    prompt: 'hello',
  });
  const prompt = model.doStreamCalls[0].prompt as any[];
  const systemMessage = prompt.find((message) => message.role === 'system');
  const userMessage = prompt.find((message) => message.role === 'user');

  expect(result.text).toEqual('ok');
  expect(systemMessage.providerOptions?.anthropic?.cacheControl?.type).toEqual('ephemeral');
  expect(userMessage.providerOptions?.anthropic?.cacheControl?.type).toEqual('ephemeral');
});

// Verifies that `cache: false` results in no providerOptions reaching the model,
// even though a sessionId is supplied.
tap.test('runAgent should allow cache defaults to be disabled', async () => {
  const model = new MockLanguageModelV3({
    provider: 'openai',
    modelId: 'gpt-5',
    doStream: async () => createTextStreamResult('ok') as any,
  });

  await smartagent.runAgent({
    model,
    prompt: 'hello',
    sessionId: 'session-123',
    cache: false,
  });

  expect(model.doStreamCalls[0].providerOptions).toBeUndefined();
});

// Verifies that a tool call made during the run is reported both in the final
// result and through the onToolCall / onToolResult callbacks.
tap.test('runAgent should return final tool call records', async () => {
  let streamCallCount = 0;
  const callbackToolCalls: Array<{ name: string; input: unknown }> = [];
  const callbackToolResults: Array<{ name: string; result: unknown }> = [];
  const model = new MockLanguageModelV3({
    doStream: async () => {
      // First model call requests the tool; every later call answers with text.
      streamCallCount++;
      return streamCallCount === 1
        ? createToolCallStreamResult('echo', { text: 'hello' }) as any
        : createTextStreamResult('saved') as any;
    },
  });

  const result = await smartagent.runAgent({
    model,
    prompt: 'echo hello',
    tools: {
      echo: smartagent.tool({
        description: 'Echo text',
        inputSchema: smartagent.z.object({ text: smartagent.z.string() }),
        execute: async ({ text }: { text: string }) => `saved:${text}`,
      }),
    },
    maxSteps: 5,
    onToolCall: (name, input) => callbackToolCalls.push({ name, input }),
    onToolResult: (name, result) => callbackToolResults.push({ name, result }),
  });

  const echoCall = result.toolCalls.find((toolCall) => toolCall.toolName === 'echo');

  expect(result.text).toEqual('saved');
  expect(echoCall).toBeTruthy();
  expect(echoCall!.input).toEqual({ text: 'hello' });
  expect(echoCall!.output).toEqual('saved:hello');
  expect(callbackToolCalls[0]).toEqual({ name: 'echo', input: { text: 'hello' } });
  expect(callbackToolResults[0]).toEqual({ name: 'echo', result: 'saved:hello' });
});

// Verifies that a string returned by validateCompletion triggers a second model
// call whose prompt contains that string.
tap.test('runAgent should reprompt when validateCompletion returns a string', async () => {
  let streamCallCount = 0;
  let validationCallCount = 0;
  const model = new MockLanguageModelV3({
    doStream: async () => {
      // First answer is rejected by the validator below; the second is accepted.
      streamCallCount++;
      return createTextStreamResult(streamCallCount === 1 ? 'incomplete' : 'complete') as any;
    },
  });

  const result = await smartagent.runAgent({
    model,
    prompt: 'process document',
    maxValidationRetries: 1,
    validateCompletion: (runResult) => {
      validationCallCount++;
      return runResult.text === 'complete' ? undefined : 'Call a save tool before finalizing.';
    },
  });

  expect(result.text).toEqual('complete');
  expect(validationCallCount).toEqual(2);
  expect(model.doStreamCalls.length).toEqual(2);
  expect(JSON.stringify(model.doStreamCalls[1].prompt)).toInclude('Call a save tool before finalizing.');
});

// Verifies that runAgent rejects, with the validator's message in the error,
// when validateCompletion never accepts the result.
tap.test('runAgent should reject when validation retries are exhausted', async () => {
  let threw = false;
  const model = new MockLanguageModelV3({
    doStream: async () => createTextStreamResult('incomplete') as any,
  });

  try {
    await smartagent.runAgent({
      model,
      prompt: 'process document',
      validateCompletion: () => 'Missing required save tool call.',
    });
  } catch (error) {
    threw = true;
    expect((error as Error).message).toInclude('Missing required save tool call.');
  }

  // Guards against the call resolving without throwing at all.
  expect(threw).toBeTrue();
});

// ============================================================
// ToolRegistry
// ============================================================

// Verifies that a tool registered under a name shows up in getTools().
tap.test('ToolRegistry should register and return tools', async () => {
  const registry = new smartagent.ToolRegistry();
  const echoTool = smartagent.tool({
    description: 'Echo tool',
    inputSchema: smartagent.z.object({ text: smartagent.z.string() }),
    execute: async ({ text }: { text: string }) => text,
  });
  registry.register('echo', echoTool);
  const tools = registry.getTools();
  expect(Object.keys(tools)).toContain('echo');
});

// ============================================================
// Truncation
// ============================================================

// Short input is returned unchanged and not flagged as truncated.
tap.test('truncateOutput should not truncate short strings', async () => {
  const result = truncateOutput('hello world');
  expect(result.truncated).toBeFalse();
  expect(result.content).toEqual('hello world');
});

// 3000 lines against a 100-line limit must be flagged and carry a notice.
tap.test('truncateOutput should truncate strings over maxLines', async () => {
  const lines = Array.from({ length: 3000 }, (_, i) => `line ${i}`).join('\n');
  const result = truncateOutput(lines, { maxLines: 100 });
  expect(result.truncated).toBeTrue();
  expect(result.notice).toBeTruthy();
  expect(result.content).toInclude('[Output truncated');
});

// A single 100,000-character line against a 1000-byte limit must be flagged.
tap.test('truncateOutput should truncate strings over maxBytes', async () => {
  const big = 'x'.repeat(100_000);
  const result = truncateOutput(big, { maxBytes: 1000 });
  expect(result.truncated).toBeTrue();
});

// ============================================================
// Tool factories
// ============================================================

// Checks the tool names exposed by filesystemTool() with default options.
tap.test('filesystemTool returns expected tool names', async () => {
  const tools = filesystemTool();
  const names = Object.keys(tools);
  expect(names).toContain('read_file');
  expect(names).toContain('write_file');
  expect(names).toContain('list_directory');
  expect(names).toContain('delete_file');
});

// Checks the tool names exposed by shellTool() with default options.
tap.test('shellTool returns expected tool names', async () => {
  const tools = shellTool();
  const names = Object.keys(tools);
  expect(names).toContain('run_command');
});

// Verifies that run_command delegates to the injected shell implementation,
// passes cwd/timeout through, and asks for permission with the command.
tap.test('createShellTools should execute through supplied context', async () => {
  const permissions: unknown[] = [];
  const calls: unknown[] = [];
  const tools = createShellTools({
    cwd: '/workspace',
    requestPermission: async (request) => {
      permissions.push(request);
    },
    shell: {
      // Fake shell: records the invocation instead of running anything.
      run: async (command, options) => {
        calls.push({ command, options });
        return { exitCode: 0, stdout: 'context-output', stderr: '' };
      },
    },
  });

  const result = await (tools.run_command as any).execute({ command: 'echo test', timeoutMs: 1234 });

  expect(result).toEqual('context-output');
  expect(calls[0]).toEqual({
    command: 'echo test',
    options: { cwd: '/workspace', timeoutMs: 1234, abortSignal: undefined },
  });
  expect(JSON.stringify(permissions[0])).toInclude('echo test');
});

// Checks the tool names exposed by httpTool().
tap.test('httpTool returns expected tool names', async () => {
  const tools = httpTool();
  const names = Object.keys(tools);
  expect(names).toContain('http_get');
  expect(names).toContain('http_post');
});

// Checks the tool names exposed by jsonTool().
tap.test('jsonTool returns expected tool names', async () => {
  const tools = jsonTool();
  const names = Object.keys(tools);
  expect(names).toContain('json_validate');
  expect(names).toContain('json_transform');
});

// Verifies that the filesystem tools delegate to the injected fs implementation,
// that `includeDelete: false` omits delete_file, and that permission is requested.
tap.test('createFilesystemTools should execute through supplied context', async () => {
  const permissions: unknown[] = [];
  // In-memory stand-in for the file system, keyed by path.
  const files = new Map<string, string>([['hello.txt', 'line1\nline2\nline3']]);
  const tools = createFilesystemTools({
    requestPermission: async (request) => {
      permissions.push(request);
    },
    fs: {
      readFile: async (filePath, options) => {
        const content = files.get(filePath) ?? '';
        if (options?.startLine || options?.endLine) {
          // Line numbers are treated as 1-based and inclusive by this fake.
          const lines = content.split('\n');
          return lines.slice((options.startLine ?? 1) - 1, options.endLine ?? lines.length).join('\n');
        }
        return content;
      },
      writeFile: async (filePath, content) => {
        files.set(filePath, content);
        return 'written';
      },
      listDirectory: async () => [...files.keys()],
    },
  }, { includeDelete: false });

  const readResult = await (tools.read_file as any).execute({ path: 'hello.txt', startLine: 2, endLine: 2 });
  const writeResult = await (tools.write_file as any).execute({ path: 'created.txt', content: 'created' });
  const listResult = await (tools.list_directory as any).execute({ path: '.' });

  expect(readResult).toEqual('line2');
  expect(writeResult).toEqual('written');
  expect(listResult).toInclude('created.txt');
  expect(Object.keys(tools)).not.toContain('delete_file');
  expect(JSON.stringify(permissions[0])).toInclude('created.txt');
});

// Verifies that the browser tool delegates to the injected browser implementation
// and requests permission with the target URL.
tap.test('createBrowserTools should execute through supplied browser context', async () => {
  const permissions: unknown[] = [];
  const calls: unknown[] = [];
  const tools = createBrowserTools({
    requestPermission: async (request) => {
      permissions.push(request);
    },
    browser: {
      // Fake browser: records the call and echoes the action and URL back.
      execute: async (input, options) => {
        calls.push({ input, options });
        return `browser:${input.action}:${input.url ?? ''}`;
      },
    },
  });

  const result = await (tools.browser as any).execute({
    action: 'navigate',
    url: 'https://example.com',
    timeoutMs: 500,
  });

  expect(result).toEqual('browser:navigate:https://example.com');
  expect(JSON.stringify(calls[0])).toInclude('navigate');
  expect(JSON.stringify(permissions[0])).toInclude('https://example.com');
});

// Well-formed JSON is reported as valid.
tap.test('json_validate tool should validate valid JSON', async () => {
  const tools = jsonTool();
  const result = await (tools.json_validate as any).execute({
    jsonString: '{"name":"test","value":42}',
  });
  expect(result).toInclude('Valid JSON');
});

// Malformed JSON is reported as invalid in the returned text.
tap.test('json_validate tool should detect invalid JSON', async () => {
  const tools = jsonTool();
  const result = await (tools.json_validate as any).execute({
    jsonString: '{invalid json',
  });
  expect(result).toInclude('Invalid JSON');
});

// A required field absent from the document is named in the result.
tap.test('json_validate tool should check required fields', async () => {
  const tools = jsonTool();
  const result = await (tools.json_validate as any).execute({
    jsonString: '{"name":"test"}',
    requiredFields: ['name', 'missing_field'],
  });
  expect(result).toInclude('missing_field');
});

// Compact JSON comes back with spacing between key and value.
tap.test('json_transform tool should pretty-print JSON', async () => {
  const tools = jsonTool();
  const result = await (tools.json_transform as any).execute({
    jsonString: '{"a":1,"b":2}',
  });
  expect(result).toInclude(' "a": 1');
});

// ============================================================
// Compaction export
// ============================================================

// Only checks that the compaction entry point exports a callable.
tap.test('compactMessages should be a function', async () => {
  expect(compactMessages).toBeTypeOf('function');
});

// ============================================================
// Filesystem tool read/write round-trip
// ============================================================

// Writes a file through the real filesystem tool, reads it back, then deletes it.
tap.test('filesystem tool should write and read a file', async () => {
  // Timestamp suffix keeps repeated runs from colliding on the same directory.
  const tmpDir = '/tmp/smartagent-test-' + Date.now();
  const filePath = tmpDir + '/hello.txt';
  const tools = filesystemTool({ rootDir: tmpDir });
  let written = false;

  try {
    await (tools.write_file as any).execute({
      path: filePath,
      content: 'Hello, world!',
    });
    written = true;

    const content = await (tools.read_file as any).execute({
      path: filePath,
    });
    expect(content).toInclude('Hello, world!');
  } finally {
    // Cleanup runs even when the read or the assertion fails, so a failing
    // test does not leave the file behind. Skipped if the write never
    // succeeded, to avoid masking the original error with a delete failure.
    if (written) {
      await (tools.delete_file as any).execute({
        path: filePath,
      });
    }
  }
});

// Verifies that reading a path outside rootDir throws an 'Access denied' error.
tap.test('filesystem tool should enforce rootDir restriction', async () => {
  const tools = filesystemTool({ rootDir: '/tmp/restricted' });
  let threw = false;
  try {
    await (tools.read_file as any).execute({ path: '/etc/passwd' });
  } catch (e) {
    threw = true;
    expect((e as Error).message).toInclude('Access denied');
  }
  // Guards against the read resolving without throwing at all.
  expect(threw).toBeTrue();
});

// Starts the registered tests; the result is exported as the module default.
export default tap.start();