BREAKING CHANGE(api): Migrate public API to ai-sdk v6 and refactor core agent architecture: replace class-based DualAgent/Driver/Guardian with a single runAgent function; introduce ts_tools factories for tools, a compactMessages compaction subpath, and truncateOutput utility; simplify ToolRegistry to return ToolSet and remove legacy BaseToolWrapper/tool classes; update package exports and dependencies and bump major version.
This commit is contained in:
239
test/test.agent-e2e.ts
Normal file
239
test/test.agent-e2e.ts
Normal file
@@ -0,0 +1,239 @@
|
||||
import { expect, tap } from '@git.zone/tstest/tapbundle';
|
||||
import * as qenv from '@push.rocks/qenv';
|
||||
import * as path from 'path';
|
||||
import * as fs from 'fs';
|
||||
import { runAgent, tool, z, ToolRegistry } from '../ts/index.js';
|
||||
import { filesystemTool, shellTool } from '../ts_tools/index.js';
|
||||
|
||||
// Environment accessor used by the setup test to look up ANTHROPIC_TOKEN.
// NOTE(review): the two arguments are presumably the project directory and a
// directory holding local env files — confirm against the Qenv documentation.
const testQenv = new qenv.Qenv('./', './.nogit/');

// Assigned by the setup test and then handed to every runAgent call below.
let model: any;

// Scratch directory for this run; the timestamp suffix keeps runs apart.
const workDir = `/tmp/smartagent-e2e-${Date.now()}`;
|
||||
|
||||
// Setup: looks up the Anthropic token, builds the shared model and creates the
// scratch workspace. When no token is available the process exits with code 0,
// so none of the tests registered after this one run.
tap.test('setup: create model and workspace', async () => {
  const anthropicToken = await testQenv.getEnvVarOnDemand('ANTHROPIC_TOKEN');
  if (!anthropicToken) {
    console.log('ANTHROPIC_TOKEN not set — skipping all E2E tests');
    process.exit(0);
  }

  // Imported dynamically, i.e. only after a token is known to be present.
  const { getModel: createModel } = await import('@push.rocks/smartai');
  model = createModel({
    provider: 'anthropic',
    model: 'claude-sonnet-4-5-20250929',
    apiKey: anthropicToken,
  });

  fs.mkdirSync(workDir, { recursive: true });
  console.log(` Workspace: ${workDir}`);
});
|
||||
|
||||
// ============================================================
|
||||
// Test 1: Simple tool call
|
||||
// ============================================================
|
||||
|
||||
// Runs the agent with one zero-argument tool and checks that the tool was
// executed, that a non-empty answer came back and that token usage is reported.
tap.test('agent should call a single tool and incorporate the result', async () => {
  let timeToolInvoked = false;

  // Tool under test: flips the flag above and returns the current ISO timestamp.
  const getTime = tool({
    description: 'Returns the current ISO timestamp',
    inputSchema: z.object({}),
    execute: async () => {
      timeToolInvoked = true;
      return new Date().toISOString();
    },
  });

  const result = await runAgent({
    model,
    prompt: 'What is the current time? Use the get_time tool.',
    system: 'You are a helpful assistant. Use tools when asked.',
    tools: { get_time: getTime },
    maxSteps: 5,
  });

  const preview = result.text.substring(0, 150);
  console.log(` Response: ${preview}`);
  console.log(` Steps: ${result.steps}, Tokens: ${result.usage.totalTokens}`);

  expect(timeToolInvoked).toBeTrue();
  expect(result.text).toBeTruthy();
  expect(result.usage.totalTokens).toBeGreaterThan(0);
});
|
||||
|
||||
// ============================================================
|
||||
// Test 2: Multiple tools — agent chooses which to use
|
||||
// ============================================================
|
||||
|
||||
// Offers the agent a calculator and an unrelated weather tool, asks for an
// addition, and checks that only the calculator's add operation was used and
// that the answer mentions 42.
tap.test('agent should pick the right tool from multiple options', async () => {
  // Every tool execution appends an entry here so the choice can be inspected.
  const invocations: string[] = [];

  const calculator = tool({
    description: 'Perform arithmetic. Supports add, subtract, multiply, divide.',
    inputSchema: z.object({
      operation: z.enum(['add', 'subtract', 'multiply', 'divide']),
      a: z.number(),
      b: z.number(),
    }),
    execute: async ({ operation, a, b }: { operation: string; a: number; b: number }) => {
      invocations.push(`calculator:${operation}(${a}, ${b})`);
      if (operation === 'add') {
        return String(a + b);
      }
      if (operation === 'subtract') {
        return String(a - b);
      }
      if (operation === 'multiply') {
        return String(a * b);
      }
      if (operation === 'divide') {
        // Division by zero is reported as text instead of returning Infinity/NaN.
        return b === 0 ? 'Error: division by zero' : String(a / b);
      }
      return 'Unknown operation';
    },
  });

  const getWeather = tool({
    description: 'Get current weather for a city',
    inputSchema: z.object({ city: z.string() }),
    execute: async () => {
      invocations.push('get_weather');
      return 'Sunny, 22°C';
    },
  });

  const result = await runAgent({
    model,
    prompt: 'Add 7 and 35 using the calculator tool.',
    system: 'You are a helpful assistant. Use the appropriate tool to answer.',
    tools: {
      calculator,
      get_weather: getWeather,
    },
    maxSteps: 5,
  });

  console.log(` Tool calls: ${invocations.join(', ')}`);
  console.log(` Response: ${result.text.substring(0, 150)}`);

  const usedAddition = invocations.some((entry) => entry.startsWith('calculator:add'));
  expect(usedAddition).toBeTrue();
  expect(invocations).not.toContain('get_weather');
  expect(result.text).toInclude('42');
});
|
||||
|
||||
// ============================================================
|
||||
// Test 3: Multi-step — agent uses filesystem tools
|
||||
// ============================================================
|
||||
|
||||
// Asks the agent to create and re-read a file through the filesystem tools
// (rooted at workDir), then checks on disk that the file exists with the
// expected text and that the run took at least two steps.
tap.test('agent should use filesystem tools for a multi-step task', async () => {
  const result = await runAgent({
    model,
    prompt: `Create a file called "greeting.txt" in ${workDir} with the content "Hello from smartagent!". Then read it back and tell me what it says.`,
    system: 'You are a helpful assistant that works with files. Use the provided tools.',
    tools: filesystemTool({ rootDir: workDir }),
    maxSteps: 10,
  });

  console.log(` Steps: ${result.steps}`);
  console.log(` Response: ${result.text.substring(0, 200)}`);

  // Inspect the filesystem directly rather than trusting the agent's answer.
  const greetingPath = path.join(workDir, 'greeting.txt');
  const greetingExists = fs.existsSync(greetingPath);
  expect(greetingExists).toBeTrue();

  const greetingText = fs.readFileSync(greetingPath, 'utf-8');
  expect(greetingText).toInclude('Hello from smartagent');
  expect(result.steps).toBeGreaterThanOrEqual(2);
});
|
||||
|
||||
// ============================================================
|
||||
// Test 4: ToolRegistry usage
|
||||
// ============================================================
|
||||
|
||||
// Registers two tools on a ToolRegistry, passes registry.getTools() to the
// agent and expects a non-empty answer reached in at least two steps.
tap.test('agent should work with ToolRegistry', async () => {
  const registry = new ToolRegistry();

  const randomNumberTool = tool({
    description: 'Generate a random integer between min and max (inclusive)',
    inputSchema: z.object({
      min: z.number().describe('Minimum value'),
      max: z.number().describe('Maximum value'),
    }),
    execute: async ({ min, max }: { min: number; max: number }) => {
      // +1 makes the upper bound inclusive.
      const span = max - min + 1;
      const picked = Math.floor(Math.random() * span) + min;
      return String(picked);
    },
  });
  registry.register('random_number', randomNumberTool);

  const isEvenTool = tool({
    description: 'Check if a number is even',
    inputSchema: z.object({ number: z.number() }),
    execute: async ({ number: candidate }: { number: number }) => {
      if (candidate % 2 === 0) {
        return 'Yes, it is even';
      }
      return 'No, it is odd';
    },
  });
  registry.register('is_even', isEvenTool);

  const result = await runAgent({
    model,
    prompt: 'Generate a random number between 1 and 100, then check if it is even or odd. Tell me both the number and whether it is even.',
    system: 'You are a helpful assistant. Use tools step by step.',
    tools: registry.getTools(),
    maxSteps: 10,
  });

  console.log(` Response: ${result.text.substring(0, 200)}`);
  expect(result.text).toBeTruthy();
  expect(result.steps).toBeGreaterThanOrEqual(2);
});
|
||||
|
||||
// ============================================================
|
||||
// Test 5: Streaming callbacks
|
||||
// ============================================================
|
||||
|
||||
// Passes onToken and onToolCall callbacks to runAgent and checks that text
// chunks were delivered, that the echo tool call was reported and that the
// final answer contains the echoed text.
tap.test('agent should fire onToken and onToolCall callbacks', async () => {
  const streamedChunks: string[] = [];
  const observedToolNames: string[] = [];

  const echoTool = tool({
    description: 'Echo back the provided text',
    inputSchema: z.object({ text: z.string() }),
    execute: async ({ text }: { text: string }) => text,
  });

  const result = await runAgent({
    model,
    prompt: 'Use the echo tool to echo "test123".',
    system: 'You are a helpful assistant. Use tools when asked.',
    tools: { echo: echoTool },
    maxSteps: 5,
    onToken: (chunk) => streamedChunks.push(chunk),
    onToolCall: (toolName) => observedToolNames.push(toolName),
  });

  console.log(` Streamed ${streamedChunks.length} token chunks`);
  console.log(` Tool calls observed: ${observedToolNames.join(', ')}`);

  expect(streamedChunks.length).toBeGreaterThan(0);
  expect(observedToolNames).toContain('echo');
  expect(result.text).toInclude('test123');
});
|
||||
|
||||
// ============================================================
|
||||
// Test 6: Shell tool integration
|
||||
// ============================================================
|
||||
|
||||
// Gives the agent the shell tool set, asks it to run an echo command and
// expects the echoed marker to appear in the final answer.
tap.test('agent should use shell tool to run a command', async () => {
  const shellTools = shellTool();

  const result = await runAgent({
    model,
    prompt: `Run the command "echo hello_smartagent" and tell me what it outputs.`,
    system: 'You are a helpful assistant that can run shell commands.',
    tools: shellTools,
    maxSteps: 5,
  });

  const preview = result.text.substring(0, 200);
  console.log(` Response: ${preview}`);
  expect(result.text).toInclude('hello_smartagent');
});
|
||||
|
||||
// ============================================================
|
||||
// Cleanup
|
||||
// ============================================================
|
||||
|
||||
// Teardown: deletes the scratch workspace recursively; `force` keeps this from
// throwing when the directory is already gone.
tap.test('cleanup: remove workspace', async () => {
  const removalOptions = { recursive: true, force: true };
  fs.rmSync(workDir, removalOptions);
  console.log(` Cleaned up ${workDir}`);
});
|
||||
|
||||
// Call tap.start() after all tap.test registrations above and expose its
// return value as this module's default export.
export default tap.start();
|
||||
294
test/test.ts
294
test/test.ts
@@ -1,150 +1,188 @@
|
||||
import { expect, tap } from '@git.zone/tstest/tapbundle';
|
||||
import * as smartagent from '../ts/index.js';
|
||||
import { filesystemTool, shellTool, httpTool, jsonTool, truncateOutput } from '../ts_tools/index.js';
|
||||
import { compactMessages } from '../ts_compaction/index.js';
|
||||
|
||||
// ============================================================
// Core exports
// ============================================================

// The package entry point must expose runAgent as a callable.
tap.test('should export runAgent function', async () => {
  expect(smartagent.runAgent).toBeTypeOf('function');
});
|
||||
|
||||
// ToolRegistry is a class, so its export is expected to be a constructor function.
tap.test('should export ToolRegistry class', async () => {
  expect(smartagent.ToolRegistry).toBeTypeOf('function');
});
|
||||
|
||||
// ContextOverflowError is a class, so its export is expected to be a constructor function.
tap.test('should export ContextOverflowError class', async () => {
  expect(smartagent.ContextOverflowError).toBeTypeOf('function');
});
|
||||
|
||||
// truncateOutput must also be reachable through the main package entry point.
tap.test('should export truncateOutput function', async () => {
  expect(smartagent.truncateOutput).toBeTypeOf('function');
});
|
||||
|
||||
// The `tool` helper is re-exported from the package entry point.
tap.test('should re-export tool helper', async () => {
  expect(smartagent.tool).toBeTypeOf('function');
});
|
||||
|
||||
// `z` is only checked for presence here, not for a particular type.
tap.test('should re-export z (zod)', async () => {
  expect(smartagent.z).toBeTruthy();
});
|
||||
|
||||
// stepCountIs is re-exported from the package entry point as a function.
tap.test('should re-export stepCountIs', async () => {
  expect(smartagent.stepCountIs).toBeTypeOf('function');
});
|
||||
|
||||
// ============================================================
// ToolRegistry
// ============================================================

// Registers one tool under the name 'echo' and checks that getTools() returns
// an object keyed by that name.
tap.test('ToolRegistry should register and return tools', async () => {
  const registry = new smartagent.ToolRegistry();
  const echoTool = smartagent.tool({
    description: 'Echo tool',
    inputSchema: smartagent.z.object({ text: smartagent.z.string() }),
    execute: async ({ text }: { text: string }) => text,
  });

  registry.register('echo', echoTool);
  const tools = registry.getTools();
  expect(Object.keys(tools)).toContain('echo');
});
|
||||
|
||||
// ============================================================
|
||||
// Truncation
|
||||
// ============================================================
|
||||
|
||||
// A short input must come back unchanged and flagged as not truncated.
tap.test('truncateOutput should not truncate short strings', async () => {
  const outcome = truncateOutput('hello world');

  expect(outcome.truncated).toBeFalse();
  expect(outcome.content).toEqual('hello world');
});
|
||||
|
||||
// 3000 lines against a 100-line limit must be reported as truncated, carry a
// notice and contain the truncation marker in the content.
tap.test('truncateOutput should truncate strings over maxLines', async () => {
  const rows: string[] = [];
  for (let i = 0; i < 3000; i++) {
    rows.push(`line ${i}`);
  }
  const manyLines = rows.join('\n');

  const outcome = truncateOutput(manyLines, { maxLines: 100 });

  expect(outcome.truncated).toBeTrue();
  expect(outcome.notice).toBeTruthy();
  expect(outcome.content).toInclude('[Output truncated');
});
|
||||
|
||||
// A 100,000-character input against a 1000-byte limit must be reported as truncated.
tap.test('truncateOutput should truncate strings over maxBytes', async () => {
  const oversized = 'x'.repeat(100_000);
  const { truncated } = truncateOutput(oversized, { maxBytes: 1000 });

  expect(truncated).toBeTrue();
});
|
||||
|
||||
// ============================================================
|
||||
// Tool factories
|
||||
// ============================================================
|
||||
|
||||
// The filesystem factory's tool set must contain these four tool names.
tap.test('filesystemTool returns expected tool names', async () => {
  const toolNames = Object.keys(filesystemTool());

  for (const expected of ['read_file', 'write_file', 'list_directory', 'delete_file']) {
    expect(toolNames).toContain(expected);
  }
});
|
||||
|
||||
// The shell factory's tool set must contain run_command.
tap.test('shellTool returns expected tool names', async () => {
  const toolNames = Object.keys(shellTool());

  expect(toolNames).toContain('run_command');
});
|
||||
|
||||
// The HTTP factory's tool set must contain http_get and http_post.
tap.test('httpTool returns expected tool names', async () => {
  const toolNames = Object.keys(httpTool());

  for (const expected of ['http_get', 'http_post']) {
    expect(toolNames).toContain(expected);
  }
});
|
||||
|
||||
// The JSON factory's tool set must contain json_validate and json_transform.
tap.test('jsonTool returns expected tool names', async () => {
  const toolNames = Object.keys(jsonTool());

  for (const expected of ['json_validate', 'json_transform']) {
    expect(toolNames).toContain(expected);
  }
});
|
||||
|
||||
// Calls json_validate's execute directly (no agent involved) with well-formed
// JSON and expects the returned text to mention 'Valid JSON'.
tap.test('json_validate tool should validate valid JSON', async () => {
  const { json_validate: validator } = jsonTool();
  const input = { jsonString: '{"name":"test","value":42}' };

  // Cast to any: execute is invoked here with only the input argument.
  const verdict = await (validator as any).execute(input);

  expect(verdict).toInclude('Valid JSON');
});
|
||||
|
||||
// Calls json_validate's execute directly with malformed JSON and expects the
// returned text to mention 'Invalid JSON'.
tap.test('json_validate tool should detect invalid JSON', async () => {
  const { json_validate: validator } = jsonTool();
  const input = { jsonString: '{invalid json' };

  const verdict = await (validator as any).execute(input);

  expect(verdict).toInclude('Invalid JSON');
});
|
||||
|
||||
// Supplies a requiredFields list containing a key the JSON lacks and expects
// the returned text to name that missing key.
tap.test('json_validate tool should check required fields', async () => {
  const { json_validate: validator } = jsonTool();
  const input = {
    jsonString: '{"name":"test"}',
    requiredFields: ['name', 'missing_field'],
  };

  const verdict = await (validator as any).execute(input);

  expect(verdict).toInclude('missing_field');
});
|
||||
|
||||
// Calls json_transform's execute directly with compact JSON and expects the
// output to contain the key/value pair in spaced, pretty-printed form.
tap.test('json_transform tool should pretty-print JSON', async () => {
  const { json_transform: transformer } = jsonTool();
  const input = { jsonString: '{"a":1,"b":2}' };

  const formatted = await (transformer as any).execute(input);

  expect(formatted).toInclude(' "a": 1');
});
|
||||
|
||||
// ============================================================
|
||||
// Compaction export
|
||||
// ============================================================
|
||||
|
||||
// Smoke check: the compaction entry point imported above is a callable.
tap.test('compactMessages should be a function', async () => {
  const exported = compactMessages;
  expect(exported).toBeTypeOf('function');
});
|
||||
|
||||
// ============================================================
|
||||
// Filesystem tool read/write round-trip
|
||||
// ============================================================
|
||||
|
||||
// Writes a file through write_file, reads it back through read_file and
// checks the content, calling each tool's execute directly (no agent involved).
tap.test('filesystem tool should write and read a file', async () => {
  const tmpDir = '/tmp/smartagent-test-' + Date.now();
  const filePath = tmpDir + '/hello.txt';
  const tools = filesystemTool({ rootDir: tmpDir });

  await (tools.write_file as any).execute({
    path: filePath,
    content: 'Hello, world!',
  });

  try {
    const content = await (tools.read_file as any).execute({
      path: filePath,
    });
    expect(content).toInclude('Hello, world!');
  } finally {
    // Cleanup runs even when the read or the assertion fails, so a failing
    // run does not leave the written file behind.
    await (tools.delete_file as any).execute({
      path: filePath,
    });
  }
});
|
||||
|
||||
// Reading a path outside the configured rootDir must reject with an error
// whose message contains 'Access denied'.
tap.test('filesystem tool should enforce rootDir restriction', async () => {
  const restrictedTools = filesystemTool({ rootDir: '/tmp/restricted' });
  let rejected = false;

  try {
    await (restrictedTools.read_file as any).execute({ path: '/etc/passwd' });
  } catch (err) {
    rejected = true;
    const { message } = err as Error;
    expect(message).toInclude('Access denied');
  }

  // Fails the test when execute resolved instead of throwing.
  expect(rejected).toBeTrue();
});
|
||||
|
||||
// Call tap.start() after all tap.test registrations above and expose its
// return value as this module's default export.
export default tap.start();
|
||||
|
||||
Reference in New Issue
Block a user