240 lines
8.1 KiB
TypeScript
240 lines
8.1 KiB
TypeScript
import { expect, tap } from '@git.zone/tstest/tapbundle';
|
|
import * as qenv from '@push.rocks/qenv';
|
|
import * as path from 'path';
|
|
import * as fs from 'fs';
|
|
import { runAgent, tool, z, ToolRegistry } from '../ts/index.js';
|
|
import { filesystemTool, shellTool } from '../ts_tools/index.js';
|
|
|
|
// Qenv instance the setup test queries for ANTHROPIC_TOKEN.
const testQenv = new qenv.Qenv('./', './.nogit/');

// Shared model handle: assigned once by the setup test, then passed to every runAgent call.
let model: any;

// Scratch directory for this run, suffixed with the current epoch milliseconds.
const workDir = `/tmp/smartagent-e2e-${Date.now()}`;
|
|
|
|
// Setup step: looks up the Anthropic token, builds the shared model and
// creates the scratch workspace directory used by the filesystem test.
tap.test('setup: create model and workspace', async () => {
  const anthropicToken = await testQenv.getEnvVarOnDemand('ANTHROPIC_TOKEN');

  // No token means no test below can talk to the provider, so the whole
  // process ends here with exit code 0 instead of reporting failures.
  if (!anthropicToken) {
    console.log('ANTHROPIC_TOKEN not set — skipping all E2E tests');
    process.exit(0);
  }

  // Dynamic import: the provider package is only loaded once a token exists.
  const { getModel: createModel } = await import('@push.rocks/smartai');
  model = createModel({
    provider: 'anthropic',
    model: 'claude-sonnet-4-5-20250929',
    apiKey: anthropicToken,
  });

  fs.mkdirSync(workDir, { recursive: true });
  console.log(` Workspace: ${workDir}`);
});
|
|
|
|
// ============================================================
|
|
// Test 1: Simple tool call
|
|
// ============================================================
|
|
|
|
// Offers one zero-argument tool and asks the agent to use it. Passes when the
// tool body actually ran, the reply text is non-empty and token usage is positive.
tap.test('agent should call a single tool and incorporate the result', async () => {
  // Flipped inside the tool body so the assertions can tell it was executed.
  let timeToolInvoked = false;

  const getTime = tool({
    description: 'Returns the current ISO timestamp',
    inputSchema: z.object({}),
    execute: async () => {
      timeToolInvoked = true;
      const now = new Date();
      return now.toISOString();
    },
  });

  const outcome = await runAgent({
    model,
    prompt: 'What is the current time? Use the get_time tool.',
    system: 'You are a helpful assistant. Use tools when asked.',
    tools: { get_time: getTime },
    maxSteps: 5,
  });

  // Only the first 150 characters of the reply are logged.
  const preview = outcome.text.substring(0, 150);
  console.log(` Response: ${preview}`);
  console.log(` Steps: ${outcome.steps}, Tokens: ${outcome.usage.totalTokens}`);

  expect(timeToolInvoked).toBeTrue();
  expect(outcome.text).toBeTruthy();
  expect(outcome.usage.totalTokens).toBeGreaterThan(0);
});
|
|
|
|
// ============================================================
|
|
// Test 2: Multiple tools — agent chooses which to use
|
|
// ============================================================
|
|
|
|
// Offers a calculator and an unrelated weather tool, then asks for 7 + 35.
// Passes when an "add" calculator call was logged, the weather tool was never
// logged and the reply text includes "42".
tap.test('agent should pick the right tool from multiple options', async () => {
  // Every tool execution appends an entry here, in call order.
  const invocations: string[] = [];

  // Arithmetic handlers keyed by operation name; each returns its result as text.
  // A Map (not a plain object) so only these four names ever resolve.
  const arithmetic = new Map<string, (a: number, b: number) => string>([
    ['add', (a, b) => String(a + b)],
    ['subtract', (a, b) => String(a - b)],
    ['multiply', (a, b) => String(a * b)],
    ['divide', (a, b) => (b === 0 ? 'Error: division by zero' : String(a / b))],
  ]);

  const calculator = tool({
    description: 'Perform arithmetic. Supports add, subtract, multiply, divide.',
    inputSchema: z.object({
      operation: z.enum(['add', 'subtract', 'multiply', 'divide']),
      a: z.number(),
      b: z.number(),
    }),
    execute: async (args: { operation: string; a: number; b: number }) => {
      const { operation, a, b } = args;
      // Log first, so the call is recorded whatever the operation turns out to be.
      invocations.push(`calculator:${operation}(${a}, ${b})`);
      const compute = arithmetic.get(operation);
      return compute ? compute(a, b) : 'Unknown operation';
    },
  });

  const weather = tool({
    description: 'Get current weather for a city',
    inputSchema: z.object({ city: z.string() }),
    execute: async () => {
      invocations.push('get_weather');
      return 'Sunny, 22°C';
    },
  });

  const outcome = await runAgent({
    model,
    prompt: 'Add 7 and 35 using the calculator tool.',
    system: 'You are a helpful assistant. Use the appropriate tool to answer.',
    tools: { calculator, get_weather: weather },
    maxSteps: 5,
  });

  console.log(` Tool calls: ${invocations.join(', ')}`);
  console.log(` Response: ${outcome.text.substring(0, 150)}`);

  const usedAddition = invocations.some((entry) => entry.startsWith('calculator:add'));
  expect(usedAddition).toBeTrue();
  expect(invocations).not.toContain('get_weather');
  expect(outcome.text).toInclude('42');
});
|
|
|
|
// ============================================================
|
|
// Test 3: Multi-step — agent uses filesystem tools
|
|
// ============================================================
|
|
|
|
// Gives the agent filesystem tools rooted at workDir and asks it to write a
// file and read it back. Passes when the file exists on disk with the expected
// text and the run reported at least two steps.
tap.test('agent should use filesystem tools for a multi-step task', async () => {
  const workspaceTools = filesystemTool({ rootDir: workDir });
  const task = `Create a file called "greeting.txt" in ${workDir} with the content "Hello from smartagent!". Then read it back and tell me what it says.`;

  const outcome = await runAgent({
    model,
    prompt: task,
    system: 'You are a helpful assistant that works with files. Use the provided tools.',
    tools: workspaceTools,
    maxSteps: 10,
  });

  console.log(` Steps: ${outcome.steps}`);
  console.log(` Response: ${outcome.text.substring(0, 200)}`);

  // Check the disk directly rather than trusting the agent's reply.
  const greetingPath = path.join(workDir, 'greeting.txt');
  const greetingExists = fs.existsSync(greetingPath);
  expect(greetingExists).toBeTrue();

  const written = fs.readFileSync(greetingPath, 'utf-8');
  expect(written).toInclude('Hello from smartagent');
  expect(outcome.steps).toBeGreaterThanOrEqual(2);
});
|
|
|
|
// ============================================================
|
|
// Test 4: ToolRegistry usage
|
|
// ============================================================
|
|
|
|
// Registers two tools on a ToolRegistry and hands registry.getTools() to the
// agent. Passes when the reply text is non-empty and at least two steps ran.
tap.test('agent should work with ToolRegistry', async () => {
  const randomNumber = tool({
    description: 'Generate a random integer between min and max (inclusive)',
    inputSchema: z.object({
      min: z.number().describe('Minimum value'),
      max: z.number().describe('Maximum value'),
    }),
    execute: async (bounds: { min: number; max: number }) => {
      const { min, max } = bounds;
      // +1 makes the upper bound inclusive.
      const span = max - min + 1;
      const offset = Math.floor(Math.random() * span);
      return String(offset + min);
    },
  });

  const isEven = tool({
    description: 'Check if a number is even',
    inputSchema: z.object({ number: z.number() }),
    execute: async (input: { number: number }) => {
      if (input.number % 2 === 0) {
        return 'Yes, it is even';
      }
      return 'No, it is odd';
    },
  });

  const toolRegistry = new ToolRegistry();
  toolRegistry.register('random_number', randomNumber);
  toolRegistry.register('is_even', isEven);

  const outcome = await runAgent({
    model,
    prompt: 'Generate a random number between 1 and 100, then check if it is even or odd. Tell me both the number and whether it is even.',
    system: 'You are a helpful assistant. Use tools step by step.',
    tools: toolRegistry.getTools(),
    maxSteps: 10,
  });

  console.log(` Response: ${outcome.text.substring(0, 200)}`);

  expect(outcome.text).toBeTruthy();
  expect(outcome.steps).toBeGreaterThanOrEqual(2);
});
|
|
|
|
// ============================================================
|
|
// Test 5: Streaming callbacks
|
|
// ============================================================
|
|
|
|
// Passes onToken and onToolCall callbacks to runAgent and records what they
// receive. Passes when at least one token chunk arrived, 'echo' was reported
// as a tool call and the reply text includes "test123".
tap.test('agent should fire onToken and onToolCall callbacks', async () => {
  const streamedChunks: string[] = [];
  const observedToolNames: string[] = [];

  const echo = tool({
    description: 'Echo back the provided text',
    inputSchema: z.object({ text: z.string() }),
    execute: async (input: { text: string }) => input.text,
  });

  const outcome = await runAgent({
    model,
    prompt: 'Use the echo tool to echo "test123".',
    system: 'You are a helpful assistant. Use tools when asked.',
    tools: { echo },
    maxSteps: 5,
    // Both callbacks only collect what they are handed.
    onToken: (chunk) => streamedChunks.push(chunk),
    onToolCall: (toolName) => observedToolNames.push(toolName),
  });

  console.log(` Streamed ${streamedChunks.length} token chunks`);
  console.log(` Tool calls observed: ${observedToolNames.join(', ')}`);

  expect(streamedChunks.length).toBeGreaterThan(0);
  expect(observedToolNames).toContain('echo');
  expect(outcome.text).toInclude('test123');
});
|
|
|
|
// ============================================================
|
|
// Test 6: Shell tool integration
|
|
// ============================================================
|
|
|
|
// Hands the agent the tools returned by shellTool() and asks it to run an echo
// command. Passes when the reply text includes the echoed marker.
tap.test('agent should use shell tool to run a command', async () => {
  const shellTools = shellTool();

  const outcome = await runAgent({
    model,
    prompt: `Run the command "echo hello_smartagent" and tell me what it outputs.`,
    system: 'You are a helpful assistant that can run shell commands.',
    tools: shellTools,
    maxSteps: 5,
  });

  const preview = outcome.text.substring(0, 200);
  console.log(` Response: ${preview}`);

  expect(outcome.text).toInclude('hello_smartagent');
});
|
|
|
|
// ============================================================
|
|
// Cleanup
|
|
// ============================================================
|
|
|
|
// Teardown step: deletes the scratch workspace directory and everything in it.
tap.test('cleanup: remove workspace', async () => {
  // force: true keeps this from throwing if the directory is already gone.
  const removal = { recursive: true, force: true };
  fs.rmSync(workDir, removal);

  console.log(` Cleaned up ${workDir}`);
});
|
|
|
|
// Start the registered tests and expose the value returned by tap.start()
// as this module's default export.
const testRun = tap.start();
export default testRun;
|