/**
 * End-to-end tests for `runAgent`, run against a live Anthropic model.
 *
 * The suite reads `ANTHROPIC_TOKEN` via qenv; when the token is missing the
 * setup step logs a notice and exits the process with status 0, so none of
 * the tests below run. Tests that touch the filesystem share one scratch
 * directory that is created during setup and removed by the cleanup step.
 */
import { expect, tap } from '@git.zone/tstest/tapbundle';
import * as qenv from '@push.rocks/qenv';
import * as path from 'path';
import * as fs from 'fs';
import { runAgent, tool, z, ToolRegistry } from '../ts/index.js';
import { filesystemTool, shellTool } from '../ts_tools/index.js';

const envLoader = new qenv.Qenv('./', './.nogit/');

// Assigned by the setup test; every later test passes it to runAgent.
let model: any;

// Scratch directory, made unique per run by the millisecond timestamp.
const workspaceDir = `/tmp/smartagent-e2e-${Date.now()}`;

tap.test('setup: create model and workspace', async () => {
  const apiKey = await envLoader.getEnvVarOnDemand('ANTHROPIC_TOKEN');
  if (!apiKey) {
    // Without credentials nothing below can run, so end the process successfully.
    console.log('ANTHROPIC_TOKEN not set — skipping all E2E tests');
    process.exit(0);
  }

  const { getModel } = await import('@push.rocks/smartai');
  model = getModel({
    provider: 'anthropic',
    model: 'claude-sonnet-4-5-20250929',
    apiKey,
  });

  fs.mkdirSync(workspaceDir, { recursive: true });
  console.log(` Workspace: ${workspaceDir}`);
});

// ============================================================
// Test 1: Simple tool call
// ============================================================

tap.test('agent should call a single tool and incorporate the result', async () => {
  // Flipped by the tool body so the test can prove the tool really ran.
  let timeToolInvoked = false;

  const getTime = tool({
    description: 'Returns the current ISO timestamp',
    inputSchema: z.object({}),
    execute: async () => {
      timeToolInvoked = true;
      return new Date().toISOString();
    },
  });

  const outcome = await runAgent({
    model,
    prompt: 'What is the current time? Use the get_time tool.',
    system: 'You are a helpful assistant. Use tools when asked.',
    tools: { get_time: getTime },
    maxSteps: 5,
  });

  console.log(` Response: ${outcome.text.slice(0, 150)}`);
  console.log(` Steps: ${outcome.steps}, Tokens: ${outcome.usage.totalTokens}`);

  expect(timeToolInvoked).toBeTrue();
  expect(outcome.text).toBeTruthy();
  expect(outcome.usage.totalTokens).toBeGreaterThan(0);
});

// ============================================================
// Test 2: Multiple tools — agent chooses which to use
// ============================================================

tap.test('agent should pick the right tool from multiple options', async () => {
  type CalculatorInput = { operation: string; a: number; b: number };

  // One entry per tool invocation, in call order.
  const invocations: string[] = [];

  // Lookup table for the supported arithmetic operations.
  const arithmetic = new Map<string, (a: number, b: number) => string>([
    ['add', (a, b) => String(a + b)],
    ['subtract', (a, b) => String(a - b)],
    ['multiply', (a, b) => String(a * b)],
    [
      'divide',
      (a, b) => {
        if (b !== 0) {
          return String(a / b);
        }
        return 'Error: division by zero';
      },
    ],
  ]);

  const calculator = tool({
    description: 'Perform arithmetic. Supports add, subtract, multiply, divide.',
    inputSchema: z.object({
      operation: z.enum(['add', 'subtract', 'multiply', 'divide']),
      a: z.number(),
      b: z.number(),
    }),
    execute: async ({ operation, a, b }: CalculatorInput) => {
      invocations.push(`calculator:${operation}(${a}, ${b})`);
      const compute = arithmetic.get(operation);
      return compute ? compute(a, b) : 'Unknown operation';
    },
  });

  // Decoy tool: the agent is expected to leave it alone.
  const getWeather = tool({
    description: 'Get current weather for a city',
    inputSchema: z.object({ city: z.string() }),
    execute: async () => {
      invocations.push('get_weather');
      return 'Sunny, 22°C';
    },
  });

  const outcome = await runAgent({
    model,
    prompt: 'Add 7 and 35 using the calculator tool.',
    system: 'You are a helpful assistant. Use the appropriate tool to answer.',
    tools: { calculator, get_weather: getWeather },
    maxSteps: 5,
  });

  console.log(` Tool calls: ${invocations.join(', ')}`);
  console.log(` Response: ${outcome.text.slice(0, 150)}`);

  const usedAddition = invocations.some((entry) => entry.startsWith('calculator:add'));
  expect(usedAddition).toBeTrue();
  expect(invocations).not.toContain('get_weather');
  expect(outcome.text).toInclude('42');
});

// ============================================================
// Test 3: Multi-step — agent uses filesystem tools
// ============================================================

tap.test('agent should use filesystem tools for a multi-step task', async () => {
  const fileTools = filesystemTool({ rootDir: workspaceDir });

  const outcome = await runAgent({
    model,
    prompt: `Create a file called "greeting.txt" in ${workspaceDir} with the content "Hello from smartagent!". Then read it back and tell me what it says.`,
    system: 'You are a helpful assistant that works with files. Use the provided tools.',
    tools: fileTools,
    maxSteps: 10,
  });

  console.log(` Steps: ${outcome.steps}`);
  console.log(` Response: ${outcome.text.slice(0, 200)}`);

  // Check the file on disk instead of trusting the agent's own report.
  const greetingPath = path.join(workspaceDir, 'greeting.txt');
  expect(fs.existsSync(greetingPath)).toBeTrue();

  const written = fs.readFileSync(greetingPath, 'utf-8');
  expect(written).toInclude('Hello from smartagent');
  expect(outcome.steps).toBeGreaterThanOrEqual(2);
});

// ============================================================
// Test 4: ToolRegistry usage
// ============================================================

tap.test('agent should work with ToolRegistry', async () => {
  const registry = new ToolRegistry();

  const randomNumber = tool({
    description: 'Generate a random integer between min and max (inclusive)',
    inputSchema: z.object({
      min: z.number().describe('Minimum value'),
      max: z.number().describe('Maximum value'),
    }),
    execute: async ({ min, max }: { min: number; max: number }) => {
      // Count of candidate values in the inclusive range [min, max].
      const span = max - min + 1;
      return String(min + Math.floor(Math.random() * span));
    },
  });
  registry.register('random_number', randomNumber);

  const isEven = tool({
    description: 'Check if a number is even',
    inputSchema: z.object({ number: z.number() }),
    execute: async ({ number: candidate }: { number: number }) => {
      if (candidate % 2 === 0) {
        return 'Yes, it is even';
      }
      return 'No, it is odd';
    },
  });
  registry.register('is_even', isEven);

  const outcome = await runAgent({
    model,
    prompt:
      'Generate a random number between 1 and 100, then check if it is even or odd. Tell me both the number and whether it is even.',
    system: 'You are a helpful assistant. Use tools step by step.',
    tools: registry.getTools(),
    maxSteps: 10,
  });

  console.log(` Response: ${outcome.text.slice(0, 200)}`);

  expect(outcome.text).toBeTruthy();
  expect(outcome.steps).toBeGreaterThanOrEqual(2);
});

// ============================================================
// Test 5: Streaming callbacks
// ============================================================

tap.test('agent should fire onToken and onToolCall callbacks', async () => {
  const streamedChunks: string[] = [];
  const observedTools: string[] = [];

  const echo = tool({
    description: 'Echo back the provided text',
    inputSchema: z.object({ text: z.string() }),
    execute: async ({ text }: { text: string }) => text,
  });

  const outcome = await runAgent({
    model,
    prompt: 'Use the echo tool to echo "test123".',
    system: 'You are a helpful assistant. Use tools when asked.',
    tools: { echo },
    maxSteps: 5,
    onToken: (chunk) => streamedChunks.push(chunk),
    onToolCall: (toolName) => observedTools.push(toolName),
  });

  console.log(` Streamed ${streamedChunks.length} token chunks`);
  console.log(` Tool calls observed: ${observedTools.join(', ')}`);

  expect(streamedChunks.length).toBeGreaterThan(0);
  expect(observedTools).toContain('echo');
  expect(outcome.text).toInclude('test123');
});

// ============================================================
// Test 6: Shell tool integration
// ============================================================

tap.test('agent should use shell tool to run a command', async () => {
  const shellTools = shellTool();

  const outcome = await runAgent({
    model,
    prompt: 'Run the command "echo hello_smartagent" and tell me what it outputs.',
    system: 'You are a helpful assistant that can run shell commands.',
    tools: shellTools,
    maxSteps: 5,
  });

  console.log(` Response: ${outcome.text.slice(0, 200)}`);

  expect(outcome.text).toInclude('hello_smartagent');
});

// ============================================================
// Cleanup
// ============================================================

tap.test('cleanup: remove workspace', async () => {
  // `force` keeps this from throwing when the directory is already gone.
  fs.rmSync(workspaceDir, { recursive: true, force: true });
  console.log(` Cleaned up ${workspaceDir}`);
});

export default tap.start();