import * as plugins from './plugins.js';
import * as interfaces from './smartagent.interfaces.js';
import type { BaseToolWrapper } from './smartagent.tools.base.js';

/** History size forwarded to the provider when no explicit limit is configured. */
const DEFAULT_MAX_HISTORY_MESSAGES = 20;

/**
 * Markers (matched case-insensitively) that signal the driver considers the task finished.
 * The tag names must stay in sync with the system prompts built by DriverAgent.
 */
const COMPLETION_MARKERS: readonly string[] = [
  '<task_complete>',
  '<task_result>',
  'TASK COMPLETE',
  'Task completed successfully',
];

/**
 * Markers (matched case-insensitively) that signal the driver is asking for more input.
 * NOTE(review): the two tag names must match what the orchestrator expects - confirm.
 */
const CLARIFICATION_MARKERS: readonly string[] = [
  '<needs_clarification>',
  '<question>',
  'please clarify',
  'could you specify',
  'what do you mean by',
];

/** Request payload handed to the provider's chat / chatStreaming methods. */
interface IChatRequest {
  systemMessage: string;
  userMessage: string;
  messageHistory: plugins.smartai.ChatMessage[];
  images?: string[];
}

/**
 * Options for configuring the DriverAgent
 */
export interface IDriverAgentOptions {
  /** Custom system message for the driver */
  systemMessage?: string;
  /** Maximum history messages to pass to API (default: 20). Set to 0 for unlimited. */
  maxHistoryMessages?: number;
  /** Callback fired for each token during LLM generation */
  onToken?: (token: string) => void;
}

/**
 * DriverAgent - Executes tasks by reasoning and proposing tool calls
 * Works in conjunction with GuardianAgent for approval
 *
 * The agent keeps its own message history, sends it (optionally windowed) to
 * the provider on every turn, and offers helpers to parse the XML-style tags
 * (`<tool_call>`, `<task_complete>`, ...) out of the provider's replies.
 */
export class DriverAgent {
  private readonly provider: plugins.smartai.MultiModalModel;
  private readonly systemMessage: string;
  private readonly maxHistoryMessages: number;
  private messageHistory: plugins.smartai.ChatMessage[] = [];
  private readonly tools: Map<string, BaseToolWrapper> = new Map();
  private onToken?: (token: string) => void;

  /**
   * @param provider Model used to generate the driver's replies
   * @param options Either an options object or, for backward compatibility,
   *   a plain string that is used as the system message
   */
  constructor(
    provider: plugins.smartai.MultiModalModel,
    options?: IDriverAgentOptions | string
  ) {
    this.provider = provider;

    // Support both legacy string systemMessage and new options object
    if (typeof options === 'string') {
      // `||` is deliberate: an empty string falls back to the default prompt
      this.systemMessage = options || this.getDefaultSystemMessage();
      this.maxHistoryMessages = DEFAULT_MAX_HISTORY_MESSAGES;
    } else {
      this.systemMessage = options?.systemMessage || this.getDefaultSystemMessage();
      this.maxHistoryMessages = options?.maxHistoryMessages ?? DEFAULT_MAX_HISTORY_MESSAGES;
      this.onToken = options?.onToken;
    }
  }

  /**
   * Set the token callback for streaming mode
   * @param callback Function to call for each generated token
   */
  public setOnToken(callback: (token: string) => void): void {
    this.onToken = callback;
  }

  /**
   * Register a tool for use by the driver.
   * A tool registered under an already-used name replaces the previous one.
   */
  public registerTool(tool: BaseToolWrapper): void {
    this.tools.set(tool.name, tool);
  }

  /**
   * Get all registered tools, keyed by tool name.
   * The returned map is the agent's own instance, not a copy.
   */
  public getTools(): Map<string, BaseToolWrapper> {
    return this.tools;
  }

  /**
   * Initialize a new conversation for a task.
   * Clears any previous history, sends the task to the provider and records
   * both the task message and the reply.
   *
   * NOTE(review): the return type was reconstructed as interfaces.IAgentMessage - confirm.
   *
   * @param task The task description
   * @param images Optional base64-encoded images for vision tasks
   * @returns The assistant's first reply
   */
  public async startTask(task: string, images?: string[]): Promise<interfaces.IAgentMessage> {
    // Reset message history
    this.messageHistory = [];

    // The instructions differ depending on whether any tool can be proposed
    const userMessage =
      this.tools.size > 0
        ? `TASK: ${task}\n\nAnalyze this task and determine what actions are needed. If you need to use a tool, provide a tool call proposal.`
        : `TASK: ${task}\n\nComplete this task directly. When done, wrap your final output in <task_complete>your output here</task_complete> tags.`;

    this.messageHistory.push({
      role: 'user',
      content: userMessage,
    });

    // Images are sent with this first request only; they are not stored in the history
    const response = await this.requestCompletion({
      systemMessage: this.buildFullSystemMessage(),
      userMessage,
      messageHistory: [],
      images,
    });

    return this.recordAssistantReply(response);
  }

  /**
   * Continue the conversation with feedback or results.
   * The message is appended to the history, the (windowed) prior history is
   * sent along with it, and the reply is recorded.
   *
   * @param message Feedback, tool result or follow-up instruction
   * @returns The assistant's reply
   */
  public async continueWithMessage(message: string): Promise<interfaces.IAgentMessage> {
    // Add the new message to history
    this.messageHistory.push({
      role: 'user',
      content: message,
    });

    // The just-added message travels as `userMessage`, so exclude it from the history
    const priorHistory = this.messageHistory.slice(0, -1);

    const response = await this.requestCompletion({
      systemMessage: this.buildFullSystemMessage(),
      userMessage: message,
      messageHistory: this.selectHistoryWindow(priorHistory),
    });

    return this.recordAssistantReply(response);
  }

  /**
   * Parse tool call proposals from assistant response.
   * Every `<tool_call>...</tool_call>` block yields at most one proposal;
   * blocks lacking a tool or action are skipped.
   */
  public parseToolCallProposals(response: string): interfaces.IToolCallProposal[] {
    const proposals: interfaces.IToolCallProposal[] = [];

    // Match <tool_call>...</tool_call> blocks (regex is local so lastIndex starts at 0)
    const toolCallRegex = /<tool_call>([\s\S]*?)<\/tool_call>/g;
    let match: RegExpExecArray | null;

    while ((match = toolCallRegex.exec(response)) !== null) {
      try {
        const proposal = this.parseToolCallContent(match[1]);
        if (proposal) {
          proposals.push(proposal);
        }
      } catch (error) {
        // Skip malformed tool calls
        console.warn('Failed to parse tool call:', error);
      }
    }

    return proposals;
  }

  /**
   * Check if the response indicates task completion
   */
  public isTaskComplete(response: string): boolean {
    return DriverAgent.containsAnyMarker(response, COMPLETION_MARKERS);
  }

  /**
   * Check if the response needs clarification or user input
   */
  public needsClarification(response: string): boolean {
    return DriverAgent.containsAnyMarker(response, CLARIFICATION_MARKERS);
  }

  /**
   * Extract the final result from a completed task.
   * `<task_result>` takes precedence over `<task_complete>`.
   *
   * @returns The trimmed tag content, or null when neither tag pair is present
   */
  public extractTaskResult(response: string): string | null {
    const resultMatch = response.match(/<task_result>([\s\S]*?)<\/task_result>/);
    if (resultMatch) {
      return resultMatch[1].trim();
    }

    const completeMatch = response.match(/<task_complete>([\s\S]*?)<\/task_complete>/);
    if (completeMatch) {
      return completeMatch[1].trim();
    }

    return null;
  }

  /**
   * Reset the conversation state
   */
  public reset(): void {
    this.messageHistory = [];
  }

  /** Case-insensitive check whether `text` contains at least one of `markers`. */
  private static containsAnyMarker(text: string, markers: readonly string[]): boolean {
    const lowerText = text.toLowerCase();
    return markers.some((marker) => lowerText.includes(marker.toLowerCase()));
  }

  /** Build the system message for the next request, adapted to whether tools are registered. */
  private buildFullSystemMessage(): string {
    if (this.tools.size > 0) {
      return `${this.systemMessage}\n\n## Available Tools\n${this.buildToolDescriptions()}`;
    }
    // Use a simpler system message when no tools are available
    return this.getNoToolsSystemMessage();
  }

  /**
   * Limit the history sent to the provider to avoid token explosion.
   * When the limit is exceeded, the original task (first message) is kept
   * together with the most recent `maxHistoryMessages - 1` messages.
   */
  private selectHistoryWindow(
    history: plugins.smartai.ChatMessage[]
  ): plugins.smartai.ChatMessage[] {
    const limit = this.maxHistoryMessages;
    if (!(limit > 0) || history.length <= limit) {
      return history;
    }

    // slice(-0) is slice(0) and would return the WHOLE array, so a limit that
    // leaves no room for recent messages must be handled explicitly.
    const recentCount = limit - 1;
    const recent = recentCount >= 1 ? history.slice(-recentCount) : [];

    return [...history.slice(0, 1), ...recent];
  }

  /** Send a request to the provider, streaming tokens when a callback is set and the provider supports it. */
  private async requestCompletion(request: IChatRequest): Promise<plugins.smartai.ChatResponse> {
    // chatStreaming is feature-detected at runtime, hence the narrowly scoped cast.
    // eslint-disable-next-line @typescript-eslint/no-explicit-any
    const streamingCandidate = this.provider as any;

    if (this.onToken && typeof streamingCandidate.chatStreaming === 'function') {
      // Use streaming mode with token callback
      return streamingCandidate.chatStreaming({ ...request, onToken: this.onToken });
    }

    // Fallback to non-streaming mode
    return this.provider.chat(request);
  }

  /** Append the provider's reply to the history and return it as an assistant message. */
  private recordAssistantReply(response: plugins.smartai.ChatResponse): interfaces.IAgentMessage {
    this.messageHistory.push({
      role: 'assistant',
      content: response.message,
    });

    return {
      role: 'assistant',
      content: response.message,
    };
  }

  /**
   * Parse the content inside a tool_call block.
   * `<tool>` and `<action>` are mandatory; `<params>` and `<reasoning>` are optional.
   */
  private parseToolCallContent(content: string): interfaces.IToolCallProposal | null {
    // Extract tool name
    const toolMatch = content.match(/<tool>(.*?)<\/tool>/s);
    if (!toolMatch) return null;

    // Extract action
    const actionMatch = content.match(/<action>(.*?)<\/action>/s);
    if (!actionMatch) return null;

    // Extract params (JSON, with an XML-style fallback)
    const paramsMatch = content.match(/<params>([\s\S]*?)<\/params>/);
    const params = paramsMatch ? this.parseParams(paramsMatch[1]) : {};

    // Extract reasoning (optional)
    const reasoningMatch = content.match(/<reasoning>([\s\S]*?)<\/reasoning>/);

    return {
      proposalId: this.generateProposalId(),
      toolName: toolMatch[1].trim(),
      action: actionMatch[1].trim(),
      params,
      reasoning: reasoningMatch ? reasoningMatch[1].trim() : undefined,
    };
  }

  /**
   * Interpret the raw text of a `<params>` element.
   * Valid JSON is accepted only when it is a plain object; anything else
   * (invalid JSON, null, primitives, arrays) goes through the XML fallback.
   */
  private parseParams(rawParams: string): Record<string, unknown> {
    try {
      const parsed: unknown = JSON.parse(rawParams.trim());
      if (typeof parsed === 'object' && parsed !== null && !Array.isArray(parsed)) {
        return parsed as Record<string, unknown>;
      }
    } catch {
      // Not JSON - fall through to the XML-style extraction below
    }
    return this.extractParamsFromXml(rawParams);
  }

  /**
   * Extract parameters from XML-like format when JSON parsing fails.
   * Each `<key>value</key>` pair becomes an entry; values that are valid JSON
   * are parsed, everything else is kept as a trimmed string.
   */
  private extractParamsFromXml(content: string): Record<string, unknown> {
    const params: Record<string, unknown> = {};
    const paramRegex = /<(\w+)>([\s\S]*?)<\/\1>/g;
    let match: RegExpExecArray | null;

    while ((match = paramRegex.exec(content)) !== null) {
      const key = match[1];
      const rawValue = match[2].trim();
      let value: unknown = rawValue;

      // Try to parse as JSON for arrays/objects/numbers
      try {
        value = JSON.parse(rawValue);
      } catch {
        // Keep as string if not valid JSON
      }

      params[key] = value;
    }

    return params;
  }

  /**
   * Build tool descriptions for the system message
   */
  private buildToolDescriptions(): string {
    return Array.from(this.tools.values(), (tool) => tool.getFullDescription()).join('\n\n');
  }

  /**
   * Generate a proposal ID from the current timestamp plus a short random suffix
   */
  private generateProposalId(): string {
    return `prop_${Date.now()}_${Math.random().toString(36).substring(2, 8)}`;
  }

  /**
   * Get the default system message for the driver
   */
  private getDefaultSystemMessage(): string {
    return `You are an AI assistant that executes tasks by using available tools.

## Your Role
You analyze tasks, break them down into steps, and use tools to accomplish goals.

## CRITICAL: Tool Usage Format
To use a tool, you MUST literally write out the XML tags in your response. The system parses your output looking for these exact tags. Do NOT just describe or mention the tool call - you must OUTPUT the actual XML.

CORRECT (the XML is in the output):
<tool_call>
  <tool>json</tool>
  <action>validate</action>
  <params>{"jsonString": "{\\"key\\":\\"value\\"}"}</params>
</tool_call>

WRONG (just describing, no actual XML):
"I will call json.validate now" or "Let me use the tool"

## Guidelines
1. Think step by step about what needs to be done
2. When you need a tool, OUTPUT the XML tags - do not just mention them
3. Only propose ONE tool call at a time
4. Wait for the result before proposing the next action
5. When the task is complete, OUTPUT: <task_complete>Your final result here</task_complete>

## Important
- The <tool_call> and <task_complete> tags MUST appear literally in your response
- If you just say "I'll call the tool" without the actual XML, it will NOT work
- If you need clarification, ask using <needs_clarification>your question</needs_clarification>`;
  }

  /**
   * Get the system message when no tools are available
   * Used for direct task completion without tool usage
   */
  private getNoToolsSystemMessage(): string {
    // Use custom system message if provided, otherwise use a simple default
    if (this.systemMessage && this.systemMessage !== this.getDefaultSystemMessage()) {
      return this.systemMessage;
    }

    return `You are an AI assistant that completes tasks directly.

## Your Role
You analyze tasks and provide complete, high-quality outputs.

## Output Format
When you have completed the task, wrap your final output in task_complete tags:

<task_complete>
Your complete output here
</task_complete>

## Guidelines
1. Analyze the task requirements carefully
2. Provide a complete and accurate response
3. Always wrap your final output in <task_complete></task_complete> tags
4. If you need clarification, ask using <needs_clarification>your question</needs_clarification>`;
  }
}