The system message now clearly states that the <tool_call> XML tags MUST be literally written in the response, not just described. Includes examples of CORRECT vs WRONG usage to help smaller models understand.
444 lines
13 KiB
TypeScript
444 lines
13 KiB
TypeScript
import * as plugins from './plugins.js';
|
|
import * as interfaces from './smartagent.interfaces.js';
|
|
import type { BaseToolWrapper } from './smartagent.tools.base.js';
|
|
|
|
/**
 * Options for configuring the DriverAgent.
 *
 * Every field is optional; the DriverAgent constructor supplies a default
 * for anything left out.
 */
export interface IDriverAgentOptions {
  /**
   * Custom system message for the driver.
   * When omitted (or empty), the driver falls back to its built-in default prompt.
   */
  systemMessage?: string;

  /**
   * Maximum history messages to pass to the API (default: 20).
   * Set to 0 for unlimited. When the limit applies, the original task
   * message is kept together with the most recent messages.
   */
  maxHistoryMessages?: number;

  /**
   * Callback fired for each token during LLM generation.
   * Only used when the provider supports streaming.
   */
  onToken?: (token: string) => void;
}
|
|
|
|
/**
 * DriverAgent - Executes tasks by reasoning and proposing tool calls.
 * Works in conjunction with GuardianAgent for approval.
 *
 * The agent keeps its own message history, sends each turn to the configured
 * provider (streaming when a token callback is set and the provider exposes a
 * `chatStreaming` function) and offers helpers that parse the XML-style
 * markers (`<tool_call>`, `<task_complete>`, `<needs_clarification>`) out of
 * the provider's replies.
 */
export class DriverAgent {
  private provider: plugins.smartai.MultiModalModel;
  private systemMessage: string;
  private maxHistoryMessages: number;
  private messageHistory: plugins.smartai.ChatMessage[] = [];
  private tools: Map<string, BaseToolWrapper> = new Map();
  private onToken?: (token: string) => void;

  /**
   * @param provider Model provider used for every chat request
   * @param options Either an options object or, for backwards compatibility,
   *   a plain string that is used as the system message
   */
  constructor(
    provider: plugins.smartai.MultiModalModel,
    options?: IDriverAgentOptions | string
  ) {
    this.provider = provider;

    // Support both legacy string systemMessage and new options object.
    // `||` is deliberate: an empty system message falls back to the default.
    if (typeof options === 'string') {
      this.systemMessage = options || this.getDefaultSystemMessage();
      this.maxHistoryMessages = 20;
    } else {
      this.systemMessage = options?.systemMessage || this.getDefaultSystemMessage();
      this.maxHistoryMessages = options?.maxHistoryMessages ?? 20;
      this.onToken = options?.onToken;
    }
  }

  /**
   * Set the token callback for streaming mode
   * @param callback Function to call for each generated token
   */
  public setOnToken(callback: (token: string) => void): void {
    this.onToken = callback;
  }

  /**
   * Register a tool for use by the driver.
   * A tool registered under an already-used name replaces the earlier one.
   */
  public registerTool(tool: BaseToolWrapper): void {
    this.tools.set(tool.name, tool);
  }

  /**
   * Get all registered tools (the live map, not a copy)
   */
  public getTools(): Map<string, BaseToolWrapper> {
    return this.tools;
  }

  /**
   * Initialize a new conversation for a task.
   * Clears any previous history before sending the first request.
   * @param task The task description
   * @param images Optional base64-encoded images for vision tasks
   * @returns The assistant's first reply
   */
  public async startTask(task: string, images?: string[]): Promise<interfaces.IAgentMessage> {
    this.messageHistory = [];

    // The opening instruction depends on whether any tools are registered
    const userMessage =
      this.tools.size > 0
        ? `TASK: ${task}\n\nAnalyze this task and determine what actions are needed. If you need to use a tool, provide a tool call proposal.`
        : `TASK: ${task}\n\nComplete this task directly. When done, wrap your final output in <task_complete>your output here</task_complete> tags.`;

    this.messageHistory.push({
      role: 'user',
      content: userMessage,
    });

    const response = await this.requestCompletion({
      systemMessage: this.buildFullSystemMessage(),
      userMessage,
      messageHistory: [],
      images,
    });

    // Only the text of the reply is recorded; images are sent with the
    // first request but are not kept in the history.
    this.messageHistory.push({
      role: 'assistant',
      content: response.message,
    });

    return {
      role: 'assistant',
      content: response.message,
    };
  }

  /**
   * Continue the conversation with feedback or results
   * @param message The next user-side message (feedback, tool result, ...)
   * @returns The assistant's reply
   */
  public async continueWithMessage(message: string): Promise<interfaces.IAgentMessage> {
    this.messageHistory.push({
      role: 'user',
      content: message,
    });

    // The just-added message travels as `userMessage`, so it is excluded
    // from the history handed to the provider.
    const priorHistory = this.messageHistory.slice(0, -1);

    const response = await this.requestCompletion({
      systemMessage: this.buildFullSystemMessage(),
      userMessage: message,
      messageHistory: this.selectHistoryWindow(priorHistory),
    });

    this.messageHistory.push({
      role: 'assistant',
      content: response.message,
    });

    return {
      role: 'assistant',
      content: response.message,
    };
  }

  /**
   * Parse tool call proposals from assistant response
   * @param response Raw assistant text
   * @returns One proposal per well-formed `<tool_call>` block, in order of
   *   appearance; blocks missing `<tool>` or `<action>` are skipped
   */
  public parseToolCallProposals(response: string): interfaces.IToolCallProposal[] {
    const proposals: interfaces.IToolCallProposal[] = [];

    // Match <tool_call>...</tool_call> blocks
    for (const match of response.matchAll(/<tool_call>([\s\S]*?)<\/tool_call>/g)) {
      try {
        const proposal = this.parseToolCallContent(match[1]);
        if (proposal) {
          proposals.push(proposal);
        }
      } catch (error) {
        // Skip malformed tool calls
        console.warn('Failed to parse tool call:', error);
      }
    }

    return proposals;
  }

  /**
   * Check if the response indicates task completion
   * (case-insensitive substring match on a fixed set of markers)
   */
  public isTaskComplete(response: string): boolean {
    return this.containsAnyMarker(response, [
      '<task_complete>',
      '<task_completed>',
      'TASK COMPLETE',
      'Task completed successfully',
    ]);
  }

  /**
   * Check if the response needs clarification or user input
   * (case-insensitive substring match on a fixed set of markers)
   */
  public needsClarification(response: string): boolean {
    return this.containsAnyMarker(response, [
      '<needs_clarification>',
      '<question>',
      'please clarify',
      'could you specify',
      'what do you mean by',
    ]);
  }

  /**
   * Extract the final result from a completed task
   * @returns Trimmed content of `<task_result>` if present, otherwise of
   *   `<task_complete>`, otherwise `null`
   */
  public extractTaskResult(response: string): string | null {
    const resultMatch = response.match(/<task_result>([\s\S]*?)<\/task_result>/);
    if (resultMatch) {
      return resultMatch[1].trim();
    }

    const completeMatch = response.match(/<task_complete>([\s\S]*?)<\/task_complete>/);
    if (completeMatch) {
      return completeMatch[1].trim();
    }

    return null;
  }

  /**
   * Reset the conversation state
   */
  public reset(): void {
    this.messageHistory = [];
  }

  /**
   * Build the system message for the next request: the configured message
   * plus tool descriptions when tools exist, the no-tools message otherwise.
   */
  private buildFullSystemMessage(): string {
    if (this.tools.size === 0) {
      return this.getNoToolsSystemMessage();
    }
    return `${this.systemMessage}\n\n## Available Tools\n${this.buildToolDescriptions()}`;
  }

  /**
   * Send one request to the provider, streaming when possible.
   * Streaming is used only when a token callback is set and the provider
   * exposes a `chatStreaming` function; otherwise plain `chat` is used.
   */
  private async requestCompletion(request: {
    systemMessage: string;
    userMessage: string;
    messageHistory: plugins.smartai.ChatMessage[];
    images?: string[];
  }): Promise<plugins.smartai.ChatResponse> {
    // `chatStreaming` is an optional capability that is not part of the
    // provider type used here, hence the narrowly scoped `any`.
    // eslint-disable-next-line @typescript-eslint/no-explicit-any
    const maybeStreaming = this.provider as any;

    if (this.onToken && typeof maybeStreaming.chatStreaming === 'function') {
      return maybeStreaming.chatStreaming({ ...request, onToken: this.onToken });
    }

    return this.provider.chat(request);
  }

  /**
   * Apply history windowing to avoid token explosion.
   * When the limit is exceeded, keeps the original task (first message)
   * plus the most recent messages, `maxHistoryMessages` entries in total.
   */
  private selectHistoryWindow(
    history: plugins.smartai.ChatMessage[]
  ): plugins.smartai.ChatMessage[] {
    const limit = this.maxHistoryMessages;
    if (!(limit > 0 && history.length > limit)) {
      return history;
    }

    // `slice(-0)` would return the whole array, so a window that leaves no
    // room for recent messages must be handled explicitly.
    const recentCount = Math.floor(limit) - 1;
    const recent = recentCount > 0 ? history.slice(-recentCount) : [];

    return [history[0], ...recent];
  }

  /**
   * Parse the content inside a tool_call block
   * @returns The proposal, or `null` when `<tool>` or `<action>` is missing
   */
  private parseToolCallContent(content: string): interfaces.IToolCallProposal | null {
    const toolMatch = content.match(/<tool>(.*?)<\/tool>/s);
    if (!toolMatch) return null;
    const toolName = toolMatch[1].trim();

    const actionMatch = content.match(/<action>(.*?)<\/action>/s);
    if (!actionMatch) return null;
    const action = actionMatch[1].trim();

    const paramsMatch = content.match(/<params>([\s\S]*?)<\/params>/);
    const params = paramsMatch ? this.parseParams(paramsMatch[1]) : {};

    // Reasoning is optional
    const reasoningMatch = content.match(/<reasoning>([\s\S]*?)<\/reasoning>/);
    const reasoning = reasoningMatch ? reasoningMatch[1].trim() : undefined;

    return {
      proposalId: this.generateProposalId(),
      toolName,
      action,
      params,
      reasoning,
    };
  }

  /**
   * Turn the raw `<params>` body into a parameter object.
   * JSON is tried first; anything that is not a JSON object (invalid JSON,
   * `null`, arrays, primitives) falls back to XML-style extraction.
   */
  private parseParams(raw: string): Record<string, unknown> {
    try {
      const parsed: unknown = JSON.parse(raw.trim());
      if (typeof parsed === 'object' && parsed !== null && !Array.isArray(parsed)) {
        return parsed as Record<string, unknown>;
      }
    } catch {
      // Not valid JSON - fall through to XML extraction
    }
    return this.extractParamsFromXml(raw);
  }

  /**
   * Extract parameters from XML-like format when JSON parsing fails.
   * Each `<key>value</key>` pair becomes an entry; values that are valid
   * JSON are parsed, everything else is kept as a trimmed string.
   */
  private extractParamsFromXml(content: string): Record<string, unknown> {
    const params: Record<string, unknown> = {};

    for (const match of content.matchAll(/<(\w+)>([\s\S]*?)<\/\1>/g)) {
      const key = match[1];
      const rawValue = match[2].trim();

      let value: unknown = rawValue;
      try {
        // Try to parse as JSON for arrays/objects/numbers/booleans
        value = JSON.parse(rawValue);
      } catch {
        // Keep as string if not valid JSON
      }

      // Keys come from model output. Defining an own property keeps a key
      // such as `__proto__` from replacing the object's prototype.
      Object.defineProperty(params, key, {
        value,
        writable: true,
        enumerable: true,
        configurable: true,
      });
    }

    return params;
  }

  /**
   * Case-insensitive check whether any of the markers occurs in the response
   */
  private containsAnyMarker(response: string, markers: readonly string[]): boolean {
    const haystack = response.toLowerCase();
    return markers.some((marker) => haystack.includes(marker.toLowerCase()));
  }

  /**
   * Build tool descriptions for the system message
   * (full descriptions of all registered tools, separated by blank lines)
   */
  private buildToolDescriptions(): string {
    return Array.from(this.tools.values(), (tool) => tool.getFullDescription()).join('\n\n');
  }

  /**
   * Generate a unique proposal ID
   * (timestamp plus a short random base-36 suffix)
   */
  private generateProposalId(): string {
    return `prop_${Date.now()}_${Math.random().toString(36).substring(2, 8)}`;
  }

  /**
   * Get the default system message for the driver
   */
  private getDefaultSystemMessage(): string {
    return `You are an AI assistant that executes tasks by using available tools.

## Your Role
You analyze tasks, break them down into steps, and use tools to accomplish goals.

## CRITICAL: Tool Usage Format
To use a tool, you MUST literally write out the XML tags in your response. The system parses your output looking for these exact tags. Do NOT just describe or mention the tool call - you must OUTPUT the actual XML.

CORRECT (the XML is in the output):
<tool_call>
<tool>json</tool>
<action>validate</action>
<params>{"jsonString": "{\\"key\\":\\"value\\"}"}</params>
</tool_call>

WRONG (just describing, no actual XML):
"I will call json.validate now" or "Let me use the tool"

## Guidelines
1. Think step by step about what needs to be done
2. When you need a tool, OUTPUT the <tool_call> XML tags - do not just mention them
3. Only propose ONE tool call at a time
4. Wait for the result before proposing the next action
5. When the task is complete, OUTPUT:

<task_complete>
Your final result here
</task_complete>

## Important
- The <tool_call> and <task_complete> tags MUST appear literally in your response
- If you just say "I'll call the tool" without the actual XML, it will NOT work
- If you need clarification, ask using <needs_clarification>your question</needs_clarification>`;
  }

  /**
   * Get the system message when no tools are available.
   * Used for direct task completion without tool usage.
   */
  private getNoToolsSystemMessage(): string {
    // Use custom system message if provided, otherwise use a simple default
    if (this.systemMessage && this.systemMessage !== this.getDefaultSystemMessage()) {
      return this.systemMessage;
    }

    return `You are an AI assistant that completes tasks directly.

## Your Role
You analyze tasks and provide complete, high-quality outputs.

## Output Format
When you have completed the task, wrap your final output in task_complete tags:

<task_complete>
Your complete output here
</task_complete>

## Guidelines
1. Analyze the task requirements carefully
2. Provide a complete and accurate response
3. Always wrap your final output in <task_complete></task_complete> tags
4. If you need clarification, ask using <needs_clarification>your question</needs_clarification>`;
  }
}
|