// smartagent/ts/smartagent.classes.driveragent.ts
import * as plugins from './plugins.js';
import * as interfaces from './smartagent.interfaces.js';
import type { BaseToolWrapper } from './smartagent.tools.base.js';
/**
 * Options for configuring the DriverAgent.
 *
 * Every field is optional; omitted fields fall back to the defaults noted below.
 */
export interface IDriverAgentOptions {
  /** Custom system message for the driver. */
  systemMessage?: string;
  /** Maximum history messages to pass to API (default: 20). Set to 0 for unlimited. */
  maxHistoryMessages?: number;
  /** Callback fired for each token during LLM generation. */
  onToken?: (token: string) => void;
}
/**
 * DriverAgent - Executes tasks by reasoning and proposing tool calls.
 * Works in conjunction with GuardianAgent for approval.
 *
 * The agent keeps the running conversation in memory, asks the provider for the
 * next assistant message, and offers helpers to interpret that message
 * (tool call proposals, completion markers, clarification requests).
 */
export class DriverAgent {
  /** History window applied when the caller does not configure one. */
  private static readonly DEFAULT_MAX_HISTORY_MESSAGES = 20;

  /** Substrings (matched case-insensitively) that signal a finished task. */
  private static readonly COMPLETION_MARKERS: readonly string[] = [
    '<task_complete>',
    '<task_completed>',
    'TASK COMPLETE',
    'Task completed successfully',
  ];

  /** Substrings (matched case-insensitively) that signal a request for user input. */
  private static readonly CLARIFICATION_MARKERS: readonly string[] = [
    '<needs_clarification>',
    '<question>',
    'please clarify',
    'could you specify',
    'what do you mean by',
  ];

  private readonly provider: plugins.smartai.MultiModalModel;
  private readonly systemMessage: string;
  private readonly maxHistoryMessages: number;
  private messageHistory: plugins.smartai.ChatMessage[] = [];
  private readonly tools: Map<string, BaseToolWrapper> = new Map();
  private onToken?: (token: string) => void;

  /**
   * @param provider Model used to generate assistant responses
   * @param options Either an options object or, for backward compatibility,
   *   a plain string that is treated as the system message
   */
  constructor(
    provider: plugins.smartai.MultiModalModel,
    options?: IDriverAgentOptions | string
  ) {
    this.provider = provider;

    // Support both legacy string systemMessage and new options object
    const resolved: IDriverAgentOptions =
      typeof options === 'string' ? { systemMessage: options } : options ?? {};

    // `||` is deliberate: an empty system message falls back to the default.
    this.systemMessage = resolved.systemMessage || this.getDefaultSystemMessage();
    this.maxHistoryMessages =
      resolved.maxHistoryMessages ?? DriverAgent.DEFAULT_MAX_HISTORY_MESSAGES;
    this.onToken = resolved.onToken;
  }

  /**
   * Set the token callback for streaming mode
   * @param callback Function to call for each generated token
   */
  public setOnToken(callback: (token: string) => void): void {
    this.onToken = callback;
  }

  /**
   * Register a tool for use by the driver.
   * A tool registered under an already-used name replaces the previous one.
   */
  public registerTool(tool: BaseToolWrapper): void {
    this.tools.set(tool.name, tool);
  }

  /**
   * Get all registered tools.
   * @returns The live internal map (not a copy), keyed by tool name
   */
  public getTools(): Map<string, BaseToolWrapper> {
    return this.tools;
  }

  /**
   * Initialize a new conversation for a task.
   * Any previous conversation history is discarded.
   * @param task The task description
   * @param images Optional base64-encoded images for vision tasks
   * @returns The assistant's first response
   */
  public async startTask(task: string, images?: string[]): Promise<interfaces.IAgentMessage> {
    // Reset message history
    this.messageHistory = [];

    // The instruction appended to the task depends on whether tools are available
    const userMessage =
      this.tools.size > 0
        ? `TASK: ${task}\n\nAnalyze this task and determine what actions are needed. If you need to use a tool, provide a tool call proposal.`
        : `TASK: ${task}\n\nComplete this task directly. When done, wrap your final output in <task_complete>your output here</task_complete> tags.`;

    this.messageHistory.push({
      role: 'user',
      content: userMessage,
    });

    // Images are forwarded to the provider for this first request only;
    // they are not stored in the message history.
    const response = await this.requestResponse({
      systemMessage: this.buildFullSystemMessage(),
      userMessage,
      messageHistory: [],
      images,
    });

    return this.recordAssistantResponse(response);
  }

  /**
   * Continue the conversation with feedback or results
   * @param message The next user-side message (e.g. a tool result or feedback)
   * @returns The assistant's response
   */
  public async continueWithMessage(message: string): Promise<interfaces.IAgentMessage> {
    // Add the new message to history
    this.messageHistory.push({
      role: 'user',
      content: message,
    });

    // The provider receives `message` separately as userMessage, so the history
    // handed over excludes the entry that was just appended.
    const historyForChat = this.selectHistoryWindow(this.messageHistory.slice(0, -1));

    const response = await this.requestResponse({
      systemMessage: this.buildFullSystemMessage(),
      userMessage: message,
      messageHistory: historyForChat,
    });

    return this.recordAssistantResponse(response);
  }

  /**
   * Parse tool call proposals from assistant response
   * @param response Raw assistant text
   * @returns One proposal per well-formed <tool_call> block, in order of appearance
   */
  public parseToolCallProposals(response: string): interfaces.IToolCallProposal[] {
    const proposals: interfaces.IToolCallProposal[] = [];

    // Match <tool_call>...</tool_call> blocks
    for (const match of response.matchAll(/<tool_call>([\s\S]*?)<\/tool_call>/g)) {
      try {
        const proposal = this.parseToolCallContent(match[1]);
        if (proposal) {
          proposals.push(proposal);
        }
      } catch (error) {
        // Skip malformed tool calls
        console.warn('Failed to parse tool call:', error);
      }
    }

    return proposals;
  }

  /**
   * Check if the response indicates task completion
   */
  public isTaskComplete(response: string): boolean {
    return DriverAgent.containsAnyMarker(response, DriverAgent.COMPLETION_MARKERS);
  }

  /**
   * Check if the response needs clarification or user input
   */
  public needsClarification(response: string): boolean {
    return DriverAgent.containsAnyMarker(response, DriverAgent.CLARIFICATION_MARKERS);
  }

  /**
   * Extract the final result from a completed task
   * @returns Trimmed content of <task_result>, else of <task_complete>, else null
   */
  public extractTaskResult(response: string): string | null {
    // <task_result> takes precedence over <task_complete>
    const resultText =
      DriverAgent.extractTag(response, 'task_result') ??
      DriverAgent.extractTag(response, 'task_complete');
    return resultText === undefined ? null : resultText.trim();
  }

  /**
   * Reset the conversation state
   */
  public reset(): void {
    this.messageHistory = [];
  }

  /**
   * Return the inner text of the first `<tag>...</tag>` pair, or undefined if absent.
   */
  private static extractTag(content: string, tag: string): string | undefined {
    const match = content.match(new RegExp(`<${tag}>([\\s\\S]*?)<\\/${tag}>`));
    return match ? match[1] : undefined;
  }

  /**
   * Case-insensitive check for whether any marker occurs in the response.
   */
  private static containsAnyMarker(response: string, markers: readonly string[]): boolean {
    const haystack = response.toLowerCase();
    return markers.some((marker) => haystack.includes(marker.toLowerCase()));
  }

  /**
   * Type guard: true for non-null, non-array objects.
   */
  private static isPlainRecord(value: unknown): value is Record<string, unknown> {
    return typeof value === 'object' && value !== null && !Array.isArray(value);
  }

  /**
   * Build the system message for the next request: the configured message plus
   * tool descriptions when tools exist, otherwise the no-tools message.
   */
  private buildFullSystemMessage(): string {
    if (this.tools.size === 0) {
      // Use a simpler system message when no tools are available
      return this.getNoToolsSystemMessage();
    }
    return `${this.systemMessage}\n\n## Available Tools\n${this.buildToolDescriptions()}`;
  }

  /**
   * Apply history windowing to avoid token explosion.
   * When a positive limit is exceeded, keeps the original task (first message)
   * plus the most recent `limit - 1` messages; otherwise returns the input as is.
   */
  private selectHistoryWindow(
    history: plugins.smartai.ChatMessage[]
  ): plugins.smartai.ChatMessage[] {
    const limit = this.maxHistoryMessages;
    if (!(limit > 0 && history.length > limit)) {
      return history;
    }

    // slice(-0) would return the whole array, so a zero count must be handled
    // explicitly (limit of 1 means "original task only").
    const recentCount = Math.floor(limit) - 1;
    const recentMessages = recentCount >= 1 ? history.slice(-recentCount) : [];
    return [...history.slice(0, 1), ...recentMessages];
  }

  /**
   * Ask the provider for a response, using streaming when a token callback is
   * set and the provider exposes a `chatStreaming` function; otherwise `chat`.
   */
  private async requestResponse(
    request: Parameters<plugins.smartai.MultiModalModel['chat']>[0]
  ): Promise<plugins.smartai.ChatResponse> {
    const onToken = this.onToken;
    // chatStreaming is probed at runtime, so describe it structurally as optional.
    const provider = this.provider as plugins.smartai.MultiModalModel & {
      chatStreaming?: (
        options: typeof request & { onToken: (token: string) => void }
      ) => Promise<plugins.smartai.ChatResponse>;
    };

    if (onToken && typeof provider.chatStreaming === 'function') {
      // Use streaming mode with token callback
      return provider.chatStreaming({ ...request, onToken });
    }

    // Fallback to non-streaming mode
    return provider.chat(request);
  }

  /**
   * Append the assistant response to history and convert it to an agent message.
   */
  private recordAssistantResponse(
    response: plugins.smartai.ChatResponse
  ): interfaces.IAgentMessage {
    this.messageHistory.push({
      role: 'assistant',
      content: response.message,
    });
    return {
      role: 'assistant',
      content: response.message,
    };
  }

  /**
   * Parse the content inside a tool_call block
   * @returns A proposal, or null when the mandatory <tool> or <action> tag is missing
   */
  private parseToolCallContent(content: string): interfaces.IToolCallProposal | null {
    // Extract tool name
    const rawToolName = DriverAgent.extractTag(content, 'tool');
    if (rawToolName === undefined) return null;

    // Extract action
    const rawAction = DriverAgent.extractTag(content, 'action');
    if (rawAction === undefined) return null;

    // Extract params (optional; defaults to an empty object)
    const rawParams = DriverAgent.extractTag(content, 'params');
    const params = rawParams === undefined ? {} : this.parseParams(rawParams);

    // Extract reasoning (optional)
    const rawReasoning = DriverAgent.extractTag(content, 'reasoning');

    return {
      proposalId: this.generateProposalId(),
      toolName: rawToolName.trim(),
      action: rawAction.trim(),
      params,
      reasoning: rawReasoning === undefined ? undefined : rawReasoning.trim(),
    };
  }

  /**
   * Turn the text of a <params> block into a parameter object.
   * JSON is preferred; anything that is not a JSON object (invalid JSON, or a
   * JSON primitive/array/null) falls back to XML-style extraction.
   */
  private parseParams(rawParams: string): Record<string, unknown> {
    try {
      const parsed: unknown = JSON.parse(rawParams.trim());
      if (DriverAgent.isPlainRecord(parsed)) {
        return parsed;
      }
    } catch {
      // Not valid JSON - fall through to XML-style extraction
    }
    return this.extractParamsFromXml(rawParams);
  }

  /**
   * Extract parameters from XML-like format when JSON parsing fails
   */
  private extractParamsFromXml(content: string): Record<string, unknown> {
    const entries: Array<[string, unknown]> = [];

    for (const match of content.matchAll(/<(\w+)>([\s\S]*?)<\/\1>/g)) {
      const rawValue = match[2].trim();
      let value: unknown = rawValue;
      // Try to parse as JSON for arrays/objects
      try {
        value = JSON.parse(rawValue);
      } catch {
        // Keep as string if not valid JSON
      }
      entries.push([match[1], value]);
    }

    // fromEntries defines own properties, so a key such as "__proto__" cannot
    // alter the prototype of the returned object.
    return Object.fromEntries(entries);
  }

  /**
   * Build tool descriptions for the system message
   */
  private buildToolDescriptions(): string {
    return Array.from(this.tools.values(), (tool) => tool.getFullDescription()).join('\n\n');
  }

  /**
   * Generate a proposal ID from the current time plus a short random suffix
   */
  private generateProposalId(): string {
    return `prop_${Date.now()}_${Math.random().toString(36).substring(2, 8)}`;
  }

  /**
   * Get the default system message for the driver
   */
  private getDefaultSystemMessage(): string {
    return `You are an AI assistant that executes tasks by using available tools.
## Your Role
You analyze tasks, break them down into steps, and use tools to accomplish goals.
## CRITICAL: Tool Usage Format
To use a tool, you MUST literally write out the XML tags in your response. The system parses your output looking for these exact tags. Do NOT just describe or mention the tool call - you must OUTPUT the actual XML.
CORRECT (the XML is in the output):
<tool_call>
<tool>json</tool>
<action>validate</action>
<params>{"jsonString": "{\\"key\\":\\"value\\"}"}</params>
</tool_call>
WRONG (just describing, no actual XML):
"I will call json.validate now" or "Let me use the tool"
## Guidelines
1. Think step by step about what needs to be done
2. When you need a tool, OUTPUT the <tool_call> XML tags - do not just mention them
3. Only propose ONE tool call at a time
4. Wait for the result before proposing the next action
5. When the task is complete, OUTPUT:
<task_complete>
Your final result here
</task_complete>
## Important
- The <tool_call> and <task_complete> tags MUST appear literally in your response
- If you just say "I'll call the tool" without the actual XML, it will NOT work
- If you need clarification, ask using <needs_clarification>your question</needs_clarification>`;
  }

  /**
   * Get the system message when no tools are available
   * Used for direct task completion without tool usage
   */
  private getNoToolsSystemMessage(): string {
    // Use custom system message if provided, otherwise use a simple default
    if (this.systemMessage && this.systemMessage !== this.getDefaultSystemMessage()) {
      return this.systemMessage;
    }
    return `You are an AI assistant that completes tasks directly.
## Your Role
You analyze tasks and provide complete, high-quality outputs.
## Output Format
When you have completed the task, wrap your final output in task_complete tags:
<task_complete>
Your complete output here
</task_complete>
## Guidelines
1. Analyze the task requirements carefully
2. Provide a complete and accurate response
3. Always wrap your final output in <task_complete></task_complete> tags
4. If you need clarification, ask using <needs_clarification>your question</needs_clarification>`;
  }
}