// smartagent/ts/smartagent.classes.driveragent.ts

import * as plugins from './plugins.js';
import * as interfaces from './smartagent.interfaces.js';
import type { BaseToolWrapper } from './smartagent.tools.base.js';

/**
 * Options for configuring the DriverAgent.
 *
 * Passed as the second constructor argument of DriverAgent. The constructor
 * also accepts a plain string there, which is treated as the system message.
 */
export interface IDriverAgentOptions {
  /**
   * Custom system message for the driver.
   * When omitted (or empty) the driver falls back to its built-in default prompt.
   */
  systemMessage?: string;
  /**
   * Maximum history messages to pass to API (default: 20). Set to 0 for unlimited.
   * When the limit is exceeded the driver keeps the first message (the original
   * task) plus the most recent messages.
   */
  maxHistoryMessages?: number;
  /**
   * Callback fired for each token during LLM generation.
   * Only used when the provider exposes a `chatStreaming` function; otherwise
   * the driver uses the non-streaming `chat` call and the callback is not passed on.
   */
  onToken?: (token: string) => void;
}

/**
 * Request payload accepted by the provider's `chat` method. Derived from the
 * provider type so it cannot drift from the smartai definition.
 */
type TDriverChatRequest = Parameters<plugins.smartai.MultiModalModel['chat']>[0];

/**
 * A provider that may additionally expose `chatStreaming`. The method is
 * probed at runtime before use, so it is modelled as optional here instead of
 * casting the provider to `any`.
 */
type TStreamingCapableProvider = plugins.smartai.MultiModalModel & {
  chatStreaming?: (
    request: TDriverChatRequest & { onToken: (token: string) => void }
  ) => Promise<plugins.smartai.ChatResponse>;
};

/**
 * DriverAgent - Executes tasks by reasoning and proposing tool calls
 * Works in conjunction with GuardianAgent for approval
 *
 * The driver owns the conversation history for a single task, sends it to the
 * configured provider, and offers helpers to interpret the model's textual
 * output (tool call proposals, completion and clarification markers).
 */
export class DriverAgent {
  /** Substrings (lower-cased once) whose presence marks a response as complete. */
  private static readonly completionMarkers: readonly string[] = [
    '<task_complete>',
    '<task_completed>',
    'TASK COMPLETE',
    'Task completed successfully',
  ].map((marker) => marker.toLowerCase());

  /** Substrings (lower-cased once) whose presence marks a request for clarification. */
  private static readonly clarificationMarkers: readonly string[] = [
    '<needs_clarification>',
    '<question>',
    'please clarify',
    'could you specify',
    'what do you mean by',
  ].map((marker) => marker.toLowerCase());

  private readonly provider: plugins.smartai.MultiModalModel;
  private readonly systemMessage: string;
  private readonly maxHistoryMessages: number;
  private messageHistory: plugins.smartai.ChatMessage[] = [];
  private readonly tools: Map<string, BaseToolWrapper> = new Map();
  private onToken?: (token: string) => void;

  /**
   * @param provider Model used for all chat requests
   * @param options Either an options object or, for backwards compatibility,
   *   a plain string that is used as the system message
   */
  constructor(
    provider: plugins.smartai.MultiModalModel,
    options?: IDriverAgentOptions | string
  ) {
    this.provider = provider;

    // Support both legacy string systemMessage and new options object
    if (typeof options === 'string') {
      this.systemMessage = options || this.getDefaultSystemMessage();
      this.maxHistoryMessages = 20;
    } else {
      this.systemMessage = options?.systemMessage || this.getDefaultSystemMessage();
      this.maxHistoryMessages = options?.maxHistoryMessages ?? 20;
      this.onToken = options?.onToken;
    }
  }

  /**
   * Set the token callback for streaming mode
   * @param callback Function to call for each generated token
   */
  public setOnToken(callback: (token: string) => void): void {
    this.onToken = callback;
  }

  /**
   * Register a tool for use by the driver.
   * A tool registered under an already-used name replaces the previous one.
   */
  public registerTool(tool: BaseToolWrapper): void {
    this.tools.set(tool.name, tool);
  }

  /**
   * Get all registered tools.
   * @returns The driver's own tool map (not a copy), keyed by tool name
   */
  public getTools(): Map<string, BaseToolWrapper> {
    return this.tools;
  }

  /**
   * Initialize a new conversation for a task. Any previous history is discarded.
   * @param task The task description
   * @param images Optional base64-encoded images for vision tasks. They are
   *   sent with this first request only and are not kept in the history.
   * @returns The assistant's first reply
   */
  public async startTask(task: string, images?: string[]): Promise<interfaces.IAgentMessage> {
    // The instructions appended to the task depend on whether tools exist
    const userMessage =
      this.tools.size > 0
        ? `TASK: ${task}\n\nAnalyze this task and determine what actions are needed. If you need to use a tool, provide a tool call proposal.`
        : `TASK: ${task}\n\nComplete this task directly. When done, wrap your final output in <task_complete>your output here</task_complete> tags.`;

    // Start a fresh history containing only the task message
    this.messageHistory = [{ role: 'user', content: userMessage }];

    const response = await this.requestCompletion({
      systemMessage: this.buildFullSystemMessage(),
      userMessage,
      messageHistory: [],
      images,
    });

    return this.recordAssistantResponse(response);
  }

  /**
   * Continue the conversation with feedback or results
   * @param message Text to send as the next user message
   * @returns The assistant's reply
   */
  public async continueWithMessage(message: string): Promise<interfaces.IAgentMessage> {
    this.messageHistory.push({ role: 'user', content: message });

    const systemMessage = this.buildFullSystemMessage();

    // The just-added message travels as `userMessage`, so it is excluded
    // from the history that accompanies the request.
    const priorHistory = this.messageHistory.slice(0, -1);

    const response = await this.requestCompletion({
      systemMessage,
      userMessage: message,
      messageHistory: this.selectHistoryWindow(priorHistory),
    });

    return this.recordAssistantResponse(response);
  }

  /**
   * Parse tool call proposals from assistant response
   * @param response Raw assistant text that may contain `<tool_call>` blocks
   * @returns One proposal per well-formed block, in order of appearance
   */
  public parseToolCallProposals(response: string): interfaces.IToolCallProposal[] {
    const proposals: interfaces.IToolCallProposal[] = [];

    for (const match of response.matchAll(/<tool_call>([\s\S]*?)<\/tool_call>/g)) {
      try {
        const proposal = this.parseToolCallContent(match[1]);
        if (proposal) {
          proposals.push(proposal);
        }
      } catch (error) {
        // Skip malformed tool calls
        console.warn('Failed to parse tool call:', error);
      }
    }

    return proposals;
  }

  /**
   * Check if the response indicates task completion
   * (case-insensitive substring match against the known completion markers)
   */
  public isTaskComplete(response: string): boolean {
    const lowerResponse = response.toLowerCase();
    return DriverAgent.completionMarkers.some((marker) => lowerResponse.includes(marker));
  }

  /**
   * Check if the response needs clarification or user input
   * (case-insensitive substring match against the known clarification markers)
   */
  public needsClarification(response: string): boolean {
    const lowerResponse = response.toLowerCase();
    return DriverAgent.clarificationMarkers.some((marker) => lowerResponse.includes(marker));
  }

  /**
   * Extract the final result from a completed task.
   *
   * `<task_result>` takes precedence; otherwise the content of
   * `<task_complete>` or `<task_completed>` is used. Tag matching is
   * case-insensitive so that every tag accepted by isTaskComplete can also
   * be extracted.
   * @returns The trimmed tag content, or null when no result tag is present
   */
  public extractTaskResult(response: string): string | null {
    const resultMatch = response.match(/<task_result>([\s\S]*?)<\/task_result>/i);
    if (resultMatch) {
      return resultMatch[1].trim();
    }

    // The backreference forces the closing tag to match the opening variant
    const completeMatch = response.match(/<(task_completed?)>([\s\S]*?)<\/\1>/i);
    if (completeMatch) {
      return completeMatch[2].trim();
    }

    return null;
  }

  /**
   * Reset the conversation state
   */
  public reset(): void {
    this.messageHistory = [];
  }

  /**
   * Build the system message for the next request: the configured message
   * plus tool descriptions when tools are registered, otherwise the
   * no-tools variant.
   */
  private buildFullSystemMessage(): string {
    if (this.tools.size === 0) {
      return this.getNoToolsSystemMessage();
    }
    return `${this.systemMessage}\n\n## Available Tools\n${this.buildToolDescriptions()}`;
  }

  /**
   * Send a request to the provider, using streaming when a token callback is
   * set and the provider exposes `chatStreaming`; otherwise plain `chat`.
   */
  private async requestCompletion(
    request: TDriverChatRequest
  ): Promise<plugins.smartai.ChatResponse> {
    const provider = this.provider as TStreamingCapableProvider;

    if (this.onToken && typeof provider.chatStreaming === 'function') {
      // Called as a method so the provider keeps its `this` binding
      return provider.chatStreaming({ ...request, onToken: this.onToken });
    }

    return provider.chat(request);
  }

  /**
   * Append the assistant reply to the history and return it as an agent message.
   */
  private recordAssistantResponse(
    response: plugins.smartai.ChatResponse
  ): interfaces.IAgentMessage {
    this.messageHistory.push({
      role: 'assistant',
      content: response.message,
    });

    // A separate object, so callers cannot alter the stored history entry
    return {
      role: 'assistant',
      content: response.message,
    };
  }

  /**
   * Limit the history sent to the provider to avoid token explosion.
   * Keeps the first message (the original task) and the most recent ones so
   * that at most `maxHistoryMessages` entries are returned. A limit that is
   * not greater than zero disables windowing.
   */
  private selectHistoryWindow(
    history: plugins.smartai.ChatMessage[]
  ): plugins.smartai.ChatMessage[] {
    const limit = this.maxHistoryMessages;
    if (!(limit > 0) || history.length <= limit) {
      return history;
    }

    // Guard the zero case: slice(-0) is slice(0) and would return everything
    const recentCount = Math.trunc(limit) - 1;
    const recentMessages = recentCount > 0 ? history.slice(-recentCount) : [];

    return [history[0], ...recentMessages];
  }

  /**
   * Parse the content inside a tool_call block
   * @returns The proposal, or null when the tool or action tag is missing
   */
  private parseToolCallContent(content: string): interfaces.IToolCallProposal | null {
    const toolMatch = content.match(/<tool>(.*?)<\/tool>/s);
    if (!toolMatch) return null;

    const actionMatch = content.match(/<action>(.*?)<\/action>/s);
    if (!actionMatch) return null;

    const paramsMatch = content.match(/<params>([\s\S]*?)<\/params>/);
    const reasoningMatch = content.match(/<reasoning>([\s\S]*?)<\/reasoning>/);

    return {
      proposalId: this.generateProposalId(),
      toolName: toolMatch[1].trim(),
      action: actionMatch[1].trim(),
      params: paramsMatch ? this.parseParams(paramsMatch[1]) : {},
      reasoning: reasoningMatch ? reasoningMatch[1].trim() : undefined,
    };
  }

  /**
   * Turn the raw text of a params tag into a parameter object.
   * JSON is preferred; anything that is not a JSON object (invalid JSON, or
   * a JSON array/primitive/null) falls back to XML-style extraction so the
   * result always is a plain key/value object.
   */
  private parseParams(raw: string): Record<string, unknown> {
    try {
      const parsed: unknown = JSON.parse(raw.trim());
      if (typeof parsed === 'object' && parsed !== null && !Array.isArray(parsed)) {
        return parsed as Record<string, unknown>;
      }
    } catch {
      // Not valid JSON - handled by the XML fallback below
    }

    return this.extractParamsFromXml(raw);
  }

  /**
   * Extract parameters from XML-like format when JSON parsing fails.
   * Each `<name>value</name>` pair becomes one entry; values that are valid
   * JSON are parsed, everything else stays a trimmed string.
   */
  private extractParamsFromXml(content: string): Record<string, unknown> {
    const params: Record<string, unknown> = {};

    for (const match of content.matchAll(/<(\w+)>([\s\S]*?)<\/\1>/g)) {
      const key = match[1];
      if (key === '__proto__') {
        // Assigning this key would replace the object's prototype with
        // model-provided data instead of storing a parameter.
        continue;
      }

      const rawValue = match[2].trim();
      let value: unknown = rawValue;
      try {
        // Try to parse as JSON for arrays/objects/numbers/booleans
        value = JSON.parse(rawValue);
      } catch {
        // Keep as string if not valid JSON
      }

      params[key] = value;
    }

    return params;
  }

  /**
   * Build tool descriptions for the system message
   * (full descriptions of all registered tools, separated by blank lines)
   */
  private buildToolDescriptions(): string {
    return Array.from(this.tools.values(), (tool) => tool.getFullDescription()).join('\n\n');
  }

  /**
   * Generate a proposal ID from the current timestamp and a random suffix
   */
  private generateProposalId(): string {
    return `prop_${Date.now()}_${Math.random().toString(36).substring(2, 8)}`;
  }

  /**
   * Get the default system message for the driver
   */
  private getDefaultSystemMessage(): string {
    return `You are an AI assistant that executes tasks by using available tools.

## Your Role
You analyze tasks, break them down into steps, and use tools to accomplish goals.

## CRITICAL: Tool Usage Format
To use a tool, you MUST literally write out the XML tags in your response. The system parses your output looking for these exact tags. Do NOT just describe or mention the tool call - you must OUTPUT the actual XML.

CORRECT (the XML is in the output):
<tool_call>
  <tool>json</tool>
  <action>validate</action>
  <params>{"jsonString": "{\\"key\\":\\"value\\"}"}</params>
</tool_call>

WRONG (just describing, no actual XML):
"I will call json.validate now" or "Let me use the tool"

## Guidelines
1. Think step by step about what needs to be done
2. When you need a tool, OUTPUT the <tool_call> XML tags - do not just mention them
3. Only propose ONE tool call at a time
4. Wait for the result before proposing the next action
5. When the task is complete, OUTPUT:

<task_complete>
Your final result here
</task_complete>

## Important
- The <tool_call> and <task_complete> tags MUST appear literally in your response
- If you just say "I'll call the tool" without the actual XML, it will NOT work
- If you need clarification, ask using <needs_clarification>your question</needs_clarification>`;
  }

  /**
   * Get the system message when no tools are available
   * Used for direct task completion without tool usage
   */
  private getNoToolsSystemMessage(): string {
    // Use custom system message if provided, otherwise use a simple default
    if (this.systemMessage && this.systemMessage !== this.getDefaultSystemMessage()) {
      return this.systemMessage;
    }

    return `You are an AI assistant that completes tasks directly.

## Your Role
You analyze tasks and provide complete, high-quality outputs.

## Output Format
When you have completed the task, wrap your final output in task_complete tags:

<task_complete>
Your complete output here
</task_complete>

## Guidelines
1. Analyze the task requirements carefully
2. Provide a complete and accurate response
3. Always wrap your final output in <task_complete></task_complete> tags
4. If you need clarification, ask using <needs_clarification>your question</needs_clarification>`;
  }
}