initial

2025-12-02 10:59:09 +00:00
commit c1317712a9
23 changed files with 11942 additions and 0 deletions
--- a/ts/smartagent.classes.guardianagent.ts
+++ b/ts/smartagent.classes.guardianagent.ts
@@ -0,0 +1,241 @@
+import * as plugins from './plugins.js';
+import * as interfaces from './smartagent.interfaces.js';
+import type { BaseToolWrapper } from './smartagent.tools.base.js';
+
+/**
+ * GuardianAgent - Evaluates tool call proposals against a policy.
+ *
+ * Each proposal is rendered into a prompt, sent to the configured AI provider
+ * together with the policy, and the provider's reply is parsed into an
+ * approve/reject decision. Parsing fails closed: a reply that is not an
+ * unambiguous approval is treated as a rejection.
+ */
+export class GuardianAgent {
+  /**
+   * Phrases that count as approval when the reply contains no <decision> tag.
+   * Word boundaries keep "disapproved" from matching "approved".
+   */
+  private static readonly approvalPattern = /\b(?:approved|i approve|looks safe)\b/;
+
+  /**
+   * Heuristic for rejection wording or negated approval ("not approved",
+   * "cannot approve", "unsafe", ...). Any hit vetoes an approval.
+   * Expects lower-cased input.
+   */
+  private static readonly rejectionPattern =
+    /\b(?:reject\w*|den(?:y|ied|ies|ial)|disapprov\w*|unsafe|not\s+safe|(?:not|never|cannot|can['\u2019]t|won['\u2019]t|don['\u2019]t|shouldn['\u2019]t)\s+(?:be\s+)?approved?)\b/;
+
+  private readonly provider: plugins.smartai.MultiModalModel;
+  private policyPrompt: string;
+  private readonly tools: Map<string, BaseToolWrapper> = new Map();
+
+  /**
+   * @param provider - AI provider used to reason about each proposal
+   * @param policyPrompt - Policy text embedded into the Guardian system message
+   */
+  constructor(
+    provider: plugins.smartai.MultiModalModel,
+    policyPrompt: string
+  ) {
+    this.provider = provider;
+    this.policyPrompt = policyPrompt;
+  }
+
+  /**
+   * Register a tool for reference during evaluation.
+   * A tool registered under an already-used name replaces the earlier one.
+   */
+  public registerTool(tool: BaseToolWrapper): void {
+    this.tools.set(tool.name, tool);
+  }
+
+  /**
+   * Evaluate a tool call proposal against the policy.
+   *
+   * @param proposal - The tool call the driver wants to make
+   * @param taskContext - Description of the task, included verbatim in the prompt
+   * @returns The decision parsed from the provider's reply
+   */
+  public async evaluate(
+    proposal: interfaces.IToolCallProposal,
+    taskContext: string
+  ): Promise<interfaces.IGuardianDecision> {
+    // Generic summary, used when the tool is unknown or cannot describe the call
+    let callSummary = `${proposal.toolName}.${proposal.action}(${JSON.stringify(proposal.params)})`;
+
+    const tool = this.tools.get(proposal.toolName);
+    if (tool) {
+      try {
+        callSummary = tool.getCallSummary(proposal.action, proposal.params);
+      } catch {
+        // Best effort only: keep the generic summary if the tool cannot build one
+      }
+    }
+
+    // Build the evaluation prompt
+    const evaluationPrompt = this.buildEvaluationPrompt(
+      proposal,
+      callSummary,
+      taskContext
+    );
+
+    // Get response from provider; every evaluation starts from an empty history
+    const response = await this.provider.chat({
+      systemMessage: this.buildGuardianSystemMessage(),
+      userMessage: evaluationPrompt,
+      messageHistory: [],
+    });
+
+    // Parse the decision from the response
+    return this.parseDecision(response.message);
+  }
+
+  /**
+   * Build the system message for the Guardian.
+   * Embeds the current policy and the XML response format that
+   * parseDecision() reads back.
+   */
+  private buildGuardianSystemMessage(): string {
+    return `You are a Guardian AI responsible for evaluating tool call proposals.
+
+## Your Role
+You evaluate whether proposed tool calls are safe and aligned with the policy.
+
+## Policy to Enforce
+${this.policyPrompt}
+
+## Response Format
+For EVERY evaluation, respond with a decision in this exact format:
+
+<guardian_decision>
+  <decision>approve OR reject</decision>
+  <reason>Your detailed explanation</reason>
+  <concerns>List any concerns, even if approving</concerns>
+  <suggestions>Alternative approaches if rejecting</suggestions>
+</guardian_decision>
+
+## Guidelines
+1. Carefully analyze what the tool call will do
+2. Consider security implications
+3. Check against the policy requirements
+4. If uncertain, err on the side of caution (reject)
+5. Provide actionable feedback when rejecting`;
+  }
+
+  /**
+   * Build the evaluation prompt for a specific proposal.
+   *
+   * @param proposal - The proposed tool call
+   * @param callSummary - Human-readable summary of the call
+   * @param taskContext - Description of the task being worked on
+   */
+  private buildEvaluationPrompt(
+    proposal: interfaces.IToolCallProposal,
+    callSummary: string,
+    taskContext: string
+  ): string {
+    const toolInfo = this.tools.get(proposal.toolName);
+    const toolDescription = toolInfo ? toolInfo.getFullDescription() : 'Unknown tool';
+
+    return `## Task Context
+${taskContext}
+
+## Tool Being Used
+${toolDescription}
+
+## Proposed Tool Call
+- **Tool**: ${proposal.toolName}
+- **Action**: ${proposal.action}
+- **Parameters**: ${JSON.stringify(proposal.params, null, 2)}
+
+## Human-Readable Summary
+${callSummary}
+
+## Driver's Reasoning
+${proposal.reasoning || 'No reasoning provided'}
+
+---
+
+Evaluate this tool call against the policy. Should it be approved or rejected?`;
+  }
+
+  /**
+   * Interpret the text found inside a <decision> tag.
+   *
+   * Approves only when the first word is "approve"/"approved" and no rejection
+   * wording is present, so an echoed template ("approve OR reject"),
+   * "do not approve" and "disapprove" all resolve to a rejection.
+   */
+  private static interpretDecisionTag(rawDecision: string): 'approve' | 'reject' {
+    const lowered = rawDecision.toLowerCase();
+    const firstWord = lowered.split(/[^a-z]+/).find((word) => word.length > 0);
+    const startsWithApproval = firstWord === 'approve' || firstWord === 'approved';
+
+    return startsWithApproval && !GuardianAgent.rejectionPattern.test(lowered)
+      ? 'approve'
+      : 'reject';
+  }
+
+  /**
+   * Interpret a reply that carries no <decision> tag.
+   *
+   * Approves only when an approval phrase is present and nothing in the reply
+   * reads as a rejection; "not approved" therefore resolves to a rejection.
+   */
+  private static interpretUntaggedResponse(response: string): 'approve' | 'reject' {
+    const lowered = response.toLowerCase();
+    const soundsApproving = GuardianAgent.approvalPattern.test(lowered);
+
+    return soundsApproving && !GuardianAgent.rejectionPattern.test(lowered)
+      ? 'approve'
+      : 'reject';
+  }
+
+  /**
+   * Parse the guardian decision from the response.
+   *
+   * @param response - Raw reply text from the provider
+   * @returns The decision (rejection unless approval is unambiguous), a reason,
+   *          and any concerns/suggestions that were supplied and are not "none"
+   */
+  private parseDecision(response: string): interfaces.IGuardianDecision {
+    // A missing or non-string reply is handled as an empty reply, i.e. a rejection
+    const text = typeof response === 'string' ? response : '';
+
+    // Try to extract from XML tags
+    const decisionMatch = text.match(/<decision>([\s\S]*?)<\/decision>/);
+    const reasonMatch = text.match(/<reason>([\s\S]*?)<\/reason>/);
+    const concernsMatch = text.match(/<concerns>([\s\S]*?)<\/concerns>/);
+    const suggestionsMatch = text.match(/<suggestions>([\s\S]*?)<\/suggestions>/);
+
+    // Determine decision
+    const decision = decisionMatch
+      ? GuardianAgent.interpretDecisionTag(decisionMatch[1] ?? '')
+      : GuardianAgent.interpretUntaggedResponse(text);
+
+    // Extract reason
+    let reason = reasonMatch ? (reasonMatch[1] ?? '').trim() : '';
+    if (!reason) {
+      // No usable <reason> tag: use the start of the raw reply instead
+      reason = text.substring(0, 500);
+    }
+    if (!reason) {
+      reason = 'Guardian returned an empty response';
+    }
+
+    // Extract concerns: one entry per non-empty line
+    const concerns: string[] = [];
+    if (concernsMatch) {
+      const concernsText = (concernsMatch[1] ?? '').trim();
+      if (concernsText && concernsText.toLowerCase() !== 'none') {
+        const concernLines = concernsText.split(/[\n\r]+/).map(l => l.trim()).filter(l => l);
+        concerns.push(...concernLines);
+      }
+    }
+
+    // Extract suggestions
+    const suggestions = suggestionsMatch ? (suggestionsMatch[1] ?? '').trim() : undefined;
+
+    return {
+      decision,
+      reason,
+      concerns: concerns.length > 0 ? concerns : undefined,
+      suggestions: suggestions && suggestions.toLowerCase() !== 'none' ? suggestions : undefined,
+    };
+  }
+
+  /**
+   * Quick validation without AI (for obviously invalid proposals).
+   *
+   * Rejects proposals that name an unregistered tool, an action the tool does
+   * not list, or that omit a parameter listed in the action schema's
+   * `required` array.
+   *
+   * @returns A rejection, or null if AI evaluation is needed
+   */
+  public quickValidate(proposal: interfaces.IToolCallProposal): interfaces.IGuardianDecision | null {
+    // Check if tool exists
+    const tool = this.tools.get(proposal.toolName);
+    if (!tool) {
+      return {
+        decision: 'reject',
+        reason: `Unknown tool: ${proposal.toolName}`,
+      };
+    }
+
+    // Check if action exists
+    const validAction = tool.actions.find(a => a.name === proposal.action);
+    if (!validAction) {
+      return {
+        decision: 'reject',
+        reason: `Unknown action "${proposal.action}" for tool "${proposal.toolName}". Available actions: ${tool.actions.map(a => a.name).join(', ')}`,
+      };
+    }
+
+    // Check required parameters
+    const schema = validAction.parameters;
+    if (schema && schema.required && Array.isArray(schema.required)) {
+      // Missing or non-object params count as "no parameters supplied"
+      const rawParams: unknown = proposal.params;
+      const suppliedParams = typeof rawParams === 'object' && rawParams !== null ? rawParams : {};
+
+      for (const requiredParam of schema.required as string[]) {
+        // Own properties only: inherited names such as "toString" must not satisfy a requirement
+        if (!Object.prototype.hasOwnProperty.call(suppliedParams, requiredParam)) {
+          return {
+            decision: 'reject',
+            reason: `Missing required parameter: ${requiredParam}`,
+          };
+        }
+      }
+    }
+
+    // Needs full AI evaluation
+    return null;
+  }
+
+  /**
+   * Update the policy prompt used for subsequent evaluations
+   */
+  public setPolicy(policyPrompt: string): void {
+    this.policyPrompt = policyPrompt;
+  }
+
+  /**
+   * Get current policy
+   */
+  public getPolicy(): string {
+    return this.policyPrompt;
+  }
+}