Files
smartagent/ts/smartagent.classes.guardianagent.ts
2025-12-02 10:59:09 +00:00

242 lines
7.0 KiB
TypeScript

import * as plugins from './plugins.js';
import * as interfaces from './smartagent.interfaces.js';
import type { BaseToolWrapper } from './smartagent.tools.base.js';
/**
* GuardianAgent - Evaluates tool call proposals against a policy
* Uses AI reasoning to approve or reject tool calls
*/
export class GuardianAgent {
private provider: plugins.smartai.MultiModalModel;
private policyPrompt: string;
private tools: Map<string, BaseToolWrapper> = new Map();
constructor(
provider: plugins.smartai.MultiModalModel,
policyPrompt: string
) {
this.provider = provider;
this.policyPrompt = policyPrompt;
}
/**
* Register a tool for reference during evaluation
*/
public registerTool(tool: BaseToolWrapper): void {
this.tools.set(tool.name, tool);
}
/**
* Evaluate a tool call proposal against the policy
*/
public async evaluate(
proposal: interfaces.IToolCallProposal,
taskContext: string
): Promise<interfaces.IGuardianDecision> {
// Get the tool to generate a human-readable summary
const tool = this.tools.get(proposal.toolName);
let callSummary = `${proposal.toolName}.${proposal.action}(${JSON.stringify(proposal.params)})`;
if (tool) {
try {
callSummary = tool.getCallSummary(proposal.action, proposal.params);
} catch {
// Fallback to basic summary
}
}
// Build the evaluation prompt
const evaluationPrompt = this.buildEvaluationPrompt(
proposal,
callSummary,
taskContext
);
// Get response from provider
const response = await this.provider.chat({
systemMessage: this.buildGuardianSystemMessage(),
userMessage: evaluationPrompt,
messageHistory: [],
});
// Parse the decision from the response
return this.parseDecision(response.message, proposal);
}
/**
* Build the system message for the Guardian
*/
private buildGuardianSystemMessage(): string {
return `You are a Guardian AI responsible for evaluating tool call proposals.
## Your Role
You evaluate whether proposed tool calls are safe and aligned with the policy.
## Policy to Enforce
${this.policyPrompt}
## Response Format
For EVERY evaluation, respond with a decision in this exact format:
<guardian_decision>
<decision>approve OR reject</decision>
<reason>Your detailed explanation</reason>
<concerns>List any concerns, even if approving</concerns>
<suggestions>Alternative approaches if rejecting</suggestions>
</guardian_decision>
## Guidelines
1. Carefully analyze what the tool call will do
2. Consider security implications
3. Check against the policy requirements
4. If uncertain, err on the side of caution (reject)
5. Provide actionable feedback when rejecting`;
}
/**
* Build the evaluation prompt for a specific proposal
*/
private buildEvaluationPrompt(
proposal: interfaces.IToolCallProposal,
callSummary: string,
taskContext: string
): string {
const toolInfo = this.tools.get(proposal.toolName);
const toolDescription = toolInfo ? toolInfo.getFullDescription() : 'Unknown tool';
return `## Task Context
${taskContext}
## Tool Being Used
${toolDescription}
## Proposed Tool Call
- **Tool**: ${proposal.toolName}
- **Action**: ${proposal.action}
- **Parameters**: ${JSON.stringify(proposal.params, null, 2)}
## Human-Readable Summary
${callSummary}
## Driver's Reasoning
${proposal.reasoning || 'No reasoning provided'}
---
Evaluate this tool call against the policy. Should it be approved or rejected?`;
}
/**
* Parse the guardian decision from the response
*/
private parseDecision(
response: string,
proposal: interfaces.IToolCallProposal
): interfaces.IGuardianDecision {
// Try to extract from XML tags
const decisionMatch = response.match(/<decision>(.*?)<\/decision>/s);
const reasonMatch = response.match(/<reason>([\s\S]*?)<\/reason>/);
const concernsMatch = response.match(/<concerns>([\s\S]*?)<\/concerns>/);
const suggestionsMatch = response.match(/<suggestions>([\s\S]*?)<\/suggestions>/);
// Determine decision
let decision: 'approve' | 'reject' = 'reject';
if (decisionMatch) {
const decisionText = decisionMatch[1].trim().toLowerCase();
decision = decisionText.includes('approve') ? 'approve' : 'reject';
} else {
// Fallback: look for approval keywords in the response
const lowerResponse = response.toLowerCase();
if (
lowerResponse.includes('approved') ||
lowerResponse.includes('i approve') ||
lowerResponse.includes('looks safe')
) {
decision = 'approve';
}
}
// Extract reason
let reason = reasonMatch ? reasonMatch[1].trim() : '';
if (!reason) {
// Use the full response as reason if no tag found
reason = response.substring(0, 500);
}
// Extract concerns
const concerns: string[] = [];
if (concernsMatch) {
const concernsText = concernsMatch[1].trim();
if (concernsText && concernsText.toLowerCase() !== 'none') {
// Split by newlines or bullet points
const concernLines = concernsText.split(/[\n\r]+/).map(l => l.trim()).filter(l => l);
concerns.push(...concernLines);
}
}
// Extract suggestions
const suggestions = suggestionsMatch ? suggestionsMatch[1].trim() : undefined;
return {
decision,
reason,
concerns: concerns.length > 0 ? concerns : undefined,
suggestions: suggestions && suggestions.toLowerCase() !== 'none' ? suggestions : undefined,
};
}
/**
* Quick validation without AI (for obviously safe/unsafe operations)
* Returns null if AI evaluation is needed
*/
public quickValidate(proposal: interfaces.IToolCallProposal): interfaces.IGuardianDecision | null {
// Check if tool exists
if (!this.tools.has(proposal.toolName)) {
return {
decision: 'reject',
reason: `Unknown tool: ${proposal.toolName}`,
};
}
// Check if action exists
const tool = this.tools.get(proposal.toolName)!;
const validAction = tool.actions.find(a => a.name === proposal.action);
if (!validAction) {
return {
decision: 'reject',
reason: `Unknown action "${proposal.action}" for tool "${proposal.toolName}". Available actions: ${tool.actions.map(a => a.name).join(', ')}`,
};
}
// Check required parameters
const schema = validAction.parameters;
if (schema && schema.required && Array.isArray(schema.required)) {
for (const requiredParam of schema.required as string[]) {
if (!(requiredParam in proposal.params)) {
return {
decision: 'reject',
reason: `Missing required parameter: ${requiredParam}`,
};
}
}
}
// Needs full AI evaluation
return null;
}
/**
* Update the policy prompt
*/
public setPolicy(policyPrompt: string): void {
this.policyPrompt = policyPrompt;
}
/**
* Get current policy
*/
public getPolicy(): string {
return this.policyPrompt;
}
}