242 lines
7.0 KiB
TypeScript
242 lines
7.0 KiB
TypeScript
|
|
import * as plugins from './plugins.js';
|
||
|
|
import * as interfaces from './smartagent.interfaces.js';
|
||
|
|
import type { BaseToolWrapper } from './smartagent.tools.base.js';
|
||
|
|
|
||
|
|
/**
|
||
|
|
* GuardianAgent - Evaluates tool call proposals against a policy
|
||
|
|
* Uses AI reasoning to approve or reject tool calls
|
||
|
|
*/
|
||
|
|
export class GuardianAgent {
  /**
   * Lower-cased phrases that indicate the guardian refused or negated an
   * approval. Only consulted by the free-text fallback in `parseDecision`,
   * so that a reply such as "this is not approved" is never read as approval.
   */
  private static readonly rejectionMarkers: readonly string[] = [
    'reject',
    'not approve',
    'not be approved',
    "n't approve",
    'disapprove',
    'unapproved',
    'deny',
    'denied',
    'unsafe',
    'not safe',
  ];

  private provider: plugins.smartai.MultiModalModel;
  private policyPrompt: string;
  private tools: Map<string, BaseToolWrapper> = new Map();

  /**
   * @param provider - Model used for the AI evaluation; its `chat` method is called by `evaluate`.
   * @param policyPrompt - Policy text embedded into the guardian system message.
   */
  constructor(
    provider: plugins.smartai.MultiModalModel,
    policyPrompt: string
  ) {
    this.provider = provider;
    this.policyPrompt = policyPrompt;
  }

  /**
   * Register a tool for reference during evaluation.
   * A tool registered later under the same `name` replaces the earlier one.
   */
  public registerTool(tool: BaseToolWrapper): void {
    this.tools.set(tool.name, tool);
  }

  /**
   * Evaluate a tool call proposal against the policy using the AI provider.
   *
   * @param proposal - The tool call to judge (tool name, action, params, optional reasoning).
   * @param taskContext - Free-text description of the task, inserted into the prompt.
   * @returns The parsed guardian decision; anything that is not a clear approval is a rejection.
   */
  public async evaluate(
    proposal: interfaces.IToolCallProposal,
    taskContext: string
  ): Promise<interfaces.IGuardianDecision> {
    // Generic summary, used when the tool is unknown or cannot describe the call itself.
    let callSummary = `${proposal.toolName}.${proposal.action}(${JSON.stringify(proposal.params)})`;

    const tool = this.tools.get(proposal.toolName);
    if (tool) {
      try {
        callSummary = tool.getCallSummary(proposal.action, proposal.params);
      } catch {
        // Deliberate best-effort: a tool that fails to summarise the call must
        // not block the evaluation, so the generic summary above is kept.
      }
    }

    const evaluationPrompt = this.buildEvaluationPrompt(
      proposal,
      callSummary,
      taskContext
    );

    // Each evaluation is stateless: no message history is sent to the provider.
    const response = await this.provider.chat({
      systemMessage: this.buildGuardianSystemMessage(),
      userMessage: evaluationPrompt,
      messageHistory: [],
    });

    return this.parseDecision(response.message, proposal);
  }

  /**
   * Build the system message for the Guardian: role, the current policy and
   * the XML response format that `parseDecision` expects.
   */
  private buildGuardianSystemMessage(): string {
    return `You are a Guardian AI responsible for evaluating tool call proposals.

## Your Role
You evaluate whether proposed tool calls are safe and aligned with the policy.

## Policy to Enforce
${this.policyPrompt}

## Response Format
For EVERY evaluation, respond with a decision in this exact format:

<guardian_decision>
<decision>approve OR reject</decision>
<reason>Your detailed explanation</reason>
<concerns>List any concerns, even if approving</concerns>
<suggestions>Alternative approaches if rejecting</suggestions>
</guardian_decision>

## Guidelines
1. Carefully analyze what the tool call will do
2. Consider security implications
3. Check against the policy requirements
4. If uncertain, err on the side of caution (reject)
5. Provide actionable feedback when rejecting`;
  }

  /**
   * Build the evaluation prompt for a specific proposal.
   *
   * @param proposal - The tool call being judged.
   * @param callSummary - Human-readable one-line summary of the call.
   * @param taskContext - Description of the task the call belongs to.
   */
  private buildEvaluationPrompt(
    proposal: interfaces.IToolCallProposal,
    callSummary: string,
    taskContext: string
  ): string {
    const toolInfo = this.tools.get(proposal.toolName);
    const toolDescription = toolInfo ? toolInfo.getFullDescription() : 'Unknown tool';

    return `## Task Context
${taskContext}

## Tool Being Used
${toolDescription}

## Proposed Tool Call
- **Tool**: ${proposal.toolName}
- **Action**: ${proposal.action}
- **Parameters**: ${JSON.stringify(proposal.params, null, 2)}

## Human-Readable Summary
${callSummary}

## Driver's Reasoning
${proposal.reasoning || 'No reasoning provided'}

---

Evaluate this tool call against the policy. Should it be approved or rejected?`;
  }

  /**
   * Parse the guardian decision from the model response.
   *
   * The parser fails closed: the result is `approve` only when the response
   * is unambiguous, and `reject` in every other case.
   *
   * @param response - Raw text returned by the provider.
   * @param proposal - The evaluated proposal (currently not used by the parser).
   */
  private parseDecision(
    response: string,
    proposal: interfaces.IToolCallProposal
  ): interfaces.IGuardianDecision {
    // A missing or non-string reply is treated as empty text and therefore rejected.
    const text = typeof response === 'string' ? response : '';

    const decisionMatch = text.match(/<decision>([\s\S]*?)<\/decision>/);
    const reasonMatch = text.match(/<reason>([\s\S]*?)<\/reason>/);
    const concernsMatch = text.match(/<concerns>([\s\S]*?)<\/concerns>/);
    const suggestionsMatch = text.match(/<suggestions>([\s\S]*?)<\/suggestions>/);

    let decision: 'approve' | 'reject' = 'reject';
    if (decisionMatch) {
      // Require the verdict to BE an approval, not merely contain the word:
      // "approve OR reject" (the echoed placeholder), "do not approve" and
      // "disapprove" all contain "approve" and must not pass.
      const verdict = (decisionMatch[1] ?? '')
        .trim()
        .toLowerCase()
        .replace(/^[^a-z]+|[^a-z]+$/g, '');
      if (verdict === 'approve' || verdict === 'approved') {
        decision = 'approve';
      }
    } else {
      // Fallback for unstructured replies: approval keywords count only when
      // nothing in the text signals a rejection or a negated approval.
      const lowerResponse = text.toLowerCase();
      const mentionsApproval =
        lowerResponse.includes('approved') ||
        lowerResponse.includes('i approve') ||
        lowerResponse.includes('looks safe');
      const mentionsRejection = GuardianAgent.rejectionMarkers.some((marker) =>
        lowerResponse.includes(marker)
      );
      if (mentionsApproval && !mentionsRejection) {
        decision = 'approve';
      }
    }

    // Reason: tag content, or the first 500 characters of the reply when no
    // (non-empty) <reason> tag is present.
    let reason = reasonMatch ? (reasonMatch[1] ?? '').trim() : '';
    if (!reason) {
      reason = text.substring(0, 500);
    }

    // Concerns: one entry per non-empty line; a literal "none" means no concerns.
    const concerns: string[] = [];
    if (concernsMatch) {
      const concernsText = (concernsMatch[1] ?? '').trim();
      if (concernsText && concernsText.toLowerCase() !== 'none') {
        const concernLines = concernsText
          .split(/[\n\r]+/)
          .map((line) => line.trim())
          .filter((line) => line);
        concerns.push(...concernLines);
      }
    }

    const suggestions = suggestionsMatch ? (suggestionsMatch[1] ?? '').trim() : undefined;

    return {
      decision,
      reason,
      concerns: concerns.length > 0 ? concerns : undefined,
      suggestions: suggestions && suggestions.toLowerCase() !== 'none' ? suggestions : undefined,
    };
  }

  /**
   * Quick structural validation without AI.
   *
   * Rejects proposals that name an unregistered tool, an action the tool does
   * not list, or that lack a parameter listed in the action schema's
   * `required` array.
   *
   * @returns A `reject` decision for structurally invalid proposals, or `null`
   * when full AI evaluation is needed.
   */
  public quickValidate(proposal: interfaces.IToolCallProposal): interfaces.IGuardianDecision | null {
    const tool = this.tools.get(proposal.toolName);
    if (!tool) {
      return {
        decision: 'reject',
        reason: `Unknown tool: ${proposal.toolName}`,
      };
    }

    const validAction = tool.actions.find((a) => a.name === proposal.action);
    if (!validAction) {
      return {
        decision: 'reject',
        reason: `Unknown action "${proposal.action}" for tool "${proposal.toolName}". Available actions: ${tool.actions.map((a) => a.name).join(', ')}`,
      };
    }

    const schema = validAction.parameters;
    if (schema && Array.isArray(schema.required)) {
      // Missing or non-object params count as "no parameters supplied"
      // instead of throwing on the property check below.
      const rawParams: unknown = proposal.params;
      const suppliedParams =
        rawParams !== null && typeof rawParams === 'object' ? rawParams : {};

      for (const requiredParam of schema.required as string[]) {
        // Own-property check: inherited keys such as "toString" or
        // "constructor" must not satisfy a required parameter.
        if (!Object.prototype.hasOwnProperty.call(suppliedParams, requiredParam)) {
          return {
            decision: 'reject',
            reason: `Missing required parameter: ${requiredParam}`,
          };
        }
      }
    }

    // Structurally valid: needs full AI evaluation.
    return null;
  }

  /**
   * Update the policy prompt used for subsequent evaluations.
   */
  public setPolicy(policyPrompt: string): void {
    this.policyPrompt = policyPrompt;
  }

  /**
   * Get the current policy prompt.
   */
  public getPolicy(): string {
    return this.policyPrompt;
  }
}
|