initial
This commit is contained in:
241
ts/smartagent.classes.guardianagent.ts
Normal file
241
ts/smartagent.classes.guardianagent.ts
Normal file
@@ -0,0 +1,241 @@
|
||||
import * as plugins from './plugins.js';
|
||||
import * as interfaces from './smartagent.interfaces.js';
|
||||
import type { BaseToolWrapper } from './smartagent.tools.base.js';
|
||||
|
||||
/**
 * GuardianAgent - evaluates tool call proposals against a policy.
 *
 * The agent sends each proposal, together with the configured policy, to an
 * AI provider and turns the provider's free-text answer into a structured
 * approve/reject decision. Parsing is deliberately fail-closed: any answer
 * that is missing, ambiguous or self-contradictory results in `reject`.
 */
export class GuardianAgent {
  /** Maximum number of characters of the raw response reused as the reason. */
  private static readonly maxFallbackReasonLength = 500;

  /** Phrases that count as approval when the response has no `<decision>` tag. */
  private static readonly approvalPhrases: readonly string[] = [
    'approved',
    'i approve',
    'looks safe',
  ];

  /**
   * Wording that vetoes a free-text approval: explicit rejection terms, or a
   * negation shortly before "approve"/"safe" within the same sentence
   * (e.g. "not approved", "can't be considered safe"). Matched against the
   * lower-cased response.
   */
  private static readonly rejectionPattern =
    /\breject|\bdisapprov|\bunapprov|\bden(?:y|ied|ies)\b|\bunsafe\b|(?:\b(?:not|never|cannot)\b|n['’]t\b)[^.!?\n]{0,30}?\b(?:approv|safe\b)/;

  private provider: plugins.smartai.MultiModalModel;
  private policyPrompt: string;
  private tools: Map<string, BaseToolWrapper> = new Map();

  /**
   * @param provider AI provider whose `chat` method performs the evaluation
   * @param policyPrompt policy text embedded into the guardian system message
   */
  constructor(
    provider: plugins.smartai.MultiModalModel,
    policyPrompt: string
  ) {
    this.provider = provider;
    this.policyPrompt = policyPrompt;
  }

  /**
   * Register a tool for reference during evaluation.
   * A tool registered under an already-used name replaces the earlier one.
   */
  public registerTool(tool: BaseToolWrapper): void {
    this.tools.set(tool.name, tool);
  }

  /**
   * Evaluate a tool call proposal against the policy.
   *
   * Errors thrown by the provider's `chat` call are not caught here and
   * propagate to the caller.
   *
   * @param proposal the tool call to evaluate
   * @param taskContext description of the task the call belongs to
   * @returns the parsed decision; `reject` whenever the answer is unclear
   */
  public async evaluate(
    proposal: interfaces.IToolCallProposal,
    taskContext: string
  ): Promise<interfaces.IGuardianDecision> {
    const callSummary = this.describeCall(proposal);

    // Build the evaluation prompt
    const evaluationPrompt = this.buildEvaluationPrompt(
      proposal,
      callSummary,
      taskContext
    );

    // Get response from provider
    const response = await this.provider.chat({
      systemMessage: this.buildGuardianSystemMessage(),
      userMessage: evaluationPrompt,
      messageHistory: [],
    });

    // Parse the decision from the response
    return this.parseDecision(response.message, proposal);
  }

  /**
   * Produce a human-readable one-line summary of the proposed call.
   * Uses the registered tool's own summary when available and falls back to
   * `tool.action(params)` if the tool is unknown or its summary throws.
   */
  private describeCall(proposal: interfaces.IToolCallProposal): string {
    const tool = this.tools.get(proposal.toolName);
    if (tool) {
      try {
        return tool.getCallSummary(proposal.action, proposal.params);
      } catch {
        // Best effort only: a failing summary must not block the evaluation,
        // so fall through to the generic summary below.
      }
    }
    return `${proposal.toolName}.${proposal.action}(${GuardianAgent.stringifyParams(proposal.params)})`;
  }

  /**
   * JSON-serialise parameters without ever throwing.
   * Circular structures and BigInt values make `JSON.stringify` throw; in that
   * case a placeholder is returned so the prompt can still be built.
   */
  private static stringifyParams(params: unknown, indent?: number): string {
    try {
      const json: string | undefined = JSON.stringify(params, null, indent);
      // JSON.stringify yields undefined for undefined/function input.
      return json ?? 'undefined';
    } catch {
      return '[unserializable parameters]';
    }
  }

  /**
   * Build the system message for the Guardian.
   * Embeds the current policy and the required response format.
   */
  private buildGuardianSystemMessage(): string {
    return `You are a Guardian AI responsible for evaluating tool call proposals.

## Your Role
You evaluate whether proposed tool calls are safe and aligned with the policy.

## Policy to Enforce
${this.policyPrompt}

## Response Format
For EVERY evaluation, respond with a decision in this exact format:

<guardian_decision>
<decision>approve OR reject</decision>
<reason>Your detailed explanation</reason>
<concerns>List any concerns, even if approving</concerns>
<suggestions>Alternative approaches if rejecting</suggestions>
</guardian_decision>

## Guidelines
1. Carefully analyze what the tool call will do
2. Consider security implications
3. Check against the policy requirements
4. If uncertain, err on the side of caution (reject)
5. Provide actionable feedback when rejecting`;
  }

  /**
   * Build the evaluation prompt for a specific proposal.
   *
   * @param proposal the tool call being evaluated
   * @param callSummary human-readable summary of the call
   * @param taskContext description of the surrounding task
   */
  private buildEvaluationPrompt(
    proposal: interfaces.IToolCallProposal,
    callSummary: string,
    taskContext: string
  ): string {
    const toolInfo = this.tools.get(proposal.toolName);
    const toolDescription = toolInfo ? toolInfo.getFullDescription() : 'Unknown tool';

    return `## Task Context
${taskContext}

## Tool Being Used
${toolDescription}

## Proposed Tool Call
- **Tool**: ${proposal.toolName}
- **Action**: ${proposal.action}
- **Parameters**: ${GuardianAgent.stringifyParams(proposal.params, 2)}

## Human-Readable Summary
${callSummary}

## Driver's Reasoning
${proposal.reasoning || 'No reasoning provided'}

---

Evaluate this tool call against the policy. Should it be approved or rejected?`;
  }

  /**
   * Return the trimmed content of the first `<tag>...</tag>` pair in `text`,
   * or undefined when the pair is absent.
   */
  private static extractTag(text: string, tag: string): string | undefined {
    const match = text.match(new RegExp(`<${tag}>([\\s\\S]*?)</${tag}>`));
    return match ? (match[1] ?? '').trim() : undefined;
  }

  /**
   * Decide whether an untagged response is an unambiguous approval: it must
   * contain an approval phrase and no rejection or negation wording.
   */
  private static isFreeformApproval(text: string): boolean {
    const lowered = text.toLowerCase();
    const mentionsApproval = GuardianAgent.approvalPhrases.some((phrase) =>
      lowered.includes(phrase)
    );
    return mentionsApproval && !GuardianAgent.rejectionPattern.test(lowered);
  }

  /**
   * Parse the guardian decision from the response.
   *
   * Fail-closed rules:
   * - every `<decision>` tag must read exactly "approve"/"approved" (case and
   *   punctuation ignored); text such as "do not approve" or the echoed
   *   template "approve OR reject" is a rejection;
   * - if several `<decision>` tags are present they must all approve, so a
   *   tag echoed from the prompt cannot mask the real verdict;
   * - without any tag, approval needs an approval phrase and no rejection or
   *   negation wording;
   * - a missing or non-string response is rejected.
   *
   * @param response raw message text returned by the provider
   * @param proposal the evaluated proposal (currently unused; kept so the
   *   signature stays stable)
   */
  private parseDecision(
    response: string,
    proposal: interfaces.IToolCallProposal
  ): interfaces.IGuardianDecision {
    const text = typeof response === 'string' ? response : '';

    // Determine decision
    const verdicts = [...text.matchAll(/<decision>([\s\S]*?)<\/decision>/g)].map(
      // Strip everything but letters so "Approve.", "**approve**" normalise.
      (match) => (match[1] ?? '').toLowerCase().replace(/[^a-z]/g, '')
    );
    let decision: 'approve' | 'reject' = 'reject';
    if (verdicts.length > 0) {
      if (verdicts.every((verdict) => verdict === 'approve' || verdict === 'approved')) {
        decision = 'approve';
      }
    } else if (GuardianAgent.isFreeformApproval(text)) {
      decision = 'approve';
    }

    // Extract reason; fall back to the head of the raw response
    let reason = GuardianAgent.extractTag(text, 'reason') ?? '';
    if (!reason) {
      reason =
        text.substring(0, GuardianAgent.maxFallbackReasonLength) ||
        'Guardian returned an empty response; rejecting by default';
    }

    // Extract concerns: one entry per line, list markers removed
    const concerns = (GuardianAgent.extractTag(text, 'concerns') ?? '')
      .split(/[\n\r]+/)
      .map((line) => line.trim().replace(/^(?:[-*•]|\d+[.)])\s+/, '').trim())
      .filter((line) => line !== '' && line.toLowerCase() !== 'none');

    // Extract suggestions
    const suggestions = GuardianAgent.extractTag(text, 'suggestions');

    return {
      decision,
      reason,
      concerns: concerns.length > 0 ? concerns : undefined,
      suggestions: suggestions && suggestions.toLowerCase() !== 'none' ? suggestions : undefined,
    };
  }

  /**
   * Quick validation without AI (for obviously invalid operations).
   *
   * Rejects proposals that name an unregistered tool, an action the tool does
   * not list, or that lack a parameter the action's schema marks as required.
   *
   * @returns a reject decision, or null if AI evaluation is needed
   */
  public quickValidate(proposal: interfaces.IToolCallProposal): interfaces.IGuardianDecision | null {
    // Check if tool exists
    const tool = this.tools.get(proposal.toolName);
    if (!tool) {
      return {
        decision: 'reject',
        reason: `Unknown tool: ${proposal.toolName}`,
      };
    }

    // Check if action exists
    const validAction = tool.actions.find((a) => a.name === proposal.action);
    if (!validAction) {
      return {
        decision: 'reject',
        reason: `Unknown action "${proposal.action}" for tool "${proposal.toolName}". Available actions: ${tool.actions.map((a) => a.name).join(', ')}`,
      };
    }

    // Check required parameters
    const schema = validAction.parameters;
    if (schema && schema.required && Array.isArray(schema.required)) {
      const params: unknown = proposal.params;
      // `in` would throw on null/undefined/primitives, so guard first.
      const hasParamObject = typeof params === 'object' && params !== null;
      for (const requiredParam of schema.required as string[]) {
        // Own-property check: inherited keys such as "toString" must not
        // satisfy a requirement.
        if (!hasParamObject || !Object.prototype.hasOwnProperty.call(params, requiredParam)) {
          return {
            decision: 'reject',
            reason: `Missing required parameter: ${requiredParam}`,
          };
        }
      }
    }

    // Needs full AI evaluation
    return null;
  }

  /**
   * Update the policy prompt used for subsequent evaluations.
   */
  public setPolicy(policyPrompt: string): void {
    this.policyPrompt = policyPrompt;
  }

  /**
   * Get the current policy prompt.
   */
  public getPolicy(): string {
    return this.policyPrompt;
  }
}
|
||||
Reference in New Issue
Block a user