Files
smartagent/ts_tools/tool.browser.ts
T

95 lines
4.1 KiB
TypeScript
Raw Normal View History

import * as plugins from './plugins.js';
import {
formatToolOutput,
type IBrowserToolInput,
type IToolExecutionContext,
type TBrowserToolAction,
} from './tool.context.js';
/**
 * Optional output limits for the tools built by `createBrowserTools`.
 *
 * Both values are passed through unchanged to the output truncation step;
 * leaving a field unset passes `undefined` for that limit.
 */
export interface ICreateBrowserToolsOptions {
  /** Upper bound on the number of output lines kept before truncation. */
  maxLines?: number;

  /** Upper bound on the number of output bytes kept before truncation. */
  maxBytes?: number;
}
/**
 * Every action name the browser tool accepts.
 *
 * Insertion order matters: it is the order in which the names are listed
 * when an unsupported action is reported.
 */
const browserActions = new Set<TBrowserToolAction>([
  'navigate',
  'snapshot',
  'screenshot',
  'click',
  'fill',
  'press',
  'evaluate',
  'close',
]);
/**
 * Builds a tool set containing a single `browser` tool that forwards actions
 * to the browser provided by the execution context.
 *
 * @param context Execution context; its `browser`, `requestPermission` and
 *   `abortSignal` members are read when the tool runs.
 * @param options Optional line/byte limits applied to the formatted output.
 * @returns A tool set with one entry, `browser`.
 */
export function createBrowserTools(context: IToolExecutionContext, options: ICreateBrowserToolsOptions = {}): plugins.ToolSet {
  const description = [
    'Control a browser supplied by the host execution context for web UI inspection and interaction.',
    'Actions: navigate, snapshot, screenshot, click, fill, press, evaluate, close.',
    'Use snapshot after navigation or interaction to inspect page text and interactive selectors before choosing the next action.',
    'Actions that navigate or modify page state require host permission when configured.',
  ].join(' ');

  // `action` is a free-form string here; it is validated and normalized inside `execute`.
  const inputSchema = plugins.z.object({
    action: plugins.z.string().default('snapshot').describe('Action: navigate, snapshot, screenshot, click, fill, press, evaluate, or close'),
    url: plugins.z.string().optional().describe('URL for navigate'),
    selector: plugins.z.string().optional().describe('CSS or Playwright selector for click/fill'),
    text: plugins.z.string().optional().describe('Text for fill, key name for press, or screenshot mode/full-page hint'),
    script: plugins.z.string().optional().describe('JavaScript expression or function body for evaluate'),
    timeoutMs: plugins.z.number().optional().describe('Optional action timeout in milliseconds'),
  });

  return {
    browser: plugins.tool({
      description,
      inputSchema,
      execute: async (input: IBrowserToolInput) => {
        // Fail fast when the host did not supply a browser.
        if (!context.browser) {
          throw new Error('Browser tool is not available in this execution context.');
        }

        const normalizedAction = normalizeBrowserAction(input.action);

        // Permission is requested before the browser is touched.
        await requestBrowserPermission(context, { ...input, action: normalizedAction });

        const executionOptions = {
          timeoutMs: input.timeoutMs,
          abortSignal: context.abortSignal,
        };
        const browserResult = await context.browser.execute({ ...input, action: normalizedAction }, executionOptions);

        const truncated = plugins.truncateOutput(formatToolOutput(browserResult), {
          maxLines: options.maxLines,
          maxBytes: options.maxBytes,
        });
        return truncated.content;
      },
    }),
  };
}
/**
 * Converts a raw action value into a supported browser action.
 *
 * String input is trimmed and lower-cased. Non-string input and strings that
 * are empty after trimming fall back to `'snapshot'`.
 *
 * @param input Raw action value as received from the tool call.
 * @returns The normalized action name.
 * @throws Error when the normalized value is not in `browserActions`.
 */
const normalizeBrowserAction = (input: unknown): TBrowserToolAction => {
  const trimmed = typeof input === 'string' ? input.trim() : '';
  const candidate = trimmed === '' ? 'snapshot' : trimmed.toLowerCase();

  if (!browserActions.has(candidate as TBrowserToolAction)) {
    // Report the caller's original value, not the normalized one.
    throw new Error(`Unsupported browser action: ${String(input)}. Use one of: ${[...browserActions].join(', ')}.`);
  }

  return candidate as TBrowserToolAction;
};
/**
 * Asks the host for permission before a browser action runs.
 *
 * Nothing is requested when the context has no `requestPermission` hook, or
 * when the action is `snapshot` or `screenshot`.
 *
 * @param context Execution context that may provide `requestPermission`.
 * @param input Tool input with an already-normalized `action`.
 * @returns Resolves once the permission request has settled.
 */
const requestBrowserPermission = async (context: IToolExecutionContext, input: IBrowserToolInput & { action: TBrowserToolAction }): Promise<void> => {
  const { action } = input;
  const isReadOnlyAction = action === 'snapshot' || action === 'screenshot';
  if (!context.requestPermission || isReadOnlyAction) {
    return;
  }

  // Human-readable prompt title for each action.
  const permissionTitles: Record<TBrowserToolAction, string> = {
    navigate: 'Navigate browser',
    snapshot: 'Inspect browser',
    screenshot: 'Capture browser screenshot',
    click: 'Click browser element',
    fill: 'Fill browser element',
    press: 'Press browser key',
    evaluate: 'Evaluate browser JavaScript',
    close: 'Close browser session',
  };
  const title = permissionTitles[action];

  // Each action-specific field is set only for its own action and is `undefined` otherwise.
  const key = action === 'press' ? input.text : undefined;
  // For `fill`, only the text length is reported, not the text itself.
  const textLength = action === 'fill' ? input.text?.length ?? 0 : undefined;
  const scriptPreview = action === 'evaluate' && input.script ? compactMetadataText(input.script) : undefined;

  await context.requestPermission({
    type: 'browser',
    title,
    metadata: {
      action,
      url: input.url,
      selector: input.selector,
      key,
      textLength,
      scriptPreview,
    },
  });
};
/**
 * Produces a short, single-line preview of `text` for permission metadata.
 *
 * Every run of whitespace is collapsed to one space and the result is trimmed.
 * Results longer than 160 UTF-16 code units are shortened to at most 157 code
 * units followed by `...`.
 *
 * @param text Raw text to compact, for example a script body.
 * @returns The compacted text, truncated with a trailing `...` when too long.
 */
const compactMetadataText = (text: string): string => {
  const maxLength = 160;
  const ellipsis = '...';

  const compacted = text.replace(/\s+/g, ' ').trim();
  if (compacted.length <= maxLength) {
    return compacted;
  }

  let cut = maxLength - ellipsis.length;
  // `slice` counts UTF-16 code units. If the last unit kept is a high surrogate,
  // its low surrogate would be cut off, leaving a malformed string, so drop it.
  const lastKeptUnit = compacted.charCodeAt(cut - 1);
  if (lastKeptUnit >= 0xd800 && lastKeptUnit <= 0xdbff) {
    cut -= 1;
  }

  return `${compacted.slice(0, cut)}${ellipsis}`;
};