95 lines
4.1 KiB
TypeScript
95 lines
4.1 KiB
TypeScript
import * as plugins from './plugins.js';
|
|
import {
|
|
formatToolOutput,
|
|
type IBrowserToolInput,
|
|
type IToolExecutionContext,
|
|
type TBrowserToolAction,
|
|
} from './tool.context.js';
|
|
|
|
/**
 * Optional limits for the text returned by the browser tool.
 *
 * Both values are passed unchanged to the output truncation helper; a field
 * left `undefined` is forwarded as `undefined`.
 */
export interface ICreateBrowserToolsOptions {
  /** Upper bound on the number of output lines kept before truncation. */
  maxLines?: number;

  /** Upper bound on the number of output bytes kept before truncation. */
  maxBytes?: number;
}
|
|
|
|
/**
 * Action names accepted by the browser tool.
 *
 * Insertion order is significant: the "Unsupported browser action" error
 * lists the actions in this order.
 */
const browserActions = new Set<TBrowserToolAction>([
  'navigate',
  'snapshot',
  'screenshot',
  'click',
  'fill',
  'press',
  'evaluate',
  'close',
]);
|
|
|
|
/**
 * Builds a tool set containing a single `browser` tool that drives the
 * browser supplied on the execution context.
 *
 * On each call the tool:
 * 1. fails if `context.browser` is not set,
 * 2. normalizes the requested action (missing/blank input becomes `snapshot`),
 * 3. asks for permission through `requestBrowserPermission`,
 * 4. runs the action on `context.browser`, forwarding the timeout and the
 *    context's abort signal,
 * 5. formats the result and truncates it using `options`.
 *
 * @param context Execution context providing the browser, abort signal and
 *   (optionally) the permission callback.
 * @param options Optional line/byte limits forwarded to output truncation.
 * @returns A tool set with one entry named `browser`.
 */
export function createBrowserTools(context: IToolExecutionContext, options: ICreateBrowserToolsOptions = {}): plugins.ToolSet {
  // The description is assembled from sentences joined by a single space.
  const description = [
    'Control a browser supplied by the host execution context for web UI inspection and interaction.',
    'Actions: navigate, snapshot, screenshot, click, fill, press, evaluate, close.',
    'Use snapshot after navigation or interaction to inspect page text and interactive selectors before choosing the next action.',
    'Actions that navigate or modify page state require host permission when configured.',
  ].join(' ');

  // `action` is a free-form string here; it is validated against the known
  // action set inside `execute` so that casing/whitespace variants are accepted.
  const inputSchema = plugins.z.object({
    action: plugins.z.string().default('snapshot').describe('Action: navigate, snapshot, screenshot, click, fill, press, evaluate, or close'),
    url: plugins.z.string().optional().describe('URL for navigate'),
    selector: plugins.z.string().optional().describe('CSS or Playwright selector for click/fill'),
    text: plugins.z.string().optional().describe('Text for fill, key name for press, or screenshot mode/full-page hint'),
    script: plugins.z.string().optional().describe('JavaScript expression or function body for evaluate'),
    timeoutMs: plugins.z.number().optional().describe('Optional action timeout in milliseconds'),
  });

  return {
    browser: plugins.tool({
      description,
      inputSchema,
      execute: async (input: IBrowserToolInput) => {
        if (!context.browser) {
          throw new Error('Browser tool is not available in this execution context.');
        }

        const action = normalizeBrowserAction(input.action);

        // Permission is requested before anything is sent to the browser.
        await requestBrowserPermission(context, { ...input, action });

        const result = await context.browser.execute(
          { ...input, action },
          { timeoutMs: input.timeoutMs, abortSignal: context.abortSignal },
        );

        // Limits are read from `options` at call time, not at creation time.
        const truncated = plugins.truncateOutput(formatToolOutput(result), {
          maxLines: options.maxLines,
          maxBytes: options.maxBytes,
        });
        return truncated.content;
      },
    }),
  };
}
|
|
|
|
/**
 * Converts a raw `action` value into a known browser action.
 *
 * A string is trimmed and lower-cased before lookup. Anything that is not a
 * string, or is blank after trimming, is treated as `snapshot`.
 *
 * @param input Raw action value as received by the tool.
 * @returns The matching member of `browserActions`.
 * @throws Error when the normalized value is not in `browserActions`; the
 *   message echoes the original input and lists the supported actions.
 */
const normalizeBrowserAction = (input: unknown): TBrowserToolAction => {
  let candidate = 'snapshot';
  if (typeof input === 'string') {
    const trimmed = input.trim();
    if (trimmed) {
      candidate = trimmed.toLowerCase();
    }
  }

  if (!browserActions.has(candidate as TBrowserToolAction)) {
    throw new Error(`Unsupported browser action: ${String(input)}. Use one of: ${[...browserActions].join(', ')}.`);
  }

  return candidate as TBrowserToolAction;
};
|
|
|
|
/**
 * Asks the context's permission callback to approve a browser action.
 *
 * Returns without asking when the context has no `requestPermission`
 * callback, or when the action is `snapshot` or `screenshot`.
 *
 * The request metadata contains the action, URL and selector, plus
 * action-specific fields: the key for `press`, only the length of the text
 * for `fill` (not the text itself), and a compacted preview of the script
 * for `evaluate`. Fields that do not apply are present with value `undefined`.
 *
 * @param context Execution context that may carry `requestPermission`.
 * @param input Tool input with an already-normalized `action`.
 */
const requestBrowserPermission = async (context: IToolExecutionContext, input: IBrowserToolInput & { action: TBrowserToolAction }): Promise<void> => {
  if (!context.requestPermission) return;

  const { action, url, selector, text, script } = input;
  if (action === 'snapshot' || action === 'screenshot') return;

  // Covers every action so the `Record` type stays exhaustive, even though
  // `snapshot` and `screenshot` have already returned above.
  const titleByAction: Record<TBrowserToolAction, string> = {
    navigate: 'Navigate browser',
    snapshot: 'Inspect browser',
    screenshot: 'Capture browser screenshot',
    click: 'Click browser element',
    fill: 'Fill browser element',
    press: 'Press browser key',
    evaluate: 'Evaluate browser JavaScript',
    close: 'Close browser session',
  };

  const metadata = {
    action,
    url,
    selector,
    key: action === 'press' ? text : undefined,
    textLength: action === 'fill' ? (text?.length ?? 0) : undefined,
    scriptPreview: action === 'evaluate' && script ? compactMetadataText(script) : undefined,
  };

  await context.requestPermission({
    type: 'browser',
    title: titleByAction[action],
    metadata,
  });
};
|
|
|
|
/**
 * Produces a short single-line preview of `text` for permission metadata.
 *
 * Every run of whitespace is collapsed to one space and the ends are
 * trimmed. Results longer than 160 UTF-16 code units are cut and suffixed
 * with `...`, so the returned string never exceeds 160 code units.
 *
 * The cut never splits a surrogate pair: if it would land between the two
 * halves of an astral character (e.g. an emoji), that character is dropped
 * entirely instead of leaving a lone high surrogate in the preview.
 *
 * @param text Arbitrary text, typically a script passed to `evaluate`.
 * @returns The compacted, possibly truncated preview.
 */
const compactMetadataText = (text: string): string => {
  const maxLength = 160;
  const ellipsis = '...';

  const compacted = text.replace(/\s+/g, ' ').trim();
  if (compacted.length <= maxLength) {
    return compacted;
  }

  let cut = maxLength - ellipsis.length;
  // A high surrogate (0xD800–0xDBFF) as the last kept unit means its low
  // surrogate would be sliced off; back up one unit to keep the string well-formed.
  const lastKeptUnit = compacted.charCodeAt(cut - 1);
  if (lastKeptUnit >= 0xd800 && lastKeptUnit <= 0xdbff) {
    cut -= 1;
  }

  return `${compacted.slice(0, cut)}${ellipsis}`;
};
|