Initialize smartkvm package

This commit is contained in:
2026-05-16 13:41:55 +00:00
commit 8588c6c70d
18 changed files with 8751 additions and 0 deletions
+5
View File
@@ -0,0 +1,5 @@
export * from './smartkvm.interfaces.js';
export * from './smartkvm.classes.browserkvm.js';
export * from './smartkvm.classes.kvmterminal.js';
export * from './smartkvm.commandwrappers.js';
export * from './smartkvm.tools.smartagent.js';
+9
View File
@@ -0,0 +1,9 @@
// node native scope
import * as crypto from 'node:crypto';
export { crypto };
// third party scope
import puppeteer from 'puppeteer';
export { puppeteer };
+335
View File
@@ -0,0 +1,335 @@
import * as plugins from './plugins.js';
import type {
IBrowserKvmOptions,
IKvmDriver,
IKvmFrame,
IKvmTypeTextOptions,
TKvmKey,
TKvmKind,
} from './smartkvm.interfaces.js';
// Fallback CSS selector used when neither viewerSelector nor captureSelector is configured:
// matches the first <video> or <canvas> element in the page.
const defaultViewerSelector = 'video, canvas';
/**
* Outcome of the in-page capture attempt made by SmartBrowserKvm.captureFrame().
* The object is produced inside page.evaluate() and returned to the Node side.
*/
interface ICaptureMediaResult {
// True only when a frame was drawn to a canvas and encoded; width, height and dataBase64 are then set.
captured: boolean;
// Intrinsic size of the media element (videoWidth/videoHeight or canvas width/height).
width?: number;
height?: number;
// PNG payload with the "data:...," URL prefix already stripped.
dataBase64?: string;
// Set when the selector matched nothing, no 2D context could be created, or drawing/encoding threw.
error?: string;
// False when no <video>/<canvas> exists at or below the capture selector.
hasMediaElement?: boolean;
// False when a media element exists but reports a non-positive width or height.
mediaHasFrame?: boolean;
}
/**
 * KVM driver that controls a browser-based KVM web UI through Puppeteer.
 *
 * The driver launches a browser, opens the configured URL, optionally fills a
 * generic username/password form, waits until the viewer element shows content
 * and then forwards keyboard input to the page and captures frames from it.
 */
export class SmartBrowserKvm implements IKvmDriver {
  /** KVM flavour taken from the options; 'generic' when none was given. */
  public readonly kind: TKvmKind;
  private options: IBrowserKvmOptions;
  private browser?: plugins.puppeteer.Browser;
  private page?: plugins.puppeteer.Page;

  constructor(options: IBrowserKvmOptions) {
    this.options = options;
    this.kind = options.kind ?? 'generic';
  }

  /**
   * Launches the browser, navigates to the KVM URL, attempts the generic login
   * and waits for the viewer. Returns immediately when already connected.
   *
   * If any step fails, the partially created browser is closed and the internal
   * state is reset before the error is rethrown, so a failed connect() neither
   * leaks a browser process nor makes a later connect() return early.
   *
   * @throws whatever Puppeteer throws for launch, navigation or viewer detection.
   */
  public async connect(): Promise<void> {
    if (this.browser && this.page) {
      return;
    }
    const timeoutMs = this.options.timeoutMs ?? 30000;
    const args: string[] = [];
    // The Chromium sandbox is disabled when a CI environment variable is set or the process runs as uid 0.
    if (process.env.CI || process.getuid?.() === 0) {
      args.push('--no-sandbox', '--disable-setuid-sandbox');
    }
    try {
      this.browser = await plugins.puppeteer.launch({
        args,
        acceptInsecureCerts: this.options.ignoreHttpsErrors ?? false,
        defaultViewport: null,
        executablePath: this.options.executablePath,
        headless: this.options.headless ?? true,
        timeout: timeoutMs,
        userDataDir: this.options.userDataDir,
      });
      this.page = await this.browser.newPage();
      this.page.setDefaultTimeout(timeoutMs);
      this.page.setDefaultNavigationTimeout(timeoutMs);
      await this.page.goto(this.options.url, {
        waitUntil: 'domcontentloaded',
        timeout: timeoutMs,
      });
      await this.tryGenericLogin();
      await this.waitForViewerReady();
      await this.focusViewer();
    } catch (error) {
      // Tear down the half-initialised session; the original failure is the one worth reporting.
      await this.disconnect().catch(() => undefined);
      throw error;
    }
  }

  /** Closes the browser (if any) and resets the connection state. Safe to call repeatedly. */
  public async disconnect(): Promise<void> {
    const browser = this.browser;
    // Clear the state first so the driver counts as disconnected even if close() throws.
    this.page = undefined;
    this.browser = undefined;
    if (browser) {
      await browser.close();
    }
  }

  /**
   * Clicks the viewer element so that subsequent keyboard input reaches it.
   *
   * @throws when not connected or when the viewer selector matches nothing.
   */
  public async focusViewer(): Promise<void> {
    const page = this.requirePage();
    const viewerSelector = this.getViewerSelector();
    const viewerElement = await page.$(viewerSelector);
    if (!viewerElement) {
      throw new Error(`KVM viewer selector missing: ${viewerSelector}`);
    }
    try {
      await viewerElement.click();
    } finally {
      // focusViewer() runs before every key/typing action; release the handle each time.
      await viewerElement.dispose().catch(() => undefined);
    }
  }

  /**
   * Captures the current KVM picture as a PNG frame.
   *
   * First tries to draw the <video>/<canvas> at or below the capture selector
   * onto a temporary canvas inside the page. When that is not possible (no media
   * element, no 2D context, or drawing/encoding throws) it falls back to a
   * Puppeteer screenshot of the capture element.
   *
   * @throws when not connected, when the capture selector matches nothing, when
   *   the media element has no frame yet, or when the element has no visible box.
   */
  public async captureFrame(): Promise<IKvmFrame> {
    const page = this.requirePage();
    const captureSelector = this.getCaptureSelector();
    const captureElement = await page.$(captureSelector);
    if (!captureElement) {
      throw new Error(`KVM capture selector missing: ${captureSelector}`);
    }
    try {
      const mediaResult = await page.evaluate((selector: string): ICaptureMediaResult => {
        const rootElement = document.querySelector(selector);
        if (!rootElement) {
          return {
            captured: false,
            error: `KVM capture selector missing: ${selector}`,
          };
        }
        const mediaElement = (rootElement.matches('video, canvas')
          ? rootElement
          : rootElement.querySelector('video, canvas')) as HTMLVideoElement | HTMLCanvasElement | null;
        if (!mediaElement) {
          return {
            captured: false,
            hasMediaElement: false,
          };
        }
        let width = 0;
        let height = 0;
        if (mediaElement instanceof HTMLVideoElement) {
          width = mediaElement.videoWidth;
          height = mediaElement.videoHeight;
        } else if (mediaElement instanceof HTMLCanvasElement) {
          width = mediaElement.width;
          height = mediaElement.height;
        }
        if (width <= 0 || height <= 0) {
          return {
            captured: false,
            hasMediaElement: true,
            mediaHasFrame: false,
          };
        }
        try {
          const canvasElement = document.createElement('canvas');
          canvasElement.width = width;
          canvasElement.height = height;
          const context = canvasElement.getContext('2d');
          if (!context) {
            return {
              captured: false,
              hasMediaElement: true,
              mediaHasFrame: true,
              error: 'Could not create canvas 2D context for KVM frame capture.',
            };
          }
          context.drawImage(mediaElement, 0, 0, width, height);
          const dataUrl = canvasElement.toDataURL('image/png');
          return {
            captured: true,
            width,
            height,
            // Keep only the base64 payload after the "data:image/png;base64," prefix.
            dataBase64: dataUrl.slice(dataUrl.indexOf(',') + 1),
          };
        } catch (error) {
          return {
            captured: false,
            hasMediaElement: true,
            mediaHasFrame: true,
            error: error instanceof Error ? error.message : String(error),
          };
        }
      }, captureSelector);
      if (mediaResult.captured) {
        return {
          timestamp: Date.now(),
          width: mediaResult.width ?? 0,
          height: mediaResult.height ?? 0,
          mimeType: 'image/png',
          dataBase64: mediaResult.dataBase64 ?? '',
        };
      }
      if (mediaResult.error?.startsWith('KVM capture selector missing')) {
        throw new Error(mediaResult.error);
      }
      if (mediaResult.hasMediaElement && mediaResult.mediaHasFrame === false) {
        throw new Error(`KVM media element has no frame: ${captureSelector}`);
      }
      // Remaining cases: fall back to a screenshot of the element.
      const boundingBox = await captureElement.boundingBox();
      if (!boundingBox || boundingBox.width <= 0 || boundingBox.height <= 0) {
        throw new Error(`KVM capture selector has no visible frame: ${captureSelector}`);
      }
      const screenshot = await captureElement.screenshot({
        type: 'png',
      });
      return {
        timestamp: Date.now(),
        width: Math.round(boundingBox.width),
        height: Math.round(boundingBox.height),
        mimeType: 'image/png',
        dataBase64: Buffer.from(screenshot).toString('base64'),
      };
    } finally {
      // captureFrame() is polled repeatedly; release the handle so handles do not pile up.
      await captureElement.dispose().catch(() => undefined);
    }
  }

  /**
   * Focuses the viewer and types the text through the page keyboard.
   *
   * @param text characters to type.
   * @param options optional per-key delay in milliseconds.
   */
  public async typeText(text: string, options?: IKvmTypeTextOptions): Promise<void> {
    const page = this.requirePage();
    await this.focusViewer();
    await page.keyboard.type(text, {
      delay: options?.delayMs,
    });
  }

  /** Focuses the viewer and presses (down + up) a single key. */
  public async pressKey(key: TKvmKey): Promise<void> {
    const page = this.requirePage();
    await this.focusViewer();
    await page.keyboard.press(key as plugins.puppeteer.KeyInput);
  }

  /**
   * Focuses the viewer, holds the given keys down in order and releases them in
   * reverse order. Keys that were pressed are released even when a later
   * key-down fails.
   */
  public async pressShortcut(keys: TKvmKey[]): Promise<void> {
    const page = this.requirePage();
    await this.focusViewer();
    const pressedKeys: TKvmKey[] = [];
    try {
      for (const key of keys) {
        await page.keyboard.down(key as plugins.puppeteer.KeyInput);
        pressedKeys.push(key);
      }
    } finally {
      for (const key of pressedKeys.reverse()) {
        await page.keyboard.up(key as plugins.puppeteer.KeyInput);
      }
    }
  }

  /** Resolves after the given number of milliseconds. */
  public async wait(milliseconds: number): Promise<void> {
    await new Promise<void>((resolve) => setTimeout(resolve, milliseconds));
  }

  /**
   * Best-effort login: when both username and password are configured and a
   * username-like and a password input are found, fills them and submits with
   * Enter. Does nothing otherwise.
   */
  private async tryGenericLogin(): Promise<void> {
    if (!this.options.username || !this.options.password) {
      return;
    }
    const page = this.requirePage();
    const usernameElement = await this.findFirstElement([
      'input[name="username"]',
      'input[autocomplete="username"]',
      'input[type="email"]',
      'input[type="text"]',
    ]);
    const passwordElement = await this.findFirstElement([
      'input[name="password"]',
      'input[autocomplete="current-password"]',
      'input[type="password"]',
    ]);
    if (!usernameElement || !passwordElement) {
      return;
    }
    // A triple click selects any pre-filled value so typing replaces it.
    await usernameElement.click({ clickCount: 3 });
    await usernameElement.type(this.options.username);
    await passwordElement.click({ clickCount: 3 });
    await passwordElement.type(this.options.password);
    // Arm the navigation wait BEFORE submitting so a fast navigation cannot be missed.
    // Timeouts/errors are swallowed: single-page UIs may log in without navigating.
    const navigationSettled = page
      .waitForNavigation({
        waitUntil: 'domcontentloaded',
        timeout: Math.min(this.options.timeoutMs ?? 30000, 10000),
      })
      .catch(() => undefined);
    await passwordElement.press('Enter');
    // Continue after the navigation or after one second, whichever comes first.
    await Promise.race([navigationSettled, this.wait(1000)]);
  }

  /**
   * Waits until the viewer shows content: a video with non-zero video size, a
   * canvas with non-zero size, or (without a media element) a viewer element
   * with a non-empty bounding rectangle.
   */
  private async waitForViewerReady(): Promise<void> {
    const page = this.requirePage();
    const viewerSelector = this.getViewerSelector();
    await page.waitForFunction(
      (selector: string) => {
        const rootElement = document.querySelector(selector);
        if (!rootElement) {
          return false;
        }
        const mediaElement = (rootElement.matches('video, canvas')
          ? rootElement
          : rootElement.querySelector('video, canvas')) as HTMLVideoElement | HTMLCanvasElement | null;
        if (mediaElement instanceof HTMLVideoElement) {
          return mediaElement.videoWidth > 0 && mediaElement.videoHeight > 0;
        }
        if (mediaElement instanceof HTMLCanvasElement) {
          return mediaElement.width > 0 && mediaElement.height > 0;
        }
        const boundingRect = rootElement.getBoundingClientRect();
        return boundingRect.width > 0 && boundingRect.height > 0;
      },
      {
        timeout: this.options.timeoutMs ?? 30000,
      },
      viewerSelector
    );
  }

  /** Returns a handle for the first selector that matches an element, or null when none matches. */
  private async findFirstElement(
    selectors: string[]
  ): Promise<plugins.puppeteer.ElementHandle<Element> | null> {
    const page = this.requirePage();
    for (const selector of selectors) {
      const element = await page.$(selector);
      if (element) {
        return element;
      }
    }
    return null;
  }

  /** Selector of the element that receives focus: viewerSelector, else 'video, canvas'. */
  private getViewerSelector(): string {
    return this.options.viewerSelector ?? defaultViewerSelector;
  }

  /** Selector used for capture: captureSelector, else viewerSelector, else 'video, canvas'. */
  private getCaptureSelector(): string {
    return this.options.captureSelector ?? this.options.viewerSelector ?? defaultViewerSelector;
  }

  /**
   * Returns the connected page.
   *
   * @throws when connect() has not completed successfully.
   */
  private requirePage(): plugins.puppeteer.Page {
    if (!this.page) {
      throw new Error('SmartBrowserKvm is not connected. Call connect() first.');
    }
    return this.page;
  }
}
+103
View File
@@ -0,0 +1,103 @@
import { createWrappedKvmCommand, parseWrappedKvmCommandOutput } from './smartkvm.commandwrappers.js';
import type {
IKvmTerminalCommandResult,
IKvmTerminalOptions,
} from './smartkvm.interfaces.js';
/**
 * Runs shell commands on a machine that is only reachable through a KVM:
 * commands are typed via the KVM driver and their output is read back by
 * running OCR on captured frames.
 */
export class SmartKvmTerminal {
  private options: IKvmTerminalOptions;

  constructor(options: IKvmTerminalOptions) {
    this.options = options;
  }

  /**
   * Opens a terminal using a keyboard recipe chosen by osHint:
   * Windows uses Meta+R and "powershell -NoLogo", macOS uses Meta+Space and
   * "Terminal", Linux uses Control+Alt+T. For 'unknown' (the default) nothing
   * is sent.
   */
  public async bootstrap(): Promise<void> {
    const kvm = this.options.kvm;
    const targetOs = this.options.osHint ?? 'unknown';

    if (targetOs === 'linux') {
      await kvm.pressShortcut(['Control', 'Alt', 'T']);
      await kvm.wait(1500);
      return;
    }
    if (targetOs !== 'windows' && targetOs !== 'macos') {
      return;
    }

    // Windows and macOS share one sequence: launcher shortcut, program name, Enter.
    const launcher =
      targetOs === 'windows'
        ? { shortcut: ['Meta', 'R'], text: 'powershell -NoLogo' }
        : { shortcut: ['Meta', 'Space'], text: 'Terminal' };
    await kvm.pressShortcut(launcher.shortcut);
    await kvm.wait(500);
    await kvm.typeText(launcher.text);
    await kvm.pressKey('Enter');
    await kvm.wait(1500);
  }

  /**
   * Types the wrapped command, presses Enter and polls the screen via OCR until
   * the wrapper's end marker is parsed, the command timeout (default 30000 ms)
   * elapses, or ocrMaxAttempts observations have been made.
   *
   * @param command shell command to execute on the remote machine.
   * @returns the parsed output when completed; otherwise a result with
   *   timedOut set and the last raw OCR text as combinedText.
   */
  public async runCommand(command: string): Promise<IKvmTerminalCommandResult> {
    const kvm = this.options.kvm;
    const wrapped = createWrappedKvmCommand(command, this.options.shellHint ?? 'unknown');
    const timeoutMs = this.options.commandTimeoutMs ?? 30000;
    const pollMs = this.options.ocrPollIntervalMs ?? 500;
    const startedAt = Date.now();

    await kvm.focusViewer();
    await kvm.typeText(wrapped.textToType);
    await kvm.pressKey('Enter');

    for (let attempt = 1; ; attempt += 1) {
      const screenText = await this.observeText();
      const parsed = parseWrappedKvmCommandOutput({
        commandId: wrapped.commandId,
        startMarker: wrapped.startMarker,
        endMarkerPrefix: wrapped.endMarkerPrefix,
        rawText: screenText,
      });
      if (parsed.completed) {
        return {
          commandId: wrapped.commandId,
          command,
          completed: true,
          timedOut: false,
          exitCode: parsed.exitCode,
          combinedText: parsed.combinedText,
          rawOcrText: screenText,
        };
      }

      const attemptLimit = this.options.ocrMaxAttempts;
      const outOfAttempts = typeof attemptLimit === 'number' && attempt >= attemptLimit;
      const outOfTime = Date.now() - startedAt >= timeoutMs;
      if (outOfAttempts || outOfTime) {
        return {
          commandId: wrapped.commandId,
          command,
          completed: false,
          timedOut: true,
          combinedText: screenText,
          rawOcrText: screenText,
        };
      }

      // Never sleep past the deadline: cap the pause at the time that is left.
      const msLeft = timeoutMs - (Date.now() - startedAt);
      await kvm.wait(Math.min(pollMs, Math.max(msLeft, 0)));
    }
  }

  /**
   * Captures one frame and returns the text the OCR engine recognises in it,
   * using the configured crop and the 'eng' language.
   */
  public async observeText(): Promise<string> {
    const frame = await this.options.kvm.captureFrame();
    const { text } = await this.options.ocrEngine.recognize(frame, {
      crop: this.options.ocrCrop,
      language: 'eng',
    });
    return text;
  }
}
+83
View File
@@ -0,0 +1,83 @@
import * as plugins from './plugins.js';
import type { IWrappedKvmCommand, TKvmShellHint } from './smartkvm.interfaces.js';
/**
* Input for parseWrappedKvmCommandOutput().
*/
export interface IParseWrappedKvmCommandOutputOptions {
// Identifier of the wrapped command; the parser in this file does not read it.
commandId: string;
// Exact text that marks where the command output begins.
startMarker: string;
// Text that precedes the exit code in the closing marker.
endMarkerPrefix: string;
// Screen text to search (in SmartKvmTerminal this is the OCR result); CR and CRLF are normalised to LF by the parser.
rawText: string;
}
/**
* Result of parseWrappedKvmCommandOutput().
*/
export interface IParseWrappedKvmCommandOutputResult {
// True when both the start marker and a following end marker were found.
completed: boolean;
// Integer parsed from the text right after the end marker prefix; undefined when none could be parsed.
exitCode?: number;
// Trimmed text between the markers when completed, otherwise the whole normalised input text.
combinedText: string;
}
/**
 * Wraps a shell command so that its output is framed by unique, greppable
 * markers and followed by its exit status.
 *
 * The markers embed a fresh random id (a UUID without dashes) so output from
 * earlier commands that is still visible on screen cannot be mistaken for the
 * current one.
 *
 * @param command the command to run, inserted verbatim into the wrapper.
 * @param shellHint selects the wrapper syntax; bash, zsh, sh and unknown all
 *   use the POSIX `printf` form.
 * @returns the id, both markers and `textToType`, the single line to type.
 */
export const createWrappedKvmCommand = (
  command: string,
  shellHint: TKvmShellHint
): IWrappedKvmCommand => {
  const commandId = plugins.crypto.randomUUID().replace(/-/g, '');
  const startMarker = `SMARTKVM_START_${commandId}`;
  const endMarkerPrefix = `SMARTKVM_END_${commandId}_`;
  let textToType: string;
  switch (shellHint) {
    case 'powershell':
      textToType = `$__smartkvm_id = "${commandId}"; Write-Output "${startMarker}"; $__smartkvm_status = 0; try { ${command}; if ($null -ne $LASTEXITCODE) { $__smartkvm_status = $LASTEXITCODE } } catch { Write-Output $_; $__smartkvm_status = 1 }; Write-Output "${endMarkerPrefix}$__smartkvm_status"`;
      break;
    case 'cmd':
      // NOTE(review): cmd.exe is believed to expand %ERRORLEVEL% when the whole line is parsed,
      // i.e. before ${command} runs, which would report a stale status — confirm on a real cmd prompt.
      textToType = `echo ${startMarker} & ${command} & echo ${endMarkerPrefix}%ERRORLEVEL%`;
      break;
    case 'bash':
    case 'zsh':
    case 'sh':
    case 'unknown':
    default:
      // `\\n` produces the two characters backslash + n, which printf turns into a newline.
      // A bare `\n` here would put real line breaks into the typed text, and a keyboard
      // driver types those as Enter in the middle of the command.
      textToType = `printf '\\n${startMarker}\\n'; ${command}; __smartkvm_status=$?; printf '\\n${endMarkerPrefix}%s\\n' "$__smartkvm_status"`;
      break;
  }
  return {
    commandId,
    shellHint,
    command,
    textToType,
    startMarker,
    endMarkerPrefix,
  };
};
/**
 * Extracts the output and exit code of a wrapped command from screen text.
 *
 * The screen normally shows the typed command line itself, and that line
 * contains the start marker as well as the end-marker prefix followed by an
 * unexpanded placeholder (`%s`, `%ERRORLEVEL%` or `$__smartkvm_status`). Such
 * echoed occurrences are not output, so:
 *  - an end marker whose value starts with `%` or `$` is skipped, and
 *  - the accepted end marker is paired with the closest start marker before it.
 *
 * @param options markers to look for and the raw text to search.
 * @returns `completed: false` with the normalised text while no real end marker
 *   preceded by a start marker is visible; otherwise the trimmed text between
 *   the markers and the parsed exit code (undefined when no integer follows
 *   the end-marker prefix).
 */
export const parseWrappedKvmCommandOutput = (
  options: IParseWrappedKvmCommandOutputOptions
): IParseWrappedKvmCommandOutputResult => {
  // Normalise CRLF and lone CR so line handling only deals with '\n'.
  const normalizedRawText = options.rawText.replace(/\r\n/g, '\n').replace(/\r/g, '\n');
  const { startMarker, endMarkerPrefix } = options;

  let searchFromIndex = 0;
  while (searchFromIndex <= normalizedRawText.length) {
    const endIndex = normalizedRawText.indexOf(endMarkerPrefix, searchFromIndex);
    if (endIndex === -1) {
      break;
    }
    const endMarkerValueStartIndex = endIndex + endMarkerPrefix.length;
    // Always advance by at least one character so the scan terminates.
    searchFromIndex = Math.max(endMarkerValueStartIndex, endIndex + 1);

    const lineEndIndex = normalizedRawText.indexOf('\n', endMarkerValueStartIndex);
    const endMarkerLine = normalizedRawText
      .slice(endMarkerValueStartIndex, lineEndIndex === -1 ? undefined : lineEndIndex)
      .trim();
    if (/^[%$]/.test(endMarkerLine)) {
      // Unexpanded placeholder: this is the echoed command line, not its output.
      continue;
    }

    // The start marker must end at or before the end marker begins.
    const latestStartIndex = endIndex - startMarker.length;
    const startIndex =
      latestStartIndex < 0 ? -1 : normalizedRawText.lastIndexOf(startMarker, latestStartIndex);
    if (startIndex === -1) {
      continue;
    }

    const exitCodeMatch = endMarkerLine.match(/^-?\d+/);
    const exitCode = exitCodeMatch ? Number.parseInt(exitCodeMatch[0], 10) : undefined;
    return {
      completed: true,
      exitCode,
      combinedText: normalizedRawText.slice(startIndex + startMarker.length, endIndex).trim(),
    };
  }

  return {
    completed: false,
    combinedText: normalizedRawText,
  };
};
+152
View File
@@ -0,0 +1,152 @@
// KVM product family; SmartBrowserKvm falls back to 'generic' when the options carry no kind.
export type TKvmKind = 'jetkvm' | 'glinet' | 'pikvm' | 'tinypilot' | 'generic';
// Image encodings a captured frame may use.
export type TKvmFrameMimeType = 'image/png' | 'image/jpeg';
// Operating system of the controlled machine; SmartKvmTerminal.bootstrap() picks its key recipe from it.
export type TKvmOsHint = 'windows' | 'macos' | 'linux' | 'unknown';
// Shell running in the remote terminal; selects the wrapper syntax in createWrappedKvmCommand().
export type TKvmShellHint = 'powershell' | 'cmd' | 'bash' | 'zsh' | 'sh' | 'unknown';
/**
* Key name accepted by the driver's key methods.
* The literals list the commonly used keys; the trailing `| string` means any
* other string is accepted by the type checker as well. SmartBrowserKvm passes
* the value to the Puppeteer keyboard as a KeyInput.
*/
export type TKvmKey =
| 'Enter'
| 'Escape'
| 'Tab'
| 'Backspace'
| 'Delete'
| 'ArrowUp'
| 'ArrowDown'
| 'ArrowLeft'
| 'ArrowRight'
| 'Home'
| 'End'
| 'PageUp'
| 'PageDown'
| 'Space'
| 'Meta'
| 'Control'
| 'Alt'
| 'Shift'
| 'F1'
| 'F2'
| 'F3'
| 'F4'
| 'F5'
| 'F6'
| 'F7'
| 'F8'
| 'F9'
| 'F10'
| 'F11'
| 'F12'
| string;
/**
* One captured picture of the KVM screen.
*/
export interface IKvmFrame {
// Capture time; SmartBrowserKvm fills it with Date.now() (milliseconds since the Unix epoch).
timestamp: number;
// Frame dimensions; SmartBrowserKvm uses the media element size or the rounded bounding box of the captured element.
width: number;
height: number;
// Encoding of dataBase64.
mimeType: TKvmFrameMimeType;
// Base64-encoded image bytes without a "data:" URL prefix.
dataBase64: string;
}
/**
* Configuration for SmartBrowserKvm.
*/
export interface IBrowserKvmOptions {
// Address of the KVM web UI that is opened on connect().
url: string;
// KVM flavour; defaults to 'generic'.
kind?: TKvmKind;
// Credentials for the generic login form; the login is only attempted when both are set.
username?: string;
password?: string;
// Whether the browser runs headless; defaults to true.
headless?: boolean;
/**
* Main element that should receive keyboard focus.
* Usually video, canvas, or a wrapper around the KVM viewer.
* Defaults to the selector 'video, canvas'.
*/
viewerSelector?: string;
/**
* Element used for frame capture.
* Defaults to viewerSelector and, when that is unset too, to 'video, canvas'.
* A video/canvas at or below this element is captured directly; otherwise a
* screenshot of the element itself is taken.
*/
captureSelector?: string;
/**
* Useful for self-signed KVM certificates. Defaults to false.
*/
ignoreHttpsErrors?: boolean;
/**
* Persist browser session cookies/login state.
*/
userDataDir?: string;
/**
* Optional browser executable path.
*/
executablePath?: string;
/**
* Optional timeout in milliseconds for browser launch, initial load and
* viewer detection. Defaults to 30000.
*/
timeoutMs?: number;
}
/**
* Options for IKvmDriver.typeText().
*/
export interface IKvmTypeTextOptions {
// Delay between key presses in milliseconds; SmartBrowserKvm forwards it as the keyboard `delay` option.
delayMs?: number;
}
/**
* Contract every KVM transport implements; SmartKvmTerminal only talks to this interface.
*/
export interface IKvmDriver {
// KVM flavour handled by the driver.
readonly kind: TKvmKind;
// Establishes the session with the KVM.
connect: () => Promise<void>;
// Ends the session and releases its resources.
disconnect: () => Promise<void>;
// Makes the KVM viewer the target of subsequent keyboard input.
focusViewer: () => Promise<void>;
// Returns the current screen content as an encoded image.
captureFrame: () => Promise<IKvmFrame>;
// Types the given text on the remote machine.
typeText: (text: string, options?: IKvmTypeTextOptions) => Promise<void>;
// Presses and releases a single key.
pressKey: (key: TKvmKey) => Promise<void>;
// Presses a key combination such as ['Control', 'Alt', 'T'].
pressShortcut: (keys: TKvmKey[]) => Promise<void>;
// Pauses for the given number of milliseconds.
wait: (milliseconds: number) => Promise<void>;
}
/**
* Rectangle that limits OCR to part of a frame.
* NOTE(review): presumably pixel coordinates relative to the frame's top-left corner — confirm against the OCR engine implementation.
*/
export interface IOcrCrop {
x: number;
y: number;
width: number;
height: number;
}
/**
* Per-call options passed to IOcrEngine.recognize().
*/
export interface IOcrRecognizeOptions {
// Optional region to recognise instead of the whole frame.
crop?: IOcrCrop;
// Recognition language; SmartKvmTerminal always passes 'eng'.
language?: string;
}
/**
* Outcome of one OCR run.
*/
export interface IOcrResult {
// Recognised text.
text: string;
// Optional engine-specific confidence value; no code in this package reads it.
confidence?: number;
}
/**
* Pluggable OCR backend used by SmartKvmTerminal to read the screen.
*/
export interface IOcrEngine {
recognize: (frame: IKvmFrame, options?: IOcrRecognizeOptions) => Promise<IOcrResult>;
}
/**
* Configuration for SmartKvmTerminal.
*/
export interface IKvmTerminalOptions {
// Driver used to type commands and capture frames.
kvm: IKvmDriver;
// Engine that turns captured frames into text.
ocrEngine: IOcrEngine;
// Selects the bootstrap() key recipe; defaults to 'unknown', which sends nothing.
osHint?: TKvmOsHint;
// Selects the command wrapper syntax; defaults to 'unknown', which uses the POSIX printf form.
shellHint?: TKvmShellHint;
// Maximum time in milliseconds runCommand() polls for the end marker; defaults to 30000.
commandTimeoutMs?: number;
// Pause in milliseconds between OCR observations; defaults to 500.
ocrPollIntervalMs?: number;
// Optional cap on the number of OCR observations per command; unlimited when not set.
ocrMaxAttempts?: number;
/**
* Optional OCR crop to limit recognition to terminal area.
*/
ocrCrop?: IOcrCrop;
}
/**
* Result returned by SmartKvmTerminal.runCommand().
*/
export interface IKvmTerminalCommandResult {
// Random id embedded in the markers of this command.
commandId: string;
// The command as passed by the caller (without the wrapper).
command: string;
// True when the end marker was found in the OCR text.
completed: boolean;
// True when polling stopped because the timeout elapsed or the maximum number of OCR attempts was reached.
timedOut: boolean;
// Exit code parsed from the end marker; absent when not completed or not readable.
exitCode?: number;
// Text between the markers when completed; otherwise the last raw OCR text.
combinedText: string;
// Unprocessed text of the last OCR observation.
rawOcrText: string;
}
/**
* A command prepared by createWrappedKvmCommand() for typing into the remote shell.
*/
export interface IWrappedKvmCommand {
// Random id (UUID without dashes) that makes the markers unique.
commandId: string;
// Shell syntax the wrapper was generated for.
shellHint: TKvmShellHint;
// Original, unwrapped command.
command: string;
// Full text to type: start marker output, the command, and end marker output with the exit status.
textToType: string;
// "SMARTKVM_START_" followed by the id.
startMarker: string;
// "SMARTKVM_END_" followed by the id and "_"; the exit status is printed right after it.
endMarkerPrefix: string;
}
+58
View File
@@ -0,0 +1,58 @@
import { SmartKvmTerminal } from './smartkvm.classes.kvmterminal.js';
import type { IKvmTerminalCommandResult } from './smartkvm.interfaces.js';
/**
* Tool description in the shape produced by createSmartKvmTools().
*/
export interface ISmartKvmTool<TInput = unknown, TOutput = unknown> {
// Unique tool identifier.
name: string;
// Human-readable explanation of what the tool does.
description: string;
// Description of the accepted input; the tools in this file use JSON-Schema-style objects.
parameters: unknown;
// Runs the tool with the given input.
execute: (input: TInput) => Promise<TOutput>;
}
/**
* Dependencies needed to build the KVM tools.
*/
export interface ISmartKvmToolOptions {
// Terminal the tools delegate to.
terminal: SmartKvmTerminal;
}
/**
* Input of the kvm_terminal_run_command tool.
*/
export interface ISmartKvmTerminalRunCommandInput {
// The terminal command to run.
command: string;
}
// Input of the kvm_terminal_observe tool: an object without properties.
export type TSmartKvmTerminalObserveInput = Record<string, never>;
/**
 * Builds the two agent tools backed by a SmartKvmTerminal:
 * `kvm_terminal_run_command` (types a command and returns the OCR-parsed
 * result) and `kvm_terminal_observe` (returns the text currently on screen).
 *
 * @param options carries the terminal both tools delegate to.
 * @returns the tools, run-command first, observe second.
 */
export const createSmartKvmTools = (options: ISmartKvmToolOptions): ISmartKvmTool[] => {
  const runCommandTool: ISmartKvmTool = {
    name: 'kvm_terminal_run_command',
    description: 'Run a command through the visual KVM terminal transport and return OCR-parsed output.',
    parameters: {
      type: 'object',
      properties: {
        command: {
          type: 'string',
          description: 'The terminal command to run.',
        },
      },
      required: ['command'],
      additionalProperties: false,
    },
    execute: async (input: unknown): Promise<IKvmTerminalCommandResult> => {
      // Input arrives untyped from the agent runtime, so validate before use.
      const candidate = input as Partial<ISmartKvmTerminalRunCommandInput> | null | undefined;
      const command = candidate?.command;
      if (typeof command !== 'string') {
        throw new Error('kvm_terminal_run_command requires a string command.');
      }
      return options.terminal.runCommand(command);
    },
  };

  const observeTool: ISmartKvmTool = {
    name: 'kvm_terminal_observe',
    description: 'Observe the current visual KVM terminal text through the configured OCR engine.',
    parameters: {
      type: 'object',
      properties: {},
      additionalProperties: false,
    },
    // Any input is ignored; the tool takes no parameters.
    execute: async (): Promise<string> => {
      return options.terminal.observeText();
    },
  };

  return [runCommandTool, observeTool];
};