Initialize smartkvm package

This commit is contained in:
2026-05-16 13:41:55 +00:00
commit 8588c6c70d
18 changed files with 8751 additions and 0 deletions
+23
View File
@@ -0,0 +1,23 @@
.nogit/
# artifacts
coverage/
public/
# installs
node_modules/
# caches
.yarn/
.cache/
.rpt2_cache
# builds
dist/
dist_*/
# AI
.claude/
.serena/
#------# custom
+32
View File
@@ -0,0 +1,32 @@
{
"@git.zone/cli": {
"projectType": "npm",
"module": {
"githost": "code.foss.global",
"gitscope": "push.rocks",
"gitrepo": "smartkvm",
"description": "Programmable browser-based visual KVM automation with frame capture and keyboard transport.",
"npmPackagename": "@push.rocks/smartkvm",
"license": "MIT",
"projectDomain": "push.rocks"
},
"release": {
"targets": {
"npm": {
"registries": [
"https://verdaccio.lossless.digital",
"https://registry.npmjs.org"
],
"accessLevel": "public"
}
}
},
"schemaVersion": 2
},
"@git.zone/tsdoc": {
"legal": "\n## License and Legal Information\n\nThis repository contains open-source code that is licensed under the MIT License. A copy of the MIT License can be found in the [license](license) file within this repository. \n\n**Please note:** The MIT License does not grant permission to use the trade names, trademarks, service marks, or product names of the project, except as required for reasonable and customary use in describing the origin of the work and reproducing the content of the NOTICE file.\n\n### Trademarks\n\nThis project is owned and maintained by Task Venture Capital GmbH. The names and logos associated with Task Venture Capital GmbH and any related products or services are trademarks of Task Venture Capital GmbH and are not included within the scope of the MIT license granted herein. Use of these trademarks must comply with Task Venture Capital GmbH's Trademark Guidelines, and any usage must be approved in writing by Task Venture Capital GmbH.\n\n### Company Information\n\nTask Venture Capital GmbH \nRegistered at District court Bremen HRB 35230 HB, Germany\n\nFor any legal inquiries or if you require further information, please contact us via email at hello@task.vc.\n\nBy using this repository, you acknowledge that you have read this section, agree to comply with its terms, and understand that the licensing of the code does not imply endorsement by Task Venture Capital GmbH of any derivative works.\n"
},
"@ship.zone/szci": {
"npmGlobalTools": []
}
}
+19
View File
@@ -0,0 +1,19 @@
Copyright (c) 2026 Task Venture Capital GmbH <hello@task.vc>
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:
The above copyright notice and this permission notice shall be included in all
copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
SOFTWARE.
+32
View File
@@ -0,0 +1,32 @@
{
"@git.zone/cli": {
"projectType": "npm",
"module": {
"githost": "code.foss.global",
"gitscope": "push.rocks",
"gitrepo": "smartkvm",
"description": "Programmable browser-based visual KVM automation with frame capture and keyboard transport.",
"npmPackagename": "@push.rocks/smartkvm",
"license": "MIT",
"projectDomain": "push.rocks"
},
"release": {
"targets": {
"npm": {
"registries": [
"https://verdaccio.lossless.digital",
"https://registry.npmjs.org"
],
"accessLevel": "public"
}
}
},
"schemaVersion": 2
},
"@git.zone/tsdoc": {
"legal": "\n## License and Legal Information\n\nThis repository contains open-source code that is licensed under the MIT License. A copy of the MIT License can be found in the [license](license) file within this repository. \n\n**Please note:** The MIT License does not grant permission to use the trade names, trademarks, service marks, or product names of the project, except as required for reasonable and customary use in describing the origin of the work and reproducing the content of the NOTICE file.\n\n### Trademarks\n\nThis project is owned and maintained by Task Venture Capital GmbH. The names and logos associated with Task Venture Capital GmbH and any related products or services are trademarks of Task Venture Capital GmbH and are not included within the scope of the MIT license granted herein. Use of these trademarks must comply with Task Venture Capital GmbH's Trademark Guidelines, and any usage must be approved in writing by Task Venture Capital GmbH.\n\n### Company Information\n\nTask Venture Capital GmbH \nRegistered at District court Bremen HRB 35230 HB, Germany\n\nFor any legal inquiries or if you require further information, please contact us via email at hello@task.vc.\n\nBy using this repository, you acknowledge that you have read this section, agree to comply with its terms, and understand that the licensing of the code does not imply endorsement by Task Venture Capital GmbH of any derivative works.\n"
},
"@ship.zone/szci": {
"npmGlobalTools": []
}
}
+66
View File
@@ -0,0 +1,66 @@
{
"name": "@push.rocks/smartkvm",
"version": "1.0.0",
"private": false,
"description": "Programmable browser-based visual KVM automation with frame capture and keyboard transport.",
"main": "dist_ts/index.js",
"typings": "dist_ts/index.d.ts",
"type": "module",
"exports": {
".": {
"import": "./dist_ts/index.js",
"types": "./dist_ts/index.d.ts"
}
},
"author": "Task Venture Capital GmbH <hello@task.vc>",
"license": "MIT",
"scripts": {
"test": "tstest test/ --verbose --logfile --timeout 120",
"format": "gitzone format",
"build": "tsbuild --web",
"buildDocs": "tsdoc"
},
"devDependencies": {
"@git.zone/tsbuild": "^4.4.0",
"@git.zone/tsrun": "^2.0.3",
"@git.zone/tstest": "^3.6.3",
"@types/node": "^25.6.0"
},
"dependencies": {
"puppeteer": "^24.42.0"
},
"packageManager": "pnpm@10.28.2",
"files": [
"ts/**/*",
"ts_web/**/*",
"dist/**/*",
"dist_*/**/*",
"dist_ts/**/*",
"dist_ts_web/**/*",
"assets/**/*",
".smartconfig.json",
"license",
"npmextra.json",
"readme.md"
],
"browserslist": [
"last 1 chrome versions"
],
"keywords": [
"kvm",
"browser automation",
"puppeteer",
"remote control",
"visual automation",
"keyboard automation",
"frame capture"
],
"homepage": "https://code.foss.global/push.rocks/smartkvm#readme",
"repository": {
"type": "git",
"url": "https://code.foss.global/push.rocks/smartkvm.git"
},
"bugs": {
"url": "https://code.foss.global/push.rocks/smartkvm/issues"
}
}
+7556
View File
File diff suppressed because it is too large Load Diff
+67
View File
@@ -0,0 +1,67 @@
# @push.rocks/smartkvm
Programmable browser-based visual KVM automation with frame capture and keyboard transport.
## Install
```sh
pnpm add @push.rocks/smartkvm
```
## Usage
```typescript
import {
SmartBrowserKvm,
SmartKvmTerminal,
type IOcrEngine,
} from '@push.rocks/smartkvm';
const ocrEngine: IOcrEngine = {
async recognize(frame) {
// Plug in an OCR implementation here.
return {
text: '',
confidence: 0,
};
},
};
const kvm = new SmartBrowserKvm({
url: 'https://jetkvm.local',
kind: 'jetkvm',
username: 'admin',
password: 'admin',
headless: false,
ignoreHttpsErrors: true,
});
await kvm.connect();
const terminal = new SmartKvmTerminal({
kvm,
ocrEngine,
osHint: 'linux',
shellHint: 'bash',
});
await terminal.bootstrap();
const result = await terminal.runCommand('uname -a');
console.log(result);
await kvm.disconnect();
```
## Scope
This package is transport-focused and AI-agnostic. It automates browser-based visual KVM devices by opening the KVM web UI, focusing the viewer, capturing frames, and sending keyboard input. OCR and AI/model integrations are intentionally pluggable and external.
## Manual Browser Test
Automated tests do not require real KVM hardware. To run the manual browser smoke test, set:
```sh
SMARTKVM_TEST_URL=https://your-kvm.local pnpm test
```
Optional variables are `SMARTKVM_TEST_USERNAME`, `SMARTKVM_TEST_PASSWORD`, and `SMARTKVM_TEST_HEADLESS=false`.
+29
View File
@@ -0,0 +1,29 @@
import { expect, tap } from '@git.zone/tstest/tapbundle';
import { SmartBrowserKvm } from '../ts/smartkvm.classes.browserkvm.js';
// Manual smoke test against a real KVM web UI. It is a no-op unless
// SMARTKVM_TEST_URL is set, so the automated suite never needs hardware.
tap.test('SmartBrowserKvm manual smoke test', async () => {
  const targetUrl = process.env.SMARTKVM_TEST_URL;
  if (!targetUrl) {
    // Nothing to test against: record a trivially passing assertion.
    expect(true).toBeTrue();
    return;
  }

  const runHeadless = process.env.SMARTKVM_TEST_HEADLESS !== 'false';
  const browserKvm = new SmartBrowserKvm({
    url: targetUrl,
    username: process.env.SMARTKVM_TEST_USERNAME,
    password: process.env.SMARTKVM_TEST_PASSWORD,
    headless: runHeadless,
    ignoreHttpsErrors: true,
    timeoutMs: 30000,
  });

  try {
    await browserKvm.connect();
    const capturedFrame = await browserKvm.captureFrame();
    expect(capturedFrame.mimeType).toEqual('image/png');
    expect(capturedFrame.dataBase64).toBeTypeofString();
  } finally {
    // Always close the browser, even when an assertion above failed.
    await browserKvm.disconnect();
  }
});

export default tap.start();
+59
View File
@@ -0,0 +1,59 @@
import { expect, tap } from '@git.zone/tstest/tapbundle';
import {
createWrappedKvmCommand,
parseWrappedKvmCommandOutput,
} from '../ts/smartkvm.commandwrappers.js';
// The POSIX-style wrapper must carry a 32-hex-char id, both markers derived
// from that id, and the user command itself.
tap.test('bash wrapper should contain OCR-friendly markers', async () => {
  const { commandId, startMarker, endMarkerPrefix, textToType } = createWrappedKvmCommand(
    'echo hello',
    'bash'
  );

  expect(commandId).toMatch(/^[a-f0-9]{32}$/);
  expect(startMarker).toEqual(`SMARTKVM_START_${commandId}`);
  expect(endMarkerPrefix).toEqual(`SMARTKVM_END_${commandId}_`);

  for (const requiredFragment of [startMarker, endMarkerPrefix, 'echo hello']) {
    expect(textToType).toInclude(requiredFragment);
  }
});
// The PowerShell wrapper must embed both markers and run the command inside
// its try block.
tap.test('powershell wrapper should contain valid markers', async () => {
  const wrapped = createWrappedKvmCommand('Get-Location', 'powershell');
  const typedText = wrapped.textToType;

  expect(typedText).toInclude(wrapped.startMarker);
  expect(typedText).toInclude(wrapped.endMarkerPrefix);
  expect(typedText).toInclude('try { Get-Location;');
});
// The cmd.exe wrapper echoes the start marker and an end marker suffixed
// with %ERRORLEVEL%.
tap.test('cmd wrapper should contain valid markers', async () => {
  const wrapped = createWrappedKvmCommand('dir', 'cmd');
  const expectedStartEcho = `echo ${wrapped.startMarker}`;
  const expectedEndEcho = `echo ${wrapped.endMarkerPrefix}%ERRORLEVEL%`;

  expect(wrapped.textToType).toInclude(expectedStartEcho);
  expect(wrapped.textToType).toInclude(expectedEndEcho);
});
// Text outside the marker pair is discarded; the digits after the end marker
// become the exit code.
tap.test('parser should extract command output and exit code', async () => {
  const screenText = [
    'something before',
    'SMARTKVM_START_abc123',
    'hello world',
    'SMARTKVM_END_abc123_0',
    'prompt after',
  ].join('\n');

  const outcome = parseWrappedKvmCommandOutput({
    commandId: 'abc123',
    startMarker: 'SMARTKVM_START_abc123',
    endMarkerPrefix: 'SMARTKVM_END_abc123_',
    rawText: screenText,
  });

  expect(outcome.completed).toBeTrue();
  expect(outcome.exitCode).toEqual(0);
  expect(outcome.combinedText).toEqual('hello world');
});
// Without markers the parser reports "not completed" and hands the text back
// unchanged.
tap.test('parser should return incomplete result if markers are missing', async () => {
  const markerlessText = 'hello world';
  const outcome = parseWrappedKvmCommandOutput({
    commandId: 'abc123',
    startMarker: 'SMARTKVM_START_abc123',
    endMarkerPrefix: 'SMARTKVM_END_abc123_',
    rawText: markerlessText,
  });

  expect(outcome.completed).toBeFalse();
  expect(outcome.combinedText).toEqual('hello world');
});

export default tap.start();
+109
View File
@@ -0,0 +1,109 @@
import { expect, tap } from '@git.zone/tstest/tapbundle';
import { SmartKvmTerminal } from '../ts/smartkvm.classes.kvmterminal.js';
import type {
IKvmDriver,
IKvmFrame,
IOcrEngine,
IOcrRecognizeOptions,
TKvmKey,
TKvmKind,
} from '../ts/smartkvm.interfaces.js';
/**
 * In-memory IKvmDriver used by the terminal tests.
 *
 * It performs no I/O: every input call is recorded in a public array or
 * counter so that tests can assert on what the terminal sent.
 */
class MockKvmDriver implements IKvmDriver {
  public readonly kind: TKvmKind = 'generic';

  /** Texts passed to typeText(), in call order. */
  public typedTexts: string[] = [];
  /** Keys passed to pressKey(), in call order. */
  public pressedKeys: TKvmKey[] = [];
  /** Key combinations passed to pressShortcut(), in call order. */
  public pressedShortcuts: TKvmKey[][] = [];
  /** Number of focusViewer() invocations. */
  public focusViewerCalls = 0;

  public async connect(): Promise<void> {
    // nothing to connect to
  }

  public async disconnect(): Promise<void> {
    // nothing to tear down
  }

  public async wait(): Promise<void> {
    // resolves immediately so tests do not sleep
  }

  public async focusViewer(): Promise<void> {
    this.focusViewerCalls += 1;
  }

  /** Returns a fixed 1x1 PNG-typed frame stamped with the current time. */
  public async captureFrame(): Promise<IKvmFrame> {
    const stubFrame: IKvmFrame = {
      timestamp: Date.now(),
      width: 1,
      height: 1,
      mimeType: 'image/png',
      dataBase64: 'iVBORw0KGgo=',
    };
    return stubFrame;
  }

  public async typeText(text: string): Promise<void> {
    this.typedTexts.push(text);
  }

  public async pressKey(key: TKvmKey): Promise<void> {
    this.pressedKeys.push(key);
  }

  public async pressShortcut(keys: TKvmKey[]): Promise<void> {
    this.pressedShortcuts.push(keys);
  }
}
// End-to-end check of runCommand() against the mock driver. The fake OCR
// engine shows only the start marker on the first poll and the complete
// marker pair on the second, so exactly two OCR calls are expected.
tap.test('SmartKvmTerminal should run wrapped commands through mock KVM and OCR', async () => {
  const driver = new MockKvmDriver();
  let ocrInvocations = 0;

  const fakeOcr: IOcrEngine = {
    recognize: async (_frame: IKvmFrame, options?: IOcrRecognizeOptions) => {
      ocrInvocations++;
      expect(options?.language).toEqual('eng');

      // Recover the generated command id from whatever the terminal typed.
      const everythingTyped = driver.typedTexts.join('\n');
      const idMatch = everythingTyped.match(/SMARTKVM_START_([a-f0-9]+)/);
      const commandId = idMatch?.[1] ?? 'missing';

      const text =
        ocrInvocations === 1
          ? `SMARTKVM_START_${commandId}\npartial output`
          : `prompt\nSMARTKVM_START_${commandId}\nhello terminal\nSMARTKVM_END_${commandId}_0\nprompt`;
      return { text };
    },
  };

  const terminal = new SmartKvmTerminal({
    kvm: driver,
    ocrEngine: fakeOcr,
    shellHint: 'bash',
    commandTimeoutMs: 2000,
    ocrPollIntervalMs: 1,
  });

  const outcome = await terminal.runCommand('echo hello');

  // What was sent to the KVM.
  expect(driver.focusViewerCalls).toBeGreaterThan(0);
  expect(driver.typedTexts[0]).toInclude('echo hello');
  expect(driver.typedTexts[0]).toInclude('SMARTKVM_START_');
  expect(driver.pressedKeys).toContain('Enter');

  // What came back.
  expect(ocrInvocations).toEqual(2);
  expect(outcome.completed).toBeTrue();
  expect(outcome.timedOut).toBeFalse();
  expect(outcome.exitCode).toEqual(0);
  expect(outcome.combinedText).toEqual('hello terminal');
});
// With osHint 'linux', bootstrap() must send exactly one shortcut:
// Control+Alt+T.
tap.test('SmartKvmTerminal bootstrap should use generic Linux shortcut', async () => {
  const driver = new MockKvmDriver();
  const silentOcr = {
    recognize: async () => ({ text: '' }),
  };
  const terminal = new SmartKvmTerminal({
    kvm: driver,
    ocrEngine: silentOcr,
    osHint: 'linux',
  });

  await terminal.bootstrap();

  expect(driver.pressedShortcuts).toEqual([['Control', 'Alt', 'T']]);
});

export default tap.start();
+5
View File
@@ -0,0 +1,5 @@
export * from './smartkvm.interfaces.js';
export * from './smartkvm.classes.browserkvm.js';
export * from './smartkvm.classes.kvmterminal.js';
export * from './smartkvm.commandwrappers.js';
export * from './smartkvm.tools.smartagent.js';
+9
View File
@@ -0,0 +1,9 @@
// node native scope
import * as crypto from 'node:crypto';
export { crypto };
// third party scope
import puppeteer from 'puppeteer';
export { puppeteer };
+335
View File
@@ -0,0 +1,335 @@
import * as plugins from './plugins.js';
import type {
IBrowserKvmOptions,
IKvmDriver,
IKvmFrame,
IKvmTypeTextOptions,
TKvmKey,
TKvmKind,
} from './smartkvm.interfaces.js';
// Fallback CSS selector for both the focus target and the capture target
// when the caller configures neither viewerSelector nor captureSelector.
const defaultViewerSelector = 'video, canvas';
/**
 * Result returned from the in-page capture attempt made by
 * SmartBrowserKvm.captureFrame(). It holds only plain values (booleans,
 * numbers, strings).
 */
interface ICaptureMediaResult {
// True when a PNG was produced from the media element.
captured: boolean;
// Pixel size of the captured frame; set only when captured is true.
width?: number;
height?: number;
// PNG payload without the "data:...," prefix; set only when captured is true.
dataBase64?: string;
// Why the capture failed (missing selector, no 2D context, or a thrown error).
error?: string;
// False when no video/canvas exists at or below the selected element.
hasMediaElement?: boolean;
// False when the media element reports a zero width or height.
mediaHasFrame?: boolean;
}
/**
 * KVM driver that operates a browser-based KVM web UI through Puppeteer.
 *
 * connect() launches a browser, opens the configured URL, optionally submits
 * a generic username/password form and waits until the viewer element shows
 * a frame. Afterwards the driver can capture frames from the viewer and send
 * keyboard input to it.
 */
export class SmartBrowserKvm implements IKvmDriver {
  public readonly kind: TKvmKind;

  private options: IBrowserKvmOptions;
  private browser?: plugins.puppeteer.Browser;
  private page?: plugins.puppeteer.Page;

  constructor(options: IBrowserKvmOptions) {
    this.options = options;
    this.kind = options.kind ?? 'generic';
  }

  /**
   * Launches the browser and prepares the KVM page. Returns immediately when
   * a browser and page already exist.
   *
   * If any step after the launch fails, the browser is closed and the
   * internal state is reset before the error is rethrown, so a failed
   * attempt neither leaks a browser process nor makes a later connect()
   * return early on a half-initialised session.
   *
   * @throws whatever Puppeteer throws for launch, navigation or viewer
   *   detection failures (including timeouts).
   */
  public async connect(): Promise<void> {
    if (this.browser && this.page) {
      return;
    }

    const timeoutMs = this.options.timeoutMs ?? 30000;
    const args: string[] = [];
    // The Chromium sandbox is switched off when CI is set or the process runs
    // as uid 0 (presumably because Chromium will not start sandboxed there).
    if (process.env.CI || process.getuid?.() === 0) {
      args.push('--no-sandbox', '--disable-setuid-sandbox');
    }

    this.browser = await plugins.puppeteer.launch({
      args,
      acceptInsecureCerts: this.options.ignoreHttpsErrors ?? false,
      defaultViewport: null,
      executablePath: this.options.executablePath,
      headless: this.options.headless ?? true,
      timeout: timeoutMs,
      userDataDir: this.options.userDataDir,
    });

    try {
      this.page = await this.browser.newPage();
      this.page.setDefaultTimeout(timeoutMs);
      this.page.setDefaultNavigationTimeout(timeoutMs);
      await this.page.goto(this.options.url, {
        waitUntil: 'domcontentloaded',
        timeout: timeoutMs,
      });
      await this.tryGenericLogin();
      await this.waitForViewerReady();
      await this.focusViewer();
    } catch (error) {
      // Roll back; a failure while closing must not hide the original error.
      await this.disconnect().catch(() => undefined);
      throw error;
    }
  }

  /** Closes the browser (if any) and clears the connection state. */
  public async disconnect(): Promise<void> {
    const browser = this.browser;
    // Clear the fields first so the driver reads as disconnected even when
    // close() rejects.
    this.page = undefined;
    this.browser = undefined;
    if (browser) {
      await browser.close();
    }
  }

  /**
   * Clicks the viewer element so that it receives keyboard focus.
   *
   * @throws Error when not connected or when the viewer selector matches
   *   nothing.
   */
  public async focusViewer(): Promise<void> {
    const page = this.requirePage();
    const viewerSelector = this.getViewerSelector();
    const viewerElement = await page.$(viewerSelector);
    if (!viewerElement) {
      throw new Error(`KVM viewer selector missing: ${viewerSelector}`);
    }
    try {
      await viewerElement.click();
    } finally {
      await this.releaseHandle(viewerElement);
    }
  }

  /**
   * Captures the current KVM picture as a PNG frame.
   *
   * The media element (video or canvas) is first drawn onto an off-screen
   * canvas inside the page. If that produces no image for a reason other
   * than a missing selector or a media element without a frame, the method
   * falls back to a Puppeteer screenshot of the capture element.
   *
   * @throws Error when not connected, when the capture selector matches
   *   nothing, when the media element has no frame, or when the element has
   *   no visible bounding box for the screenshot fallback.
   */
  public async captureFrame(): Promise<IKvmFrame> {
    const page = this.requirePage();
    const captureSelector = this.getCaptureSelector();
    const captureElement = await page.$(captureSelector);
    if (!captureElement) {
      throw new Error(`KVM capture selector missing: ${captureSelector}`);
    }

    try {
      const mediaResult = await this.captureMediaFrame(page, captureSelector);

      if (mediaResult.captured) {
        return {
          timestamp: Date.now(),
          width: mediaResult.width ?? 0,
          height: mediaResult.height ?? 0,
          mimeType: 'image/png',
          dataBase64: mediaResult.dataBase64 ?? '',
        };
      }

      if (mediaResult.error?.startsWith('KVM capture selector missing')) {
        throw new Error(mediaResult.error);
      }

      if (mediaResult.hasMediaElement && mediaResult.mediaHasFrame === false) {
        throw new Error(`KVM media element has no frame: ${captureSelector}`);
      }

      // Fallback: screenshot the element itself.
      const boundingBox = await captureElement.boundingBox();
      if (!boundingBox || boundingBox.width <= 0 || boundingBox.height <= 0) {
        throw new Error(`KVM capture selector has no visible frame: ${captureSelector}`);
      }

      const screenshot = await captureElement.screenshot({
        type: 'png',
      });

      return {
        timestamp: Date.now(),
        width: Math.round(boundingBox.width),
        height: Math.round(boundingBox.height),
        mimeType: 'image/png',
        dataBase64: Buffer.from(screenshot).toString('base64'),
      };
    } finally {
      // captureFrame() is polled repeatedly; release the handle every time.
      await this.releaseHandle(captureElement);
    }
  }

  /**
   * Focuses the viewer and types the text.
   *
   * @param text text to type.
   * @param options optional per-key delay in milliseconds (`delayMs`).
   */
  public async typeText(text: string, options?: IKvmTypeTextOptions): Promise<void> {
    const page = this.requirePage();
    await this.focusViewer();
    await page.keyboard.type(text, {
      delay: options?.delayMs,
    });
  }

  /** Focuses the viewer and presses (down + up) a single key. */
  public async pressKey(key: TKvmKey): Promise<void> {
    const page = this.requirePage();
    await this.focusViewer();
    await page.keyboard.press(key as plugins.puppeteer.KeyInput);
  }

  /**
   * Focuses the viewer and presses a key combination: keys go down in the
   * given order and are released in reverse order. Keys that were already
   * pressed are released even when a later key-down fails.
   */
  public async pressShortcut(keys: TKvmKey[]): Promise<void> {
    const page = this.requirePage();
    await this.focusViewer();
    const pressedKeys: TKvmKey[] = [];

    try {
      for (const key of keys) {
        await page.keyboard.down(key as plugins.puppeteer.KeyInput);
        pressedKeys.push(key);
      }
    } finally {
      for (const key of pressedKeys.reverse()) {
        await page.keyboard.up(key as plugins.puppeteer.KeyInput);
      }
    }
  }

  /** Resolves after the given number of milliseconds. */
  public async wait(milliseconds: number): Promise<void> {
    await new Promise<void>((resolve) => setTimeout(resolve, milliseconds));
  }

  /**
   * Draws the media element found at (or below) `selector` onto an
   * off-screen canvas inside the page and returns it as base64 PNG data.
   * Failures are reported through the result object rather than thrown.
   */
  private async captureMediaFrame(
    page: plugins.puppeteer.Page,
    selector: string
  ): Promise<ICaptureMediaResult> {
    // The callback runs inside the page, so it must not reference anything
    // from the enclosing scope.
    return page.evaluate((selector: string): ICaptureMediaResult => {
      const rootElement = document.querySelector(selector);
      if (!rootElement) {
        return {
          captured: false,
          error: `KVM capture selector missing: ${selector}`,
        };
      }

      const mediaElement = (rootElement.matches('video, canvas')
        ? rootElement
        : rootElement.querySelector('video, canvas')) as HTMLVideoElement | HTMLCanvasElement | null;

      if (!mediaElement) {
        return {
          captured: false,
          hasMediaElement: false,
        };
      }

      let width = 0;
      let height = 0;
      if (mediaElement instanceof HTMLVideoElement) {
        width = mediaElement.videoWidth;
        height = mediaElement.videoHeight;
      } else if (mediaElement instanceof HTMLCanvasElement) {
        width = mediaElement.width;
        height = mediaElement.height;
      }

      if (width <= 0 || height <= 0) {
        return {
          captured: false,
          hasMediaElement: true,
          mediaHasFrame: false,
        };
      }

      try {
        const canvasElement = document.createElement('canvas');
        canvasElement.width = width;
        canvasElement.height = height;
        const context = canvasElement.getContext('2d');
        if (!context) {
          return {
            captured: false,
            hasMediaElement: true,
            mediaHasFrame: true,
            error: 'Could not create canvas 2D context for KVM frame capture.',
          };
        }
        context.drawImage(mediaElement, 0, 0, width, height);
        const dataUrl = canvasElement.toDataURL('image/png');
        return {
          captured: true,
          width,
          height,
          // Strip the "data:image/png;base64," prefix.
          dataBase64: dataUrl.slice(dataUrl.indexOf(',') + 1),
        };
      } catch (error) {
        return {
          captured: false,
          hasMediaElement: true,
          mediaHasFrame: true,
          error: error instanceof Error ? error.message : String(error),
        };
      }
    }, selector);
  }

  /**
   * Best-effort login: when both username and password are configured and a
   * username-like and a password input are found, fills them in and presses
   * Enter, then waits for a navigation or one second, whichever comes first.
   * Does nothing when credentials or inputs are missing.
   */
  private async tryGenericLogin(): Promise<void> {
    if (!this.options.username || !this.options.password) {
      return;
    }

    const page = this.requirePage();
    const usernameElement = await this.findFirstElement([
      'input[name="username"]',
      'input[autocomplete="username"]',
      'input[type="email"]',
      'input[type="text"]',
    ]);
    const passwordElement = await this.findFirstElement([
      'input[name="password"]',
      'input[autocomplete="current-password"]',
      'input[type="password"]',
    ]);

    try {
      if (!usernameElement || !passwordElement) {
        return;
      }

      // A triple click selects existing content so typing replaces it.
      await usernameElement.click({ clickCount: 3 });
      await usernameElement.type(this.options.username);
      await passwordElement.click({ clickCount: 3 });
      await passwordElement.type(this.options.password);
      await passwordElement.press('Enter');

      await Promise.race([
        page
          .waitForNavigation({
            waitUntil: 'domcontentloaded',
            timeout: Math.min(this.options.timeoutMs ?? 30000, 10000),
          })
          .catch(() => undefined),
        this.wait(1000),
      ]);
    } finally {
      await this.releaseHandle(usernameElement);
      await this.releaseHandle(passwordElement);
    }
  }

  /**
   * Waits until the viewer shows something: a video/canvas with non-zero
   * intrinsic size or, when no media element exists, a viewer element with a
   * non-zero bounding box. Rejects after the configured timeout.
   */
  private async waitForViewerReady(): Promise<void> {
    const page = this.requirePage();
    const viewerSelector = this.getViewerSelector();

    await page.waitForFunction(
      (selector: string) => {
        const rootElement = document.querySelector(selector);
        if (!rootElement) {
          return false;
        }

        const mediaElement = (rootElement.matches('video, canvas')
          ? rootElement
          : rootElement.querySelector('video, canvas')) as HTMLVideoElement | HTMLCanvasElement | null;

        if (mediaElement instanceof HTMLVideoElement) {
          return mediaElement.videoWidth > 0 && mediaElement.videoHeight > 0;
        }

        if (mediaElement instanceof HTMLCanvasElement) {
          return mediaElement.width > 0 && mediaElement.height > 0;
        }

        const boundingRect = rootElement.getBoundingClientRect();
        return boundingRect.width > 0 && boundingRect.height > 0;
      },
      {
        timeout: this.options.timeoutMs ?? 30000,
      },
      viewerSelector
    );
  }

  /** Returns a handle for the first selector that matches, or null. */
  private async findFirstElement(
    selectors: string[]
  ): Promise<plugins.puppeteer.ElementHandle<Element> | null> {
    const page = this.requirePage();
    for (const selector of selectors) {
      const element = await page.$(selector);
      if (element) {
        return element;
      }
    }
    return null;
  }

  /**
   * Disposes a handle obtained from the page. Errors are ignored because the
   * handle may already be invalid (e.g. after a navigation or page close).
   */
  private async releaseHandle(handle: { dispose: () => Promise<void> } | null): Promise<void> {
    if (!handle) {
      return;
    }
    await handle.dispose().catch(() => undefined);
  }

  private getViewerSelector(): string {
    return this.options.viewerSelector ?? defaultViewerSelector;
  }

  private getCaptureSelector(): string {
    return this.options.captureSelector ?? this.options.viewerSelector ?? defaultViewerSelector;
  }

  /** Returns the active page or throws when connect() has not completed. */
  private requirePage(): plugins.puppeteer.Page {
    if (!this.page) {
      throw new Error('SmartBrowserKvm is not connected. Call connect() first.');
    }
    return this.page;
  }
}
+103
View File
@@ -0,0 +1,103 @@
import { createWrappedKvmCommand, parseWrappedKvmCommandOutput } from './smartkvm.commandwrappers.js';
import type {
IKvmTerminalCommandResult,
IKvmTerminalOptions,
} from './smartkvm.interfaces.js';
/**
 * Runs shell commands on the machine behind a KVM by typing them through an
 * IKvmDriver and reading the result back from captured frames via OCR.
 */
export class SmartKvmTerminal {
  private options: IKvmTerminalOptions;

  constructor(options: IKvmTerminalOptions) {
    this.options = options;
  }

  /**
   * Tries to open a terminal on the remote machine based on `osHint`:
   * - windows: Meta+R, then "powershell -NoLogo" + Enter
   * - macos: Meta+Space, then "Terminal" + Enter
   * - linux: Control+Alt+T
   * Any other hint (including a missing one) does nothing.
   */
  public async bootstrap(): Promise<void> {
    const hint = this.options.osHint ?? 'unknown';

    if (hint === 'windows') {
      await this.openViaLauncher(['Meta', 'R'], 'powershell -NoLogo');
      return;
    }
    if (hint === 'macos') {
      await this.openViaLauncher(['Meta', 'Space'], 'Terminal');
      return;
    }
    if (hint === 'linux') {
      await this.options.kvm.pressShortcut(['Control', 'Alt', 'T']);
      await this.options.kvm.wait(1500);
    }
  }

  /**
   * Types the wrapped command, presses Enter and polls OCR until the wrapped
   * output is parsed as completed, the timeout elapses, or `ocrMaxAttempts`
   * is reached.
   *
   * @param command shell command to run.
   * @returns the parsed result. When polling gives up, `completed` is false,
   *   `timedOut` is true and `combinedText` holds the last raw OCR text.
   */
  public async runCommand(command: string): Promise<IKvmTerminalCommandResult> {
    const wrapped = createWrappedKvmCommand(command, this.options.shellHint ?? 'unknown');
    const timeoutMs = this.options.commandTimeoutMs ?? 30000;
    const pollIntervalMs = this.options.ocrPollIntervalMs ?? 500;
    const startedAt = Date.now();

    await this.options.kvm.focusViewer();
    await this.options.kvm.typeText(wrapped.textToType);
    await this.options.kvm.pressKey('Enter');

    for (let attempt = 1; ; attempt++) {
      const rawOcrText = await this.observeText();
      const parsed = parseWrappedKvmCommandOutput({
        commandId: wrapped.commandId,
        startMarker: wrapped.startMarker,
        endMarkerPrefix: wrapped.endMarkerPrefix,
        rawText: rawOcrText,
      });

      if (parsed.completed) {
        return {
          commandId: wrapped.commandId,
          command,
          completed: true,
          timedOut: false,
          exitCode: parsed.exitCode,
          combinedText: parsed.combinedText,
          rawOcrText,
        };
      }

      const attemptLimit = this.options.ocrMaxAttempts;
      const outOfAttempts = typeof attemptLimit === 'number' && attempt >= attemptLimit;
      const outOfTime = Date.now() - startedAt >= timeoutMs;
      if (outOfAttempts || outOfTime) {
        return {
          commandId: wrapped.commandId,
          command,
          completed: false,
          timedOut: true,
          combinedText: rawOcrText,
          rawOcrText,
        };
      }

      // Sleep for one poll interval, but never past the overall deadline.
      const msLeft = timeoutMs - (Date.now() - startedAt);
      await this.options.kvm.wait(Math.min(pollIntervalMs, Math.max(msLeft, 0)));
    }
  }

  /** Captures one frame and returns the text the OCR engine reads from it. */
  public async observeText(): Promise<string> {
    const frame = await this.options.kvm.captureFrame();
    const { text } = await this.options.ocrEngine.recognize(frame, {
      crop: this.options.ocrCrop,
      language: 'eng',
    });
    return text;
  }

  /**
   * Opens an OS launcher with `shortcut`, types `launcherText`, confirms with
   * Enter and waits for the terminal window to appear.
   */
  private async openViaLauncher(shortcut: string[], launcherText: string): Promise<void> {
    const kvm = this.options.kvm;
    await kvm.pressShortcut(shortcut);
    await kvm.wait(500);
    await kvm.typeText(launcherText);
    await kvm.pressKey('Enter');
    await kvm.wait(1500);
  }
}
+83
View File
@@ -0,0 +1,83 @@
import * as plugins from './plugins.js';
import type { IWrappedKvmCommand, TKvmShellHint } from './smartkvm.interfaces.js';
/** Input for parseWrappedKvmCommandOutput(). */
export interface IParseWrappedKvmCommandOutputOptions {
// Id of the wrapped command the markers belong to.
commandId: string;
// Exact text printed before the command output.
startMarker: string;
// Text printed after the command output; the exit code follows it directly.
endMarkerPrefix: string;
// Text to search, e.g. the OCR result of a captured frame.
rawText: string;
}
/** Output of parseWrappedKvmCommandOutput(). */
export interface IParseWrappedKvmCommandOutputResult {
// True when both the start marker and a following end marker were found.
completed: boolean;
// Integer read after the end marker, when one could be parsed.
exitCode?: number;
// Trimmed text between the markers when completed; otherwise the whole
// newline-normalized input.
combinedText: string;
}
/**
 * Wraps a shell command so that its output is printed between two unique
 * markers and its exit status is appended to the end marker.
 *
 * The command id is a random UUID with the dashes removed. The resulting
 * `textToType` is a single line for every shell: the POSIX variant passes the
 * escape sequence `\n` (backslash + n) to printf rather than typing real
 * newline characters, because a driver that types a newline presses Enter and
 * would split the command across several input lines.
 *
 * NOTE(review): in the cmd variant `%ERRORLEVEL%` is presumably expanded when
 * cmd.exe parses the line, i.e. before `command` runs, so the reported value
 * may be the previous command's status — confirm on a real cmd.exe session.
 *
 * @param command shell command to run; inserted verbatim, not escaped.
 * @param shellHint target shell. 'powershell' and 'cmd' get dedicated
 *   wrappers; every other value uses the POSIX (printf) wrapper.
 * @returns the id, both markers and the text to type into the terminal.
 */
export const createWrappedKvmCommand = (
  command: string,
  shellHint: TKvmShellHint
): IWrappedKvmCommand => {
  const commandId = plugins.crypto.randomUUID().replace(/-/g, '');
  const startMarker = `SMARTKVM_START_${commandId}`;
  const endMarkerPrefix = `SMARTKVM_END_${commandId}_`;
  let textToType: string;

  switch (shellHint) {
    case 'powershell':
      textToType = `$__smartkvm_id = "${commandId}"; Write-Output "${startMarker}"; $__smartkvm_status = 0; try { ${command}; if ($null -ne $LASTEXITCODE) { $__smartkvm_status = $LASTEXITCODE } } catch { Write-Output $_; $__smartkvm_status = 1 }; Write-Output "${endMarkerPrefix}$__smartkvm_status"`;
      break;
    case 'cmd':
      textToType = `echo ${startMarker} & ${command} & echo ${endMarkerPrefix}%ERRORLEVEL%`;
      break;
    case 'bash':
    case 'zsh':
    case 'sh':
    case 'unknown':
    default:
      // `\\n` keeps a literal backslash-n in the typed text; printf turns it
      // into the line break that puts each marker on its own line.
      textToType = `printf '\\n${startMarker}\\n'; ${command}; __smartkvm_status=$?; printf '\\n${endMarkerPrefix}%s\\n' "$__smartkvm_status"`;
      break;
  }

  return {
    commandId,
    shellHint,
    command,
    textToType,
    startMarker,
    endMarkerPrefix,
  };
};
/**
 * Extracts the output and exit code of a wrapped command from screen text.
 *
 * Line endings are normalized to `\n` first. An end marker only counts when
 * an integer exit code follows it on the same line. This matters because the
 * terminal also echoes the typed wrapper command, which contains both markers
 * but has a placeholder (`%s`, `%ERRORLEVEL%`, `$__smartkvm_status`) after
 * the end marker; treating that echo as the result would report completion
 * before the command has produced any output. The start marker used is the
 * last one before the accepted end marker, which skips the echoed one.
 *
 * @param options markers to look for and the raw text to search.
 * @returns `completed: true` with the trimmed text between the markers and
 *   the exit code, or `completed: false` with the whole normalized text when
 *   no start marker / exit-coded end marker pair is present.
 */
export const parseWrappedKvmCommandOutput = (
  options: IParseWrappedKvmCommandOutputOptions
): IParseWrappedKvmCommandOutputResult => {
  const normalizedRawText = options.rawText.replace(/\r\n/g, '\n').replace(/\r/g, '\n');
  const { startMarker, endMarkerPrefix } = options;

  let searchFrom = 0;
  while (searchFrom <= normalizedRawText.length) {
    const endIndex = normalizedRawText.indexOf(endMarkerPrefix, searchFrom);
    if (endIndex === -1) {
      break;
    }
    // Always advance by at least one character so an empty prefix cannot
    // stall the loop.
    searchFrom = endIndex + Math.max(endMarkerPrefix.length, 1);

    // The remainder of the marker line must start with the exit code.
    const valueStartIndex = endIndex + endMarkerPrefix.length;
    const lineEndIndex = normalizedRawText.indexOf('\n', valueStartIndex);
    const endMarkerLine = normalizedRawText
      .slice(valueStartIndex, lineEndIndex === -1 ? undefined : lineEndIndex)
      .trim();
    const exitCodeMatch = endMarkerLine.match(/^-?\d+/);
    if (!exitCodeMatch) {
      continue;
    }

    // The start marker has to end at or before the end marker begins.
    const latestStartIndex = endIndex - startMarker.length;
    if (latestStartIndex < 0) {
      continue;
    }
    const startIndex = normalizedRawText.lastIndexOf(startMarker, latestStartIndex);
    if (startIndex === -1) {
      continue;
    }

    return {
      completed: true,
      exitCode: Number.parseInt(exitCodeMatch[0], 10),
      combinedText: normalizedRawText.slice(startIndex + startMarker.length, endIndex).trim(),
    };
  }

  return {
    completed: false,
    combinedText: normalizedRawText,
  };
};
+152
View File
@@ -0,0 +1,152 @@
// Known KVM product families; SmartBrowserKvm falls back to 'generic'.
export type TKvmKind = 'jetkvm' | 'glinet' | 'pikvm' | 'tinypilot' | 'generic';
// Image formats a captured frame may declare.
export type TKvmFrameMimeType = 'image/png' | 'image/jpeg';
// Remote operating system hint; selects how SmartKvmTerminal.bootstrap() opens a terminal.
export type TKvmOsHint = 'windows' | 'macos' | 'linux' | 'unknown';
// Remote shell hint; selects the wrapper syntax in createWrappedKvmCommand().
// 'bash', 'zsh', 'sh' and 'unknown' all use the same printf-based wrapper.
export type TKvmShellHint = 'powershell' | 'cmd' | 'bash' | 'zsh' | 'sh' | 'unknown';
/**
 * Key name accepted by the KVM drivers.
 *
 * The listed literals are the commonly used keys; any other string (for
 * example a single character such as 'T') is accepted as well. The open end
 * is written as `string & {}` instead of `string` so the compiler keeps the
 * literal members apart and editors can still suggest them; a plain
 * `| string` would collapse the whole union to `string`.
 */
export type TKvmKey =
  | 'Enter'
  | 'Escape'
  | 'Tab'
  | 'Backspace'
  | 'Delete'
  | 'ArrowUp'
  | 'ArrowDown'
  | 'ArrowLeft'
  | 'ArrowRight'
  | 'Home'
  | 'End'
  | 'PageUp'
  | 'PageDown'
  | 'Space'
  | 'Meta'
  | 'Control'
  | 'Alt'
  | 'Shift'
  | 'F1'
  | 'F2'
  | 'F3'
  | 'F4'
  | 'F5'
  | 'F6'
  | 'F7'
  | 'F8'
  | 'F9'
  | 'F10'
  | 'F11'
  | 'F12'
  // eslint-disable-next-line @typescript-eslint/ban-types -- intentional open-ended string member
  | (string & {});
/** One captured picture of the KVM screen. */
export interface IKvmFrame {
// Capture time as returned by Date.now() (milliseconds since the epoch).
timestamp: number;
// Frame size in pixels.
width: number;
height: number;
mimeType: TKvmFrameMimeType;
// Base64-encoded image bytes, without a "data:" URL prefix.
dataBase64: string;
}
/** Configuration for SmartBrowserKvm. */
export interface IBrowserKvmOptions {
/** Address of the KVM web UI to open. */
url: string;
/** Device family; defaults to 'generic'. */
kind?: TKvmKind;
/**
 * Credentials for the generic login attempt. The login form is only
 * filled in when both username and password are set.
 */
username?: string;
password?: string;
/** Whether to launch the browser headless; defaults to true. */
headless?: boolean;
/**
 * Main element that should receive keyboard focus.
 * Usually video, canvas, or a wrapper around the KVM viewer.
 * Defaults to the selector 'video, canvas'.
 */
viewerSelector?: string;
/**
 * Element used for frame capture.
 * Defaults to viewerSelector and, when that is unset too, to the selector
 * 'video, canvas'. If the media element cannot be read directly, a
 * screenshot of this element is used instead.
 */
captureSelector?: string;
/**
 * Accept invalid TLS certificates; defaults to false.
 * Useful for self-signed KVM certificates.
 */
ignoreHttpsErrors?: boolean;
/**
 * Browser profile directory passed to the browser launch, to persist
 * session cookies/login state.
 */
userDataDir?: string;
/**
 * Optional browser executable path.
 */
executablePath?: string;
/**
 * Timeout in milliseconds for browser launch, initial load and viewer
 * detection; defaults to 30000.
 */
timeoutMs?: number;
}
/** Options for IKvmDriver.typeText(). */
export interface IKvmTypeTextOptions {
// Delay between key presses in milliseconds.
delayMs?: number;
}
/**
 * Transport contract between SmartKvmTerminal and a concrete KVM backend.
 * SmartBrowserKvm is the implementation in this package.
 */
export interface IKvmDriver {
readonly kind: TKvmKind;
// Establish / tear down the session with the KVM.
connect: () => Promise<void>;
disconnect: () => Promise<void>;
// Give keyboard focus to the KVM viewer.
focusViewer: () => Promise<void>;
// Capture the current screen content.
captureFrame: () => Promise<IKvmFrame>;
// Keyboard input.
typeText: (text: string, options?: IKvmTypeTextOptions) => Promise<void>;
pressKey: (key: TKvmKey) => Promise<void>;
// Press several keys together as one combination.
pressShortcut: (keys: TKvmKey[]) => Promise<void>;
// Pause for the given time; part of the driver so tests can stub it.
wait: (milliseconds: number) => Promise<void>;
}
/**
 * Rectangle that limits OCR to a part of the frame.
 * NOTE(review): presumably pixel coordinates with the origin at the top-left
 * of the frame — the interpretation is left to the OCR engine; confirm.
 */
export interface IOcrCrop {
x: number;
y: number;
width: number;
height: number;
}
/** Per-call options handed to IOcrEngine.recognize(). */
export interface IOcrRecognizeOptions {
// Optional region to recognize.
crop?: IOcrCrop;
// Language identifier; SmartKvmTerminal passes 'eng'.
language?: string;
}
/** What an OCR engine returns for one frame. */
export interface IOcrResult {
// Recognized text.
text: string;
// Optional engine-specific confidence value.
confidence?: number;
}
/**
 * Pluggable OCR backend. This package ships no implementation; callers
 * provide one.
 */
export interface IOcrEngine {
recognize: (frame: IKvmFrame, options?: IOcrRecognizeOptions) => Promise<IOcrResult>;
}
/** Configuration for SmartKvmTerminal. */
export interface IKvmTerminalOptions {
// Driver used for keyboard input and frame capture.
kvm: IKvmDriver;
// Engine that turns captured frames into text.
ocrEngine: IOcrEngine;
// Remote OS; decides what bootstrap() does. Defaults to 'unknown' (no-op).
osHint?: TKvmOsHint;
// Remote shell; decides the command wrapper. Defaults to 'unknown'.
shellHint?: TKvmShellHint;
// Maximum time runCommand() polls for the result; defaults to 30000 ms.
commandTimeoutMs?: number;
// Pause between OCR polls; defaults to 500 ms.
ocrPollIntervalMs?: number;
// Optional cap on the number of OCR polls per command.
ocrMaxAttempts?: number;
/**
 * Optional OCR crop to limit recognition to terminal area.
 */
ocrCrop?: IOcrCrop;
}
/** Result of SmartKvmTerminal.runCommand(). */
export interface IKvmTerminalCommandResult {
// Generated id that ties the markers to this command.
commandId: string;
// The command as passed by the caller (unwrapped).
command: string;
// True when the wrapped output was found on screen.
completed: boolean;
// True when polling stopped because the timeout or the attempt limit was hit.
timedOut: boolean;
// Exit code parsed from the end marker; absent when not completed or not parseable.
exitCode?: number;
// Text between the markers when completed; otherwise the last raw OCR text.
combinedText: string;
// Unprocessed text of the last OCR pass.
rawOcrText: string;
}
/** A command prepared by createWrappedKvmCommand(). */
export interface IWrappedKvmCommand {
// Random id embedded in both markers.
commandId: string;
// Shell the wrapper was built for.
shellHint: TKvmShellHint;
// Original, unwrapped command.
command: string;
// Full text to type into the terminal (markers + command + status output).
textToType: string;
// `SMARTKVM_START_<commandId>`
startMarker: string;
// `SMARTKVM_END_<commandId>_`; the exit status is printed right after it.
endMarkerPrefix: string;
}
+58
View File
@@ -0,0 +1,58 @@
import { SmartKvmTerminal } from './smartkvm.classes.kvmterminal.js';
import type { IKvmTerminalCommandResult } from './smartkvm.interfaces.js';
/**
 * Framework-neutral description of a callable tool: a name, a description,
 * a parameter schema and the function to execute.
 */
export interface ISmartKvmTool<TInput = unknown, TOutput = unknown> {
name: string;
description: string;
// Parameter schema; createSmartKvmTools() supplies JSON-Schema-style objects.
parameters: unknown;
execute: (input: TInput) => Promise<TOutput>;
}
/** Options for createSmartKvmTools(). */
export interface ISmartKvmToolOptions {
// Terminal that the created tools operate on.
terminal: SmartKvmTerminal;
}
/** Input of the `kvm_terminal_run_command` tool. */
export interface ISmartKvmTerminalRunCommandInput {
// Command line to run in the remote terminal.
command: string;
}
// Input of the `kvm_terminal_observe` tool: an object without any properties.
export type TSmartKvmTerminalObserveInput = Record<string, never>;
/**
 * Builds the two tools that expose a SmartKvmTerminal to an agent runtime:
 *
 * - `kvm_terminal_run_command` runs a command and resolves with the parsed
 *   result; it rejects when the input has no string `command`.
 * - `kvm_terminal_observe` resolves with the text currently read from the
 *   screen.
 *
 * @param options holds the terminal the tools operate on.
 * @returns the tools in the order listed above.
 */
export const createSmartKvmTools = (options: ISmartKvmToolOptions): ISmartKvmTool[] => {
  const { terminal } = options;

  const runCommandTool: ISmartKvmTool = {
    name: 'kvm_terminal_run_command',
    description: 'Run a command through the visual KVM terminal transport and return OCR-parsed output.',
    parameters: {
      type: 'object',
      properties: {
        command: {
          type: 'string',
          description: 'The terminal command to run.',
        },
      },
      required: ['command'],
      additionalProperties: false,
    },
    execute: async (input: unknown): Promise<IKvmTerminalCommandResult> => {
      // The input comes from outside the type system, so check it at runtime.
      const candidate = input as Partial<ISmartKvmTerminalRunCommandInput> | null | undefined;
      const command = candidate ? candidate.command : undefined;
      if (typeof command !== 'string') {
        throw new Error('kvm_terminal_run_command requires a string command.');
      }
      return terminal.runCommand(command);
    },
  };

  const observeTool: ISmartKvmTool = {
    name: 'kvm_terminal_observe',
    description: 'Observe the current visual KVM terminal text through the configured OCR engine.',
    parameters: {
      type: 'object',
      properties: {},
      additionalProperties: false,
    },
    execute: async (): Promise<string> => {
      return terminal.observeText();
    },
  };

  return [runCommandTool, observeTool];
};
+14
View File
@@ -0,0 +1,14 @@
{
"compilerOptions": {
"experimentalDecorators": true,
"useDefineForClassFields": false,
"target": "ES2022",
"module": "NodeNext",
"moduleResolution": "NodeNext",
"noImplicitAny": true,
"esModuleInterop": true,
"verbatimModuleSyntax": true,
"types": ["node"]
},
"exclude": ["dist_*/**/*.d.ts"]
}