From daaf6559e320abdadc4499d16c2c7456df0b39c4 Mon Sep 17 00:00:00 2001 From: Juergen Kunz Date: Fri, 30 Jan 2026 03:16:57 +0000 Subject: [PATCH] initial --- .gitea/release-template.md | 37 ++ .gitea/workflows/README.md | 179 +++++++++ .gitea/workflows/ci.yml | 84 +++++ .gitea/workflows/npm-publish.yml | 129 +++++++ .gitea/workflows/release.yml | 249 ++++++++++++ .gitignore | 25 ++ .npmignore | 54 +++ .vscode/settings.json | 3 + bin/modelgrid-wrapper.js | 109 ++++++ changelog.md | 66 ++++ deno.json | 41 ++ docs/example-action.sh | 53 +++ install.sh | 286 ++++++++++++++ license | 21 ++ mod.ts | 44 +++ npmextra.json | 20 + package.json | 66 ++++ readme.hints.md | 156 ++++++++ readme.md | 296 +++++++++++++++ readme.plan.md | 202 ++++++++++ scripts/compile-all.sh | 66 ++++ scripts/install-binary.js | 238 ++++++++++++ test/test.logger.ts | 157 ++++++++ test/test.showcase.ts | 216 +++++++++++ test/test.ts | 323 ++++++++++++++++ ts/00_commitinfo_data.ts | 8 + ts/api/handlers/chat.ts | 150 ++++++++ ts/api/handlers/embeddings.ts | 235 ++++++++++++ ts/api/handlers/index.ts | 9 + ts/api/handlers/models.ts | 136 +++++++ ts/api/index.ts | 10 + ts/api/middleware/auth.ts | 105 ++++++ ts/api/middleware/index.ts | 7 + ts/api/middleware/sanity.ts | 254 +++++++++++++ ts/api/router.ts | 300 +++++++++++++++ ts/api/server.ts | 300 +++++++++++++++ ts/cli.ts | 423 +++++++++++++++++++++ ts/cli/config-handler.ts | 314 ++++++++++++++++ ts/cli/container-handler.ts | 317 ++++++++++++++++ ts/cli/gpu-handler.ts | 255 +++++++++++++ ts/cli/model-handler.ts | 202 ++++++++++ ts/cli/service-handler.ts | 252 +++++++++++++ ts/colors.ts | 157 ++++++++ ts/constants.ts | 175 +++++++++ ts/containers/base-container.ts | 216 +++++++++++ ts/containers/container-manager.ts | 349 +++++++++++++++++ ts/containers/index.ts | 11 + ts/containers/ollama.ts | 387 +++++++++++++++++++ ts/containers/tgi.ts | 417 ++++++++++++++++++++ ts/containers/vllm.ts | 272 +++++++++++++ ts/daemon.ts | 268 +++++++++++++ ts/docker/container-runtime.ts | 558 +++++++++++++++++++++++++++ ts/docker/docker-manager.ts | 509 +++++++++++++++++++++++++ ts/docker/index.ts | 8 + ts/drivers/amd.ts | 281 ++++++++++++++ ts/drivers/base-driver.ts | 217 +++++++++++ ts/drivers/driver-manager.ts | 267 +++++++++++++ ts/drivers/index.ts | 11 + ts/drivers/intel.ts | 339 +++++++++++++++++ ts/drivers/nvidia.ts | 318 ++++++++++++++++ ts/hardware/gpu-detector.ts | 565 ++++++++++++++++++++++++++++ ts/hardware/index.ts | 8 + ts/hardware/system-info.ts | 233 ++++++++++++ ts/helpers/index.ts | 2 + ts/helpers/prompt.ts | 55 +++ ts/helpers/shortid.ts | 22 ++ ts/index.ts | 40 ++ ts/interfaces/api.ts | 329 ++++++++++++++++ ts/interfaces/config.ts | 121 ++++++ ts/interfaces/container.ts | 176 +++++++++ ts/interfaces/gpu.ts | 132 +++++++ ts/interfaces/index.ts | 11 + ts/interfaces/modelgrid-accessor.ts | 31 ++ ts/logger.ts | 334 ++++++++++++++++ ts/modelgrid.ts | 260 +++++++++++++ ts/models/index.ts | 8 + ts/models/loader.ts | 291 ++++++++++++++ ts/models/registry.ts | 252 +++++++++++++ ts/systemd.ts | 283 ++++++++++++++ uninstall.sh | 120 ++++++ 80 files changed, 14430 insertions(+) create mode 100644 .gitea/release-template.md create mode 100644 .gitea/workflows/README.md create mode 100644 .gitea/workflows/ci.yml create mode 100644 .gitea/workflows/npm-publish.yml create mode 100644 .gitea/workflows/release.yml create mode 100644 .gitignore create mode 100644 .npmignore create mode 100644 .vscode/settings.json create mode 100644 bin/modelgrid-wrapper.js create mode 100644 changelog.md create 
mode 100644 deno.json create mode 100644 docs/example-action.sh create mode 100644 install.sh create mode 100644 license create mode 100644 mod.ts create mode 100644 npmextra.json create mode 100644 package.json create mode 100644 readme.hints.md create mode 100644 readme.md create mode 100644 readme.plan.md create mode 100755 scripts/compile-all.sh create mode 100644 scripts/install-binary.js create mode 100644 test/test.logger.ts create mode 100644 test/test.showcase.ts create mode 100644 test/test.ts create mode 100644 ts/00_commitinfo_data.ts create mode 100644 ts/api/handlers/chat.ts create mode 100644 ts/api/handlers/embeddings.ts create mode 100644 ts/api/handlers/index.ts create mode 100644 ts/api/handlers/models.ts create mode 100644 ts/api/index.ts create mode 100644 ts/api/middleware/auth.ts create mode 100644 ts/api/middleware/index.ts create mode 100644 ts/api/middleware/sanity.ts create mode 100644 ts/api/router.ts create mode 100644 ts/api/server.ts create mode 100644 ts/cli.ts create mode 100644 ts/cli/config-handler.ts create mode 100644 ts/cli/container-handler.ts create mode 100644 ts/cli/gpu-handler.ts create mode 100644 ts/cli/model-handler.ts create mode 100644 ts/cli/service-handler.ts create mode 100644 ts/colors.ts create mode 100644 ts/constants.ts create mode 100644 ts/containers/base-container.ts create mode 100644 ts/containers/container-manager.ts create mode 100644 ts/containers/index.ts create mode 100644 ts/containers/ollama.ts create mode 100644 ts/containers/tgi.ts create mode 100644 ts/containers/vllm.ts create mode 100644 ts/daemon.ts create mode 100644 ts/docker/container-runtime.ts create mode 100644 ts/docker/docker-manager.ts create mode 100644 ts/docker/index.ts create mode 100644 ts/drivers/amd.ts create mode 100644 ts/drivers/base-driver.ts create mode 100644 ts/drivers/driver-manager.ts create mode 100644 ts/drivers/index.ts create mode 100644 ts/drivers/intel.ts create mode 100644 ts/drivers/nvidia.ts create mode 100644 ts/hardware/gpu-detector.ts create mode 100644 ts/hardware/index.ts create mode 100644 ts/hardware/system-info.ts create mode 100644 ts/helpers/index.ts create mode 100644 ts/helpers/prompt.ts create mode 100644 ts/helpers/shortid.ts create mode 100644 ts/index.ts create mode 100644 ts/interfaces/api.ts create mode 100644 ts/interfaces/config.ts create mode 100644 ts/interfaces/container.ts create mode 100644 ts/interfaces/gpu.ts create mode 100644 ts/interfaces/index.ts create mode 100644 ts/interfaces/modelgrid-accessor.ts create mode 100644 ts/logger.ts create mode 100644 ts/modelgrid.ts create mode 100644 ts/models/index.ts create mode 100644 ts/models/loader.ts create mode 100644 ts/models/registry.ts create mode 100644 ts/systemd.ts create mode 100644 uninstall.sh diff --git a/.gitea/release-template.md b/.gitea/release-template.md new file mode 100644 index 0000000..1607661 --- /dev/null +++ b/.gitea/release-template.md @@ -0,0 +1,37 @@ +## ModelGrid {{VERSION}} + +Pre-compiled binaries for multiple platforms. + +### Installation + +#### Option 1: Via npm (recommended) + +```bash +npm install -g @modelgrid.com/modelgrid +``` + +#### Option 2: Via installer script + +```bash +curl -sSL https://code.foss.global/modelgrid.com/modelgrid/raw/branch/main/install.sh | sudo bash +``` + +#### Option 3: Direct binary download + +Download the appropriate binary for your platform from the assets below and make it executable. 
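+
+Optionally verify the download against `SHA256SUMS.txt` (provided with each release; see Checksums below) before running it. A minimal Deno sketch (run with `deno run --allow-read verify.ts`); the filename is an example for Linux x64:
+
+```ts
+// verify.ts - compare a downloaded binary against SHA256SUMS.txt
+const binary = "modelgrid-linux-x64"; // adjust for your platform
+
+const data = await Deno.readFile(binary);
+const digest = await crypto.subtle.digest("SHA-256", data);
+const actual = [...new Uint8Array(digest)]
+  .map((b) => b.toString(16).padStart(2, "0"))
+  .join("");
+
+// sha256sum output format: "<hash>  <filename>" per line
+const sums = await Deno.readTextFile("SHA256SUMS.txt");
+const expected = sums.split("\n")
+  .map((line) => line.trim().split(/\s+/))
+  .find(([, name]) => name === binary)?.[0];
+
+if (!expected || expected !== actual) {
+  console.error(`Checksum mismatch for ${binary}`);
+  Deno.exit(1);
+}
+console.log(`${binary}: checksum OK`);
+```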
+ +### Supported Platforms + +- Linux x86_64 (x64) +- Linux ARM64 (aarch64) +- macOS x86_64 (Intel) +- macOS ARM64 (Apple Silicon) +- Windows x86_64 + +### Checksums + +SHA256 checksums are provided in `SHA256SUMS.txt` for binary verification. + +### npm Package + +The npm package includes automatic binary detection and installation for your platform. diff --git a/.gitea/workflows/README.md b/.gitea/workflows/README.md new file mode 100644 index 0000000..23cf220 --- /dev/null +++ b/.gitea/workflows/README.md @@ -0,0 +1,179 @@ +# Gitea Actions Workflows + +This directory contains automated workflows for ModelGrid's CI/CD pipeline. + +## Workflows + +### CI Workflow (`ci.yml`) + +**Triggers:** + +- Push to `main` branch +- Push to `migration/**` branches +- Pull requests to `main` + +**Jobs:** + +1. **Type Check & Lint** + - Runs `deno check` for TypeScript validation + - Runs `deno lint` (continues on error) + - Runs `deno fmt --check` (continues on error) + +2. **Build Test (Current Platform)** + - Compiles for Linux x86_64 (host platform) + - Tests binary execution (`--version` and `help`) + +3. **Build All Platforms** (Main/Tags only) + - Compiles all 5 platform binaries + - Uploads as artifacts (30-day retention) + - Only runs on `main` branch or tags + +### Release Workflow (`release.yml`) + +**Triggers:** + +- Push tags matching `v*` (e.g., `v1.0.0`) + +**Jobs:** + +1. **Version Verification** + - Extracts version from tag + - Verifies `deno.json` version matches tag + - Fails if mismatch detected + +2. **Compilation** + - Compiles binaries for all 5 platforms: + - `modelgrid-linux-x64` (Linux x86_64) + - `modelgrid-linux-arm64` (Linux ARM64) + - `modelgrid-macos-x64` (macOS Intel) + - `modelgrid-macos-arm64` (macOS Apple Silicon) + - `modelgrid-windows-x64.exe` (Windows x64) + +3. **Checksums** + - Generates SHA256 checksums for all binaries + - Creates `SHA256SUMS.txt` + +4. **Release Creation** + - Creates Gitea release with tag + - Extracts release notes from CHANGELOG.md (if exists) + - Uploads all binaries + checksums as release assets + +## Creating a Release + +### Prerequisites + +1. Update version in `deno.json`: + ```json + { + "version": "1.0.0" + } + ``` + +2. Update `CHANGELOG.md` with release notes (optional but recommended) + +3. Commit all changes: + ```bash + git add . + git commit -m "chore: bump version to 1.0.0" + ``` + +### Release Process + +1. Create and push a tag matching the version: + ```bash + git tag v1.0.0 + git push origin v1.0.0 + ``` + +2. Gitea Actions will automatically: + - Verify version consistency + - Compile all platform binaries + - Generate checksums + - Create release with binaries attached + +3. Monitor the workflow: + - Go to: `https://code.foss.global/modelgrid.com/modelgrid/actions` + - Check the "Release" workflow run + +### Manual Release (Fallback) + +If workflows fail, you can create a release manually: + +```bash +# Compile all binaries +bash scripts/compile-all.sh + +# Generate checksums +cd dist/binaries +sha256sum * > SHA256SUMS.txt +cd ../.. + +# Create release on Gitea UI +# Upload binaries manually +``` + +## Troubleshooting + +### Version Mismatch Error + +If the release workflow fails with "Version mismatch": + +- Ensure `deno.json` version matches the git tag +- Example: tag `v1.0.0` requires `"version": "1.0.0"` in deno.json + +### Compilation Errors + +If compilation fails: + +1. Test locally: `bash scripts/compile-all.sh` +2. Check Deno version compatibility +3. 
Review TypeScript errors: `deno check mod.ts` + +### Upload Failures + +If binary upload fails: + +1. Check Gitea Actions permissions +2. Verify `GITHUB_TOKEN` secret exists (auto-provided by Gitea) +3. Try manual release creation + +## Workflow Secrets + +The workflows use the following secrets: + +- `GITHUB_TOKEN` - Auto-provided by Gitea Actions (no setup needed) + +## Development + +### Testing Workflows Locally + +You can test compilation locally: + +```bash +# Install Deno +curl -fsSL https://deno.land/install.sh | sh + +# Test type checking +deno check mod.ts + +# Test compilation +bash scripts/compile-all.sh + +# Test binary +./dist/binaries/modelgrid-linux-x64 --version +``` + +### Modifying Workflows + +After modifying workflows: + +1. Test syntax: Use a YAML validator +2. Commit changes: `git add .gitea/workflows/` +3. Push to feature branch first to test CI +4. Merge to main once verified + +## Links + +- Gitea Actions Documentation: https://docs.gitea.com/usage/actions/overview +- Deno Compile Documentation: https://docs.deno.com/runtime/manual/tools/compiler +- ModelGrid Repository: https://code.foss.global/modelgrid.com/modelgrid diff --git a/.gitea/workflows/ci.yml b/.gitea/workflows/ci.yml new file mode 100644 index 0000000..e6ec013 --- /dev/null +++ b/.gitea/workflows/ci.yml @@ -0,0 +1,84 @@ +name: CI + +on: + push: + branches: + - main + - 'migration/**' + pull_request: + branches: + - main + +jobs: + check: + name: Type Check & Lint + runs-on: ubuntu-latest + + steps: + - name: Checkout code + uses: actions/checkout@v4 + + - name: Set up Deno + uses: denoland/setup-deno@v1 + with: + deno-version: v2.x + + - name: Check TypeScript types + run: deno check mod.ts + + - name: Lint code + run: deno lint + continue-on-error: true + + - name: Format check + run: deno fmt --check + continue-on-error: true + + build: + name: Build Test (Current Platform) + runs-on: ubuntu-latest + + steps: + - name: Checkout code + uses: actions/checkout@v4 + + - name: Set up Deno + uses: denoland/setup-deno@v1 + with: + deno-version: v2.x + + - name: Compile for current platform + run: | + echo "Testing compilation for Linux x86_64..." 
+ deno compile --allow-all --no-check \ + --output modelgrid-test \ + --target x86_64-unknown-linux-gnu mod.ts + + - name: Test binary execution + run: | + chmod +x modelgrid-test + ./modelgrid-test --version + ./modelgrid-test help + + build-all: + name: Build All Platforms + runs-on: ubuntu-latest + + steps: + - name: Checkout code + uses: actions/checkout@v4 + + - name: Set up Deno + uses: denoland/setup-deno@v1 + with: + deno-version: v2.x + + - name: Compile all platform binaries + run: bash scripts/compile-all.sh + + - name: Upload all binaries as artifact + uses: actions/upload-artifact@v3 + with: + name: modelgrid-binaries.zip + path: dist/binaries/* + retention-days: 30 diff --git a/.gitea/workflows/npm-publish.yml b/.gitea/workflows/npm-publish.yml new file mode 100644 index 0000000..5eaa64c --- /dev/null +++ b/.gitea/workflows/npm-publish.yml @@ -0,0 +1,129 @@ +name: Publish to npm + +on: + push: + tags: + - 'v*' + +jobs: + npm-publish: + runs-on: ubuntu-latest + + steps: + - name: Checkout code + uses: actions/checkout@v4 + + - name: Set up Deno + uses: denoland/setup-deno@v1 + with: + deno-version: v2.x + + - name: Setup Node.js for npm publishing + uses: actions/setup-node@v4 + with: + node-version: '18.x' + registry-url: 'https://registry.npmjs.org/' + + - name: Get version from tag + id: version + run: | + VERSION=${GITHUB_REF#refs/tags/} + echo "version=$VERSION" >> $GITHUB_OUTPUT + echo "version_number=${VERSION#v}" >> $GITHUB_OUTPUT + echo "Publishing version: $VERSION" + + - name: Verify deno.json version matches tag + run: | + DENO_VERSION=$(grep -o '"version": "[^"]*"' deno.json | cut -d'"' -f4) + TAG_VERSION="${{ steps.version.outputs.version_number }}" + echo "deno.json version: $DENO_VERSION" + echo "Tag version: $TAG_VERSION" + if [ "$DENO_VERSION" != "$TAG_VERSION" ]; then + echo "ERROR: Version mismatch!" + echo "deno.json has version $DENO_VERSION but tag is $TAG_VERSION" + exit 1 + fi + + - name: Compile binaries for npm package + run: | + echo "Compiling binaries for npm package..." + deno task compile + echo "" + echo "Binary sizes:" + ls -lh dist/binaries/ + + - name: Generate SHA256 checksums + run: | + cd dist/binaries + sha256sum * > SHA256SUMS + cat SHA256SUMS + cd ../.. + + - name: Sync package.json version + run: | + VERSION="${{ steps.version.outputs.version_number }}" + echo "Syncing package.json to version ${VERSION}..." + npm version ${VERSION} --no-git-tag-version --allow-same-version + echo "package.json version: $(grep '"version"' package.json | head -1)" + + - name: Create npm package + run: | + echo "Creating npm package..." + npm pack + echo "" + echo "Package created:" + ls -lh *.tgz + + - name: Test local installation + run: | + echo "Testing local package installation..." + PACKAGE_FILE=$(ls *.tgz) + npm install -g ${PACKAGE_FILE} + echo "" + echo "Testing modelgrid command:" + modelgrid --version || echo "Note: Binary execution may fail in CI environment" + echo "" + echo "Checking installed files:" + npm ls -g @modelgrid.com/modelgrid || true + + - name: Publish to npm + env: + NODE_AUTH_TOKEN: ${{ secrets.NPM_TOKEN }} + run: | + echo "Publishing to npm registry..." + npm publish --access public + echo "" + echo "✅ Successfully published @modelgrid.com/modelgrid to npm!" + echo "" + echo "Package info:" + npm view @modelgrid.com/modelgrid + + - name: Verify npm package + run: | + echo "Waiting for npm propagation..." + sleep 30 + echo "" + echo "Verifying published package..." 
+ npm view @modelgrid.com/modelgrid + echo "" + echo "Testing installation from npm:" + npm install -g @modelgrid.com/modelgrid + echo "" + echo "Package installed successfully!" + which modelgrid || echo "Binary location check skipped" + + - name: Publish Summary + run: | + echo "================================================" + echo " npm Publish Complete!" + echo "================================================" + echo "" + echo "✅ Package: @modelgrid.com/modelgrid" + echo "✅ Version: ${{ steps.version.outputs.version }}" + echo "" + echo "Installation:" + echo " npm install -g @modelgrid.com/modelgrid" + echo "" + echo "Registry:" + echo " https://www.npmjs.com/package/@modelgrid.com/modelgrid" + echo "" diff --git a/.gitea/workflows/release.yml b/.gitea/workflows/release.yml new file mode 100644 index 0000000..49f3fcc --- /dev/null +++ b/.gitea/workflows/release.yml @@ -0,0 +1,249 @@ +name: Release + +on: + push: + tags: + - 'v*' + +jobs: + build-and-release: + runs-on: ubuntu-latest + + steps: + - name: Checkout code + uses: actions/checkout@v4 + with: + fetch-depth: 0 + + - name: Set up Deno + uses: denoland/setup-deno@v1 + with: + deno-version: v2.x + + - name: Get version from tag + id: version + run: | + VERSION=${GITHUB_REF#refs/tags/} + echo "version=$VERSION" >> $GITHUB_OUTPUT + echo "version_number=${VERSION#v}" >> $GITHUB_OUTPUT + echo "Building version: $VERSION" + + - name: Verify deno.json version matches tag + run: | + DENO_VERSION=$(grep -o '"version": "[^"]*"' deno.json | cut -d'"' -f4) + TAG_VERSION="${{ steps.version.outputs.version_number }}" + echo "deno.json version: $DENO_VERSION" + echo "Tag version: $TAG_VERSION" + if [ "$DENO_VERSION" != "$TAG_VERSION" ]; then + echo "ERROR: Version mismatch!" + echo "deno.json has version $DENO_VERSION but tag is $TAG_VERSION" + exit 1 + fi + + - name: Compile binaries for all platforms + run: | + echo "================================================" + echo " ModelGrid Release Compilation" + echo " Version: ${{ steps.version.outputs.version }}" + echo "================================================" + echo "" + + # Clean up old binaries and create fresh directory + rm -rf dist/binaries + mkdir -p dist/binaries + echo "→ Cleaned old binaries from dist/binaries" + echo "" + + # Linux x86_64 + echo "→ Compiling for Linux x86_64..." + deno compile --allow-all --no-check \ + --output dist/binaries/modelgrid-linux-x64 \ + --target x86_64-unknown-linux-gnu mod.ts + echo " ✓ Linux x86_64 complete" + + # Linux ARM64 + echo "→ Compiling for Linux ARM64..." + deno compile --allow-all --no-check \ + --output dist/binaries/modelgrid-linux-arm64 \ + --target aarch64-unknown-linux-gnu mod.ts + echo " ✓ Linux ARM64 complete" + + # macOS x86_64 + echo "→ Compiling for macOS x86_64..." + deno compile --allow-all --no-check \ + --output dist/binaries/modelgrid-macos-x64 \ + --target x86_64-apple-darwin mod.ts + echo " ✓ macOS x86_64 complete" + + # macOS ARM64 + echo "→ Compiling for macOS ARM64..." + deno compile --allow-all --no-check \ + --output dist/binaries/modelgrid-macos-arm64 \ + --target aarch64-apple-darwin mod.ts + echo " ✓ macOS ARM64 complete" + + # Windows x86_64 + echo "→ Compiling for Windows x86_64..." + deno compile --allow-all --no-check \ + --output dist/binaries/modelgrid-windows-x64.exe \ + --target x86_64-pc-windows-msvc mod.ts + echo " ✓ Windows x86_64 complete" + + echo "" + echo "All binaries compiled successfully!" 
+ ls -lh dist/binaries/ + + - name: Generate SHA256 checksums + run: | + cd dist/binaries + sha256sum * > SHA256SUMS.txt + cat SHA256SUMS.txt + cd ../.. + + - name: Extract changelog for this version + id: changelog + run: | + VERSION="${{ steps.version.outputs.version }}" + + # Check if CHANGELOG.md exists + if [ ! -f CHANGELOG.md ]; then + echo "No CHANGELOG.md found, using default release notes" + cat > /tmp/release_notes.md << EOF + ## ModelGrid $VERSION + + Pre-compiled binaries for multiple platforms. + + ### Installation + + Use the installation script: + \`\`\`bash + curl -sSL https://code.foss.global/modelgrid.com/modelgrid/raw/branch/main/install.sh | sudo bash + \`\`\` + + Or download the binary for your platform and make it executable. + + ### Supported Platforms + - Linux x86_64 (x64) + - Linux ARM64 (aarch64) + - macOS x86_64 (Intel) + - macOS ARM64 (Apple Silicon) + - Windows x86_64 + + ### Checksums + SHA256 checksums are provided in SHA256SUMS.txt + EOF + else + # Try to extract section for this version from CHANGELOG.md + # This is a simple extraction - adjust based on your CHANGELOG format + awk "/## \[$VERSION\]/,/## \[/" CHANGELOG.md | sed '$d' > /tmp/release_notes.md || cat > /tmp/release_notes.md << EOF + ## ModelGrid $VERSION + + See CHANGELOG.md for full details. + + ### Installation + + Use the installation script: + \`\`\`bash + curl -sSL https://code.foss.global/modelgrid.com/modelgrid/raw/branch/main/install.sh | sudo bash + \`\`\` + EOF + fi + + echo "Release notes:" + cat /tmp/release_notes.md + + - name: Delete existing release if it exists + run: | + VERSION="${{ steps.version.outputs.version }}" + + echo "Checking for existing release $VERSION..." + + # Try to get existing release by tag + EXISTING_RELEASE_ID=$(curl -s \ + -H "Authorization: token ${{ secrets.GITHUB_TOKEN }}" \ + "https://code.foss.global/api/v1/repos/modelgrid.com/modelgrid/releases/tags/$VERSION" \ + | jq -r '.id // empty') + + if [ -n "$EXISTING_RELEASE_ID" ]; then + echo "Found existing release (ID: $EXISTING_RELEASE_ID), deleting..." + curl -X DELETE -s \ + -H "Authorization: token ${{ secrets.GITHUB_TOKEN }}" \ + "https://code.foss.global/api/v1/repos/modelgrid.com/modelgrid/releases/$EXISTING_RELEASE_ID" + echo "Existing release deleted" + sleep 2 + else + echo "No existing release found, proceeding with creation" + fi + + - name: Create Gitea Release + run: | + VERSION="${{ steps.version.outputs.version }}" + RELEASE_NOTES=$(cat /tmp/release_notes.md) + + # Create the release + echo "Creating release for $VERSION..." + RELEASE_ID=$(curl -X POST -s \ + -H "Authorization: token ${{ secrets.GITHUB_TOKEN }}" \ + -H "Content-Type: application/json" \ + "https://code.foss.global/api/v1/repos/modelgrid.com/modelgrid/releases" \ + -d "{ + \"tag_name\": \"$VERSION\", + \"name\": \"ModelGrid $VERSION\", + \"body\": $(jq -Rs . /tmp/release_notes.md), + \"draft\": false, + \"prerelease\": false + }" | jq -r '.id') + + echo "Release created with ID: $RELEASE_ID" + + # Upload binaries as release assets + for binary in dist/binaries/*; do + filename=$(basename "$binary") + echo "Uploading $filename..." 
+ curl -X POST -s \ + -H "Authorization: token ${{ secrets.GITHUB_TOKEN }}" \ + -H "Content-Type: application/octet-stream" \ + --data-binary "@$binary" \ + "https://code.foss.global/api/v1/repos/modelgrid.com/modelgrid/releases/$RELEASE_ID/assets?name=$filename" + done + + echo "All assets uploaded successfully" + + - name: Clean up old releases + run: | + echo "Cleaning up old releases (keeping only last 3)..." + + # Fetch all releases sorted by creation date + RELEASES=$(curl -s -H "Authorization: token ${{ secrets.GITHUB_TOKEN }}" \ + "https://code.foss.global/api/v1/repos/modelgrid.com/modelgrid/releases" | \ + jq -r 'sort_by(.created_at) | reverse | .[3:] | .[].id') + + # Delete old releases + if [ -n "$RELEASES" ]; then + echo "Found releases to delete:" + for release_id in $RELEASES; do + echo " Deleting release ID: $release_id" + curl -X DELETE -s -H "Authorization: token ${{ secrets.GITHUB_TOKEN }}" \ + "https://code.foss.global/api/v1/repos/modelgrid.com/modelgrid/releases/$release_id" + done + echo "Old releases deleted successfully" + else + echo "No old releases to delete (less than 4 releases total)" + fi + echo "" + + - name: Release Summary + run: | + echo "================================================" + echo " Release ${{ steps.version.outputs.version }} Complete!" + echo "================================================" + echo "" + echo "Binaries published:" + ls -lh dist/binaries/ + echo "" + echo "Release URL:" + echo "https://code.foss.global/modelgrid.com/modelgrid/releases/tag/${{ steps.version.outputs.version }}" + echo "" + echo "Installation command:" + echo "curl -sSL https://code.foss.global/modelgrid.com/modelgrid/raw/branch/main/install.sh | sudo bash" + echo "" diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000..f535268 --- /dev/null +++ b/.gitignore @@ -0,0 +1,25 @@ +# Compiled Deno binaries (built by scripts/compile-all.sh) +dist/binaries/ + +# Deno cache and lock file +.deno/ +deno.lock + +# Legacy Node.js artifacts (v3.x and earlier - kept for safety) +node_modules/ +vendor/ +dist_ts/ +npm-debug.log* + +# Logs +*.log + +# Environment +.env + +# OS specific +.DS_Store +Thumbs.db + +# Development +.nogit/ diff --git a/.npmignore b/.npmignore new file mode 100644 index 0000000..4b0f1eb --- /dev/null +++ b/.npmignore @@ -0,0 +1,54 @@ +# Source code (not needed for binary distribution) +/ts/ +/test/ +mod.ts +*.ts + +# Development files +.git/ +.gitea/ +.claude/ +.serena/ +.nogit/ +.github/ +deno.json +deno.lock +tsconfig.json + +# Scripts not needed for npm +/scripts/compile-all.sh +install.sh +uninstall.sh +example-action.sh + +# Documentation files not needed for npm package +readme.plan.md +readme.hints.md +npm-publish-instructions.md +docs/ + +# IDE and editor files +.vscode/ +.idea/ +*.swp +*.swo +*~ +.DS_Store + +# Keep only the install-binary.js in scripts/ +/scripts/* +!/scripts/install-binary.js + +# Exclude all dist directory (binaries will be downloaded during install) +/dist/ + +# Logs and temporary files +*.log +npm-debug.log* +yarn-debug.log* +yarn-error.log* + +# Other +node_modules/ +.env +.env.* \ No newline at end of file diff --git a/.vscode/settings.json b/.vscode/settings.json new file mode 100644 index 0000000..4b9fb22 --- /dev/null +++ b/.vscode/settings.json @@ -0,0 +1,3 @@ +{ + "deno.enable": true +} \ No newline at end of file diff --git a/bin/modelgrid-wrapper.js b/bin/modelgrid-wrapper.js new file mode 100644 index 0000000..e23fe9a --- /dev/null +++ b/bin/modelgrid-wrapper.js @@ -0,0 +1,109 @@ 
+#!/usr/bin/env node + +/** + * ModelGrid npm wrapper + * This script executes the appropriate pre-compiled binary based on the current platform + */ + +import { spawn } from 'child_process'; +import { fileURLToPath } from 'url'; +import { dirname, join } from 'path'; +import { existsSync } from 'fs'; +import { arch, platform } from 'os'; +import process from "node:process"; + +const __filename = fileURLToPath(import.meta.url); +const __dirname = dirname(__filename); + +/** + * Get the binary name for the current platform + */ +function getBinaryName() { + const plat = platform(); + const architecture = arch(); + + // Map Node's platform/arch to our binary naming + const platformMap = { + 'darwin': 'macos', + 'linux': 'linux', + 'win32': 'windows', + }; + + const archMap = { + 'x64': 'x64', + 'arm64': 'arm64', + }; + + const mappedPlatform = platformMap[plat]; + const mappedArch = archMap[architecture]; + + if (!mappedPlatform || !mappedArch) { + console.error(`Error: Unsupported platform/architecture: ${plat}/${architecture}`); + console.error('Supported platforms: Linux, macOS, Windows'); + console.error('Supported architectures: x64, arm64'); + process.exit(1); + } + + // Construct binary name + let binaryName = `modelgrid-${mappedPlatform}-${mappedArch}`; + if (plat === 'win32') { + binaryName += '.exe'; + } + + return binaryName; +} + +/** + * Execute the binary + */ +function executeBinary() { + const binaryName = getBinaryName(); + const binaryPath = join(__dirname, '..', 'dist', 'binaries', binaryName); + + // Check if binary exists + if (!existsSync(binaryPath)) { + console.error(`Error: Binary not found at ${binaryPath}`); + console.error('This might happen if:'); + console.error('1. The postinstall script failed to run'); + console.error('2. The platform is not supported'); + console.error('3. The package was not installed correctly'); + console.error(''); + console.error('Try reinstalling the package:'); + console.error(' npm uninstall -g @modelgrid.com/modelgrid'); + console.error(' npm install -g @modelgrid.com/modelgrid'); + process.exit(1); + } + + // Spawn the binary with all arguments passed through + const child = spawn(binaryPath, process.argv.slice(2), { + stdio: 'inherit', + shell: false, + }); + + // Handle child process events + child.on('error', (err) => { + console.error(`Error executing modelgrid: ${err.message}`); + process.exit(1); + }); + + child.on('exit', (code, signal) => { + if (signal) { + process.kill(process.pid, signal); + } else { + process.exit(code || 0); + } + }); + + // Forward signals to child process + const signals = ['SIGINT', 'SIGTERM', 'SIGHUP']; + signals.forEach((signal) => { + process.on(signal, () => { + if (!child.killed) { + child.kill(signal); + } + }); + }); +} + +// Execute +executeBinary(); diff --git a/changelog.md b/changelog.md new file mode 100644 index 0000000..7498284 --- /dev/null +++ b/changelog.md @@ -0,0 +1,66 @@ +# Changelog + +## 2026-01-30 - 1.0.0 - Initial Release + +**ModelGrid v1.0.0 - GPU Infrastructure Management Daemon** + +ModelGrid is a root-level daemon that manages GPU infrastructure, Docker containers, and AI model serving with an OpenAI-compatible API interface. 
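+
+Because the API is OpenAI-compatible, any OpenAI client library can talk to it. A minimal TypeScript sketch using plain `fetch` (assumes the default port 8080, a configured API key, and an example model name from the greenlist):
+
+```ts
+// chat.ts - send one chat completion request to a local ModelGrid daemon
+const res = await fetch("http://localhost:8080/v1/chat/completions", {
+  method: "POST",
+  headers: {
+    "Authorization": "Bearer YOUR_API_KEY",
+    "Content-Type": "application/json",
+  },
+  body: JSON.stringify({
+    model: "llama3:8b", // must be loaded, or greenlit for auto-pull
+    messages: [{ role: "user", content: "Hello!" }],
+  }),
+});
+
+if (!res.ok) throw new Error(`HTTP ${res.status}: ${await res.text()}`);
+const completion = await res.json();
+console.log(completion.choices[0].message.content);
+```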
+ +### Features + +**GPU Management:** +- Multi-vendor GPU detection (NVIDIA/CUDA, AMD/ROCm, Intel Arc/oneAPI) +- Automatic driver detection and installation guidance +- Real-time GPU status monitoring +- GPU assignment to containers + +**Container Management:** +- Support for Ollama, vLLM, and TGI (Text Generation Inference) containers +- Automatic container lifecycle management +- GPU passthrough configuration +- Health monitoring + +**OpenAI-Compatible API:** +- `/v1/chat/completions` - Chat completions with streaming support +- `/v1/models` - List available models +- `/v1/embeddings` - Text embeddings +- Bearer token authentication + +**Model Management:** +- Greenlit model system for controlled auto-pulling +- Automatic model loading on request +- VRAM requirement validation +- Model registry from remote configuration + +**System Integration:** +- Systemd service management +- Configuration at `/etc/modelgrid/config.json` +- Comprehensive CLI for all operations + +### Installation + +```bash +# Via npm (recommended) +npm install -g @modelgrid.com/modelgrid + +# Via installer script +curl -sSL https://code.foss.global/modelgrid.com/modelgrid/raw/branch/main/install.sh | sudo bash +``` + +### Supported Platforms + +- Linux x64 (x86_64) +- Linux ARM64 (aarch64) +- macOS Intel (x86_64) +- macOS Apple Silicon (ARM64) +- Windows x64 + +### CLI Commands + +``` +modelgrid service enable|disable|start|stop|status|logs +modelgrid gpu list|status|drivers +modelgrid container add|remove|list|start|stop +modelgrid model list|pull|remove +modelgrid config show|init +``` diff --git a/deno.json b/deno.json new file mode 100644 index 0000000..ed9e5cc --- /dev/null +++ b/deno.json @@ -0,0 +1,41 @@ +{ + "name": "@modelgrid.com/modelgrid", + "version": "1.0.0", + "exports": "./mod.ts", + "nodeModulesDir": "auto", + "tasks": { + "dev": "deno run --allow-all mod.ts", + "compile": "deno task compile:all", + "compile:all": "bash scripts/compile-all.sh", + "test": "deno test --allow-all test/", + "test:watch": "deno test --allow-all --watch test/", + "check": "deno check mod.ts", + "fmt": "deno fmt", + "lint": "deno lint" + }, + "lint": { + "rules": { + "tags": [ + "recommended" + ] + } + }, + "fmt": { + "useTabs": false, + "lineWidth": 100, + "indentWidth": 2, + "semiColons": true, + "singleQuote": true + }, + "compilerOptions": { + "lib": [ + "deno.window" + ], + "strict": true + }, + "imports": { + "@std/cli": "jsr:@std/cli@^1.0.0", + "@std/fmt": "jsr:@std/fmt@^1.0.0", + "@std/path": "jsr:@std/path@^1.0.0" + } +} diff --git a/docs/example-action.sh b/docs/example-action.sh new file mode 100644 index 0000000..62ee74d --- /dev/null +++ b/docs/example-action.sh @@ -0,0 +1,53 @@ +#!/bin/bash +# ModelGrid Example Script +# This is a placeholder for custom automation scripts +# +# ModelGrid can execute custom scripts in response to various events +# such as model loading, container start/stop, GPU status changes, etc. 
+ +# ============================================================================== +# ENVIRONMENT VARIABLES (set by ModelGrid when executing scripts) +# ============================================================================== +# MODELGRID_EVENT - Event type that triggered this script +# MODELGRID_CONTAINER_ID - Container ID (if applicable) +# MODELGRID_GPU_ID - GPU ID (if applicable) +# MODELGRID_MODEL_NAME - Model name (if applicable) +# MODELGRID_TIMESTAMP - Unix timestamp (milliseconds since epoch) + +# ============================================================================== +# EXAMPLE: Log the event +# ============================================================================== +LOG_FILE="/var/log/modelgrid-actions.log" + +echo "========================================" >> "$LOG_FILE" +echo "ModelGrid Action Triggered: $(date)" >> "$LOG_FILE" +echo "----------------------------------------" >> "$LOG_FILE" +echo "Event: ${MODELGRID_EVENT:-unknown}" >> "$LOG_FILE" +echo "Container: ${MODELGRID_CONTAINER_ID:-N/A}" >> "$LOG_FILE" +echo "GPU: ${MODELGRID_GPU_ID:-N/A}" >> "$LOG_FILE" +echo "Model: ${MODELGRID_MODEL_NAME:-N/A}" >> "$LOG_FILE" +echo "========================================" >> "$LOG_FILE" + +# ============================================================================== +# EXAMPLE: Send notification on model load +# ============================================================================== +# if [ "$MODELGRID_EVENT" = "model_loaded" ]; then +# echo "Model $MODELGRID_MODEL_NAME loaded successfully" | \ +# mail -s "ModelGrid: Model Loaded" admin@example.com +# fi + +# ============================================================================== +# EXAMPLE: Alert on GPU error +# ============================================================================== +# if [ "$MODELGRID_EVENT" = "gpu_error" ]; then +# curl -X POST https://monitoring.example.com/alert \ +# -H "Content-Type: application/json" \ +# -d "{ +# \"event\": \"gpu_error\", +# \"gpuId\": \"$MODELGRID_GPU_ID\", +# \"timestamp\": $MODELGRID_TIMESTAMP +# }" +# fi + +# Exit with success +exit 0 diff --git a/install.sh b/install.sh new file mode 100644 index 0000000..c759a5f --- /dev/null +++ b/install.sh @@ -0,0 +1,286 @@ +#!/bin/bash + +# ModelGrid Installer Script +# Downloads and installs pre-compiled ModelGrid binary from Gitea releases +# +# Usage: +# Direct piped installation (recommended): +# curl -sSL https://code.foss.global/modelgrid.com/modelgrid/raw/branch/main/install.sh | sudo bash +# +# With version specification: +# curl -sSL https://code.foss.global/modelgrid.com/modelgrid/raw/branch/main/install.sh | sudo bash -s -- --version v1.0.0 +# +# Options: +# -h, --help Show this help message +# --version VERSION Install specific version (e.g., v1.0.0) +# --install-dir DIR Installation directory (default: /opt/modelgrid) + +set -e + +# Default values +SHOW_HELP=0 +SPECIFIED_VERSION="" +INSTALL_DIR="/opt/modelgrid" +GITEA_BASE_URL="https://code.foss.global" +GITEA_REPO="modelgrid.com/modelgrid" + +# Parse command line arguments +while [[ $# -gt 0 ]]; do + case $1 in + -h|--help) + SHOW_HELP=1 + shift + ;; + --version) + SPECIFIED_VERSION="$2" + shift 2 + ;; + --install-dir) + INSTALL_DIR="$2" + shift 2 + ;; + *) + echo "Unknown option: $1" + echo "Use -h or --help for usage information" + exit 1 + ;; + esac +done + +if [ $SHOW_HELP -eq 1 ]; then + echo "ModelGrid Installer Script" + echo "Downloads and installs pre-compiled ModelGrid binary" + echo "" + echo "Usage: $0 [options]" + echo "" + 
echo "Options:" + echo " -h, --help Show this help message" + echo " --version VERSION Install specific version (e.g., v1.0.0)" + echo " --install-dir DIR Installation directory (default: /opt/modelgrid)" + echo "" + echo "Examples:" + echo " # Install latest version" + echo " curl -sSL https://code.foss.global/modelgrid.com/modelgrid/raw/branch/main/install.sh | sudo bash" + echo "" + echo " # Install specific version" + echo " curl -sSL https://code.foss.global/modelgrid.com/modelgrid/raw/branch/main/install.sh | sudo bash -s -- --version v1.0.0" + exit 0 +fi + +# Check if running as root +if [ "$EUID" -ne 0 ]; then + echo "Please run as root (sudo bash install.sh or pipe to sudo bash)" + exit 1 +fi + +# Helper function to detect OS and architecture +detect_platform() { + local os=$(uname -s) + local arch=$(uname -m) + + # Map OS + case "$os" in + Linux) + os_name="linux" + ;; + Darwin) + os_name="macos" + ;; + MINGW*|MSYS*|CYGWIN*) + os_name="windows" + ;; + *) + echo "Error: Unsupported operating system: $os" + echo "Supported: Linux, macOS, Windows" + exit 1 + ;; + esac + + # Map architecture + case "$arch" in + x86_64|amd64) + arch_name="x64" + ;; + aarch64|arm64) + arch_name="arm64" + ;; + *) + echo "Error: Unsupported architecture: $arch" + echo "Supported: x86_64/amd64 (x64), aarch64/arm64 (arm64)" + exit 1 + ;; + esac + + # Construct binary name + if [ "$os_name" = "windows" ]; then + echo "modelgrid-${os_name}-${arch_name}.exe" + else + echo "modelgrid-${os_name}-${arch_name}" + fi +} + +# Get latest release version from Gitea API +get_latest_version() { + echo "Fetching latest release version from Gitea..." >&2 + + local api_url="${GITEA_BASE_URL}/api/v1/repos/${GITEA_REPO}/releases/latest" + local response=$(curl -sSL "$api_url" 2>/dev/null) + + if [ $? -ne 0 ] || [ -z "$response" ]; then + echo "Error: Failed to fetch latest release information from Gitea API" >&2 + echo "URL: $api_url" >&2 + exit 1 + fi + + # Extract tag_name from JSON response + local version=$(echo "$response" | grep -o '"tag_name":"[^"]*"' | cut -d'"' -f4) + + if [ -z "$version" ]; then + echo "Error: Could not determine latest version from API response" >&2 + exit 1 + fi + + echo "$version" +} + +# Main installation process +echo "================================================" +echo " ModelGrid Installation Script" +echo "================================================" +echo "" + +# Detect platform +BINARY_NAME=$(detect_platform) +echo "Detected platform: $BINARY_NAME" +echo "" + +# Determine version to install +if [ -n "$SPECIFIED_VERSION" ]; then + VERSION="$SPECIFIED_VERSION" + echo "Installing specified version: $VERSION" +else + VERSION=$(get_latest_version) + echo "Installing latest version: $VERSION" +fi +echo "" + +# Construct download URL +DOWNLOAD_URL="${GITEA_BASE_URL}/${GITEA_REPO}/releases/download/${VERSION}/${BINARY_NAME}" +echo "Download URL: $DOWNLOAD_URL" +echo "" + +# Check if service is running and stop it +SERVICE_WAS_RUNNING=0 +if systemctl is-enabled --quiet modelgrid 2>/dev/null || systemctl is-active --quiet modelgrid 2>/dev/null; then + SERVICE_WAS_RUNNING=1 + if systemctl is-active --quiet modelgrid 2>/dev/null; then + echo "Stopping ModelGrid service..." 
+        systemctl stop modelgrid
+    fi
+fi
+
+# Clean installation directory - ensure only binary exists
+if [ -d "$INSTALL_DIR" ]; then
+    echo "Cleaning installation directory: $INSTALL_DIR"
+    rm -rf "$INSTALL_DIR"
+fi
+
+# Create fresh installation directory
+echo "Creating installation directory: $INSTALL_DIR"
+mkdir -p "$INSTALL_DIR"
+
+# Download binary (wrapped in `if !` so the error message below stays
+# reachable even though `set -e` is active)
+echo "Downloading ModelGrid binary..."
+TEMP_FILE="$INSTALL_DIR/modelgrid.download"
+if ! curl -sSL "$DOWNLOAD_URL" -o "$TEMP_FILE"; then
+    echo "Error: Failed to download binary from $DOWNLOAD_URL"
+    echo ""
+    echo "Please check:"
+    echo "  1. Your internet connection"
+    echo "  2. The specified version exists: ${GITEA_BASE_URL}/${GITEA_REPO}/releases"
+    echo "  3. The platform binary is available for this release"
+    rm -f "$TEMP_FILE"
+    exit 1
+fi
+
+# Check if download was successful (file exists and not empty)
+if [ ! -s "$TEMP_FILE" ]; then
+    echo "Error: Downloaded file is empty or does not exist"
+    rm -f "$TEMP_FILE"
+    exit 1
+fi
+
+# Move to final location
+BINARY_PATH="$INSTALL_DIR/modelgrid"
+if ! mv "$TEMP_FILE" "$BINARY_PATH" || [ ! -f "$BINARY_PATH" ]; then
+    echo "Error: Failed to move binary to $BINARY_PATH"
+    rm -f "$TEMP_FILE" 2>/dev/null
+    exit 1
+fi
+
+# Make executable
+if ! chmod +x "$BINARY_PATH"; then
+    echo "Error: Failed to make binary executable"
+    exit 1
+fi
+
+echo "Binary installed successfully to: $BINARY_PATH"
+echo ""
+
+# Check if /usr/local/bin is in PATH
+if [[ ":$PATH:" == *":/usr/local/bin:"* ]]; then
+    BIN_DIR="/usr/local/bin"
+else
+    BIN_DIR="/usr/bin"
+fi
+
+# Create symlink for global access
+ln -sf "$BINARY_PATH" "$BIN_DIR/modelgrid"
+echo "Symlink created: $BIN_DIR/modelgrid -> $BINARY_PATH"
+
+echo ""
+
+# Restart service if it was running before update
+if [ $SERVICE_WAS_RUNNING -eq 1 ]; then
+    echo "Restarting ModelGrid service..."
+    systemctl restart modelgrid
+    echo "Service restarted successfully."
+    echo ""
+fi
+
+echo "================================================"
+echo "  ModelGrid Installation Complete!"
+echo "================================================"
+echo ""
+echo "Installation details:"
+echo "  Binary location:  $BINARY_PATH"
+echo "  Symlink location: $BIN_DIR/modelgrid"
+echo "  Version:          $VERSION"
+echo ""
+
+# Check if configuration exists
+if [ -f "/etc/modelgrid/config.json" ]; then
+    echo "Configuration: /etc/modelgrid/config.json (preserved)"
+    echo ""
+    echo "Your existing configuration has been preserved."
+    if [ $SERVICE_WAS_RUNNING -eq 1 ]; then
+        echo "The service has been restarted with your current settings."
+    else
+        echo "Start the service with: sudo modelgrid service start"
+    fi
+else
+    echo "Get started:"
+    echo "  modelgrid --version"
+    echo "  modelgrid help"
+    echo "  modelgrid gpu list        # Detect GPUs"
+    echo "  modelgrid container add   # Add a container"
+    echo "  modelgrid config init     # Initialize config"
+    echo "  modelgrid service enable  # Enable systemd service"
+fi
+echo ""
diff --git a/license b/license
new file mode 100644
index 0000000..7583a48
--- /dev/null
+++ b/license
@@ -0,0 +1,21 @@
+The MIT License (MIT)
+
+Copyright (c) 2016 Task Venture Capital GmbH
+
+Permission is hereby granted, free of charge, to any person obtaining a copy
+of this software and associated documentation files (the "Software"), to deal
+in the Software without restriction, including without limitation the rights
+to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+copies of the Software, and to permit persons to whom the Software is
+furnished to do so, subject to the following conditions:
+
+The above copyright notice and this permission notice shall be included in all
+copies or substantial portions of the Software.
+
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+SOFTWARE.
diff --git a/mod.ts b/mod.ts
new file mode 100644
index 0000000..91f7a52
--- /dev/null
+++ b/mod.ts
@@ -0,0 +1,44 @@
+#!/usr/bin/env -S deno run --allow-all
+
+/**
+ * ModelGrid - GPU Infrastructure Management Daemon
+ *
+ * A root-level daemon that manages GPU infrastructure, Docker, and AI model containers
+ * (Ollama, vLLM, TGI) with an OpenAI-compatible API interface.
+ *
+ * Required Permissions:
+ * - --allow-net: HTTP server for OpenAI API, container communication
+ * - --allow-read: Read configuration files (/etc/modelgrid/config.json)
+ * - --allow-write: Write configuration files
+ * - --allow-run: Execute system commands (docker, nvidia-smi, systemctl)
+ * - --allow-sys: Access system information (hostname, OS details, GPU info)
+ * - --allow-env: Read environment variables
+ *
+ * @module
+ */
+
+import { ModelGridCli } from './ts/cli.ts';
+
+/**
+ * Main entry point for the ModelGrid application
+ * Parses command-line arguments and executes the requested command
+ */
+async function main(): Promise<void> {
+  const cli = new ModelGridCli();
+
+  // Deno.args is already 0-indexed (unlike Node's process.argv, where user args start at index 2)
+  // We need to prepend placeholder args to match the existing CLI parser expectations
+  const args = ['deno', 'mod.ts', ...Deno.args];
+
+  await cli.parseAndExecute(args);
+}
+
+// Execute main and handle errors
+if (import.meta.main) {
+  try {
+    await main();
+  } catch (error) {
+    console.error(`Error: ${error instanceof Error ?
error.message : String(error)}`); + Deno.exit(1); + } +} diff --git a/npmextra.json b/npmextra.json new file mode 100644 index 0000000..ad187cc --- /dev/null +++ b/npmextra.json @@ -0,0 +1,20 @@ +{ + "@git.zone/cli": { + "release": { + "registries": [ + "https://verdaccio.lossless.digital" + ], + "accessLevel": "public" + }, + "projectType": "deno", + "module": { + "githost": "code.foss.global", + "gitscope": "modelgrid.com", + "gitrepo": "modelgrid", + "description": "GPU infrastructure management daemon with OpenAI-compatible API for AI model containers", + "npmPackagename": "@modelgrid.com/modelgrid", + "license": "MIT" + } + }, + "@ship.zone/szci": {} +} diff --git a/package.json b/package.json new file mode 100644 index 0000000..cecdac9 --- /dev/null +++ b/package.json @@ -0,0 +1,66 @@ +{ + "name": "@modelgrid.com/modelgrid", + "version": "1.0.0", + "description": "ModelGrid - GPU infrastructure management daemon for AI model containers with OpenAI-compatible API", + "keywords": [ + "gpu", + "docker", + "ai", + "llm", + "ollama", + "vllm", + "openai", + "api", + "nvidia", + "cuda", + "amd", + "rocm", + "intel", + "inference", + "container" + ], + "homepage": "https://code.foss.global/modelgrid.com/modelgrid", + "bugs": { + "url": "https://code.foss.global/modelgrid.com/modelgrid/issues" + }, + "repository": { + "type": "git", + "url": "git+https://code.foss.global/modelgrid.com/modelgrid.git" + }, + "author": "ModelGrid", + "license": "MIT", + "type": "module", + "bin": { + "modelgrid": "./bin/modelgrid-wrapper.js" + }, + "scripts": { + "postinstall": "node scripts/install-binary.js", + "prepublishOnly": "echo 'Publishing ModelGrid binaries to npm...'", + "test": "echo 'Tests are run with Deno: deno task test'", + "build": "echo 'no build needed'" + }, + "files": [ + "bin/", + "scripts/install-binary.js", + "readme.md", + "license", + "changelog.md" + ], + "engines": { + "node": ">=18.0.0" + }, + "os": [ + "darwin", + "linux", + "win32" + ], + "cpu": [ + "x64", + "arm64" + ], + "publishConfig": { + "access": "public", + "registry": "https://registry.npmjs.org/" + }, + "packageManager": "pnpm@10.18.1+sha512.77a884a165cbba2d8d1c19e3b4880eee6d2fcabd0d879121e282196b80042351d5eb3ca0935fa599da1dc51265cc68816ad2bddd2a2de5ea9fdf92adbec7cd34" +} diff --git a/readme.hints.md b/readme.hints.md new file mode 100644 index 0000000..1689777 --- /dev/null +++ b/readme.hints.md @@ -0,0 +1,156 @@ +# ModelGrid Project Hints + +## Project Overview + +ModelGrid is a root-level daemon that manages GPU infrastructure, Docker, and AI model containers (Ollama, vLLM, TGI) with an OpenAI-compatible API interface. 
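+
+The npm package (see package.json above) ships no TypeScript sources; its `bin` wrapper resolves a pre-compiled binary named `modelgrid-<platform>-<arch>[.exe]`. That naming scheme is shared by the wrapper, the installer, and the release workflow. A Deno sketch of the same mapping (illustrative only):
+
+```ts
+// binary-name.ts - reproduce the release binary naming scheme
+function binaryName(os: string, arch: string): string {
+  const platforms: Record<string, string> = {
+    darwin: 'macos',
+    linux: 'linux',
+    windows: 'windows',
+  };
+  const arches: Record<string, string> = { x86_64: 'x64', aarch64: 'arm64' };
+  const p = platforms[os];
+  const a = arches[arch];
+  if (!p || !a) throw new Error(`Unsupported platform: ${os}/${arch}`);
+  return `modelgrid-${p}-${a}${os === 'windows' ? '.exe' : ''}`;
+}
+
+// Deno.build reports e.g. { os: "linux", arch: "x86_64" }
+console.log(binaryName(Deno.build.os, Deno.build.arch));
+```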
+ +## Architecture + +``` +┌─────────────────────────────────────────────────────────────────┐ +│ ModelGrid Daemon │ +├─────────────────────────────────────────────────────────────────┤ +│ ┌─────────────┐ ┌─────────────┐ ┌─────────────────────────┐ │ +│ │ CLI │ │ Hardware │ │ Container Manager │ │ +│ │ Commands │ │ Detection │ │ (Docker/Podman) │ │ +│ └─────────────┘ └─────────────┘ └─────────────────────────┘ │ +│ ┌─────────────┐ ┌─────────────┐ ┌─────────────────────────┐ │ +│ │ Driver │ │ Model │ │ OpenAI API Gateway │ │ +│ │ Installer │ │ Registry │ │ (HTTP Server) │ │ +│ └─────────────┘ └─────────────┘ └─────────────────────────┘ │ +├─────────────────────────────────────────────────────────────────┤ +│ Systemd Service │ +└─────────────────────────────────────────────────────────────────┘ +``` + +## File Organization + +``` +ts/ +├── index.ts # Node.js entry point +├── cli.ts # CLI router +├── modelgrid.ts # Main coordinator (facade) +├── daemon.ts # Background daemon +├── systemd.ts # Systemd integration +├── constants.ts # Configuration constants +├── logger.ts # Logging utilities +├── colors.ts # Color themes +├── interfaces/ # TypeScript interfaces +│ ├── config.ts # IModelGridConfig +│ ├── gpu.ts # IGpuInfo, IGpuStatus +│ ├── container.ts # IContainerConfig, IContainerStatus +│ └── api.ts # OpenAI API types +├── hardware/ # Hardware detection +│ ├── gpu-detector.ts # Detect GPUs (NVIDIA, AMD, Intel) +│ └── system-info.ts # CPU, RAM info +├── drivers/ # Driver management +│ ├── nvidia.ts # NVIDIA driver + CUDA +│ ├── amd.ts # AMD driver + ROCm +│ ├── intel.ts # Intel Arc + oneAPI +│ └── driver-manager.ts # Driver orchestrator +├── docker/ # Docker management +│ ├── docker-manager.ts # Docker setup +│ └── container-runtime.ts # Container lifecycle +├── containers/ # AI container management +│ ├── ollama.ts # Ollama container +│ ├── vllm.ts # vLLM container +│ ├── tgi.ts # TGI container +│ └── container-manager.ts # Orchestrator +├── models/ # Model management +│ ├── registry.ts # Greenlit model registry +│ └── loader.ts # Model loading with VRAM checks +├── api/ # OpenAI-compatible API +│ ├── server.ts # HTTP server +│ ├── router.ts # Request routing +│ ├── handlers/ # API endpoint handlers +│ │ ├── chat.ts # /v1/chat/completions +│ │ ├── models.ts # /v1/models +│ │ └── embeddings.ts # /v1/embeddings +│ └── middleware/ # Request processing +│ ├── auth.ts # API key validation +│ └── sanity.ts # Request validation +├── cli/ # CLI handlers +│ ├── service-handler.ts +│ ├── gpu-handler.ts +│ ├── container-handler.ts +│ ├── model-handler.ts +│ └── config-handler.ts +└── helpers/ # Utilities + ├── prompt.ts # Readline utility + └── shortid.ts # ID generation +``` + +## Key Concepts + +### Greenlit Model System +- Only pre-approved models can be auto-pulled for security +- Greenlist fetched from remote URL (configurable) +- VRAM requirements checked before loading + +### Container Types +- **Ollama**: Easy to use, native API converted to OpenAI format +- **vLLM**: High performance, natively OpenAI-compatible +- **TGI**: HuggingFace Text Generation Inference + +### GPU Support +- NVIDIA: nvidia-smi, CUDA, nvidia-docker2 +- AMD: rocm-smi, ROCm +- Intel Arc: xpu-smi, oneAPI + +## Configuration + +Config file: `/etc/modelgrid/config.json` + +```typescript +interface IModelGridConfig { + version: string; + api: { + port: number; // Default: 8080 + host: string; // Default: '0.0.0.0' + apiKeys: string[]; // Valid API keys + cors: boolean; + corsOrigins: string[]; + }; + docker: { + networkName: 
string; // Default: 'modelgrid' + runtime: 'docker' | 'podman'; + }; + gpus: { + autoDetect: boolean; + assignments: Record; + }; + containers: IContainerConfig[]; + models: { + greenlistUrl: string; + autoPull: boolean; + defaultContainer: string; + autoLoad: string[]; + }; + checkInterval: number; +} +``` + +## CLI Commands + +```bash +modelgrid service enable/disable/start/stop/status/logs +modelgrid gpu list/status/drivers/install +modelgrid container list/add/remove/start/stop/logs +modelgrid model list/pull/remove/status/refresh +modelgrid config show/init/apikey +``` + +## API Endpoints + +- `POST /v1/chat/completions` - Chat completion (OpenAI-compatible) +- `GET /v1/models` - List available models +- `POST /v1/embeddings` - Generate embeddings +- `GET /health` - Health check +- `GET /metrics` - Prometheus metrics + +## Development Notes + +- All async patterns preferred for flexibility +- Use `fs.promises` instead of sync methods +- Containers auto-start on daemon startup +- Models auto-preload if configured diff --git a/readme.md b/readme.md new file mode 100644 index 0000000..5267160 --- /dev/null +++ b/readme.md @@ -0,0 +1,296 @@ +# ModelGrid + +**GPU infrastructure management daemon with OpenAI-compatible API for AI model containers.** + +ModelGrid is a root-level daemon that manages GPU infrastructure, Docker containers, and AI model serving. It provides an OpenAI-compatible API interface for seamless integration with existing tools and applications. + +## Features + +- **Multi-GPU Support**: Detect and manage NVIDIA (CUDA), AMD (ROCm), and Intel Arc (oneAPI) GPUs +- **Container Management**: Orchestrate Ollama, vLLM, and TGI containers with GPU passthrough +- **OpenAI-Compatible API**: Drop-in replacement API for chat completions, embeddings, and model management +- **Greenlit Models**: Controlled model auto-pulling with remote configuration +- **Systemd Integration**: Run as a system service with automatic startup +- **Cross-Platform**: Pre-compiled binaries for Linux, macOS, and Windows + +## Quick Start + +### Installation + +```bash +# Via npm (recommended) +npm install -g @modelgrid.com/modelgrid + +# Via installer script +curl -sSL https://code.foss.global/modelgrid.com/modelgrid/raw/branch/main/install.sh | sudo bash +``` + +### Initial Setup + +```bash +# 1. Check GPU detection +sudo modelgrid gpu list + +# 2. Initialize configuration +sudo modelgrid config init + +# 3. Enable and start the service +sudo modelgrid service enable +sudo modelgrid service start + +# 4. 
Check status +modelgrid service status +``` + +### Using the API + +Once running, ModelGrid exposes an OpenAI-compatible API: + +```bash +# List available models +curl http://localhost:8080/v1/models \ + -H "Authorization: Bearer YOUR_API_KEY" + +# Chat completion +curl http://localhost:8080/v1/chat/completions \ + -H "Authorization: Bearer YOUR_API_KEY" \ + -H "Content-Type: application/json" \ + -d '{ + "model": "llama3:8b", + "messages": [{"role": "user", "content": "Hello!"}] + }' +``` + +## CLI Commands + +### Service Management + +```bash +modelgrid service enable # Install and enable systemd service +modelgrid service disable # Stop and disable systemd service +modelgrid service start # Start the service +modelgrid service stop # Stop the service +modelgrid service status # Show service status +modelgrid service logs # Show service logs +``` + +### GPU Management + +```bash +modelgrid gpu list # List detected GPUs +modelgrid gpu status # Show GPU utilization +modelgrid gpu drivers # Check/install GPU drivers +``` + +### Container Management + +```bash +modelgrid container add # Add a new container +modelgrid container remove # Remove a container +modelgrid container list # List all containers +modelgrid container start # Start a container +modelgrid container stop # Stop a container +``` + +### Model Management + +```bash +modelgrid model list # List available/loaded models +modelgrid model pull # Pull a model +modelgrid model remove # Remove a model +``` + +### Configuration + +```bash +modelgrid config show # Display current configuration +modelgrid config init # Initialize configuration +``` + +## Configuration + +Configuration is stored at `/etc/modelgrid/config.json`: + +```json +{ + "version": "1.0", + "api": { + "port": 8080, + "host": "0.0.0.0", + "apiKeys": ["your-api-key-here"] + }, + "docker": { + "networkName": "modelgrid", + "runtime": "docker" + }, + "gpus": { + "autoDetect": true, + "assignments": {} + }, + "containers": [], + "models": { + "greenlistUrl": "https://code.foss.global/modelgrid.com/model_lists/raw/branch/main/greenlit.json", + "autoPull": true, + "defaultContainer": "ollama", + "autoLoad": [] + }, + "checkInterval": 30000 +} +``` + +## Supported Container Types + +### Ollama + +Best for general-purpose model serving with easy model management. + +```bash +modelgrid container add --type ollama --gpu gpu-0 +``` + +### vLLM + +High-performance serving for large models with tensor parallelism. + +```bash +modelgrid container add --type vllm --gpu gpu-0,gpu-1 +``` + +### TGI (Text Generation Inference) + +HuggingFace's production-ready inference server. + +```bash +modelgrid container add --type tgi --gpu gpu-0 +``` + +## GPU Support + +### NVIDIA (CUDA) + +Requires NVIDIA drivers and NVIDIA Container Toolkit: + +```bash +# Check driver status +modelgrid gpu drivers + +# Install if needed (Ubuntu/Debian) +sudo apt install nvidia-driver-535 nvidia-container-toolkit +``` + +### AMD (ROCm) + +Requires ROCm drivers: + +```bash +# Check driver status +modelgrid gpu drivers +``` + +### Intel Arc (oneAPI) + +Requires Intel GPU drivers and oneAPI toolkit: + +```bash +# Check driver status +modelgrid gpu drivers +``` + +## Greenlit Models + +ModelGrid uses a greenlit model system to control which models can be auto-pulled. 
The greenlist is fetched from a configurable URL and contains approved models with VRAM requirements: + +```json +{ + "version": "1.0", + "models": [ + { "name": "llama3:8b", "container": "ollama", "minVram": 8 }, + { "name": "mistral:7b", "container": "ollama", "minVram": 8 }, + { "name": "llama3:70b", "container": "vllm", "minVram": 48 } + ] +} +``` + +When a request comes in for a model not currently loaded: +1. Check if model is in the greenlist +2. Verify VRAM requirements can be met +3. Auto-pull and load the model +4. Serve the request + +## API Reference + +### Chat Completions + +``` +POST /v1/chat/completions +``` + +OpenAI-compatible chat completion endpoint with streaming support. + +### Models + +``` +GET /v1/models +GET /v1/models/:model +``` + +List available models or get details for a specific model. + +### Embeddings + +``` +POST /v1/embeddings +``` + +Generate text embeddings using compatible models. + +## Development + +### Building from Source + +```bash +# Clone repository +git clone https://code.foss.global/modelgrid.com/modelgrid.git +cd modelgrid + +# Run directly with Deno +deno run --allow-all mod.ts help + +# Compile for current platform +deno compile --allow-all --output modelgrid mod.ts + +# Compile for all platforms +bash scripts/compile-all.sh +``` + +### Project Structure + +``` +modelgrid/ +├── mod.ts # Entry point +├── ts/ +│ ├── cli.ts # CLI command routing +│ ├── modelgrid.ts # Main coordinator class +│ ├── daemon.ts # Background daemon +│ ├── systemd.ts # Systemd service management +│ ├── constants.ts # Configuration constants +│ ├── interfaces/ # TypeScript interfaces +│ ├── hardware/ # GPU detection +│ ├── drivers/ # Driver management +│ ├── docker/ # Docker management +│ ├── containers/ # Container orchestration +│ ├── api/ # OpenAI-compatible API +│ ├── models/ # Model management +│ └── cli/ # CLI handlers +├── test/ # Test files +└── scripts/ # Build scripts +``` + +## License + +MIT License - See [license](./license) for details. + +## Links + +- Repository: https://code.foss.global/modelgrid.com/modelgrid +- Issues: https://community.foss.global/ diff --git a/readme.plan.md b/readme.plan.md new file mode 100644 index 0000000..3c7cd93 --- /dev/null +++ b/readme.plan.md @@ -0,0 +1,202 @@ +# ModelGrid Implementation Plan + +**Goal**: GPU infrastructure management daemon with OpenAI-compatible API for AI model containers. 
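+
+At request time, the daemon resolves a model against the greenlist before serving, as described in the readme. A rough sketch of that flow (the type and function names here are illustrative, not the final API):
+
+```typescript
+// Illustrative sketch only; IGreenlitModel and resolveModel are not the final API.
+interface IGreenlitModel {
+  name: string;
+  container: string;
+  minVram: number; // GB
+}
+
+async function resolveModel(
+  requested: string,
+  greenlistUrl: string,
+  availableVramGb: number,
+): Promise<IGreenlitModel> {
+  // 1. Fetch the greenlist and check that the model is approved
+  const res = await fetch(greenlistUrl);
+  const greenlist = (await res.json()) as { models: IGreenlitModel[] };
+  const model = greenlist.models.find((m) => m.name === requested);
+  if (!model) throw new Error(`Model "${requested}" is not greenlit`);
+
+  // 2. Verify VRAM requirements can be met
+  if (availableVramGb < model.minVram) {
+    throw new Error(`Model needs ${model.minVram} GB VRAM, ${availableVramGb} GB available`);
+  }
+
+  // 3. In the real daemon, auto-pull/load happens here, then the request is served
+  return model;
+}
+```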
+ +--- + +## Architecture Overview + +``` +┌─────────────────────────────────────────────────────────────────┐ +│ ModelGrid Daemon │ +├─────────────────────────────────────────────────────────────────┤ +│ ┌─────────────┐ ┌─────────────┐ ┌─────────────────────────┐ │ +│ │ CLI │ │ Hardware │ │ Container Manager │ │ +│ │ Commands │ │ Detection │ │ (Docker/Podman) │ │ +│ └─────────────┘ └─────────────┘ └─────────────────────────┘ │ +│ ┌─────────────┐ ┌─────────────┐ ┌─────────────────────────┐ │ +│ │ Driver │ │ Model │ │ OpenAI API Gateway │ │ +│ │ Installer │ │ Registry │ │ (HTTP Server) │ │ +│ └─────────────┘ └─────────────┘ └─────────────────────────┘ │ +├─────────────────────────────────────────────────────────────────┤ +│ Systemd Service │ +└─────────────────────────────────────────────────────────────────┘ + │ + ▼ +┌─────────────────────────────────────────────────────────────────┐ +│ Container Runtime │ +│ ┌──────────┐ ┌──────────┐ ┌──────────┐ ┌──────────┐ │ +│ │ Ollama │ │ vLLM │ │ TGI │ │ Custom │ │ +│ │Container │ │Container │ │Container │ │Container │ │ +│ └──────────┘ └──────────┘ └──────────┘ └──────────┘ │ +└─────────────────────────────────────────────────────────────────┘ +``` + +--- + +## Implementation Status + +### Completed Components + +- [x] Project structure and configuration (deno.json, package.json) +- [x] TypeScript interfaces (ts/interfaces/) +- [x] Logger and colors (ts/logger.ts, ts/colors.ts) +- [x] Helper utilities (ts/helpers/) +- [x] Constants (ts/constants.ts) +- [x] Hardware detection (ts/hardware/) +- [x] Driver management (ts/drivers/) +- [x] Docker management (ts/docker/) +- [x] Container orchestration (ts/containers/) +- [x] Model management (ts/models/) +- [x] OpenAI-compatible API (ts/api/) +- [x] CLI router and handlers (ts/cli.ts, ts/cli/) +- [x] Main coordinator (ts/modelgrid.ts) +- [x] Daemon (ts/daemon.ts) +- [x] Systemd integration (ts/systemd.ts) +- [x] Build scripts (scripts/) +- [x] Installation scripts (install.sh, uninstall.sh) +- [x] CI/CD workflows (.gitea/workflows/) +- [x] npm packaging (package.json, bin/, scripts/) + +### Pending Tasks + +- [ ] Integration testing with real GPUs +- [ ] End-to-end API testing +- [ ] Documentation improvements +- [ ] First release (v1.0.0) + +--- + +## Directory Structure + +``` +modelgrid/ +├── mod.ts # Deno entry point +├── ts/ +│ ├── index.ts # Node.js entry point +│ ├── cli.ts # CLI router +│ ├── modelgrid.ts # Main coordinator +│ ├── daemon.ts # Background daemon +│ ├── systemd.ts # Systemd integration +│ ├── constants.ts # Configuration constants +│ ├── logger.ts # Logging utilities +│ ├── colors.ts # Color themes +│ ├── interfaces/ # TypeScript interfaces +│ │ ├── index.ts +│ │ ├── config.ts # IModelGridConfig +│ │ ├── gpu.ts # IGpuInfo, IGpuStatus +│ │ ├── container.ts # IContainerConfig, IContainerStatus +│ │ └── api.ts # OpenAI API types +│ ├── hardware/ # Hardware detection +│ │ ├── index.ts +│ │ ├── gpu-detector.ts # Multi-vendor GPU detection +│ │ └── system-info.ts # System information +│ ├── drivers/ # Driver management +│ │ ├── index.ts +│ │ ├── nvidia.ts # NVIDIA/CUDA +│ │ ├── amd.ts # AMD/ROCm +│ │ ├── intel.ts # Intel Arc/oneAPI +│ │ └── base-driver.ts # Abstract driver class +│ ├── docker/ # Docker management +│ │ ├── index.ts +│ │ ├── docker-manager.ts # Docker operations +│ │ └── container-runtime.ts +│ ├── containers/ # Container orchestration +│ │ ├── index.ts +│ │ ├── ollama.ts # Ollama container +│ │ ├── vllm.ts # vLLM container +│ │ ├── tgi.ts # TGI container +│ │ └── 
base-container.ts # Abstract container class +│ ├── api/ # OpenAI-compatible API +│ │ ├── index.ts +│ │ ├── server.ts # HTTP server +│ │ ├── router.ts # Request routing +│ │ ├── handlers/ # Endpoint handlers +│ │ │ ├── chat.ts # /v1/chat/completions +│ │ │ ├── models.ts # /v1/models +│ │ │ └── embeddings.ts # /v1/embeddings +│ │ └── middleware/ # Request processing +│ │ ├── auth.ts # API key validation +│ │ ├── sanity.ts # Request validation +│ │ └── proxy.ts # Container proxy +│ ├── models/ # Model management +│ │ ├── index.ts +│ │ ├── registry.ts # Model registry +│ │ └── loader.ts # Model loading +│ └── cli/ # CLI handlers +│ ├── service-handler.ts +│ ├── gpu-handler.ts +│ ├── container-handler.ts +│ ├── model-handler.ts +│ └── config-handler.ts +├── test/ # Test files +├── scripts/ # Build scripts +├── bin/ # npm wrapper +└── docs/ # Documentation +``` + +--- + +## CLI Commands + +``` +modelgrid service enable # Install systemd service +modelgrid service disable # Remove systemd service +modelgrid service start # Start daemon +modelgrid service stop # Stop daemon +modelgrid service status # Show status +modelgrid service logs # Show logs + +modelgrid gpu list # List detected GPUs +modelgrid gpu status # Show GPU utilization +modelgrid gpu drivers # Check/install drivers + +modelgrid container add # Add container config +modelgrid container remove # Remove container +modelgrid container list # List containers +modelgrid container start # Start container +modelgrid container stop # Stop container + +modelgrid model list # List available models +modelgrid model pull # Pull model +modelgrid model remove # Remove model + +modelgrid config show # Show configuration +modelgrid config init # Initialize configuration +``` + +--- + +## API Endpoints + +- `GET /v1/models` - List available models +- `GET /v1/models/:model` - Get model details +- `POST /v1/chat/completions` - Chat completions (streaming supported) +- `POST /v1/embeddings` - Generate embeddings + +--- + +## Greenlit Model System + +Models are controlled via a remote greenlist to prevent arbitrary downloads: + +```json +{ + "version": "1.0", + "models": [ + { "name": "llama3:8b", "container": "ollama", "minVram": 8 }, + { "name": "mistral:7b", "container": "ollama", "minVram": 8 }, + { "name": "llama3:70b", "container": "vllm", "minVram": 48 } + ] +} +``` + +--- + +## Supported Platforms + +- Linux x64 (x86_64) +- Linux ARM64 (aarch64) +- macOS Intel (x86_64) +- macOS Apple Silicon (ARM64) +- Windows x64 diff --git a/scripts/compile-all.sh b/scripts/compile-all.sh new file mode 100755 index 0000000..59b1dae --- /dev/null +++ b/scripts/compile-all.sh @@ -0,0 +1,66 @@ +#!/bin/bash +set -e + +# Get version from deno.json +VERSION=$(cat deno.json | grep -o '"version": *"[^"]*"' | cut -d'"' -f4) +BINARY_DIR="dist/binaries" + +echo "================================================" +echo " ModelGrid Compilation Script" +echo " Version: ${VERSION}" +echo "================================================" +echo "" +echo "Compiling for all supported platforms..." +echo "" + +# Clean up old binaries and create fresh directory +rm -rf "$BINARY_DIR" +mkdir -p "$BINARY_DIR" +echo "→ Cleaned old binaries from $BINARY_DIR" +echo "" + +# Linux x86_64 +echo "→ Compiling for Linux x86_64..." +deno compile --allow-all --no-check --output "$BINARY_DIR/modelgrid-linux-x64" \ + --target x86_64-unknown-linux-gnu mod.ts +echo " ✓ Linux x86_64 complete" +echo "" + +# Linux ARM64 +echo "→ Compiling for Linux ARM64..." 
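+# Note: deno compile cross-compiles by downloading a prebuilt runtime for the
+# given --target triple, so no ARM64 toolchain is needed on the build host.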
+deno compile --allow-all --no-check --output "$BINARY_DIR/modelgrid-linux-arm64" \ + --target aarch64-unknown-linux-gnu mod.ts +echo " ✓ Linux ARM64 complete" +echo "" + +# macOS x86_64 +echo "→ Compiling for macOS x86_64..." +deno compile --allow-all --no-check --output "$BINARY_DIR/modelgrid-macos-x64" \ + --target x86_64-apple-darwin mod.ts +echo " ✓ macOS x86_64 complete" +echo "" + +# macOS ARM64 +echo "→ Compiling for macOS ARM64..." +deno compile --allow-all --no-check --output "$BINARY_DIR/modelgrid-macos-arm64" \ + --target aarch64-apple-darwin mod.ts +echo " ✓ macOS ARM64 complete" +echo "" + +# Windows x86_64 +echo "→ Compiling for Windows x86_64..." +deno compile --allow-all --no-check --output "$BINARY_DIR/modelgrid-windows-x64.exe" \ + --target x86_64-pc-windows-msvc mod.ts +echo " ✓ Windows x86_64 complete" +echo "" + +echo "================================================" +echo " Compilation Summary" +echo "================================================" +echo "" +ls -lh "$BINARY_DIR/" | tail -n +2 +echo "" +echo "✓ All binaries compiled successfully!" +echo "" +echo "Binary location: $BINARY_DIR/" +echo "" diff --git a/scripts/install-binary.js b/scripts/install-binary.js new file mode 100644 index 0000000..cc8da13 --- /dev/null +++ b/scripts/install-binary.js @@ -0,0 +1,238 @@ +#!/usr/bin/env node +// deno-lint-ignore-file no-unused-vars + +/** + * ModelGrid npm postinstall script + * Downloads the appropriate binary for the current platform from Gitea releases + */ + +import { arch, platform } from 'os'; +import { chmodSync, existsSync, mkdirSync, unlinkSync } from 'fs'; +import { dirname, join } from 'path'; +import { fileURLToPath } from 'url'; +import https from 'https'; +import { pipeline } from 'stream'; +import { promisify } from 'util'; +import { createWriteStream } from 'fs'; +import process from "node:process"; + +const __filename = fileURLToPath(import.meta.url); +const __dirname = dirname(__filename); +const streamPipeline = promisify(pipeline); + +// Configuration +const REPO_BASE = 'https://code.foss.global/modelgrid.com/modelgrid'; +const VERSION = process.env.npm_package_version || '1.0.0'; + +function getBinaryInfo() { + const plat = platform(); + const architecture = arch(); + + const platformMap = { + 'darwin': 'macos', + 'linux': 'linux', + 'win32': 'windows', + }; + + const archMap = { + 'x64': 'x64', + 'arm64': 'arm64', + }; + + const mappedPlatform = platformMap[plat]; + const mappedArch = archMap[architecture]; + + if (!mappedPlatform || !mappedArch) { + return { supported: false, platform: plat, arch: architecture }; + } + + let binaryName = `modelgrid-${mappedPlatform}-${mappedArch}`; + if (plat === 'win32') { + binaryName += '.exe'; + } + + return { + supported: true, + platform: mappedPlatform, + arch: mappedArch, + binaryName, + originalPlatform: plat, + }; +} + +function downloadFile(url, destination) { + return new Promise((resolve, reject) => { + console.log(`Downloading from: ${url}`); + + // Follow redirects + const download = (url, redirectCount = 0) => { + if (redirectCount > 5) { + reject(new Error('Too many redirects')); + return; + } + + https.get(url, (response) => { + if (response.statusCode === 301 || response.statusCode === 302) { + console.log(`Following redirect to: ${response.headers.location}`); + download(response.headers.location, redirectCount + 1); + return; + } + + if (response.statusCode !== 200) { + reject(new Error(`Failed to download: ${response.statusCode} ${response.statusMessage}`)); + return; + } + + const 
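+      // NOTE: if the server omits the content-length header, the parsed total
+      // below is NaN and the progress log never fires; the download itself
+      // still completes via the pipeline.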
totalSize = parseInt(response.headers['content-length'], 10); + let downloadedSize = 0; + let lastProgress = 0; + + response.on('data', (chunk) => { + downloadedSize += chunk.length; + const progress = Math.round((downloadedSize / totalSize) * 100); + + // Only log every 10% to reduce noise + if (progress >= lastProgress + 10) { + console.log(`Download progress: ${progress}%`); + lastProgress = progress; + } + }); + + const file = createWriteStream(destination); + + pipeline(response, file, (err) => { + if (err) { + reject(err); + } else { + console.log('Download complete!'); + resolve(); + } + }); + }).on('error', reject); + }; + + download(url); + }); +} + +async function main() { + console.log('==========================================='); + console.log(' ModelGrid - Binary Installation'); + console.log('==========================================='); + console.log(''); + + const binaryInfo = getBinaryInfo(); + + if (!binaryInfo.supported) { + console.error( + `❌ Error: Unsupported platform/architecture: ${binaryInfo.platform}/${binaryInfo.arch}`, + ); + console.error(''); + console.error('Supported platforms:'); + console.error(' • Linux (x64, arm64)'); + console.error(' • macOS (x64, arm64)'); + console.error(' • Windows (x64)'); + console.error(''); + console.error('If you believe your platform should be supported, please file an issue:'); + console.error(' https://code.foss.global/modelgrid.com/modelgrid/issues'); + process.exit(1); + } + + console.log(`Platform: ${binaryInfo.platform} (${binaryInfo.originalPlatform})`); + console.log(`Architecture: ${binaryInfo.arch}`); + console.log(`Binary: ${binaryInfo.binaryName}`); + console.log(`Version: ${VERSION}`); + console.log(''); + + // Create dist/binaries directory if it doesn't exist + const binariesDir = join(__dirname, '..', 'dist', 'binaries'); + if (!existsSync(binariesDir)) { + console.log('Creating binaries directory...'); + mkdirSync(binariesDir, { recursive: true }); + } + + const binaryPath = join(binariesDir, binaryInfo.binaryName); + + // Check if binary already exists and skip download + if (existsSync(binaryPath)) { + console.log('✓ Binary already exists, skipping download'); + } else { + // Construct download URL + // Try release URL first, fall back to raw branch if needed + const releaseUrl = `${REPO_BASE}/releases/download/v${VERSION}/${binaryInfo.binaryName}`; + const fallbackUrl = `${REPO_BASE}/raw/branch/main/dist/binaries/${binaryInfo.binaryName}`; + + console.log('Downloading platform-specific binary...'); + console.log('This may take a moment depending on your connection speed.'); + console.log(''); + + try { + // Try downloading from release + await downloadFile(releaseUrl, binaryPath); + } catch (err) { + console.log(`Release download failed: ${err.message}`); + console.log('Trying fallback URL...'); + + try { + // Try fallback URL + await downloadFile(fallbackUrl, binaryPath); + } catch (fallbackErr) { + console.error(`❌ Error: Failed to download binary`); + console.error(` Primary URL: ${releaseUrl}`); + console.error(` Fallback URL: ${fallbackUrl}`); + console.error(''); + console.error('This might be because:'); + console.error('1. The release has not been created yet'); + console.error('2. Network connectivity issues'); + console.error('3. The version specified does not exist'); + console.error(''); + console.error('You can try:'); + console.error('1. Installing from source: https://code.foss.global/modelgrid.com/modelgrid'); + console.error('2. 
Downloading the binary manually from the releases page'); + console.error( + '3. Using the install script: curl -sSL https://code.foss.global/modelgrid.com/modelgrid/raw/branch/main/install.sh | sudo bash', + ); + + // Clean up partial download + if (existsSync(binaryPath)) { + unlinkSync(binaryPath); + } + + process.exit(1); + } + } + + console.log(`✓ Binary downloaded successfully`); + } + + // On Unix-like systems, ensure the binary is executable + if (binaryInfo.originalPlatform !== 'win32') { + try { + console.log('Setting executable permissions...'); + chmodSync(binaryPath, 0o755); + console.log('✓ Binary permissions updated'); + } catch (err) { + console.error(`⚠️ Warning: Could not set executable permissions: ${err.message}`); + console.error(' You may need to manually run:'); + console.error(` chmod +x ${binaryPath}`); + } + } + + console.log(''); + console.log('✅ ModelGrid installation completed successfully!'); + console.log(''); + console.log('You can now use ModelGrid by running:'); + console.log(' modelgrid --help'); + console.log(''); + console.log('For initial setup, run:'); + console.log(' modelgrid gpu list'); + console.log(' modelgrid config init'); + console.log(''); + console.log('==========================================='); +} + +// Run the installation +main().catch((err) => { + console.error(`❌ Installation failed: ${err.message}`); + process.exit(1); +}); diff --git a/test/test.logger.ts b/test/test.logger.ts new file mode 100644 index 0000000..3d4e741 --- /dev/null +++ b/test/test.logger.ts @@ -0,0 +1,157 @@ +import { assert, assertEquals } from 'jsr:@std/assert@^1.0.0'; +import { Logger } from '../ts/logger.ts'; + +// Create a Logger instance for testing +const logger = new Logger(); + +Deno.test('should create a logger instance', () => { + assert(logger instanceof Logger); +}); + +Deno.test('should log messages with different log levels', () => { + // We're not testing console output directly, just ensuring no errors + logger.log('Regular log message'); + logger.error('Error message'); + logger.warn('Warning message'); + logger.success('Success message'); + + // Just assert that the test runs without errors + assert(true); +}); + +Deno.test('should create a logbox with title, content, and end', () => { + // Just ensuring no errors occur + logger.logBoxTitle('Test Box', 40); + logger.logBoxLine('This is a test line'); + logger.logBoxEnd(); + + // Just assert that the test runs without errors + assert(true); +}); + +Deno.test('should handle width persistence between logbox calls', () => { + logger.logBoxTitle('Width Test', 45); + + // These should use the width from the title + logger.logBoxLine('Line 1'); + logger.logBoxLine('Line 2'); + logger.logBoxEnd(); + + let errorThrown = false; + + try { + // This should work fine after the reset in logBoxEnd + logger.logBoxTitle('New Box', 30); + logger.logBoxLine('New line'); + logger.logBoxEnd(); + } catch (_error) { + errorThrown = true; + } + + assertEquals(errorThrown, false); +}); + +Deno.test('should use default width when no width is specified', () => { + // This should automatically use the default width instead of throwing + let errorThrown = false; + + try { + logger.logBoxLine('This should use default width'); + logger.logBoxEnd(); + } catch (_error) { + errorThrown = true; + } + + // Verify no error was thrown + assertEquals(errorThrown, false); +}); + +Deno.test('should create a complete logbox in one call', () => { + // Just ensuring no errors occur + logger.logBox('Complete Box', [ + 'Line 1', + 'Line 
2', + 'Line 3', + ], 40); + + // Just assert that the test runs without errors + assert(true); +}); + +Deno.test('should handle content that exceeds box width', () => { + // Just ensuring no errors occur when content is too long + logger.logBox('Truncation Test', [ + 'This line is way too long and should be truncated because it exceeds the available space', + ], 30); + + // Just assert that the test runs without errors + assert(true); +}); + +Deno.test('should create dividers with custom characters', () => { + // Just ensuring no errors occur + logger.logDivider(30); + logger.logDivider(20, '*'); + + // Just assert that the test runs without errors + assert(true); +}); + +Deno.test('should create divider with default width', () => { + // This should use the default width + logger.logDivider(undefined, '-'); + + // Just assert that the test runs without errors + assert(true); +}); + +Deno.test('Logger Demo', () => { + console.log('\n=== LOGGER DEMO ===\n'); + + // Basic logging + logger.log('Regular log message'); + logger.error('Error message'); + logger.warn('Warning message'); + logger.success('Success message'); + + // Logbox with title, content lines, and end + logger.logBoxTitle('Configuration Loaded', 50); + logger.logBoxLine('SNMP Settings:'); + logger.logBoxLine(' Host: 127.0.0.1'); + logger.logBoxLine(' Port: 161'); + logger.logBoxLine(' Version: 1'); + logger.logBoxEnd(); + + // Complete logbox in one call + logger.logBox('UPS Status', [ + 'Power Status: onBattery', + 'Battery Capacity: 75%', + 'Runtime Remaining: 30 minutes', + ], 45); + + // Logbox with content that's too long for the width + logger.logBox('Truncation Example', [ + 'This line is short enough to fit within the box width', + 'This line is way too long and will be truncated because it exceeds the available space for content within the logbox', + ], 40); + + // Demonstrating logbox width being remembered + logger.logBoxTitle('Width Persistence Example', 60); + logger.logBoxLine('These lines use the width from the title'); + logger.logBoxLine('No need to specify the width again'); + logger.logBoxEnd(); + + // Demonstrating default width + console.log('\nDefault Width Example:'); + logger.logBoxLine('This line uses the default width'); + logger.logBoxLine('Still using default width'); + logger.logBoxEnd(); + + // Divider example + logger.log('\nDivider example:'); + logger.logDivider(30); + logger.logDivider(30, '*'); + logger.logDivider(undefined, '='); + + assert(true); +}); diff --git a/test/test.showcase.ts b/test/test.showcase.ts new file mode 100644 index 0000000..ee11463 --- /dev/null +++ b/test/test.showcase.ts @@ -0,0 +1,216 @@ +/** + * Showcase test for ModelGrid CLI outputs + * Demonstrates all the beautiful colored output features + * + * Run with: deno run --allow-all test/showcase.ts + */ + +import { type ITableColumn, logger } from '../ts/logger.ts'; +import { theme } from '../ts/colors.ts'; + +console.log(''); +console.log('═'.repeat(80)); +logger.highlight('MODELGRID CLI OUTPUT SHOWCASE'); +logger.dim('Demonstrating beautiful, colored terminal output'); +console.log('═'.repeat(80)); +console.log(''); + +// === 1. 
Basic Logging Methods === +logger.logBoxTitle('Basic Logging Methods', 60, 'info'); +logger.logBoxLine(''); +logger.log('Normal log message (default color)'); +logger.success('Success message with ✓ symbol'); +logger.error('Error message with ✗ symbol'); +logger.warn('Warning message with ⚠ symbol'); +logger.info('Info message with ℹ symbol'); +logger.dim('Dim/secondary text for less important info'); +logger.highlight('Highlighted/bold text for emphasis'); +logger.logBoxLine(''); +logger.logBoxEnd(); + +console.log(''); + +// === 2. Colored Boxes === +logger.logBoxTitle('Colored Box Styles', 60); +logger.logBoxLine(''); +logger.logBoxLine('Boxes can be styled with different colors:'); +logger.logBoxEnd(); + +console.log(''); + +logger.logBox( + 'Success Box (Green)', + [ + 'Used for successful operations', + 'Container started, model loaded, etc.', + ], + 60, + 'success', +); + +console.log(''); + +logger.logBox( + 'Error Box (Red)', + [ + 'Used for critical errors and failures', + 'Configuration errors, GPU detection failures, etc.', + ], + 60, + 'error', +); + +console.log(''); + +logger.logBox( + 'Warning Box (Yellow)', + [ + 'Used for warnings and deprecations', + 'Driver updates needed, low VRAM, etc.', + ], + 60, + 'warning', +); + +console.log(''); + +logger.logBox( + 'Info Box (Cyan)', + [ + 'Used for informational messages', + 'Version info, model info, etc.', + ], + 60, + 'info', +); + +console.log(''); + +// === 3. GPU Status Table === +const gpuColumns: ITableColumn[] = [ + { header: 'ID', key: 'id' }, + { header: 'Model', key: 'model' }, + { header: 'VRAM', key: 'vram', align: 'right' }, + { + header: 'Status', + key: 'status', + color: (v) => { + if (v.includes('Ready')) return theme.success(v); + if (v.includes('Busy')) return theme.warning(v); + return theme.dim(v); + }, + }, + { header: 'Utilization', key: 'utilization', align: 'right' }, +]; + +const gpuData = [ + { + id: 'gpu-0', + model: 'NVIDIA RTX 4090', + vram: '24 GB', + status: 'Ready', + utilization: '15%', + }, + { + id: 'gpu-1', + model: 'NVIDIA RTX 4090', + vram: '24 GB', + status: 'Busy', + utilization: '92%', + }, + { + id: 'gpu-2', + model: 'AMD RX 7900 XTX', + vram: '24 GB', + status: 'Ready', + utilization: '0%', + }, +]; + +logger.logTable(gpuColumns, gpuData, 'GPU Devices'); + +console.log(''); + +// === 4. Container Table === +const containerColumns: ITableColumn[] = [ + { header: 'ID', key: 'id' }, + { header: 'Type', key: 'type' }, + { header: 'Status', key: 'status' }, + { header: 'GPU', key: 'gpu' }, + { header: 'Models', key: 'models', align: 'right' }, +]; + +const containerData = [ + { id: 'ollama-1', type: 'ollama', status: 'Running', gpu: 'gpu-0', models: '3' }, + { id: 'vllm-1', type: 'vllm', status: 'Running', gpu: 'gpu-1', models: '1' }, +]; + +logger.logTable(containerColumns, containerData, 'AI Containers'); + +console.log(''); + +// === 5. Service Status Example === +logger.logBoxTitle('Service Status', 70, 'success'); +logger.logBoxLine(''); +logger.logBoxLine(`Status: ${theme.statusActive('Active (Running)')}`); +logger.logBoxLine(`Enabled: ${theme.success('Yes')}`); +logger.logBoxLine(`Uptime: 2 days, 5 hours, 23 minutes`); +logger.logBoxLine(`PID: ${theme.dim('12345')}`); +logger.logBoxLine(`Memory: ${theme.dim('245.2 MB')}`); +logger.logBoxLine(''); +logger.logBoxEnd(); + +console.log(''); + +// === 6. 
Configuration Example === +logger.logBoxTitle('Configuration', 70); +logger.logBoxLine(''); +logger.logBoxLine(`GPUs Detected: ${theme.highlight('3')}`); +logger.logBoxLine(`Containers: ${theme.highlight('2')}`); +logger.logBoxLine(`API Port: ${theme.dim('8080')}`); +logger.logBoxLine(`Config File: ${theme.path('/etc/modelgrid/config.json')}`); +logger.logBoxLine(''); +logger.logBoxEnd(); + +console.log(''); + +// === 7. Model List Example === +const modelColumns: ITableColumn[] = [ + { header: 'Model', key: 'name' }, + { header: 'Container', key: 'container' }, + { header: 'Size', key: 'size', align: 'right' }, + { header: 'Status', key: 'status' }, +]; + +const modelData = [ + { name: 'llama3:8b', container: 'ollama-1', size: '4.7 GB', status: 'Loaded' }, + { name: 'mistral:7b', container: 'ollama-1', size: '4.1 GB', status: 'Loaded' }, + { name: 'llama3:70b', container: 'vllm-1', size: '40 GB', status: 'Loaded' }, +]; + +logger.logTable(modelColumns, modelData, 'Loaded Models'); + +console.log(''); + +// === 8. Error Example === +logger.logBoxTitle('Error Example', 70, 'error'); +logger.logBoxLine(''); +logger.logBoxLine(`✗ Failed to start container vllm-2`); +logger.logBoxLine(''); +logger.logBoxLine('Possible causes:'); +logger.logBoxLine(` ${theme.dim('• Insufficient VRAM on assigned GPU')}`); +logger.logBoxLine(` ${theme.dim('• Docker daemon not running')}`); +logger.logBoxLine(` ${theme.dim('• NVIDIA container toolkit not installed')}`); +logger.logBoxLine(''); +logger.logBoxLine(`Try: ${theme.command('modelgrid gpu status')}`); +logger.logBoxLine(''); +logger.logBoxEnd(); + +console.log(''); + +// === Final Summary === +console.log('═'.repeat(80)); +logger.success('CLI Output Showcase Complete!'); +logger.dim('All color and formatting features demonstrated'); +console.log('═'.repeat(80)); +console.log(''); diff --git a/test/test.ts b/test/test.ts new file mode 100644 index 0000000..cc92638 --- /dev/null +++ b/test/test.ts @@ -0,0 +1,323 @@ +import { assert, assertEquals, assertExists } from 'jsr:@std/assert@^1.0.0'; +import { shortId } from '../ts/helpers/shortid.ts'; + +// ============================================================================= +// UNIT TESTS - ModelGrid Core Components +// ============================================================================= + +// ----------------------------------------------------------------------------- +// shortId() Tests +// ----------------------------------------------------------------------------- + +Deno.test('shortId: generates 6-character string', () => { + const id = shortId(); + assertEquals(id.length, 6); +}); + +Deno.test('shortId: contains only alphanumeric characters', () => { + const id = shortId(); + const alphanumericRegex = /^[a-zA-Z0-9]+$/; + assert(alphanumericRegex.test(id), `ID "${id}" contains non-alphanumeric characters`); +}); + +Deno.test('shortId: generates unique IDs', () => { + const ids = new Set(); + const count = 100; + + for (let i = 0; i < count; i++) { + ids.add(shortId()); + } + + // All IDs should be unique (statistically extremely likely for 100 IDs) + assertEquals(ids.size, count, 'Generated IDs should be unique'); +}); + +// ----------------------------------------------------------------------------- +// Interface Type Tests +// ----------------------------------------------------------------------------- + +Deno.test('IModelGridConfig: valid config structure', () => { + const config = { + version: '1.0', + api: { + port: 8080, + host: '0.0.0.0', + apiKeys: ['test-key'], + }, + docker: { 
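+      // 'docker' | 'podman' are the runtime values IModelGridConfig allows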
+ networkName: 'modelgrid', + runtime: 'docker' as const, + }, + gpus: { + autoDetect: true, + assignments: {}, + }, + containers: [], + models: { + greenlistUrl: 'https://example.com/greenlit.json', + autoPull: true, + defaultContainer: 'ollama', + autoLoad: [], + }, + checkInterval: 30000, + }; + + assertExists(config.version); + assertExists(config.api); + assertExists(config.docker); + assertExists(config.gpus); + assertExists(config.containers); + assertExists(config.models); + assertEquals(config.api.port, 8080); + assertEquals(config.docker.runtime, 'docker'); +}); + +Deno.test('IGpuInfo: valid GPU info structure', () => { + const gpu = { + id: 'gpu-0', + vendor: 'nvidia' as const, + model: 'RTX 4090', + vram: 24576, + driverVersion: '535.154.05', + cudaVersion: '12.2', + pciSlot: '0000:01:00.0', + }; + + assertExists(gpu.id); + assertExists(gpu.vendor); + assertExists(gpu.model); + assert(gpu.vram > 0, 'VRAM should be positive'); + assert(['nvidia', 'amd', 'intel'].includes(gpu.vendor), 'Vendor should be valid'); +}); + +Deno.test('IContainerConfig: valid container config structure', () => { + const container = { + id: 'ollama-1', + type: 'ollama' as const, + name: 'Ollama Container', + image: 'ollama/ollama:latest', + gpuIds: ['gpu-0'], + port: 11434, + models: ['llama3:8b'], + }; + + assertExists(container.id); + assertExists(container.type); + assertExists(container.name); + assertExists(container.image); + assert(container.gpuIds.length > 0, 'Should have at least one GPU'); + assert(container.port > 0, 'Port should be positive'); +}); + +// ----------------------------------------------------------------------------- +// Greenlit Model Tests +// ----------------------------------------------------------------------------- + +Deno.test('Greenlit model validation: valid model passes', () => { + const greenlist = { + version: '1.0', + models: [ + { name: 'llama3:8b', container: 'ollama', minVram: 8 }, + { name: 'mistral:7b', container: 'ollama', minVram: 8 }, + ], + }; + + const requestedModel = 'llama3:8b'; + const availableVram = 24; // GB + + const model = greenlist.models.find((m) => m.name === requestedModel); + assertExists(model, 'Model should be in greenlist'); + assert(availableVram >= model.minVram, 'Should have enough VRAM'); +}); + +Deno.test('Greenlit model validation: insufficient VRAM fails', () => { + const greenlist = { + version: '1.0', + models: [ + { name: 'llama3:70b', container: 'vllm', minVram: 48 }, + ], + }; + + const requestedModel = 'llama3:70b'; + const availableVram = 24; // GB + + const model = greenlist.models.find((m) => m.name === requestedModel); + assertExists(model, 'Model should be in greenlist'); + assert(availableVram < model.minVram, 'Should NOT have enough VRAM'); +}); + +Deno.test('Greenlit model validation: unlisted model rejected', () => { + const greenlist = { + version: '1.0', + models: [ + { name: 'llama3:8b', container: 'ollama', minVram: 8 }, + ], + }; + + const requestedModel = 'some-random-model:latest'; + const model = greenlist.models.find((m) => m.name === requestedModel); + assertEquals(model, undefined, 'Model should NOT be in greenlist'); +}); + +// ----------------------------------------------------------------------------- +// API Request Validation Tests +// ----------------------------------------------------------------------------- + +Deno.test('Chat completion request: valid request passes', () => { + const request = { + model: 'llama3:8b', + messages: [ + { role: 'user', content: 'Hello!' 
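+      // (messages follow the OpenAI chat shape: role + content per entry)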
}, + ], + }; + + assertExists(request.model, 'Model should be specified'); + assert(request.messages.length > 0, 'Should have at least one message'); + assert( + request.messages.every((m) => m.role && m.content), + 'All messages should have role and content', + ); +}); + +Deno.test('Chat completion request: missing model fails', () => { + const request = { + messages: [ + { role: 'user', content: 'Hello!' }, + ], + }; + + assertEquals((request as { model?: string }).model, undefined, 'Model should be missing'); +}); + +Deno.test('Chat completion request: empty messages fails', () => { + const request = { + model: 'llama3:8b', + messages: [], + }; + + assertEquals(request.messages.length, 0, 'Messages should be empty'); +}); + +Deno.test('Embedding request: valid request passes', () => { + const request = { + model: 'llama3:8b', + input: 'Hello, world!', + }; + + assertExists(request.model, 'Model should be specified'); + assertExists(request.input, 'Input should be specified'); +}); + +Deno.test('Embedding request: array input passes', () => { + const request = { + model: 'llama3:8b', + input: ['Hello', 'World'], + }; + + assertExists(request.model, 'Model should be specified'); + assert(Array.isArray(request.input), 'Input should be an array'); + assert(request.input.length > 0, 'Input should not be empty'); +}); + +// ----------------------------------------------------------------------------- +// Container Type Tests +// ----------------------------------------------------------------------------- + +Deno.test('Container types: ollama configuration', () => { + const ollamaConfig = { + type: 'ollama' as const, + image: 'ollama/ollama:latest', + defaultPort: 11434, + apiPath: '/api', + }; + + assertEquals(ollamaConfig.type, 'ollama'); + assertEquals(ollamaConfig.defaultPort, 11434); +}); + +Deno.test('Container types: vllm configuration', () => { + const vllmConfig = { + type: 'vllm' as const, + image: 'vllm/vllm-openai:latest', + defaultPort: 8000, + apiPath: '/v1', + }; + + assertEquals(vllmConfig.type, 'vllm'); + assertEquals(vllmConfig.defaultPort, 8000); +}); + +Deno.test('Container types: tgi configuration', () => { + const tgiConfig = { + type: 'tgi' as const, + image: 'ghcr.io/huggingface/text-generation-inference:latest', + defaultPort: 80, + apiPath: '/generate', + }; + + assertEquals(tgiConfig.type, 'tgi'); + assertEquals(tgiConfig.defaultPort, 80); +}); + +// ----------------------------------------------------------------------------- +// GPU Vendor Tests +// ----------------------------------------------------------------------------- + +Deno.test('GPU vendors: NVIDIA detection pattern', () => { + const nvidiaPatterns = ['NVIDIA', 'GeForce', 'Quadro', 'Tesla', 'RTX', 'GTX']; + const gpuName = 'NVIDIA GeForce RTX 4090'; + + const isNvidia = nvidiaPatterns.some((pattern) => + gpuName.toUpperCase().includes(pattern.toUpperCase()) + ); + assert(isNvidia, 'Should detect NVIDIA GPU'); +}); + +Deno.test('GPU vendors: AMD detection pattern', () => { + const amdPatterns = ['AMD', 'Radeon', 'RX']; + const gpuName = 'AMD Radeon RX 7900 XTX'; + + const isAmd = amdPatterns.some((pattern) => + gpuName.toUpperCase().includes(pattern.toUpperCase()) + ); + assert(isAmd, 'Should detect AMD GPU'); +}); + +Deno.test('GPU vendors: Intel detection pattern', () => { + const intelPatterns = ['Intel', 'Arc', 'Iris', 'UHD']; + const gpuName = 'Intel Arc A770'; + + const isIntel = intelPatterns.some((pattern) => + gpuName.toUpperCase().includes(pattern.toUpperCase()) + ); + assert(isIntel, 'Should 
detect Intel GPU'); +}); + +// ----------------------------------------------------------------------------- +// VRAM Calculation Tests +// ----------------------------------------------------------------------------- + +Deno.test('VRAM calculation: MB to GB conversion', () => { + const vramMB = 24576; // 24 GB in MB + const vramGB = vramMB / 1024; + assertEquals(vramGB, 24); +}); + +Deno.test('VRAM calculation: model fits in available VRAM', () => { + const availableVramGB = 24; + const modelRequiredVramGB = 8; + const overhead = 2; // GB for system overhead + + const fits = (modelRequiredVramGB + overhead) <= availableVramGB; + assert(fits, 'Model should fit in available VRAM'); +}); + +Deno.test('VRAM calculation: multiple models VRAM sum', () => { + const models = [ + { name: 'llama3:8b', vram: 8 }, + { name: 'mistral:7b', vram: 8 }, + ]; + + const totalVram = models.reduce((sum, m) => sum + m.vram, 0); + assertEquals(totalVram, 16); +}); diff --git a/ts/00_commitinfo_data.ts b/ts/00_commitinfo_data.ts new file mode 100644 index 0000000..3a2f810 --- /dev/null +++ b/ts/00_commitinfo_data.ts @@ -0,0 +1,8 @@ +/** + * autocreated commitinfo by @push.rocks/commitinfo + */ +export const commitinfo = { + name: '@modelgrid.com/modelgrid', + version: '1.0.0', + description: 'GPU infrastructure management daemon with OpenAI-compatible API for AI model containers' +} diff --git a/ts/api/handlers/chat.ts b/ts/api/handlers/chat.ts new file mode 100644 index 0000000..f01f058 --- /dev/null +++ b/ts/api/handlers/chat.ts @@ -0,0 +1,150 @@ +/** + * Chat Completions Handler + * + * Handles /v1/chat/completions and /v1/completions endpoints. + */ + +import * as http from 'node:http'; +import type { + IChatCompletionRequest, + IChatCompletionResponse, + IApiError, +} from '../../interfaces/api.ts'; +import { logger } from '../../logger.ts'; +import { ContainerManager } from '../../containers/container-manager.ts'; +import { ModelLoader } from '../../models/loader.ts'; + +/** + * Handler for chat completion requests + */ +export class ChatHandler { + private containerManager: ContainerManager; + private modelLoader: ModelLoader; + + constructor(containerManager: ContainerManager, modelLoader: ModelLoader) { + this.containerManager = containerManager; + this.modelLoader = modelLoader; + } + + /** + * Handle POST /v1/chat/completions + */ + public async handleChatCompletion( + req: http.IncomingMessage, + res: http.ServerResponse, + body: IChatCompletionRequest, + ): Promise { + const modelName = body.model; + const isStream = body.stream === true; + + logger.dim(`Chat completion request for model: ${modelName}`); + + try { + // Find or load the model + const container = await this.findOrLoadModel(modelName); + if (!container) { + this.sendError(res, 404, `Model "${modelName}" not found or could not be loaded`, 'model_not_found'); + return; + } + + // Route to streaming or non-streaming handler + if (isStream) { + await this.handleStreamingCompletion(res, container, body); + } else { + await this.handleNonStreamingCompletion(res, container, body); + } + } catch (error) { + const message = error instanceof Error ? 
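+      // (normalize unknown thrown values to a readable message)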
error.message : String(error);
+      logger.error(`Chat completion error: ${message}`);
+      this.sendError(res, 500, `Chat completion failed: ${message}`, 'server_error');
+    }
+  }
+
+  /**
+   * Find container with model or attempt to load it
+   */
+  private async findOrLoadModel(
+    modelName: string,
+  ): Promise<import('../../containers/base-container.ts').BaseContainer | null> {
+    // First, check if model is already loaded
+    const container = await this.containerManager.findContainerForModel(modelName);
+    if (container) {
+      return container;
+    }
+
+    // Try to load the model
+    logger.info(`Model ${modelName} not loaded, attempting to load...`);
+    const loadResult = await this.modelLoader.loadModel(modelName);
+
+    if (!loadResult.success) {
+      logger.error(`Failed to load model: ${loadResult.error}`);
+      return null;
+    }
+
+    // Find the container again after loading
+    return this.containerManager.findContainerForModel(modelName);
+  }
+
+  /**
+   * Handle non-streaming chat completion
+   */
+  private async handleNonStreamingCompletion(
+    res: http.ServerResponse,
+    container: import('../../containers/base-container.ts').BaseContainer,
+    body: IChatCompletionRequest,
+  ): Promise<void> {
+    const response = await container.chatCompletion(body);
+
+    res.writeHead(200, { 'Content-Type': 'application/json' });
+    res.end(JSON.stringify(response));
+  }
+
+  /**
+   * Handle streaming chat completion
+   */
+  private async handleStreamingCompletion(
+    res: http.ServerResponse,
+    container: import('../../containers/base-container.ts').BaseContainer,
+    body: IChatCompletionRequest,
+  ): Promise<void> {
+    // Set SSE headers
+    res.writeHead(200, {
+      'Content-Type': 'text/event-stream',
+      'Cache-Control': 'no-cache',
+      'Connection': 'keep-alive',
+      'X-Accel-Buffering': 'no',
+    });
+
+    // Stream chunks to client
+    await container.chatCompletionStream(body, (chunk) => {
+      res.write(`data: ${chunk}\n\n`);
+    });
+
+    // Send final [DONE] message
+    res.write('data: [DONE]\n\n');
+    res.end();
+  }
+
+  /**
+   * Send error response
+   */
+  private sendError(
+    res: http.ServerResponse,
+    statusCode: number,
+    message: string,
+    type: string,
+    param?: string,
+  ): void {
+    const error: IApiError = {
+      error: {
+        message,
+        type,
+        param,
+        code: null,
+      },
+    };
+
+    res.writeHead(statusCode, { 'Content-Type': 'application/json' });
+    res.end(JSON.stringify(error));
+  }
+}
diff --git a/ts/api/handlers/embeddings.ts b/ts/api/handlers/embeddings.ts
new file mode 100644
index 0000000..6a4647d
--- /dev/null
+++ b/ts/api/handlers/embeddings.ts
@@ -0,0 +1,235 @@
+/**
+ * Embeddings Handler
+ *
+ * Handles /v1/embeddings endpoint. 
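+ *
+ * Accepts a string or an array of strings as input and fans out to the
+ * container-specific embedding endpoint (Ollama, vLLM, or TGI).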
+ */ + +import * as http from 'node:http'; +import type { + IEmbeddingsRequest, + IEmbeddingsResponse, + IEmbeddingData, + IApiError, +} from '../../interfaces/api.ts'; +import { logger } from '../../logger.ts'; +import { ContainerManager } from '../../containers/container-manager.ts'; + +/** + * Handler for embeddings requests + */ +export class EmbeddingsHandler { + private containerManager: ContainerManager; + + constructor(containerManager: ContainerManager) { + this.containerManager = containerManager; + } + + /** + * Handle POST /v1/embeddings + */ + public async handleEmbeddings( + res: http.ServerResponse, + body: IEmbeddingsRequest, + ): Promise { + const modelName = body.model; + + logger.dim(`Embeddings request for model: ${modelName}`); + + try { + // Find container with the embedding model + const container = await this.containerManager.findContainerForModel(modelName); + if (!container) { + this.sendError(res, 404, `Embedding model "${modelName}" not found`, 'model_not_found'); + return; + } + + // Generate embeddings + const response = await this.generateEmbeddings(container, body); + + res.writeHead(200, { 'Content-Type': 'application/json' }); + res.end(JSON.stringify(response)); + } catch (error) { + const message = error instanceof Error ? error.message : String(error); + logger.error(`Embeddings error: ${message}`); + this.sendError(res, 500, `Embeddings generation failed: ${message}`, 'server_error'); + } + } + + /** + * Generate embeddings from container + */ + private async generateEmbeddings( + container: import('../../containers/base-container.ts').BaseContainer, + request: IEmbeddingsRequest, + ): Promise { + const inputs = Array.isArray(request.input) ? request.input : [request.input]; + const embeddings: IEmbeddingData[] = []; + let totalTokens = 0; + + // Generate embeddings for each input + for (let i = 0; i < inputs.length; i++) { + const input = inputs[i]; + const embedding = await this.getEmbeddingFromContainer(container, request.model, input); + + embeddings.push({ + object: 'embedding', + embedding: embedding.vector, + index: i, + }); + + totalTokens += embedding.tokenCount; + } + + return { + object: 'list', + data: embeddings, + model: request.model, + usage: { + prompt_tokens: totalTokens, + total_tokens: totalTokens, + }, + }; + } + + /** + * Get embedding from container (container-specific implementation) + */ + private async getEmbeddingFromContainer( + container: import('../../containers/base-container.ts').BaseContainer, + model: string, + input: string, + ): Promise<{ vector: number[]; tokenCount: number }> { + const endpoint = container.getEndpoint(); + const containerType = container.type; + + // Route to container-specific embedding endpoint + if (containerType === 'ollama') { + return this.getOllamaEmbedding(endpoint, model, input); + } else if (containerType === 'vllm') { + return this.getVllmEmbedding(endpoint, model, input); + } else if (containerType === 'tgi') { + return this.getTgiEmbedding(endpoint, model, input); + } + + throw new Error(`Container type ${containerType} does not support embeddings`); + } + + /** + * Get embedding from Ollama + */ + private async getOllamaEmbedding( + endpoint: string, + model: string, + input: string, + ): Promise<{ vector: number[]; tokenCount: number }> { + const response = await fetch(`${endpoint}/api/embeddings`, { + method: 'POST', + headers: { 'Content-Type': 'application/json' }, + body: JSON.stringify({ + model, + prompt: input, + }), + }); + + if (!response.ok) { + const errorText = await 
response.text(); + throw new Error(`Ollama embedding error: ${errorText}`); + } + + const result = await response.json() as { embedding: number[] }; + + // Estimate token count (rough approximation: ~4 chars per token) + const tokenCount = Math.ceil(input.length / 4); + + return { + vector: result.embedding, + tokenCount, + }; + } + + /** + * Get embedding from vLLM (OpenAI-compatible) + */ + private async getVllmEmbedding( + endpoint: string, + model: string, + input: string, + ): Promise<{ vector: number[]; tokenCount: number }> { + const response = await fetch(`${endpoint}/v1/embeddings`, { + method: 'POST', + headers: { 'Content-Type': 'application/json' }, + body: JSON.stringify({ + model, + input, + }), + }); + + if (!response.ok) { + const errorText = await response.text(); + throw new Error(`vLLM embedding error: ${errorText}`); + } + + const result = await response.json() as IEmbeddingsResponse; + + return { + vector: result.data[0].embedding, + tokenCount: result.usage.total_tokens, + }; + } + + /** + * Get embedding from TGI + */ + private async getTgiEmbedding( + endpoint: string, + _model: string, + input: string, + ): Promise<{ vector: number[]; tokenCount: number }> { + // TGI uses /embed endpoint + const response = await fetch(`${endpoint}/embed`, { + method: 'POST', + headers: { 'Content-Type': 'application/json' }, + body: JSON.stringify({ + inputs: input, + }), + }); + + if (!response.ok) { + const errorText = await response.text(); + throw new Error(`TGI embedding error: ${errorText}`); + } + + const result = await response.json() as number[][]; + + // Estimate token count + const tokenCount = Math.ceil(input.length / 4); + + return { + vector: result[0], + tokenCount, + }; + } + + /** + * Send error response + */ + private sendError( + res: http.ServerResponse, + statusCode: number, + message: string, + type: string, + param?: string, + ): void { + const error: IApiError = { + error: { + message, + type, + param, + code: null, + }, + }; + + res.writeHead(statusCode, { 'Content-Type': 'application/json' }); + res.end(JSON.stringify(error)); + } +} diff --git a/ts/api/handlers/index.ts b/ts/api/handlers/index.ts new file mode 100644 index 0000000..751c949 --- /dev/null +++ b/ts/api/handlers/index.ts @@ -0,0 +1,9 @@ +/** + * API Handlers + * + * OpenAI-compatible request handlers. + */ + +export { ChatHandler } from './chat.ts'; +export { ModelsHandler } from './models.ts'; +export { EmbeddingsHandler } from './embeddings.ts'; diff --git a/ts/api/handlers/models.ts b/ts/api/handlers/models.ts new file mode 100644 index 0000000..01ab518 --- /dev/null +++ b/ts/api/handlers/models.ts @@ -0,0 +1,136 @@ +/** + * Models Handler + * + * Handles /v1/models endpoints. 
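+ *
+ * The returned model list merges models reported by running containers
+ * with greenlit models that have not been loaded yet.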
+ */ + +import * as http from 'node:http'; +import type { + IModelInfo, + IListModelsResponse, + IApiError, +} from '../../interfaces/api.ts'; +import { logger } from '../../logger.ts'; +import { ContainerManager } from '../../containers/container-manager.ts'; +import { ModelRegistry } from '../../models/registry.ts'; + +/** + * Handler for model-related requests + */ +export class ModelsHandler { + private containerManager: ContainerManager; + private modelRegistry: ModelRegistry; + + constructor(containerManager: ContainerManager, modelRegistry: ModelRegistry) { + this.containerManager = containerManager; + this.modelRegistry = modelRegistry; + } + + /** + * Handle GET /v1/models + */ + public async handleListModels(res: http.ServerResponse): Promise { + try { + const models = await this.getAvailableModels(); + + const response: IListModelsResponse = { + object: 'list', + data: models, + }; + + res.writeHead(200, { 'Content-Type': 'application/json' }); + res.end(JSON.stringify(response)); + } catch (error) { + const message = error instanceof Error ? error.message : String(error); + logger.error(`Failed to list models: ${message}`); + this.sendError(res, 500, `Failed to list models: ${message}`, 'server_error'); + } + } + + /** + * Handle GET /v1/models/:model + */ + public async handleGetModel(res: http.ServerResponse, modelId: string): Promise { + try { + const models = await this.getAvailableModels(); + const model = models.find((m) => m.id === modelId); + + if (!model) { + this.sendError(res, 404, `Model "${modelId}" not found`, 'model_not_found'); + return; + } + + res.writeHead(200, { 'Content-Type': 'application/json' }); + res.end(JSON.stringify(model)); + } catch (error) { + const message = error instanceof Error ? error.message : String(error); + logger.error(`Failed to get model info: ${message}`); + this.sendError(res, 500, `Failed to get model info: ${message}`, 'server_error'); + } + } + + /** + * Get all available models from containers and greenlist + */ + private async getAvailableModels(): Promise { + const models: IModelInfo[] = []; + const seen = new Set(); + const timestamp = Math.floor(Date.now() / 1000); + + // Get models from running containers + const containerModels = await this.containerManager.getAllAvailableModels(); + for (const [modelId, modelInfo] of containerModels) { + if (!seen.has(modelId)) { + seen.add(modelId); + models.push({ + id: modelId, + object: 'model', + created: timestamp, + owned_by: `modelgrid-${modelInfo.container}`, + }); + } + } + + // Add greenlit models that aren't loaded yet + const greenlitModels = await this.modelRegistry.getAllGreenlitModels(); + for (const greenlit of greenlitModels) { + if (!seen.has(greenlit.name)) { + seen.add(greenlit.name); + models.push({ + id: greenlit.name, + object: 'model', + created: timestamp, + owned_by: `modelgrid-${greenlit.container}`, + }); + } + } + + // Sort alphabetically + models.sort((a, b) => a.id.localeCompare(b.id)); + + return models; + } + + /** + * Send error response + */ + private sendError( + res: http.ServerResponse, + statusCode: number, + message: string, + type: string, + param?: string, + ): void { + const error: IApiError = { + error: { + message, + type, + param, + code: null, + }, + }; + + res.writeHead(statusCode, { 'Content-Type': 'application/json' }); + res.end(JSON.stringify(error)); + } +} diff --git a/ts/api/index.ts b/ts/api/index.ts new file mode 100644 index 0000000..3251876 --- /dev/null +++ b/ts/api/index.ts @@ -0,0 +1,10 @@ +/** + * API Gateway Module + * + * 
OpenAI-compatible API server for ModelGrid. + */ + +export { ApiServer } from './server.ts'; +export { ApiRouter } from './router.ts'; +export * from './handlers/index.ts'; +export * from './middleware/index.ts'; diff --git a/ts/api/middleware/auth.ts b/ts/api/middleware/auth.ts new file mode 100644 index 0000000..d0055bd --- /dev/null +++ b/ts/api/middleware/auth.ts @@ -0,0 +1,105 @@ +/** + * Authentication Middleware + * + * Validates API keys for incoming requests. + */ + +import * as http from 'node:http'; +import { logger } from '../../logger.ts'; + +/** + * Authentication middleware for API key validation + */ +export class AuthMiddleware { + private apiKeys: Set; + private allowNoAuth: boolean; + + constructor(apiKeys: string[], allowNoAuth: boolean = false) { + this.apiKeys = new Set(apiKeys); + this.allowNoAuth = allowNoAuth; + + if (this.apiKeys.size === 0 && !allowNoAuth) { + logger.warn('No API keys configured - authentication will fail for all requests'); + } + } + + /** + * Authenticate a request + */ + public authenticate(req: http.IncomingMessage): boolean { + // If no keys configured and allowNoAuth is true, allow all requests + if (this.apiKeys.size === 0 && this.allowNoAuth) { + return true; + } + + const authHeader = req.headers.authorization; + + if (!authHeader) { + logger.dim('Request rejected: No Authorization header'); + return false; + } + + // Extract Bearer token + const match = authHeader.match(/^Bearer\s+(.+)$/i); + if (!match) { + logger.dim('Request rejected: Invalid Authorization header format'); + return false; + } + + const apiKey = match[1]; + + // Check if key is valid + if (!this.apiKeys.has(apiKey)) { + logger.dim('Request rejected: Invalid API key'); + return false; + } + + return true; + } + + /** + * Get API key from request (if authenticated) + */ + public getApiKey(req: http.IncomingMessage): string | null { + const authHeader = req.headers.authorization; + if (!authHeader) { + return null; + } + + const match = authHeader.match(/^Bearer\s+(.+)$/i); + return match ? match[1] : null; + } + + /** + * Add an API key + */ + public addApiKey(key: string): void { + this.apiKeys.add(key); + logger.info('API key added'); + } + + /** + * Remove an API key + */ + public removeApiKey(key: string): boolean { + const removed = this.apiKeys.delete(key); + if (removed) { + logger.info('API key removed'); + } + return removed; + } + + /** + * Get count of configured API keys + */ + public getKeyCount(): number { + return this.apiKeys.size; + } + + /** + * Check if authentication is required + */ + public isAuthRequired(): boolean { + return !this.allowNoAuth || this.apiKeys.size > 0; + } +} diff --git a/ts/api/middleware/index.ts b/ts/api/middleware/index.ts new file mode 100644 index 0000000..0f33e40 --- /dev/null +++ b/ts/api/middleware/index.ts @@ -0,0 +1,7 @@ +/** + * API Middleware + */ + +export { AuthMiddleware } from './auth.ts'; +export { SanityMiddleware } from './sanity.ts'; +export type { IValidationResult } from './sanity.ts'; diff --git a/ts/api/middleware/sanity.ts b/ts/api/middleware/sanity.ts new file mode 100644 index 0000000..b56f213 --- /dev/null +++ b/ts/api/middleware/sanity.ts @@ -0,0 +1,254 @@ +/** + * Sanity Middleware + * + * Validates request structure and parameters. 
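+ *
+ * Parameter ranges mirror the OpenAI API (temperature 0-2, top_p 0-1,
+ * penalties -2 to 2); the sanitize* helpers strip unknown fields afterwards.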
+ */ + +import type { IChatCompletionRequest, IEmbeddingsRequest } from '../../interfaces/api.ts'; +import { ModelRegistry } from '../../models/registry.ts'; + +/** + * Validation result + */ +export interface IValidationResult { + valid: boolean; + error?: string; + param?: string; +} + +/** + * Request validation middleware + */ +export class SanityMiddleware { + private modelRegistry: ModelRegistry; + + constructor(modelRegistry: ModelRegistry) { + this.modelRegistry = modelRegistry; + } + + /** + * Validate chat completion request + */ + public validateChatRequest(body: unknown): IValidationResult { + if (!body || typeof body !== 'object') { + return { valid: false, error: 'Request body must be a JSON object' }; + } + + const request = body as Record; + + // Validate model + if (!request.model || typeof request.model !== 'string') { + return { valid: false, error: 'Missing or invalid "model" field', param: 'model' }; + } + + // Validate messages + if (!Array.isArray(request.messages)) { + return { valid: false, error: 'Missing or invalid "messages" field', param: 'messages' }; + } + + if (request.messages.length === 0) { + return { valid: false, error: '"messages" array cannot be empty', param: 'messages' }; + } + + // Validate each message + for (let i = 0; i < request.messages.length; i++) { + const msg = request.messages[i] as Record; + const msgValidation = this.validateMessage(msg, i); + if (!msgValidation.valid) { + return msgValidation; + } + } + + // Validate optional parameters + if (request.temperature !== undefined) { + const temp = request.temperature as number; + if (typeof temp !== 'number' || temp < 0 || temp > 2) { + return { valid: false, error: '"temperature" must be between 0 and 2', param: 'temperature' }; + } + } + + if (request.top_p !== undefined) { + const topP = request.top_p as number; + if (typeof topP !== 'number' || topP < 0 || topP > 1) { + return { valid: false, error: '"top_p" must be between 0 and 1', param: 'top_p' }; + } + } + + if (request.max_tokens !== undefined) { + const maxTokens = request.max_tokens as number; + if (typeof maxTokens !== 'number' || maxTokens < 1) { + return { valid: false, error: '"max_tokens" must be a positive integer', param: 'max_tokens' }; + } + } + + if (request.n !== undefined) { + const n = request.n as number; + if (typeof n !== 'number' || n < 1 || n > 10) { + return { valid: false, error: '"n" must be between 1 and 10', param: 'n' }; + } + } + + if (request.stream !== undefined && typeof request.stream !== 'boolean') { + return { valid: false, error: '"stream" must be a boolean', param: 'stream' }; + } + + if (request.presence_penalty !== undefined) { + const pp = request.presence_penalty as number; + if (typeof pp !== 'number' || pp < -2 || pp > 2) { + return { valid: false, error: '"presence_penalty" must be between -2 and 2', param: 'presence_penalty' }; + } + } + + if (request.frequency_penalty !== undefined) { + const fp = request.frequency_penalty as number; + if (typeof fp !== 'number' || fp < -2 || fp > 2) { + return { valid: false, error: '"frequency_penalty" must be between -2 and 2', param: 'frequency_penalty' }; + } + } + + return { valid: true }; + } + + /** + * Validate a single message in the chat request + */ + private validateMessage(msg: Record, index: number): IValidationResult { + if (!msg || typeof msg !== 'object') { + return { valid: false, error: `Message at index ${index} must be an object`, param: `messages[${index}]` }; + } + + // Validate role + const validRoles = ['system', 'user', 
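+      // 'tool' role messages additionally require tool_call_id (validated below)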
'assistant', 'tool']; + if (!msg.role || !validRoles.includes(msg.role as string)) { + return { + valid: false, + error: `Invalid role at index ${index}. Must be one of: ${validRoles.join(', ')}`, + param: `messages[${index}].role`, + }; + } + + // Validate content (can be null for assistant with tool_calls) + if (msg.role === 'assistant' && msg.tool_calls) { + // Content can be null/undefined when tool_calls present + } else if (msg.content === undefined || msg.content === null) { + return { + valid: false, + error: `Missing content at index ${index}`, + param: `messages[${index}].content`, + }; + } else if (typeof msg.content !== 'string') { + return { + valid: false, + error: `Content at index ${index} must be a string`, + param: `messages[${index}].content`, + }; + } + + // Validate tool response message + if (msg.role === 'tool' && !msg.tool_call_id) { + return { + valid: false, + error: `Tool message at index ${index} requires tool_call_id`, + param: `messages[${index}].tool_call_id`, + }; + } + + return { valid: true }; + } + + /** + * Validate embeddings request + */ + public validateEmbeddingsRequest(body: unknown): IValidationResult { + if (!body || typeof body !== 'object') { + return { valid: false, error: 'Request body must be a JSON object' }; + } + + const request = body as Record; + + // Validate model + if (!request.model || typeof request.model !== 'string') { + return { valid: false, error: 'Missing or invalid "model" field', param: 'model' }; + } + + // Validate input + if (request.input === undefined || request.input === null) { + return { valid: false, error: 'Missing "input" field', param: 'input' }; + } + + const input = request.input; + if (typeof input !== 'string' && !Array.isArray(input)) { + return { valid: false, error: '"input" must be a string or array of strings', param: 'input' }; + } + + if (Array.isArray(input)) { + for (let i = 0; i < input.length; i++) { + if (typeof input[i] !== 'string') { + return { valid: false, error: `"input[${i}]" must be a string`, param: `input[${i}]` }; + } + } + + if (input.length === 0) { + return { valid: false, error: '"input" array cannot be empty', param: 'input' }; + } + } + + // Validate encoding_format + if (request.encoding_format !== undefined) { + const format = request.encoding_format as string; + if (format !== 'float' && format !== 'base64') { + return { valid: false, error: '"encoding_format" must be "float" or "base64"', param: 'encoding_format' }; + } + } + + return { valid: true }; + } + + /** + * Check if model is in greenlist (async validation) + */ + public async validateModelGreenlist(modelName: string): Promise { + const isGreenlit = await this.modelRegistry.isModelGreenlit(modelName); + if (!isGreenlit) { + return { + valid: false, + error: `Model "${modelName}" is not greenlit. 
Contact administrator to add it to the greenlist.`, + param: 'model', + }; + } + return { valid: true }; + } + + /** + * Sanitize request body by removing unknown fields + */ + public sanitizeChatRequest(body: Record): IChatCompletionRequest { + return { + model: body.model as string, + messages: body.messages as IChatCompletionRequest['messages'], + max_tokens: body.max_tokens as number | undefined, + temperature: body.temperature as number | undefined, + top_p: body.top_p as number | undefined, + n: body.n as number | undefined, + stream: body.stream as boolean | undefined, + stop: body.stop as string | string[] | undefined, + presence_penalty: body.presence_penalty as number | undefined, + frequency_penalty: body.frequency_penalty as number | undefined, + user: body.user as string | undefined, + tools: body.tools as IChatCompletionRequest['tools'], + tool_choice: body.tool_choice as IChatCompletionRequest['tool_choice'], + }; + } + + /** + * Sanitize embeddings request + */ + public sanitizeEmbeddingsRequest(body: Record): IEmbeddingsRequest { + return { + model: body.model as string, + input: body.input as string | string[], + user: body.user as string | undefined, + encoding_format: body.encoding_format as 'float' | 'base64' | undefined, + }; + } +} diff --git a/ts/api/router.ts b/ts/api/router.ts new file mode 100644 index 0000000..708923f --- /dev/null +++ b/ts/api/router.ts @@ -0,0 +1,300 @@ +/** + * API Router + * + * Routes incoming requests to appropriate handlers. + */ + +import * as http from 'node:http'; +import type { IApiError } from '../interfaces/api.ts'; +import { logger } from '../logger.ts'; +import { ContainerManager } from '../containers/container-manager.ts'; +import { ModelRegistry } from '../models/registry.ts'; +import { ModelLoader } from '../models/loader.ts'; +import { ChatHandler } from './handlers/chat.ts'; +import { ModelsHandler } from './handlers/models.ts'; +import { EmbeddingsHandler } from './handlers/embeddings.ts'; +import { AuthMiddleware } from './middleware/auth.ts'; +import { SanityMiddleware } from './middleware/sanity.ts'; + +/** + * API Router - routes requests to handlers + */ +export class ApiRouter { + private containerManager: ContainerManager; + private modelRegistry: ModelRegistry; + private modelLoader: ModelLoader; + private chatHandler: ChatHandler; + private modelsHandler: ModelsHandler; + private embeddingsHandler: EmbeddingsHandler; + private authMiddleware: AuthMiddleware; + private sanityMiddleware: SanityMiddleware; + + constructor( + containerManager: ContainerManager, + modelRegistry: ModelRegistry, + modelLoader: ModelLoader, + apiKeys: string[], + ) { + this.containerManager = containerManager; + this.modelRegistry = modelRegistry; + this.modelLoader = modelLoader; + + // Initialize handlers + this.chatHandler = new ChatHandler(containerManager, modelLoader); + this.modelsHandler = new ModelsHandler(containerManager, modelRegistry); + this.embeddingsHandler = new EmbeddingsHandler(containerManager); + + // Initialize middleware + this.authMiddleware = new AuthMiddleware(apiKeys); + this.sanityMiddleware = new SanityMiddleware(modelRegistry); + } + + /** + * Route a request to the appropriate handler + */ + public async route( + req: http.IncomingMessage, + res: http.ServerResponse, + path: string, + ): Promise { + // OpenAI API endpoints + if (path === '/v1/chat/completions') { + await this.handleChatCompletions(req, res); + return; + } + + if (path === '/v1/completions') { + await this.handleCompletions(req, res); + return; 
+ } + + if (path === '/v1/models' || path === '/v1/models/') { + await this.handleModels(req, res); + return; + } + + if (path.startsWith('/v1/models/')) { + await this.handleModelInfo(req, res, path); + return; + } + + if (path === '/v1/embeddings') { + await this.handleEmbeddings(req, res); + return; + } + + // Not found + this.sendError(res, 404, `Endpoint not found: ${path}`, 'invalid_request_error'); + } + + /** + * Handle POST /v1/chat/completions + */ + private async handleChatCompletions( + req: http.IncomingMessage, + res: http.ServerResponse, + ): Promise { + if (req.method !== 'POST') { + this.sendError(res, 405, 'Method not allowed', 'invalid_request_error'); + return; + } + + // Authenticate + if (!this.authMiddleware.authenticate(req)) { + this.sendError(res, 401, 'Invalid API key', 'authentication_error'); + return; + } + + // Parse body + const body = await this.parseRequestBody(req); + if (!body) { + this.sendError(res, 400, 'Invalid JSON body', 'invalid_request_error'); + return; + } + + // Validate request + const validation = this.sanityMiddleware.validateChatRequest(body); + if (!validation.valid) { + this.sendError(res, 400, validation.error || 'Invalid request', 'invalid_request_error'); + return; + } + + // Handle request + await this.chatHandler.handleChatCompletion(req, res, body); + } + + /** + * Handle POST /v1/completions (legacy endpoint) + */ + private async handleCompletions( + req: http.IncomingMessage, + res: http.ServerResponse, + ): Promise { + if (req.method !== 'POST') { + this.sendError(res, 405, 'Method not allowed', 'invalid_request_error'); + return; + } + + // Authenticate + if (!this.authMiddleware.authenticate(req)) { + this.sendError(res, 401, 'Invalid API key', 'authentication_error'); + return; + } + + // Parse body + const body = await this.parseRequestBody(req); + if (!body) { + this.sendError(res, 400, 'Invalid JSON body', 'invalid_request_error'); + return; + } + + // Convert to chat format and handle + const chatBody = this.convertCompletionToChat(body); + await this.chatHandler.handleChatCompletion(req, res, chatBody); + } + + /** + * Handle GET /v1/models + */ + private async handleModels( + req: http.IncomingMessage, + res: http.ServerResponse, + ): Promise { + if (req.method !== 'GET') { + this.sendError(res, 405, 'Method not allowed', 'invalid_request_error'); + return; + } + + // Authenticate + if (!this.authMiddleware.authenticate(req)) { + this.sendError(res, 401, 'Invalid API key', 'authentication_error'); + return; + } + + await this.modelsHandler.handleListModels(res); + } + + /** + * Handle GET /v1/models/:model + */ + private async handleModelInfo( + req: http.IncomingMessage, + res: http.ServerResponse, + path: string, + ): Promise { + if (req.method !== 'GET') { + this.sendError(res, 405, 'Method not allowed', 'invalid_request_error'); + return; + } + + // Authenticate + if (!this.authMiddleware.authenticate(req)) { + this.sendError(res, 401, 'Invalid API key', 'authentication_error'); + return; + } + + const modelId = path.replace('/v1/models/', ''); + await this.modelsHandler.handleGetModel(res, modelId); + } + + /** + * Handle POST /v1/embeddings + */ + private async handleEmbeddings( + req: http.IncomingMessage, + res: http.ServerResponse, + ): Promise { + if (req.method !== 'POST') { + this.sendError(res, 405, 'Method not allowed', 'invalid_request_error'); + return; + } + + // Authenticate + if (!this.authMiddleware.authenticate(req)) { + this.sendError(res, 401, 'Invalid API key', 'authentication_error'); + return; 
+ } + + // Parse body + const body = await this.parseRequestBody(req); + if (!body) { + this.sendError(res, 400, 'Invalid JSON body', 'invalid_request_error'); + return; + } + + await this.embeddingsHandler.handleEmbeddings(res, body); + } + + /** + * Parse request body + */ + private async parseRequestBody(req: http.IncomingMessage): Promise { + return new Promise((resolve) => { + let body = ''; + + req.on('data', (chunk) => { + body += chunk.toString(); + // Limit body size + if (body.length > 10 * 1024 * 1024) { + resolve(null); + } + }); + + req.on('end', () => { + try { + resolve(JSON.parse(body)); + } catch { + resolve(null); + } + }); + + req.on('error', () => { + resolve(null); + }); + }); + } + + /** + * Convert legacy completion request to chat format + */ + private convertCompletionToChat(body: Record): Record { + const prompt = body.prompt as string | string[]; + const promptText = Array.isArray(prompt) ? prompt.join('\n') : prompt; + + return { + model: body.model, + messages: [ + { role: 'user', content: promptText }, + ], + max_tokens: body.max_tokens, + temperature: body.temperature, + top_p: body.top_p, + n: body.n, + stream: body.stream, + stop: body.stop, + }; + } + + /** + * Send error response + */ + private sendError( + res: http.ServerResponse, + statusCode: number, + message: string, + type: string, + param?: string, + ): void { + const error: IApiError = { + error: { + message, + type, + param, + code: null, + }, + }; + + res.writeHead(statusCode, { 'Content-Type': 'application/json' }); + res.end(JSON.stringify(error)); + } +} diff --git a/ts/api/server.ts b/ts/api/server.ts new file mode 100644 index 0000000..0b380f2 --- /dev/null +++ b/ts/api/server.ts @@ -0,0 +1,300 @@ +/** + * API Server + * + * HTTP server for the OpenAI-compatible API gateway. 
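+ *
+ * Example requests (a sketch, assuming the default host/port from
+ * config init and a configured API key):
+ *   curl -H "Authorization: Bearer $KEY" http://localhost:8080/v1/models
+ *   curl http://localhost:8080/health   # health check, no auth required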
+ */ + +import * as http from 'node:http'; +import type { IApiConfig } from '../interfaces/config.ts'; +import type { IHealthResponse } from '../interfaces/api.ts'; +import { logger } from '../logger.ts'; +import { API_SERVER } from '../constants.ts'; +import { ApiRouter } from './router.ts'; +import { ContainerManager } from '../containers/container-manager.ts'; +import { ModelRegistry } from '../models/registry.ts'; +import { ModelLoader } from '../models/loader.ts'; +import { GpuDetector } from '../hardware/gpu-detector.ts'; + +/** + * API Server for ModelGrid + */ +export class ApiServer { + private server?: http.Server; + private config: IApiConfig; + private router: ApiRouter; + private containerManager: ContainerManager; + private modelRegistry: ModelRegistry; + private modelLoader: ModelLoader; + private gpuDetector: GpuDetector; + private startTime: number = 0; + + constructor( + config: IApiConfig, + containerManager: ContainerManager, + modelRegistry: ModelRegistry, + ) { + this.config = config; + this.containerManager = containerManager; + this.modelRegistry = modelRegistry; + this.gpuDetector = new GpuDetector(); + this.modelLoader = new ModelLoader(modelRegistry, containerManager, true); + this.router = new ApiRouter( + containerManager, + modelRegistry, + this.modelLoader, + config.apiKeys, + ); + } + + /** + * Start the API server + */ + public async start(): Promise { + if (this.server) { + logger.warn('API server is already running'); + return; + } + + this.startTime = Date.now(); + + this.server = http.createServer(async (req, res) => { + await this.handleRequest(req, res); + }); + + return new Promise((resolve, reject) => { + this.server!.listen(this.config.port, this.config.host, () => { + logger.success(`API server started on ${this.config.host}:${this.config.port}`); + logger.info('OpenAI-compatible API available at:'); + logger.info(` POST /v1/chat/completions`); + logger.info(` GET /v1/models`); + logger.info(` POST /v1/embeddings`); + resolve(); + }); + + this.server!.on('error', (error) => { + logger.error(`API server error: ${error.message}`); + reject(error); + }); + }); + } + + /** + * Stop the API server + */ + public async stop(): Promise { + if (!this.server) { + return; + } + + return new Promise((resolve) => { + this.server!.close(() => { + logger.log('API server stopped'); + this.server = undefined; + resolve(); + }); + }); + } + + /** + * Handle incoming HTTP request + */ + private async handleRequest( + req: http.IncomingMessage, + res: http.ServerResponse, + ): Promise { + const startTime = Date.now(); + + // Set CORS headers if enabled + if (this.config.cors) { + this.setCorsHeaders(req, res); + } + + // Handle preflight requests + if (req.method === 'OPTIONS') { + res.writeHead(204); + res.end(); + return; + } + + // Parse URL + const url = new URL(req.url || '/', `http://${req.headers.host || 'localhost'}`); + const path = url.pathname; + + // Health check endpoint (no auth required) + if (path === '/health' || path === '/healthz') { + await this.handleHealthCheck(res); + return; + } + + // Metrics endpoint (no auth required) + if (path === '/metrics') { + await this.handleMetrics(res); + return; + } + + // Route request + try { + await this.router.route(req, res, path); + } catch (error) { + logger.error(`Request error: ${error instanceof Error ? 
error.message : String(error)}`);
+      this.sendError(res, 500, 'Internal server error', 'internal_error');
+    }
+
+    // Log request
+    const duration = Date.now() - startTime;
+    logger.dim(`${req.method} ${path} - ${res.statusCode} (${duration}ms)`);
+  }
+
+  /**
+   * Set CORS headers
+   */
+  private setCorsHeaders(
+    req: http.IncomingMessage,
+    res: http.ServerResponse,
+  ): void {
+    const origin = req.headers.origin || '*';
+    const allowedOrigins = this.config.corsOrigins || ['*'];
+
+    if (allowedOrigins.includes('*') || allowedOrigins.includes(origin)) {
+      res.setHeader('Access-Control-Allow-Origin', origin);
+    }
+
+    res.setHeader('Access-Control-Allow-Methods', 'GET, POST, OPTIONS');
+    res.setHeader('Access-Control-Allow-Headers', 'Content-Type, Authorization');
+    res.setHeader('Access-Control-Max-Age', '86400');
+  }
+
+  /**
+   * Handle health check
+   */
+  private async handleHealthCheck(res: http.ServerResponse): Promise<void> {
+    try {
+      const statuses = await this.containerManager.getAllStatus();
+      const gpus = await this.gpuDetector.detectGpus();
+      const models = await this.containerManager.getAllAvailableModels();
+
+      let status: 'ok' | 'degraded' | 'error' = 'ok';
+      const containerHealth: Record<string, string> = {};
+      const gpuStatus: Record<string, string> = {};
+
+      // Check container health
+      for (const [id, containerStatus] of statuses) {
+        if (containerStatus.running && containerStatus.health === 'healthy') {
+          containerHealth[id] = 'healthy';
+        } else {
+          containerHealth[id] = 'unhealthy';
+          status = 'degraded';
+        }
+      }
+
+      // Check GPU status
+      for (const gpu of gpus) {
+        gpuStatus[gpu.id] = 'available';
+      }
+
+      const response: IHealthResponse = {
+        status,
+        version: '1.0.0', // TODO: Get from config
+        uptime: Math.floor((Date.now() - this.startTime) / 1000),
+        containers: statuses.size,
+        models: models.size,
+        gpus: gpus.length,
+        details: {
+          containers: containerHealth,
+          gpus: gpuStatus,
+        },
+      };
+
+      res.writeHead(status === 'ok' ? 200 : 503, { 'Content-Type': 'application/json' });
+      res.end(JSON.stringify(response, null, 2));
+    } catch (error) {
+      res.writeHead(500, { 'Content-Type': 'application/json' });
+      res.end(JSON.stringify({
+        status: 'error',
+        error: error instanceof Error ?
error.message : String(error), + })); + } + } + + /** + * Handle metrics endpoint (Prometheus format) + */ + private async handleMetrics(res: http.ServerResponse): Promise { + try { + const metrics: string[] = []; + const timestamp = Date.now(); + + // Server uptime + const uptime = Math.floor((timestamp - this.startTime) / 1000); + metrics.push(`# HELP modelgrid_uptime_seconds Server uptime in seconds`); + metrics.push(`# TYPE modelgrid_uptime_seconds gauge`); + metrics.push(`modelgrid_uptime_seconds ${uptime}`); + + // Container count + const statuses = await this.containerManager.getAllStatus(); + metrics.push(`# HELP modelgrid_containers_total Total number of containers`); + metrics.push(`# TYPE modelgrid_containers_total gauge`); + metrics.push(`modelgrid_containers_total ${statuses.size}`); + + // Running containers + const running = Array.from(statuses.values()).filter((s) => s.running).length; + metrics.push(`# HELP modelgrid_containers_running Number of running containers`); + metrics.push(`# TYPE modelgrid_containers_running gauge`); + metrics.push(`modelgrid_containers_running ${running}`); + + // Available models + const models = await this.containerManager.getAllAvailableModels(); + metrics.push(`# HELP modelgrid_models_available Number of available models`); + metrics.push(`# TYPE modelgrid_models_available gauge`); + metrics.push(`modelgrid_models_available ${models.size}`); + + // GPU count + const gpus = await this.gpuDetector.detectGpus(); + metrics.push(`# HELP modelgrid_gpus_total Total number of GPUs`); + metrics.push(`# TYPE modelgrid_gpus_total gauge`); + metrics.push(`modelgrid_gpus_total ${gpus.length}`); + + res.writeHead(200, { 'Content-Type': 'text/plain; charset=utf-8' }); + res.end(metrics.join('\n') + '\n'); + } catch (error) { + res.writeHead(500, { 'Content-Type': 'text/plain' }); + res.end(`# Error: ${error instanceof Error ? error.message : String(error)}\n`); + } + } + + /** + * Send error response + */ + private sendError( + res: http.ServerResponse, + statusCode: number, + message: string, + type: string, + ): void { + res.writeHead(statusCode, { 'Content-Type': 'application/json' }); + res.end(JSON.stringify({ + error: { + message, + type, + code: null, + }, + })); + } + + /** + * Get server info + */ + public getInfo(): { + running: boolean; + host: string; + port: number; + uptime: number; + } { + return { + running: !!this.server, + host: this.config.host, + port: this.config.port, + uptime: this.startTime ? Math.floor((Date.now() - this.startTime) / 1000) : 0, + }; + } +} diff --git a/ts/cli.ts b/ts/cli.ts new file mode 100644 index 0000000..79d8601 --- /dev/null +++ b/ts/cli.ts @@ -0,0 +1,423 @@ +/** + * ModelGrid CLI + * + * Command line interface for ModelGrid. 
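+ *
+ * Typical entry-point usage (a sketch; the command is read from argv[2],
+ * so the full process.argv is expected):
+ *   const cli = new ModelGridCli();
+ *   await cli.parseAndExecute(process.argv);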
+ */ + +import { ModelGrid } from './modelgrid.ts'; +import { logger } from './logger.ts'; +import { theme } from './colors.ts'; +import { VERSION } from './constants.ts'; + +/** + * CLI handler for ModelGrid + */ +export class ModelGridCli { + private readonly modelgrid: ModelGrid; + + constructor() { + this.modelgrid = new ModelGrid(); + } + + /** + * Parse command line arguments and execute the appropriate command + */ + public async parseAndExecute(args: string[]): Promise { + const debugOptions = this.extractDebugOptions(args); + + // Check for version flag + if (debugOptions.cleanedArgs.includes('--version') || debugOptions.cleanedArgs.includes('-v')) { + this.showVersion(); + return; + } + + // Get the command (default to help if none provided) + const command = debugOptions.cleanedArgs[2] || 'help'; + const commandArgs = debugOptions.cleanedArgs.slice(3); + + await this.executeCommand(command, commandArgs, debugOptions.debugMode); + } + + /** + * Extract debug options from args + */ + private extractDebugOptions(args: string[]): { debugMode: boolean; cleanedArgs: string[] } { + const debugMode = args.includes('--debug') || args.includes('-d'); + const cleanedArgs = args.filter((arg) => arg !== '--debug' && arg !== '-d'); + return { debugMode, cleanedArgs }; + } + + /** + * Execute a command + */ + private async executeCommand( + command: string, + commandArgs: string[], + debugMode: boolean, + ): Promise { + const serviceHandler = this.modelgrid.getServiceHandler(); + const gpuHandler = this.modelgrid.getGpuHandler(); + const containerHandler = this.modelgrid.getContainerHandler(); + const modelHandler = this.modelgrid.getModelHandler(); + const configHandler = this.modelgrid.getConfigHandler(); + + // Service commands + if (command === 'service') { + const subcommand = commandArgs[0] || 'status'; + + switch (subcommand) { + case 'enable': + await serviceHandler.enable(); + break; + case 'disable': + await serviceHandler.disable(); + break; + case 'start': + await serviceHandler.start(); + break; + case 'stop': + await serviceHandler.stop(); + break; + case 'restart': + await serviceHandler.stop(); + await new Promise((resolve) => setTimeout(resolve, 2000)); + await serviceHandler.start(); + break; + case 'status': + await serviceHandler.status(); + break; + case 'logs': + await serviceHandler.logs(); + break; + case 'start-daemon': + await serviceHandler.daemonStart(debugMode); + break; + default: + this.showServiceHelp(); + break; + } + return; + } + + // GPU commands + if (command === 'gpu') { + const subcommand = commandArgs[0] || 'list'; + + switch (subcommand) { + case 'list': + case 'ls': + await gpuHandler.list(); + break; + case 'status': + await gpuHandler.status(); + break; + case 'drivers': + await gpuHandler.drivers(); + break; + case 'install': + await gpuHandler.install(); + break; + default: + this.showGpuHelp(); + break; + } + return; + } + + // Container commands + if (command === 'container') { + const subcommand = commandArgs[0] || 'list'; + const subcommandArgs = commandArgs.slice(1); + + switch (subcommand) { + case 'list': + case 'ls': + await containerHandler.list(); + break; + case 'add': + await containerHandler.add(); + break; + case 'remove': + case 'rm': + await containerHandler.remove(subcommandArgs[0]); + break; + case 'start': + await containerHandler.start(subcommandArgs[0]); + break; + case 'stop': + await containerHandler.stop(subcommandArgs[0]); + break; + case 'logs': + await containerHandler.logs(subcommandArgs[0], parseInt(subcommandArgs[1] || 
'100', 10));
+          break;
+        default:
+          this.showContainerHelp();
+          break;
+      }
+      return;
+    }
+
+    // Model commands
+    if (command === 'model') {
+      const subcommand = commandArgs[0] || 'list';
+      const subcommandArgs = commandArgs.slice(1);
+
+      switch (subcommand) {
+        case 'list':
+        case 'ls':
+          await modelHandler.list();
+          break;
+        case 'pull':
+          await modelHandler.pull(subcommandArgs[0]);
+          break;
+        case 'remove':
+        case 'rm':
+          await modelHandler.remove(subcommandArgs[0]);
+          break;
+        case 'status':
+          await modelHandler.status();
+          break;
+        case 'refresh':
+          await modelHandler.refresh();
+          break;
+        default:
+          this.showModelHelp();
+          break;
+      }
+      return;
+    }
+
+    // Config commands
+    if (command === 'config') {
+      const subcommand = commandArgs[0] || 'show';
+      const subcommandArgs = commandArgs.slice(1);
+
+      switch (subcommand) {
+        case 'show':
+        case 'display':
+          await configHandler.show();
+          break;
+        case 'init':
+          await configHandler.init();
+          break;
+        case 'apikey': {
+          const keySubcommand = subcommandArgs[0] || 'list';
+          switch (keySubcommand) {
+            case 'add':
+              await configHandler.addApiKey(subcommandArgs[1]);
+              break;
+            case 'remove':
+            case 'rm':
+              await configHandler.removeApiKey(subcommandArgs[1]);
+              break;
+            case 'list':
+            case 'ls':
+            default:
+              await configHandler.listApiKeys();
+              break;
+          }
+          break;
+        }
+        default:
+          this.showConfigHelp();
+          break;
+      }
+      return;
+    }
+
+    // Top-level commands
+    switch (command) {
+      case 'update':
+        await serviceHandler.update();
+        break;
+      case 'uninstall':
+        await serviceHandler.uninstall();
+        break;
+      case 'help':
+      case '--help':
+      case '-h':
+        this.showHelp();
+        break;
+      default:
+        logger.error(`Unknown command: ${command}`);
+        logger.log('');
+        this.showHelp();
+        break;
+    }
+  }
+
+  /**
+   * Display version information
+   */
+  private showVersion(): void {
+    logger.log(`ModelGrid version ${VERSION}`);
+    logger.log('GPU Infrastructure & AI Model Management (https://modelgrid.com)');
+  }
+
+  /**
+   * Display help message
+   */
+  private showHelp(): void {
+    console.log('');
+    logger.highlight('ModelGrid - AI Infrastructure Management');
+    logger.dim('GPU detection, container orchestration, and OpenAI-compatible API');
+    console.log('');
+
+    logger.log(theme.info('Usage:'));
+    logger.log(`  ${theme.command('modelgrid')} ${theme.dim('<command> [options]')}`);
+    console.log('');
+
+    logger.log(theme.info('Commands:'));
+    this.printCommand('service <subcommand>', 'Manage systemd service');
+    this.printCommand('gpu <subcommand>', 'Manage GPU hardware');
+    this.printCommand('container <subcommand>', 'Manage AI containers');
+    this.printCommand('model <subcommand>', 'Manage AI models');
+    this.printCommand('config <subcommand>', 'Manage configuration');
+    this.printCommand('update', 'Update ModelGrid', theme.dim('(requires root)'));
+    this.printCommand('uninstall', 'Remove ModelGrid', theme.dim('(requires root)'));
+    this.printCommand('help, --help, -h', 'Show this help message');
+    this.printCommand('--version, -v', 'Show version information');
+    console.log('');
+
+    logger.log(theme.info('Quick Start:'));
+    logger.dim('  modelgrid gpu list           # Detect GPUs');
+    logger.dim('  modelgrid container add      # Add an Ollama/vLLM container');
+    logger.dim('  modelgrid container start    # Start containers');
+    logger.dim('  modelgrid model pull llama3  # Pull a model');
+    logger.dim('  modelgrid service enable     # Install as service');
+    console.log('');
+
+    logger.log(theme.info('API Usage:'));
+    logger.dim('  curl -X POST http://localhost:8080/v1/chat/completions \\');
+    logger.dim('    -H "Authorization: Bearer YOUR_API_KEY" \\');
+    logger.dim('    -H
"Content-Type: application/json" \\'); + logger.dim(' -d \'{"model": "llama3", "messages": [{"role": "user", "content": "Hello"}]}\''); + console.log(''); + } + + /** + * Helper to print a command + */ + private printCommand(command: string, description: string, extra?: string): void { + const paddedCommand = command.padEnd(28); + logger.log(` ${theme.command(paddedCommand)} ${description}${extra ? ' ' + extra : ''}`); + } + + /** + * Display service help + */ + private showServiceHelp(): void { + logger.log(` +ModelGrid - Service Management Commands + +Usage: + modelgrid service + +Subcommands: + enable Install and enable the systemd service (requires root) + disable Stop and disable the systemd service (requires root) + start Start the systemd service + stop Stop the systemd service + restart Restart the systemd service + status Show service status + logs Show service logs in real-time + start-daemon Start the daemon process directly + +Options: + --debug, -d Enable debug mode +`); + } + + /** + * Display GPU help + */ + private showGpuHelp(): void { + logger.log(` +ModelGrid - GPU Management Commands + +Usage: + modelgrid gpu + +Subcommands: + list List detected GPUs + status Show GPU utilization and status + drivers Check GPU driver status + install Install GPU drivers (requires root) + +Examples: + modelgrid gpu list # Show all detected GPUs + modelgrid gpu status # Show current GPU utilization +`); + } + + /** + * Display container help + */ + private showContainerHelp(): void { + logger.log(` +ModelGrid - Container Management Commands + +Usage: + modelgrid container [arguments] + +Subcommands: + list List all configured containers + add Add a new container interactively + remove Remove a container by ID + start [id] Start a container (or all if no ID) + stop [id] Stop a container (or all if no ID) + logs Show container logs + +Examples: + modelgrid container add # Add new container + modelgrid container start ollama # Start specific container + modelgrid container logs ollama # View container logs +`); + } + + /** + * Display model help + */ + private showModelHelp(): void { + logger.log(` +ModelGrid - Model Management Commands + +Usage: + modelgrid model [arguments] + +Subcommands: + list List all available models + pull Pull a model (must be greenlit) + remove Remove a model + status Show model loading recommendations + refresh Refresh greenlist cache + +Examples: + modelgrid model list # Show all models + modelgrid model pull llama3:8b # Pull a model + modelgrid model status # Show VRAM recommendations +`); + } + + /** + * Display config help + */ + private showConfigHelp(): void { + logger.log(` +ModelGrid - Configuration Commands + +Usage: + modelgrid config [arguments] + +Subcommands: + show Display current configuration + init Initialize default configuration + apikey list List configured API keys + apikey add [key] Add an API key (generates if not provided) + apikey remove Remove an API key + +Examples: + modelgrid config show # Show current config + modelgrid config apikey add # Generate new API key +`); + } +} diff --git a/ts/cli/config-handler.ts b/ts/cli/config-handler.ts new file mode 100644 index 0000000..c258f24 --- /dev/null +++ b/ts/cli/config-handler.ts @@ -0,0 +1,314 @@ +/** + * Config Handler + * + * CLI commands for configuration management. 
+ */ + +import { logger } from '../logger.ts'; +import { theme } from '../colors.ts'; +import { PATHS } from '../constants.ts'; +import type { IModelGridConfig } from '../interfaces/config.ts'; +import type { ITableColumn } from '../logger.ts'; +import * as fs from 'node:fs/promises'; + +/** + * Handler for configuration-related CLI commands + */ +export class ConfigHandler { + /** + * Show current configuration + */ + public async show(): Promise { + logger.log(''); + + try { + const configPath = PATHS.CONFIG_FILE; + const configContent = await fs.readFile(configPath, 'utf-8'); + const config = JSON.parse(configContent) as IModelGridConfig; + + // Overview + logger.logBox( + 'ModelGrid Configuration', + [ + `Version: ${theme.highlight(config.version)}`, + `Check Interval: ${theme.info(String(config.checkInterval / 1000))} seconds`, + '', + theme.dim('Configuration File:'), + ` ${theme.path(configPath)}`, + ], + 60, + 'info', + ); + + // API Configuration + logger.log(''); + logger.logBox( + 'API Server', + [ + `Host: ${theme.info(config.api.host)}`, + `Port: ${theme.highlight(String(config.api.port))}`, + `API Keys: ${config.api.apiKeys.length} configured`, + ...(config.api.rateLimit + ? [`Rate Limit: ${config.api.rateLimit} req/min`] + : []), + '', + theme.dim('Endpoint:'), + ` http://${config.api.host}:${config.api.port}/v1/chat/completions`, + ], + 60, + 'info', + ); + + // Docker Configuration + logger.log(''); + logger.logBox( + 'Docker', + [ + `Runtime: ${theme.info(config.docker.runtime)}`, + `Network: ${config.docker.networkName}`, + ], + 60, + 'default', + ); + + // GPU Configuration + logger.log(''); + logger.logBox( + 'GPU', + [ + `Auto Detect: ${config.gpus.autoDetect ? theme.success('Yes') : theme.dim('No')}`, + `Assignments: ${Object.keys(config.gpus.assignments).length} GPU(s)`, + ], + 60, + 'default', + ); + + // Model Configuration + logger.log(''); + logger.logBox( + 'Models', + [ + `Auto Pull: ${config.models.autoPull ? theme.success('Enabled') : theme.dim('Disabled')}`, + `Default Container: ${config.models.defaultContainer}`, + `Auto Load: ${config.models.autoLoad.length} model(s)`, + '', + theme.dim('Greenlist URL:'), + ` ${config.models.greenlistUrl}`, + ], + 70, + 'default', + ); + + // Containers + if (config.containers.length > 0) { + logger.log(''); + logger.info(`Containers (${config.containers.length}):`); + logger.log(''); + + const rows = config.containers.map((c) => ({ + id: c.id, + name: c.name, + type: c.type, + image: c.image.length > 40 ? c.image.substring(0, 37) + '...' : c.image, + port: c.port, + gpus: c.gpuIds.length > 0 ? 
c.gpuIds.join(',') : theme.dim('None'), + })); + + const columns: ITableColumn[] = [ + { header: 'ID', key: 'id', align: 'left' }, + { header: 'Name', key: 'name', align: 'left', color: theme.highlight }, + { header: 'Type', key: 'type', align: 'left' }, + { header: 'Image', key: 'image', align: 'left', color: theme.dim }, + { header: 'Port', key: 'port', align: 'right' }, + { header: 'GPUs', key: 'gpus', align: 'left' }, + ]; + + logger.logTable(columns, rows); + } + + logger.log(''); + } catch (error) { + if ((error as NodeJS.ErrnoException).code === 'ENOENT') { + logger.logBox( + 'No Configuration', + [ + 'No configuration file found.', + '', + theme.dim('Create configuration with:'), + ` ${theme.command('modelgrid service enable')}`, + '', + theme.dim('Or manually create:'), + ` ${PATHS.CONFIG_FILE}`, + ], + 60, + 'warning', + ); + } else { + logger.error(`Failed to read configuration: ${(error as Error).message}`); + } + } + } + + /** + * Initialize default configuration + */ + public async init(): Promise { + const configPath = PATHS.CONFIG_FILE; + + // Check if config already exists + try { + await fs.access(configPath); + logger.warn('Configuration file already exists'); + logger.dim(` ${configPath}`); + return; + } catch { + // File doesn't exist, continue + } + + // Create config directory + const configDir = PATHS.CONFIG_DIR; + await fs.mkdir(configDir, { recursive: true }); + + // Create default config + const defaultConfig: IModelGridConfig = { + version: '1.0.0', + api: { + port: 8080, + host: '0.0.0.0', + apiKeys: [], + cors: true, + corsOrigins: ['*'], + }, + docker: { + networkName: 'modelgrid', + runtime: 'docker', + }, + gpus: { + autoDetect: true, + assignments: {}, + }, + containers: [], + models: { + greenlistUrl: 'https://code.foss.global/modelgrid.com/model_lists/raw/branch/main/greenlit.json', + autoPull: true, + defaultContainer: 'ollama', + autoLoad: [], + }, + checkInterval: 30000, + }; + + await fs.writeFile(configPath, JSON.stringify(defaultConfig, null, 2)); + + logger.success('Configuration initialized'); + logger.dim(` ${configPath}`); + } + + /** + * Add an API key + */ + public async addApiKey(key?: string): Promise { + const configPath = PATHS.CONFIG_FILE; + + try { + const configContent = await fs.readFile(configPath, 'utf-8'); + const config = JSON.parse(configContent) as IModelGridConfig; + + // Generate key if not provided + const apiKey = key || this.generateApiKey(); + + if (config.api.apiKeys.includes(apiKey)) { + logger.warn('API key already exists'); + return; + } + + config.api.apiKeys.push(apiKey); + + await fs.writeFile(configPath, JSON.stringify(config, null, 2)); + + logger.success('API key added:'); + logger.log(` ${theme.highlight(apiKey)}`); + logger.log(''); + logger.dim('Use with Authorization header:'); + logger.dim(` curl -H "Authorization: Bearer ${apiKey}" ...`); + } catch (error) { + logger.error(`Failed to add API key: ${(error as Error).message}`); + } + } + + /** + * Remove an API key + */ + public async removeApiKey(key: string): Promise { + if (!key) { + logger.error('API key is required'); + return; + } + + const configPath = PATHS.CONFIG_FILE; + + try { + const configContent = await fs.readFile(configPath, 'utf-8'); + const config = JSON.parse(configContent) as IModelGridConfig; + + const index = config.api.apiKeys.indexOf(key); + if (index === -1) { + logger.warn('API key not found'); + return; + } + + config.api.apiKeys.splice(index, 1); + + await fs.writeFile(configPath, JSON.stringify(config, null, 2)); + + 
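+      // Config persisted without the removed key; confirm to the caller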
logger.success('API key removed'); + } catch (error) { + logger.error(`Failed to remove API key: ${(error as Error).message}`); + } + } + + /** + * List API keys + */ + public async listApiKeys(): Promise { + const configPath = PATHS.CONFIG_FILE; + + try { + const configContent = await fs.readFile(configPath, 'utf-8'); + const config = JSON.parse(configContent) as IModelGridConfig; + + if (config.api.apiKeys.length === 0) { + logger.warn('No API keys configured'); + logger.dim('Add a key with: modelgrid config apikey add'); + return; + } + + logger.info(`API Keys (${config.api.apiKeys.length}):`); + logger.log(''); + + for (const key of config.api.apiKeys) { + // Show partial key for security + const masked = key.substring(0, 8) + '...' + key.substring(key.length - 4); + logger.log(` ${masked}`); + } + + logger.log(''); + } catch (error) { + logger.error(`Failed to list API keys: ${(error as Error).message}`); + } + } + + /** + * Generate a random API key + */ + private generateApiKey(): string { + const chars = 'ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789'; + const length = 48; + let key = 'sk-'; + + for (let i = 0; i < length; i++) { + key += chars.charAt(Math.floor(Math.random() * chars.length)); + } + + return key; + } +} diff --git a/ts/cli/container-handler.ts b/ts/cli/container-handler.ts new file mode 100644 index 0000000..8216958 --- /dev/null +++ b/ts/cli/container-handler.ts @@ -0,0 +1,317 @@ +/** + * Container Handler + * + * CLI commands for container management. + */ + +import { logger } from '../logger.ts'; +import { theme } from '../colors.ts'; +import { ContainerManager } from '../containers/container-manager.ts'; +import { DockerManager } from '../docker/docker-manager.ts'; +import type { IContainerConfig } from '../interfaces/container.ts'; +import type { ITableColumn } from '../logger.ts'; +import * as helpers from '../helpers/index.ts'; + +/** + * Handler for container-related CLI commands + */ +export class ContainerHandler { + private containerManager: ContainerManager; + private dockerManager: DockerManager; + + constructor(containerManager: ContainerManager) { + this.containerManager = containerManager; + this.dockerManager = new DockerManager(); + } + + /** + * List all configured containers + */ + public async list(): Promise { + logger.log(''); + logger.info('Containers'); + logger.log(''); + + const containers = this.containerManager.getAllContainers(); + + if (containers.length === 0) { + logger.logBox( + 'No Containers', + [ + 'No containers are configured.', + '', + theme.dim('Add a container with:'), + ` ${theme.command('modelgrid container add')}`, + ], + 60, + 'warning', + ); + return; + } + + const rows = []; + + for (const container of containers) { + const status = await container.getStatus(); + const config = container.getConfig(); + + rows.push({ + id: config.id, + name: config.name, + type: this.formatContainerType(container.type), + status: status.running + ? theme.success('Running') + : theme.dim('Stopped'), + health: status.running + ? this.formatHealth(status.health) + : theme.dim('N/A'), + port: config.externalPort || config.port, + models: status.loadedModels.length, + gpus: config.gpuIds.length > 0 ? 
config.gpuIds.join(',') : theme.dim('None'), + }); + } + + const columns: ITableColumn[] = [ + { header: 'ID', key: 'id', align: 'left' }, + { header: 'Name', key: 'name', align: 'left', color: theme.highlight }, + { header: 'Type', key: 'type', align: 'left' }, + { header: 'Status', key: 'status', align: 'left' }, + { header: 'Health', key: 'health', align: 'left' }, + { header: 'Port', key: 'port', align: 'right', color: theme.info }, + { header: 'Models', key: 'models', align: 'right' }, + { header: 'GPUs', key: 'gpus', align: 'left' }, + ]; + + logger.logTable(columns, rows); + logger.log(''); + } + + /** + * Add a new container interactively + */ + public async add(): Promise { + const { prompt, close, select } = await helpers.createPrompt(); + + try { + logger.log(''); + logger.highlight('Add Container'); + logger.dim('Configure a new AI model container'); + logger.log(''); + + // Select container type + const typeIndex = await select('Select container type:', [ + 'Ollama - Easy to use, good for local models', + 'vLLM - High performance, OpenAI compatible', + 'TGI - HuggingFace Text Generation Inference', + ]); + + const types = ['ollama', 'vllm', 'tgi'] as const; + const containerType = types[typeIndex]; + + // Container name + const name = await prompt('Container name: '); + if (!name.trim()) { + logger.error('Container name is required'); + return; + } + + // Generate ID from name + const id = name.toLowerCase().replace(/[^a-z0-9-]/g, '-'); + + // Port + const defaultPorts = { ollama: 11434, vllm: 8000, tgi: 8080 }; + const portStr = await prompt(`Port [${defaultPorts[containerType]}]: `); + const port = portStr ? parseInt(portStr, 10) : defaultPorts[containerType]; + + // GPU assignment + const gpuStr = await prompt('GPU IDs (comma-separated, or "all", or empty for none): '); + let gpuIds: string[] = []; + + if (gpuStr.trim().toLowerCase() === 'all') { + const { GpuDetector } = await import('../hardware/gpu-detector.ts'); + const detector = new GpuDetector(); + const gpus = await detector.detectGpus(); + gpuIds = gpus.map((g) => g.id); + } else if (gpuStr.trim()) { + gpuIds = gpuStr.split(',').map((s) => s.trim()); + } + + // Build config + const config: IContainerConfig = { + id, + type: containerType, + name, + image: this.getDefaultImage(containerType), + port, + gpuIds, + models: [], + }; + + // Add container + await this.containerManager.addContainer(config); + + logger.log(''); + logger.success(`Container "${name}" added successfully`); + logger.log(''); + logger.dim('Start the container with:'); + logger.log(` ${theme.command(`modelgrid container start ${id}`)}`); + logger.log(''); + } finally { + close(); + } + } + + /** + * Remove a container + */ + public async remove(containerId: string): Promise { + if (!containerId) { + logger.error('Container ID is required'); + return; + } + + const { prompt, close } = await helpers.createPrompt(); + + try { + const confirm = await prompt(`Remove container "${containerId}"? 
(y/N): `); + + if (confirm.toLowerCase() !== 'y') { + logger.log('Aborted'); + return; + } + + const success = await this.containerManager.removeContainer(containerId); + + if (success) { + logger.success(`Container "${containerId}" removed`); + } else { + logger.error(`Failed to remove container "${containerId}"`); + } + } finally { + close(); + } + } + + /** + * Start a container + */ + public async start(containerId?: string): Promise { + if (containerId) { + // Start specific container + const container = this.containerManager.getContainer(containerId); + if (!container) { + logger.error(`Container "${containerId}" not found`); + return; + } + + logger.info(`Starting container "${containerId}"...`); + const success = await container.start(); + + if (success) { + logger.success(`Container "${containerId}" started`); + } else { + logger.error(`Failed to start container "${containerId}"`); + } + } else { + // Start all containers + logger.info('Starting all containers...'); + await this.containerManager.startAll(); + logger.success('All containers started'); + } + } + + /** + * Stop a container + */ + public async stop(containerId?: string): Promise { + if (containerId) { + // Stop specific container + const container = this.containerManager.getContainer(containerId); + if (!container) { + logger.error(`Container "${containerId}" not found`); + return; + } + + logger.info(`Stopping container "${containerId}"...`); + const success = await container.stop(); + + if (success) { + logger.success(`Container "${containerId}" stopped`); + } else { + logger.error(`Failed to stop container "${containerId}"`); + } + } else { + // Stop all containers + logger.info('Stopping all containers...'); + await this.containerManager.stopAll(); + logger.success('All containers stopped'); + } + } + + /** + * Show container logs + */ + public async logs(containerId: string, lines: number = 100): Promise { + if (!containerId) { + logger.error('Container ID is required'); + return; + } + + const container = this.containerManager.getContainer(containerId); + if (!container) { + logger.error(`Container "${containerId}" not found`); + return; + } + + const logs = await container.getLogs(lines); + console.log(logs); + } + + /** + * Format container type for display + */ + private formatContainerType(type: string): string { + switch (type) { + case 'ollama': + return theme.containerOllama('Ollama'); + case 'vllm': + return theme.containerVllm('vLLM'); + case 'tgi': + return theme.containerTgi('TGI'); + default: + return type; + } + } + + /** + * Format health status + */ + private formatHealth(health: string): string { + switch (health) { + case 'healthy': + return theme.success('Healthy'); + case 'unhealthy': + return theme.error('Unhealthy'); + case 'starting': + return theme.warning('Starting'); + default: + return theme.dim(health); + } + } + + /** + * Get default image for container type + */ + private getDefaultImage(type: string): string { + switch (type) { + case 'ollama': + return 'ollama/ollama:latest'; + case 'vllm': + return 'vllm/vllm-openai:latest'; + case 'tgi': + return 'ghcr.io/huggingface/text-generation-inference:latest'; + default: + return ''; + } + } +} diff --git a/ts/cli/gpu-handler.ts b/ts/cli/gpu-handler.ts new file mode 100644 index 0000000..6019465 --- /dev/null +++ b/ts/cli/gpu-handler.ts @@ -0,0 +1,255 @@ +/** + * GPU Handler + * + * CLI commands for GPU management. 
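+ *
+ * Examples:
+ *   modelgrid gpu list      # detect GPUs via GpuDetector
+ *   modelgrid gpu drivers   # per-vendor driver status via DriverManager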
+ */ + +import { logger } from '../logger.ts'; +import { theme } from '../colors.ts'; +import { GpuDetector } from '../hardware/gpu-detector.ts'; +import { SystemInfo } from '../hardware/system-info.ts'; +import { DriverManager } from '../drivers/driver-manager.ts'; +import type { ITableColumn } from '../logger.ts'; + +/** + * Handler for GPU-related CLI commands + */ +export class GpuHandler { + private gpuDetector: GpuDetector; + private systemInfo: SystemInfo; + private driverManager: DriverManager; + + constructor() { + this.gpuDetector = new GpuDetector(); + this.systemInfo = new SystemInfo(); + this.driverManager = new DriverManager(); + } + + /** + * List detected GPUs + */ + public async list(): Promise { + logger.log(''); + logger.info('Detecting GPUs...'); + logger.log(''); + + const gpus = await this.gpuDetector.detectGpus(); + + if (gpus.length === 0) { + logger.logBox( + 'No GPUs Detected', + [ + 'No GPUs were found on this system.', + '', + theme.dim('Possible reasons:'), + ' - No discrete GPU installed', + ' - GPU drivers not installed', + ' - GPU not properly connected', + ], + 60, + 'warning', + ); + return; + } + + const rows = gpus.map((gpu) => ({ + id: gpu.id, + vendor: this.formatVendor(gpu.vendor), + model: gpu.model, + vram: `${Math.round(gpu.vram / 1024)} GB`, + driver: gpu.driverVersion || theme.dim('N/A'), + cuda: gpu.cudaVersion || theme.dim('N/A'), + pci: gpu.pciSlot, + })); + + const columns: ITableColumn[] = [ + { header: 'ID', key: 'id', align: 'left' }, + { header: 'Vendor', key: 'vendor', align: 'left' }, + { header: 'Model', key: 'model', align: 'left', color: theme.highlight }, + { header: 'VRAM', key: 'vram', align: 'right', color: theme.info }, + { header: 'Driver', key: 'driver', align: 'left' }, + { header: 'CUDA', key: 'cuda', align: 'left' }, + { header: 'PCI', key: 'pci', align: 'left', color: theme.dim }, + ]; + + logger.info(`Found ${gpus.length} GPU(s):`); + logger.log(''); + logger.logTable(columns, rows); + logger.log(''); + } + + /** + * Show GPU status and utilization + */ + public async status(): Promise { + logger.log(''); + logger.info('GPU Status'); + logger.log(''); + + const gpuStatus = await this.gpuDetector.getGpuStatus(); + + if (gpuStatus.length === 0) { + logger.warn('No GPUs detected'); + return; + } + + for (const gpu of gpuStatus) { + const utilizationBar = this.createProgressBar(gpu.utilization, 30); + const memoryBar = this.createProgressBar(gpu.memoryUsed / gpu.memoryTotal * 100, 30); + + logger.logBoxTitle(`GPU ${gpu.id}: ${gpu.name}`, 70, 'info'); + logger.logBoxLine(`Utilization: ${utilizationBar} ${gpu.utilization.toFixed(1)}%`); + logger.logBoxLine(`Memory: ${memoryBar} ${Math.round(gpu.memoryUsed)}/${Math.round(gpu.memoryTotal)} MB`); + logger.logBoxLine(`Temperature: ${this.formatTemperature(gpu.temperature)}`); + logger.logBoxLine(`Power: ${gpu.powerDraw.toFixed(0)}W / ${gpu.powerLimit.toFixed(0)}W`); + logger.logBoxEnd(); + logger.log(''); + } + } + + /** + * Check and install GPU drivers + */ + public async drivers(): Promise { + logger.log(''); + logger.info('GPU Driver Status'); + logger.log(''); + + // Get system info first + const sysInfo = await this.systemInfo.getSystemInfo(); + + // Detect GPUs + const gpus = await this.gpuDetector.detectGpus(); + + if (gpus.length === 0) { + logger.warn('No GPUs detected'); + return; + } + + // Check driver status for each vendor + const vendors = new Set(gpus.map((g) => g.vendor)); + + for (const vendor of vendors) { + const driver = this.driverManager.getDriver(vendor); + 
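+      // getDriver() may yield nothing for an unsupported vendor;
+      // warn and move on rather than failing the whole run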
if (!driver) { + logger.warn(`No driver support for ${vendor}`); + continue; + } + + const status = await driver.getStatus(); + + logger.logBoxTitle(`${this.formatVendor(vendor)} Driver`, 60, status.installed ? 'success' : 'warning'); + logger.logBoxLine(`Installed: ${status.installed ? theme.success('Yes') : theme.error('No')}`); + + if (status.installed) { + logger.logBoxLine(`Version: ${status.version || 'Unknown'}`); + logger.logBoxLine(`Runtime: ${status.runtimeVersion || 'Unknown'}`); + logger.logBoxLine(`Container Support: ${status.containerSupport ? theme.success('Yes') : theme.warning('No')}`); + } else { + logger.logBoxLine(''); + logger.logBoxLine(theme.dim('Run `modelgrid gpu install` to install drivers')); + } + + logger.logBoxEnd(); + logger.log(''); + } + } + + /** + * Install GPU drivers + */ + public async install(): Promise { + logger.log(''); + logger.info('Installing GPU Drivers'); + logger.log(''); + + // Detect GPUs + const gpus = await this.gpuDetector.detectGpus(); + + if (gpus.length === 0) { + logger.error('No GPUs detected - cannot install drivers'); + return; + } + + // Install drivers for each vendor + const vendors = new Set(gpus.map((g) => g.vendor)); + + for (const vendor of vendors) { + const driver = this.driverManager.getDriver(vendor); + if (!driver) { + logger.warn(`No driver installer for ${vendor}`); + continue; + } + + logger.info(`Installing ${this.formatVendor(vendor)} drivers...`); + + const success = await driver.install(); + + if (success) { + logger.success(`${this.formatVendor(vendor)} drivers installed successfully`); + + // Setup container support + logger.info('Setting up container support...'); + const containerSuccess = await driver.setupContainer(); + + if (containerSuccess) { + logger.success('Container support configured'); + } else { + logger.warn('Container support setup failed - GPU passthrough may not work'); + } + } else { + logger.error(`Failed to install ${this.formatVendor(vendor)} drivers`); + } + + logger.log(''); + } + } + + /** + * Format vendor name for display + */ + private formatVendor(vendor: string): string { + switch (vendor) { + case 'nvidia': + return theme.gpuNvidia('NVIDIA'); + case 'amd': + return theme.gpuAmd('AMD'); + case 'intel': + return theme.gpuIntel('Intel'); + default: + return vendor; + } + } + + /** + * Create a progress bar + */ + private createProgressBar(percent: number, width: number): string { + const filled = Math.round((percent / 100) * width); + const empty = width - filled; + const bar = '█'.repeat(filled) + '░'.repeat(empty); + + if (percent >= 90) { + return theme.error(bar); + } else if (percent >= 70) { + return theme.warning(bar); + } else { + return theme.success(bar); + } + } + + /** + * Format temperature with color coding + */ + private formatTemperature(temp: number): string { + const tempStr = `${temp}°C`; + + if (temp >= 85) { + return theme.error(tempStr); + } else if (temp >= 70) { + return theme.warning(tempStr); + } else { + return theme.success(tempStr); + } + } +} diff --git a/ts/cli/model-handler.ts b/ts/cli/model-handler.ts new file mode 100644 index 0000000..c052814 --- /dev/null +++ b/ts/cli/model-handler.ts @@ -0,0 +1,202 @@ +/** + * Model Handler + * + * CLI commands for model management. 
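+ *
+ * Examples (pulls go through ModelLoader; models must be greenlit):
+ *   modelgrid model pull llama3:8b
+ *   modelgrid model refresh   # re-fetch the greenlist cache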
+ */ + +import { logger } from '../logger.ts'; +import { theme } from '../colors.ts'; +import { ContainerManager } from '../containers/container-manager.ts'; +import { ModelRegistry } from '../models/registry.ts'; +import { ModelLoader } from '../models/loader.ts'; +import type { ITableColumn } from '../logger.ts'; + +/** + * Handler for model-related CLI commands + */ +export class ModelHandler { + private containerManager: ContainerManager; + private modelRegistry: ModelRegistry; + private modelLoader: ModelLoader; + + constructor( + containerManager: ContainerManager, + modelRegistry: ModelRegistry, + ) { + this.containerManager = containerManager; + this.modelRegistry = modelRegistry; + this.modelLoader = new ModelLoader(modelRegistry, containerManager); + } + + /** + * List all available models + */ + public async list(): Promise { + logger.log(''); + logger.info('Models'); + logger.log(''); + + // Get loaded models from containers + const loadedModels = await this.containerManager.getAllAvailableModels(); + + // Get greenlit models + const greenlitModels = await this.modelRegistry.getAllGreenlitModels(); + + if (loadedModels.size === 0 && greenlitModels.length === 0) { + logger.logBox( + 'No Models', + [ + 'No models are loaded or greenlit.', + '', + theme.dim('Pull a model with:'), + ` ${theme.command('modelgrid model pull ')}`, + ], + 60, + 'warning', + ); + return; + } + + // Show loaded models + if (loadedModels.size > 0) { + logger.info(`Loaded Models (${loadedModels.size}):`); + logger.log(''); + + const rows = []; + for (const [name, info] of loadedModels) { + rows.push({ + name, + container: info.container, + size: info.size ? this.formatSize(info.size) : theme.dim('N/A'), + format: info.format || theme.dim('N/A'), + modified: info.modifiedAt + ? 
new Date(info.modifiedAt).toLocaleDateString() + : theme.dim('N/A'), + }); + } + + const columns: ITableColumn[] = [ + { header: 'Name', key: 'name', align: 'left', color: theme.highlight }, + { header: 'Container', key: 'container', align: 'left' }, + { header: 'Size', key: 'size', align: 'right', color: theme.info }, + { header: 'Format', key: 'format', align: 'left' }, + { header: 'Modified', key: 'modified', align: 'left', color: theme.dim }, + ]; + + logger.logTable(columns, rows); + logger.log(''); + } + + // Show greenlit models (not yet loaded) + const loadedNames = new Set(loadedModels.keys()); + const unloadedGreenlit = greenlitModels.filter((m) => !loadedNames.has(m.name)); + + if (unloadedGreenlit.length > 0) { + logger.info(`Available to Pull (${unloadedGreenlit.length}):`); + logger.log(''); + + const rows = unloadedGreenlit.map((m) => ({ + name: m.name, + container: m.container, + vram: `${m.minVram} GB`, + tags: m.tags?.join(', ') || theme.dim('None'), + })); + + const columns: ITableColumn[] = [ + { header: 'Name', key: 'name', align: 'left' }, + { header: 'Container', key: 'container', align: 'left' }, + { header: 'Min VRAM', key: 'vram', align: 'right', color: theme.info }, + { header: 'Tags', key: 'tags', align: 'left', color: theme.dim }, + ]; + + logger.logTable(columns, rows); + logger.log(''); + } + } + + /** + * Pull a model + */ + public async pull(modelName: string): Promise { + if (!modelName) { + logger.error('Model name is required'); + return; + } + + logger.log(''); + logger.info(`Pulling model: ${modelName}`); + logger.log(''); + + const result = await this.modelLoader.loadModel(modelName); + + if (result.success) { + if (result.alreadyLoaded) { + logger.success(`Model "${modelName}" is already loaded`); + } else { + logger.success(`Model "${modelName}" pulled successfully`); + } + if (result.container) { + logger.dim(`Container: ${result.container}`); + } + } else { + logger.error(`Failed to pull model: ${result.error}`); + } + + logger.log(''); + } + + /** + * Remove a model + */ + public async remove(modelName: string): Promise { + if (!modelName) { + logger.error('Model name is required'); + return; + } + + logger.info(`Removing model: ${modelName}`); + + const success = await this.modelLoader.unloadModel(modelName); + + if (success) { + logger.success(`Model "${modelName}" removed`); + } else { + logger.error(`Failed to remove model "${modelName}"`); + } + } + + /** + * Show model loading status and recommendations + */ + public async status(): Promise { + logger.log(''); + await this.modelLoader.printStatus(); + } + + /** + * Refresh greenlist cache + */ + public async refresh(): Promise { + logger.info('Refreshing greenlist...'); + + await this.modelRegistry.refreshGreenlist(); + + logger.success('Greenlist refreshed'); + } + + /** + * Format file size + */ + private formatSize(bytes: number): string { + const units = ['B', 'KB', 'MB', 'GB', 'TB']; + let size = bytes; + let unitIndex = 0; + + while (size >= 1024 && unitIndex < units.length - 1) { + size /= 1024; + unitIndex++; + } + + return `${size.toFixed(1)} ${units[unitIndex]}`; + } +} diff --git a/ts/cli/service-handler.ts b/ts/cli/service-handler.ts new file mode 100644 index 0000000..7d1cf6d --- /dev/null +++ b/ts/cli/service-handler.ts @@ -0,0 +1,252 @@ +/** + * Service Handler + * + * CLI commands for systemd service management. 
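+ *
+ * Examples (enable/disable/update/uninstall require root):
+ *   sudo modelgrid service enable
+ *   modelgrid service logs   # follows journalctl -u modelgrid.service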
diff --git a/ts/cli/service-handler.ts b/ts/cli/service-handler.ts
new file mode 100644
index 0000000..7d1cf6d
--- /dev/null
+++ b/ts/cli/service-handler.ts
@@ -0,0 +1,252 @@
+/**
+ * Service Handler
+ *
+ * CLI commands for systemd service management.
+ */
+
+import process from 'node:process';
+import { execSync } from 'node:child_process';
+import { logger } from '../logger.ts';
+import { theme } from '../colors.ts';
+import { PATHS } from '../constants.ts';
+import type { ModelGrid } from '../modelgrid.ts';
+
+/**
+ * Handler for service-related CLI commands
+ */
+export class ServiceHandler {
+  private readonly modelgrid: ModelGrid;
+
+  constructor(modelgrid: ModelGrid) {
+    this.modelgrid = modelgrid;
+  }
+
+  /**
+   * Enable the service (requires root)
+   */
+  public async enable(): Promise<void> {
+    this.checkRootAccess('This command must be run as root.');
+    await this.modelgrid.getSystemd().install();
+    logger.log('ModelGrid service has been installed. Use "modelgrid service start" to start the service.');
+  }
+
+  /**
+   * Start the daemon directly
+   */
+  public async daemonStart(debugMode: boolean = false): Promise<void> {
+    logger.log('Starting ModelGrid daemon...');
+    try {
+      if (debugMode) {
+        logger.log('Debug mode enabled');
+      }
+      await this.modelgrid.getDaemon().start();
+    } catch (error) {
+      logger.error(`Daemon start failed: ${error instanceof Error ? error.message : String(error)}`);
+      process.exit(1);
+    }
+  }
+
+  /**
+   * Show logs of the systemd service
+   */
+  public async logs(): Promise<void> {
+    try {
+      const { spawn } = await import('node:child_process');
+      logger.log('Tailing modelgrid service logs (Ctrl+C to exit)...\n');
+
+      const journalctl = spawn('journalctl', ['-u', 'modelgrid.service', '-n', '50', '-f'], {
+        stdio: ['ignore', 'inherit', 'inherit'],
+      });
+
+      process.on('SIGINT', () => {
+        journalctl.kill('SIGINT');
+        process.exit(0);
+      });
+
+      await new Promise<void>((resolve) => {
+        journalctl.on('exit', () => resolve());
+      });
+    } catch (error) {
+      logger.error(`Failed to retrieve logs: ${error}`);
+      process.exit(1);
+    }
+  }
+
+  /**
+   * Stop the systemd service
+   */
+  public async stop(): Promise<void> {
+    await this.modelgrid.getSystemd().stop();
+  }
+
+  /**
+   * Start the systemd service
+   */
+  public async start(): Promise<void> {
+    try {
+      await this.modelgrid.getSystemd().start();
+    } catch (error) {
+      process.exit(1);
+    }
+  }
+
+  /**
+   * Show status of the systemd service
+   */
+  public async status(): Promise<void> {
+    await this.modelgrid.getSystemd().getStatus();
+  }
+
+  /**
+   * Disable the service (requires root)
+   */
+  public async disable(): Promise<void> {
+    this.checkRootAccess('This command must be run as root.');
+    await this.modelgrid.getSystemd().disable();
+  }
+
+  /**
+   * Check if the user has root access
+   */
+  private checkRootAccess(errorMessage: string): void {
+    if (process.getuid && process.getuid() !== 0) {
+      logger.error(errorMessage);
+      process.exit(1);
+    }
+  }
+
+  /**
+   * Update ModelGrid from repository
+   */
+  public async update(): Promise<void> {
+    try {
+      this.checkRootAccess('This command must be run as root to update ModelGrid.');
+
+      console.log('');
+      logger.info('Checking for updates...');
+
+      try {
+        const currentVersion = this.modelgrid.getVersion();
+        const apiUrl = 'https://code.foss.global/api/v1/repos/modelgrid.com/modelgrid/releases/latest';
+        const response = execSync(`curl -sSL ${apiUrl}`).toString();
+        const release = JSON.parse(response);
+        const latestVersion = release.tag_name;
+
+        const normalizedCurrent = currentVersion.startsWith('v') ? currentVersion : `v${currentVersion}`;
+        const normalizedLatest = latestVersion.startsWith('v') ? latestVersion : `v${latestVersion}`;
+
+        logger.dim(`Current version: ${normalizedCurrent}`);
+        logger.dim(`Latest version: ${normalizedLatest}`);
+        console.log('');
+
+        if (normalizedCurrent === normalizedLatest) {
+          logger.success('Already up to date!');
+          console.log('');
+          return;
+        }
+
+        logger.info(`New version available: ${latestVersion}`);
+        logger.dim('Downloading and installing...');
+        console.log('');
+
+        const installUrl = 'https://code.foss.global/modelgrid.com/modelgrid/raw/branch/main/install.sh';
+
+        execSync(`curl -sSL ${installUrl} | bash`, {
+          stdio: 'inherit',
+        });
+
+        console.log('');
+        logger.success(`Updated to ${latestVersion}`);
+        console.log('');
+      } catch (error) {
+        console.log('');
+        logger.error('Update failed');
+        logger.dim(`${error instanceof Error ? error.message : String(error)}`);
+        console.log('');
+        process.exit(1);
+      }
+    } catch (error) {
+      logger.error(`Update failed: ${error instanceof Error ? error.message : String(error)}`);
+      process.exit(1);
+    }
+  }
+
+  /**
+   * Completely uninstall ModelGrid from the system
+   */
+  public async uninstall(): Promise<void> {
+    this.checkRootAccess('This command must be run as root.');
+
+    try {
+      const helpers = await import('../helpers/index.ts');
+      const { prompt, close } = await helpers.createPrompt();
+
+      logger.log('');
+      logger.highlight('ModelGrid Uninstaller');
+      logger.dim('=====================');
+      logger.log('This will completely remove ModelGrid from your system.');
+      logger.log('');
+
+      const removeConfig = await prompt('Do you want to remove configuration files? (y/N): ');
+      const removeContainers = await prompt('Do you want to remove Docker containers? (y/N): ');
+
+      close();
+
+      // Stop service first
+      try {
+        await this.modelgrid.getSystemd().stop();
+      } catch {
+        // Service might not be running
+      }
+
+      // Disable service
+      try {
+        await this.modelgrid.getSystemd().disable();
+      } catch {
+        // Service might not be installed
+      }
+
+      // Remove containers if requested
+      if (removeContainers.toLowerCase() === 'y') {
+        logger.info('Removing Docker containers...');
+        try {
+          execSync('docker rm -f $(docker ps -aq --filter "name=modelgrid")', { stdio: 'pipe' });
+        } catch {
+          // No containers to remove
+        }
+      }
+
+      // Remove configuration if requested
+      if (removeConfig.toLowerCase() === 'y') {
+        logger.info('Removing configuration...');
+        try {
+          const { rm } = await import('node:fs/promises');
+          await rm(PATHS.CONFIG_DIR, { recursive: true, force: true });
+        } catch {
+          // Config might not exist
+        }
+      }
+
+      // Run uninstall script
+      const { dirname, join } = await import('node:path');
+      const binPath = process.argv[1];
+      const modulePath = dirname(dirname(binPath));
+      const uninstallScriptPath = join(modulePath, 'uninstall.sh');
+
+      logger.log('');
+      logger.log(`Running uninstaller from ${uninstallScriptPath}...`);
+
+      execSync(`sudo bash ${uninstallScriptPath}`, {
+        env: {
+          ...process.env,
+          REMOVE_CONFIG: removeConfig.toLowerCase() === 'y' ? 'yes' : 'no',
+          MODELGRID_CLI_CALL: 'true',
+        },
+        stdio: 'inherit',
+      });
+    } catch (error) {
+      logger.error(`Uninstall failed: ${error instanceof Error ? error.message : String(error)}`);
+      process.exit(1);
+    }
+  }
+}
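+
+// Usage sketch (hypothetical wiring; the CLI dispatcher owns the instance):
+//
+//   const svc = new ServiceHandler(modelgrid);
+//   await svc.enable(); // root-only: installs the systemd unit
+//   await svc.start();  // delegates to systemd start
+//   await svc.logs();   // tails journalctl -u modelgrid.service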
diff --git a/ts/colors.ts b/ts/colors.ts
new file mode 100644
index 0000000..00cadb5
--- /dev/null
+++ b/ts/colors.ts
@@ -0,0 +1,157 @@
+/**
+ * Color theme and styling utilities for ModelGrid CLI
+ * Uses Deno standard library colors module
+ */
+import * as colors from '@std/fmt/colors';
+
+/**
+ * Color theme for consistent CLI styling
+ */
+export const theme = {
+  // Message types
+  error: colors.red,
+  warning: colors.yellow,
+  success: colors.green,
+  info: colors.cyan,
+  dim: colors.dim,
+  highlight: colors.bold,
+
+  // Status indicators
+  statusActive: (text: string) => colors.green(colors.bold(text)),
+  statusInactive: (text: string) => colors.red(text),
+  statusWarning: (text: string) => colors.yellow(text),
+  statusUnknown: (text: string) => colors.dim(text),
+
+  // GPU status colors
+  gpuHealthy: colors.green, // GPU healthy
+  gpuWarning: colors.yellow, // GPU warning
+  gpuError: colors.red, // GPU error
+
+  // VRAM usage colors
+  vramGood: colors.green, // < 60% usage
+  vramMedium: colors.yellow, // 60-85% usage
+  vramCritical: colors.red, // > 85% usage
+
+  // Container status colors
+  containerRunning: colors.green,
+  containerStopped: colors.red,
+  containerStarting: colors.yellow,
+
+  // Box borders
+  borderSuccess: colors.green,
+  borderError: colors.red,
+  borderWarning: colors.yellow,
+  borderInfo: colors.cyan,
+  borderDefault: (text: string) => text, // No color
+
+  // Command/code highlighting
+  command: colors.cyan,
+  code: colors.dim,
+  path: colors.blue,
+  model: colors.magenta,
+};
+
+/**
+ * Status symbols with colors
+ */
+export const symbols = {
+  success: colors.green('✓'),
+  error: colors.red('✗'),
+  warning: colors.yellow('⚠'),
+  info: colors.cyan('ℹ'),
+  running: colors.green('●'),
+  stopped: colors.red('○'),
+  starting: colors.yellow('◐'),
+  unknown: colors.dim('◯'),
+  gpu: colors.cyan('◆'),
+  container: colors.blue('▣'),
+  model: colors.magenta('◈'),
+};
+
+/**
+ * Get color for VRAM usage percentage
+ */
+export function getVramColor(percentage: number): (text: string) => string {
+  if (percentage < 60) return theme.vramGood;
+  if (percentage < 85) return theme.vramMedium;
+  return theme.vramCritical;
+}
+
+/**
+ * Get color for GPU utilization
+ */
+export function getGpuUtilColor(percentage: number): (text: string) => string {
+  if (percentage < 60) return theme.gpuHealthy;
+  if (percentage < 85) return theme.gpuWarning;
+  return theme.gpuError;
+}
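+
+// Example (illustrative): getVramColor(45) renders green, getVramColor(72)
+// yellow, and getVramColor(90) red, matching the <60 / 60-85 / >85 bands above.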
+
+/**
+ * Format GPU vendor with color
+ */
+export function formatGpuVendor(vendor: 'nvidia' | 'amd' | 'intel' | 'unknown'): string {
+  switch (vendor) {
+    case 'nvidia':
+      return colors.green('NVIDIA');
+    case 'amd':
+      return colors.red('AMD');
+    case 'intel':
+      return colors.blue('Intel');
+    case 'unknown':
+    default:
+      return colors.dim('Unknown');
+  }
+}
+
+/**
+ * Format container status with color
+ */
+export function formatContainerStatus(
+  status: 'running' | 'stopped' | 'starting' | 'error' | 'unknown',
+): string {
+  switch (status) {
+    case 'running':
+      return theme.containerRunning('Running');
+    case 'stopped':
+      return theme.containerStopped('Stopped');
+    case 'starting':
+      return theme.containerStarting('Starting');
+    case 'error':
+      return theme.error('Error');
+    case 'unknown':
+    default:
+      return theme.dim('Unknown');
+  }
+}
+
+/**
+ * Format container type with color
+ */
+export function formatContainerType(type: 'ollama' | 'vllm' | 'tgi' | 'custom'): string {
+  switch (type) {
+    case 'ollama':
+      return colors.green('Ollama');
+    case 'vllm':
+      return colors.cyan('vLLM');
+    case 'tgi':
+      return colors.magenta('TGI');
+    case 'custom':
+      return colors.yellow('Custom');
+  }
+}
+
+/**
+ * Format model status with color
+ */
+export function formatModelStatus(status: 'loaded' | 'loading' | 'unloaded' | 'error'): string {
+  switch (status) {
+    case 'loaded':
+      return theme.success('Loaded');
+    case 'loading':
+      return theme.warning('Loading');
+    case 'unloaded':
+      return theme.dim('Unloaded');
+    case 'error':
+      return theme.error('Error');
+  }
+}
diff --git a/ts/constants.ts b/ts/constants.ts
new file mode 100644
index 0000000..24fbc09
--- /dev/null
+++ b/ts/constants.ts
@@ -0,0 +1,175 @@
+/**
+ * ModelGrid Constants
+ *
+ * Central location for all timeout, interval, and configuration values.
+ * This makes configuration easier and code more self-documenting.
+ */
+
+/**
+ * Default timing values in milliseconds
+ */
+export const TIMING = {
+  /** Default interval between container health checks (30 seconds) */
+  CHECK_INTERVAL_MS: 30000,
+
+  /** Interval for idle monitoring mode (60 seconds) */
+  IDLE_CHECK_INTERVAL_MS: 60000,
+
+  /** Interval for checking config file changes (60 seconds) */
+  CONFIG_CHECK_INTERVAL_MS: 60000,
+
+  /** Interval for logging periodic status updates (5 minutes) */
+  LOG_INTERVAL_MS: 5 * 60 * 1000,
+
+  /** Timeout for GPU driver detection (10 seconds) */
+  GPU_DETECTION_TIMEOUT_MS: 10000,
+
+  /** Timeout for Docker commands (30 seconds) */
+  DOCKER_COMMAND_TIMEOUT_MS: 30000,
+
+  /** Timeout for container startup (2 minutes) */
+  CONTAINER_STARTUP_TIMEOUT_MS: 2 * 60 * 1000,
+
+  /** Timeout for model loading (10 minutes) */
+  MODEL_LOAD_TIMEOUT_MS: 10 * 60 * 1000,
+
+  /** Greenlit model list cache duration (1 hour) */
+  GREENLIST_CACHE_DURATION_MS: 60 * 60 * 1000,
+} as const;
+
+/**
+ * API Server constants
+ */
+export const API_SERVER = {
+  /** Default API server port */
+  DEFAULT_PORT: 8080,
+
+  /** Default API server host */
+  DEFAULT_HOST: '0.0.0.0',
+
+  /** Default rate limit (requests per minute) */
+  DEFAULT_RATE_LIMIT: 60,
+
+  /** Request timeout (30 seconds) */
+  REQUEST_TIMEOUT_MS: 30000,
+
+  /** Stream keep-alive interval (15 seconds) */
+  STREAM_KEEPALIVE_MS: 15000,
+} as const;
+
+/**
+ * Docker/Container constants
+ */
+export const DOCKER = {
+  /** Default Docker network name */
+  DEFAULT_NETWORK: 'modelgrid',
+
+  /** Container health check interval (10 seconds) */
+  HEALTH_CHECK_INTERVAL_MS: 10000,
+
+  /** Container restart delay (5 seconds) */
+  RESTART_DELAY_MS: 5000,
+
+  /** Maximum container restart attempts */
+  MAX_RESTART_ATTEMPTS: 3,
+} as const;
+
+/**
+ * GPU-related constants
+ */
+export const GPU = {
+  /** Minimum VRAM for most models (8GB) */
+  MIN_VRAM_GB: 8,
+
+  /** Recommended VRAM for larger models (24GB) */
+  RECOMMENDED_VRAM_GB: 24,
+
+  /** GPU utilization threshold for load balancing (80%) */
+  UTILIZATION_THRESHOLD_PERCENT: 80,
+} as const;
+
+/**
+ * Container port mapping defaults
+ */
+export const CONTAINER_PORTS = {
+  /** Ollama default port */
+  OLLAMA: 11434,
+
+  /** vLLM default port */
+  VLLM: 8000,
+
+  /** TGI (Text Generation Inference) default port */
+  TGI: 8080,
+} as const;
+
+/**
+ * Container image defaults
+ */
+export const CONTAINER_IMAGES = {
+  /** Ollama official image */
+  OLLAMA: 'ollama/ollama:latest',
+
+  /** vLLM official image */
+  VLLM: 'vllm/vllm-openai:latest',
+
+  /** TGI official image */
+  TGI: 'ghcr.io/huggingface/text-generation-inference:latest',
+} as const;
+
+/**
+ * Model registry constants
+ */
+export const MODEL_REGISTRY = {
+  /** Default greenlit models URL */
+  DEFAULT_GREENLIST_URL:
+    'https://code.foss.global/modelgrid.com/model_lists/raw/branch/main/greenlit.json',
+
+  /** Fallback greenlist if remote fetch fails */
+  FALLBACK_GREENLIST: [
+    { name: 'llama3.2:1b', container: 'ollama', minVram: 4 },
+    { name: 'llama3.2:3b', container: 'ollama', minVram: 6 },
+    { name: 'llama3:8b', container: 'ollama', minVram: 8 },
+    { name: 'mistral:7b', container: 'ollama', minVram: 8 },
+    { name: 'codellama:7b', container: 'ollama', minVram: 8 },
+  ],
+} as const;
+
+/**
+ * Configuration paths
+ */
+export const PATHS = {
+  /** Default configuration directory */
+  CONFIG_DIR: '/etc/modelgrid',
+
+  /** Default configuration file */
+  CONFIG_FILE: '/etc/modelgrid/config.json',
+
+  /** Default data directory */
+  DATA_DIR: '/var/lib/modelgrid',
+
+  /** Default log directory */
+  LOG_DIR: '/var/log/modelgrid',
+
+  /** Systemd service file path */
+  SYSTEMD_SERVICE: '/etc/systemd/system/modelgrid.service',
+
+  /** Binary installation path */
+  BINARY_PATH: '/usr/local/bin/modelgrid',
+
+  /** Working directory */
+  WORK_DIR: '/opt/modelgrid',
+} as const;
+
+/**
+ * UI/Display constants
+ */
+export const UI = {
+  /** Default width for log boxes */
+  DEFAULT_BOX_WIDTH: 50,
+
+  /** Wide box width for status displays */
+  WIDE_BOX_WIDTH: 65,
+
+  /** Extra wide box width for detailed info */
+  EXTRA_WIDE_BOX_WIDTH: 80,
+} as const;
diff --git a/ts/containers/base-container.ts b/ts/containers/base-container.ts
new file mode 100644
index 0000000..d3e68d3
--- /dev/null
+++ b/ts/containers/base-container.ts
@@ -0,0 +1,216 @@
+/**
+ * Base Container
+ *
+ * Abstract base class for AI model containers.
+ */
+
+import type {
+  IContainerConfig,
+  IContainerStatus,
+  ILoadedModel,
+  TContainerType,
+} from '../interfaces/container.ts';
+import type { IChatCompletionRequest, IChatCompletionResponse } from '../interfaces/api.ts';
+import { ContainerRuntime } from '../docker/container-runtime.ts';
+import { logger } from '../logger.ts';
+
+/**
+ * Model pull progress callback
+ */
+export type TModelPullProgress = (progress: {
+  model: string;
+  status: string;
+  percent?: number;
+}) => void;
+
+/**
+ * Abstract base class for AI model containers
+ */
+export abstract class BaseContainer {
+  /** Container type */
+  public abstract readonly type: TContainerType;
+
+  /** Display name */
+  public abstract readonly displayName: string;
+
+  /** Default Docker image */
+  public abstract readonly defaultImage: string;
+
+  /** Default internal port */
+  public abstract readonly defaultPort: number;
+
+  /** Container configuration */
+  protected config: IContainerConfig;
+
+  /** Container runtime */
+  protected runtime: ContainerRuntime;
+
+  constructor(config: IContainerConfig) {
+    this.config = config;
+    this.runtime = new ContainerRuntime();
+  }
+
+  /**
+   * Get the container configuration
+   */
+  public getConfig(): IContainerConfig {
+    return this.config;
+  }
+
+  /**
+   * Get the endpoint URL for this container
+   */
+  public getEndpoint(): string {
+    const port = this.config.externalPort || this.config.port;
+    return `http://localhost:${port}`;
+  }
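+
+  // Example (illustrative): getEndpoint() with port 11434 and externalPort
+  // 31434 returns http://localhost:31434; without an externalPort it falls
+  // back to http://localhost:11434.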
+
+  /**
+   * Start the container
+   */
+  public async start(): Promise<boolean> {
+    logger.info(`Starting ${this.displayName} container: ${this.config.name}`);
+    return this.runtime.startContainer(this.config);
+  }
+
+  /**
+   * Stop the container
+   */
+  public async stop(): Promise<boolean> {
+    logger.info(`Stopping ${this.displayName} container: ${this.config.name}`);
+    return this.runtime.stopContainer(this.config.id);
+  }
+
+  /**
+   * Restart the container
+   */
+  public async restart(): Promise<boolean> {
+    logger.info(`Restarting ${this.displayName} container: ${this.config.name}`);
+    return this.runtime.restartContainer(this.config.id);
+  }
+
+  /**
+   * Remove the container
+   */
+  public async remove(): Promise<boolean> {
+    logger.info(`Removing ${this.displayName} container: ${this.config.name}`);
+    return this.runtime.removeContainer(this.config.id);
+  }
+
+  /**
+   * Get container status
+   */
+  public async getStatus(): Promise<IContainerStatus> {
+    return this.runtime.getContainerStatus(this.config);
+  }
+
+  /**
+   * Get container logs
+   */
+  public async getLogs(lines: number = 100): Promise<string> {
+    return this.runtime.getLogs(this.config.id, { lines });
+  }
+
+  /**
+   * Check if the container is healthy
+   */
+  public abstract isHealthy(): Promise<boolean>;
+
+  /**
+   * Get list of available models
+   */
+  public abstract listModels(): Promise<string[]>;
+
+  /**
+   * Get list of loaded models with details
+   */
+  public abstract getLoadedModels(): Promise<ILoadedModel[]>;
+
+  /**
+   * Pull a model
+   */
+  public abstract pullModel(modelName: string, onProgress?: TModelPullProgress): Promise<boolean>;
+
+  /**
+   * Remove a model
+   */
+  public abstract removeModel(modelName: string): Promise<boolean>;
+
+  /**
+   * Send a chat completion request
+   */
+  public abstract chatCompletion(request: IChatCompletionRequest): Promise<IChatCompletionResponse>;
+
+  /**
+   * Stream a chat completion request
+   */
+  public abstract chatCompletionStream(
+    request: IChatCompletionRequest,
+    onChunk: (chunk: string) => void,
+  ): Promise<void>;
+
+  /**
+   * Make HTTP request to container
+   */
+  protected async fetch(
+    path: string,
+    options: {
+      method?: string;
+      headers?: Record<string, string>;
+      body?: unknown;
+      timeout?: number;
+    } = {},
+  ): Promise<Response> {
+    const endpoint = this.getEndpoint();
+    const url = `${endpoint}${path}`;
+
+    const controller = new AbortController();
+    const timeout = options.timeout || 30000;
+    const timeoutId = setTimeout(() => controller.abort(), timeout);
+
+    try {
+      const response = await fetch(url, {
+        method: options.method || 'GET',
+        headers: {
+          'Content-Type': 'application/json',
+          ...options.headers,
+        },
+        body: options.body ? JSON.stringify(options.body) : undefined,
+        signal: controller.signal,
+      });
+
+      return response;
+    } finally {
+      clearTimeout(timeoutId);
+    }
+  }
+
+  /**
+   * Make HTTP request and parse JSON response
+   */
+  protected async fetchJson<T>(
+    path: string,
+    options: {
+      method?: string;
+      headers?: Record<string, string>;
+      body?: unknown;
+      timeout?: number;
+    } = {},
+  ): Promise<T> {
+    const response = await this.fetch(path, options);
+
+    if (!response.ok) {
+      const errorText = await response.text();
+      throw new Error(`HTTP ${response.status}: ${errorText}`);
+    }
+
+    return response.json();
+  }
+
+  /**
+   * Generate a unique request ID
+   */
+  protected generateRequestId(): string {
+    return `chatcmpl-${Date.now().toString(36)}-${Math.random().toString(36).substring(2, 8)}`;
+  }
+}
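+
+// Subclassing sketch (illustrative; ollama.ts, vllm.ts, and tgi.ts are the
+// real implementations): a concrete container fills in the abstract surface,
+// e.g.
+//
+//   class MyContainer extends BaseContainer {
+//     readonly type: TContainerType = 'custom';
+//     readonly displayName = 'My Container';
+//     readonly defaultImage = 'my/image:latest';
+//     readonly defaultPort = 9000;
+//     async isHealthy() { return (await this.fetch('/health')).ok; }
+//     // ...remaining abstract members elided
+//   }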
diff --git a/ts/containers/container-manager.ts b/ts/containers/container-manager.ts
new file mode 100644
index 0000000..42f5cb0
--- /dev/null
+++ b/ts/containers/container-manager.ts
@@ -0,0 +1,349 @@
+/**
+ * Container Manager
+ *
+ * Orchestrates multiple AI model containers.
+ */
+
+import type {
+  IContainerConfig,
+  IContainerStatus,
+  IContainerEndpoint,
+  TContainerType,
+} from '../interfaces/container.ts';
+import { logger } from '../logger.ts';
+import { DockerManager } from '../docker/docker-manager.ts';
+import { BaseContainer } from './base-container.ts';
+import { OllamaContainer } from './ollama.ts';
+import { VllmContainer } from './vllm.ts';
+import { TgiContainer } from './tgi.ts';
+
+/**
+ * Container Manager - orchestrates all containers
+ */
+export class ContainerManager {
+  private containers: Map<string, BaseContainer>;
+  private dockerManager: DockerManager;
+
+  constructor() {
+    this.containers = new Map();
+    this.dockerManager = new DockerManager();
+  }
+
+  /**
+   * Initialize container manager
+   */
+  public async initialize(): Promise<void> {
+    // Ensure Docker is running
+    if (!await this.dockerManager.isRunning()) {
+      throw new Error('Docker is not running');
+    }
+
+    // Create network if it doesn't exist
+    await this.dockerManager.createNetwork();
+  }
+
+  /**
+   * Create a container instance from config
+   */
+  private createContainerInstance(config: IContainerConfig): BaseContainer {
+    switch (config.type) {
+      case 'ollama':
+        return new OllamaContainer(config);
+      case 'vllm':
+        return new VllmContainer(config);
+      case 'tgi':
+        return new TgiContainer(config);
+      default:
+        throw new Error(`Unknown container type: ${config.type}`);
+    }
+  }
+
+  /**
+   * Add a container
+   */
+  public addContainer(config: IContainerConfig): BaseContainer {
+    if (this.containers.has(config.id)) {
+      throw new Error(`Container with ID ${config.id} already exists`);
+    }
+
+    const container = this.createContainerInstance(config);
+    this.containers.set(config.id, container);
+    return container;
+  }
+
+  /**
+   * Remove a container
+   */
+  public async removeContainer(containerId: string): Promise<boolean> {
+    const container = this.containers.get(containerId);
+    if (!container) {
+      return false;
+    }
+
+    await container.remove();
+    this.containers.delete(containerId);
+    return true;
+  }
+
+  /**
+   * Get a container by ID
+   */
+  public getContainer(containerId: string): BaseContainer | undefined {
+    return this.containers.get(containerId);
+  }
+
+  /**
+   * Get all containers
+   */
+  public getAllContainers(): BaseContainer[] {
+    return Array.from(this.containers.values());
+  }
+
+  /**
+   * Load containers from configuration
+   */
+  public loadFromConfig(configs: IContainerConfig[]): void {
+    this.containers.clear();
+    for (const config of configs) {
+      try {
+        this.addContainer(config);
+      } catch (error) {
+        logger.warn(`Failed to load container ${config.id}: ${error instanceof Error ? error.message : String(error)}`);
+      }
+    }
+  }
+
+  /**
+   * Start all containers
+   */
+  public async startAll(): Promise<Map<string, boolean>> {
+    const results = new Map<string, boolean>();
+
+    for (const [id, container] of this.containers) {
+      if (!container.getConfig().autoStart) {
+        continue;
+      }
+
+      try {
+        const success = await container.start();
+        results.set(id, success);
+      } catch (error) {
+        logger.error(`Failed to start container ${id}: ${error instanceof Error ? error.message : String(error)}`);
+        results.set(id, false);
+      }
+    }
+
+    return results;
+  }
+
+  /**
+   * Stop all containers
+   */
+  public async stopAll(): Promise<Map<string, boolean>> {
+    const results = new Map<string, boolean>();
+
+    for (const [id, container] of this.containers) {
+      try {
+        const success = await container.stop();
+        results.set(id, success);
+      } catch (error) {
+        logger.error(`Failed to stop container ${id}: ${error instanceof Error ? error.message : String(error)}`);
+        results.set(id, false);
+      }
+    }
+
+    return results;
+  }
+
+  /**
+   * Get status of all containers
+   */
+  public async getAllStatus(): Promise<Map<string, IContainerStatus>> {
+    const statuses = new Map<string, IContainerStatus>();
+
+    for (const [id, container] of this.containers) {
+      try {
+        const status = await container.getStatus();
+        statuses.set(id, status);
+      } catch (error) {
+        logger.warn(`Failed to get status for container ${id}: ${error instanceof Error ? error.message : String(error)}`);
+      }
+    }
+
+    return statuses;
+  }
+
+  /**
+   * Get available endpoints for a model
+   */
+  public async getEndpointsForModel(modelName: string): Promise<IContainerEndpoint[]> {
+    const endpoints: IContainerEndpoint[] = [];
+
+    for (const [_id, container] of this.containers) {
+      try {
+        const status = await container.getStatus();
+
+        if (!status.running) {
+          continue;
+        }
+
+        // Check if container has this model
+        const models = await container.listModels();
+        if (!models.includes(modelName)) {
+          continue;
+        }
+
+        endpoints.push({
+          containerId: container.getConfig().id,
+          type: container.type,
+          url: container.getEndpoint(),
+          models,
+          healthy: status.health === 'healthy',
+          priority: 0, // Could be based on load
+        });
+      } catch {
+        // Skip containers that fail to respond
+      }
+    }
+
+    return endpoints;
+  }
+
+  /**
+   * Find best container for a model
+   */
+  public async findContainerForModel(modelName: string): Promise<BaseContainer | null> {
+    const endpoints = await this.getEndpointsForModel(modelName);
+
+    // Filter to healthy endpoints
+    const healthy = endpoints.filter((e) => e.healthy);
+    if (healthy.length === 0) {
+      return null;
+    }
+
+    // Return first healthy endpoint (could add load balancing)
+    const endpoint = healthy[0];
+    return this.containers.get(endpoint.containerId) || null;
+  }
+
+  /**
+   * Get all available models across all containers
+   */
+  public async getAllAvailableModels(): Promise<Map<string, IContainerEndpoint[]>> {
+    const modelMap = new Map<string, IContainerEndpoint[]>();
+
+    for (const container of this.containers.values()) {
+      try {
+        const status = await container.getStatus();
+        if (!status.running) continue;
+
+        const models = await container.listModels();
+
+        for (const model of models) {
+          if (!modelMap.has(model)) {
+            modelMap.set(model, []);
+          }
+
+          modelMap.get(model)!.push({
+            containerId: container.getConfig().id,
+            type: container.type,
+            url: container.getEndpoint(),
+            models,
+            healthy: status.health === 'healthy',
+            priority: 0,
+          });
+        }
+      } catch {
+        // Skip failed containers
+      }
+    }
+
+    return modelMap;
+  }
+
+  /**
+   * Pull a model to a specific container type
+   */
+  public async pullModel(
+    modelName: string,
+    containerType: TContainerType = 'ollama',
+    containerId?: string,
+  ): Promise<boolean> {
+    // Find or create appropriate container
+    let container: BaseContainer | undefined;
+
+    if (containerId) {
+      container = this.containers.get(containerId);
+    } else {
+      // Find first container of the specified type
+      for (const c of this.containers.values()) {
+        if (c.type === containerType) {
+          container = c;
+          break;
+        }
+      }
+    }
+
+    if (!container) {
+      logger.error(`No ${containerType} container available to pull model`);
+      return false;
+    }
+
+    return container.pullModel(modelName, (progress) => {
+      const percent = progress.percent !== undefined ? ` (${progress.percent}%)` : '';
+      logger.dim(`  ${progress.status}${percent}`);
+    });
+  }
+
+  /**
+   * Health check all containers
+   */
+  public async healthCheck(): Promise<Map<string, boolean>> {
+    const results = new Map<string, boolean>();
+
+    for (const [id, container] of this.containers) {
+      try {
+        const healthy = await container.isHealthy();
+        results.set(id, healthy);
+      } catch {
+        results.set(id, false);
+      }
+    }
+
+    return results;
+  }
+
+  /**
+   * Print container status summary
+   */
+  public async printStatus(): Promise<void> {
+    const statuses = await this.getAllStatus();
+
+    if (statuses.size === 0) {
+      logger.logBox('Containers', ['No containers configured'], 50, 'warning');
+      return;
+    }
+
+    logger.logBoxTitle('Container Status', 70, 'info');
+
+    for (const [id, status] of statuses) {
+      const runningStr = status.running ? 'Running' : 'Stopped';
+      const healthStr = status.health;
+      const modelsStr = status.loadedModels.length > 0
+        ? status.loadedModels.join(', ')
+        : 'None';
+
+      logger.logBoxLine(`${status.name} (${id})`);
+      logger.logBoxLine(`  Type: ${status.type} | Status: ${runningStr} | Health: ${healthStr}`);
+      logger.logBoxLine(`  Models: ${modelsStr}`);
+      logger.logBoxLine(`  Endpoint: ${status.endpoint}`);
+
+      if (status.gpuUtilization !== undefined) {
+        logger.logBoxLine(`  GPU: ${status.gpuUtilization}% | Memory: ${status.memoryUsage || 0}MB`);
+      }
+      logger.logBoxLine('');
+    }
+
+    logger.logBoxEnd();
+  }
+}
diff --git a/ts/containers/index.ts b/ts/containers/index.ts
new file mode 100644
index 0000000..27a1048
--- /dev/null
+++ b/ts/containers/index.ts
@@ -0,0 +1,11 @@
+/**
+ * Container Management Module
+ *
+ * Exports all AI container implementations.
+ */
+
+export { BaseContainer } from './base-container.ts';
+export { OllamaContainer } from './ollama.ts';
+export { VllmContainer } from './vllm.ts';
+export { TgiContainer } from './tgi.ts';
+export { ContainerManager } from './container-manager.ts';
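+
+// Usage sketch (hypothetical wiring):
+//
+//   const manager = new ContainerManager();
+//   await manager.initialize();        // requires a running Docker daemon
+//   manager.loadFromConfig(config.containers);
+//   await manager.startAll();          // starts containers marked autoStart
+//   const c = await manager.findContainerForModel('llama3:8b');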
diff --git a/ts/containers/ollama.ts b/ts/containers/ollama.ts
new file mode 100644
index 0000000..6a39b4e
--- /dev/null
+++ b/ts/containers/ollama.ts
@@ -0,0 +1,387 @@
+/**
+ * Ollama Container
+ *
+ * Manages Ollama containers for running local LLMs.
+ */
+
+import type {
+  IContainerConfig,
+  ILoadedModel,
+  TContainerType,
+} from '../interfaces/container.ts';
+import type {
+  IChatCompletionRequest,
+  IChatCompletionResponse,
+  IChatCompletionChoice,
+  IChatMessage,
+} from '../interfaces/api.ts';
+import { CONTAINER_IMAGES, CONTAINER_PORTS } from '../constants.ts';
+import { logger } from '../logger.ts';
+import { BaseContainer, type TModelPullProgress } from './base-container.ts';
+
+/**
+ * Ollama API response types
+ */
+interface IOllamaTagsResponse {
+  models: Array<{
+    name: string;
+    size: number;
+    digest: string;
+    modified_at: string;
+  }>;
+}
+
+interface IOllamaChatRequest {
+  model: string;
+  messages: Array<{
+    role: string;
+    content: string;
+  }>;
+  stream?: boolean;
+  options?: {
+    temperature?: number;
+    top_p?: number;
+    num_predict?: number;
+    stop?: string[];
+  };
+}
+
+interface IOllamaChatResponse {
+  model: string;
+  created_at: string;
+  message: {
+    role: string;
+    content: string;
+  };
+  done: boolean;
+  total_duration?: number;
+  load_duration?: number;
+  prompt_eval_count?: number;
+  eval_count?: number;
+}
+
+interface IOllamaPullResponse {
+  status: string;
+  digest?: string;
+  total?: number;
+  completed?: number;
+}
+
+/**
+ * Ollama container implementation
+ */
+export class OllamaContainer extends BaseContainer {
+  public readonly type: TContainerType = 'ollama';
+  public readonly displayName = 'Ollama';
+  public readonly defaultImage = CONTAINER_IMAGES.OLLAMA;
+  public readonly defaultPort = CONTAINER_PORTS.OLLAMA;
+
+  constructor(config: IContainerConfig) {
+    super(config);
+
+    // Set defaults if not provided
+    if (!config.image) {
+      config.image = this.defaultImage;
+    }
+    if (!config.port) {
+      config.port = this.defaultPort;
+    }
+
+    // Add default volume for model storage
+    if (!config.volumes || config.volumes.length === 0) {
+      config.volumes = [`modelgrid-ollama-${config.id}:/root/.ollama`];
+    }
+  }
+
+  /**
+   * Create Ollama container configuration
+   */
+  public static createConfig(
+    id: string,
+    name: string,
+    gpuIds: string[],
+    options: Partial<IContainerConfig> = {},
+  ): IContainerConfig {
+    return {
+      id,
+      name,
+      type: 'ollama',
+      image: options.image || CONTAINER_IMAGES.OLLAMA,
+      gpuIds,
+      port: options.port || CONTAINER_PORTS.OLLAMA,
+      externalPort: options.externalPort,
+      models: options.models || [],
+      env: options.env,
+      volumes: options.volumes || [`modelgrid-ollama-${id}:/root/.ollama`],
+      autoStart: options.autoStart ?? true,
+      restartPolicy: options.restartPolicy || 'unless-stopped',
+      memoryLimit: options.memoryLimit,
+      cpuLimit: options.cpuLimit,
+      command: options.command,
+    };
+  }
+
+  /**
+   * Check if Ollama is healthy
+   */
+  public async isHealthy(): Promise<boolean> {
+    try {
+      const response = await this.fetch('/api/tags', { timeout: 5000 });
+      return response.ok;
+    } catch {
+      return false;
+    }
+  }
+
+  /**
+   * List available models
+   */
+  public async listModels(): Promise<string[]> {
+    try {
+      const data = await this.fetchJson<IOllamaTagsResponse>('/api/tags');
+      return (data.models || []).map((m) => m.name);
+    } catch (error) {
+      logger.warn(`Failed to list Ollama models: ${error instanceof Error ? error.message : String(error)}`);
+      return [];
+    }
+  }
+
+  /**
+   * Get loaded models with details
+   */
+  public async getLoadedModels(): Promise<ILoadedModel[]> {
+    try {
+      const data = await this.fetchJson<IOllamaTagsResponse>('/api/tags');
+      return (data.models || []).map((m) => ({
+        name: m.name,
+        size: m.size,
+        format: m.digest.substring(0, 12),
+        loaded: true, // Ollama doesn't distinguish loaded vs available
+        requestCount: 0,
+      }));
+    } catch {
+      return [];
+    }
+  }
+
+  /**
+   * Pull a model
+   */
+  public async pullModel(modelName: string, onProgress?: TModelPullProgress): Promise<boolean> {
+    try {
+      logger.info(`Pulling model: ${modelName}`);
+
+      const response = await this.fetch('/api/pull', {
+        method: 'POST',
+        body: { name: modelName },
+        timeout: 3600000, // 1 hour for large models
+      });
+
+      if (!response.ok) {
+        throw new Error(`HTTP ${response.status}`);
+      }
+
+      // Read streaming response
+      const reader = response.body?.getReader();
+      if (!reader) {
+        throw new Error('No response body');
+      }
+
+      const decoder = new TextDecoder();
+      let lastStatus = '';
+
+      while (true) {
+        const { done, value } = await reader.read();
+        if (done) break;
+
+        const text = decoder.decode(value);
+        const lines = text.split('\n').filter((l) => l.trim());
+
+        for (const line of lines) {
+          try {
+            const data = JSON.parse(line) as IOllamaPullResponse;
+            const status = data.status;
+
+            if (status !== lastStatus) {
+              lastStatus = status;
+              let percent: number | undefined;
+
+              if (data.total && data.completed) {
+                percent = Math.round((data.completed / data.total) * 100);
+              }
+
+              if (onProgress) {
+                onProgress({ model: modelName, status, percent });
+              } else {
+                const progressStr = percent !== undefined ? ` (${percent}%)` : '';
+                logger.dim(`  ${status}${progressStr}`);
+              }
+            }
+          } catch {
+            // Invalid JSON line, skip
+          }
+        }
+      }
+
+      logger.success(`Model ${modelName} pulled successfully`);
+      return true;
+    } catch (error) {
+      logger.error(`Failed to pull model ${modelName}: ${error instanceof Error ? error.message : String(error)}`);
+      return false;
+    }
+  }
+
+  /**
+   * Remove a model
+   */
+  public async removeModel(modelName: string): Promise<boolean> {
+    try {
+      const response = await this.fetch('/api/delete', {
+        method: 'DELETE',
+        body: { name: modelName },
+      });
+
+      if (response.ok) {
+        logger.success(`Model ${modelName} removed`);
+        return true;
+      }
+
+      throw new Error(`HTTP ${response.status}`);
+    } catch (error) {
+      logger.error(`Failed to remove model ${modelName}: ${error instanceof Error ? error.message : String(error)}`);
+      return false;
+    }
+  }
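+
+  // Request mapping (illustrative): an OpenAI-style request such as
+  //   { model: 'llama3:8b', max_tokens: 128, temperature: 0.2 }
+  // is translated below into an Ollama /api/chat body carrying
+  //   options: { num_predict: 128, temperature: 0.2 } with stream: false.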
+
+  /**
+   * Send a chat completion request
+   */
+  public async chatCompletion(request: IChatCompletionRequest): Promise<IChatCompletionResponse> {
+    const ollamaRequest: IOllamaChatRequest = {
+      model: request.model,
+      messages: request.messages.map((m) => ({
+        role: m.role,
+        content: m.content,
+      })),
+      stream: false,
+      options: {
+        temperature: request.temperature,
+        top_p: request.top_p,
+        num_predict: request.max_tokens,
+        stop: Array.isArray(request.stop) ? request.stop : request.stop ? [request.stop] : undefined,
+      },
+    };
+
+    const response = await this.fetchJson<IOllamaChatResponse>('/api/chat', {
+      method: 'POST',
+      body: ollamaRequest,
+      timeout: 300000, // 5 minutes
+    });
+
+    // Convert to OpenAI format
+    const created = Math.floor(Date.now() / 1000);
+
+    const choice: IChatCompletionChoice = {
+      index: 0,
+      message: {
+        role: 'assistant',
+        content: response.message.content,
+      },
+      finish_reason: response.done ? 'stop' : null,
+    };
+
+    return {
+      id: this.generateRequestId(),
+      object: 'chat.completion',
+      created,
+      model: request.model,
+      choices: [choice],
+      usage: {
+        prompt_tokens: response.prompt_eval_count || 0,
+        completion_tokens: response.eval_count || 0,
+        total_tokens: (response.prompt_eval_count || 0) + (response.eval_count || 0),
+      },
+    };
+  }
+
+  /**
+   * Stream a chat completion request
+   */
+  public async chatCompletionStream(
+    request: IChatCompletionRequest,
+    onChunk: (chunk: string) => void,
+  ): Promise<void> {
+    const ollamaRequest: IOllamaChatRequest = {
+      model: request.model,
+      messages: request.messages.map((m) => ({
+        role: m.role,
+        content: m.content,
+      })),
+      stream: true,
+      options: {
+        temperature: request.temperature,
+        top_p: request.top_p,
+        num_predict: request.max_tokens,
+        stop: Array.isArray(request.stop) ? request.stop : request.stop ? [request.stop] : undefined,
+      },
+    };
+
+    const response = await this.fetch('/api/chat', {
+      method: 'POST',
+      body: ollamaRequest,
+      timeout: 300000,
+    });
+
+    if (!response.ok) {
+      throw new Error(`HTTP ${response.status}`);
+    }
+
+    const reader = response.body?.getReader();
+    if (!reader) {
+      throw new Error('No response body');
+    }
+
+    const decoder = new TextDecoder();
+    const requestId = this.generateRequestId();
+    const created = Math.floor(Date.now() / 1000);
+
+    while (true) {
+      const { done, value } = await reader.read();
+      if (done) break;
+
+      const text = decoder.decode(value);
+      const lines = text.split('\n').filter((l) => l.trim());
+
+      for (const line of lines) {
+        try {
+          const data = JSON.parse(line) as IOllamaChatResponse;
+
+          // Convert to OpenAI streaming format
+          const chunk = {
+            id: requestId,
+            object: 'chat.completion.chunk',
+            created,
+            model: request.model,
+            choices: [
+              {
+                index: 0,
+                delta: {
+                  content: data.message.content,
+                } as Partial<IChatMessage>,
+                finish_reason: data.done ? 'stop' : null,
+              },
+            ],
+          };
+
+          onChunk(`data: ${JSON.stringify(chunk)}\n\n`);
+
+          if (data.done) {
+            onChunk('data: [DONE]\n\n');
+          }
+        } catch {
+          // Invalid JSON, skip
+        }
+      }
+    }
+  }
+}
diff --git a/ts/containers/tgi.ts b/ts/containers/tgi.ts
new file mode 100644
index 0000000..8dc6f21
--- /dev/null
+++ b/ts/containers/tgi.ts
@@ -0,0 +1,417 @@
+/**
+ * TGI Container (Text Generation Inference)
+ *
+ * Manages HuggingFace Text Generation Inference containers.
+ */
+
+import type {
+  IContainerConfig,
+  ILoadedModel,
+  TContainerType,
+} from '../interfaces/container.ts';
+import type {
+  IChatCompletionRequest,
+  IChatCompletionResponse,
+  IChatCompletionChoice,
+  IChatMessage,
+} from '../interfaces/api.ts';
+import { CONTAINER_IMAGES, CONTAINER_PORTS } from '../constants.ts';
+import { logger } from '../logger.ts';
+import { BaseContainer, type TModelPullProgress } from './base-container.ts';
+
+/**
+ * TGI info response
+ */
+interface ITgiInfoResponse {
+  model_id: string;
+  model_sha: string;
+  model_dtype: string;
+  model_device_type: string;
+  max_concurrent_requests: number;
+  max_best_of: number;
+  max_stop_sequences: number;
+  max_input_length: number;
+  max_total_tokens: number;
+  version: string;
+}
+
+/**
+ * TGI generate request
+ */
+interface ITgiGenerateRequest {
+  inputs: string;
+  parameters?: {
+    temperature?: number;
+    top_p?: number;
+    max_new_tokens?: number;
+    stop?: string[];
+    do_sample?: boolean;
+    return_full_text?: boolean;
+  };
+}
+
+/**
+ * TGI generate response
+ */
+interface ITgiGenerateResponse {
+  generated_text: string;
+  details?: {
+    finish_reason: string;
+    generated_tokens: number;
+    seed?: number;
+  };
+}
+
+/**
+ * TGI container implementation
+ *
+ * TGI is optimized for:
+ * - Production deployments
+ * - Flash Attention support
+ * - Quantization (bitsandbytes, GPTQ, AWQ)
+ * - Multiple GPU support with tensor parallelism
+ */
+export class TgiContainer extends BaseContainer {
+  public readonly type: TContainerType = 'tgi';
+  public readonly displayName = 'TGI';
+  public readonly defaultImage = CONTAINER_IMAGES.TGI;
+  public readonly defaultPort = CONTAINER_PORTS.TGI;
+
+  constructor(config: IContainerConfig) {
+    super(config);
+
+    // Set defaults if not provided
+    if (!config.image) {
+      config.image = this.defaultImage;
+    }
+    if (!config.port) {
+      config.port = this.defaultPort;
+    }
+
+    // Add default volume for model cache
+    if (!config.volumes || config.volumes.length === 0) {
+      config.volumes = [`modelgrid-tgi-${config.id}:/data`];
+    }
+  }
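+
+  // Example (illustrative model id): createConfig('tgi-1', 'tgi-main',
+  // 'mistralai/Mistral-7B-Instruct-v0.2', ['0', '1']) below sets NUM_SHARD=2,
+  // sharding the model across both GPUs via tensor parallelism.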
+
+  /**
+   * Create TGI container configuration
+   */
+  public static createConfig(
+    id: string,
+    name: string,
+    modelName: string,
+    gpuIds: string[],
+    options: Partial<IContainerConfig> = {},
+  ): IContainerConfig {
+    const env: Record<string, string> = {
+      MODEL_ID: modelName,
+      PORT: String(options.port || CONTAINER_PORTS.TGI),
+      HUGGING_FACE_HUB_TOKEN: options.env?.HF_TOKEN || options.env?.HUGGING_FACE_HUB_TOKEN || '',
+      ...options.env,
+    };
+
+    // Add GPU configuration
+    if (gpuIds.length > 1) {
+      env.NUM_SHARD = String(gpuIds.length);
+    }
+
+    // Add quantization if specified
+    if (options.env?.QUANTIZE) {
+      env.QUANTIZE = options.env.QUANTIZE;
+    }
+
+    return {
+      id,
+      name,
+      type: 'tgi',
+      image: options.image || CONTAINER_IMAGES.TGI,
+      gpuIds,
+      port: options.port || CONTAINER_PORTS.TGI,
+      externalPort: options.externalPort,
+      models: [modelName],
+      env,
+      volumes: options.volumes || [`modelgrid-tgi-${id}:/data`],
+      autoStart: options.autoStart ?? true,
+      restartPolicy: options.restartPolicy || 'unless-stopped',
+      memoryLimit: options.memoryLimit,
+      cpuLimit: options.cpuLimit,
+      command: options.command,
+    };
+  }
+
+  /**
+   * Check if TGI is healthy
+   */
+  public async isHealthy(): Promise<boolean> {
+    try {
+      const response = await this.fetch('/health', { timeout: 5000 });
+      return response.ok;
+    } catch {
+      return false;
+    }
+  }
+
+  /**
+   * List available models
+   * TGI serves a single model per instance
+   */
+  public async listModels(): Promise<string[]> {
+    try {
+      const info = await this.fetchJson<ITgiInfoResponse>('/info');
+      return [info.model_id];
+    } catch (error) {
+      logger.warn(`Failed to get TGI info: ${error instanceof Error ? error.message : String(error)}`);
+      return this.config.models || [];
+    }
+  }
+
+  /**
+   * Get loaded models with details
+   */
+  public async getLoadedModels(): Promise<ILoadedModel[]> {
+    try {
+      const info = await this.fetchJson<ITgiInfoResponse>('/info');
+      return [{
+        name: info.model_id,
+        size: 0, // TGI doesn't expose model size
+        format: info.model_dtype,
+        loaded: true,
+        requestCount: 0,
+      }];
+    } catch {
+      return this.config.models.map((name) => ({
+        name,
+        size: 0,
+        loaded: true,
+        requestCount: 0,
+      }));
+    }
+  }
+
+  /**
+   * Pull a model
+   * TGI downloads models automatically at startup
+   */
+  public async pullModel(modelName: string, onProgress?: TModelPullProgress): Promise<boolean> {
+    logger.info(`TGI downloads models at startup. Model: ${modelName}`);
+    logger.info('To use a different model, create a new TGI container.');
+
+    if (onProgress) {
+      onProgress({
+        model: modelName,
+        status: 'TGI models are loaded at container startup',
+        percent: 100,
+      });
+    }
+
+    return true;
+  }
+
+  /**
+   * Remove a model
+   * TGI serves a single model per instance
+   */
+  public async removeModel(modelName: string): Promise<boolean> {
+    logger.info(`TGI serves a single model per instance.`);
+    logger.info(`To remove model ${modelName}, stop and remove this container.`);
+    return true;
+  }
+
+  /**
+   * Send a chat completion request
+   * Convert OpenAI format to TGI format
+   */
+  public async chatCompletion(request: IChatCompletionRequest): Promise<IChatCompletionResponse> {
+    // Convert messages to TGI prompt format
+    const prompt = this.messagesToPrompt(request.messages);
+
+    const tgiRequest: ITgiGenerateRequest = {
+      inputs: prompt,
+      parameters: {
+        temperature: request.temperature,
+        top_p: request.top_p,
+        max_new_tokens: request.max_tokens || 1024,
+        stop: Array.isArray(request.stop) ? request.stop : request.stop ? [request.stop] : undefined,
+        do_sample: (request.temperature || 0) > 0,
+        return_full_text: false,
+      },
+    };
+
+    const response = await this.fetchJson<ITgiGenerateResponse>('/generate', {
+      method: 'POST',
+      body: tgiRequest,
+      timeout: 300000, // 5 minutes
+    });
+
+    // Convert to OpenAI format
+    const created = Math.floor(Date.now() / 1000);
+
+    const choice: IChatCompletionChoice = {
+      index: 0,
+      message: {
+        role: 'assistant',
+        content: response.generated_text,
+      },
+      finish_reason: response.details?.finish_reason === 'eos_token' ? 'stop' : 'length',
+    };
+
+    return {
+      id: this.generateRequestId(),
+      object: 'chat.completion',
+      created,
+      model: this.config.models[0] || 'unknown',
+      choices: [choice],
+      usage: {
+        prompt_tokens: 0, // TGI doesn't always report this
+        completion_tokens: response.details?.generated_tokens || 0,
+        total_tokens: response.details?.generated_tokens || 0,
+      },
+    };
+  }
+
+  /**
+   * Stream a chat completion request
+   */
+  public async chatCompletionStream(
+    request: IChatCompletionRequest,
+    onChunk: (chunk: string) => void,
+  ): Promise<void> {
+    // Convert messages to TGI prompt format
+    const prompt = this.messagesToPrompt(request.messages);
+
+    const response = await this.fetch('/generate_stream', {
+      method: 'POST',
+      body: {
+        inputs: prompt,
+        parameters: {
+          temperature: request.temperature,
+          top_p: request.top_p,
+          max_new_tokens: request.max_tokens || 1024,
+          stop: Array.isArray(request.stop) ? request.stop : request.stop ? [request.stop] : undefined,
+          do_sample: (request.temperature || 0) > 0,
+        },
+      },
+      timeout: 300000,
+    });
+
+    if (!response.ok) {
+      const error = await response.text();
+      throw new Error(`HTTP ${response.status}: ${error}`);
+    }
+
+    const reader = response.body?.getReader();
+    if (!reader) {
+      throw new Error('No response body');
+    }
+
+    const decoder = new TextDecoder();
+    const requestId = this.generateRequestId();
+    const created = Math.floor(Date.now() / 1000);
+    const model = this.config.models[0] || 'unknown';
+
+    while (true) {
+      const { done, value } = await reader.read();
+      if (done) break;
+
+      const text = decoder.decode(value);
+      const lines = text.split('\n').filter((l) => l.startsWith('data:'));
+
+      for (const line of lines) {
+        try {
+          const jsonStr = line.substring(5).trim();
+          if (jsonStr === '[DONE]') {
+            onChunk('data: [DONE]\n\n');
+            continue;
+          }
+
+          const data = JSON.parse(jsonStr);
+
+          // Convert to OpenAI streaming format
+          const chunk = {
+            id: requestId,
+            object: 'chat.completion.chunk',
+            created,
+            model,
+            choices: [
+              {
+                index: 0,
+                delta: {
+                  content: data.token?.text || '',
+                } as Partial<IChatMessage>,
+                finish_reason: data.details?.finish_reason ? 'stop' : null,
+              },
+            ],
+          };
+
+          onChunk(`data: ${JSON.stringify(chunk)}\n\n`);
+        } catch {
+          // Invalid JSON, skip
+        }
+      }
+    }
+  }
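+
+  // Prompt shape (illustrative): the helper below renders
+  //   [{ role: 'system', content: 'Be brief.' }, { role: 'user', content: 'Hi' }]
+  // as "System: Be brief.\n\nUser: Hi\n\nAssistant:"; a generic template,
+  // since TGI's model-specific Messages API templates are not used here.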
+
+  /**
+   * Convert chat messages to TGI prompt format
+   */
+  private messagesToPrompt(messages: IChatMessage[]): string {
+    // Use a simple chat template
+    // TGI can use model-specific templates via the Messages API
+    let prompt = '';
+
+    for (const message of messages) {
+      switch (message.role) {
+        case 'system':
+          prompt += `System: ${message.content}\n\n`;
+          break;
+        case 'user':
+          prompt += `User: ${message.content}\n\n`;
+          break;
+        case 'assistant':
+          prompt += `Assistant: ${message.content}\n\n`;
+          break;
+      }
+    }
+
+    prompt += 'Assistant:';
+    return prompt;
+  }
+
+  /**
+   * Get TGI server info
+   */
+  public async getInfo(): Promise<ITgiInfoResponse | null> {
+    try {
+      return await this.fetchJson<ITgiInfoResponse>('/info');
+    } catch {
+      return null;
+    }
+  }
+
+  /**
+   * Get TGI metrics
+   */
+  public async getMetrics(): Promise<Record<string, number>> {
+    try {
+      const response = await this.fetch('/metrics', { timeout: 5000 });
+      if (response.ok) {
+        const text = await response.text();
+        // Parse Prometheus metrics
+        const metrics: Record<string, number> = {};
+        const lines = text.split('\n');
+        for (const line of lines) {
+          if (line.startsWith('#') || !line.trim()) continue;
+          const match = line.match(/^(\w+)(?:\{[^}]*\})?\s+([\d.e+-]+)/);
+          if (match) {
+            metrics[match[1]] = parseFloat(match[2]);
+          }
+        }
+        return metrics;
+      }
+    } catch {
+      // Metrics endpoint may not be available
+    }
+    return {};
+  }
+}
diff --git a/ts/containers/vllm.ts b/ts/containers/vllm.ts
new file mode 100644
index 0000000..24d9041
--- /dev/null
+++ b/ts/containers/vllm.ts
@@ -0,0 +1,272 @@
+/**
+ * vLLM Container
+ *
+ * Manages vLLM containers for high-performance LLM inference.
+ */
+
+import type {
+  IContainerConfig,
+  ILoadedModel,
+  TContainerType,
+} from '../interfaces/container.ts';
+import type {
+  IChatCompletionRequest,
+  IChatCompletionResponse,
+  IChatMessage,
+} from '../interfaces/api.ts';
+import { CONTAINER_IMAGES, CONTAINER_PORTS } from '../constants.ts';
+import { logger } from '../logger.ts';
+import { BaseContainer, type TModelPullProgress } from './base-container.ts';
+
+/**
+ * vLLM model info response
+ */
+interface IVllmModelsResponse {
+  object: 'list';
+  data: Array<{
+    id: string;
+    object: 'model';
+    created: number;
+    owned_by: string;
+  }>;
+}
+
+/**
+ * vLLM container implementation
+ *
+ * vLLM serves a single model per instance and is optimized for:
+ * - High throughput with PagedAttention
+ * - Continuous batching
+ * - OpenAI-compatible API
+ */
+export class VllmContainer extends BaseContainer {
+  public readonly type: TContainerType = 'vllm';
+  public readonly displayName = 'vLLM';
+  public readonly defaultImage = CONTAINER_IMAGES.VLLM;
+  public readonly defaultPort = CONTAINER_PORTS.VLLM;
+
+  constructor(config: IContainerConfig) {
+    super(config);
+
+    // Set defaults if not provided
+    if (!config.image) {
+      config.image = this.defaultImage;
+    }
+    if (!config.port) {
+      config.port = this.defaultPort;
+    }
+
+    // Add default volume for model cache
+    if (!config.volumes || config.volumes.length === 0) {
+      config.volumes = [`modelgrid-vllm-${config.id}:/root/.cache/huggingface`];
+    }
+  }
+
+  /**
+   * Create vLLM container configuration
+   */
+  public static createConfig(
+    id: string,
+    name: string,
+    modelName: string,
+    gpuIds: string[],
+    options: Partial<IContainerConfig> = {},
+  ): IContainerConfig {
+    // vLLM requires model to be specified at startup
+    const command = [
+      '--model', modelName,
+      '--host', '0.0.0.0',
+      '--port', String(options.port || CONTAINER_PORTS.VLLM),
+    ];
+
+    // Add tensor parallelism if multiple GPUs
+    if (gpuIds.length > 1) {
+      command.push('--tensor-parallel-size', String(gpuIds.length));
+    }
+
+    // Add additional options
+    if (options.env?.VLLM_MAX_MODEL_LEN) {
+      command.push('--max-model-len', options.env.VLLM_MAX_MODEL_LEN);
+    }
+
+    return {
+      id,
+      name,
+      type: 'vllm',
+      image: options.image || CONTAINER_IMAGES.VLLM,
+      gpuIds,
+      port: options.port || CONTAINER_PORTS.VLLM,
+      externalPort: options.externalPort,
+      models: [modelName],
+      env: {
+        HF_TOKEN: options.env?.HF_TOKEN || '',
+        ...options.env,
+      },
+      volumes: options.volumes || [`modelgrid-vllm-${id}:/root/.cache/huggingface`],
+      autoStart: options.autoStart ?? true,
+      restartPolicy: options.restartPolicy || 'unless-stopped',
+      memoryLimit: options.memoryLimit,
+      cpuLimit: options.cpuLimit,
+      command,
+    };
+  }
+
+  /**
+   * Check if vLLM is healthy
+   */
+  public async isHealthy(): Promise<boolean> {
+    try {
+      const response = await this.fetch('/health', { timeout: 5000 });
+      return response.ok;
+    } catch {
+      return false;
+    }
+  }
+
+  /**
+   * List available models
+   * vLLM serves a single model per instance
+   */
+  public async listModels(): Promise<string[]> {
+    try {
+      const data = await this.fetchJson<IVllmModelsResponse>('/v1/models');
+      return (data.data || []).map((m) => m.id);
+    } catch (error) {
+      logger.warn(`Failed to list vLLM models: ${error instanceof Error ? error.message : String(error)}`);
+      return this.config.models || [];
+    }
+  }
+
+  /**
+   * Get loaded models with details
+   */
+  public async getLoadedModels(): Promise<ILoadedModel[]> {
+    try {
+      const data = await this.fetchJson<IVllmModelsResponse>('/v1/models');
+      return (data.data || []).map((m) => ({
+        name: m.id,
+        size: 0, // vLLM doesn't expose size
+        loaded: true,
+        requestCount: 0,
+      }));
+    } catch {
+      // Return configured model as fallback
+      return this.config.models.map((name) => ({
+        name,
+        size: 0,
+        loaded: true,
+        requestCount: 0,
+      }));
+    }
+  }
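+
+  // Example (illustrative model id): createConfig('vllm-1', 'vllm-main',
+  // 'meta-llama/Meta-Llama-3-8B-Instruct', ['0', '1']) bakes the model into
+  // the startup command and adds --tensor-parallel-size 2 for the two GPUs.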
+
+  /**
+   * Pull a model
+   * vLLM downloads models automatically at startup
+   * This method is a no-op - models are configured at container creation
+   */
+  public async pullModel(modelName: string, onProgress?: TModelPullProgress): Promise<boolean> {
+    logger.info(`vLLM downloads models at startup. Model: ${modelName}`);
+    logger.info('To use a different model, create a new vLLM container.');
+
+    if (onProgress) {
+      onProgress({
+        model: modelName,
+        status: 'vLLM models are loaded at container startup',
+        percent: 100,
+      });
+    }
+
+    return true;
+  }
+
+  /**
+   * Remove a model
+   * vLLM serves a single model per instance
+   */
+  public async removeModel(modelName: string): Promise<boolean> {
+    logger.info(`vLLM serves a single model per instance.`);
+    logger.info(`To remove model ${modelName}, stop and remove this container.`);
+    return true;
+  }
+
+  /**
+   * Send a chat completion request
+   * vLLM is OpenAI-compatible
+   */
+  public async chatCompletion(request: IChatCompletionRequest): Promise<IChatCompletionResponse> {
+    return this.fetchJson<IChatCompletionResponse>('/v1/chat/completions', {
+      method: 'POST',
+      body: {
+        ...request,
+        stream: false,
+      },
+      timeout: 300000, // 5 minutes
+    });
+  }
+
+  /**
+   * Stream a chat completion request
+   * vLLM is OpenAI-compatible
+   */
+  public async chatCompletionStream(
+    request: IChatCompletionRequest,
+    onChunk: (chunk: string) => void,
+  ): Promise<void> {
+    const response = await this.fetch('/v1/chat/completions', {
+      method: 'POST',
+      body: {
+        ...request,
+        stream: true,
+      },
+      timeout: 300000,
+    });
+
+    if (!response.ok) {
+      const error = await response.text();
+      throw new Error(`HTTP ${response.status}: ${error}`);
+    }
+
+    const reader = response.body?.getReader();
+    if (!reader) {
+      throw new Error('No response body');
+    }
+
+    const decoder = new TextDecoder();
+
+    while (true) {
+      const { done, value } = await reader.read();
+      if (done) break;
+
+      const text = decoder.decode(value);
+      // vLLM already sends data in SSE format
+      onChunk(text);
+    }
+  }
+
+  /**
+   * Get vLLM-specific metrics
+   */
+  public async getMetrics(): Promise<Record<string, number>> {
+    try {
+      const response = await this.fetch('/metrics', { timeout: 5000 });
+      if (response.ok) {
+        const text = await response.text();
+        // Parse Prometheus metrics
+        const metrics: Record<string, number> = {};
+        const lines = text.split('\n');
+        for (const line of lines) {
+          if (line.startsWith('#') || !line.trim()) continue;
+          const match = line.match(/^(\w+)(?:\{[^}]*\})?\s+([\d.e+-]+)/);
+          if (match) {
+            metrics[match[1]] = parseFloat(match[2]);
+          }
+        }
+        return metrics;
+      }
+    } catch {
+      // Metrics endpoint may not be enabled
+    }
+    return {};
+  }
+}
diff --git a/ts/daemon.ts b/ts/daemon.ts
new file mode 100644
index 0000000..0563162
--- /dev/null
+++ b/ts/daemon.ts
@@ -0,0 +1,268 @@
+/**
+ * ModelGrid Daemon
+ *
+ * Background process for managing containers and serving the API.
+ */
+
+import process from 'node:process';
+import { logger } from './logger.ts';
+import { TIMING } from './constants.ts';
+import type { ModelGrid } from './modelgrid.ts';
+import { ApiServer } from './api/server.ts';
+import type { IModelGridConfig } from './interfaces/config.ts';
+
+/**
+ * ModelGrid Daemon
+ */
+export class Daemon {
+  private modelgrid: ModelGrid;
+  private isRunning: boolean = false;
+  private apiServer?: ApiServer;
+
+  constructor(modelgrid: ModelGrid) {
+    this.modelgrid = modelgrid;
+  }
+
+  /**
+   * Start the daemon
+   */
+  public async start(): Promise<void> {
+    if (this.isRunning) {
+      logger.warn('Daemon is already running');
+      return;
+    }
+
+    logger.log('Starting ModelGrid daemon...');
+
+    try {
+      // Initialize ModelGrid
+      await this.modelgrid.initialize();
+
+      const config = this.modelgrid.getConfig();
+      if (!config) {
+        throw new Error('Failed to load configuration');
+      }
+
+      this.logConfigLoaded(config);
+
+      // Start API server
+      await this.startApiServer(config);
+
+      // Start containers
+      await this.startContainers();
+
+      // Preload models if configured
+      await this.preloadModels(config);
+
+      // Setup signal handlers
+      this.setupSignalHandlers();
+
+      this.isRunning = true;
+
+      // Start monitoring loop
+      await this.monitor();
+    } catch (error) {
+      this.isRunning = false;
+      logger.error(`Daemon failed to start: ${error instanceof Error ? error.message : String(error)}`);
+      process.exit(1);
+    }
+  }
+
+  /**
+   * Stop the daemon
+   */
+  public async stop(): Promise<void> {
+    if (!this.isRunning) {
+      return;
+    }
+
+    logger.log('Stopping ModelGrid daemon...');
+
+    this.isRunning = false;
+
+    // Stop API server
+    if (this.apiServer) {
+      await this.apiServer.stop();
+    }
+
+    // Shutdown ModelGrid (stops containers)
+    await this.modelgrid.shutdown();
+
+    logger.success('ModelGrid daemon stopped');
+  }
+
+  /**
+   * Start the API server
+   */
+  private async startApiServer(config: IModelGridConfig): Promise<void> {
+    logger.info('Starting API server...');
+
+    this.apiServer = new ApiServer(
+      config.api,
+      this.modelgrid.getContainerManager(),
+      this.modelgrid.getModelRegistry(),
+    );
+
+    await this.apiServer.start();
+  }
+
+  /**
+   * Start configured containers
+   */
+  private async startContainers(): Promise<void> {
+    logger.info('Starting containers...');
+
+    const containerManager = this.modelgrid.getContainerManager();
+    await containerManager.startAll();
+
+    // Wait for containers to be healthy
+    logger.dim('Waiting for containers to become healthy...');
+    await this.waitForContainersHealthy();
+  }
+
+  /**
+   * Wait for all containers to report healthy
+   */
+  private async waitForContainersHealthy(timeout: number = 60000): Promise<void> {
+    const startTime = Date.now();
+    const containerManager = this.modelgrid.getContainerManager();
+
+    while (Date.now() - startTime < timeout) {
+      // Aggregate per-container results from ContainerManager.healthCheck()
+      const health = await containerManager.healthCheck();
+      const allHealthy = Array.from(health.values()).every((healthy) => healthy);
+
+      if (allHealthy) {
+        logger.success('All containers are healthy');
+        return;
+      }
+
+      await this.sleep(5000);
+    }
+
+    logger.warn('Timeout waiting for containers to become healthy');
+  }
{ + if (result.success) { + loaded++; + logger.dim(` ✓ ${name}`); + } else { + failed++; + logger.warn(` ✗ ${name}: ${result.error}`); + } + } + + if (failed > 0) { + logger.warn(`Preloaded ${loaded}/${config.models.autoLoad.length} models (${failed} failed)`); + } else { + logger.success(`Preloaded ${loaded} model(s)`); + } + } + + /** + * Setup signal handlers for graceful shutdown + */ + private setupSignalHandlers(): void { + const shutdown = async () => { + logger.log(''); + logger.log('Received shutdown signal'); + await this.stop(); + process.exit(0); + }; + + process.on('SIGINT', shutdown); + process.on('SIGTERM', shutdown); + } + + /** + * Main monitoring loop + */ + private async monitor(): Promise { + logger.log('Starting monitoring loop...'); + + const config = this.modelgrid.getConfig(); + const checkInterval = config?.checkInterval || TIMING.CHECK_INTERVAL_MS; + + while (this.isRunning) { + try { + // Check container health + await this.checkContainerHealth(); + + // Log periodic status + this.logPeriodicStatus(); + + await this.sleep(checkInterval); + } catch (error) { + logger.error(`Monitor error: ${error instanceof Error ? error.message : String(error)}`); + await this.sleep(checkInterval); + } + } + } + + /** + * Check health of all containers + */ + private async checkContainerHealth(): Promise { + const containerManager = this.modelgrid.getContainerManager(); + const statuses = await containerManager.getAllStatus(); + + for (const [id, status] of statuses) { + if (status.running && status.health === 'unhealthy') { + logger.warn(`Container ${id} is unhealthy, attempting restart...`); + + const container = containerManager.getContainer(id); + if (container) { + await container.restart(); + } + } + } + } + + /** + * Log periodic status + */ + private logPeriodicStatus(): void { + if (this.apiServer) { + const info = this.apiServer.getInfo(); + if (info.running) { + logger.dim(`API server running on ${info.host}:${info.port} (uptime: ${info.uptime}s)`); + } + } + } + + /** + * Log configuration loaded message + */ + private logConfigLoaded(config: IModelGridConfig): void { + logger.log(''); + logger.logBoxTitle('Configuration Loaded', 60, 'success'); + logger.logBoxLine(`API Port: ${config.api.port}`); + logger.logBoxLine(`Containers: ${config.containers.length}`); + logger.logBoxLine(`Auto-pull: ${config.models.autoPull ? 'Enabled' : 'Disabled'}`); + logger.logBoxLine(`Check Interval: ${config.checkInterval / 1000}s`); + logger.logBoxEnd(); + logger.log(''); + } + + /** + * Sleep for specified milliseconds + */ + private sleep(ms: number): Promise { + return new Promise((resolve) => setTimeout(resolve, ms)); + } +} diff --git a/ts/docker/container-runtime.ts b/ts/docker/container-runtime.ts new file mode 100644 index 0000000..c3c1982 --- /dev/null +++ b/ts/docker/container-runtime.ts @@ -0,0 +1,558 @@ +/** + * Container Runtime + * + * Manages individual Docker containers for AI model serving. 
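A minimal sketch of how this daemon is intended to be wired up, for reviewers following along. It assumes ModelGrid can be constructed without arguments; the real constructor lives in ts/modelgrid.ts, outside this excerpt:

// sketch.ts - illustrative only, not part of the patch
import { ModelGrid } from './modelgrid.ts';
import { Daemon } from './daemon.ts';

// start() initializes ModelGrid, boots the API server and containers,
// then blocks in monitor() until SIGINT/SIGTERM triggers stop().
const daemon = new Daemon(new ModelGrid()); // assumed zero-arg constructor
await daemon.start();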
diff --git a/ts/docker/container-runtime.ts b/ts/docker/container-runtime.ts
new file mode 100644
index 0000000..c3c1982
--- /dev/null
+++ b/ts/docker/container-runtime.ts
@@ -0,0 +1,558 @@
+/**
+ * Container Runtime
+ *
+ * Manages individual Docker containers for AI model serving.
+ */
+
+import { exec, spawn } from 'node:child_process';
+import { promisify } from 'node:util';
+import type {
+  IContainerConfig,
+  IContainerStatus,
+  TContainerHealth,
+  TContainerRunStatus,
+} from '../interfaces/container.ts';
+import { logger } from '../logger.ts';
+import { DOCKER, TIMING } from '../constants.ts';
+import { DriverManager } from '../drivers/driver-manager.ts';
+
+const execAsync = promisify(exec);
+
+/**
+ * Container runtime execution result
+ */
+export interface IContainerExecResult {
+  success: boolean;
+  output?: string;
+  error?: string;
+  exitCode?: number;
+}
+
+/**
+ * Container logs options
+ */
+export interface ILogsOptions {
+  lines?: number;
+  follow?: boolean;
+  timestamps?: boolean;
+}
+
+/**
+ * Container Runtime class - manages individual containers
+ */
+export class ContainerRuntime {
+  private driverManager: DriverManager;
+
+  constructor() {
+    this.driverManager = new DriverManager();
+  }
+
+  /**
+   * Start a container with the given configuration
+   */
+  public async startContainer(config: IContainerConfig): Promise<boolean> {
+    const containerName = `modelgrid-${config.id}`;
+
+    // Check if container already exists
+    const existingId = await this.getContainerIdByName(containerName);
+    if (existingId) {
+      // Check if it's running
+      const isRunning = await this.isContainerRunning(existingId);
+      if (isRunning) {
+        logger.dim(`Container ${containerName} is already running`);
+        return true;
+      }
+
+      // Start existing container
+      try {
+        await execAsync(`docker start ${existingId}`, {
+          timeout: TIMING.CONTAINER_STARTUP_TIMEOUT_MS,
+        });
+        logger.success(`Started existing container: ${containerName}`);
+        return true;
+      } catch (error) {
+        logger.error(`Failed to start existing container: ${error instanceof Error ? error.message : String(error)}`);
+        // Try to remove and recreate
+        await this.removeContainer(config.id);
+      }
+    }
+
+    // Build docker run command
+    const args = await this.buildRunArgs(config);
+    const cmd = `docker run ${args.join(' ')}`;
+
+    logger.info(`Starting container: ${containerName}`);
+    logger.dim(`Command: ${cmd}`);
+
+    try {
+      await execAsync(cmd, { timeout: TIMING.CONTAINER_STARTUP_TIMEOUT_MS });
+      logger.success(`Container ${containerName} started`);
+
+      // Wait for container to be healthy
+      await this.waitForHealth(containerName);
+
+      return true;
+    } catch (error) {
+      logger.error(`Failed to start container: ${error instanceof Error ? error.message : String(error)}`);
+      return false;
+    }
+  }
+
+  /**
+   * Stop a container
+   */
+  public async stopContainer(containerId: string, timeout: number = 30): Promise<boolean> {
+    const containerName = `modelgrid-${containerId}`;
+
+    try {
+      const dockerId = await this.getContainerIdByName(containerName);
+      if (!dockerId) {
+        logger.dim(`Container ${containerName} not found`);
+        return true;
+      }
+
+      logger.info(`Stopping container: ${containerName}`);
+      await execAsync(`docker stop -t ${timeout} ${dockerId}`, {
+        timeout: (timeout + 10) * 1000,
+      });
+      logger.success(`Container ${containerName} stopped`);
+      return true;
+    } catch (error) {
+      logger.error(`Failed to stop container: ${error instanceof Error ? error.message : String(error)}`);
+      return false;
+    }
+  }
+
+  /**
+   * Remove a container
+   */
+  public async removeContainer(containerId: string, force: boolean = true): Promise<boolean> {
+    const containerName = `modelgrid-${containerId}`;
+
+    try {
+      const dockerId = await this.getContainerIdByName(containerName);
+      if (!dockerId) {
+        return true;
+      }
+
+      const forceFlag = force ? '-f' : '';
+      await execAsync(`docker rm ${forceFlag} ${dockerId}`, { timeout: 30000 });
+      logger.success(`Container ${containerName} removed`);
+      return true;
+    } catch (error) {
+      logger.error(`Failed to remove container: ${error instanceof Error ? error.message : String(error)}`);
+      return false;
+    }
+  }
+
+  /**
+   * Restart a container
+   */
+  public async restartContainer(containerId: string): Promise<boolean> {
+    const containerName = `modelgrid-${containerId}`;
+
+    try {
+      const dockerId = await this.getContainerIdByName(containerName);
+      if (!dockerId) {
+        logger.error(`Container ${containerName} not found`);
+        return false;
+      }
+
+      await execAsync(`docker restart ${dockerId}`, {
+        timeout: TIMING.CONTAINER_STARTUP_TIMEOUT_MS,
+      });
+      logger.success(`Container ${containerName} restarted`);
+      return true;
+    } catch (error) {
+      logger.error(`Failed to restart container: ${error instanceof Error ? error.message : String(error)}`);
+      return false;
+    }
+  }
+
+  /**
+   * Get container status
+   */
+  public async getContainerStatus(config: IContainerConfig): Promise<IContainerStatus> {
+    const containerName = `modelgrid-${config.id}`;
+
+    const status: IContainerStatus = {
+      id: config.id,
+      name: config.name,
+      type: config.type,
+      running: false,
+      runStatus: 'stopped',
+      health: 'unknown',
+      loadedModels: [],
+      assignedGpus: config.gpuIds,
+      endpoint: `http://localhost:${config.externalPort || config.port}`,
+    };
+
+    try {
+      const dockerId = await this.getContainerIdByName(containerName);
+      if (!dockerId) {
+        return status;
+      }
+
+      status.dockerId = dockerId;
+
+      // Get container info
+      const { stdout } = await execAsync(
+        `docker inspect --format='{{json .}}' ${dockerId}`,
+        { timeout: 5000 },
+      );
+
+      const info = JSON.parse(stdout);
+
+      // Get run status
+      status.running = info.State.Running === true;
+      if (info.State.Running) {
+        status.runStatus = 'running';
+      } else if (info.State.Restarting) {
+        status.runStatus = 'starting';
+      } else if (info.State.ExitCode !== 0) {
+        status.runStatus = 'error';
+        status.lastError = info.State.Error || `Exit code: ${info.State.ExitCode}`;
+      } else {
+        status.runStatus = 'stopped';
+      }
+
+      // Get health status
+      if (info.State.Health) {
+        status.health = info.State.Health.Status as TContainerHealth;
+        if (info.State.Health.Log && info.State.Health.Log.length > 0) {
+          const lastLog = info.State.Health.Log[info.State.Health.Log.length - 1];
+          if (lastLog.Output) {
+            status.healthMessage = lastLog.Output.substring(0, 200);
+          }
+        }
+      }
+
+      // Get uptime
+      if (info.State.StartedAt) {
+        const startTime = new Date(info.State.StartedAt).getTime();
+        status.startTime = startTime;
+        if (status.running) {
+          status.uptime = Math.floor((Date.now() - startTime) / 1000);
+        }
+      }
+
+      // Try to get loaded models from container
+      if (status.running) {
+        status.loadedModels = await this.getLoadedModels(config);
+      }
+
+      // Get resource usage
+      const stats = await this.getContainerStats(dockerId);
+      if (stats) {
+        status.memoryUsage = stats.memoryUsage;
+        status.cpuUsage = stats.cpuUsage;
+      }
+    } catch (error) {
+      logger.dim(`Error getting container status: ${error instanceof Error ? error.message : String(error)}`);
+    }
+
+    return status;
+  }
+
+  /**
+   * Get container resource stats
+   */
+  private async getContainerStats(
+    dockerId: string,
+  ): Promise<{ memoryUsage: number; cpuUsage: number } | null> {
+    try {
+      const { stdout } = await execAsync(
+        `docker stats ${dockerId} --no-stream --format "{{.MemUsage}},{{.CPUPerc}}"`,
+        { timeout: 5000 },
+      );
+
+      const [memStr, cpuStr] = stdout.trim().split(',');
+
+      // Parse memory (e.g., "1.5GiB / 16GiB")
+      const memMatch = memStr.match(/([\d.]+)(MiB|GiB)/i);
+      let memoryUsage = 0;
+      if (memMatch) {
+        memoryUsage = parseFloat(memMatch[1]);
+        if (memMatch[2].toLowerCase() === 'gib') {
+          memoryUsage *= 1024;
+        }
+      }
+
+      // Parse CPU (e.g., "25.50%")
+      const cpuUsage = parseFloat(cpuStr.replace('%', '')) || 0;
+
+      return { memoryUsage: Math.round(memoryUsage), cpuUsage };
+    } catch {
+      return null;
+    }
+  }
+
+  /**
+   * Get loaded models from a container
+   */
+  private async getLoadedModels(config: IContainerConfig): Promise<string[]> {
+    const containerName = `modelgrid-${config.id}`;
+
+    try {
+      switch (config.type) {
+        case 'ollama': {
+          // Query Ollama API for loaded models
+          const { stdout } = await execAsync(
+            `docker exec ${containerName} curl -s http://localhost:11434/api/tags`,
+            { timeout: 5000 },
+          );
+          const data = JSON.parse(stdout);
+          return (data.models || []).map((m: { name: string }) => m.name);
+        }
+
+        case 'vllm':
+        case 'tgi': {
+          // These typically serve a single model
+          return config.models || [];
+        }
+
+        default:
+          return [];
+      }
+    } catch {
+      return [];
+    }
+  }
+
+  /**
+   * Execute a command inside a container
+   */
+  public async exec(
+    containerId: string,
+    command: string,
+    timeout: number = 30000,
+  ): Promise<IContainerExecResult> {
+    const containerName = `modelgrid-${containerId}`;
+
+    try {
+      const dockerId = await this.getContainerIdByName(containerName);
+      if (!dockerId) {
+        return { success: false, error: 'Container not found' };
+      }
+
+      const { stdout, stderr } = await execAsync(
+        `docker exec ${dockerId} ${command}`,
+        { timeout },
+      );
+
+      return {
+        success: true,
+        output: stdout,
+        error: stderr || undefined,
+      };
+    } catch (error) {
+      const err = error as { code?: number; stdout?: string; stderr?: string };
+      return {
+        success: false,
+        output: err.stdout,
+        error: err.stderr || (error instanceof Error ? error.message : String(error)),
+        exitCode: err.code,
+      };
+    }
+  }
+
+  /**
+   * Get container logs
+   */
+  public async getLogs(
+    containerId: string,
+    options: ILogsOptions = {},
+  ): Promise<string> {
+    const containerName = `modelgrid-${containerId}`;
+    const { lines = 100, timestamps = false } = options;
+
+    try {
+      const dockerId = await this.getContainerIdByName(containerName);
+      if (!dockerId) {
+        return '';
+      }
+
+      const args = ['logs'];
+      if (lines) args.push(`--tail=${lines}`);
+      if (timestamps) args.push('--timestamps');
+      args.push(dockerId);
+
+      const { stdout, stderr } = await execAsync(
+        `docker ${args.join(' ')}`,
+        { timeout: 10000 },
+      );
+
+      return stdout + stderr;
+    } catch (error) {
+      return error instanceof Error ? error.message : String(error);
+    }
+  }
+
+  /**
+   * Follow container logs (returns a way to stop following)
+   */
+  public followLogs(
+    containerId: string,
+    onData: (data: string) => void,
+  ): { stop: () => void } {
+    const containerName = `modelgrid-${containerId}`;
+
+    const child = spawn('docker', ['logs', '-f', containerName], {
+      stdio: ['ignore', 'pipe', 'pipe'],
+    });
+
+    child.stdout.on('data', (data) => onData(data.toString()));
+    child.stderr.on('data', (data) => onData(data.toString()));
+
+    return {
+      stop: () => {
+        child.kill();
+      },
+    };
+  }
+
+  /**
+   * Build docker run arguments
+   */
+  private async buildRunArgs(config: IContainerConfig): Promise<string[]> {
+    const containerName = `modelgrid-${config.id}`;
+    const args: string[] = [
+      '-d', // Detached mode
+      `--name=${containerName}`,
+      `--network=${DOCKER.DEFAULT_NETWORK}`,
+    ];
+
+    // Port mapping
+    const externalPort = config.externalPort || config.port;
+    args.push(`-p ${externalPort}:${config.port}`);
+
+    // Restart policy
+    args.push(`--restart=${config.restartPolicy}`);
+
+    // Memory limit
+    if (config.memoryLimit) {
+      args.push(`--memory=${config.memoryLimit}`);
+    }
+
+    // CPU limit
+    if (config.cpuLimit) {
+      args.push(`--cpus=${config.cpuLimit}`);
+    }
+
+    // GPU support
+    if (config.gpuIds && config.gpuIds.length > 0) {
+      const gpuArgs = await this.driverManager.getDockerGpuArgs(config.gpuIds);
+      args.push(...gpuArgs);
+    }
+
+    // Environment variables
+    if (config.env) {
+      for (const [key, value] of Object.entries(config.env)) {
+        args.push(`-e ${key}=${value}`);
+      }
+    }
+
+    // Volume mounts
+    if (config.volumes) {
+      for (const volume of config.volumes) {
+        args.push(`-v ${volume}`);
+      }
+    }
+
+    // Add image
+    args.push(config.image);
+
+    // Add custom command if provided
+    if (config.command && config.command.length > 0) {
+      args.push(...config.command);
+    }
+
+    return args;
+  }
+
+  /**
+   * Get Docker container ID by name
+   */
+  private async getContainerIdByName(name: string): Promise<string | null> {
+    try {
+      const { stdout } = await execAsync(
+        `docker ps -a --filter "name=^${name}$" --format "{{.ID}}"`,
+        { timeout: 5000 },
+      );
+      return stdout.trim() || null;
+    } catch {
+      return null;
+    }
+  }
+
+  /**
+   * Check if a container is running
+   */
+  private async isContainerRunning(dockerId: string): Promise<boolean> {
+    try {
+      const { stdout } = await execAsync(
+        `docker inspect --format='{{.State.Running}}' ${dockerId}`,
+        { timeout: 5000 },
+      );
+      return stdout.trim() === 'true';
+    } catch {
+      return false;
+    }
+  }
+
+  /**
+   * Wait for container to be healthy
+   */
+  private async waitForHealth(
+    containerName: string,
+    timeout: number = TIMING.CONTAINER_STARTUP_TIMEOUT_MS,
+  ): Promise<boolean> {
+    const startTime = Date.now();
+    const checkInterval = 2000;
+
+    while (Date.now() - startTime < timeout) {
+      try {
+        const { stdout } = await execAsync(
+          `docker inspect --format='{{.State.Health.Status}}' ${containerName} 2>/dev/null || echo "none"`,
+          { timeout: 5000 },
+        );
+
+        const status = stdout.trim();
+
+        if (status === 'healthy') {
+          return true;
+        }
+
+        if (status === 'none') {
+          // Container has no health check, assume healthy if running
+          const { stdout: running } = await execAsync(
+            `docker inspect --format='{{.State.Running}}' ${containerName}`,
+            { timeout: 5000 },
+          );
+          if (running.trim() === 'true') {
+            return true;
+          }
+        }
+
+        if (status === 'unhealthy') {
+          logger.warn(`Container ${containerName} is unhealthy`);
+          return false;
+        }
+      } catch {
+        // Container might not be ready yet
+      }
+
+      await this.sleep(checkInterval);
+    }
+
+    logger.warn(`Timeout waiting for container ${containerName} to be healthy`);
+    return false;
+  }
+
+  /**
+   * Sleep helper
+   */
+  private async sleep(ms: number): Promise<void> {
+    return new Promise((resolve) => setTimeout(resolve, ms));
+  }
+}
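For reference while reviewing, a sketch of driving ContainerRuntime directly. Only the config fields that buildRunArgs() reads are filled in; the full required shape is defined in ts/interfaces/container.ts (not shown here), so the object is cast rather than fully annotated:

// sketch.ts - illustrative only, not part of the patch
import { ContainerRuntime } from './docker/container-runtime.ts';
import type { IContainerConfig } from './interfaces/container.ts';

const runtime = new ContainerRuntime();
// Cast because IContainerConfig may require fields beyond those used here.
const config = {
  id: 'ollama-0',
  name: 'Ollama',
  type: 'ollama',
  image: 'ollama/ollama:latest',
  port: 11434,
  restartPolicy: 'unless-stopped',
} as IContainerConfig;

if (await runtime.startContainer(config)) {
  // startContainer() already waited for health; report the observed state.
  const status = await runtime.getContainerStatus(config);
  console.log(status.runStatus, status.health, status.endpoint);
}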
diff --git a/ts/docker/docker-manager.ts b/ts/docker/docker-manager.ts
new file mode 100644
index 0000000..8f898b6
--- /dev/null
+++ b/ts/docker/docker-manager.ts
@@ -0,0 +1,509 @@
+/**
+ * Docker Manager
+ *
+ * Handles Docker installation, configuration, and management.
+ */
+
+import { exec } from 'node:child_process';
+import { promisify } from 'node:util';
+import * as fs from 'node:fs';
+import { logger } from '../logger.ts';
+import { DOCKER, TIMING } from '../constants.ts';
+
+const execAsync = promisify(exec);
+
+/**
+ * Docker installation result
+ */
+export interface IDockerInstallResult {
+  success: boolean;
+  version?: string;
+  error?: string;
+}
+
+/**
+ * Docker status information
+ */
+export interface IDockerStatus {
+  installed: boolean;
+  running: boolean;
+  version?: string;
+  runtimes: string[];
+  hasNvidiaRuntime: boolean;
+  networkExists: boolean;
+  storageDriver?: string;
+  rootDir?: string;
+}
+
+/**
+ * Docker Manager class
+ */
+export class DockerManager {
+  private networkName: string;
+
+  constructor(networkName: string = DOCKER.DEFAULT_NETWORK) {
+    this.networkName = networkName;
+  }
+
+  /**
+   * Check if Docker is installed
+   */
+  public async isInstalled(): Promise<boolean> {
+    try {
+      const { stdout } = await execAsync('docker --version', { timeout: 5000 });
+      return stdout.includes('Docker');
+    } catch {
+      return false;
+    }
+  }
+
+  /**
+   * Check if Docker daemon is running
+   */
+  public async isRunning(): Promise<boolean> {
+    try {
+      await execAsync('docker info', { timeout: TIMING.DOCKER_COMMAND_TIMEOUT_MS });
+      return true;
+    } catch {
+      return false;
+    }
+  }
+
+  /**
+   * Get Docker version
+   */
+  public async getVersion(): Promise<string | undefined> {
+    try {
+      const { stdout } = await execAsync('docker --version', { timeout: 5000 });
+      const match = stdout.match(/Docker version (\d+\.\d+\.\d+)/);
+      return match ? match[1] : undefined;
+    } catch {
+      return undefined;
+    }
+  }
+
+  /**
+   * Get full Docker status
+   */
+  public async getStatus(): Promise<IDockerStatus> {
+    const status: IDockerStatus = {
+      installed: false,
+      running: false,
+      runtimes: [],
+      hasNvidiaRuntime: false,
+      networkExists: false,
+    };
+
+    // Check installation
+    status.installed = await this.isInstalled();
+    if (!status.installed) {
+      return status;
+    }
+
+    status.version = await this.getVersion();
+
+    // Check if running
+    status.running = await this.isRunning();
+    if (!status.running) {
+      return status;
+    }
+
+    // Get detailed info
+    try {
+      const { stdout } = await execAsync('docker info --format json', {
+        timeout: TIMING.DOCKER_COMMAND_TIMEOUT_MS,
+      });
+
+      const info = JSON.parse(stdout);
+
+      // Get runtimes
+      if (info.Runtimes) {
+        status.runtimes = Object.keys(info.Runtimes);
+        status.hasNvidiaRuntime = status.runtimes.includes('nvidia');
+      }
+
+      status.storageDriver = info.Driver;
+      status.rootDir = info.DockerRootDir;
+    } catch {
+      // Try alternative method for runtimes
+      try {
+        const { stdout } = await execAsync('docker info 2>/dev/null | grep -i "runtimes"', {
+          timeout: 5000,
+        });
+        status.hasNvidiaRuntime = stdout.toLowerCase().includes('nvidia');
+      } catch {
+        // Ignore
+      }
+    }
+
+    // Check network exists
+    status.networkExists = await this.networkExists();
+
+    return status;
+  }
+
+  /**
+   * Install Docker on the system
+   */
+  public async install(): Promise<IDockerInstallResult> {
+    try {
+      // Check if already installed
+      if (await this.isInstalled()) {
+        return {
+          success: true,
+          version: await this.getVersion(),
+        };
+      }
+
+      // Detect distribution
+      const distro = await this.getLinuxDistro();
+      logger.info(`Installing Docker on ${distro.id}...`);
+
+      if (distro.id === 'ubuntu' || distro.id === 'debian') {
+        await this.installOnDebian();
+      } else if (
+        distro.id === 'fedora' ||
+        distro.id === 'rhel' ||
+        distro.id === 'centos' ||
+        distro.id === 'rocky' ||
+        distro.id === 'almalinux'
+      ) {
+        await this.installOnRhel();
+      } else {
+        // Use convenience script as fallback
+        await this.installWithScript();
+      }
+
+      // Start Docker service
+      await this.startService();
+
+      // Verify installation
+      const version = await this.getVersion();
+      if (version) {
+        logger.success(`Docker ${version} installed successfully`);
+        return { success: true, version };
+      }
+
+      return { success: false, error: 'Installation completed but Docker not found' };
+    } catch (error) {
+      return {
+        success: false,
+        error: error instanceof Error ? error.message : String(error),
+      };
+    }
+  }
+
+  /**
+   * Install Docker on Debian/Ubuntu
+   */
+  private async installOnDebian(): Promise<void> {
+    // Remove old versions
+    await execAsync(
+      'apt-get remove -y docker docker-engine docker.io containerd runc || true',
+      { timeout: 60000 },
+    );
+
+    // Install prerequisites
+    await execAsync('apt-get update', { timeout: 120000 });
+    await execAsync(
+      'DEBIAN_FRONTEND=noninteractive apt-get install -y ca-certificates curl gnupg',
+      { timeout: 120000 },
+    );
+
+    // Add Docker's official GPG key
+    await execAsync('install -m 0755 -d /etc/apt/keyrings');
+
+    const distro = await this.getLinuxDistro();
+    const isUbuntu = distro.id === 'ubuntu';
+
+    if (isUbuntu) {
+      await execAsync(
+        'curl -fsSL https://download.docker.com/linux/ubuntu/gpg | gpg --dearmor -o /etc/apt/keyrings/docker.gpg',
+      );
+      await execAsync('chmod a+r /etc/apt/keyrings/docker.gpg');
+
+      // Get Ubuntu codename
+      const { stdout: codename } = await execAsync('lsb_release -cs');
+      await execAsync(
+        `echo "deb [arch=$(dpkg --print-architecture) signed-by=/etc/apt/keyrings/docker.gpg] https://download.docker.com/linux/ubuntu ${codename.trim()} stable" > /etc/apt/sources.list.d/docker.list`,
+      );
+    } else {
+      await execAsync(
+        'curl -fsSL https://download.docker.com/linux/debian/gpg | gpg --dearmor -o /etc/apt/keyrings/docker.gpg',
+      );
+      await execAsync('chmod a+r /etc/apt/keyrings/docker.gpg');
+
+      const { stdout: codename } = await execAsync('lsb_release -cs');
+      await execAsync(
+        `echo "deb [arch=$(dpkg --print-architecture) signed-by=/etc/apt/keyrings/docker.gpg] https://download.docker.com/linux/debian ${codename.trim()} stable" > /etc/apt/sources.list.d/docker.list`,
+      );
+    }
+
+    // Install Docker
+    await execAsync('apt-get update', { timeout: 120000 });
+    await execAsync(
+      'DEBIAN_FRONTEND=noninteractive apt-get install -y docker-ce docker-ce-cli containerd.io docker-buildx-plugin docker-compose-plugin',
+      { timeout: 300000 },
+    );
+  }
+
+  /**
+   * Install Docker on RHEL/Fedora/CentOS
+   */
+  private async installOnRhel(): Promise<void> {
+    const distro = await this.getLinuxDistro();
+    const isFedora = distro.id === 'fedora';
+
+    // Remove old versions
+    await execAsync(
+      'dnf remove -y docker docker-client docker-client-latest docker-common docker-latest docker-latest-logrotate docker-logrotate docker-engine || true',
+      { timeout: 60000 },
+    );
+
+    // Install prerequisites
+    await execAsync('dnf install -y dnf-plugins-core', { timeout: 120000 });
+
+    // Add Docker repository
+    const repoUrl = isFedora
+      ? 'https://download.docker.com/linux/fedora/docker-ce.repo'
+      : 'https://download.docker.com/linux/centos/docker-ce.repo';
+
+    await execAsync(`dnf config-manager --add-repo ${repoUrl}`);
+
+    // Install Docker
+    await execAsync(
+      'dnf install -y docker-ce docker-ce-cli containerd.io docker-buildx-plugin docker-compose-plugin',
+      { timeout: 300000 },
+    );
+  }
+
+  /**
+   * Install Docker using convenience script
+   */
+  private async installWithScript(): Promise<void> {
+    logger.info('Installing Docker using convenience script...');
+    await execAsync('curl -fsSL https://get.docker.com | sh', {
+      timeout: 600000, // 10 minutes
+    });
+  }
+
+  /**
+   * Start Docker service
+   */
+  public async startService(): Promise<void> {
+    try {
+      await execAsync('systemctl start docker');
+      await execAsync('systemctl enable docker');
+      logger.success('Docker service started and enabled');
+    } catch (error) {
+      logger.warn(`Could not start Docker service: ${error instanceof Error ? error.message : String(error)}`);
+    }
+  }
+
+  /**
+   * Stop Docker service
+   */
+  public async stopService(): Promise<void> {
+    try {
+      await execAsync('systemctl stop docker');
+      logger.success('Docker service stopped');
+    } catch (error) {
+      logger.warn(`Could not stop Docker service: ${error instanceof Error ? error.message : String(error)}`);
+    }
+  }
+
+  /**
+   * Restart Docker service
+   */
+  public async restartService(): Promise<void> {
+    try {
+      await execAsync('systemctl restart docker');
+      logger.success('Docker service restarted');
+    } catch (error) {
+      logger.warn(`Could not restart Docker service: ${error instanceof Error ? error.message : String(error)}`);
+    }
+  }
+
+  /**
+   * Check if ModelGrid network exists
+   */
+  public async networkExists(): Promise<boolean> {
+    try {
+      await execAsync(`docker network inspect ${this.networkName}`, { timeout: 5000 });
+      return true;
+    } catch {
+      return false;
+    }
+  }
+
+  /**
+   * Create the ModelGrid Docker network
+   */
+  public async createNetwork(): Promise<boolean> {
+    try {
+      if (await this.networkExists()) {
+        logger.dim(`Network '${this.networkName}' already exists`);
+        return true;
+      }
+
+      await execAsync(`docker network create ${this.networkName}`, {
+        timeout: TIMING.DOCKER_COMMAND_TIMEOUT_MS,
+      });
+      logger.success(`Created Docker network '${this.networkName}'`);
+      return true;
+    } catch (error) {
+      logger.error(`Failed to create network: ${error instanceof Error ? error.message : String(error)}`);
+      return false;
+    }
+  }
+
+  /**
+   * Remove the ModelGrid Docker network
+   */
+  public async removeNetwork(): Promise<boolean> {
+    try {
+      if (!await this.networkExists()) {
+        return true;
+      }
+
+      await execAsync(`docker network rm ${this.networkName}`, {
+        timeout: TIMING.DOCKER_COMMAND_TIMEOUT_MS,
+      });
+      logger.success(`Removed Docker network '${this.networkName}'`);
+      return true;
+    } catch (error) {
+      logger.error(`Failed to remove network: ${error instanceof Error ? error.message : String(error)}`);
+      return false;
+    }
+  }
+
+  /**
+   * Pull a Docker image
+   */
+  public async pullImage(image: string): Promise<boolean> {
+    try {
+      logger.info(`Pulling image: ${image}`);
+      await execAsync(`docker pull ${image}`, {
+        timeout: 600000, // 10 minutes for large images
+      });
+      logger.success(`Pulled image: ${image}`);
+      return true;
+    } catch (error) {
+      logger.error(`Failed to pull image: ${error instanceof Error ? error.message : String(error)}`);
+      return false;
+    }
+  }
+
+  /**
+   * Check if an image exists locally
+   */
+  public async imageExists(image: string): Promise<boolean> {
+    try {
+      await execAsync(`docker image inspect ${image}`, { timeout: 5000 });
+      return true;
+    } catch {
+      return false;
+    }
+  }
+
+  /**
+   * List running containers
+   */
+  public async listContainers(all: boolean = false): Promise<string[]> {
+    try {
+      const flag = all ? '-a' : '';
+      const { stdout } = await execAsync(
+        `docker ps ${flag} --format "{{.ID}}"`,
+        { timeout: 5000 },
+      );
+      return stdout.trim().split('\n').filter((id) => id);
+    } catch {
+      return [];
+    }
+  }
+
+  /**
+   * Get container by name
+   */
+  public async getContainerByName(name: string): Promise<string | null> {
+    try {
+      const { stdout } = await execAsync(
+        `docker ps -a --filter "name=${name}" --format "{{.ID}}"`,
+        { timeout: 5000 },
+      );
+      const id = stdout.trim();
+      return id || null;
+    } catch {
+      return null;
+    }
+  }
+
+  /**
+   * Add user to docker group
+   */
+  public async addUserToDockerGroup(username?: string): Promise<boolean> {
+    try {
+      const user = username || process.env.SUDO_USER || process.env.USER || '';
+      if (!user) {
+        logger.warn('Could not determine username for docker group');
+        return false;
+      }
+
+      await execAsync(`usermod -aG docker ${user}`);
+      logger.success(`Added user '${user}' to docker group`);
+      logger.info('Log out and log back in for the change to take effect');
+      return true;
+    } catch (error) {
+      logger.error(`Failed to add user to docker group: ${error instanceof Error ? error.message : String(error)}`);
+      return false;
+    }
+  }
+
+  /**
+   * Get Linux distribution info
+   */
+  private async getLinuxDistro(): Promise<{ id: string; version: string }> {
+    try {
+      const content = await fs.promises.readFile('/etc/os-release', 'utf8');
+      const idMatch = content.match(/^ID=["']?(\w+)["']?$/m);
+      const versionMatch = content.match(/^VERSION_ID=["']?([\d.]+)["']?$/m);
+
+      return {
+        id: idMatch ? idMatch[1].toLowerCase() : 'unknown',
+        version: versionMatch ? versionMatch[1] : '',
+      };
+    } catch {
+      return { id: 'unknown', version: '' };
+    }
+  }
+
+  /**
+   * Print Docker status
+   */
+  public async printStatus(): Promise<void> {
+    const status = await this.getStatus();
+
+    const lines: string[] = [];
+    lines.push(`Installed: ${status.installed ? 'Yes' : 'No'}`);
+
+    if (status.installed) {
+      lines.push(`Version: ${status.version || 'Unknown'}`);
+      lines.push(`Running: ${status.running ? 'Yes' : 'No'}`);
+
+      if (status.running) {
+        lines.push(`NVIDIA Runtime: ${status.hasNvidiaRuntime ? 'Yes' : 'No'}`);
+        lines.push(`ModelGrid Network: ${status.networkExists ? 'Yes' : 'No'}`);
+        if (status.storageDriver) {
+          lines.push(`Storage Driver: ${status.storageDriver}`);
+        }
+      }
+    }
+
+    logger.logBox(
+      'Docker Status',
+      lines,
+      50,
+      status.installed && status.running ? 'success' : status.installed ? 'warning' : 'error',
+    );
+  }
+}
diff --git a/ts/docker/index.ts b/ts/docker/index.ts
new file mode 100644
index 0000000..0c910e1
--- /dev/null
+++ b/ts/docker/index.ts
@@ -0,0 +1,8 @@
+/**
+ * Docker Management Module
+ *
+ * Exports all Docker-related functionality.
+ */
+
+export { DockerManager } from './docker-manager.ts';
+export { ContainerRuntime } from './container-runtime.ts';
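Taken together, the methods above imply this host-preparation sequence; a sketch using only the DockerManager API defined in this file:

// sketch.ts - illustrative only, not part of the patch
import { DockerManager } from './docker/index.ts';

const docker = new DockerManager(); // defaults to DOCKER.DEFAULT_NETWORK

if (!(await docker.isInstalled())) {
  const result = await docker.install(); // apt, dnf, or convenience script
  if (!result.success) throw new Error(result.error);
}
if (!(await docker.isRunning())) {
  await docker.startService();
}
await docker.createNetwork(); // no-op if the network already exists
await docker.pullImage('ollama/ollama:latest');
await docker.printStatus();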
diff --git a/ts/drivers/amd.ts b/ts/drivers/amd.ts
new file mode 100644
index 0000000..35686ff
--- /dev/null
+++ b/ts/drivers/amd.ts
@@ -0,0 +1,281 @@
+/**
+ * AMD Driver Management
+ *
+ * Handles AMD ROCm driver detection, installation, and container setup.
+ */
+
+import type { IDriverStatus } from '../interfaces/gpu.ts';
+import { logger } from '../logger.ts';
+import { BaseDriver, type IDriverInstallOptions } from './base-driver.ts';
+
+/**
+ * AMD ROCm Driver Manager
+ */
+export class AmdDriver extends BaseDriver {
+  public readonly vendor = 'amd' as const;
+  public readonly displayName = 'AMD ROCm';
+
+  /**
+   * Check if AMD ROCm driver is installed
+   */
+  public async isInstalled(): Promise<boolean> {
+    try {
+      const { stdout } = await this.execCommand('rocm-smi --showdriverversion 2>/dev/null | head -1', {
+        timeout: 5000,
+        ignoreErrors: true,
+      });
+      return stdout.includes('Driver');
+    } catch {
+      return false;
+    }
+  }
+
+  /**
+   * Get AMD ROCm driver status
+   */
+  public async getStatus(): Promise<IDriverStatus> {
+    const status: IDriverStatus = {
+      vendor: 'amd',
+      installed: false,
+      containerSupport: false,
+      issues: [],
+    };
+
+    // Check if rocm-smi is available
+    try {
+      const { stdout: driverInfo } = await this.execCommand(
+        'rocm-smi --showdriverversion 2>/dev/null',
+        { timeout: 5000, ignoreErrors: true },
+      );
+
+      if (driverInfo.includes('Driver')) {
+        status.installed = true;
+        const match = driverInfo.match(/Driver version:\s*(\S+)/i);
+        if (match) {
+          status.version = match[1];
+        }
+      }
+    } catch {
+      status.issues.push('ROCm driver not installed or rocm-smi not available');
+      return status;
+    }
+
+    // Check ROCm toolkit version
+    try {
+      const { stdout: rocmVersion } = await this.execCommand(
+        'cat /opt/rocm/.info/version 2>/dev/null || rocminfo 2>/dev/null | grep "ROCm" | head -1',
+        { timeout: 5000, ignoreErrors: true },
+      );
+      const match = rocmVersion.match(/(\d+\.\d+(?:\.\d+)?)/);
+      if (match) {
+        status.toolkitVersion = match[1];
+      }
+    } catch {
+      // ROCm toolkit version not available
+    }
+
+    // Check Docker ROCm support
+    try {
+      const { stdout: dockerInfo } = await this.execCommand(
+        'docker info 2>/dev/null | grep -i "rocm\\|amd"',
+        { timeout: 5000, ignoreErrors: true },
+      );
+
+      // Check whether the device files ROCm containers need are present
+      const { stdout: deviceCheck } = await this.execCommand(
+        'ls /dev/kfd /dev/dri/render* 2>/dev/null',
+        { timeout: 5000, ignoreErrors: true },
+      );
+
+      if (deviceCheck.includes('/dev/kfd') || dockerInfo.includes('rocm')) {
+        status.containerSupport = true;
+      } else {
+        status.issues.push('ROCm device files not available for container access');
+      }
+    } catch {
+      status.issues.push('Could not verify Docker ROCm support');
+    }
+
+    return status;
+  }
+
+  /**
+   * Install AMD ROCm driver
+   */
+  public async install(options: IDriverInstallOptions): Promise<boolean> {
+    if (!await this.isRoot()) {
+      logger.error('Root privileges required to install AMD ROCm drivers');
+      return false;
+    }
+
+    const distro = await this.getLinuxDistro();
+    logger.info(`Detected Linux distribution: ${distro.id} ${distro.version}`);
+
+    try {
+      if (distro.id === 'ubuntu') {
+        return await this.installOnUbuntu(options);
+      } else if (distro.id === 'rhel' || distro.id === 'centos' || distro.id === 'rocky' || distro.id === 'almalinux') {
+        return await this.installOnRhel(options);
+      } else {
+        logger.error(`Unsupported distribution: ${distro.id}`);
+        logger.info('Please install ROCm drivers manually from https://rocm.docs.amd.com/');
+        return false;
+      }
+    } catch (error) {
+      logger.error(`Failed to install AMD ROCm drivers: ${error instanceof Error ? error.message : String(error)}`);
+      return false;
+    }
+  }
+
+  /**
+   * Install on Ubuntu
+   */
+  private async installOnUbuntu(options: IDriverInstallOptions): Promise<boolean> {
+    logger.info('Installing AMD ROCm on Ubuntu...');
+
+    // Install prerequisites
+    await this.aptUpdate();
+    await this.aptInstall(['wget', 'gnupg2']);
+
+    // Add ROCm repository
+    const rocmVersion = options.toolkitVersion || '6.0';
+    const ubuntuVersion = (await this.getLinuxDistro()).version.replace('.', '');
+
+    // Download and add the ROCm repository signing key
+    await this.execCommand(
+      `wget -q https://repo.radeon.com/rocm/rocm.gpg.key -O - | apt-key add -`,
+    );
+
+    await this.execCommand(
+      `echo "deb [arch=amd64] https://repo.radeon.com/rocm/apt/${rocmVersion} ubuntu main" > /etc/apt/sources.list.d/rocm.list`,
+    );
+
+    // Add AMDGPU repository
+    await this.execCommand(
+      `echo "deb [arch=amd64] https://repo.radeon.com/amdgpu/${rocmVersion}/ubuntu ${ubuntuVersion === '2204' ? 'jammy' : 'focal'} main" > /etc/apt/sources.list.d/amdgpu.list`,
+    );
+
+    await this.aptUpdate();
+
+    // Install AMDGPU driver and ROCm
+    await this.aptInstall('amdgpu-dkms');
+
+    if (options.installToolkit) {
+      await this.aptInstall('rocm-hip-sdk');
+    } else {
+      await this.aptInstall('rocm-smi-lib');
+    }
+
+    // Add user to video and render groups
+    await this.execCommand('usermod -a -G video,render $SUDO_USER || true');
+
+    // Install container support if requested
+    if (options.installContainerSupport) {
+      await this.installContainerSupport();
+    }
+
+    logger.success('AMD ROCm installation completed');
+    logger.warn('A system reboot is required to load the new driver');
+    logger.info('After reboot, verify with: rocm-smi');
+    return true;
+  }
+
+  /**
+   * Install on RHEL
+   */
+  private async installOnRhel(options: IDriverInstallOptions): Promise<boolean> {
+    logger.info('Installing AMD ROCm on RHEL/CentOS...');
+
+    const rocmVersion = options.toolkitVersion || '6.0';
+    const distro = await this.getLinuxDistro();
+    const rhelVersion = distro.version.split('.')[0];
+
+    // Add EPEL repository
+    await this.dnfInstall('epel-release');
+
+    // Add ROCm repository
+    await this.execCommand(
+      `cat <<EOF > /etc/yum.repos.d/rocm.repo
+[ROCm]
+name=ROCm
+baseurl=https://repo.radeon.com/rocm/yum/${rocmVersion}/main
+enabled=1
+gpgcheck=1
+gpgkey=https://repo.radeon.com/rocm/rocm.gpg.key
+EOF`,
+    );
+
+    // Add AMDGPU repository
+    await this.execCommand(
+      `cat <<EOF > /etc/yum.repos.d/amdgpu.repo
+[amdgpu]
+name=amdgpu
+baseurl=https://repo.radeon.com/amdgpu/${rocmVersion}/rhel/${rhelVersion}/main/x86_64/
+enabled=1
+gpgcheck=1
+gpgkey=https://repo.radeon.com/rocm/rocm.gpg.key
+EOF`,
+    );
+
+    // Install AMDGPU driver
+    await this.dnfInstall('amdgpu-dkms');
+
+    if (options.installToolkit) {
+      await this.dnfInstall('rocm-hip-sdk');
+    } else {
+      await this.dnfInstall('rocm-smi-lib');
+    }
+
+    // Add user to video and render groups
+    await this.execCommand('usermod -a -G video,render $SUDO_USER || true');
+
+    // Install container support if requested
+    if (options.installContainerSupport) {
+      await this.installContainerSupport();
+    }
+
+    logger.success('AMD ROCm installation completed');
+    logger.warn('A system reboot is required to load the new driver');
+    return true;
+  }
+
+  /**
+   * Install container support for AMD GPUs
+   */
+  public async installContainerSupport(): Promise<boolean> {
+    logger.info('Configuring Docker for AMD ROCm...');
+
+    try {
+      // AMD ROCm containers work by passing through device files;
+      // no special runtime is needed, only the --device flags.
+
+      // Verify device files exist
+      const { stdout: devices } = await this.execCommand('ls -la /dev/kfd /dev/dri/render* 2>/dev/null || true');
+
+      if (!devices.includes('/dev/kfd')) {
+        logger.warn('/dev/kfd not found. ROCm driver may not be properly loaded.');
+        logger.info('Try rebooting the system after driver installation.');
+        return false;
+      }
+
+      // Set permissions
+      await this.execCommand('chmod 666 /dev/kfd /dev/dri/render* || true');
+
+      logger.success('AMD ROCm container support configured');
+      logger.info('Use the following Docker flags for ROCm containers:');
+      logger.info(' --device=/dev/kfd --device=/dev/dri --group-add video');
+      return true;
+    } catch (error) {
+      logger.error(`Failed to configure ROCm container support: ${error instanceof Error ? error.message : String(error)}`);
+      return false;
+    }
+  }
+
+  /**
+   * Get available ROCm versions
+   */
+  public async getAvailableVersions(): Promise<string[]> {
+    // ROCm has a standard set of supported versions
+    return ['6.0', '5.7', '5.6', '5.5', '5.4'];
+  }
+}
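A sketch of the intended readiness check before scheduling work onto AMD GPUs, using only methods defined above:

// sketch.ts - illustrative only, not part of the patch
import { AmdDriver } from './drivers/amd.ts';

const amd = new AmdDriver();
const status = await amd.getStatus();

if (!status.installed) {
  // install() requires root and supports Ubuntu and the RHEL family.
  await amd.install({
    installToolkit: false,
    installContainerSupport: true,
    nonInteractive: true,
  });
} else if (!status.containerSupport) {
  await amd.installContainerSupport(); // verifies /dev/kfd and render nodes
}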
diff --git a/ts/drivers/base-driver.ts b/ts/drivers/base-driver.ts
new file mode 100644
index 0000000..7a84e5e
--- /dev/null
+++ b/ts/drivers/base-driver.ts
@@ -0,0 +1,217 @@
+/**
+ * Base Driver Class
+ *
+ * Abstract base class for GPU driver management.
+ */
+
+import { exec } from 'node:child_process';
+import { promisify } from 'node:util';
+import type { IDriverStatus, TGpuVendor } from '../interfaces/gpu.ts';
+import { logger } from '../logger.ts';
+
+const execAsync = promisify(exec);
+
+/**
+ * Options for driver installation
+ */
+export interface IDriverInstallOptions {
+  /** Whether to install the GPU toolkit (CUDA, ROCm, oneAPI) */
+  installToolkit: boolean;
+  /** Whether to install container support (nvidia-docker, etc.) */
+  installContainerSupport: boolean;
+  /** Specific driver version to install (optional) */
+  driverVersion?: string;
+  /** Specific toolkit version to install (optional) */
+  toolkitVersion?: string;
+  /** Whether to run non-interactively */
+  nonInteractive: boolean;
+}
+
+/**
+ * Abstract base class for GPU drivers
+ */
+export abstract class BaseDriver {
+  /** GPU vendor this driver supports */
+  public abstract readonly vendor: TGpuVendor;
+
+  /** Display name for this driver */
+  public abstract readonly displayName: string;
+
+  /**
+   * Check if the driver is installed
+   */
+  public abstract isInstalled(): Promise<boolean>;
+
+  /**
+   * Get the current driver status
+   */
+  public abstract getStatus(): Promise<IDriverStatus>;
+
+  /**
+   * Install the driver
+   */
+  public abstract install(options: IDriverInstallOptions): Promise<boolean>;
+
+  /**
+   * Install container runtime support (e.g., nvidia-docker)
+   */
+  public abstract installContainerSupport(): Promise<boolean>;
+
+  /**
+   * Get available driver versions
+   */
+  public abstract getAvailableVersions(): Promise<string[]>;
+
+  /**
+   * Execute a shell command with error handling
+   */
+  protected async execCommand(
+    command: string,
+    options: { timeout?: number; ignoreErrors?: boolean } = {},
+  ): Promise<{ stdout: string; stderr: string }> {
+    const { timeout = 30000, ignoreErrors = false } = options;
+
+    try {
+      const result = await execAsync(command, { timeout });
+      return { stdout: result.stdout, stderr: result.stderr };
+    } catch (error) {
+      if (ignoreErrors) {
+        return { stdout: '', stderr: String(error) };
+      }
+      throw error;
+    }
+  }
+
+  /**
+   * Check if running as root
+   */
+  protected async isRoot(): Promise<boolean> {
+    try {
+      const { stdout } = await this.execCommand('id -u');
+      return stdout.trim() === '0';
+    } catch {
+      return false;
+    }
+  }
+
+  /**
+   * Get the Linux distribution
+   */
+  protected async getLinuxDistro(): Promise<{ id: string; version: string }> {
+    try {
+      const { stdout } = await this.execCommand('cat /etc/os-release', { ignoreErrors: true });
+
+      const idMatch = stdout.match(/^ID=["']?(\w+)["']?$/m);
+      const versionMatch = stdout.match(/^VERSION_ID=["']?([\d.]+)["']?$/m);
+
+      return {
+        id: idMatch ? idMatch[1].toLowerCase() : 'unknown',
+        version: versionMatch ? versionMatch[1] : '',
+      };
+    } catch {
+      return { id: 'unknown', version: '' };
+    }
+  }
+
+  /**
+   * Check if a package is installed (apt-based)
+   */
+  protected async isAptPackageInstalled(packageName: string): Promise<boolean> {
+    try {
+      const { stdout } = await this.execCommand(`dpkg -l ${packageName} 2>/dev/null | grep "^ii"`, {
+        ignoreErrors: true,
+      });
+      return stdout.includes(packageName);
+    } catch {
+      return false;
+    }
+  }
+
+  /**
+   * Check if a package is installed (dnf/yum-based)
+   */
+  protected async isDnfPackageInstalled(packageName: string): Promise<boolean> {
+    try {
+      const { stdout } = await this.execCommand(`rpm -q ${packageName} 2>/dev/null`, {
+        ignoreErrors: true,
+      });
+      return !stdout.includes('not installed');
+    } catch {
+      return false;
+    }
+  }
+
+  /**
+   * Run apt-get update
+   */
+  protected async aptUpdate(): Promise<void> {
+    logger.info('Updating package lists...');
+    await this.execCommand('apt-get update', { timeout: 120000 });
+  }
+
+  /**
+   * Install a package using apt
+   */
+  protected async aptInstall(packages: string | string[]): Promise<void> {
+    const pkgList = Array.isArray(packages) ? packages.join(' ') : packages;
+    logger.info(`Installing packages: ${pkgList}`);
+    await this.execCommand(`DEBIAN_FRONTEND=noninteractive apt-get install -y ${pkgList}`, {
+      timeout: 600000, // 10 minutes for large packages
+    });
+  }
+
+  /**
+   * Install a package using dnf
+   */
+  protected async dnfInstall(packages: string | string[]): Promise<void> {
+    const pkgList = Array.isArray(packages) ? packages.join(' ') : packages;
+    logger.info(`Installing packages: ${pkgList}`);
+    await this.execCommand(`dnf install -y ${pkgList}`, {
+      timeout: 600000,
+    });
+  }
+
+  /**
+   * Add an apt repository
+   */
+  protected async addAptRepository(repo: string, keyUrl?: string): Promise<void> {
+    if (keyUrl) {
+      // Add GPG key
+      await this.execCommand(`curl -fsSL ${keyUrl} | gpg --dearmor -o /usr/share/keyrings/$(basename ${keyUrl}).gpg`);
+    }
+    await this.execCommand(`add-apt-repository -y "${repo}"`);
+  }
+
+  /**
+   * Log driver status summary
+   */
+  public async logStatus(): Promise<void> {
+    const status = await this.getStatus();
+
+    logger.logBoxTitle(`${this.displayName} Driver Status`, 60, status.installed ? 'success' : 'warning');
+    logger.logBoxLine(`Installed: ${status.installed ? 'Yes' : 'No'}`);
+
+    if (status.installed) {
+      if (status.version) {
+        logger.logBoxLine(`Driver Version: ${status.version}`);
+      }
+      if (status.toolkitVersion) {
+        logger.logBoxLine(`Toolkit Version: ${status.toolkitVersion}`);
+      }
+      logger.logBoxLine(`Container Support: ${status.containerSupport ? 'Yes' : 'No'}`);
+      if (status.containerRuntimeVersion) {
+        logger.logBoxLine(`Container Runtime: ${status.containerRuntimeVersion}`);
+      }
+    }
+
+    if (status.issues.length > 0) {
+      logger.logBoxLine('');
+      logger.logBoxLine('Issues:');
+      for (const issue of status.issues) {
+        logger.logBoxLine(` - ${issue}`);
+      }
+    }
+
+    logger.logBoxEnd();
+  }
+}
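The abstract surface above is what a new vendor integration must implement. A compilable no-op sketch, assuming the vendor name used is already part of TGpuVendor (as 'unknown' is elsewhere in this patch):

// sketch.ts - illustrative only, not part of the patch
import { BaseDriver, type IDriverInstallOptions } from './drivers/base-driver.ts';
import type { IDriverStatus } from './interfaces/gpu.ts';

class NullDriver extends BaseDriver {
  public readonly vendor = 'unknown' as const;
  public readonly displayName = 'No-op';

  public async isInstalled(): Promise<boolean> {
    return false;
  }

  public async getStatus(): Promise<IDriverStatus> {
    return { vendor: this.vendor, installed: false, containerSupport: false, issues: [] };
  }

  public async install(_options: IDriverInstallOptions): Promise<boolean> {
    return false;
  }

  public async installContainerSupport(): Promise<boolean> {
    return false;
  }

  public async getAvailableVersions(): Promise<string[]> {
    return [];
  }
}

// The inherited helpers (execCommand, logStatus, ...) come for free:
await new NullDriver().logStatus();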
diff --git a/ts/drivers/driver-manager.ts b/ts/drivers/driver-manager.ts
new file mode 100644
index 0000000..376f242
--- /dev/null
+++ b/ts/drivers/driver-manager.ts
@@ -0,0 +1,267 @@
+/**
+ * Driver Manager
+ *
+ * Coordinates detection and installation of GPU drivers across all vendors.
+ */
+
+import type { IDriverStatus, TGpuVendor } from '../interfaces/gpu.ts';
+import { logger } from '../logger.ts';
+import { GpuDetector } from '../hardware/gpu-detector.ts';
+import { BaseDriver, type IDriverInstallOptions } from './base-driver.ts';
+import { NvidiaDriver } from './nvidia.ts';
+import { AmdDriver } from './amd.ts';
+import { IntelDriver } from './intel.ts';
+
+/**
+ * Driver Manager - coordinates GPU driver management
+ */
+export class DriverManager {
+  private gpuDetector: GpuDetector;
+  private drivers: Map<TGpuVendor, BaseDriver>;
+
+  constructor() {
+    this.gpuDetector = new GpuDetector();
+    this.drivers = new Map<TGpuVendor, BaseDriver>([
+      ['nvidia', new NvidiaDriver()],
+      ['amd', new AmdDriver()],
+      ['intel', new IntelDriver()],
+    ]);
+  }
+
+  /**
+   * Get driver manager for a specific vendor
+   */
+  public getDriver(vendor: TGpuVendor): BaseDriver | undefined {
+    return this.drivers.get(vendor);
+  }
+
+  /**
+   * Get status of all GPU drivers
+   */
+  public async getAllDriverStatus(): Promise<Map<TGpuVendor, IDriverStatus>> {
+    const statuses = new Map<TGpuVendor, IDriverStatus>();
+
+    // Only check drivers for detected GPUs
+    const gpus = await this.gpuDetector.detectGpus();
+    const detectedVendors = new Set(gpus.map((g) => g.vendor));
+
+    for (const vendor of detectedVendors) {
+      if (vendor === 'unknown') continue;
+
+      const driver = this.drivers.get(vendor);
+      if (driver) {
+        const status = await driver.getStatus();
+        statuses.set(vendor, status);
+      }
+    }
+
+    return statuses;
+  }
+
+  /**
+   * Check drivers for all detected GPUs
+   */
+  public async checkAllDrivers(): Promise<{
+    allInstalled: boolean;
+    allContainerReady: boolean;
+    issues: string[];
+  }> {
+    const gpus = await this.gpuDetector.detectGpus();
+    const issues: string[] = [];
+    let allInstalled = true;
+    let allContainerReady = true;
+
+    if (gpus.length === 0) {
+      issues.push('No GPUs detected');
+      return { allInstalled: false, allContainerReady: false, issues };
+    }
+
+    // Group GPUs by vendor
+    const vendorCounts = new Map();
+    for (const gpu of gpus) {
+      vendorCounts.set(gpu.vendor, (vendorCounts.get(gpu.vendor) || 0) + 1);
+    }
+
+    // Check each vendor
+    for (const [vendor, count] of vendorCounts) {
+      if (vendor === 'unknown') {
+        issues.push(`${count} GPU(s) with unknown vendor - cannot manage drivers`);
+        continue;
+      }
+
+      const driver = this.drivers.get(vendor);
+      if (!driver) {
+        issues.push(`No driver manager for ${vendor}`);
+        continue;
+      }
+
+      const status = await driver.getStatus();
+
+      if (!status.installed) {
+        allInstalled = false;
+        issues.push(`${driver.displayName} driver not installed for ${count} GPU(s)`);
+      }
+
+      if (!status.containerSupport) {
+        allContainerReady = false;
+        issues.push(`${driver.displayName} container support not configured`);
+      }
+
+      // Add specific issues
+      issues.push(...status.issues);
+    }
+
+    return { allInstalled, allContainerReady, issues };
+  }
+
+  /**
+   * Install drivers for all detected GPUs
+   */
+  public async installAllDrivers(options: Partial<IDriverInstallOptions> = {}): Promise<boolean> {
+    const fullOptions: IDriverInstallOptions = {
+      installToolkit: options.installToolkit ?? true,
+      installContainerSupport: options.installContainerSupport ?? true,
+      nonInteractive: options.nonInteractive ?? false,
+      driverVersion: options.driverVersion,
+      toolkitVersion: options.toolkitVersion,
+    };
+
+    const gpus = await this.gpuDetector.detectGpus();
+    const vendors = new Set(gpus.map((g) => g.vendor).filter((v) => v !== 'unknown'));
+
+    if (vendors.size === 0) {
+      logger.error('No supported GPUs detected');
+      return false;
+    }
+
+    let allSuccess = true;
+
+    for (const vendor of vendors) {
+      const driver = this.drivers.get(vendor);
+      if (!driver) continue;
+
+      logger.info(`Installing ${driver.displayName} drivers...`);
+
+      const success = await driver.install(fullOptions);
+      if (!success) {
+        allSuccess = false;
+        logger.error(`Failed to install ${driver.displayName} drivers`);
+      }
+    }
+
+    return allSuccess;
+  }
+
+  /**
+   * Install container support for all GPUs
+   */
+  public async installContainerSupport(): Promise<boolean> {
+    const gpus = await this.gpuDetector.detectGpus();
+    const vendors = new Set(gpus.map((g) => g.vendor).filter((v) => v !== 'unknown'));
+
+    let allSuccess = true;
+
+    for (const vendor of vendors) {
+      const driver = this.drivers.get(vendor);
+      if (!driver) continue;
+
+      const success = await driver.installContainerSupport();
+      if (!success) {
+        allSuccess = false;
+      }
+    }
+
+    return allSuccess;
+  }
+
+  /**
+   * Print driver status summary
+   */
+  public async printDriverStatus(): Promise<void> {
+    const gpus = await this.gpuDetector.detectGpus();
+
+    if (gpus.length === 0) {
+      logger.logBox('Driver Status', ['No GPUs detected'], 50, 'warning');
+      return;
+    }
+
+    // Group by vendor
+    const vendorGpus = new Map();
+    for (const gpu of gpus) {
+      if (!vendorGpus.has(gpu.vendor)) {
+        vendorGpus.set(gpu.vendor, []);
+      }
+      vendorGpus.get(gpu.vendor)!.push(gpu);
+    }
+
+    // Print status for each vendor
+    for (const [vendor, gpuList] of vendorGpus) {
+      if (vendor === 'unknown') {
+        logger.logBox('Unknown GPUs', [
+          `${gpuList.length} GPU(s) with unknown vendor`,
+          'Manual driver installation may be required',
+        ], 50, 'warning');
+        continue;
+      }
+
+      const driver = this.drivers.get(vendor);
+      if (driver) {
+        await driver.logStatus();
+      }
+    }
+  }
+
+  /**
+   * Get Docker run arguments for GPU support
+   */
+  public async getDockerGpuArgs(gpuIds?: string[]): Promise<string[]> {
+    const gpus = await this.gpuDetector.detectGpus();
+    const args: string[] = [];
+
+    // Filter to specific GPUs if provided
+    const targetGpus = gpuIds
+      ? gpus.filter((g) => gpuIds.includes(g.id))
+      : gpus;
+
+    if (targetGpus.length === 0) {
+      return args;
+    }
+
+    // Determine vendor (assume single vendor for simplicity)
+    const vendor = targetGpus[0].vendor;
+
+    switch (vendor) {
+      case 'nvidia':
+        // NVIDIA uses nvidia-docker runtime
+        args.push('--runtime=nvidia');
+        if (gpuIds && gpuIds.length > 0) {
+          // Use specific GPU indices
+          const indices = targetGpus.map((g) => g.index).join(',');
+          args.push(`--gpus="device=${indices}"`);
+        } else {
+          args.push('--gpus=all');
+        }
+        break;
+
+      case 'amd':
+        // AMD uses device passthrough
+        args.push('--device=/dev/kfd');
+        for (const gpu of targetGpus) {
+          args.push(`--device=/dev/dri/renderD${128 + gpu.index}`);
+        }
+        args.push('--group-add=video');
+        args.push('--security-opt=seccomp=unconfined');
+        break;
+
+      case 'intel':
+        // Intel uses device passthrough
+        for (const gpu of targetGpus) {
+          args.push(`--device=/dev/dri/renderD${128 + gpu.index}`);
+        }
+        args.push('--group-add=render');
+        break;
+    }
+
+    return args;
+  }
+}
diff --git a/ts/drivers/index.ts b/ts/drivers/index.ts
new file mode 100644
index 0000000..0df1de8
--- /dev/null
+++ b/ts/drivers/index.ts
@@ -0,0 +1,11 @@
+/**
+ * Driver Management Module
+ *
+ * Exports all driver detection and installation functionality.
+ */
+
+export { BaseDriver, type IDriverInstallOptions } from './base-driver.ts';
+export { NvidiaDriver } from './nvidia.ts';
+export { AmdDriver } from './amd.ts';
+export { IntelDriver } from './intel.ts';
+export { DriverManager } from './driver-manager.ts';
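A sketch of how the GPU arguments compose into a docker invocation, mirroring what ContainerRuntime.buildRunArgs() does with the same call ('gpu-0' below is a hypothetical GPU id):

// sketch.ts - illustrative only, not part of the patch
import { DriverManager } from './drivers/driver-manager.ts';

const drivers = new DriverManager();
// For NVIDIA this yields e.g. --runtime=nvidia --gpus="device=0";
// for AMD/Intel it yields the corresponding --device passthrough flags.
const gpuArgs = await drivers.getDockerGpuArgs(['gpu-0']);
console.log(['docker', 'run', '-d', ...gpuArgs, 'ollama/ollama:latest'].join(' '));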
diff --git a/ts/drivers/intel.ts b/ts/drivers/intel.ts
new file mode 100644
index 0000000..c72468b
--- /dev/null
+++ b/ts/drivers/intel.ts
@@ -0,0 +1,339 @@
+/**
+ * Intel Driver Management
+ *
+ * Handles Intel Arc GPU driver detection, installation, and oneAPI setup.
+ */
+
+import type { IDriverStatus } from '../interfaces/gpu.ts';
+import { logger } from '../logger.ts';
+import { BaseDriver, type IDriverInstallOptions } from './base-driver.ts';
+
+/**
+ * Intel Arc/oneAPI Driver Manager
+ */
+export class IntelDriver extends BaseDriver {
+  public readonly vendor = 'intel' as const;
+  public readonly displayName = 'Intel Arc';
+
+  /**
+   * Check if Intel GPU driver is installed
+   */
+  public async isInstalled(): Promise<boolean> {
+    try {
+      // Check for xpu-smi or intel_gpu_top
+      const { stdout } = await this.execCommand(
+        'xpu-smi discovery 2>/dev/null || intel_gpu_top -l 2>/dev/null || ls /dev/dri/renderD* 2>/dev/null | grep -c renderD',
+        { timeout: 5000, ignoreErrors: true },
+      );
+      return stdout.trim().length > 0 && !stdout.includes('not found');
+    } catch {
+      return false;
+    }
+  }
+
+  /**
+   * Get Intel GPU driver status
+   */
+  public async getStatus(): Promise<IDriverStatus> {
+    const status: IDriverStatus = {
+      vendor: 'intel',
+      installed: false,
+      containerSupport: false,
+      issues: [],
+    };
+
+    // Check for i915 driver (Intel integrated/Arc)
+    try {
+      const { stdout: driverInfo } = await this.execCommand(
+        'modinfo i915 2>/dev/null | grep "^version:"',
+        { timeout: 5000, ignoreErrors: true },
+      );
+
+      if (driverInfo.includes('version')) {
+        status.installed = true;
+        const match = driverInfo.match(/version:\s*(\S+)/i);
+        if (match) {
+          status.version = match[1];
+        }
+      }
+    } catch {
+      // i915 module info not available
+    }
+
+    // Check for xpu-smi (Intel Arc specific)
+    try {
+      const { stdout: xpuVersion } = await this.execCommand(
+        'xpu-smi --version 2>/dev/null',
+        { timeout: 5000, ignoreErrors: true },
+      );
+
+      if (xpuVersion.includes('xpu-smi')) {
+        status.installed = true;
+        const match = xpuVersion.match(/(\d+\.\d+(?:\.\d+)?)/);
+        if (match) {
+          status.version = match[1];
+        }
+      }
+    } catch {
+      // xpu-smi not available
+    }
+
+    // Check oneAPI toolkit
+    try {
+      const { stdout: oneApiVersion } = await this.execCommand(
+        'ls /opt/intel/oneapi/compiler/*/env/vars.sh 2>/dev/null | head -1 | xargs dirname | xargs dirname | xargs basename',
+        { timeout: 5000, ignoreErrors: true },
+      );
+
+      if (oneApiVersion.trim()) {
+        status.toolkitVersion = oneApiVersion.trim();
+      }
+    } catch {
+      // oneAPI not installed
+    }
+
+    // Check container support
+    try {
+      const { stdout: renderDevices } = await this.execCommand(
+        'ls /dev/dri/renderD* 2>/dev/null',
+        { timeout: 5000, ignoreErrors: true },
+      );
+
+      if (renderDevices.includes('renderD')) {
+        status.containerSupport = true;
+      } else {
+        status.issues.push('Intel GPU render devices not available');
+      }
+    } catch {
+      status.issues.push('Could not check Intel GPU device availability');
+    }
+
+    if (!status.installed) {
+      status.issues.push('Intel GPU driver not detected');
+    }
+
+    return status;
+  }
+
+  /**
+   * Install Intel GPU drivers and optionally oneAPI
+   */
+  public async install(options: IDriverInstallOptions): Promise<boolean> {
+    if (!await this.isRoot()) {
+      logger.error('Root privileges required to install Intel GPU drivers');
+      return false;
+    }
+
+    const distro = await this.getLinuxDistro();
+    logger.info(`Detected Linux distribution: ${distro.id} ${distro.version}`);
+
+    try {
+      if (distro.id === 'ubuntu') {
+        return await this.installOnUbuntu(options);
+      } else if (distro.id === 'fedora') {
+        return await this.installOnFedora(options);
+      } else {
+        logger.error(`Unsupported distribution for Intel Arc: ${distro.id}`);
+        logger.info('Please install Intel drivers manually from https://dgpu-docs.intel.com/');
+        return false;
+      }
+    } catch (error) {
+      logger.error(`Failed to install Intel drivers: ${error instanceof Error ? error.message : String(error)}`);
+      return false;
+    }
+  }
+
+  /**
+   * Install on Ubuntu
+   */
+  private async installOnUbuntu(options: IDriverInstallOptions): Promise<boolean> {
+    logger.info('Installing Intel GPU drivers on Ubuntu...');
+
+    // Install prerequisites
+    await this.aptUpdate();
+    await this.aptInstall(['wget', 'gpg']);
+
+    // Add Intel graphics repository
+    await this.execCommand(
+      'wget -qO - https://repositories.intel.com/graphics/intel-graphics.key | gpg --dearmor --output /usr/share/keyrings/intel-graphics.gpg',
+    );
+
+    const distro = await this.getLinuxDistro();
+    const ubuntuCodename = distro.version === '22.04' ? 'jammy' : distro.version === '24.04' ? 'noble' : 'jammy';
+
+    await this.execCommand(
+      `echo "deb [arch=amd64 signed-by=/usr/share/keyrings/intel-graphics.gpg] https://repositories.intel.com/graphics/ubuntu ${ubuntuCodename} arc" > /etc/apt/sources.list.d/intel-graphics.list`,
+    );
+
+    await this.aptUpdate();
+
+    // Install Intel GPU packages
+    await this.aptInstall([
+      'intel-opencl-icd',
+      'intel-level-zero-gpu',
+      'level-zero',
+      'intel-media-va-driver-non-free',
+      'libmfx1',
+      'libmfxgen1',
+      'libvpl2',
+      'libegl-mesa0',
+      'libegl1-mesa',
+      'libegl1-mesa-dev',
+      'libgbm1',
+      'libgl1-mesa-dev',
+      'libgl1-mesa-dri',
+      'libglapi-mesa',
+      'libgles2-mesa-dev',
+      'libglx-mesa0',
+      'libigdgmm12',
+      'libxatracker2',
+      'mesa-va-drivers',
+      'mesa-vdpau-drivers',
+      'mesa-vulkan-drivers',
+      'va-driver-all',
+    ]);
+
+    // Install xpu-smi for monitoring
+    await this.aptInstall('xpu-smi');
+
+    // Install oneAPI toolkit if requested
+    if (options.installToolkit) {
+      await this.installOneApi();
+    }
+
+    // Add user to video and render groups
+    await this.execCommand('usermod -a -G video,render $SUDO_USER || true');
+
+    // Install container support if requested
+    if (options.installContainerSupport) {
+      await this.installContainerSupport();
+    }
+
+    logger.success('Intel GPU driver installation completed');
+    logger.info('Verify installation with: xpu-smi discovery');
+    return true;
+  }
+
+  /**
+   * Install on Fedora
+   */
+  private async installOnFedora(options: IDriverInstallOptions): Promise<boolean> {
+    logger.info('Installing Intel GPU drivers on Fedora...');
+
+    // Intel GPU support is included in newer Fedora kernels.
+    // We just need to install the user-space components.
+
+    await this.dnfInstall([
+      'intel-media-driver',
+      'libva-intel-driver',
+      'intel-compute-runtime',
+      'level-zero',
+      'oneapi-level-zero',
+    ]);
+
+    // Try to install xpu-smi from Intel repo
+    try {
+      await this.execCommand(
+        'dnf copr enable -y intel/oneapi || true',
+      );
+      await this.dnfInstall('xpu-smi');
+    } catch {
+      logger.warn('Could not install xpu-smi. Intel Arc monitoring may be limited.');
+    }
+
+    // Add user to video and render groups
+    await this.execCommand('usermod -a -G video,render $SUDO_USER || true');
+
+    // Install oneAPI if requested
+    if (options.installToolkit) {
+      await this.installOneApi();
+    }
+
+    // Install container support if requested
+    if (options.installContainerSupport) {
+      await this.installContainerSupport();
+    }
+
+    logger.success('Intel GPU driver installation completed');
+    return true;
+  }
+
+  /**
+   * Install Intel oneAPI toolkit
+   */
+  private async installOneApi(): Promise<void> {
+    logger.info('Installing Intel oneAPI toolkit...');
+
+    const distro = await this.getLinuxDistro();
+
+    if (distro.id === 'ubuntu' || distro.id === 'debian') {
+      // Add Intel oneAPI repository
+      await this.execCommand(
+        'wget -O- https://apt.repos.intel.com/intel-gpg-keys/GPG-PUB-KEY-INTEL-SW-PRODUCTS.PUB | gpg --dearmor | tee /usr/share/keyrings/oneapi-archive-keyring.gpg > /dev/null',
+      );
+
+      await this.execCommand(
+        'echo "deb [signed-by=/usr/share/keyrings/oneapi-archive-keyring.gpg] https://apt.repos.intel.com/oneapi all main" | tee /etc/apt/sources.list.d/oneAPI.list',
+      );
+
+      await this.aptUpdate();
+      await this.aptInstall('intel-basekit');
+    } else if (distro.id === 'fedora') {
+      // Add Intel oneAPI repository
+      await this.execCommand(
+        `cat <<EOF > /etc/yum.repos.d/oneAPI.repo
+[oneAPI]
+name=Intel oneAPI repository
+baseurl=https://yum.repos.intel.com/oneapi
+enabled=1
+gpgcheck=1
+repo_gpgcheck=1
+gpgkey=https://yum.repos.intel.com/intel-gpg-keys/GPG-PUB-KEY-INTEL-SW-PRODUCTS.PUB
+EOF`,
+      );
+
+      await this.dnfInstall('intel-basekit');
+    }
+
+    logger.success('Intel oneAPI toolkit installed');
+    logger.info('Source the environment with: source /opt/intel/oneapi/setvars.sh');
+  }
+
+  /**
+   * Install container support for Intel GPUs
+   */
+  public async installContainerSupport(): Promise<boolean> {
+    logger.info('Configuring Docker for Intel GPUs...');
+
+    try {
+      // Intel GPUs work by passing through device files.
+      // Verify render devices exist
+      const { stdout: devices } = await this.execCommand('ls -la /dev/dri/renderD* 2>/dev/null || true');
+
+      if (!devices.includes('renderD')) {
+        logger.warn('/dev/dri/renderD* not found. Intel GPU driver may not be properly loaded.');
+        return false;
+      }
+
+      // Set permissions
+      await this.execCommand('chmod 666 /dev/dri/renderD* || true');
+
+      logger.success('Intel GPU container support configured');
+      logger.info('Use the following Docker flags for Intel GPU containers:');
+      logger.info(' --device=/dev/dri --group-add render');
+      return true;
+    } catch (error) {
+      logger.error(`Failed to configure Intel container support: ${error instanceof Error ? error.message : String(error)}`);
+      return false;
+    }
+  }
+
+  /**
+   * Get available driver versions
+   */
+  public async getAvailableVersions(): Promise<string[]> {
+    // Intel Arc drivers are typically tied to kernel versions.
+    // Return oneAPI versions as reference
+    return ['2024.0', '2023.2', '2023.1', '2023.0'];
+  }
+}
+ */ + +import type { IDriverStatus } from '../interfaces/gpu.ts'; +import { logger } from '../logger.ts'; +import { BaseDriver, type IDriverInstallOptions } from './base-driver.ts'; + +/** + * NVIDIA Driver Manager + */ +export class NvidiaDriver extends BaseDriver { + public readonly vendor = 'nvidia' as const; + public readonly displayName = 'NVIDIA'; + + /** + * Check if the NVIDIA driver is installed + */ + public async isInstalled(): Promise { + try { + const { stdout } = await this.execCommand('nvidia-smi --query-gpu=driver_version --format=csv,noheader', { + timeout: 5000, + ignoreErrors: true, + }); + return stdout.trim().length > 0; + } catch { + return false; + } + } + + /** + * Get NVIDIA driver status + */ + public async getStatus(): Promise { + const status: IDriverStatus = { + vendor: 'nvidia', + installed: false, + containerSupport: false, + issues: [], + }; + + // Check if nvidia-smi is available + try { + const { stdout: driverVersion } = await this.execCommand( + 'nvidia-smi --query-gpu=driver_version --format=csv,noheader | head -1', + { timeout: 5000 }, + ); + status.installed = true; + status.version = driverVersion.trim(); + } catch { + status.issues.push('NVIDIA driver not installed or nvidia-smi not available'); + return status; + } + + // Check CUDA toolkit + try { + const { stdout: cudaVersion } = await this.execCommand( + 'nvcc --version 2>/dev/null | grep "release" | sed "s/.*release \\([0-9.]*\\).*/\\1/"', + { timeout: 5000, ignoreErrors: true }, + ); + if (cudaVersion.trim()) { + status.toolkitVersion = cudaVersion.trim(); + } + } catch { + // CUDA toolkit not installed + } + + // Check nvidia-container-toolkit + try { + const { stdout: containerVersion } = await this.execCommand( + 'nvidia-container-cli --version 2>&1 | head -1', + { timeout: 5000, ignoreErrors: true }, + ); + if (containerVersion.includes('version')) { + status.containerSupport = true; + const match = containerVersion.match(/version (\d+\.\d+\.\d+)/); + if (match) { + status.containerRuntimeVersion = match[1]; + } + } + } catch { + status.issues.push('NVIDIA Container Toolkit not installed'); + } + + // Check if Docker has nvidia runtime + try { + const { stdout: dockerInfo } = await this.execCommand( + 'docker info --format "{{.Runtimes}}" 2>/dev/null', + { timeout: 5000, ignoreErrors: true }, + ); + if (!dockerInfo.includes('nvidia')) { + status.issues.push('Docker nvidia runtime not configured'); + } + } catch { + // Docker check failed + } + + return status; + } + + /** + * Install NVIDIA driver and optionally CUDA toolkit + */ + public async install(options: IDriverInstallOptions): Promise { + if (!await this.isRoot()) { + logger.error('Root privileges required to install NVIDIA drivers'); + return false; + } + + const distro = await this.getLinuxDistro(); + logger.info(`Detected Linux distribution: ${distro.id} ${distro.version}`); + + try { + if (distro.id === 'ubuntu' || distro.id === 'debian') { + return await this.installOnDebian(options); + } else if (distro.id === 'fedora' || distro.id === 'rhel' || distro.id === 'centos' || distro.id === 'rocky' || distro.id === 'almalinux') { + return await this.installOnRhel(options); + } else { + logger.error(`Unsupported distribution: ${distro.id}`); + logger.info('Please install NVIDIA drivers manually'); + return false; + } + } catch (error) { + logger.error(`Failed to install NVIDIA drivers: ${error instanceof Error ? 
error.message : String(error)}`); + return false; + } + } + + /** + * Install on Debian/Ubuntu + */ + private async installOnDebian(options: IDriverInstallOptions): Promise { + logger.info('Installing NVIDIA drivers on Debian/Ubuntu...'); + + // Add NVIDIA repository + await this.aptUpdate(); + + // Install prerequisites + await this.aptInstall(['software-properties-common', 'build-essential', 'dkms']); + + // Add NVIDIA PPA (for Ubuntu) + try { + await this.execCommand('add-apt-repository -y ppa:graphics-drivers/ppa 2>/dev/null || true'); + await this.aptUpdate(); + } catch { + // PPA might not be available on all systems + } + + // Install NVIDIA driver + const driverPackage = options.driverVersion + ? `nvidia-driver-${options.driverVersion}` + : 'nvidia-driver-535'; // Default to stable version + + await this.aptInstall(driverPackage); + + // Install CUDA toolkit if requested + if (options.installToolkit) { + await this.installCudaToolkit(options); + } + + // Install container support if requested + if (options.installContainerSupport) { + await this.installContainerSupport(); + } + + logger.success('NVIDIA driver installation completed'); + logger.warn('A system reboot is required to load the new driver'); + return true; + } + + /** + * Install on RHEL/Fedora + */ + private async installOnRhel(options: IDriverInstallOptions): Promise { + logger.info('Installing NVIDIA drivers on RHEL/Fedora...'); + + // Install prerequisites + await this.dnfInstall(['kernel-devel', 'kernel-headers', 'gcc', 'make', 'dkms', 'acpid']); + + // Add NVIDIA CUDA repository + const distro = await this.getLinuxDistro(); + const repoUrl = `https://developer.download.nvidia.com/compute/cuda/repos/rhel${distro.version.split('.')[0]}/x86_64/cuda-rhel${distro.version.split('.')[0]}.repo`; + + await this.execCommand(`dnf config-manager --add-repo ${repoUrl}`); + + // Install NVIDIA driver + await this.dnfInstall('nvidia-driver-latest-dkms'); + + // Install CUDA toolkit if requested + if (options.installToolkit) { + await this.dnfInstall('cuda'); + } + + // Install container support if requested + if (options.installContainerSupport) { + await this.installContainerSupport(); + } + + logger.success('NVIDIA driver installation completed'); + logger.warn('A system reboot is required to load the new driver'); + return true; + } + + /** + * Install CUDA toolkit + */ + private async installCudaToolkit(options: IDriverInstallOptions): Promise { + logger.info('Installing CUDA toolkit...'); + + const distro = await this.getLinuxDistro(); + + if (distro.id === 'ubuntu' || distro.id === 'debian') { + // Add CUDA repository + const cudaKeyUrl = 'https://developer.download.nvidia.com/compute/cuda/repos/ubuntu2204/x86_64/cuda-keyring_1.1-1_all.deb'; + await this.execCommand(`wget -q ${cudaKeyUrl} -O /tmp/cuda-keyring.deb && dpkg -i /tmp/cuda-keyring.deb`); + await this.aptUpdate(); + + const cudaPackage = options.toolkitVersion + ? 
`cuda-toolkit-${options.toolkitVersion.replace('.', '-')}` + : 'cuda-toolkit'; + + await this.aptInstall(cudaPackage); + } + } + + /** + * Install NVIDIA Container Toolkit + */ + public async installContainerSupport(): Promise { + if (!await this.isRoot()) { + logger.error('Root privileges required to install NVIDIA Container Toolkit'); + return false; + } + + const distro = await this.getLinuxDistro(); + logger.info('Installing NVIDIA Container Toolkit...'); + + try { + if (distro.id === 'ubuntu' || distro.id === 'debian') { + // Add repository + await this.execCommand( + 'curl -fsSL https://nvidia.github.io/libnvidia-container/gpgkey | gpg --dearmor -o /usr/share/keyrings/nvidia-container-toolkit-keyring.gpg', + ); + + const distribution = `${distro.id}${distro.version}`; + await this.execCommand( + `curl -s -L https://nvidia.github.io/libnvidia-container/${distribution}/libnvidia-container.list | ` + + 'sed "s#deb https://#deb [signed-by=/usr/share/keyrings/nvidia-container-toolkit-keyring.gpg] https://#g" | ' + + 'tee /etc/apt/sources.list.d/nvidia-container-toolkit.list', + ); + + await this.aptUpdate(); + await this.aptInstall('nvidia-container-toolkit'); + } else { + // RHEL/Fedora + await this.execCommand( + 'curl -s -L https://nvidia.github.io/libnvidia-container/stable/rpm/nvidia-container-toolkit.repo | ' + + 'tee /etc/yum.repos.d/nvidia-container-toolkit.repo', + ); + await this.dnfInstall('nvidia-container-toolkit'); + } + + // Configure Docker runtime + await this.configureDockerRuntime(); + + logger.success('NVIDIA Container Toolkit installed successfully'); + return true; + } catch (error) { + logger.error(`Failed to install NVIDIA Container Toolkit: ${error instanceof Error ? error.message : String(error)}`); + return false; + } + } + + /** + * Configure Docker to use NVIDIA runtime + */ + private async configureDockerRuntime(): Promise { + logger.info('Configuring Docker to use NVIDIA runtime...'); + + try { + // Run nvidia-ctk to configure Docker + await this.execCommand('nvidia-ctk runtime configure --runtime=docker'); + + // Restart Docker + await this.execCommand('systemctl restart docker'); + + logger.success('Docker configured to use NVIDIA runtime'); + } catch (error) { + logger.warn(`Could not configure Docker runtime automatically: ${error instanceof Error ? error.message : String(error)}`); + logger.info('Please run: nvidia-ctk runtime configure --runtime=docker'); + } + } + + /** + * Get available driver versions + */ + public async getAvailableVersions(): Promise { + const versions: string[] = []; + + try { + const distro = await this.getLinuxDistro(); + + if (distro.id === 'ubuntu' || distro.id === 'debian') { + const { stdout } = await this.execCommand( + 'apt-cache search nvidia-driver | grep "^nvidia-driver-[0-9]" | sed "s/nvidia-driver-\\([0-9]*\\).*/\\1/" | sort -rn | uniq', + { ignoreErrors: true }, + ); + versions.push(...stdout.trim().split('\n').filter((v: string) => v.trim())); + } + } catch { + // Failed to get versions + } + + return versions; + } +} diff --git a/ts/hardware/gpu-detector.ts b/ts/hardware/gpu-detector.ts new file mode 100644 index 0000000..9e34901 --- /dev/null +++ b/ts/hardware/gpu-detector.ts @@ -0,0 +1,565 @@ +/** + * GPU Detector + * + * Detects GPUs on the system (NVIDIA, AMD, Intel Arc) and retrieves their information. 
+ */ + +import { exec } from 'node:child_process'; +import { promisify } from 'node:util'; +import * as fs from 'node:fs'; +import type { IGpuInfo, IGpuStatus, TGpuVendor } from '../interfaces/gpu.ts'; +import { logger } from '../logger.ts'; +import { TIMING } from '../constants.ts'; + +const execAsync = promisify(exec); + +/** + * GPU Detector class for detecting and querying GPU information + */ +export class GpuDetector { + private cachedGpus: IGpuInfo[] | null = null; + private cacheTime: number = 0; + private readonly cacheDuration = TIMING.GPU_DETECTION_TIMEOUT_MS; + + /** + * Detect all GPUs on the system + * @param forceRefresh Force refresh even if cache is valid + * @returns Array of detected GPU information + */ + public async detectGpus(forceRefresh: boolean = false): Promise { + // Return cached data if still valid + if (!forceRefresh && this.cachedGpus && Date.now() - this.cacheTime < this.cacheDuration) { + return this.cachedGpus; + } + + const gpus: IGpuInfo[] = []; + + // Detect NVIDIA GPUs + const nvidiaGpus = await this.detectNvidiaGpus(); + gpus.push(...nvidiaGpus); + + // Detect AMD GPUs + const amdGpus = await this.detectAmdGpus(); + gpus.push(...amdGpus); + + // Detect Intel GPUs + const intelGpus = await this.detectIntelGpus(); + gpus.push(...intelGpus); + + // If no GPUs found via specific tools, try generic detection + if (gpus.length === 0) { + const genericGpus = await this.detectGenericGpus(); + gpus.push(...genericGpus); + } + + // Update cache + this.cachedGpus = gpus; + this.cacheTime = Date.now(); + + return gpus; + } + + /** + * Detect NVIDIA GPUs using nvidia-smi + */ + private async detectNvidiaGpus(): Promise { + const gpus: IGpuInfo[] = []; + + try { + // Check if nvidia-smi is available + const { stdout } = await execAsync( + 'nvidia-smi --query-gpu=index,gpu_uuid,name,memory.total,driver_version,pci.bus_id,compute_cap --format=csv,noheader,nounits', + { timeout: TIMING.GPU_DETECTION_TIMEOUT_MS }, + ); + + const lines = stdout.trim().split('\n').filter((line: string) => line.trim()); + + for (const line of lines) { + const parts = line.split(',').map((p: string) => p.trim()); + if (parts.length >= 7) { + const [index, _uuid, name, memory, driver, pciId, computeCap] = parts; + + gpus.push({ + id: `nvidia-${index}`, + vendor: 'nvidia', + model: name, + vram: parseInt(memory, 10), // Already in MB + driverVersion: driver, + computeCapability: computeCap, + pciSlot: this.extractPciSlot(pciId), + pciBusId: pciId, + index: parseInt(index, 10), + }); + } + } + + // Get CUDA version separately + if (gpus.length > 0) { + try { + const { stdout: cudaOut } = await execAsync('nvidia-smi --query-gpu=driver_version --format=csv,noheader | head -1 && nvcc --version 2>/dev/null | grep "release" | sed "s/.*release \\([0-9.]*\\).*/\\1/"', { + timeout: 5000, + }); + const cudaMatch = cudaOut.match(/(\d+\.\d+)/); + if (cudaMatch) { + for (const gpu of gpus) { + gpu.cudaVersion = cudaMatch[1]; + } + } + } catch { + // CUDA version detection failed, that's okay + } + } + } catch { + // nvidia-smi not available or failed + logger.dim('NVIDIA GPU detection: nvidia-smi not available'); + } + + return gpus; + } + + /** + * Detect AMD GPUs using rocm-smi or amdgpu-ls + */ + private async detectAmdGpus(): Promise { + const gpus: IGpuInfo[] = []; + + try { + // Try rocm-smi first + const { stdout } = await execAsync( + 'rocm-smi --showproductname --showmeminfo vram --showdriverversion --showbus --csv 2>/dev/null || rocm-smi -a --json 2>/dev/null', + { timeout: 
TIMING.GPU_DETECTION_TIMEOUT_MS }, + ); + + // Parse rocm-smi output + if (stdout.includes('{')) { + // JSON output + const data = JSON.parse(stdout); + let index = 0; + for (const [key, value] of Object.entries(data)) { + if (key.startsWith('card')) { + const cardData = value as Record; + gpus.push({ + id: `amd-${index}`, + vendor: 'amd', + model: String(cardData['Card series'] || cardData['card_series'] || 'AMD GPU'), + vram: this.parseMemory(String(cardData['VRAM Total Memory (B)'] || cardData['vram_total'] || '0')), + driverVersion: String(cardData['Driver version'] || cardData['driver_version'] || ''), + rocmVersion: await this.getRocmVersion(), + pciSlot: String(cardData['PCI Bus'] || cardData['pci_bus'] || ''), + pciBusId: String(cardData['PCI Bus'] || cardData['pci_bus'] || ''), + index: index++, + }); + } + } + } else { + // CSV output - parse line by line + const lines = stdout.trim().split('\n'); + let index = 0; + for (const line of lines) { + if (line.includes('GPU') || line.includes('Radeon') || line.includes('AMD')) { + // This is a GPU entry + gpus.push({ + id: `amd-${index}`, + vendor: 'amd', + model: line.trim(), + vram: 0, // Will need additional parsing + pciSlot: '', + index: index++, + }); + } + } + } + } catch { + // rocm-smi not available, try lspci + try { + const { stdout: lspciOut } = await execAsync( + 'lspci -nn | grep -i "VGA\\|3D\\|Display" | grep -i "AMD\\|ATI\\|Radeon"', + { timeout: 5000 }, + ); + + const lines = lspciOut.trim().split('\n').filter((l: string) => l.trim()); + let index = 0; + for (const line of lines) { + const match = line.match(/^([0-9a-f:.]+)\s+.*:\s+(.+)$/i); + if (match) { + gpus.push({ + id: `amd-${index}`, + vendor: 'amd', + model: match[2].trim(), + vram: await this.getAmdVramFromSysfs(match[1]), + pciSlot: match[1], + pciBusId: match[1], + index: index++, + }); + } + } + } catch { + logger.dim('AMD GPU detection: rocm-smi and lspci detection failed'); + } + } + + return gpus; + } + + /** + * Detect Intel GPUs using intel_gpu_top or xpu-smi + */ + private async detectIntelGpus(): Promise { + const gpus: IGpuInfo[] = []; + + try { + // Try xpu-smi first (for Intel Arc GPUs) + const { stdout } = await execAsync( + 'xpu-smi discovery --json 2>/dev/null', + { timeout: TIMING.GPU_DETECTION_TIMEOUT_MS }, + ); + + const data = JSON.parse(stdout); + if (data.device_list) { + let index = 0; + for (const device of data.device_list) { + gpus.push({ + id: `intel-${index}`, + vendor: 'intel', + model: device.device_name || 'Intel GPU', + vram: device.memory_physical_size_byte + ? 
Math.round(device.memory_physical_size_byte / (1024 * 1024)) + : 0, + oneApiVersion: await this.getOneApiVersion(), + pciSlot: device.pci_bdf || '', + pciBusId: device.pci_bdf || '', + index: index++, + }); + } + } + } catch { + // xpu-smi not available, try lspci + try { + const { stdout: lspciOut } = await execAsync( + 'lspci -nn | grep -i "VGA\\|3D\\|Display" | grep -i "Intel.*Arc\\|Intel.*Graphics"', + { timeout: 5000 }, + ); + + const lines = lspciOut.trim().split('\n').filter((l: string) => l.trim()); + let index = 0; + for (const line of lines) { + // Skip integrated graphics, only look for discrete Arc GPUs + if (line.toLowerCase().includes('arc')) { + const match = line.match(/^([0-9a-f:.]+)\s+.*:\s+(.+)$/i); + if (match) { + gpus.push({ + id: `intel-${index}`, + vendor: 'intel', + model: match[2].trim(), + vram: 0, // Intel Arc VRAM detection needs sysfs + pciSlot: match[1], + pciBusId: match[1], + index: index++, + }); + } + } + } + } catch { + logger.dim('Intel GPU detection: xpu-smi and lspci detection failed'); + } + } + + return gpus; + } + + /** + * Generic GPU detection using lspci + */ + private async detectGenericGpus(): Promise { + const gpus: IGpuInfo[] = []; + + try { + const { stdout } = await execAsync( + 'lspci -nn | grep -i "VGA\\|3D\\|Display"', + { timeout: 5000 }, + ); + + const lines = stdout.trim().split('\n').filter((l: string) => l.trim()); + let index = 0; + + for (const line of lines) { + const match = line.match(/^([0-9a-f:.]+)\s+.*:\s+(.+)$/i); + if (match) { + const model = match[2].trim(); + let vendor: TGpuVendor = 'unknown'; + + if (/nvidia/i.test(model)) vendor = 'nvidia'; + else if (/amd|ati|radeon/i.test(model)) vendor = 'amd'; + else if (/intel/i.test(model)) vendor = 'intel'; + + gpus.push({ + id: `gpu-${index}`, + vendor, + model, + vram: 0, + pciSlot: match[1], + pciBusId: match[1], + index: index++, + }); + } + } + } catch { + logger.dim('Generic GPU detection: lspci not available'); + } + + return gpus; + } + + /** + * Get real-time status for a specific GPU + */ + public async getGpuStatus(gpuId: string): Promise { + const gpus = await this.detectGpus(); + const gpu = gpus.find((g) => g.id === gpuId); + + if (!gpu) { + return null; + } + + if (gpu.vendor === 'nvidia') { + return this.getNvidiaGpuStatus(gpu); + } else if (gpu.vendor === 'amd') { + return this.getAmdGpuStatus(gpu); + } else if (gpu.vendor === 'intel') { + return this.getIntelGpuStatus(gpu); + } + + // Unknown vendor - return basic status + return { + id: gpuId, + utilization: 0, + memoryUsed: 0, + memoryTotal: gpu.vram, + memoryPercent: 0, + temperature: 0, + powerUsage: 0, + powerLimit: 0, + lastUpdate: Date.now(), + }; + } + + /** + * Get real-time status for all GPUs + */ + public async getAllGpuStatus(): Promise> { + const statuses = new Map(); + const gpus = await this.detectGpus(); + + for (const gpu of gpus) { + const status = await this.getGpuStatus(gpu.id); + if (status) { + statuses.set(gpu.id, status); + } + } + + return statuses; + } + + /** + * Get NVIDIA GPU status using nvidia-smi + */ + private async getNvidiaGpuStatus(gpu: IGpuInfo): Promise { + try { + const { stdout } = await execAsync( + `nvidia-smi --query-gpu=utilization.gpu,memory.used,memory.total,temperature.gpu,power.draw,power.limit,fan.speed,clocks.gr,clocks.mem --format=csv,noheader,nounits -i ${gpu.index}`, + { timeout: 5000 }, + ); + + const parts = stdout.trim().split(',').map((p: string) => p.trim()); + const [utilization, memUsed, memTotal, temp, power, powerLimit, fan, gpuClock, memClock] = 
parts; + + return { + id: gpu.id, + utilization: parseInt(utilization, 10) || 0, + memoryUsed: parseInt(memUsed, 10) || 0, + memoryTotal: parseInt(memTotal, 10) || gpu.vram, + memoryPercent: memTotal ? Math.round((parseInt(memUsed, 10) / parseInt(memTotal, 10)) * 100) : 0, + temperature: parseInt(temp, 10) || 0, + powerUsage: parseFloat(power) || 0, + powerLimit: parseFloat(powerLimit) || 0, + fanSpeed: fan !== '[N/A]' ? parseInt(fan, 10) : undefined, + gpuClock: gpuClock !== '[N/A]' ? parseInt(gpuClock, 10) : undefined, + memoryClock: memClock !== '[N/A]' ? parseInt(memClock, 10) : undefined, + lastUpdate: Date.now(), + }; + } catch { + return { + id: gpu.id, + utilization: 0, + memoryUsed: 0, + memoryTotal: gpu.vram, + memoryPercent: 0, + temperature: 0, + powerUsage: 0, + powerLimit: 0, + lastUpdate: Date.now(), + }; + } + } + + /** + * Get AMD GPU status using rocm-smi + */ + private async getAmdGpuStatus(gpu: IGpuInfo): Promise { + try { + const { stdout } = await execAsync( + `rocm-smi -d ${gpu.index} --showuse --showmemuse --showtemp --showpower --json 2>/dev/null`, + { timeout: 5000 }, + ); + + const data = JSON.parse(stdout); + const cardKey = `card${gpu.index}`; + const cardData = data[cardKey] || {}; + + return { + id: gpu.id, + utilization: parseInt(cardData['GPU use (%)'] || '0', 10), + memoryUsed: this.parseMemory(cardData['GPU memory use (%)'] || '0'), + memoryTotal: gpu.vram, + memoryPercent: parseInt(cardData['GPU memory use (%)'] || '0', 10), + temperature: parseFloat(cardData['Temperature (Sensor edge) (C)'] || '0'), + powerUsage: parseFloat(cardData['Average Graphics Package Power (W)'] || '0'), + powerLimit: parseFloat(cardData['Max Graphics Package Power (W)'] || '0'), + lastUpdate: Date.now(), + }; + } catch { + return { + id: gpu.id, + utilization: 0, + memoryUsed: 0, + memoryTotal: gpu.vram, + memoryPercent: 0, + temperature: 0, + powerUsage: 0, + powerLimit: 0, + lastUpdate: Date.now(), + }; + } + } + + /** + * Get Intel GPU status using xpu-smi + */ + private async getIntelGpuStatus(gpu: IGpuInfo): Promise { + try { + const { stdout } = await execAsync( + `xpu-smi stats -d ${gpu.index} --json 2>/dev/null`, + { timeout: 5000 }, + ); + + const data = JSON.parse(stdout); + const stats = data.device_level || {}; + + return { + id: gpu.id, + utilization: Math.round(parseFloat(stats.gpu_utilization || '0')), + memoryUsed: Math.round(parseFloat(stats.memory_used || '0') / (1024 * 1024)), + memoryTotal: gpu.vram, + memoryPercent: Math.round(parseFloat(stats.memory_utilization || '0')), + temperature: parseFloat(stats.gpu_temperature || '0'), + powerUsage: parseFloat(stats.power || '0'), + powerLimit: 0, // Intel doesn't expose this easily + lastUpdate: Date.now(), + }; + } catch { + return { + id: gpu.id, + utilization: 0, + memoryUsed: 0, + memoryTotal: gpu.vram, + memoryPercent: 0, + temperature: 0, + powerUsage: 0, + powerLimit: 0, + lastUpdate: Date.now(), + }; + } + } + + /** + * Helper to extract PCI slot from full bus ID + */ + private extractPciSlot(pciId: string): string { + // Input: "00000000:01:00.0" -> Output: "01:00.0" + const match = pciId.match(/([0-9a-f]+:[0-9a-f]+\.[0-9a-f]+)$/i); + return match ? 
match[1] : pciId; + } + + /** + * Helper to parse memory values with units + */ + private parseMemory(value: string): number { + const match = value.match(/(\d+(?:\.\d+)?)\s*(B|KB|MB|GB|TB)?/i); + if (!match) return 0; + + let bytes = parseFloat(match[1]); + const unit = (match[2] || 'B').toUpperCase(); + + switch (unit) { + case 'TB': + bytes *= 1024; + // falls through + case 'GB': + bytes *= 1024; + // falls through + case 'MB': + break; // Already in MB + case 'KB': + bytes /= 1024; + break; + case 'B': + bytes /= (1024 * 1024); + break; + } + + return Math.round(bytes); + } + + /** + * Get AMD VRAM from sysfs (async) + */ + private async getAmdVramFromSysfs(pciBusId: string): Promise { + try { + const sysfsPath = `/sys/bus/pci/devices/0000:${pciBusId}/mem_info_vram_total`; + const exists = await fs.promises.access(sysfsPath).then(() => true).catch(() => false); + if (exists) { + const content = await fs.promises.readFile(sysfsPath, 'utf8'); + return Math.round(parseInt(content.trim(), 10) / (1024 * 1024)); + } + } catch { + // sysfs not available + } + return 0; + } + + /** + * Get ROCm version + */ + private async getRocmVersion(): Promise { + try { + const { stdout } = await execAsync('cat /opt/rocm/.info/version 2>/dev/null || rocminfo 2>/dev/null | grep "ROCm" | head -1'); + const match = stdout.match(/(\d+\.\d+(?:\.\d+)?)/); + return match ? match[1] : undefined; + } catch { + return undefined; + } + } + + /** + * Get oneAPI version + */ + private async getOneApiVersion(): Promise { + try { + const { stdout } = await execAsync('source /opt/intel/oneapi/setvars.sh 2>/dev/null && echo $ONEAPI_ROOT 2>/dev/null || cat /opt/intel/oneapi/compiler/latest/env/vars.sh 2>/dev/null | grep VERSION'); + const match = stdout.match(/(\d+\.\d+(?:\.\d+)?)/); + return match ? match[1] : undefined; + } catch { + return undefined; + } + } +} diff --git a/ts/hardware/index.ts b/ts/hardware/index.ts new file mode 100644 index 0000000..9a252db --- /dev/null +++ b/ts/hardware/index.ts @@ -0,0 +1,8 @@ +/** + * Hardware Detection Module + * + * Exports all hardware detection functionality. + */ + +export { GpuDetector } from './gpu-detector.ts'; +export { SystemInfo } from './system-info.ts'; diff --git a/ts/hardware/system-info.ts b/ts/hardware/system-info.ts new file mode 100644 index 0000000..e8446b0 --- /dev/null +++ b/ts/hardware/system-info.ts @@ -0,0 +1,233 @@ +/** + * System Info + * + * Gathers system information including CPU, RAM, OS, and Docker status. 
+ */ + +import { exec } from 'node:child_process'; +import { promisify } from 'node:util'; +import * as os from 'node:os'; +import type { ISystemInfo } from '../interfaces/gpu.ts'; +import { GpuDetector } from './gpu-detector.ts'; +import { logger } from '../logger.ts'; + +const execAsync = promisify(exec); + +/** + * System Info class for gathering system information + */ +export class SystemInfo { + private gpuDetector: GpuDetector; + + constructor() { + this.gpuDetector = new GpuDetector(); + } + + /** + * Get complete system information + */ + public async getSystemInfo(): Promise { + const [gpus, dockerVersion, nvidiaContainerVersion, kernelVersion] = await Promise.all([ + this.gpuDetector.detectGpus(), + this.getDockerVersion(), + this.getNvidiaContainerVersion(), + this.getKernelVersion(), + ]); + + return { + hostname: os.hostname(), + cpuModel: this.getCpuModel(), + cpuCores: os.cpus().length, + ramTotal: Math.round(os.totalmem() / (1024 * 1024)), + ramAvailable: Math.round(os.freemem() / (1024 * 1024)), + os: this.getOsInfo(), + kernelVersion, + gpus, + dockerVersion, + nvidiaContainerVersion, + }; + } + + /** + * Get CPU model name + */ + private getCpuModel(): string { + const cpus = os.cpus(); + if (cpus.length > 0) { + return cpus[0].model; + } + return 'Unknown CPU'; + } + + /** + * Get OS information string + */ + private getOsInfo(): string { + const platform = os.platform(); + const release = os.release(); + + if (platform === 'linux') { + return `Linux ${release}`; + } else if (platform === 'darwin') { + return `macOS ${release}`; + } + + return `${platform} ${release}`; + } + + /** + * Get kernel version + */ + private async getKernelVersion(): Promise { + try { + const { stdout } = await execAsync('uname -r', { timeout: 5000 }); + return stdout.trim(); + } catch { + return os.release(); + } + } + + /** + * Get Docker version + */ + private async getDockerVersion(): Promise { + try { + const { stdout } = await execAsync('docker --version', { timeout: 5000 }); + const match = stdout.match(/Docker version (\d+\.\d+\.\d+)/); + return match ? match[1] : stdout.trim(); + } catch { + return undefined; + } + } + + /** + * Get NVIDIA Container Toolkit version + */ + private async getNvidiaContainerVersion(): Promise { + try { + const { stdout } = await execAsync('nvidia-container-cli --version 2>&1 | head -1', { timeout: 5000 }); + const match = stdout.match(/version (\d+\.\d+\.\d+)/); + return match ? 
match[1] : undefined; + } catch { + return undefined; + } + } + + /** + * Check if Docker is running + */ + public async isDockerRunning(): Promise { + try { + await execAsync('docker info', { timeout: 5000 }); + return true; + } catch { + return false; + } + } + + /** + * Check if NVIDIA Docker runtime is available + */ + public async isNvidiaRuntimeAvailable(): Promise { + try { + const { stdout } = await execAsync('docker info --format "{{.Runtimes}}"', { timeout: 5000 }); + return stdout.includes('nvidia'); + } catch { + return false; + } + } + + /** + * Check if Podman is available + */ + public async isPodmanAvailable(): Promise { + try { + await execAsync('podman --version', { timeout: 5000 }); + return true; + } catch { + return false; + } + } + + /** + * Get available disk space in the data directory + * @param path Directory to check + * @returns Available space in MB + */ + public async getAvailableDiskSpace(path: string = '/var/lib'): Promise { + try { + const { stdout } = await execAsync(`df -m "${path}" | tail -1 | awk '{print $4}'`, { timeout: 5000 }); + return parseInt(stdout.trim(), 10) || 0; + } catch { + return 0; + } + } + + /** + * Get system memory usage + */ + public getMemoryUsage(): { total: number; used: number; available: number; percent: number } { + const total = Math.round(os.totalmem() / (1024 * 1024)); + const available = Math.round(os.freemem() / (1024 * 1024)); + const used = total - available; + const percent = Math.round((used / total) * 100); + + return { total, used, available, percent }; + } + + /** + * Get system load average + */ + public getLoadAverage(): { load1: number; load5: number; load15: number } { + const [load1, load5, load15] = os.loadavg(); + return { + load1: Math.round(load1 * 100) / 100, + load5: Math.round(load5 * 100) / 100, + load15: Math.round(load15 * 100) / 100, + }; + } + + /** + * Print system info summary to logger + */ + public async printSystemInfo(): Promise { + const info = await this.getSystemInfo(); + + logger.logBoxTitle('System Information', 70, 'info'); + logger.logBoxLine(`Hostname: ${info.hostname}`); + logger.logBoxLine(`OS: ${info.os}`); + logger.logBoxLine(`Kernel: ${info.kernelVersion}`); + logger.logBoxLine(`CPU: ${info.cpuModel} (${info.cpuCores} cores)`); + logger.logBoxLine(`RAM: ${Math.round(info.ramTotal / 1024)} GB total, ${Math.round(info.ramAvailable / 1024)} GB available`); + logger.logBoxLine(''); + + if (info.dockerVersion) { + logger.logBoxLine(`Docker: v${info.dockerVersion}`); + } else { + logger.logBoxLine('Docker: Not installed'); + } + + if (info.nvidiaContainerVersion) { + logger.logBoxLine(`NVIDIA Container Toolkit: v${info.nvidiaContainerVersion}`); + } + + logger.logBoxLine(''); + logger.logBoxLine(`GPUs Detected: ${info.gpus.length}`); + + for (const gpu of info.gpus) { + const vramGb = Math.round(gpu.vram / 1024 * 10) / 10; + logger.logBoxLine(` ${gpu.id}: ${gpu.model} (${vramGb} GB)`); + if (gpu.driverVersion) { + logger.logBoxLine(` Driver: ${gpu.driverVersion}`); + } + if (gpu.cudaVersion) { + logger.logBoxLine(` CUDA: ${gpu.cudaVersion}`); + } + if (gpu.rocmVersion) { + logger.logBoxLine(` ROCm: ${gpu.rocmVersion}`); + } + } + + logger.logBoxEnd(); + } +} diff --git a/ts/helpers/index.ts b/ts/helpers/index.ts new file mode 100644 index 0000000..46286ae --- /dev/null +++ b/ts/helpers/index.ts @@ -0,0 +1,2 @@ +export * from './shortid.ts'; +export * from './prompt.ts'; diff --git a/ts/helpers/prompt.ts b/ts/helpers/prompt.ts new file mode 100644 index 0000000..1e93f6c --- /dev/null +++ 
b/ts/helpers/prompt.ts @@ -0,0 +1,55 @@ +import process from 'node:process'; + +/** + * Result from creating a prompt interface + */ +export interface IPromptInterface { + /** Function to prompt for user input */ + prompt: (question: string) => Promise; + /** Function to close the prompt interface */ + close: () => void; +} + +/** + * Create a readline prompt interface for interactive CLI input + * @returns Promise resolving to prompt function and close function + */ +export async function createPrompt(): Promise { + const readline = await import('node:readline'); + + const rl = readline.createInterface({ + input: process.stdin, + output: process.stdout, + }); + + const prompt = (question: string): Promise => { + return new Promise((resolve) => { + rl.question(question, (answer: string) => { + resolve(answer); + }); + }); + }; + + const close = (): void => { + rl.close(); + process.stdin.destroy(); + }; + + return { prompt, close }; +} + +/** + * Run an async function with a prompt interface, ensuring cleanup + * @param fn Function to run with the prompt interface + * @returns Promise resolving to the function's return value + */ +export async function withPrompt( + fn: (prompt: (question: string) => Promise) => Promise, +): Promise { + const { prompt, close } = await createPrompt(); + try { + return await fn(prompt); + } finally { + close(); + } +} diff --git a/ts/helpers/shortid.ts b/ts/helpers/shortid.ts new file mode 100644 index 0000000..7283e39 --- /dev/null +++ b/ts/helpers/shortid.ts @@ -0,0 +1,22 @@ +/** + * Generate a short unique ID of 6 alphanumeric characters + * @returns A 6-character alphanumeric string + */ +export function shortId(): string { + // Define the character set: a-z, A-Z, 0-9 + const chars = 'abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789'; + + // Generate cryptographically secure random values + const randomValues = new Uint8Array(6); + crypto.getRandomValues(randomValues); + + // Map each random value to a character in our set + let result = ''; + for (let i = 0; i < 6; i++) { + // Use modulo to map the random byte to a character index + const index = randomValues[i] % chars.length; + result += chars[index]; + } + + return result; +} diff --git a/ts/index.ts b/ts/index.ts new file mode 100644 index 0000000..01e64a9 --- /dev/null +++ b/ts/index.ts @@ -0,0 +1,40 @@ +#!/usr/bin/env node + +/** + * ModelGrid - AI Infrastructure Management + * + * Main entry point for Node.js execution. 
+ */ + +import { ModelGridCli } from './cli.ts'; +import { logger } from './logger.ts'; +import process from 'node:process'; + +/** + * Main entry point for ModelGrid + */ +async function main() { + const cli = new ModelGridCli(); + await cli.parseAndExecute(process.argv); +} + +// Run the main function and handle any errors +main().catch((error) => { + logger.error(`Error: ${error}`); + process.exit(1); +}); + +// Export core classes for programmatic use +export { ModelGrid } from './modelgrid.ts'; +export { ModelGridCli } from './cli.ts'; +export { Daemon } from './daemon.ts'; +export { Systemd } from './systemd.ts'; + +// Export modules +export * from './interfaces/index.ts'; +export * from './hardware/index.ts'; +export * from './drivers/index.ts'; +export * from './docker/index.ts'; +export * from './containers/index.ts'; +export * from './models/index.ts'; +export * from './api/index.ts'; diff --git a/ts/interfaces/api.ts b/ts/interfaces/api.ts new file mode 100644 index 0000000..761bc02 --- /dev/null +++ b/ts/interfaces/api.ts @@ -0,0 +1,329 @@ +/** + * ModelGrid API Interfaces + * + * OpenAI-compatible API types for the ModelGrid gateway. + */ + +/** + * Chat message role + */ +export type TChatRole = 'system' | 'user' | 'assistant' | 'tool'; + +/** + * Chat message + */ +export interface IChatMessage { + /** Message role */ + role: TChatRole; + /** Message content */ + content: string; + /** Name of the participant (optional) */ + name?: string; + /** Tool calls made by the assistant (optional) */ + tool_calls?: IToolCall[]; + /** Tool call ID (for tool response messages) */ + tool_call_id?: string; +} + +/** + * Tool call from assistant + */ +export interface IToolCall { + /** Unique ID for this tool call */ + id: string; + /** Type of tool call */ + type: 'function'; + /** Function call details */ + function: { + /** Function name */ + name: string; + /** Function arguments as JSON string */ + arguments: string; + }; +} + +/** + * Tool definition for function calling + */ +export interface ITool { + /** Tool type */ + type: 'function'; + /** Function definition */ + function: { + /** Function name */ + name: string; + /** Function description */ + description: string; + /** Function parameters (JSON Schema) */ + parameters: Record; + }; +} + +/** + * Chat completion request (OpenAI-compatible) + */ +export interface IChatCompletionRequest { + /** Model to use */ + model: string; + /** Messages in the conversation */ + messages: IChatMessage[]; + /** Maximum tokens to generate */ + max_tokens?: number; + /** Sampling temperature (0-2) */ + temperature?: number; + /** Top-p sampling */ + top_p?: number; + /** Number of completions to generate */ + n?: number; + /** Whether to stream the response */ + stream?: boolean; + /** Stop sequences */ + stop?: string | string[]; + /** Presence penalty (-2 to 2) */ + presence_penalty?: number; + /** Frequency penalty (-2 to 2) */ + frequency_penalty?: number; + /** User identifier */ + user?: string; + /** Tools available for function calling */ + tools?: ITool[]; + /** Tool choice preference */ + tool_choice?: 'none' | 'auto' | { type: 'function'; function: { name: string } }; +} + +/** + * Chat completion choice + */ +export interface IChatCompletionChoice { + /** Choice index */ + index: number; + /** Generated message */ + message: IChatMessage; + /** Finish reason */ + finish_reason: 'stop' | 'length' | 'tool_calls' | 'content_filter' | null; +} + +/** + * Token usage information + */ +export interface IUsage { + /** Number of tokens in 
the prompt */ + prompt_tokens: number; + /** Number of tokens in the completion */ + completion_tokens: number; + /** Total tokens used */ + total_tokens: number; +} + +/** + * Chat completion response (OpenAI-compatible) + */ +export interface IChatCompletionResponse { + /** Unique ID for this completion */ + id: string; + /** Object type */ + object: 'chat.completion'; + /** Creation timestamp */ + created: number; + /** Model used */ + model: string; + /** System fingerprint */ + system_fingerprint?: string; + /** Generated choices */ + choices: IChatCompletionChoice[]; + /** Token usage */ + usage: IUsage; +} + +/** + * Chat completion chunk for streaming + */ +export interface IChatCompletionChunk { + /** Unique ID for this completion */ + id: string; + /** Object type */ + object: 'chat.completion.chunk'; + /** Creation timestamp */ + created: number; + /** Model used */ + model: string; + /** System fingerprint */ + system_fingerprint?: string; + /** Delta choices */ + choices: IChatCompletionChunkChoice[]; +} + +/** + * Streaming choice delta + */ +export interface IChatCompletionChunkChoice { + /** Choice index */ + index: number; + /** Delta content */ + delta: Partial; + /** Finish reason */ + finish_reason: 'stop' | 'length' | 'tool_calls' | 'content_filter' | null; +} + +/** + * Text completion request (legacy endpoint) + */ +export interface ICompletionRequest { + /** Model to use */ + model: string; + /** Prompt text */ + prompt: string | string[]; + /** Maximum tokens to generate */ + max_tokens?: number; + /** Sampling temperature */ + temperature?: number; + /** Top-p sampling */ + top_p?: number; + /** Number of completions */ + n?: number; + /** Whether to stream */ + stream?: boolean; + /** Stop sequences */ + stop?: string | string[]; + /** Echo prompt in response */ + echo?: boolean; +} + +/** + * Text completion response + */ +export interface ICompletionResponse { + /** Unique ID */ + id: string; + /** Object type */ + object: 'text_completion'; + /** Creation timestamp */ + created: number; + /** Model used */ + model: string; + /** Generated choices */ + choices: ICompletionChoice[]; + /** Token usage */ + usage: IUsage; +} + +/** + * Text completion choice + */ +export interface ICompletionChoice { + /** Generated text */ + text: string; + /** Choice index */ + index: number; + /** Finish reason */ + finish_reason: 'stop' | 'length' | null; +} + +/** + * Embeddings request + */ +export interface IEmbeddingsRequest { + /** Model to use */ + model: string; + /** Input text(s) */ + input: string | string[]; + /** User identifier */ + user?: string; + /** Encoding format */ + encoding_format?: 'float' | 'base64'; +} + +/** + * Embeddings response + */ +export interface IEmbeddingsResponse { + /** Object type */ + object: 'list'; + /** Embedding data */ + data: IEmbeddingData[]; + /** Model used */ + model: string; + /** Token usage */ + usage: { + prompt_tokens: number; + total_tokens: number; + }; +} + +/** + * Single embedding data + */ +export interface IEmbeddingData { + /** Object type */ + object: 'embedding'; + /** Embedding vector */ + embedding: number[]; + /** Index in the input array */ + index: number; +} + +/** + * Model information (OpenAI-compatible) + */ +export interface IModelInfo { + /** Model ID */ + id: string; + /** Object type */ + object: 'model'; + /** Creation timestamp */ + created: number; + /** Model owner/organization */ + owned_by: string; +} + +/** + * List models response + */ +export interface IListModelsResponse { + /** Object type 
*/ + object: 'list'; + /** Available models */ + data: IModelInfo[]; +} + +/** + * API error response + */ +export interface IApiError { + /** Error details */ + error: { + /** Error message */ + message: string; + /** Error type */ + type: string; + /** Parameter that caused the error */ + param?: string; + /** Error code */ + code?: string; + }; +} + +/** + * Health check response + */ +export interface IHealthResponse { + /** Status */ + status: 'ok' | 'degraded' | 'error'; + /** Version */ + version: string; + /** Uptime in seconds */ + uptime: number; + /** Number of active containers */ + containers: number; + /** Number of available models */ + models: number; + /** Number of available GPUs */ + gpus: number; + /** Detailed status */ + details?: { + /** Container health */ + containers: Record; + /** GPU status */ + gpus: Record; + }; +} diff --git a/ts/interfaces/config.ts b/ts/interfaces/config.ts new file mode 100644 index 0000000..2ab2845 --- /dev/null +++ b/ts/interfaces/config.ts @@ -0,0 +1,121 @@ +/** + * ModelGrid Configuration Interfaces + * + * Defines the configuration structure for the ModelGrid daemon. + */ + +import type { IContainerConfig } from './container.ts'; + +/** + * API server configuration + */ +export interface IApiConfig { + /** Port to listen on (default: 8080) */ + port: number; + /** Host to bind to (default: '0.0.0.0') */ + host: string; + /** Valid API keys for authentication */ + apiKeys: string[]; + /** Rate limit in requests per minute (optional) */ + rateLimit?: number; + /** Enable CORS (default: false) */ + cors?: boolean; + /** Allowed origins for CORS */ + corsOrigins?: string[]; +} + +/** + * Docker/container runtime configuration + */ +export interface IDockerConfig { + /** Docker network name (default: 'modelgrid') */ + networkName: string; + /** Container runtime to use */ + runtime: 'docker' | 'podman'; + /** Path to docker/podman socket (optional) */ + socketPath?: string; +} + +/** + * GPU assignment configuration + */ +export interface IGpuAssignmentConfig { + /** Whether to auto-detect GPUs */ + autoDetect: boolean; + /** Manual GPU to container assignments (gpuId -> containerId) */ + assignments: Record; +} + +/** + * Model management configuration + */ +export interface IModelConfig { + /** URL to fetch greenlit models list */ + greenlistUrl: string; + /** Whether to auto-pull models when requested */ + autoPull: boolean; + /** Default container type for new models */ + defaultContainer: 'ollama' | 'vllm' | 'tgi'; + /** Models to auto-load on startup */ + autoLoad: string[]; +} + +/** + * Main ModelGrid configuration interface + */ +export interface IModelGridConfig { + /** Configuration format version */ + version: string; + /** API server configuration */ + api: IApiConfig; + /** Docker configuration */ + docker: IDockerConfig; + /** GPU configuration */ + gpus: IGpuAssignmentConfig; + /** Container configurations */ + containers: IContainerConfig[]; + /** Model management configuration */ + models: IModelConfig; + /** Health check interval in milliseconds */ + checkInterval: number; +} + +/** + * Greenlit model entry from remote list + */ +export interface IGreenlitModel { + /** Model name (e.g., "llama3:8b") */ + name: string; + /** Preferred container type */ + container: 'ollama' | 'vllm' | 'tgi'; + /** Minimum VRAM required in GB */ + minVram: number; + /** Optional tags for categorization */ + tags?: string[]; + /** Optional description */ + description?: string; +} + +/** + * Greenlit models list structure + */ +export 
interface IGreenlitModelsList { + /** List version */ + version: string; + /** Last updated timestamp */ + lastUpdated: string; + /** List of greenlit models */ + models: IGreenlitModel[]; +} + +/** + * Update status information + */ +export interface IUpdateStatus { + /** Current installed version */ + currentVersion: string; + /** Latest available version */ + latestVersion: string; + /** Whether an update is available */ + updateAvailable: boolean; +} diff --git a/ts/interfaces/container.ts b/ts/interfaces/container.ts new file mode 100644 index 0000000..636ec93 --- /dev/null +++ b/ts/interfaces/container.ts @@ -0,0 +1,176 @@ +/** + * ModelGrid Container Interfaces + * + * Defines types for container management (Ollama, vLLM, TGI). + */ + +/** + * Container type + */ +export type TContainerType = 'ollama' | 'vllm' | 'tgi' | 'custom'; + +/** + * Container health status + */ +export type TContainerHealth = 'healthy' | 'unhealthy' | 'starting' | 'unknown'; + +/** + * Container run status + */ +export type TContainerRunStatus = 'running' | 'stopped' | 'starting' | 'stopping' | 'error'; + +/** + * Container configuration + */ +export interface IContainerConfig { + /** Unique identifier for this container */ + id: string; + /** Container type */ + type: TContainerType; + /** Friendly name for the container */ + name: string; + /** Docker image to use */ + image: string; + /** GPU IDs to assign to this container */ + gpuIds: string[]; + /** Internal port the container listens on */ + port: number; + /** External port to expose (optional, uses internal port if not specified) */ + externalPort?: number; + /** Models to pre-load in this container */ + models: string[]; + /** Environment variables */ + env?: Record; + /** Volume mounts (host:container format) */ + volumes?: string[]; + /** Whether to auto-start this container */ + autoStart: boolean; + /** Restart policy */ + restartPolicy: 'no' | 'always' | 'on-failure' | 'unless-stopped'; + /** Maximum restart attempts (for on-failure policy) */ + maxRestarts?: number; + /** Memory limit (e.g., "16g") */ + memoryLimit?: string; + /** CPU limit (e.g., "4") */ + cpuLimit?: string; + /** Custom command arguments */ + command?: string[]; +} + +/** + * Container status information + */ +export interface IContainerStatus { + /** Container ID */ + id: string; + /** Docker container ID */ + dockerId?: string; + /** Container name */ + name: string; + /** Container type */ + type: TContainerType; + /** Whether the container is running */ + running: boolean; + /** Run status */ + runStatus: TContainerRunStatus; + /** Health status */ + health: TContainerHealth; + /** Health check message */ + healthMessage?: string; + /** GPU utilization (if assigned) */ + gpuUtilization?: number; + /** Memory usage in MB */ + memoryUsage?: number; + /** CPU usage percentage */ + cpuUsage?: number; + /** List of currently loaded models */ + loadedModels: string[]; + /** Container uptime in seconds */ + uptime?: number; + /** Container start time */ + startTime?: number; + /** Number of requests served */ + requestsServed?: number; + /** Last error message (if any) */ + lastError?: string; + /** Assigned GPU IDs */ + assignedGpus: string[]; + /** Internal endpoint URL */ + endpoint: string; +} + +/** + * Model loaded in a container + */ +export interface ILoadedModel { + /** Model name */ + name: string; + /** Model size in bytes */ + size: number; + /** Model format/quantization */ + format?: string; + /** Whether the model is currently loaded in memory */ + loaded: 
boolean; + /** Last used timestamp */ + lastUsed?: number; + /** Number of requests served by this model */ + requestCount: number; +} + +/** + * Container endpoint for API routing + */ +export interface IContainerEndpoint { + /** Container ID */ + containerId: string; + /** Container type */ + type: TContainerType; + /** Endpoint URL */ + url: string; + /** List of models available at this endpoint */ + models: string[]; + /** Whether the endpoint is healthy */ + healthy: boolean; + /** Priority for load balancing (lower = higher priority) */ + priority: number; +} + +/** + * Container creation options + */ +export interface IContainerCreateOptions { + /** Container type */ + type: TContainerType; + /** Friendly name */ + name: string; + /** GPU IDs to assign */ + gpuIds: string[]; + /** Models to pre-load */ + models?: string[]; + /** Custom image (optional, uses default for type) */ + image?: string; + /** Custom port (optional, uses default for type) */ + port?: number; + /** Environment variables */ + env?: Record; + /** Volume mounts */ + volumes?: string[]; + /** Auto-start on daemon startup */ + autoStart?: boolean; +} + +/** + * Container logs options + */ +export interface IContainerLogsOptions { + /** Container ID */ + containerId: string; + /** Number of lines to return (default: 100) */ + lines?: number; + /** Follow logs in real-time */ + follow?: boolean; + /** Include timestamps */ + timestamps?: boolean; + /** Filter by log level */ + level?: 'all' | 'error' | 'warn' | 'info' | 'debug'; +} diff --git a/ts/interfaces/gpu.ts b/ts/interfaces/gpu.ts new file mode 100644 index 0000000..8b8c108 --- /dev/null +++ b/ts/interfaces/gpu.ts @@ -0,0 +1,132 @@ +/** + * ModelGrid GPU Interfaces + * + * Defines types for GPU detection and management. 
+ */ + +/** + * GPU vendor type + */ +export type TGpuVendor = 'nvidia' | 'amd' | 'intel' | 'unknown'; + +/** + * GPU information detected from the system + */ +export interface IGpuInfo { + /** Unique identifier for this GPU */ + id: string; + /** GPU vendor */ + vendor: TGpuVendor; + /** GPU model name (e.g., "NVIDIA GeForce RTX 4090") */ + model: string; + /** Total VRAM in MB */ + vram: number; + /** Driver version (if available) */ + driverVersion?: string; + /** CUDA version (NVIDIA only) */ + cudaVersion?: string; + /** Compute capability (NVIDIA only, e.g., "8.9") */ + computeCapability?: string; + /** ROCm version (AMD only) */ + rocmVersion?: string; + /** oneAPI version (Intel only) */ + oneApiVersion?: string; + /** PCI slot identifier */ + pciSlot: string; + /** PCI bus ID (e.g., "0000:01:00.0") */ + pciBusId?: string; + /** GPU index in the system */ + index: number; +} + +/** + * Real-time GPU status + */ +export interface IGpuStatus { + /** GPU identifier */ + id: string; + /** Current GPU utilization percentage (0-100) */ + utilization: number; + /** Current memory usage in MB */ + memoryUsed: number; + /** Total memory in MB */ + memoryTotal: number; + /** Memory usage percentage */ + memoryPercent: number; + /** Current temperature in Celsius */ + temperature: number; + /** Current power usage in Watts */ + powerUsage: number; + /** Power limit in Watts */ + powerLimit: number; + /** Fan speed percentage (if available) */ + fanSpeed?: number; + /** GPU clock speed in MHz */ + gpuClock?: number; + /** Memory clock speed in MHz */ + memoryClock?: number; + /** Last update timestamp */ + lastUpdate: number; +} + +/** + * Combined GPU information and status + */ +export interface IGpuFullStatus extends IGpuInfo { + /** Real-time status */ + status: IGpuStatus; + /** Container ID assigned to this GPU (if any) */ + assignedContainer?: string; + /** Whether the GPU is available for use */ + available: boolean; + /** Health status */ + health: 'healthy' | 'warning' | 'error' | 'unknown'; + /** Health message (if warning or error) */ + healthMessage?: string; +} + +/** + * System information including all GPUs + */ +export interface ISystemInfo { + /** System hostname */ + hostname: string; + /** CPU model name */ + cpuModel: string; + /** Number of CPU cores */ + cpuCores: number; + /** Total RAM in MB */ + ramTotal: number; + /** Available RAM in MB */ + ramAvailable: number; + /** Operating system */ + os: string; + /** Kernel version */ + kernelVersion: string; + /** List of detected GPUs */ + gpus: IGpuInfo[]; + /** Docker version (if installed) */ + dockerVersion?: string; + /** NVIDIA Container Toolkit version (if installed) */ + nvidiaContainerVersion?: string; +} + +/** + * Driver status for a vendor + */ +export interface IDriverStatus { + /** GPU vendor */ + vendor: TGpuVendor; + /** Whether the driver is installed */ + installed: boolean; + /** Driver version (if installed) */ + version?: string; + /** CUDA/ROCm/oneAPI toolkit version (if installed) */ + toolkitVersion?: string; + /** Container runtime support (e.g., nvidia-docker) */ + containerSupport: boolean; + /** Container runtime version */ + containerRuntimeVersion?: string; + /** List of detected issues */ + issues: string[]; +} diff --git a/ts/interfaces/index.ts b/ts/interfaces/index.ts new file mode 100644 index 0000000..d5afd84 --- /dev/null +++ b/ts/interfaces/index.ts @@ -0,0 +1,11 @@ +/** + * ModelGrid Interfaces + * + * Central export for all TypeScript interfaces used throughout ModelGrid. 
+ */ + +export * from './config.ts'; +export * from './gpu.ts'; +export * from './container.ts'; +export * from './api.ts'; +export * from './modelgrid-accessor.ts'; diff --git a/ts/interfaces/modelgrid-accessor.ts b/ts/interfaces/modelgrid-accessor.ts new file mode 100644 index 0000000..7240f16 --- /dev/null +++ b/ts/interfaces/modelgrid-accessor.ts @@ -0,0 +1,31 @@ +/** + * ModelGrid Accessor Interface + * + * Interface to break circular dependencies between ModelGrid and its submodules. + */ + +import type { IUpdateStatus } from './config.ts'; + +/** + * Interface for accessing ModelGrid instance from submodules + * This breaks the circular dependency between ModelGrid and its managers + */ +export interface IModelGridAccessor { + /** + * Get the current version of ModelGrid + * @returns The current version string + */ + getVersion(): string; + + /** + * Get the update status + * @returns Object with current version, latest version, and update availability + */ + getUpdateStatus(): IUpdateStatus; + + /** + * Check for updates + * @returns Promise resolving to true if an update is available + */ + checkForUpdates(): Promise; +} diff --git a/ts/logger.ts b/ts/logger.ts new file mode 100644 index 0000000..ea9ca4e --- /dev/null +++ b/ts/logger.ts @@ -0,0 +1,334 @@ +import { symbols, theme } from './colors.ts'; + +/** + * Table column alignment options + */ +export type TColumnAlign = 'left' | 'right' | 'center'; + +/** + * Table column definition + */ +export interface ITableColumn { + /** Column header text */ + header: string; + /** Column key in data object */ + key: string; + /** Column alignment (default: left) */ + align?: TColumnAlign; + /** Column width (auto-calculated if not specified) */ + width?: number; + /** Color function to apply to cell values */ + color?: (value: string) => string; +} + +/** + * Box style types with colors + */ +export type TBoxStyle = 'default' | 'success' | 'error' | 'warning' | 'info'; + +/** + * A simple logger class that provides consistent formatting for log messages + * including support for logboxes with title, lines, and closing + */ +export class Logger { + private currentBoxWidth: number | null = null; + private currentBoxStyle: TBoxStyle = 'default'; + private static instance: Logger; + + /** Default width to use when no width is specified */ + private readonly DEFAULT_WIDTH = 60; + + /** + * Creates a new Logger instance + */ + constructor() { + this.currentBoxWidth = null; + } + + /** + * Get the singleton logger instance + * @returns The singleton logger instance + */ + public static getInstance(): Logger { + if (!Logger.instance) { + Logger.instance = new Logger(); + } + return Logger.instance; + } + + /** + * Log a message + * @param message Message to log + */ + public log(message: string): void { + console.log(message); + } + + /** + * Log an error message (red with X symbol) + * @param message Error message to log + */ + public error(message: string): void { + console.error(`${symbols.error} ${theme.error(message)}`); + } + + /** + * Log a warning message (yellow with warning symbol) + * @param message Warning message to log + */ + public warn(message: string): void { + console.warn(`${symbols.warning} ${theme.warning(message)}`); + } + + /** + * Log a success message (green with checkmark symbol) + * @param message Success message to log + */ + public success(message: string): void { + console.log(`${symbols.success} ${theme.success(message)}`); + } + + /** + * Log an info message (cyan with info symbol) + * @param message Info message 
diff --git a/ts/logger.ts b/ts/logger.ts
new file mode 100644
index 0000000..ea9ca4e
--- /dev/null
+++ b/ts/logger.ts
@@ -0,0 +1,334 @@
+import { symbols, theme } from './colors.ts';
+
+/**
+ * Table column alignment options
+ */
+export type TColumnAlign = 'left' | 'right' | 'center';
+
+/**
+ * Table column definition
+ */
+export interface ITableColumn {
+  /** Column header text */
+  header: string;
+  /** Column key in data object */
+  key: string;
+  /** Column alignment (default: left) */
+  align?: TColumnAlign;
+  /** Column width (auto-calculated if not specified) */
+  width?: number;
+  /** Color function to apply to cell values */
+  color?: (value: string) => string;
+}
+
+/**
+ * Box style types with colors
+ */
+export type TBoxStyle = 'default' | 'success' | 'error' | 'warning' | 'info';
+
+/**
+ * A simple logger class that provides consistent formatting for log messages,
+ * including support for logboxes with title, lines, and closing
+ */
+export class Logger {
+  private currentBoxWidth: number | null = null;
+  private currentBoxStyle: TBoxStyle = 'default';
+  private static instance: Logger;
+
+  /** Default width to use when no width is specified */
+  private readonly DEFAULT_WIDTH = 60;
+
+  /**
+   * Creates a new Logger instance
+   */
+  constructor() {
+    this.currentBoxWidth = null;
+  }
+
+  /**
+   * Get the singleton logger instance
+   * @returns The singleton logger instance
+   */
+  public static getInstance(): Logger {
+    if (!Logger.instance) {
+      Logger.instance = new Logger();
+    }
+    return Logger.instance;
+  }
+
+  /**
+   * Log a message
+   * @param message Message to log
+   */
+  public log(message: string): void {
+    console.log(message);
+  }
+
+  /**
+   * Log an error message (red with X symbol)
+   * @param message Error message to log
+   */
+  public error(message: string): void {
+    console.error(`${symbols.error} ${theme.error(message)}`);
+  }
+
+  /**
+   * Log a warning message (yellow with warning symbol)
+   * @param message Warning message to log
+   */
+  public warn(message: string): void {
+    console.warn(`${symbols.warning} ${theme.warning(message)}`);
+  }
+
+  /**
+   * Log a success message (green with checkmark symbol)
+   * @param message Success message to log
+   */
+  public success(message: string): void {
+    console.log(`${symbols.success} ${theme.success(message)}`);
+  }
+
+  /**
+   * Log an info message (cyan with info symbol)
+   * @param message Info message to log
+   */
+  public info(message: string): void {
+    console.log(`${symbols.info} ${theme.info(message)}`);
+  }
+
+  /**
+   * Log a dim/secondary message
+   * @param message Message to log in dim style
+   */
+  public dim(message: string): void {
+    console.log(theme.dim(message));
+  }
+
+  /**
+   * Log a highlighted/bold message
+   * @param message Message to highlight
+   */
+  public highlight(message: string): void {
+    console.log(theme.highlight(message));
+  }
+
+  /**
+   * Get color function for box based on style
+   */
+  private getBoxColor(style: TBoxStyle): (text: string) => string {
+    switch (style) {
+      case 'success':
+        return theme.borderSuccess;
+      case 'error':
+        return theme.borderError;
+      case 'warning':
+        return theme.borderWarning;
+      case 'info':
+        return theme.borderInfo;
+      case 'default':
+      default:
+        return theme.borderDefault;
+    }
+  }
+
+  /**
+   * Log a logbox title and set the current box width
+   * @param title Title of the logbox
+   * @param width Width of the logbox (including borders), defaults to DEFAULT_WIDTH
+   * @param style Box style for coloring (default, success, error, warning, info)
+   */
+  public logBoxTitle(title: string, width?: number, style?: TBoxStyle): void {
+    this.currentBoxWidth = width || this.DEFAULT_WIDTH;
+    this.currentBoxStyle = style || 'default';
+
+    const colorFn = this.getBoxColor(this.currentBoxStyle);
+
+    // Create the title line with appropriate padding
+    const paddedTitle = ` ${title} `;
+    const remainingSpace = this.currentBoxWidth - 3 - paddedTitle.length;
+
+    // Title line: ┌─ Title ────┐
+    const titleLine = `┌─${paddedTitle}${'─'.repeat(Math.max(0, remainingSpace))}┐`;
+
+    console.log(colorFn(titleLine));
+  }
+
+  /**
+   * Log a logbox line
+   * @param content Content of the line
+   * @param width Optional width override. If not provided, uses the current box width or DEFAULT_WIDTH.
+   */
+  public logBoxLine(content: string, width?: number): void {
+    if (!this.currentBoxWidth && !width) {
+      // No current width and no width provided, use default width
+      this.logBoxTitle('', this.DEFAULT_WIDTH);
+    }
+
+    const boxWidth = width || this.currentBoxWidth || this.DEFAULT_WIDTH;
+    const colorFn = this.getBoxColor(this.currentBoxStyle);
+
+    // Calculate the available space for content (use visible length)
+    const availableSpace = boxWidth - 2; // Account for left and right borders
+    const visibleLen = this.visibleLength(content);
+
+    if (visibleLen <= availableSpace - 1) {
+      // If content fits with at least one space for the right border stripe
+      const padding = availableSpace - visibleLen - 1;
+      const line = `│ ${content}${' '.repeat(padding)}│`;
+      console.log(colorFn(line));
+    } else {
+      // Content is too long, let it flow out of boundaries.
+      const line = `│ ${content}`;
+      console.log(colorFn(line));
+    }
+  }
+
+  /**
+   * Log a logbox end
+   * @param width Optional width override. If not provided, uses the current box width or DEFAULT_WIDTH.
+   */
+  public logBoxEnd(width?: number): void {
+    const boxWidth = width || this.currentBoxWidth || this.DEFAULT_WIDTH;
+    const colorFn = this.getBoxColor(this.currentBoxStyle);
+
+    // Create the bottom border
+    const bottomLine = `└${'─'.repeat(boxWidth - 2)}┘`;
+    console.log(colorFn(bottomLine));
+
+    // Reset the current box width and style
+    this.currentBoxWidth = null;
+    this.currentBoxStyle = 'default';
+  }
+
+  /**
+   * Log a complete logbox with title, content lines, and ending
+   * @param title Title of the logbox
+   * @param lines Array of content lines
+   * @param width Width of the logbox, defaults to DEFAULT_WIDTH
+   * @param style Box style for coloring
+   */
+  public logBox(title: string, lines: string[], width?: number, style?: TBoxStyle): void {
+    this.logBoxTitle(title, width || this.DEFAULT_WIDTH, style);
+
+    for (const line of lines) {
+      this.logBoxLine(line);
+    }
+
+    this.logBoxEnd();
+  }
+
+  /**
+   * Log a divider line
+   * @param width Width of the divider, defaults to DEFAULT_WIDTH
+   * @param character Character to use for the divider (default: ─)
+   */
+  public logDivider(width?: number, character: string = '─'): void {
+    console.log(character.repeat(width || this.DEFAULT_WIDTH));
+  }
+
+  /**
+   * Strip ANSI color codes from string for accurate length calculation
+   */
+  private stripAnsi(text: string): string {
+    // Remove ANSI escape codes (intentional control character regex)
+    // deno-lint-ignore no-control-regex
+    return text.replace(/\x1b\[[0-9;]*m/g, '');
+  }
+
+  /**
+   * Get visible length of string (excluding ANSI codes)
+   */
+  private visibleLength(text: string): number {
+    return this.stripAnsi(text).length;
+  }
+
+  /**
+   * Align text within a column (handles ANSI color codes correctly)
+   */
+  private alignText(text: string, width: number, align: TColumnAlign = 'left'): string {
+    const visibleLen = this.visibleLength(text);
+
+    if (visibleLen >= width) {
+      // Text is too long, truncate the visible part
+      const stripped = this.stripAnsi(text);
+      return stripped.substring(0, width);
+    }
+
+    const padding = width - visibleLen;
+
+    switch (align) {
+      case 'right':
+        return ' '.repeat(padding) + text;
+      case 'center': {
+        const leftPad = Math.floor(padding / 2);
+        const rightPad = padding - leftPad;
+        return ' '.repeat(leftPad) + text + ' '.repeat(rightPad);
+      }
+      case 'left':
+      default:
+        return text + ' '.repeat(padding);
+    }
+  }
+
+  /**
+   * Log a formatted table
+   * @param columns Column definitions
+   * @param rows Array of data objects
+   * @param title Optional table title
+   */
+  public logTable(columns: ITableColumn[], rows: Record<string, unknown>[], title?: string): void {
+    if (rows.length === 0) {
+      this.dim('No data to display');
+      return;
+    }
+
+    // Calculate column widths
+    const columnWidths = columns.map((col) => {
+      if (col.width) return col.width;
+
+      // Auto-calculate width based on header and data (use visible length)
+      let maxWidth = this.visibleLength(col.header);
+      for (const row of rows) {
+        const value = String(row[col.key] || '');
+        maxWidth = Math.max(maxWidth, this.visibleLength(value));
+      }
+      return maxWidth;
+    });
+
+    // Calculate total table width
+    const totalWidth = columnWidths.reduce((sum, w) => sum + w, 0) + (columns.length * 3) + 1;
+
+    // Print title if provided
+    if (title) {
+      this.logBoxTitle(title, totalWidth);
+    } else {
+      // Print top border
+      console.log('┌' + columnWidths.map((w) => '─'.repeat(w + 2)).join('┬') + '┐');
+    }
+
+    // Print header row
+    const headerCells = columns.map((col, i) =>
+      theme.highlight(this.alignText(col.header, columnWidths[i], col.align))
+    );
+    console.log('│ ' + headerCells.join(' │ ') + ' │');
+
+    // Print separator
+    console.log('├' + columnWidths.map((w) => '─'.repeat(w + 2)).join('┼') + '┤');
+
+    // Print data rows
+    for (const row of rows) {
+      const cells = columns.map((col, i) => {
+        const value = String(row[col.key] || '');
+        const aligned = this.alignText(value, columnWidths[i], col.align);
+        return col.color ? col.color(aligned) : aligned;
+      });
+      console.log('│ ' + cells.join(' │ ') + ' │');
+    }
+
+    // Print bottom border
+    console.log('└' + columnWidths.map((w) => '─'.repeat(w + 2)).join('┴') + '┘');
+  }
+}
+
+// Export a singleton instance for easy use
+export const logger = Logger.getInstance();
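A quick usage sketch of the box and table helpers above (illustrative, not part of the patch; the import path assumes a sibling module in ts/, and model names and figures are placeholders):

```typescript
import { logger } from './logger.ts';

// Boxed output: ┌─ Status ───┐ / │ ... │ / └────────────┘
logger.logBox('Status', ['Service: running', 'Containers: 2'], 40, 'success');

// Table output; column widths are auto-sized from the visible
// (ANSI-stripped) lengths of headers and cells.
logger.logTable(
  [
    { header: 'Model', key: 'model' },
    { header: 'VRAM (GB)', key: 'vram', align: 'right' },
  ],
  [
    { model: 'llama3:8b', vram: 8 },
    { model: 'mistral:7b', vram: 8 },
  ],
);
```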
diff --git a/ts/modelgrid.ts b/ts/modelgrid.ts
new file mode 100644
index 0000000..4fc5781
--- /dev/null
+++ b/ts/modelgrid.ts
@@ -0,0 +1,260 @@
+/**
+ * ModelGrid
+ *
+ * Main coordinator class for the ModelGrid system.
+ */
+
+import type { IModelGridConfig } from './interfaces/config.ts';
+import { logger } from './logger.ts';
+import { PATHS, VERSION } from './constants.ts';
+import { Systemd } from './systemd.ts';
+import { Daemon } from './daemon.ts';
+import { GpuDetector } from './hardware/gpu-detector.ts';
+import { SystemInfo } from './hardware/system-info.ts';
+import { DriverManager } from './drivers/driver-manager.ts';
+import { DockerManager } from './docker/docker-manager.ts';
+import { ContainerManager } from './containers/container-manager.ts';
+import { ModelRegistry } from './models/registry.ts';
+import { ModelLoader } from './models/loader.ts';
+import { GpuHandler } from './cli/gpu-handler.ts';
+import { ContainerHandler } from './cli/container-handler.ts';
+import { ModelHandler } from './cli/model-handler.ts';
+import { ConfigHandler } from './cli/config-handler.ts';
+import { ServiceHandler } from './cli/service-handler.ts';
+import * as fs from 'node:fs/promises';
+
+/**
+ * ModelGrid - Main application coordinator
+ */
+export class ModelGrid {
+  private config?: IModelGridConfig;
+  private systemd: Systemd;
+  private daemon: Daemon;
+  private gpuDetector: GpuDetector;
+  private systemInfo: SystemInfo;
+  private driverManager: DriverManager;
+  private dockerManager: DockerManager;
+  private containerManager: ContainerManager;
+  private modelRegistry: ModelRegistry;
+  private modelLoader?: ModelLoader;
+
+  // CLI Handlers
+  private gpuHandler: GpuHandler;
+  private containerHandler: ContainerHandler;
+  private modelHandler: ModelHandler;
+  private configHandler: ConfigHandler;
+  private serviceHandler: ServiceHandler;
+
+  constructor() {
+    // Initialize core components
+    this.gpuDetector = new GpuDetector();
+    this.systemInfo = new SystemInfo();
+    this.driverManager = new DriverManager();
+    this.dockerManager = new DockerManager();
+    this.containerManager = new ContainerManager();
+    this.modelRegistry = new ModelRegistry();
+    this.systemd = new Systemd();
+    this.daemon = new Daemon(this);
+
+    // Initialize CLI handlers
+    this.gpuHandler = new GpuHandler();
+    this.containerHandler = new ContainerHandler(this.containerManager);
+    this.modelHandler = new ModelHandler(this.containerManager, this.modelRegistry);
+    this.configHandler = new ConfigHandler();
+    this.serviceHandler = new ServiceHandler(this);
+  }
+
+  /**
+   * Load configuration from file
+   */
+  public async loadConfig(): Promise<void> {
+    try {
+      const configContent = await fs.readFile(PATHS.CONFIG_FILE, 'utf-8');
+      this.config = JSON.parse(configContent) as IModelGridConfig;
+      logger.dim(`Configuration loaded from ${PATHS.CONFIG_FILE}`);
+    } catch (error) {
+      if ((error as NodeJS.ErrnoException).code === 'ENOENT') {
+        throw new Error(`Configuration file not found: ${PATHS.CONFIG_FILE}`);
+      }
+      throw error;
+    }
+  }
+
+  /**
+   * Save configuration to file
+   */
+  public async saveConfig(): Promise<void> {
+    if (!this.config) {
+      throw new Error('No configuration to save');
+    }
+
+    await fs.mkdir(PATHS.CONFIG_DIR, { recursive: true });
+    await fs.writeFile(PATHS.CONFIG_FILE, JSON.stringify(this.config, null, 2));
+    logger.dim(`Configuration saved to ${PATHS.CONFIG_FILE}`);
+  }
+
+  /**
+   * Get current configuration
+   */
+  public getConfig(): IModelGridConfig | undefined {
+    return this.config;
+  }
+
+  /**
+   * Set configuration
+   */
+  public setConfig(config: IModelGridConfig): void {
+    this.config = config;
+  }
+
+  /**
+   * Get version string
+   */
+  public getVersion(): string {
+    return VERSION;
+  }
+
+  /**
+   * Get Systemd instance
+   */
+  public getSystemd(): Systemd {
+    return this.systemd;
+  }
+
+  /**
+   * Get Daemon instance
+   */
+  public getDaemon(): Daemon {
+    return this.daemon;
+  }
+
+  /**
+   * Get GPU Detector instance
+   */
+  public getGpuDetector(): GpuDetector {
+    return this.gpuDetector;
+  }
+
+  /**
+   * Get System Info instance
+   */
+  public getSystemInfo(): SystemInfo {
+    return this.systemInfo;
+  }
+
+  /**
+   * Get Driver Manager instance
+   */
+  public getDriverManager(): DriverManager {
+    return this.driverManager;
+  }
+
+  /**
+   * Get Docker Manager instance
+   */
+  public getDockerManager(): DockerManager {
+    return this.dockerManager;
+  }
+
+  /**
+   * Get Container Manager instance
+   */
+  public getContainerManager(): ContainerManager {
+    return this.containerManager;
+  }
+
+  /**
+   * Get Model Registry instance
+   */
+  public getModelRegistry(): ModelRegistry {
+    return this.modelRegistry;
+  }
+
+  /**
+   * Get Model Loader instance
+   */
+  public getModelLoader(): ModelLoader {
+    if (!this.modelLoader) {
+      this.modelLoader = new ModelLoader(this.modelRegistry, this.containerManager);
+    }
+    return this.modelLoader;
+  }
+
+  // CLI Handlers
+
+  /**
+   * Get GPU Handler
+   */
+  public getGpuHandler(): GpuHandler {
+    return this.gpuHandler;
+  }
+
+  /**
+   * Get Container Handler
+   */
+  public getContainerHandler(): ContainerHandler {
+    return this.containerHandler;
+  }
+
+  /**
+   * Get Model Handler
+   */
+  public getModelHandler(): ModelHandler {
+    return this.modelHandler;
+  }
+
+  /**
+   * Get Config Handler
+   */
+  public getConfigHandler(): ConfigHandler {
+    return this.configHandler;
+  }
+
+  /**
+   * Get Service Handler
+   */
+  public getServiceHandler(): ServiceHandler {
+    return this.serviceHandler;
+  }
+
+  /**
+   * Initialize the ModelGrid system
+   */
+  public async initialize(): Promise<void> {
+    // Load configuration
+    await this.loadConfig();
+
+    if (!this.config) {
+      throw new Error('Failed to load configuration');
+    }
+
+    // Initialize containers from config
+    for (const containerConfig of this.config.containers) {
+      await this.containerManager.addContainer(containerConfig);
+    }
+
+    // Initialize model registry
+    this.modelRegistry.setGreenlistUrl(this.config.models.greenlistUrl);
+
+    // Create model loader
+    this.modelLoader = new ModelLoader(
+      this.modelRegistry,
+      this.containerManager,
+      this.config.models.autoPull,
+    );
+
+    logger.success('ModelGrid initialized');
+  }
+
+  /**
+   * Shutdown the ModelGrid system
+   */
+  public async shutdown(): Promise<void> {
+    logger.info('Shutting down ModelGrid...');
+
+    // Stop all containers
+    await this.containerManager.stopAll();
+
+    logger.success('ModelGrid shutdown complete');
+  }
+}
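For context, the intended lifecycle as a sketch (illustrative; it assumes a valid config file already exists at PATHS.CONFIG_FILE, and error handling is elided):

```typescript
import { ModelGrid } from './modelgrid.ts';

const grid = new ModelGrid();

try {
  // Reads the config, registers containers from it, points the registry
  // at the greenlist URL, and constructs the model loader.
  await grid.initialize();
  console.log(`ModelGrid v${grid.getVersion()} ready`);
  console.log(`auto-pull: ${grid.getModelLoader().isAutoPullEnabled()}`);
} finally {
  // Stops all managed containers.
  await grid.shutdown();
}
```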
diff --git a/ts/models/index.ts b/ts/models/index.ts
new file mode 100644
index 0000000..8f53c4d
--- /dev/null
+++ b/ts/models/index.ts
@@ -0,0 +1,8 @@
+/**
+ * Model Management Module
+ *
+ * Exports model registry and loader functionality.
+ */
+
+export { ModelRegistry } from './registry.ts';
+export { ModelLoader } from './loader.ts';
diff --git a/ts/models/loader.ts b/ts/models/loader.ts
new file mode 100644
index 0000000..02d050c
--- /dev/null
+++ b/ts/models/loader.ts
@@ -0,0 +1,291 @@
+/**
+ * Model Loader
+ *
+ * Handles automatic model loading with greenlist validation.
+ */
+
+import type { TContainerType } from '../interfaces/container.ts';
+import type { BaseContainer } from '../containers/base-container.ts';
+import { logger } from '../logger.ts';
+import { ModelRegistry } from './registry.ts';
+import { ContainerManager } from '../containers/container-manager.ts';
+import { GpuDetector } from '../hardware/gpu-detector.ts';
+
+/**
+ * Model load result
+ */
+export interface IModelLoadResult {
+  success: boolean;
+  model: string;
+  container?: string;
+  error?: string;
+  alreadyLoaded?: boolean;
+}
+
+/**
+ * Model loader with greenlist validation
+ */
+export class ModelLoader {
+  private registry: ModelRegistry;
+  private containerManager: ContainerManager;
+  private gpuDetector: GpuDetector;
+  private autoPull: boolean;
+
+  constructor(
+    registry: ModelRegistry,
+    containerManager: ContainerManager,
+    autoPull: boolean = true,
+  ) {
+    this.registry = registry;
+    this.containerManager = containerManager;
+    this.gpuDetector = new GpuDetector();
+    this.autoPull = autoPull;
+  }
+
+  /**
+   * Load a model with greenlist validation
+   */
+  public async loadModel(modelName: string): Promise<IModelLoadResult> {
+    logger.info(`Loading model: ${modelName}`);
+
+    // Step 1: Check if model is already loaded in any container
+    const container = await this.containerManager.findContainerForModel(modelName);
+    if (container) {
+      logger.dim(`Model ${modelName} is already available in container ${container.getConfig().id}`);
+      return {
+        success: true,
+        model: modelName,
+        container: container.getConfig().id,
+        alreadyLoaded: true,
+      };
+    }
+
+    // Step 2: Check if model is greenlit
+    const isGreenlit = await this.registry.isModelGreenlit(modelName);
+    if (!isGreenlit) {
+      logger.error(`Model ${modelName} is not in the greenlit list`);
+      logger.info('Only greenlit models can be auto-pulled for security reasons.');
+      logger.info('Contact your administrator to add this model to the greenlist.');
+      return {
+        success: false,
+        model: modelName,
+        error: `Model "${modelName}" is not greenlit. Request via admin or add to greenlist.`,
+      };
+    }
+
+    // Step 3: Get model info from greenlist
+    const modelInfo = await this.registry.getGreenlitModel(modelName);
+    if (!modelInfo) {
+      return {
+        success: false,
+        model: modelName,
+        error: 'Failed to get model info from greenlist',
+      };
+    }
+
+    // Step 4: Check VRAM requirements
+    const gpus = await this.gpuDetector.detectGpus();
+    const totalVram = gpus.reduce((sum, gpu) => sum + gpu.vram, 0);
+    const totalVramGb = Math.round(totalVram / 1024);
+
+    if (modelInfo.minVram > totalVramGb) {
+      logger.error(`Insufficient VRAM for model ${modelName}`);
+      logger.info(`Required: ${modelInfo.minVram}GB, Available: ${totalVramGb}GB`);
+      return {
+        success: false,
+        model: modelName,
+        error: `Insufficient VRAM. Required: ${modelInfo.minVram}GB, Available: ${totalVramGb}GB`,
+      };
+    }
+
+    // Step 5: Find or create appropriate container
+    const containerType = modelInfo.container;
+    const targetContainer = await this.findAvailableContainer(containerType);
+
+    if (!targetContainer) {
+      logger.warn(`No ${containerType} container available`);
+
+      // Could auto-create container here if desired
+      return {
+        success: false,
+        model: modelName,
+        error: `No ${containerType} container available to load model`,
+      };
+    }
+
+    // Step 6: Pull the model if auto-pull is enabled
+    if (this.autoPull) {
+      logger.info(`Pulling model ${modelName} to ${containerType} container...`);
+
+      const pullSuccess = await targetContainer.pullModel(modelName, (progress) => {
+        const percent = progress.percent !== undefined ? ` (${progress.percent}%)` : '';
+        logger.dim(`  ${progress.status}${percent}`);
+      });
+
+      if (!pullSuccess) {
+        return {
+          success: false,
+          model: modelName,
+          error: 'Failed to pull model',
+        };
+      }
+    }
+
+    logger.success(`Model ${modelName} loaded successfully`);
+    return {
+      success: true,
+      model: modelName,
+      container: targetContainer.getConfig().id,
+    };
+  }
+
+  /**
+   * Find an available container of the specified type
+   */
+  private async findAvailableContainer(
+    containerType: TContainerType,
+  ): Promise<BaseContainer | null> {
+    const containers = this.containerManager.getAllContainers();
+
+    for (const container of containers) {
+      if (container.type !== containerType) {
+        continue;
+      }
+
+      const status = await container.getStatus();
+      if (status.running) {
+        return container;
+      }
+    }
+
+    // No running container found, try to start one
+    for (const container of containers) {
+      if (container.type !== containerType) {
+        continue;
+      }
+
+      logger.info(`Starting ${containerType} container: ${container.getConfig().name}`);
+      const started = await container.start();
+      if (started) {
+        return container;
+      }
+    }
+
+    return null;
+  }
+
+  /**
+   * Preload a list of models
+   */
+  public async preloadModels(modelNames: string[]): Promise<Map<string, IModelLoadResult>> {
+    const results = new Map<string, IModelLoadResult>();
+
+    for (const modelName of modelNames) {
+      const result = await this.loadModel(modelName);
+      results.set(modelName, result);
+
+      if (!result.success) {
+        logger.warn(`Failed to preload model: ${modelName}`);
+      }
+    }
+
+    return results;
+  }
+
+  /**
+   * Unload a model from a container
+   */
+  public async unloadModel(modelName: string): Promise<boolean> {
+    const container = await this.containerManager.findContainerForModel(modelName);
+    if (!container) {
+      logger.warn(`Model ${modelName} not found in any container`);
+      return false;
+    }
+
+    return container.removeModel(modelName);
+  }
+
+  /**
+   * Check if auto-pull is enabled
+   */
+  public isAutoPullEnabled(): boolean {
+    return this.autoPull;
+  }
+
+  /**
+   * Enable or disable auto-pull
+   */
+  public setAutoPull(enabled: boolean): void {
+    this.autoPull = enabled;
+  }
+
+  /**
+   * Get loading recommendations for available VRAM
+   */
+  public async getRecommendations(): Promise<{
+    canLoad: string[];
+    cannotLoad: string[];
+    loaded: string[];
+  }> {
+    const gpus = await this.gpuDetector.detectGpus();
+    const totalVramGb = Math.round(gpus.reduce((sum, gpu) => sum + gpu.vram, 0) / 1024);
+
+    const allModels = await this.registry.getAllGreenlitModels();
+    const availableModels = await this.containerManager.getAllAvailableModels();
+    const loadedNames = new Set(availableModels.keys());
+
+    const canLoad: string[] = [];
+    const cannotLoad: string[] = [];
+    const loaded: string[] = [];
+
+    for (const model of allModels) {
+      if (loadedNames.has(model.name)) {
+        loaded.push(model.name);
+      } else if (model.minVram <= totalVramGb) {
+        canLoad.push(model.name);
+      } else {
+        cannotLoad.push(model.name);
+      }
+    }
+
+    return { canLoad, cannotLoad, loaded };
+  }
+
+  /**
+   * Print loading status
+   */
+  public async printStatus(): Promise<void> {
+    const recommendations = await this.getRecommendations();
+
+    logger.logBoxTitle('Model Loading Status', 60, 'info');
+
+    logger.logBoxLine(`Loaded Models (${recommendations.loaded.length}):`);
+    if (recommendations.loaded.length > 0) {
+      for (const model of recommendations.loaded) {
+        logger.logBoxLine(`  - ${model}`);
+      }
+    } else {
+      logger.logBoxLine('  None');
+    }
+
+    logger.logBoxLine('');
+    logger.logBoxLine(`Available to Load (${recommendations.canLoad.length}):`);
+    for (const model of recommendations.canLoad.slice(0, 5)) {
+      logger.logBoxLine(`  - ${model}`);
+    }
+    if (recommendations.canLoad.length > 5) {
+      logger.logBoxLine(`  ... and ${recommendations.canLoad.length - 5} more`);
+    }
+
+    logger.logBoxLine('');
+    logger.logBoxLine(`Insufficient VRAM (${recommendations.cannotLoad.length}):`);
+    for (const model of recommendations.cannotLoad.slice(0, 3)) {
+      const info = await this.registry.getGreenlitModel(model);
+      logger.logBoxLine(`  - ${model} (needs ${info?.minVram || '?'}GB)`);
+    }
+    if (recommendations.cannotLoad.length > 3) {
+      logger.logBoxLine(`  ... and ${recommendations.cannotLoad.length - 3} more`);
+    }
+
+    logger.logBoxEnd();
+  }
+}
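A sketch of driving the loader directly (illustrative; the model name is a placeholder, and in the real flow ModelGrid.initialize() wires these components up from config):

```typescript
import { ModelLoader } from './models/loader.ts';
import { ModelRegistry } from './models/registry.ts';
import { ContainerManager } from './containers/container-manager.ts';

const loader = new ModelLoader(new ModelRegistry(), new ContainerManager());

// Runs the six steps above: already-loaded check, greenlist check,
// VRAM check, container selection, then the (optional) pull.
const result = await loader.loadModel('llama3:8b');
if (result.success) {
  console.log(
    result.alreadyLoaded
      ? `already available in container ${result.container}`
      : `loaded into container ${result.container}`,
  );
} else {
  console.error(`load failed: ${result.error}`);
}
```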
diff --git a/ts/models/registry.ts b/ts/models/registry.ts
new file mode 100644
index 0000000..ee4882e
--- /dev/null
+++ b/ts/models/registry.ts
@@ -0,0 +1,252 @@
+/**
+ * Model Registry
+ *
+ * Manages the greenlit model list and model availability.
+ */
+
+import type { IGreenlitModel, IGreenlitModelsList } from '../interfaces/config.ts';
+import type { TContainerType } from '../interfaces/container.ts';
+import { MODEL_REGISTRY, TIMING } from '../constants.ts';
+import { logger } from '../logger.ts';
+
+/**
+ * Model registry for managing greenlit models
+ */
+export class ModelRegistry {
+  private greenlistUrl: string;
+  private cachedGreenlist: IGreenlitModelsList | null = null;
+  private cacheTime: number = 0;
+
+  constructor(greenlistUrl: string = MODEL_REGISTRY.DEFAULT_GREENLIST_URL) {
+    this.greenlistUrl = greenlistUrl;
+  }
+
+  /**
+   * Set the greenlist URL
+   */
+  public setGreenlistUrl(url: string): void {
+    this.greenlistUrl = url;
+    this.cachedGreenlist = null;
+    this.cacheTime = 0;
+  }
+
+  /**
+   * Fetch the greenlit model list from remote URL
+   */
+  public async fetchGreenlist(forceRefresh: boolean = false): Promise<IGreenlitModelsList> {
+    // Return cached data if still valid
+    if (
+      !forceRefresh &&
+      this.cachedGreenlist &&
+      Date.now() - this.cacheTime < TIMING.GREENLIST_CACHE_DURATION_MS
+    ) {
+      return this.cachedGreenlist;
+    }
+
+    try {
+      logger.dim(`Fetching greenlit models from: ${this.greenlistUrl}`);
+
+      const controller = new AbortController();
+      const timeout = setTimeout(() => controller.abort(), 30000);
+
+      const response = await fetch(this.greenlistUrl, {
+        signal: controller.signal,
+        headers: {
+          'Accept': 'application/json',
+          'User-Agent': 'ModelGrid/1.0',
+        },
+      });
+
+      clearTimeout(timeout);
+
+      if (!response.ok) {
+        throw new Error(`HTTP ${response.status}: ${response.statusText}`);
+      }
+
+      const greenlist = await response.json() as IGreenlitModelsList;
+
+      // Validate structure
+      if (!greenlist.models || !Array.isArray(greenlist.models)) {
+        throw new Error('Invalid greenlist format: missing models array');
+      }
+
+      // Cache the result
+      this.cachedGreenlist = greenlist;
+      this.cacheTime = Date.now();
+
+      logger.dim(`Loaded ${greenlist.models.length} greenlit models`);
+      return greenlist;
+    } catch (error) {
+      logger.warn(`Failed to fetch greenlist: ${error instanceof Error ? error.message : String(error)}`);
+
+      // Return fallback if we have no cache
+      if (!this.cachedGreenlist) {
+        logger.dim('Using fallback greenlist');
+        return this.getFallbackGreenlist();
+      }
+
+      // Return stale cache
+      return this.cachedGreenlist;
+    }
+  }
+
+  /**
+   * Get fallback greenlist
+   */
+  private getFallbackGreenlist(): IGreenlitModelsList {
+    return {
+      version: '1.0',
+      lastUpdated: new Date().toISOString(),
+      models: MODEL_REGISTRY.FALLBACK_GREENLIST as unknown as IGreenlitModel[],
+    };
+  }
+
+  /**
+   * Check if a model is greenlit
+   */
+  public async isModelGreenlit(modelName: string): Promise<boolean> {
+    const greenlist = await this.fetchGreenlist();
+    return greenlist.models.some((m) => this.normalizeModelName(m.name) === this.normalizeModelName(modelName));
+  }
+
+  /**
+   * Get greenlit model info
+   */
+  public async getGreenlitModel(modelName: string): Promise<IGreenlitModel | null> {
+    const greenlist = await this.fetchGreenlist();
+    const normalized = this.normalizeModelName(modelName);
+    return greenlist.models.find((m) => this.normalizeModelName(m.name) === normalized) || null;
+  }
+
+  /**
+   * Get all greenlit models
+   */
+  public async getAllGreenlitModels(): Promise<IGreenlitModel[]> {
+    const greenlist = await this.fetchGreenlist();
+    return greenlist.models;
+  }
+
+  /**
+   * Get greenlit models by container type
+   */
+  public async getModelsByContainer(containerType: TContainerType): Promise<IGreenlitModel[]> {
+    const greenlist = await this.fetchGreenlist();
+    return greenlist.models.filter((m) => m.container === containerType);
+  }
+
+  /**
+   * Get greenlit models that fit within VRAM limit
+   */
+  public async getModelsWithinVram(maxVramGb: number): Promise<IGreenlitModel[]> {
+    const greenlist = await this.fetchGreenlist();
+    return greenlist.models.filter((m) => m.minVram <= maxVramGb);
+  }
+
+  /**
+   * Get recommended container type for a model
+   */
+  public async getRecommendedContainer(modelName: string): Promise<TContainerType | null> {
+    const model = await this.getGreenlitModel(modelName);
+    return model ? model.container : null;
+  }
+
+  /**
+   * Get minimum VRAM required for a model
+   */
+  public async getMinVram(modelName: string): Promise<number | null> {
+    const model = await this.getGreenlitModel(modelName);
+    return model ? model.minVram : null;
+  }
+
+  /**
+   * Check if model fits in available VRAM
+   */
+  public async modelFitsInVram(modelName: string, availableVramGb: number): Promise<boolean> {
+    const minVram = await this.getMinVram(modelName);
+    if (minVram === null) {
+      // Model not in greenlist, assume it might fit
+      return true;
+    }
+    return availableVramGb >= minVram;
+  }
+
+  /**
+   * Normalize model name for comparison
+   * Handles variations like "llama3:8b" vs "llama3:8B" vs "meta-llama/llama-3-8b"
+   */
+  private normalizeModelName(name: string): string {
+    return name
+      .toLowerCase()
+      .replace(/[^a-z0-9:.-]/g, '')
+      .trim();
+  }
+
+  /**
+   * Search models by name pattern
+   */
+  public async searchModels(pattern: string): Promise<IGreenlitModel[]> {
+    const greenlist = await this.fetchGreenlist();
+    const normalizedPattern = pattern.toLowerCase();
+
+    return greenlist.models.filter((m) =>
+      m.name.toLowerCase().includes(normalizedPattern) ||
+      m.description?.toLowerCase().includes(normalizedPattern) ||
+      m.tags?.some((t) => t.toLowerCase().includes(normalizedPattern))
+    );
+  }
+
+  /**
+   * Get models by tags
+   */
+  public async getModelsByTags(tags: string[]): Promise<IGreenlitModel[]> {
+    const greenlist = await this.fetchGreenlist();
+    const normalizedTags = tags.map((t) => t.toLowerCase());
+
+    return greenlist.models.filter((m) =>
+      m.tags?.some((t) => normalizedTags.includes(t.toLowerCase()))
+    );
+  }
+
+  /**
+   * Clear the cached greenlist
+   */
+  public clearCache(): void {
+    this.cachedGreenlist = null;
+    this.cacheTime = 0;
+  }
+
+  /**
+   * Print greenlist summary
+   */
+  public async printSummary(): Promise<void> {
+    const greenlist = await this.fetchGreenlist();
+
+    // Group by container type
+    const byContainer = new Map<TContainerType, IGreenlitModel[]>();
+    for (const model of greenlist.models) {
+      if (!byContainer.has(model.container)) {
+        byContainer.set(model.container, []);
+      }
+      byContainer.get(model.container)!.push(model);
+    }
+
+    logger.logBoxTitle('Greenlit Models', 60, 'info');
+    logger.logBoxLine(`Version: ${greenlist.version}`);
+    logger.logBoxLine(`Last Updated: ${greenlist.lastUpdated}`);
+    logger.logBoxLine(`Total Models: ${greenlist.models.length}`);
+    logger.logBoxLine('');
+
+    for (const [container, models] of byContainer) {
+      logger.logBoxLine(`${container.toUpperCase()} (${models.length}):`);
+      for (const model of models.slice(0, 5)) {
+        logger.logBoxLine(`  - ${model.name} (${model.minVram}GB VRAM)`);
+      }
+      if (models.length > 5) {
+        logger.logBoxLine(`  ... and ${models.length - 5} more`);
+      }
+      logger.logBoxLine('');
+    }
+
+    logger.logBoxEnd();
+  }
+}
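The fetched greenlist is JSON matching IGreenlitModelsList. A hypothetical document as a typed literal, with the shape inferred from the accesses above (the real definitions live in ts/interfaces/config.ts; names, VRAM figures, and tags are invented for illustration):

```typescript
import type { IGreenlitModelsList } from './interfaces/config.ts';

// Hypothetical greenlist payload. fetchGreenlist() only validates that
// `models` is an array; everything else is trusted as-is.
const exampleGreenlist: IGreenlitModelsList = {
  version: '1.0',
  lastUpdated: '2026-01-30T00:00:00Z',
  models: [
    {
      name: 'llama3:8b',
      container: 'ollama', // TContainerType; selects the backend
      minVram: 8, // GB, compared against detected GPU VRAM
      description: 'Meta Llama 3 8B',
      tags: ['chat'],
    },
  ],
};
```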
diff --git a/ts/systemd.ts b/ts/systemd.ts
new file mode 100644
index 0000000..c570cba
--- /dev/null
+++ b/ts/systemd.ts
@@ -0,0 +1,283 @@
+/**
+ * ModelGrid Systemd Service Manager
+ *
+ * Handles installation, removal, and control of the ModelGrid systemd service.
+ */
+
+import process from 'node:process';
+import { promises as fs } from 'node:fs';
+import { execSync } from 'node:child_process';
+import { logger } from './logger.ts';
+import { theme, symbols } from './colors.ts';
+import { PATHS, VERSION } from './constants.ts';
+
+/**
+ * Systemd service manager for ModelGrid
+ */
+export class Systemd {
+  private readonly serviceFilePath = '/etc/systemd/system/modelgrid.service';
+
+  private readonly serviceTemplate = `[Unit]
+Description=ModelGrid - AI Infrastructure Management
+After=network.target docker.service
+Requires=docker.service
+
+[Service]
+ExecStart=/usr/local/bin/modelgrid service start-daemon
+Restart=always
+RestartSec=10
+User=root
+Group=root
+Environment=PATH=/usr/bin:/usr/local/bin
+WorkingDirectory=/opt/modelgrid
+
+[Install]
+WantedBy=multi-user.target
+`;
+
+  /**
+   * Install the systemd service
+   */
+  public async install(): Promise<void> {
+    try {
+      // Check if configuration exists
+      await this.checkConfigExists();
+
+      // Write service file
+      await fs.writeFile(this.serviceFilePath, this.serviceTemplate);
+      logger.logBoxTitle('Service Installation', 50);
+      logger.logBoxLine(`Service file created at ${this.serviceFilePath}`);
+
+      // Reload systemd
+      execSync('systemctl daemon-reload');
+      logger.logBoxLine('Systemd daemon reloaded');
+
+      // Enable service
+      execSync('systemctl enable modelgrid.service');
+      logger.logBoxLine('Service enabled to start on boot');
+      logger.logBoxEnd();
+    } catch (error) {
+      if (error instanceof Error && error.message === 'Configuration not found') {
+        throw error;
+      }
+      logger.error(`Failed to install systemd service: ${error}`);
+      throw error;
+    }
+  }
+
+  /**
+   * Start the systemd service
+   */
+  public async start(): Promise<void> {
+    try {
+      await this.checkConfigExists();
+      execSync('systemctl start modelgrid.service');
+      logger.logBoxTitle('Service Status', 45);
+      logger.logBoxLine('ModelGrid service started successfully');
+      logger.logBoxEnd();
+    } catch (error) {
+      if (error instanceof Error && error.message === 'Configuration not found') {
+        process.exit(1);
+      }
+      logger.error(`Failed to start service: ${error}`);
+      throw error;
+    }
+  }
+
+  /**
+   * Stop the systemd service
+   */
+  public stop(): void {
+    try {
+      execSync('systemctl stop modelgrid.service');
+      logger.success('ModelGrid service stopped');
+    } catch (error) {
+      logger.error(`Failed to stop service: ${error}`);
+      throw error;
+    }
+  }
+
+  /**
+   * Get status of the systemd service
+   */
+  public async getStatus(): Promise<void> {
+    try {
+      // Display version
+      logger.log('');
+      logger.log(`${theme.dim('ModelGrid')} ${theme.dim('v' + VERSION)}`);
+
+      // Check if config exists
+      try {
+        await this.checkConfigExists();
+      } catch (_error) {
+        return;
+      }
+
+      // Display service status
+      await this.displayServiceStatus();
+
+      // Display container status
+      await this.displayContainerStatus();
+
+      // Display GPU status
+      await this.displayGpuStatus();
+    } catch (error) {
+      logger.error(`Failed to get status: ${error instanceof Error ? error.message : String(error)}`);
+    }
+  }
+
+  /**
+   * Display systemd service status
+   */
+  private async displayServiceStatus(): Promise<void> {
+    try {
+      const serviceStatus = execSync('systemctl status modelgrid.service').toString();
+      const lines = serviceStatus.split('\n');
+
+      let isActive = false;
+      let pid = '';
+      let memory = '';
+
+      for (const line of lines) {
+        if (line.includes('Active:')) {
+          isActive = line.includes('active (running)');
+        } else if (line.includes('Main PID:')) {
+          const match = line.match(/Main PID:\s+(\d+)/);
+          if (match) pid = match[1];
+        } else if (line.includes('Memory:')) {
+          const match = line.match(/Memory:\s+([\d.]+[A-Z])/);
+          if (match) memory = match[1];
+        }
+      }
+
+      logger.log('');
+      if (isActive) {
+        logger.log(`${symbols.running} ${theme.success('Service:')} ${theme.statusActive('active (running)')}`);
+      } else {
+        logger.log(`${symbols.stopped} ${theme.dim('Service:')} ${theme.statusInactive('inactive')}`);
+      }
+
+      if (pid || memory) {
+        const details = [];
+        if (pid) details.push(`PID: ${theme.dim(pid)}`);
+        if (memory) details.push(`Memory: ${theme.dim(memory)}`);
+        logger.log(`  ${details.join(' ')}`);
+      }
+      logger.log('');
+    } catch (_error) {
+      logger.log('');
+      logger.log(`${symbols.stopped} ${theme.dim('Service:')} ${theme.statusInactive('not installed')}`);
+      logger.log('');
+    }
+  }
+
+  /**
+   * Display container status
+   */
+  private async displayContainerStatus(): Promise<void> {
+    try {
+      // Try to get container info from docker
+      const output = execSync('docker ps --filter "name=modelgrid" --format "{{.Names}}\\t{{.Status}}"', { encoding: 'utf-8' });
+      const lines = output.trim().split('\n').filter(l => l.trim());
+
+      if (lines.length === 0) {
+        logger.info('Containers: None running');
+        return;
+      }
+
+      logger.info(`Containers (${lines.length}):`);
+
+      for (const line of lines) {
+        const [name, status = ''] = line.split('\t');
+        const isUp = status.toLowerCase().includes('up');
+
+        logger.log(`  ${isUp ? symbols.running : symbols.stopped} ${theme.highlight(name)} - ${isUp ? theme.success(status) : theme.dim(status)}`);
+      }
+      logger.log('');
+    } catch (_error) {
+      // Docker might not be running
+    }
+  }
+
+  /**
+   * Display GPU status
+   */
+  private async displayGpuStatus(): Promise<void> {
+    try {
+      // Try nvidia-smi
+      const output = execSync('nvidia-smi --query-gpu=name,utilization.gpu,memory.used,memory.total --format=csv,noheader,nounits', { encoding: 'utf-8' });
+      const lines = output.trim().split('\n');
+
+      if (lines.length === 0) {
+        return;
+      }
+
+      logger.info(`GPUs (${lines.length}):`);
+
+      for (const line of lines) {
+        const [name, util, memUsed, memTotal] = line.split(',').map(s => s.trim());
+        const memPercent = Math.round((parseInt(memUsed) / parseInt(memTotal)) * 100);
+
+        logger.log(`  ${symbols.info} ${theme.gpuNvidia(name)}`);
+        logger.log(`    Utilization: ${theme.highlight(util + '%')}  Memory: ${theme.info(memUsed)}/${memTotal} MB (${memPercent}%)`);
+      }
+      logger.log('');
+    } catch (_error) {
+      // nvidia-smi might not be available
+    }
+  }
+
+  /**
+   * Disable and uninstall the service
+   */
+  public async disable(): Promise<void> {
+    try {
+      // Stop if running
+      try {
+        execSync('systemctl stop modelgrid.service');
+        logger.log('Service stopped');
+      } catch (_error) {
+        // Might not be running
+      }
+
+      // Disable
+      try {
+        execSync('systemctl disable modelgrid.service');
+        logger.log('Service disabled');
+      } catch (_error) {
+        // Might not be enabled
+      }
+
+      // Remove service file
+      try {
+        await fs.unlink(this.serviceFilePath);
+        logger.log('Service file removed');
+      } catch (_error) {
+        // Might not exist
+      }
+
+      // Reload systemd
+      execSync('systemctl daemon-reload');
+      logger.success('ModelGrid service uninstalled');
+    } catch (error) {
+      logger.error(`Failed to disable service: ${error}`);
+      throw error;
+    }
+  }
+
+  /**
+   * Check if configuration file exists
+   */
+  private async checkConfigExists(): Promise<void> {
+    try {
+      await fs.access(PATHS.CONFIG_FILE);
+    } catch (_error) {
+      logger.log('');
+      logger.error('No configuration found');
+      logger.log(`  ${theme.dim('Config file:')} ${PATHS.CONFIG_FILE}`);
+      logger.log(`  ${theme.dim('Run')} ${theme.command('modelgrid config init')} ${theme.dim('to create one')}`);
+      logger.log('');
+      throw new Error('Configuration not found');
+    }
+  }
+}
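Programmatic use of the service manager, as a sketch (illustrative only; it must run as root, and checkConfigExists() aborts the flow when no config file is present):

```typescript
import { Systemd } from './systemd.ts';

const systemd = new Systemd();

await systemd.install(); // writes the unit file, daemon-reload, enable
await systemd.start(); // systemctl start modelgrid.service
await systemd.getStatus(); // version, service, container, and GPU summary
```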
diff --git a/uninstall.sh b/uninstall.sh
new file mode 100644
index 0000000..a549bd6
--- /dev/null
+++ b/uninstall.sh
@@ -0,0 +1,120 @@
+#!/bin/bash
+
+# ModelGrid Uninstaller Script
+# Completely removes ModelGrid from the system
+
+# Check if running as root
+if [ "$EUID" -ne 0 ]; then
+  echo "Please run as root (sudo modelgrid uninstall or sudo ./uninstall.sh)"
+  exit 1
+fi
+
+# This script can be called directly or through the CLI
+# When called through the CLI, environment variables are set:
+#   REMOVE_CONFIG=yes|no - whether to remove configuration files
+#   REMOVE_REPO=yes|no - whether to remove the repository
+
+# If not set through CLI, use defaults
+REMOVE_CONFIG=${REMOVE_CONFIG:-"no"}
+REMOVE_REPO=${REMOVE_REPO:-"no"}
+
+echo "ModelGrid Uninstaller"
+echo "====================="
+echo "This will completely remove ModelGrid from your system."
+
+# Find the directory where this script is located
+SCRIPT_DIR="$( cd "$( dirname "${BASH_SOURCE[0]}" )" &> /dev/null && pwd )"
+
+# Step 1: Stop and disable the systemd service if it exists
+if [ -f "/etc/systemd/system/modelgrid.service" ]; then
+  echo "Stopping ModelGrid service..."
+  systemctl stop modelgrid.service 2>/dev/null
+
+  echo "Disabling ModelGrid service..."
+ systemctl disable modelgrid.service 2>/dev/null + + echo "Removing systemd service file..." + rm -f /etc/systemd/system/modelgrid.service + + echo "Reloading systemd daemon..." + systemctl daemon-reload +fi + +# Step 2: Remove global symlink +if [ -L "/usr/local/bin/modelgrid" ]; then + echo "Removing global symlink..." + rm -f /usr/local/bin/modelgrid +fi + +if [ -L "/usr/bin/modelgrid" ]; then + echo "Removing global symlink..." + rm -f /usr/bin/modelgrid +fi + +# Step 3: Remove installation directory +if [ -d "/opt/modelgrid" ]; then + echo "Removing installation directory..." + rm -rf /opt/modelgrid +fi + +# Step 4: Remove configuration if requested +if [ "$REMOVE_CONFIG" = "yes" ]; then + echo "Removing configuration files..." + rm -rf /etc/modelgrid +else + # If not called through CLI, ask user + if [ -z "$MODELGRID_CLI_CALL" ]; then + read -p "Do you want to remove the ModelGrid configuration files? (y/N) " -n 1 -r + echo + if [[ $REPLY =~ ^[Yy]$ ]]; then + echo "Removing configuration files..." + rm -rf /etc/modelgrid + fi + fi +fi + +# Step 5: Remove repository if requested +if [ "$REMOVE_REPO" = "yes" ]; then + if [ -d "$SCRIPT_DIR/.git" ]; then + echo "Removing ModelGrid repository directory..." + + # Get parent directory to remove it after the script exits + PARENT_DIR=$(dirname "$SCRIPT_DIR") + REPO_NAME=$(basename "$SCRIPT_DIR") + + # Create a temporary cleanup script + CLEANUP_SCRIPT=$(mktemp) + echo "#!/bin/bash" > "$CLEANUP_SCRIPT" + echo "sleep 1" >> "$CLEANUP_SCRIPT" + echo "rm -rf \"$SCRIPT_DIR\"" >> "$CLEANUP_SCRIPT" + echo "echo \"ModelGrid repository has been removed.\"" >> "$CLEANUP_SCRIPT" + chmod +x "$CLEANUP_SCRIPT" + + # Run the cleanup script in the background + nohup "$CLEANUP_SCRIPT" > /dev/null 2>&1 & + + echo "ModelGrid repository will be removed after uninstaller exits." + else + echo "No git repository found." + fi +else + # If not requested, just display info + if [ -d "$SCRIPT_DIR/.git" ]; then + echo + echo "ModelGrid repository at $SCRIPT_DIR will remain intact." + fi +fi + +# Check for npm global installation +NODE_PATH=$(which node 2>/dev/null) +if [ -n "$NODE_PATH" ]; then + NPM_PATH=$(dirname "$NODE_PATH")/npm + if [ -x "$NPM_PATH" ]; then + echo + echo "If you installed ModelGrid via npm, you may want to uninstall it with:" + echo " npm uninstall -g @modelgrid.com/modelgrid" + fi +fi + +echo +echo "ModelGrid has been uninstalled from your system."