Compare commits
15 Commits
| Author | SHA1 | Date | |
|---|---|---|---|
| | c683b02e8c | | |
| | b64be03c2f | | |
| | 494dac1267 | | |
| | cea3407777 | | |
| | a009d990d0 | | |
| | 08d545f5db | | |
| | a0a282c712 | | |
| | 3eb0045676 | | |
| | 639eb5d36c | | |
| | d12d321079 | | |
| | 4fcd05d3c6 | | |
| | 503e25ff98 | | |
| | bba0855218 | | |
| | d437ffc226 | | |
| | e36758f183 | | |
66
changelog.md
66
changelog.md
@@ -1,5 +1,71 @@
|
||||
# Changelog
|
||||
|
||||
## 2026-03-23 - 6.3.2 - fix(docs)
|
||||
update license ownership and correct README license file reference
|
||||
|
||||
- Adjusts the copyright holder name in the license file
|
||||
- Fixes the README link to match the lowercase license filename
|
||||
|
||||
## 2026-03-21 - 6.3.1 - fix(cluster)
|
||||
improve shard reconstruction validation and start background healing service
|
||||
|
||||
- use the erasure read quorum when reconstructing chunks instead of assuming data shard count
|
||||
- verify reconstructed shards before writing healed data back to disk
|
||||
- start the healing service during server initialization with shared local shard stores
|
||||
- simplify QUIC request handling by decoding the full request buffer including trailing shard data
|
||||
- clean up unused variables and imports across cluster modules
|
||||
|
||||
## 2026-03-21 - 6.3.0 - feat(readme)
|
||||
document distributed cluster mode, erasure coding, and QUIC-based architecture
|
||||
|
||||
- Expand README overview and feature matrix to highlight clustering, multi-drive awareness, and distributed storage capabilities
|
||||
- Add standalone and cluster mode usage examples plus cluster configuration options
|
||||
- Document clustering internals including erasure coding, quorum behavior, QUIC transport, self-healing, and on-disk layout
|
||||
|
||||
## 2026-03-21 - 6.2.0 - feat(cluster)
|
||||
add shard healing, drive health heartbeats, and clustered policy directory support
|
||||
|
||||
- implements manifest-based healing that scans affected shards on offline nodes, reconstructs data with erasure coding, and rewrites recovered shards to local storage
|
||||
- includes drive status reporting in membership heartbeats by wiring DriveManager health checks into cluster heartbeat messages
|
||||
- adds clustered policies directory initialization and exposes policy storage paths from the distributed coordinator
|
||||
- extends distributed coordinator support for remote shard read and delete operations plus multipart upload session metadata
|
||||
|
||||
## 2026-03-21 - 6.1.0 - feat(cluster)
|
||||
add clustered storage backend with QUIC transport, erasure coding, and shard management
|
||||
|
||||
- introduces cluster configuration in Rust and TypeScript, including seed nodes, drive paths, heartbeat settings, and erasure coding options
|
||||
- adds core cluster modules for membership, topology state, object manifests, placement, shard storage, drive management, healing scaffolding, and inter-node protocol handling
|
||||
- adds QUIC-based transport for cluster communication and integrates a distributed storage backend alongside the existing standalone FileStore
|
||||
- updates the server startup path to initialize standalone or clustered storage based on configuration and exposes a basic clusterStatus management endpoint
|
||||
- refreshes build and dependency versions to support the new clustered storage implementation
|
||||
|
||||
## 2026-03-14 - 6.0.1 - fix(rust-bridge)
|
||||
update smartrust and limit RustBridge binary lookup to dist_rust
|
||||
|
||||
- Bumps @push.rocks/smartrust from ^1.0.0 to ^1.3.2.
|
||||
- Removes rust target debug and release fallback paths from RustBridge local binary resolution, relying on dist_rust/ruststorage.
|
||||
|
||||
## 2026-03-14 - 6.0.0 - BREAKING CHANGE(core)
|
||||
Rebrand from smarts3 to smartstorage
|
||||
|
||||
- Package renamed from @push.rocks/smarts3 to @push.rocks/smartstorage
|
||||
- Class renamed from Smarts3 to SmartStorage (no backward-compatible re-export)
|
||||
- Interface renamed from ISmarts3Config to ISmartStorageConfig
|
||||
- Method renamed from getS3Descriptor to getStorageDescriptor
|
||||
- Rust binary renamed from rusts3 to ruststorage
|
||||
- Rust types renamed: S3Error→StorageError, S3Action→StorageAction, S3Config→SmartStorageConfig, S3Server→StorageServer
|
||||
- On-disk file extension changed from ._S3_object to ._storage_object (BREAKING for existing stored data)
|
||||
- Default credentials changed from S3RVER to STORAGE
|
||||
- All internal S3 branding removed; AWS S3 protocol compatibility (IAM actions, ARNs, SigV4) fully maintained
|
||||
|
||||
## 2026-02-17 - 5.3.0 - feat(auth)
|
||||
add AWS SigV4 authentication and bucket policy support
|
||||
|
||||
- Implement AWS SigV4 full verification (constant-time comparison, 15-minute clock skew enforcement) and expose default signing region (server.region = 'us-east-1').
|
||||
- Add IAM-style bucket policy engine with Put/Get/Delete policy APIs (GetBucketPolicy/PutBucketPolicy/DeleteBucketPolicy), wildcard action/resource matching, Allow/Deny evaluation, and on-disk persistence under .policies/{bucket}.policy.json.
|
||||
- Documentation and README expanded with policy usage, examples, API table entries, and notes about policy CRUD and behavior for anonymous/authenticated requests.
|
||||
- Rust code refactors: simplify storage/server result structs and multipart handling (removed several unused size/key/bucket fields), remove S3Error::to_response and error_xml helpers, and other internal cleanup to support new auth/policy features.
|
||||
|
||||
## 2026-02-17 - 5.2.0 - feat(auth,policy)
|
||||
add AWS SigV4 authentication and S3 bucket policy support
|
||||
|
||||
|
||||
2
license
2
license
@@ -1,4 +1,4 @@
|
||||
Copyright (c) 2021 Lossless GmbH (hello@lossless.com)
|
||||
Copyright (c) 2021 Task Venture Capital GmbH (hello@task.vc)
|
||||
|
||||
Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
of this software and associated documentation files (the "Software"), to deal
|
||||
|
||||
@@ -10,14 +10,15 @@
|
||||
"module": {
|
||||
"githost": "code.foss.global",
|
||||
"gitscope": "push.rocks",
|
||||
"gitrepo": "smarts3",
|
||||
"description": "A Node.js TypeScript package to create a local S3 endpoint for simulating AWS S3 operations using mapped local directories for development and testing purposes.",
|
||||
"npmPackagename": "@push.rocks/smarts3",
|
||||
"gitrepo": "smartstorage",
|
||||
"description": "A Node.js TypeScript package to create a local S3-compatible storage server using mapped local directories for development and testing purposes.",
|
||||
"npmPackagename": "@push.rocks/smartstorage",
|
||||
"license": "MIT",
|
||||
"projectDomain": "push.rocks",
|
||||
"keywords": [
|
||||
"S3 Mock Server",
|
||||
"Local S3",
|
||||
"smartstorage",
|
||||
"S3 Compatible",
|
||||
"Local Storage Server",
|
||||
"Node.js",
|
||||
"TypeScript",
|
||||
"Local Development",
|
||||
@@ -26,8 +27,8 @@
|
||||
"File Storage",
|
||||
"AWS S3 Compatibility",
|
||||
"Development Tool",
|
||||
"S3 Endpoint",
|
||||
"S3 Simulation",
|
||||
"Storage Endpoint",
|
||||
"Storage Simulation",
|
||||
"Bucket Management",
|
||||
"File Upload",
|
||||
"CI/CD Integration",
|
||||
|
||||
42
package.json
42
package.json
@@ -1,27 +1,28 @@
|
||||
{
|
||||
"name": "@push.rocks/smarts3",
|
||||
"version": "5.2.0",
|
||||
"name": "@push.rocks/smartstorage",
|
||||
"version": "6.3.2",
|
||||
"private": false,
|
||||
"description": "A Node.js TypeScript package to create a local S3 endpoint for simulating AWS S3 operations using mapped local directories for development and testing purposes.",
|
||||
"description": "A Node.js TypeScript package to create a local S3-compatible storage server using mapped local directories for development and testing purposes.",
|
||||
"main": "dist_ts/index.js",
|
||||
"typings": "dist_ts/index.d.ts",
|
||||
"type": "module",
|
||||
"author": "Lossless GmbH",
|
||||
"license": "MIT",
|
||||
"scripts": {
|
||||
"test:before": "(tsrust)",
|
||||
"test": "(tstest test/ --web --verbose --logfile --timeout 60)",
|
||||
"build": "(tsrust && tsbuild --web --allowimplicitany)",
|
||||
"build": "(tsrust && tsbuild tsfolders --allowimplicitany)",
|
||||
"buildDocs": "tsdoc"
|
||||
},
|
||||
"devDependencies": {
|
||||
"@aws-sdk/client-s3": "^3.937.0",
|
||||
"@git.zone/tsbuild": "^3.1.0",
|
||||
"@git.zone/tsbundle": "^2.5.2",
|
||||
"@git.zone/tsrun": "^2.0.0",
|
||||
"@git.zone/tstest": "^3.1.0",
|
||||
"@push.rocks/smartbucket": "^4.3.0",
|
||||
"@aws-sdk/client-s3": "^3.1014.0",
|
||||
"@git.zone/tsbuild": "^4.3.0",
|
||||
"@git.zone/tsbundle": "^2.9.1",
|
||||
"@git.zone/tsrun": "^2.0.1",
|
||||
"@git.zone/tsrust": "^1.3.0",
|
||||
"@types/node": "^22.9.0"
|
||||
"@git.zone/tstest": "^3.5.0",
|
||||
"@push.rocks/smartbucket": "^4.5.1",
|
||||
"@types/node": "^25.5.0"
|
||||
},
|
||||
"browserslist": [
|
||||
"last 1 chrome versions"
|
||||
@@ -41,12 +42,13 @@
|
||||
],
|
||||
"dependencies": {
|
||||
"@push.rocks/smartpath": "^6.0.0",
|
||||
"@push.rocks/smartrust": "^1.0.0",
|
||||
"@tsclass/tsclass": "^9.3.0"
|
||||
"@push.rocks/smartrust": "^1.3.2",
|
||||
"@tsclass/tsclass": "^9.5.0"
|
||||
},
|
||||
"keywords": [
|
||||
"S3 Mock Server",
|
||||
"Local S3",
|
||||
"smartstorage",
|
||||
"S3 Compatible",
|
||||
"Local Storage Server",
|
||||
"Node.js",
|
||||
"TypeScript",
|
||||
"Local Development",
|
||||
@@ -55,20 +57,20 @@
|
||||
"File Storage",
|
||||
"AWS S3 Compatibility",
|
||||
"Development Tool",
|
||||
"S3 Endpoint",
|
||||
"S3 Simulation",
|
||||
"Storage Endpoint",
|
||||
"Storage Simulation",
|
||||
"Bucket Management",
|
||||
"File Upload",
|
||||
"CI/CD Integration",
|
||||
"Developer Onboarding"
|
||||
],
|
||||
"homepage": "https://code.foss.global/push.rocks/smarts3#readme",
|
||||
"homepage": "https://code.foss.global/push.rocks/smartstorage#readme",
|
||||
"repository": {
|
||||
"type": "git",
|
||||
"url": "https://code.foss.global/push.rocks/smarts3.git"
|
||||
"url": "ssh://git@code.foss.global:29419/push.rocks/smartstorage.git"
|
||||
},
|
||||
"bugs": {
|
||||
"url": "https://code.foss.global/push.rocks/smarts3/issues"
|
||||
"url": "https://code.foss.global/push.rocks/smartstorage/issues"
|
||||
},
|
||||
"packageManager": "pnpm@10.14.0+sha512.ad27a79641b49c3e481a16a805baa71817a04bbe06a38d17e60e2eaee83f6a146c6a688125f5792e48dd5ba30e7da52a5cda4c3992b9ccf333f9ce223af84748",
|
||||
"pnpm": {
|
||||
|
||||
6243
pnpm-lock.yaml
generated
6243
pnpm-lock.yaml
generated
File diff suppressed because it is too large
Load Diff
@@ -1,6 +1,6 @@
|
||||
# Production-Readiness Plan for smarts3
|
||||
# Production-Readiness Plan for smartstorage
|
||||
|
||||
**Goal:** Make smarts3 production-ready as a MinIO alternative for use cases where:
|
||||
**Goal:** Make smartstorage production-ready as a MinIO alternative for use cases where:
|
||||
- Running MinIO is out of scope
|
||||
- You have a program written for S3 and want to use the local filesystem
|
||||
- You need a lightweight, zero-dependency S3-compatible server
|
||||
@@ -31,7 +31,7 @@
|
||||
|
||||
### 1. Multipart Upload Support 🚀 **HIGHEST PRIORITY**
|
||||
|
||||
**Why:** Essential for uploading files >5MB efficiently. Without this, smarts3 can't handle real-world production workloads.
|
||||
**Why:** Essential for uploading files >5MB efficiently. Without this, smartstorage can't handle real-world production workloads.
|
||||
|
||||
**Implementation Required:**
|
||||
- `POST /:bucket/:key?uploads` - CreateMultipartUpload
|
||||
@@ -46,13 +46,13 @@
|
||||
**Files to Create/Modify:**
|
||||
- `ts/controllers/multipart.controller.ts` (new)
|
||||
- `ts/classes/filesystem-store.ts` (add multipart methods)
|
||||
- `ts/classes/smarts3-server.ts` (add multipart routes)
|
||||
- `ts/classes/smartstorage-server.ts` (add multipart routes)
|
||||
|
||||
---
|
||||
|
||||
### 2. Configurable Authentication 🔐
|
||||
|
||||
**Why:** Currently hardcoded credentials ('S3RVER'/'S3RVER'). Production needs custom credentials.
|
||||
**Why:** Credentials are currently hardcoded ('STORAGE'/'STORAGE'). Production needs custom credentials.
|
||||
|
||||
**Implementation Required:**
|
||||
- Support custom access keys and secrets via configuration
|
||||
@@ -75,7 +75,7 @@ interface IAuthConfig {
|
||||
**Files to Create/Modify:**
|
||||
- `ts/classes/auth-middleware.ts` (new)
|
||||
- `ts/classes/signature-validator.ts` (new)
|
||||
- `ts/classes/smarts3-server.ts` (integrate auth middleware)
|
||||
- `ts/classes/smartstorage-server.ts` (integrate auth middleware)
|
||||
- `ts/index.ts` (add auth config options)
|
||||
|
||||
---
|
||||
@@ -105,7 +105,7 @@ interface ICorsConfig {
|
||||
|
||||
**Files to Create/Modify:**
|
||||
- `ts/classes/cors-middleware.ts` (new)
|
||||
- `ts/classes/smarts3-server.ts` (integrate CORS middleware)
|
||||
- `ts/classes/smartstorage-server.ts` (integrate CORS middleware)
|
||||
- `ts/index.ts` (add CORS config options)
|
||||
|
||||
---
|
||||
@@ -131,7 +131,7 @@ interface ISslConfig {
|
||||
```
|
||||
|
||||
**Files to Create/Modify:**
|
||||
- `ts/classes/smarts3-server.ts` (add HTTPS server creation)
|
||||
- `ts/classes/smartstorage-server.ts` (add HTTPS server creation)
|
||||
- `ts/index.ts` (add SSL config options)
|
||||
|
||||
---
|
||||
@@ -147,7 +147,7 @@ interface ISslConfig {
|
||||
- Sensible production defaults
|
||||
- Example configurations for common use cases
|
||||
|
||||
**Configuration File Example (`smarts3.config.json`):**
|
||||
**Configuration File Example (`smartstorage.config.json`):**
|
||||
```json
|
||||
{
|
||||
"server": {
|
||||
@@ -220,7 +220,7 @@ interface ISslConfig {
|
||||
**Files to Create/Modify:**
|
||||
- `ts/classes/logger.ts` (new - use @push.rocks/smartlog?)
|
||||
- `ts/classes/access-logger-middleware.ts` (new)
|
||||
- `ts/classes/smarts3-server.ts` (replace console.log with logger)
|
||||
- `ts/classes/smartstorage-server.ts` (replace console.log with logger)
|
||||
- All controller files (use structured logging)
|
||||
|
||||
---
|
||||
@@ -238,7 +238,7 @@ interface ISslConfig {
|
||||
**Files to Create/Modify:**
|
||||
- `ts/controllers/health.controller.ts` (new)
|
||||
- `ts/classes/metrics-collector.ts` (new)
|
||||
- `ts/classes/smarts3-server.ts` (add health routes)
|
||||
- `ts/classes/smartstorage-server.ts` (add health routes)
|
||||
|
||||
---
|
||||
|
||||
@@ -266,7 +266,7 @@ interface ISslConfig {
|
||||
**Files to Create/Modify:**
|
||||
- `ts/classes/validation-middleware.ts` (new)
|
||||
- `ts/utils/validators.ts` (new)
|
||||
- `ts/classes/smarts3-server.ts` (integrate validation middleware)
|
||||
- `ts/classes/smartstorage-server.ts` (integrate validation middleware)
|
||||
|
||||
---
|
||||
|
||||
@@ -291,7 +291,7 @@ interface ISslConfig {
|
||||
- SIGTERM/SIGINT handling
|
||||
|
||||
**Files to Create/Modify:**
|
||||
- `ts/classes/smarts3-server.ts` (add graceful shutdown logic)
|
||||
- `ts/classes/smartstorage-server.ts` (add graceful shutdown logic)
|
||||
- `ts/index.ts` (add signal handlers)
|
||||
|
||||
---
|
||||
@@ -336,7 +336,7 @@ interface ISslConfig {
|
||||
4. ✅ Production configuration system
|
||||
5. ✅ Production logging
|
||||
|
||||
**Outcome:** smarts3 can handle real production workloads
|
||||
**Outcome:** smartstorage can handle real production workloads
|
||||
|
||||
---
|
||||
|
||||
@@ -350,7 +350,7 @@ interface ISslConfig {
|
||||
9. ✅ Graceful shutdown
|
||||
10. ✅ Batch operations
|
||||
|
||||
**Outcome:** smarts3 is operationally mature
|
||||
**Outcome:** smartstorage is operationally mature
|
||||
|
||||
---
|
||||
|
||||
@@ -363,7 +363,7 @@ interface ISslConfig {
|
||||
13. ✅ Comprehensive test suite
|
||||
14. ✅ Documentation updates
|
||||
|
||||
**Outcome:** smarts3 has broad S3 API compatibility
|
||||
**Outcome:** smartstorage has broad S3 API compatibility
|
||||
|
||||
---
|
||||
|
||||
@@ -375,7 +375,7 @@ interface ISslConfig {
|
||||
16. ✅ Performance optimization
|
||||
17. ✅ Advanced features based on user feedback
|
||||
|
||||
**Outcome:** smarts3 is a complete MinIO alternative
|
||||
**Outcome:** smartstorage is a complete MinIO alternative
|
||||
|
||||
---
|
||||
|
||||
@@ -392,7 +392,7 @@ interface ISslConfig {
|
||||
|
||||
## 🎯 Target Use Cases
|
||||
|
||||
**With this plan implemented, smarts3 will be a solid MinIO alternative for:**
|
||||
**With this plan implemented, smartstorage will be a solid MinIO alternative for:**
|
||||
|
||||
✅ **Local S3 development** - Fast, simple, no Docker required
|
||||
✅ **Testing S3 integrations** - Reliable, repeatable tests
|
||||
|
||||
@@ -1,10 +1,10 @@
|
||||
# Project Hints for smarts3
|
||||
# Project Hints for smartstorage
|
||||
|
||||
## Current State (v6.0.0-dev)
|
||||
## Current State (v6.0.0)
|
||||
|
||||
- **Rust-powered S3 server** via `@push.rocks/smartrust` IPC bridge
|
||||
- **Rust-powered S3-compatible storage server** via `@push.rocks/smartrust` IPC bridge
|
||||
- High-performance: streaming I/O, zero-copy, backpressure, range seek
|
||||
- TypeScript is thin IPC wrapper; all HTTP/storage/routing in Rust binary `rusts3`
|
||||
- TypeScript is a thin IPC wrapper; all HTTP/storage/routing in Rust binary `ruststorage`
|
||||
- Full S3 compatibility: PUT, GET, HEAD, DELETE for objects and buckets
|
||||
- Multipart upload support (streaming, no OOM)
|
||||
- **Real AWS SigV4 authentication** (cryptographic signature verification)
|
||||
@@ -18,37 +18,37 @@
|
||||
- `main.rs` - Clap CLI, management mode entry
|
||||
- `config.rs` - Serde config structs matching TS interfaces (includes `region`)
|
||||
- `management.rs` - IPC loop (newline-delimited JSON over stdin/stdout)
|
||||
- `server.rs` - hyper 1.x HTTP server, routing, CORS, auth+policy pipeline, all S3 handlers
|
||||
- `server.rs` - hyper 1.x HTTP server, routing, CORS, auth+policy pipeline, all S3-compatible handlers
|
||||
- `storage.rs` - FileStore: filesystem-backed storage, multipart manager, `.policies/` dir
|
||||
- `xml_response.rs` - S3 XML response builders
|
||||
- `s3_error.rs` - S3 error codes with HTTP status mapping
|
||||
- `xml_response.rs` - S3-compatible XML response builders
|
||||
- `error.rs` - StorageError codes with HTTP status mapping
|
||||
- `auth.rs` - AWS SigV4 signature verification (HMAC-SHA256, clock skew, constant-time compare)
|
||||
- `action.rs` - S3Action enum + request-to-IAM-action resolver + RequestContext
|
||||
- `action.rs` - StorageAction enum + request-to-IAM-action resolver + RequestContext
|
||||
- `policy.rs` - BucketPolicy model, evaluation engine (Deny > Allow > NoOpinion), PolicyStore (RwLock cache + disk)
|
||||
|
||||
### TypeScript Bridge (`ts/`)
|
||||
- `ts/index.ts` - Smarts3 class with RustBridge<TRustS3Commands>
|
||||
- `ts/index.ts` - SmartStorage class with RustBridge<TRustStorageCommands>
|
||||
- `ts/plugins.ts` - path, smartpath, RustBridge, tsclass
|
||||
- `ts/paths.ts` - packageDir, bucketsDir defaults
|
||||
|
||||
### IPC Commands
|
||||
| Command | Params | Action |
|
||||
|---------|--------|--------|
|
||||
| `start` | `{ config: ISmarts3Config }` | Init storage + HTTP server |
|
||||
| `start` | `{ config: ISmartStorageConfig }` | Init storage + HTTP server |
|
||||
| `stop` | `{}` | Graceful shutdown |
|
||||
| `createBucket` | `{ name: string }` | Create bucket directory |
|
||||
|
||||
### Storage Layout (backward-compatible)
|
||||
- Objects: `{root}/{bucket}/{key}._S3_object`
|
||||
- Metadata: `{root}/{bucket}/{key}._S3_object.metadata.json`
|
||||
- MD5: `{root}/{bucket}/{key}._S3_object.md5`
|
||||
### Storage Layout
|
||||
- Objects: `{root}/{bucket}/{key}._storage_object`
|
||||
- Metadata: `{root}/{bucket}/{key}._storage_object.metadata.json`
|
||||
- MD5: `{root}/{bucket}/{key}._storage_object.md5`
|
||||
- Multipart: `{root}/.multipart/{upload_id}/part-{N}`
|
||||
- Policies: `{root}/.policies/{bucket}.policy.json`
|
||||
|
||||
## Build
|
||||
|
||||
- `pnpm build` runs `tsrust && tsbuild tsfolders --allowimplicitany`
|
||||
- `tsrust` compiles Rust to `dist_rust/rusts3`
|
||||
- `tsrust` compiles Rust to `dist_rust/ruststorage`
|
||||
- Targets: linux_amd64, linux_arm64 (configured in npmextra.json)
|
||||
|
||||
## Dependencies
|
||||
|
||||
378
readme.md
378
readme.md
@@ -1,78 +1,119 @@
|
||||
# @push.rocks/smarts3 🚀
|
||||
# @push.rocks/smartstorage
|
||||
|
||||
A high-performance, S3-compatible local server powered by a **Rust core** with a clean TypeScript API. Drop-in replacement for AWS S3 during development and testing — no cloud, no Docker, no MinIO. Just `npm install` and go.
|
||||
A high-performance, S3-compatible storage server powered by a **Rust core** with a clean TypeScript API. Runs standalone for dev/test — or scales out as a **distributed, erasure-coded cluster** with QUIC-based inter-node communication. No cloud, no Docker. Just `npm install` and go. 🚀
|
||||
|
||||
## Issue Reporting and Security
|
||||
|
||||
For reporting bugs, issues, or security vulnerabilities, please visit [community.foss.global/](https://community.foss.global/). This is the central community hub for all issue reporting. Developers who sign and comply with our contribution agreement and go through identification can also get a [code.foss.global/](https://code.foss.global/) account to submit Pull Requests directly.
|
||||
|
||||
## 🌟 Why smarts3?
|
||||
## Why smartstorage?
|
||||
|
||||
| Feature | smarts3 | MinIO | s3rver |
|
||||
|---------|---------|-------|--------|
|
||||
| Feature | smartstorage | MinIO | s3rver |
|
||||
|---------|-------------|-------|--------|
|
||||
| Install | `pnpm add` | Docker / binary | `npm install` |
|
||||
| Startup time | ~20ms | seconds | ~200ms |
|
||||
| Large file uploads | ✅ Streaming, zero-copy | ✅ | ❌ OOM risk |
|
||||
| Range requests | ✅ Seek-based | ✅ | ❌ Full read |
|
||||
| Large file uploads | Streaming, zero-copy | Yes | OOM risk |
|
||||
| Range requests | Seek-based | Yes | Full read |
|
||||
| Language | Rust + TypeScript | Go | JavaScript |
|
||||
| Multipart uploads | ✅ Full support | ✅ | ❌ |
|
||||
| Auth | AWS v2/v4 key extraction | Full IAM | Basic |
|
||||
| Multipart uploads | Full support | Yes | No |
|
||||
| Auth | AWS SigV4 (full verification) | Full IAM | Basic |
|
||||
| Bucket policies | IAM-style evaluation | Yes | No |
|
||||
| Clustering | Erasure-coded, QUIC | Yes | No |
|
||||
| Multi-drive awareness | Per-drive health | Yes | No |
|
||||
|
||||
### Core Features
|
||||
|
||||
- ⚡ **Rust-powered HTTP server** — hyper 1.x with streaming I/O, zero-copy, backpressure
|
||||
- 🔄 **Full S3 API compatibility** — works with AWS SDK v3, SmartBucket, any S3 client
|
||||
- 📂 **Filesystem-backed storage** — buckets map to directories, objects to files
|
||||
- 🦀 **Rust-powered HTTP server** — hyper 1.x with streaming I/O, zero-copy, backpressure
|
||||
- 📦 **Full S3-compatible API** — works with AWS SDK v3, SmartBucket, any S3 client
|
||||
- 💾 **Filesystem-backed storage** — buckets map to directories, objects to files
|
||||
- 📤 **Streaming multipart uploads** — large files without memory pressure
|
||||
- 🎯 **Byte-range requests** — `seek()` directly to the requested byte offset
|
||||
- 🔐 **Authentication** — AWS v2/v4 signature key extraction
|
||||
- 📐 **Byte-range requests** — `seek()` directly to the requested byte offset
|
||||
- 🔐 **AWS SigV4 authentication** — full signature verification with constant-time comparison
|
||||
- 📋 **Bucket policies** — IAM-style JSON policies with Allow/Deny evaluation and wildcard matching
|
||||
- 🌐 **CORS middleware** — configurable cross-origin support
|
||||
- 📊 **Structured logging** — tracing-based, error through debug levels
|
||||
- 🧹 **Clean slate mode** — wipe storage on startup for test isolation
|
||||
- 🧪 **Test-first design** — start/stop in milliseconds, no port conflicts
|
||||
- ⚡ **Test-first design** — start/stop in milliseconds, no port conflicts
|
||||
|
||||
## 📦 Installation
|
||||
### Clustering Features
|
||||
|
||||
- 🔗 **Erasure coding** — Reed-Solomon (configurable k data + m parity shards) for storage efficiency and fault tolerance
|
||||
- 🚄 **QUIC transport** — multiplexed, encrypted inter-node communication via `quinn` with zero head-of-line blocking
|
||||
- 💽 **Multi-drive awareness** — each node manages multiple independent storage paths with health monitoring
|
||||
- 🤝 **Cluster membership** — static seed config + runtime join, heartbeat-based failure detection
|
||||
- ✍️ **Quorum writes** — data is only acknowledged after k+1 shards are persisted
|
||||
- 📖 **Quorum reads** — reconstruct from any k available shards, local-first fast path
|
||||
- 🩹 **Self-healing** — background scanner detects and reconstructs missing/corrupt shards
|
||||
|
||||
## Installation
|
||||
|
||||
```bash
|
||||
pnpm add @push.rocks/smarts3 -D
|
||||
pnpm add @push.rocks/smartstorage -D
|
||||
```
|
||||
|
||||
> **Note:** The package ships with precompiled Rust binaries for `linux_amd64` and `linux_arm64`. No Rust toolchain needed on your machine.
|
||||
|
||||
## 🚀 Quick Start
|
||||
## Quick Start
|
||||
|
||||
### Standalone Mode (Dev & Test)
|
||||
|
||||
```typescript
|
||||
import { Smarts3 } from '@push.rocks/smarts3';
|
||||
import { SmartStorage } from '@push.rocks/smartstorage';
|
||||
|
||||
// Start a local S3 server
|
||||
const s3 = await Smarts3.createAndStart({
|
||||
// Start a local S3-compatible storage server
|
||||
const storage = await SmartStorage.createAndStart({
|
||||
server: { port: 3000 },
|
||||
storage: { cleanSlate: true },
|
||||
});
|
||||
|
||||
// Create a bucket
|
||||
await s3.createBucket('my-bucket');
|
||||
await storage.createBucket('my-bucket');
|
||||
|
||||
// Get connection details for any S3 client
|
||||
const descriptor = await s3.getS3Descriptor();
|
||||
// → { endpoint: 'localhost', port: 3000, accessKey: 'S3RVER', accessSecret: 'S3RVER', useSsl: false }
|
||||
const descriptor = await storage.getStorageDescriptor();
|
||||
// → { endpoint: 'localhost', port: 3000, accessKey: 'STORAGE', accessSecret: 'STORAGE', useSsl: false }
|
||||
|
||||
// When done
|
||||
await s3.stop();
|
||||
await storage.stop();
|
||||
```
|
||||
|
||||
## 📖 Configuration
|
||||
### Cluster Mode (Distributed)
|
||||
|
||||
```typescript
|
||||
import { SmartStorage } from '@push.rocks/smartstorage';
|
||||
|
||||
const storage = await SmartStorage.createAndStart({
|
||||
server: { port: 3000 },
|
||||
cluster: {
|
||||
enabled: true,
|
||||
nodeId: 'node-1',
|
||||
quicPort: 4000,
|
||||
seedNodes: ['192.168.1.11:4000', '192.168.1.12:4000'],
|
||||
erasure: {
|
||||
dataShards: 4, // k: minimum shards to reconstruct data
|
||||
parityShards: 2, // m: fault tolerance (can lose up to m shards)
|
||||
},
|
||||
drives: {
|
||||
paths: ['/mnt/disk1', '/mnt/disk2', '/mnt/disk3'],
|
||||
},
|
||||
},
|
||||
});
|
||||
```
|
||||
|
||||
Objects are automatically split into chunks (default 4 MB), erasure-coded into 6 shards (4 data + 2 parity), and distributed across drives/nodes. Any 4 of 6 shards can reconstruct the original data.
|
||||
|
||||
## Configuration
|
||||
|
||||
All config fields are optional — sensible defaults are applied automatically.
|
||||
|
||||
```typescript
|
||||
import { Smarts3, ISmarts3Config } from '@push.rocks/smarts3';
|
||||
import { SmartStorage, ISmartStorageConfig } from '@push.rocks/smartstorage';
|
||||
|
||||
const config: ISmarts3Config = {
|
||||
const config: ISmartStorageConfig = {
|
||||
server: {
|
||||
port: 3000, // Default: 3000
|
||||
address: '0.0.0.0', // Default: '0.0.0.0'
|
||||
silent: false, // Default: false
|
||||
region: 'us-east-1', // Default: 'us-east-1' — used for SigV4 signing
|
||||
},
|
||||
storage: {
|
||||
directory: './my-data', // Default: .nogit/bucketsDir
|
||||
@@ -108,16 +149,32 @@ const config: ISmarts3Config = {
|
||||
expirationDays: 7,
|
||||
cleanupIntervalMinutes: 60,
|
||||
},
|
||||
cluster: { // Optional — omit for standalone mode
|
||||
enabled: true,
|
||||
nodeId: 'node-1', // Auto-generated UUID if omitted
|
||||
quicPort: 4000, // Default: 4000
|
||||
seedNodes: [], // Addresses of existing cluster members
|
||||
erasure: {
|
||||
dataShards: 4, // Default: 4
|
||||
parityShards: 2, // Default: 2
|
||||
chunkSizeBytes: 4194304, // Default: 4 MB
|
||||
},
|
||||
drives: {
|
||||
paths: ['/mnt/disk1', '/mnt/disk2'],
|
||||
},
|
||||
heartbeatIntervalMs: 5000, // Default: 5000
|
||||
heartbeatTimeoutMs: 30000, // Default: 30000
|
||||
},
|
||||
};
|
||||
|
||||
const s3 = await Smarts3.createAndStart(config);
|
||||
const storage = await SmartStorage.createAndStart(config);
|
||||
```
|
||||
|
||||
### Common Configurations
|
||||
|
||||
**CI/CD testing** — silent, clean, fast:
|
||||
```typescript
|
||||
const s3 = await Smarts3.createAndStart({
|
||||
const storage = await SmartStorage.createAndStart({
|
||||
server: { port: 9999, silent: true },
|
||||
storage: { cleanSlate: true },
|
||||
});
|
||||
@@ -125,7 +182,7 @@ const s3 = await Smarts3.createAndStart({
|
||||
|
||||
**Auth enabled:**
|
||||
```typescript
|
||||
const s3 = await Smarts3.createAndStart({
|
||||
const storage = await SmartStorage.createAndStart({
|
||||
auth: {
|
||||
enabled: true,
|
||||
credentials: [{ accessKeyId: 'test', secretAccessKey: 'test123' }],
|
||||
@@ -135,7 +192,7 @@ const s3 = await Smarts3.createAndStart({
|
||||
|
||||
**CORS for local web dev:**
|
||||
```typescript
|
||||
const s3 = await Smarts3.createAndStart({
|
||||
const storage = await SmartStorage.createAndStart({
|
||||
cors: {
|
||||
enabled: true,
|
||||
allowedOrigins: ['http://localhost:5173'],
|
||||
@@ -144,12 +201,12 @@ const s3 = await Smarts3.createAndStart({
|
||||
});
|
||||
```
|
||||
|
||||
## 📤 Usage with AWS SDK v3
|
||||
## Usage with AWS SDK v3
|
||||
|
||||
```typescript
|
||||
import { S3Client, PutObjectCommand, GetObjectCommand, DeleteObjectCommand } from '@aws-sdk/client-s3';
|
||||
|
||||
const descriptor = await s3.getS3Descriptor();
|
||||
const descriptor = await storage.getStorageDescriptor();
|
||||
|
||||
const client = new S3Client({
|
||||
endpoint: `http://${descriptor.endpoint}:${descriptor.port}`,
|
||||
@@ -158,14 +215,14 @@ const client = new S3Client({
|
||||
accessKeyId: descriptor.accessKey,
|
||||
secretAccessKey: descriptor.accessSecret,
|
||||
},
|
||||
forcePathStyle: true, // Required for path-style S3
|
||||
forcePathStyle: true, // Required for path-style access
|
||||
});
|
||||
|
||||
// Upload
|
||||
await client.send(new PutObjectCommand({
|
||||
Bucket: 'my-bucket',
|
||||
Key: 'hello.txt',
|
||||
Body: 'Hello, S3!',
|
||||
Body: 'Hello, Storage!',
|
||||
ContentType: 'text/plain',
|
||||
}));
|
||||
|
||||
@@ -174,7 +231,7 @@ const { Body } = await client.send(new GetObjectCommand({
|
||||
Bucket: 'my-bucket',
|
||||
Key: 'hello.txt',
|
||||
}));
|
||||
const content = await Body.transformToString(); // "Hello, S3!"
|
||||
const content = await Body.transformToString(); // "Hello, Storage!"
|
||||
|
||||
// Delete
|
||||
await client.send(new DeleteObjectCommand({
|
||||
@@ -183,12 +240,12 @@ await client.send(new DeleteObjectCommand({
|
||||
}));
|
||||
```
|
||||
|
||||
## 🪣 Usage with SmartBucket
|
||||
## Usage with SmartBucket
|
||||
|
||||
```typescript
|
||||
import { SmartBucket } from '@push.rocks/smartbucket';
|
||||
|
||||
const smartbucket = new SmartBucket(await s3.getS3Descriptor());
|
||||
const smartbucket = new SmartBucket(await storage.getStorageDescriptor());
|
||||
const bucket = await smartbucket.createBucket('my-bucket');
|
||||
const dir = await bucket.getBaseDirectory();
|
||||
|
||||
@@ -202,9 +259,9 @@ const content = await dir.fastGet('docs/readme.txt');
|
||||
const files = await dir.listFiles();
|
||||
```
|
||||
|
||||
## 📤 Multipart Uploads
|
||||
## Multipart Uploads
|
||||
|
||||
For files larger than 5 MB, use multipart uploads. smarts3 handles them with **streaming I/O** — parts are written directly to disk, never buffered in memory.
|
||||
For files larger than 5 MB, use multipart uploads. smartstorage handles them with **streaming I/O** — parts are written directly to disk, never buffered in memory. In cluster mode, each part is independently erasure-coded and distributed.
|
||||
|
||||
```typescript
|
||||
import {
|
||||
@@ -241,38 +298,161 @@ await client.send(new CompleteMultipartUploadCommand({
|
||||
}));
|
||||
```
|
||||
|
||||
## 🧪 Testing Integration
|
||||
## Bucket Policies
|
||||
|
||||
smartstorage supports AWS-style bucket policies for fine-grained access control. Policies use the same IAM JSON format as real S3 — so you can develop and test your policy logic locally before deploying.
|
||||
|
||||
When `auth.enabled` is `true`, the auth pipeline works as follows:
|
||||
1. **Authenticate** — verify the AWS SigV4 signature (anonymous requests skip this step)
|
||||
2. **Authorize** — evaluate bucket policies against the request action, resource, and caller identity
|
||||
3. **Default** — authenticated users get full access; anonymous requests are denied unless a policy explicitly allows them
|
||||
|
||||
### Setting a Bucket Policy
|
||||
|
||||
```typescript
|
||||
import { Smarts3 } from '@push.rocks/smarts3';
|
||||
import { PutBucketPolicyCommand } from '@aws-sdk/client-s3';
|
||||
|
||||
// Allow anonymous read access to all objects in a bucket
|
||||
await client.send(new PutBucketPolicyCommand({
|
||||
Bucket: 'public-assets',
|
||||
Policy: JSON.stringify({
|
||||
Version: '2012-10-17',
|
||||
Statement: [{
|
||||
Sid: 'PublicRead',
|
||||
Effect: 'Allow',
|
||||
Principal: '*',
|
||||
Action: ['s3:GetObject'],
|
||||
Resource: ['arn:aws:s3:::public-assets/*'],
|
||||
}],
|
||||
}),
|
||||
}));
|
||||
```
|
||||
|
||||
### Policy Features
|
||||
|
||||
- **Effect**: `Allow` and `Deny` (explicit Deny always wins)
|
||||
- **Principal**: `"*"` (everyone) or `{ "AWS": ["arn:..."] }` for specific identities
|
||||
- **Action**: IAM-style actions like `s3:GetObject`, `s3:PutObject`, `s3:*`, or prefix wildcards like `s3:Get*`
|
||||
- **Resource**: ARN patterns with `*` and `?` wildcards (e.g. `arn:aws:s3:::my-bucket/*`)
|
||||
- **Persistence**: Policies survive server restarts — stored as JSON on disk alongside your data
|
||||
|
||||
### Policy CRUD Operations
|
||||
|
||||
| Operation | AWS SDK Command | HTTP |
|
||||
|-----------|----------------|------|
|
||||
| Get policy | `GetBucketPolicyCommand` | `GET /{bucket}?policy` |
|
||||
| Set policy | `PutBucketPolicyCommand` | `PUT /{bucket}?policy` |
|
||||
| Delete policy | `DeleteBucketPolicyCommand` | `DELETE /{bucket}?policy` |
|
||||
|
||||
Deleting a bucket automatically removes its associated policy.
|
||||
|
||||
## Clustering Deep Dive 🔗
|
||||
|
||||
smartstorage can run as a distributed storage cluster where multiple nodes cooperate to store and retrieve data with built-in redundancy.
|
||||
|
||||
### How It Works
|
||||
|
||||
```
|
||||
Client ──HTTP PUT──▶ Node A (coordinator)
|
||||
│
|
||||
├─ Split object into 4 MB chunks
|
||||
├─ Erasure-code each chunk (4 data + 2 parity = 6 shards)
|
||||
│
|
||||
├──QUIC──▶ Node B (shard writes)
|
||||
├──QUIC──▶ Node C (shard writes)
|
||||
└─ Local disk (shard writes)
|
||||
```
|
||||
|
||||
1. **Any node can coordinate** — the client connects to any cluster member
|
||||
2. **Objects are chunked** — large objects split into fixed-size pieces (default 4 MB)
|
||||
3. **Each chunk is erasure-coded** — Reed-Solomon produces k data + m parity shards
|
||||
4. **Shards are distributed** — placed across different nodes and drives for fault isolation
|
||||
5. **Quorum guarantees consistency** — writes need k+1 acks, reads need k shards
|
||||
|
||||
### Erasure Coding
|
||||
|
||||
With the default `4+2` configuration:
|
||||
- Storage overhead: **50%** (2 parity shards per 4 data shards, vs. 200% for 3x replication)
|
||||
- Fault tolerance: **any 2 drives/nodes can fail** simultaneously
|
||||
- Read efficiency: only **4 of 6 shards** needed to reconstruct data
|
||||
|
||||
| Config | Total Shards | Overhead | Tolerance | Min Nodes |
|
||||
|--------|-------------|----------|-----------|-----------|
|
||||
| 4+2 | 6 | 50% | 2 failures | 3 |
|
||||
| 6+3 | 9 | 50% | 3 failures | 5 |
|
||||
| 2+1 | 3 | 50% | 1 failure | 2 |
|
||||
|
||||
### QUIC Transport
|
||||
|
||||
Inter-node communication uses [QUIC](https://en.wikipedia.org/wiki/QUIC) via the `quinn` library:
|
||||
- 🔒 **Built-in TLS** — self-signed certs auto-generated at cluster init
|
||||
- 🔀 **Multiplexed streams** — concurrent shard transfers without head-of-line blocking
|
||||
- ⚡ **Connection pooling** — persistent connections to peer nodes
|
||||
- 🌊 **Natural backpressure** — QUIC flow control prevents overloading slow peers
|
||||
|
||||
### Cluster Membership
|
||||
|
||||
- **Static seed nodes** — initial cluster defined in config
|
||||
- **Runtime join** — new nodes can join a running cluster
|
||||
- **Heartbeat monitoring** — every 5s (configurable), with suspect/offline detection
|
||||
- **Split-brain prevention** — nodes only mark peers offline when they have a majority
|
||||
|
||||
### Self-Healing
|
||||
|
||||
A background scanner runs periodically (default: every 24h) and:
|
||||
1. Checks shard checksums (CRC32C) for bit-rot detection
|
||||
2. Identifies shards on offline nodes
|
||||
3. Reconstructs missing shards from remaining data using Reed-Solomon
|
||||
4. Places healed shards on healthy drives
|
||||
|
||||
Healing runs at low priority to avoid impacting foreground I/O.
|
||||
|
||||
### Erasure Set Formation
|
||||
|
||||
Drives are organized into fixed **erasure sets** at cluster initialization:
|
||||
|
||||
```
|
||||
3 nodes × 4 drives each = 12 drives total
|
||||
With 6-shard erasure sets → 2 erasure sets
|
||||
|
||||
Set 0: Node1-Disk0, Node2-Disk0, Node3-Disk0, Node1-Disk1, Node2-Disk1, Node3-Disk1
|
||||
Set 1: Node1-Disk2, Node2-Disk2, Node3-Disk2, Node1-Disk3, Node2-Disk3, Node3-Disk3
|
||||
```
|
||||
|
||||
Drives are interleaved across nodes for maximum fault isolation. New nodes form new erasure sets — existing data is never rebalanced.
|
||||
|
||||
## Testing Integration
|
||||
|
||||
```typescript
|
||||
import { SmartStorage } from '@push.rocks/smartstorage';
|
||||
import { tap, expect } from '@git.zone/tstest/tapbundle';
|
||||
|
||||
let s3: Smarts3;
|
||||
let storage: SmartStorage;
|
||||
|
||||
tap.test('setup', async () => {
|
||||
s3 = await Smarts3.createAndStart({
|
||||
storage = await SmartStorage.createAndStart({
|
||||
server: { port: 4567, silent: true },
|
||||
storage: { cleanSlate: true },
|
||||
});
|
||||
});
|
||||
|
||||
tap.test('should store and retrieve objects', async () => {
|
||||
await s3.createBucket('test');
|
||||
await storage.createBucket('test');
|
||||
// ... your test logic using AWS SDK or SmartBucket
|
||||
});
|
||||
|
||||
tap.test('teardown', async () => {
|
||||
await s3.stop();
|
||||
await storage.stop();
|
||||
});
|
||||
|
||||
export default tap.start();
|
||||
```
|
||||
|
||||
## 🔧 API Reference
|
||||
## API Reference
|
||||
|
||||
### `Smarts3` Class
|
||||
### `SmartStorage` Class
|
||||
|
||||
#### `static createAndStart(config?: ISmarts3Config): Promise<Smarts3>`
|
||||
#### `static createAndStart(config?: ISmartStorageConfig): Promise<SmartStorage>`
|
||||
|
||||
Create and start a server in one call.
|
||||
|
||||
@@ -286,11 +466,11 @@ Gracefully stop the server and kill the Rust process.
|
||||
|
||||
#### `createBucket(name: string): Promise<{ name: string }>`
|
||||
|
||||
Create an S3 bucket.
|
||||
Create a storage bucket.
|
||||
|
||||
#### `getS3Descriptor(options?): Promise<IS3Descriptor>`
|
||||
#### `getStorageDescriptor(options?): Promise<IS3Descriptor>`
|
||||
|
||||
Get connection details for S3 clients. Returns:
|
||||
Get connection details for S3-compatible clients. Returns:
|
||||
|
||||
| Field | Type | Description |
|
||||
|-------|------|-------------|
|
||||
@@ -300,35 +480,42 @@ Get connection details for S3 clients. Returns:
|
||||
| `accessSecret` | `string` | Secret key from first configured credential |
|
||||
| `useSsl` | `boolean` | Always `false` (plain HTTP) |
|
||||
|
||||
## 🏗️ Architecture
|
||||
## Architecture
|
||||
|
||||
smarts3 uses a **hybrid Rust + TypeScript** architecture:
|
||||
smartstorage uses a **hybrid Rust + TypeScript** architecture:
|
||||
|
||||
```
|
||||
┌─────────────────────────────────┐
|
||||
│ Your Code (AWS SDK, etc.) │
|
||||
│ ↕ HTTP (localhost:3000) │
|
||||
├─────────────────────────────────┤
|
||||
│ rusts3 binary (Rust) │
|
||||
│ ├─ hyper 1.x HTTP server │
|
||||
│ ├─ S3 path-style routing │
|
||||
│ ├─ Streaming storage layer │
|
||||
│ ├─ Multipart manager │
|
||||
│ ├─ CORS / Auth middleware │
|
||||
│ └─ S3 XML response builder │
|
||||
├─────────────────────────────────┤
|
||||
│ TypeScript (thin IPC wrapper) │
|
||||
│ ├─ Smarts3 class │
|
||||
│ ├─ RustBridge (stdin/stdout) │
|
||||
│ └─ Config & S3 descriptor │
|
||||
└─────────────────────────────────┘
|
||||
┌──────────────────────────────────────────────┐
|
||||
│ Your Code (AWS SDK, SmartBucket, etc.) │
|
||||
│ ↕ HTTP (localhost:3000) │
|
||||
├──────────────────────────────────────────────┤
|
||||
│ ruststorage binary (Rust) │
|
||||
│ ├─ hyper 1.x HTTP server │
|
||||
│ ├─ S3 path-style routing │
|
||||
│ ├─ StorageBackend (Standalone or Clustered) │
|
||||
│ │ ├─ FileStore (single-node mode) │
|
||||
│ │ └─ DistributedStore (cluster mode) │
|
||||
│ │ ├─ ErasureCoder (Reed-Solomon) │
|
||||
│ │ ├─ ShardStore (per-drive storage) │
|
||||
│ │ ├─ QuicTransport (quinn) │
|
||||
│ │ ├─ ClusterState & Membership │
|
||||
│ │ └─ HealingService │
|
||||
│ ├─ SigV4 auth + policy engine │
|
||||
│ ├─ CORS middleware │
|
||||
│ └─ S3 XML response builder │
|
||||
├──────────────────────────────────────────────┤
|
||||
│ TypeScript (thin IPC wrapper) │
|
||||
│ ├─ SmartStorage class │
|
||||
│ ├─ RustBridge (stdin/stdout JSON IPC) │
|
||||
│ └─ Config & S3 descriptor │
|
||||
└──────────────────────────────────────────────┘
|
||||
```
|
||||
|
||||
**Why Rust?** The TypeScript implementation had critical perf issues: OOM on multipart uploads (parts buffered in memory), double stream copying, file descriptor leaks on HEAD requests, full-file reads for range requests, and no backpressure. The Rust binary solves all of these with streaming I/O, zero-copy, and direct `seek()` for range requests.
|
||||
**Why Rust?** The original TypeScript implementation had critical perf issues: OOM on multipart uploads (parts buffered in memory), double stream copying, file descriptor leaks on HEAD requests, full-file reads for range requests, and no backpressure. The Rust binary solves all of these with streaming I/O, zero-copy, and direct `seek()` for range requests.
|
||||
|
||||
**IPC Protocol:** TypeScript spawns the `rusts3` binary with `--management` and communicates via newline-delimited JSON over stdin/stdout. Commands: `start`, `stop`, `createBucket`.
|
||||
**IPC Protocol:** TypeScript spawns the `ruststorage` binary with `--management` and communicates via newline-delimited JSON over stdin/stdout. Commands: `start`, `stop`, `createBucket`, `clusterStatus`.
|
||||
|
||||
### S3 Operations Supported
|
||||
### S3-Compatible Operations
|
||||
|
||||
| Operation | Method | Path |
|
||||
|-----------|--------|------|
|
||||
@@ -347,32 +534,51 @@ smarts3 uses a **hybrid Rust + TypeScript** architecture:
|
||||
| CompleteMultipartUpload | `POST /{bucket}/{key}?uploadId` | |
|
||||
| AbortMultipartUpload | `DELETE /{bucket}/{key}?uploadId` | |
|
||||
| ListMultipartUploads | `GET /{bucket}?uploads` | |
|
||||
| GetBucketPolicy | `GET /{bucket}?policy` | |
|
||||
| PutBucketPolicy | `PUT /{bucket}?policy` | |
|
||||
| DeleteBucketPolicy | `DELETE /{bucket}?policy` | |
|
||||
|
||||
### On-Disk Format
|
||||
|
||||
**Standalone mode:**
|
||||
```
|
||||
{storage.directory}/
|
||||
{bucket}/
|
||||
{key}._S3_object # Object data
|
||||
{key}._S3_object.metadata.json # Metadata (content-type, x-amz-meta-*, etc.)
|
||||
{key}._S3_object.md5 # Cached MD5 hash
|
||||
{key}._storage_object # Object data
|
||||
{key}._storage_object.metadata.json # Metadata (content-type, x-amz-meta-*, etc.)
|
||||
{key}._storage_object.md5 # Cached MD5 hash
|
||||
.multipart/
|
||||
{upload-id}/
|
||||
metadata.json # Upload metadata (bucket, key, parts)
|
||||
part-1 # Part data files
|
||||
part-2
|
||||
...
|
||||
metadata.json # Upload metadata
|
||||
part-1, part-2, ... # Part data files
|
||||
.policies/
|
||||
{bucket}.policy.json # Bucket policy (IAM JSON format)
|
||||
```
|
||||
|
||||
## 🔗 Related Packages
|
||||
**Cluster mode:**
|
||||
```
|
||||
{drive_path}/.smartstorage/
|
||||
format.json # Drive metadata (cluster ID, erasure set)
|
||||
data/{bucket}/{key_hash}/{key}/
|
||||
chunk-{N}/shard-{M}.dat # Erasure-coded shard data
|
||||
chunk-{N}/shard-{M}.meta # Shard metadata (checksum, size)
|
||||
|
||||
- [`@push.rocks/smartbucket`](https://code.foss.global/push.rocks/smartbucket) — High-level S3 abstraction layer
|
||||
{storage.directory}/
|
||||
.manifests/{bucket}/
|
||||
{key}.manifest.json # Object manifest (shard placements, checksums)
|
||||
.buckets/{bucket}/ # Bucket metadata
|
||||
.policies/{bucket}.policy.json # Bucket policies
|
||||
```
|
||||
|
||||
## Related Packages
|
||||
|
||||
- [`@push.rocks/smartbucket`](https://code.foss.global/push.rocks/smartbucket) — High-level S3-compatible abstraction layer
|
||||
- [`@push.rocks/smartrust`](https://code.foss.global/push.rocks/smartrust) — TypeScript ↔ Rust IPC bridge
|
||||
- [`@git.zone/tsrust`](https://code.foss.global/git.zone/tsrust) — Rust cross-compilation for npm packages
|
||||
|
||||
## License and Legal Information
|
||||
|
||||
This repository contains open-source code licensed under the MIT License. A copy of the license can be found in the [LICENSE](./LICENSE) file.
|
||||
This repository contains open-source code licensed under the MIT License. A copy of the license can be found in the [license](./license) file.
|
||||
|
||||
**Please note:** The MIT License does not grant permission to use the trade names, trademarks, service marks, or product names of the project, except as required for reasonable and customary use in describing the origin of the work and reproducing the content of the NOTICE file.
|
||||
|
||||
|
||||
979
rust/Cargo.lock
generated
979
rust/Cargo.lock
generated
File diff suppressed because it is too large
Load Diff
@@ -1,10 +1,10 @@
|
||||
[package]
|
||||
name = "rusts3"
|
||||
name = "ruststorage"
|
||||
version = "0.1.0"
|
||||
edition = "2021"
|
||||
|
||||
[[bin]]
|
||||
name = "rusts3"
|
||||
name = "ruststorage"
|
||||
path = "src/main.rs"
|
||||
|
||||
[dependencies]
|
||||
@@ -28,6 +28,16 @@ percent-encoding = "2"
|
||||
url = "2"
|
||||
chrono = { version = "0.4", features = ["serde"] }
|
||||
futures-core = "0.3"
|
||||
futures = "0.3"
|
||||
async-trait = "0.1"
|
||||
reed-solomon-erasure = { version = "6", features = ["simd-accel"] }
|
||||
xxhash-rust = { version = "0.8", features = ["xxh64"] }
|
||||
crc32c = "0.6"
|
||||
bincode = "1"
|
||||
quinn = "0.11"
|
||||
rustls = { version = "0.23", default-features = false, features = ["ring", "std"] }
|
||||
rcgen = "0.13"
|
||||
dashmap = "6"
|
||||
hmac = "0.12"
|
||||
sha2 = "0.10"
|
||||
hex = "0.4"
|
||||
|
||||
@@ -2,9 +2,9 @@ use hyper::body::Incoming;
|
||||
use hyper::{Method, Request};
|
||||
use std::collections::HashMap;
|
||||
|
||||
/// S3 actions that map to IAM permission strings.
|
||||
/// Storage actions that map to IAM permission strings.
|
||||
#[derive(Debug, Clone, PartialEq, Eq)]
|
||||
pub enum S3Action {
|
||||
pub enum StorageAction {
|
||||
ListAllMyBuckets,
|
||||
CreateBucket,
|
||||
DeleteBucket,
|
||||
@@ -25,28 +25,28 @@ pub enum S3Action {
|
||||
DeleteBucketPolicy,
|
||||
}
|
||||
|
||||
impl S3Action {
|
||||
impl StorageAction {
|
||||
/// Return the IAM-style action string (e.g. "s3:GetObject").
|
||||
pub fn iam_action(&self) -> &'static str {
|
||||
match self {
|
||||
S3Action::ListAllMyBuckets => "s3:ListAllMyBuckets",
|
||||
S3Action::CreateBucket => "s3:CreateBucket",
|
||||
S3Action::DeleteBucket => "s3:DeleteBucket",
|
||||
S3Action::HeadBucket => "s3:ListBucket",
|
||||
S3Action::ListBucket => "s3:ListBucket",
|
||||
S3Action::GetObject => "s3:GetObject",
|
||||
S3Action::HeadObject => "s3:GetObject",
|
||||
S3Action::PutObject => "s3:PutObject",
|
||||
S3Action::DeleteObject => "s3:DeleteObject",
|
||||
S3Action::CopyObject => "s3:PutObject",
|
||||
S3Action::ListBucketMultipartUploads => "s3:ListBucketMultipartUploads",
|
||||
S3Action::AbortMultipartUpload => "s3:AbortMultipartUpload",
|
||||
S3Action::InitiateMultipartUpload => "s3:PutObject",
|
||||
S3Action::UploadPart => "s3:PutObject",
|
||||
S3Action::CompleteMultipartUpload => "s3:PutObject",
|
||||
S3Action::GetBucketPolicy => "s3:GetBucketPolicy",
|
||||
S3Action::PutBucketPolicy => "s3:PutBucketPolicy",
|
||||
S3Action::DeleteBucketPolicy => "s3:DeleteBucketPolicy",
|
||||
StorageAction::ListAllMyBuckets => "s3:ListAllMyBuckets",
|
||||
StorageAction::CreateBucket => "s3:CreateBucket",
|
||||
StorageAction::DeleteBucket => "s3:DeleteBucket",
|
||||
StorageAction::HeadBucket => "s3:ListBucket",
|
||||
StorageAction::ListBucket => "s3:ListBucket",
|
||||
StorageAction::GetObject => "s3:GetObject",
|
||||
StorageAction::HeadObject => "s3:GetObject",
|
||||
StorageAction::PutObject => "s3:PutObject",
|
||||
StorageAction::DeleteObject => "s3:DeleteObject",
|
||||
StorageAction::CopyObject => "s3:PutObject",
|
||||
StorageAction::ListBucketMultipartUploads => "s3:ListBucketMultipartUploads",
|
||||
StorageAction::AbortMultipartUpload => "s3:AbortMultipartUpload",
|
||||
StorageAction::InitiateMultipartUpload => "s3:PutObject",
|
||||
StorageAction::UploadPart => "s3:PutObject",
|
||||
StorageAction::CompleteMultipartUpload => "s3:PutObject",
|
||||
StorageAction::GetBucketPolicy => "s3:GetBucketPolicy",
|
||||
StorageAction::PutBucketPolicy => "s3:PutBucketPolicy",
|
||||
StorageAction::DeleteBucketPolicy => "s3:DeleteBucketPolicy",
|
||||
}
|
||||
}
|
||||
}
|
||||
@@ -54,7 +54,7 @@ impl S3Action {
|
||||
/// Context extracted from a request, used for policy evaluation.
|
||||
#[derive(Debug, Clone)]
|
||||
pub struct RequestContext {
|
||||
pub action: S3Action,
|
||||
pub action: StorageAction,
|
||||
pub bucket: Option<String>,
|
||||
pub key: Option<String>,
|
||||
}
|
||||
@@ -70,7 +70,7 @@ impl RequestContext {
|
||||
}
|
||||
}
|
||||
|
||||
/// Resolve the S3 action from an incoming HTTP request.
|
||||
/// Resolve the storage action from an incoming HTTP request.
|
||||
pub fn resolve_action(req: &Request<Incoming>) -> RequestContext {
|
||||
let method = req.method().clone();
|
||||
let path = req.uri().path().to_string();
|
||||
@@ -87,7 +87,7 @@ pub fn resolve_action(req: &Request<Incoming>) -> RequestContext {
|
||||
0 => {
|
||||
// Root: GET / -> ListBuckets
|
||||
RequestContext {
|
||||
action: S3Action::ListAllMyBuckets,
|
||||
action: StorageAction::ListAllMyBuckets,
|
||||
bucket: None,
|
||||
key: None,
|
||||
}
|
||||
@@ -98,15 +98,15 @@ pub fn resolve_action(req: &Request<Incoming>) -> RequestContext {
|
||||
let has_uploads = query.contains_key("uploads");
|
||||
|
||||
let action = match (&method, has_policy, has_uploads) {
|
||||
(&Method::GET, true, _) => S3Action::GetBucketPolicy,
|
||||
(&Method::PUT, true, _) => S3Action::PutBucketPolicy,
|
||||
(&Method::DELETE, true, _) => S3Action::DeleteBucketPolicy,
|
||||
(&Method::GET, _, true) => S3Action::ListBucketMultipartUploads,
|
||||
(&Method::GET, _, _) => S3Action::ListBucket,
|
||||
(&Method::PUT, _, _) => S3Action::CreateBucket,
|
||||
(&Method::DELETE, _, _) => S3Action::DeleteBucket,
|
||||
(&Method::HEAD, _, _) => S3Action::HeadBucket,
|
||||
_ => S3Action::ListBucket,
|
||||
(&Method::GET, true, _) => StorageAction::GetBucketPolicy,
|
||||
(&Method::PUT, true, _) => StorageAction::PutBucketPolicy,
|
||||
(&Method::DELETE, true, _) => StorageAction::DeleteBucketPolicy,
|
||||
(&Method::GET, _, true) => StorageAction::ListBucketMultipartUploads,
|
||||
(&Method::GET, _, _) => StorageAction::ListBucket,
|
||||
(&Method::PUT, _, _) => StorageAction::CreateBucket,
|
||||
(&Method::DELETE, _, _) => StorageAction::DeleteBucket,
|
||||
(&Method::HEAD, _, _) => StorageAction::HeadBucket,
|
||||
_ => StorageAction::ListBucket,
|
||||
};
|
||||
|
||||
RequestContext {
|
||||
@@ -125,16 +125,16 @@ pub fn resolve_action(req: &Request<Incoming>) -> RequestContext {
|
||||
let has_uploads = query.contains_key("uploads");
|
||||
|
||||
let action = match &method {
|
||||
&Method::PUT if has_part_number && has_upload_id => S3Action::UploadPart,
|
||||
&Method::PUT if has_copy_source => S3Action::CopyObject,
|
||||
&Method::PUT => S3Action::PutObject,
|
||||
&Method::GET => S3Action::GetObject,
|
||||
&Method::HEAD => S3Action::HeadObject,
|
||||
&Method::DELETE if has_upload_id => S3Action::AbortMultipartUpload,
|
||||
&Method::DELETE => S3Action::DeleteObject,
|
||||
&Method::POST if has_uploads => S3Action::InitiateMultipartUpload,
|
||||
&Method::POST if has_upload_id => S3Action::CompleteMultipartUpload,
|
||||
_ => S3Action::GetObject,
|
||||
&Method::PUT if has_part_number && has_upload_id => StorageAction::UploadPart,
|
||||
&Method::PUT if has_copy_source => StorageAction::CopyObject,
|
||||
&Method::PUT => StorageAction::PutObject,
|
||||
&Method::GET => StorageAction::GetObject,
|
||||
&Method::HEAD => StorageAction::HeadObject,
|
||||
&Method::DELETE if has_upload_id => StorageAction::AbortMultipartUpload,
|
||||
&Method::DELETE => StorageAction::DeleteObject,
|
||||
&Method::POST if has_uploads => StorageAction::InitiateMultipartUpload,
|
||||
&Method::POST if has_upload_id => StorageAction::CompleteMultipartUpload,
|
||||
_ => StorageAction::GetObject,
|
||||
};
|
||||
|
||||
RequestContext {
|
||||
@@ -144,7 +144,7 @@ pub fn resolve_action(req: &Request<Incoming>) -> RequestContext {
|
||||
}
|
||||
}
|
||||
_ => RequestContext {
|
||||
action: S3Action::ListAllMyBuckets,
|
||||
action: StorageAction::ListAllMyBuckets,
|
||||
bucket: None,
|
||||
key: None,
|
||||
},
|
||||
|
||||
@@ -4,8 +4,8 @@ use hyper::Request;
|
||||
use sha2::{Digest, Sha256};
|
||||
use std::collections::HashMap;
|
||||
|
||||
use crate::config::{Credential, S3Config};
|
||||
use crate::s3_error::S3Error;
|
||||
use crate::config::{Credential, SmartStorageConfig};
|
||||
use crate::error::StorageError;
|
||||
|
||||
type HmacSha256 = Hmac<Sha256>;
|
||||
|
||||
@@ -27,8 +27,8 @@ struct SigV4Header {
|
||||
/// Verify the request's SigV4 signature. Returns the caller identity on success.
|
||||
pub fn verify_request(
|
||||
req: &Request<Incoming>,
|
||||
config: &S3Config,
|
||||
) -> Result<AuthenticatedIdentity, S3Error> {
|
||||
config: &SmartStorageConfig,
|
||||
) -> Result<AuthenticatedIdentity, StorageError> {
|
||||
let auth_header = req
|
||||
.headers()
|
||||
.get("authorization")
|
||||
@@ -37,18 +37,18 @@ pub fn verify_request(
|
||||
|
||||
// Reject SigV2
|
||||
if auth_header.starts_with("AWS ") {
|
||||
return Err(S3Error::authorization_header_malformed());
|
||||
return Err(StorageError::authorization_header_malformed());
|
||||
}
|
||||
|
||||
if !auth_header.starts_with("AWS4-HMAC-SHA256") {
|
||||
return Err(S3Error::authorization_header_malformed());
|
||||
return Err(StorageError::authorization_header_malformed());
|
||||
}
|
||||
|
||||
let parsed = parse_auth_header(auth_header)?;
|
||||
|
||||
// Look up credential
|
||||
let credential = find_credential(&parsed.access_key_id, config)
|
||||
.ok_or_else(S3Error::invalid_access_key_id)?;
|
||||
.ok_or_else(StorageError::invalid_access_key_id)?;
|
||||
|
||||
// Get x-amz-date
|
||||
let amz_date = req
|
||||
@@ -60,7 +60,7 @@ pub fn verify_request(
|
||||
.get("date")
|
||||
.and_then(|v| v.to_str().ok())
|
||||
})
|
||||
.ok_or_else(|| S3Error::missing_security_header("Missing x-amz-date header"))?;
|
||||
.ok_or_else(|| StorageError::missing_security_header("Missing x-amz-date header"))?;
|
||||
|
||||
// Enforce 15-min clock skew
|
||||
check_clock_skew(amz_date)?;
|
||||
@@ -99,7 +99,7 @@ pub fn verify_request(
|
||||
|
||||
// Constant-time comparison
|
||||
if !constant_time_eq(computed_hex.as_bytes(), parsed.signature.as_bytes()) {
|
||||
return Err(S3Error::signature_does_not_match());
|
||||
return Err(StorageError::signature_does_not_match());
|
||||
}
|
||||
|
||||
Ok(AuthenticatedIdentity {
|
||||
@@ -108,11 +108,11 @@ pub fn verify_request(
|
||||
}
|
||||
|
||||
/// Parse the Authorization header into its components.
|
||||
fn parse_auth_header(header: &str) -> Result<SigV4Header, S3Error> {
|
||||
fn parse_auth_header(header: &str) -> Result<SigV4Header, StorageError> {
|
||||
// Format: AWS4-HMAC-SHA256 Credential=KEY/YYYYMMDD/region/s3/aws4_request, SignedHeaders=h1;h2, Signature=hex
|
||||
let after_algo = header
|
||||
.strip_prefix("AWS4-HMAC-SHA256")
|
||||
.ok_or_else(S3Error::authorization_header_malformed)?
|
||||
.ok_or_else(StorageError::authorization_header_malformed)?
|
||||
.trim();
|
||||
|
||||
let mut credential_str = None;
|
||||
@@ -131,17 +131,17 @@ fn parse_auth_header(header: &str) -> Result<SigV4Header, S3Error> {
|
||||
}
|
||||
|
||||
let credential_str = credential_str
|
||||
.ok_or_else(S3Error::authorization_header_malformed)?;
|
||||
.ok_or_else(StorageError::authorization_header_malformed)?;
|
||||
let signed_headers_str = signed_headers_str
|
||||
.ok_or_else(S3Error::authorization_header_malformed)?;
|
||||
.ok_or_else(StorageError::authorization_header_malformed)?;
|
||||
let signature = signature_str
|
||||
.ok_or_else(S3Error::authorization_header_malformed)?
|
||||
.ok_or_else(StorageError::authorization_header_malformed)?
|
||||
.to_string();
|
||||
|
||||
// Parse credential: KEY/YYYYMMDD/region/s3/aws4_request
|
||||
let cred_parts: Vec<&str> = credential_str.splitn(5, '/').collect();
|
||||
if cred_parts.len() < 5 {
|
||||
return Err(S3Error::authorization_header_malformed());
|
||||
return Err(StorageError::authorization_header_malformed());
|
||||
}
|
||||
|
||||
let access_key_id = cred_parts[0].to_string();
|
||||
@@ -163,7 +163,7 @@ fn parse_auth_header(header: &str) -> Result<SigV4Header, S3Error> {
|
||||
}
|
||||
|
||||
/// Find a credential by access key ID.
|
||||
fn find_credential<'a>(access_key_id: &str, config: &'a S3Config) -> Option<&'a Credential> {
|
||||
fn find_credential<'a>(access_key_id: &str, config: &'a SmartStorageConfig) -> Option<&'a Credential> {
|
||||
config
|
||||
.auth
|
||||
.credentials
|
||||
@@ -172,17 +172,17 @@ fn find_credential<'a>(access_key_id: &str, config: &'a S3Config) -> Option<&'a
|
||||
}
|
||||
|
||||
/// Check clock skew (15 minutes max).
|
||||
fn check_clock_skew(amz_date: &str) -> Result<(), S3Error> {
|
||||
fn check_clock_skew(amz_date: &str) -> Result<(), StorageError> {
|
||||
// Parse ISO 8601 basic format: YYYYMMDDTHHMMSSZ
|
||||
let parsed = chrono::NaiveDateTime::parse_from_str(amz_date, "%Y%m%dT%H%M%SZ")
|
||||
.map_err(|_| S3Error::authorization_header_malformed())?;
|
||||
.map_err(|_| StorageError::authorization_header_malformed())?;
|
||||
|
||||
let request_time = chrono::DateTime::<chrono::Utc>::from_naive_utc_and_offset(parsed, chrono::Utc);
|
||||
let now = chrono::Utc::now();
|
||||
let diff = (now - request_time).num_seconds().unsigned_abs();
|
||||
|
||||
if diff > 15 * 60 {
|
||||
return Err(S3Error::request_time_too_skewed());
|
||||
return Err(StorageError::request_time_too_skewed());
|
||||
}
|
||||
|
||||
Ok(())
|
||||
|
||||
95
rust/src/cluster/config.rs
Normal file
95
rust/src/cluster/config.rs
Normal file
@@ -0,0 +1,95 @@
|
||||
use serde::{Deserialize, Serialize};
|
||||
|
||||
#[derive(Debug, Clone, Serialize, Deserialize)]
|
||||
#[serde(rename_all = "camelCase")]
|
||||
pub struct ClusterConfig {
|
||||
pub enabled: bool,
|
||||
#[serde(default)]
|
||||
pub node_id: Option<String>,
|
||||
#[serde(default = "default_quic_port")]
|
||||
pub quic_port: u16,
|
||||
#[serde(default)]
|
||||
pub seed_nodes: Vec<String>,
|
||||
#[serde(default)]
|
||||
pub erasure: ErasureConfig,
|
||||
#[serde(default)]
|
||||
pub drives: DriveConfig,
|
||||
#[serde(default = "default_heartbeat_interval")]
|
||||
pub heartbeat_interval_ms: u64,
|
||||
#[serde(default = "default_heartbeat_timeout")]
|
||||
pub heartbeat_timeout_ms: u64,
|
||||
}
|
||||
|
||||
#[derive(Debug, Clone, Serialize, Deserialize)]
|
||||
#[serde(rename_all = "camelCase")]
|
||||
pub struct ErasureConfig {
|
||||
#[serde(default = "default_data_shards")]
|
||||
pub data_shards: usize,
|
||||
#[serde(default = "default_parity_shards")]
|
||||
pub parity_shards: usize,
|
||||
#[serde(default = "default_chunk_size")]
|
||||
pub chunk_size_bytes: usize,
|
||||
}
|
||||
|
||||
impl ErasureConfig {
|
||||
pub fn total_shards(&self) -> usize {
|
||||
self.data_shards + self.parity_shards
|
||||
}
|
||||
|
||||
/// Minimum shards needed for a write to succeed (data_shards + 1)
|
||||
pub fn write_quorum(&self) -> usize {
|
||||
self.data_shards + 1
|
||||
}
|
||||
|
||||
/// Minimum shards needed to reconstruct data
|
||||
pub fn read_quorum(&self) -> usize {
|
||||
self.data_shards
|
||||
}
|
||||
}
|
||||
|
||||
impl Default for ErasureConfig {
|
||||
fn default() -> Self {
|
||||
Self {
|
||||
data_shards: default_data_shards(),
|
||||
parity_shards: default_parity_shards(),
|
||||
chunk_size_bytes: default_chunk_size(),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
#[derive(Debug, Clone, Serialize, Deserialize)]
|
||||
#[serde(rename_all = "camelCase")]
|
||||
pub struct DriveConfig {
|
||||
#[serde(default)]
|
||||
pub paths: Vec<String>,
|
||||
}
|
||||
|
||||
impl Default for DriveConfig {
|
||||
fn default() -> Self {
|
||||
Self { paths: Vec::new() }
|
||||
}
|
||||
}
|
||||
|
||||
/// Fallback for `ClusterConfig::quic_port`.
const DEFAULT_QUIC_PORT: u16 = 4000;
/// Fallback for `ClusterConfig::heartbeat_interval_ms`.
const DEFAULT_HEARTBEAT_INTERVAL_MS: u64 = 5000;
/// Fallback for `ClusterConfig::heartbeat_timeout_ms`.
const DEFAULT_HEARTBEAT_TIMEOUT_MS: u64 = 30000;
/// Fallback for `ErasureConfig::data_shards`.
const DEFAULT_DATA_SHARDS: usize = 4;
/// Fallback for `ErasureConfig::parity_shards`.
const DEFAULT_PARITY_SHARDS: usize = 2;
/// Fallback for `ErasureConfig::chunk_size_bytes` (4 MB).
const DEFAULT_CHUNK_SIZE_BYTES: usize = 4 * 1024 * 1024;

/// Serde default provider for the QUIC port.
fn default_quic_port() -> u16 {
    DEFAULT_QUIC_PORT
}

/// Serde default provider for the heartbeat interval, in milliseconds.
fn default_heartbeat_interval() -> u64 {
    DEFAULT_HEARTBEAT_INTERVAL_MS
}

/// Serde default provider for the heartbeat timeout, in milliseconds.
fn default_heartbeat_timeout() -> u64 {
    DEFAULT_HEARTBEAT_TIMEOUT_MS
}

/// Serde default provider for the data shard count.
fn default_data_shards() -> usize {
    DEFAULT_DATA_SHARDS
}

/// Serde default provider for the parity shard count.
fn default_parity_shards() -> usize {
    DEFAULT_PARITY_SHARDS
}

/// Serde default provider for the chunk size, in bytes.
fn default_chunk_size() -> usize {
    DEFAULT_CHUNK_SIZE_BYTES
}
|
||||
1242
rust/src/cluster/coordinator.rs
Normal file
1242
rust/src/cluster/coordinator.rs
Normal file
File diff suppressed because it is too large
Load Diff
242
rust/src/cluster/drive_manager.rs
Normal file
242
rust/src/cluster/drive_manager.rs
Normal file
@@ -0,0 +1,242 @@
|
||||
use anyhow::Result;
|
||||
use chrono::{DateTime, Utc};
|
||||
use serde::{Deserialize, Serialize};
|
||||
use std::path::{Path, PathBuf};
|
||||
use tokio::fs;
|
||||
use super::config::DriveConfig;
|
||||
|
||||
// ============================
|
||||
// Drive format (on-disk metadata)
|
||||
// ============================
|
||||
|
||||
#[derive(Debug, Clone, Serialize, Deserialize)]
|
||||
#[serde(rename_all = "camelCase")]
|
||||
pub struct DriveFormat {
|
||||
pub cluster_id: String,
|
||||
pub erasure_set_id: u32,
|
||||
pub drive_index_in_set: u32,
|
||||
pub format_version: u32,
|
||||
}
|
||||
|
||||
// ============================
|
||||
// Drive state tracking
|
||||
// ============================
|
||||
|
||||
/// Status recorded for a single drive in `DriveState::status`.
#[derive(Debug, Clone, PartialEq, Eq)]
pub enum DriveStatus {
    Online,
    Degraded,
    Offline,
    Healing,
}
|
||||
|
||||
#[derive(Debug, Clone)]
|
||||
pub struct DriveStats {
|
||||
pub total_bytes: u64,
|
||||
pub used_bytes: u64,
|
||||
pub avg_write_latency_us: u64,
|
||||
pub avg_read_latency_us: u64,
|
||||
pub error_count: u64,
|
||||
pub last_error: Option<String>,
|
||||
pub last_check: DateTime<Utc>,
|
||||
}
|
||||
|
||||
impl Default for DriveStats {
|
||||
fn default() -> Self {
|
||||
Self {
|
||||
total_bytes: 0,
|
||||
used_bytes: 0,
|
||||
avg_write_latency_us: 0,
|
||||
avg_read_latency_us: 0,
|
||||
error_count: 0,
|
||||
last_error: None,
|
||||
last_check: Utc::now(),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
#[derive(Debug)]
|
||||
pub struct DriveState {
|
||||
pub path: PathBuf,
|
||||
pub format: Option<DriveFormat>,
|
||||
pub status: DriveStatus,
|
||||
pub stats: DriveStats,
|
||||
}
|
||||
|
||||
// ============================
|
||||
// Drive manager
|
||||
// ============================
|
||||
|
||||
pub struct DriveManager {
|
||||
drives: Vec<DriveState>,
|
||||
}
|
||||
|
||||
impl DriveManager {
|
||||
/// Initialize drive manager with configured drive paths.
|
||||
pub async fn new(config: &DriveConfig) -> Result<Self> {
|
||||
let mut drives = Vec::with_capacity(config.paths.len());
|
||||
|
||||
for path_str in &config.paths {
|
||||
let path = PathBuf::from(path_str);
|
||||
let storage_dir = path.join(".smartstorage");
|
||||
|
||||
// Ensure the drive directory exists
|
||||
fs::create_dir_all(&storage_dir).await?;
|
||||
|
||||
// Try to read existing format
|
||||
let format = Self::read_format(&storage_dir).await;
|
||||
let status = if path.exists() {
|
||||
DriveStatus::Online
|
||||
} else {
|
||||
DriveStatus::Offline
|
||||
};
|
||||
|
||||
drives.push(DriveState {
|
||||
path,
|
||||
format,
|
||||
status,
|
||||
stats: DriveStats::default(),
|
||||
});
|
||||
}
|
||||
|
||||
Ok(Self { drives })
|
||||
}
|
||||
|
||||
/// Format drives for a new cluster. Stamps each drive with cluster and erasure set info.
|
||||
pub async fn format_drives(
|
||||
&mut self,
|
||||
cluster_id: &str,
|
||||
erasure_set_assignments: &[(u32, u32)], // (erasure_set_id, drive_index_in_set)
|
||||
) -> Result<()> {
|
||||
if erasure_set_assignments.len() != self.drives.len() {
|
||||
anyhow::bail!(
|
||||
"Erasure set assignments count ({}) doesn't match drive count ({})",
|
||||
erasure_set_assignments.len(),
|
||||
self.drives.len()
|
||||
);
|
||||
}
|
||||
|
||||
for (drive, (set_id, drive_idx)) in
|
||||
self.drives.iter_mut().zip(erasure_set_assignments.iter())
|
||||
{
|
||||
let format = DriveFormat {
|
||||
cluster_id: cluster_id.to_string(),
|
||||
erasure_set_id: *set_id,
|
||||
drive_index_in_set: *drive_idx,
|
||||
format_version: 1,
|
||||
};
|
||||
|
||||
let storage_dir = drive.path.join(".smartstorage");
|
||||
fs::create_dir_all(&storage_dir).await?;
|
||||
|
||||
let format_path = storage_dir.join("format.json");
|
||||
let json = serde_json::to_string_pretty(&format)?;
|
||||
fs::write(&format_path, json).await?;
|
||||
|
||||
drive.format = Some(format);
|
||||
}
|
||||
|
||||
Ok(())
|
||||
}
|
||||
|
||||
/// Get the number of drives managed.
|
||||
pub fn drive_count(&self) -> usize {
|
||||
self.drives.len()
|
||||
}
|
||||
|
||||
/// Get a drive's state by index.
|
||||
pub fn drive(&self, index: usize) -> Option<&DriveState> {
|
||||
self.drives.get(index)
|
||||
}
|
||||
|
||||
/// Get all drives.
|
||||
pub fn drives(&self) -> &[DriveState] {
|
||||
&self.drives
|
||||
}
|
||||
|
||||
/// Get drives that are online.
|
||||
pub fn online_drives(&self) -> Vec<usize> {
|
||||
self.drives
|
||||
.iter()
|
||||
.enumerate()
|
||||
.filter(|(_, d)| d.status == DriveStatus::Online)
|
||||
.map(|(i, _)| i)
|
||||
.collect()
|
||||
}
|
||||
|
||||
/// Check health of a specific drive by writing and reading a probe file.
|
||||
pub async fn check_drive_health(&mut self, index: usize) -> Result<DriveStatus> {
|
||||
let drive = self
|
||||
.drives
|
||||
.get_mut(index)
|
||||
.ok_or_else(|| anyhow::anyhow!("Drive index {} out of range", index))?;
|
||||
|
||||
let probe_path = drive.path.join(".smartstorage").join(".health_probe");
|
||||
|
||||
let start = std::time::Instant::now();
|
||||
|
||||
// Write probe
|
||||
match fs::write(&probe_path, b"health_check").await {
|
||||
Ok(()) => {}
|
||||
Err(e) => {
|
||||
drive.stats.error_count += 1;
|
||||
drive.stats.last_error = Some(e.to_string());
|
||||
drive.status = DriveStatus::Offline;
|
||||
drive.stats.last_check = Utc::now();
|
||||
return Ok(DriveStatus::Offline);
|
||||
}
|
||||
}
|
||||
|
||||
// Read probe
|
||||
match fs::read(&probe_path).await {
|
||||
Ok(_) => {}
|
||||
Err(e) => {
|
||||
drive.stats.error_count += 1;
|
||||
drive.stats.last_error = Some(e.to_string());
|
||||
drive.status = DriveStatus::Offline;
|
||||
drive.stats.last_check = Utc::now();
|
||||
return Ok(DriveStatus::Offline);
|
||||
}
|
||||
}
|
||||
|
||||
// Clean up probe
|
||||
let _ = fs::remove_file(&probe_path).await;
|
||||
|
||||
let latency = start.elapsed();
|
||||
drive.stats.avg_write_latency_us = latency.as_micros() as u64;
|
||||
drive.stats.last_check = Utc::now();
|
||||
|
||||
// Mark degraded if latency is too high (>5 seconds)
|
||||
if latency.as_secs() > 5 {
|
||||
drive.status = DriveStatus::Degraded;
|
||||
} else {
|
||||
drive.status = DriveStatus::Online;
|
||||
}
|
||||
|
||||
Ok(drive.status.clone())
|
||||
}
|
||||
|
||||
/// Run health checks on all drives.
|
||||
pub async fn check_all_drives(&mut self) -> Vec<(usize, DriveStatus)> {
|
||||
let mut results = Vec::new();
|
||||
let count = self.drives.len();
|
||||
for i in 0..count {
|
||||
match self.check_drive_health(i).await {
|
||||
Ok(status) => results.push((i, status)),
|
||||
Err(e) => {
|
||||
tracing::error!(drive = i, error = %e, "Drive health check failed");
|
||||
results.push((i, DriveStatus::Offline));
|
||||
}
|
||||
}
|
||||
}
|
||||
results
|
||||
}
|
||||
|
||||
// Internal helpers
|
||||
|
||||
async fn read_format(storage_dir: &Path) -> Option<DriveFormat> {
|
||||
let format_path = storage_dir.join("format.json");
|
||||
let content = fs::read_to_string(&format_path).await.ok()?;
|
||||
serde_json::from_str(&content).ok()
|
||||
}
|
||||
}
|
||||
246
rust/src/cluster/erasure.rs
Normal file
246
rust/src/cluster/erasure.rs
Normal file
@@ -0,0 +1,246 @@
|
||||
use anyhow::Result;
|
||||
use reed_solomon_erasure::galois_8::ReedSolomon;
|
||||
|
||||
use super::config::ErasureConfig;
|
||||
|
||||
/// Erasure coder that splits data into data+parity shards using Reed-Solomon.
|
||||
///
|
||||
/// Objects are processed in fixed-size chunks (stripes). Each chunk is independently
|
||||
/// erasure-coded, enabling streaming encode/decode without buffering entire objects.
|
||||
pub struct ErasureCoder {
|
||||
rs: ReedSolomon,
|
||||
config: ErasureConfig,
|
||||
}
|
||||
|
||||
impl ErasureCoder {
|
||||
pub fn new(config: &ErasureConfig) -> Result<Self> {
|
||||
let rs = ReedSolomon::new(config.data_shards, config.parity_shards)
|
||||
.map_err(|e| anyhow::anyhow!("Failed to create Reed-Solomon encoder: {:?}", e))?;
|
||||
Ok(Self {
|
||||
rs,
|
||||
config: config.clone(),
|
||||
})
|
||||
}
|
||||
|
||||
pub fn config(&self) -> &ErasureConfig {
|
||||
&self.config
|
||||
}
|
||||
|
||||
/// Encode a single chunk of data into data+parity shards.
|
||||
///
|
||||
/// The input data is split into `data_shards` equal-size pieces (padded if needed),
|
||||
/// then `parity_shards` parity pieces are computed.
|
||||
///
|
||||
/// Returns a Vec of length `data_shards + parity_shards`, where:
|
||||
/// - indices 0..data_shards are data shards
|
||||
/// - indices data_shards..total are parity shards
|
||||
pub fn encode_chunk(&self, data: &[u8]) -> Result<Vec<Vec<u8>>> {
|
||||
let k = self.config.data_shards;
|
||||
let m = self.config.parity_shards;
|
||||
|
||||
// Compute shard size: each data shard holds ceil(data_len / k) bytes
|
||||
let shard_size = (data.len() + k - 1) / k;
|
||||
if shard_size == 0 {
|
||||
anyhow::bail!("Cannot encode empty data");
|
||||
}
|
||||
|
||||
// Pad input to fill exactly k shards
|
||||
let mut padded = data.to_vec();
|
||||
padded.resize(shard_size * k, 0);
|
||||
|
||||
// Split into k data shards
|
||||
let mut shards: Vec<Vec<u8>> = padded.chunks(shard_size).map(|c| c.to_vec()).collect();
|
||||
|
||||
// Add m empty parity shards
|
||||
for _ in 0..m {
|
||||
shards.push(vec![0u8; shard_size]);
|
||||
}
|
||||
|
||||
// Compute parity in-place
|
||||
self.rs
|
||||
.encode(&mut shards)
|
||||
.map_err(|e| anyhow::anyhow!("Reed-Solomon encoding failed: {:?}", e))?;
|
||||
|
||||
Ok(shards)
|
||||
}
|
||||
|
||||
/// Decode (reconstruct) original data from a partial set of shards.
|
||||
///
|
||||
/// `shards` must have length == total_shards (data + parity).
|
||||
/// At least `data_shards` entries must be `Some`. Missing shards are `None`.
|
||||
/// `original_size` is the original data size before padding, used to truncate.
|
||||
///
|
||||
/// Returns the reconstructed original data.
|
||||
pub fn decode_chunk(
|
||||
&self,
|
||||
shards: &mut Vec<Option<Vec<u8>>>,
|
||||
original_size: usize,
|
||||
) -> Result<Vec<u8>> {
|
||||
let k = self.config.data_shards;
|
||||
let total = self.config.total_shards();
|
||||
|
||||
if shards.len() != total {
|
||||
anyhow::bail!(
|
||||
"Expected {} shards, got {}",
|
||||
total,
|
||||
shards.len()
|
||||
);
|
||||
}
|
||||
|
||||
let available = shards.iter().filter(|s| s.is_some()).count();
|
||||
if available < k {
|
||||
anyhow::bail!(
|
||||
"Need at least {} shards for reconstruction, only {} available",
|
||||
k,
|
||||
available
|
||||
);
|
||||
}
|
||||
|
||||
// Reconstruct missing shards
|
||||
self.rs
|
||||
.reconstruct(shards)
|
||||
.map_err(|e| anyhow::anyhow!("Reed-Solomon reconstruction failed: {:?}", e))?;
|
||||
|
||||
// Concatenate data shards (first k) and truncate to original size
|
||||
let mut result = Vec::with_capacity(original_size);
|
||||
for i in 0..k {
|
||||
if let Some(ref shard) = shards[i] {
|
||||
result.extend_from_slice(shard);
|
||||
} else {
|
||||
anyhow::bail!("Data shard {} missing after reconstruction", i);
|
||||
}
|
||||
}
|
||||
result.truncate(original_size);
|
||||
|
||||
Ok(result)
|
||||
}
|
||||
|
||||
/// Verify that all shards are consistent (no corruption).
|
||||
pub fn verify(&self, shards: &[Vec<u8>]) -> Result<bool> {
|
||||
let shard_refs: Vec<&[u8]> = shards.iter().map(|s| s.as_slice()).collect();
|
||||
self.rs
|
||||
.verify(&shard_refs)
|
||||
.map_err(|e| anyhow::anyhow!("Reed-Solomon verification failed: {:?}", e))
|
||||
}
|
||||
}
|
||||
|
||||
#[cfg(test)]
mod tests {
    use super::*;

    /// Default test layout: 4 data + 2 parity shards.
    fn test_config() -> ErasureConfig {
        ErasureConfig {
            data_shards: 4,
            parity_shards: 2,
            chunk_size_bytes: 4 * 1024 * 1024,
        }
    }

    /// Wrap every shard in `Some`, except the indices listed in `missing`.
    fn with_missing(shards: &[Vec<u8>], missing: &[usize]) -> Vec<Option<Vec<u8>>> {
        let mut opts: Vec<Option<Vec<u8>>> = shards.iter().map(|s| Some(s.clone())).collect();
        for &idx in missing {
            opts[idx] = None;
        }
        opts
    }

    #[test]
    fn test_encode_decode_roundtrip() {
        let coder = ErasureCoder::new(&test_config()).unwrap();
        let original = b"Hello, erasure coding! This is a test of the Reed-Solomon implementation.";

        let shards = coder.encode_chunk(original).unwrap();
        assert_eq!(shards.len(), 6); // 4 data + 2 parity

        // Every shard has the length of the first one.
        let shard_size = shards[0].len();
        assert!(shards.iter().all(|s| s.len() == shard_size));

        // Nothing missing: decoding just concatenates and truncates.
        let mut shard_opts = with_missing(&shards, &[]);
        let recovered = coder.decode_chunk(&mut shard_opts, original.len()).unwrap();
        assert_eq!(recovered, original.to_vec());
    }

    #[test]
    fn test_decode_with_missing_shards() {
        let coder = ErasureCoder::new(&test_config()).unwrap();
        let original = b"Testing reconstruction with missing shards - this should work with 4 of 6.";

        let shards = coder.encode_chunk(original).unwrap();

        // Lose data shard 1 and parity shard 0: two losses is the limit for 2 parity.
        let mut shard_opts = with_missing(&shards, &[1, 4]);

        let recovered = coder.decode_chunk(&mut shard_opts, original.len()).unwrap();
        assert_eq!(recovered, original.to_vec());
    }

    #[test]
    fn test_decode_with_too_many_missing() {
        let coder = ErasureCoder::new(&test_config()).unwrap();
        let original = b"This should fail with 3 missing shards.";

        let shards = coder.encode_chunk(original).unwrap();

        // Three losses exceed the parity count of 2.
        let mut shard_opts = with_missing(&shards, &[0, 2, 5]);

        let result = coder.decode_chunk(&mut shard_opts, original.len());
        assert!(result.is_err());
    }

    #[test]
    fn test_encode_large_data() {
        let coder = ErasureCoder::new(&test_config()).unwrap();
        // 1,000,000 bytes with a repeating 0..=255 pattern
        let mut original: Vec<u8> = Vec::with_capacity(1_000_000);
        for i in 0..1_000_000 {
            original.push((i % 256) as u8);
        }

        let shards = coder.encode_chunk(&original).unwrap();
        assert_eq!(shards.len(), 6);

        // Shard size is the input length divided by 4 data shards, rounded up.
        let expected_shard_size = (original.len() + 3) / 4;
        assert_eq!(shards[0].len(), expected_shard_size);

        // Roundtrip
        let mut shard_opts = with_missing(&shards, &[]);
        let recovered = coder.decode_chunk(&mut shard_opts, original.len()).unwrap();
        assert_eq!(recovered, original);
    }

    #[test]
    fn test_verify_shards() {
        let coder = ErasureCoder::new(&test_config()).unwrap();
        let original = b"Verify test data";

        let shards = coder.encode_chunk(original).unwrap();
        assert!(coder.verify(&shards).unwrap());

        // Flip every bit of the first byte of the first shard.
        let mut corrupted = shards.clone();
        corrupted[0][0] ^= 0xFF;
        assert!(!coder.verify(&corrupted).unwrap());
    }

    #[test]
    fn test_small_config() {
        // Minimum viable: 2 data + 1 parity
        let config = ErasureConfig {
            data_shards: 2,
            parity_shards: 1,
            chunk_size_bytes: 1024,
        };
        let coder = ErasureCoder::new(&config).unwrap();
        let original = b"Small config test";

        let shards = coder.encode_chunk(original).unwrap();
        assert_eq!(shards.len(), 3);

        // Lose the first data shard.
        let mut shard_opts = with_missing(&shards, &[0]);

        let recovered = coder.decode_chunk(&mut shard_opts, original.len()).unwrap();
        assert_eq!(recovered, original.to_vec());
    }
}
|
||||
356
rust/src/cluster/healing.rs
Normal file
356
rust/src/cluster/healing.rs
Normal file
@@ -0,0 +1,356 @@
|
||||
use anyhow::Result;
|
||||
use std::path::PathBuf;
|
||||
use std::sync::Arc;
|
||||
use std::time::Duration;
|
||||
use tokio::fs;
|
||||
|
||||
use super::config::ErasureConfig;
|
||||
use super::erasure::ErasureCoder;
|
||||
use super::metadata::ObjectManifest;
|
||||
use super::shard_store::{ShardId, ShardStore};
|
||||
use super::state::ClusterState;
|
||||
|
||||
/// Background healing service that scans for under-replicated shards
|
||||
/// and reconstructs them.
|
||||
pub struct HealingService {
|
||||
state: Arc<ClusterState>,
|
||||
erasure_coder: ErasureCoder,
|
||||
local_shard_stores: Vec<Arc<ShardStore>>,
|
||||
manifest_dir: PathBuf,
|
||||
scan_interval: Duration,
|
||||
}
|
||||
|
||||
impl HealingService {
|
||||
pub fn new(
|
||||
state: Arc<ClusterState>,
|
||||
erasure_config: &ErasureConfig,
|
||||
local_shard_stores: Vec<Arc<ShardStore>>,
|
||||
manifest_dir: PathBuf,
|
||||
scan_interval_hours: u64,
|
||||
) -> Result<Self> {
|
||||
Ok(Self {
|
||||
state,
|
||||
erasure_coder: ErasureCoder::new(erasure_config)?,
|
||||
local_shard_stores,
|
||||
manifest_dir,
|
||||
scan_interval: Duration::from_secs(scan_interval_hours * 3600),
|
||||
})
|
||||
}
|
||||
|
||||
/// Run the healing loop as a background task.
|
||||
pub async fn run(&self, mut shutdown: tokio::sync::watch::Receiver<bool>) {
|
||||
let mut interval = tokio::time::interval(self.scan_interval);
|
||||
|
||||
// Skip the first immediate tick
|
||||
interval.tick().await;
|
||||
|
||||
loop {
|
||||
tokio::select! {
|
||||
_ = interval.tick() => {
|
||||
tracing::info!("Starting healing scan");
|
||||
match self.heal_scan().await {
|
||||
Ok(stats) => {
|
||||
tracing::info!(
|
||||
checked = stats.shards_checked,
|
||||
healed = stats.shards_healed,
|
||||
errors = stats.errors,
|
||||
"Healing scan completed"
|
||||
);
|
||||
}
|
||||
Err(e) => {
|
||||
tracing::error!("Healing scan failed: {}", e);
|
||||
}
|
||||
}
|
||||
}
|
||||
_ = shutdown.changed() => {
|
||||
tracing::info!("Healing service shutting down");
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/// Scan all manifests for shards on offline nodes, reconstruct and re-place them.
|
||||
async fn heal_scan(&self) -> Result<HealStats> {
|
||||
let mut stats = HealStats::default();
|
||||
|
||||
let offline_nodes = self.state.offline_nodes().await;
|
||||
if offline_nodes.is_empty() {
|
||||
tracing::debug!("No offline nodes, skipping heal scan");
|
||||
return Ok(stats);
|
||||
}
|
||||
|
||||
// Check that we have majority before healing (split-brain prevention)
|
||||
if !self.state.has_majority().await {
|
||||
tracing::warn!("No majority quorum, skipping heal to prevent split-brain");
|
||||
return Ok(stats);
|
||||
}
|
||||
|
||||
tracing::info!(
|
||||
"Found {} offline nodes, scanning for affected shards",
|
||||
offline_nodes.len()
|
||||
);
|
||||
|
||||
// Iterate all bucket directories under manifest_dir
|
||||
let mut bucket_entries = match fs::read_dir(&self.manifest_dir).await {
|
||||
Ok(e) => e,
|
||||
Err(_) => return Ok(stats),
|
||||
};
|
||||
|
||||
while let Some(bucket_entry) = bucket_entries.next_entry().await? {
|
||||
if !bucket_entry.metadata().await?.is_dir() {
|
||||
continue;
|
||||
}
|
||||
let bucket_name = bucket_entry.file_name().to_string_lossy().to_string();
|
||||
if bucket_name.starts_with('.') {
|
||||
continue;
|
||||
}
|
||||
|
||||
// Scan manifests in this bucket
|
||||
self.heal_bucket(&bucket_name, &offline_nodes, &mut stats)
|
||||
.await;
|
||||
|
||||
// Yield to avoid starving foreground I/O
|
||||
tokio::task::yield_now().await;
|
||||
}
|
||||
|
||||
Ok(stats)
|
||||
}
|
||||
|
||||
async fn heal_bucket(
|
||||
&self,
|
||||
bucket: &str,
|
||||
offline_nodes: &[String],
|
||||
stats: &mut HealStats,
|
||||
) {
|
||||
let bucket_dir = self.manifest_dir.join(bucket);
|
||||
let manifests = match self.collect_manifests(&bucket_dir).await {
|
||||
Ok(m) => m,
|
||||
Err(e) => {
|
||||
tracing::warn!(bucket = bucket, error = %e, "Failed to list manifests");
|
||||
stats.errors += 1;
|
||||
return;
|
||||
}
|
||||
};
|
||||
|
||||
let local_id = self.state.local_node_id().to_string();
|
||||
|
||||
for manifest in &manifests {
|
||||
for chunk in &manifest.chunks {
|
||||
// Check if any shard in this chunk is on an offline node
|
||||
let affected: Vec<_> = chunk
|
||||
.shard_placements
|
||||
.iter()
|
||||
.filter(|p| offline_nodes.contains(&p.node_id))
|
||||
.collect();
|
||||
|
||||
if affected.is_empty() {
|
||||
continue;
|
||||
}
|
||||
|
||||
stats.shards_checked += chunk.shard_placements.len() as u64;
|
||||
|
||||
// Try to reconstruct missing shards from available ones
|
||||
let k = manifest.data_shards;
|
||||
let total = manifest.data_shards + manifest.parity_shards;
|
||||
|
||||
// Count available shards (those NOT on offline nodes)
|
||||
let available_count = chunk
|
||||
.shard_placements
|
||||
.iter()
|
||||
.filter(|p| !offline_nodes.contains(&p.node_id))
|
||||
.count();
|
||||
|
||||
if available_count < k {
|
||||
tracing::error!(
|
||||
bucket = manifest.bucket,
|
||||
key = manifest.key,
|
||||
chunk = chunk.chunk_index,
|
||||
available = available_count,
|
||||
needed = k,
|
||||
"Cannot heal chunk: not enough available shards"
|
||||
);
|
||||
stats.errors += 1;
|
||||
continue;
|
||||
}
|
||||
|
||||
// Fetch available shards (only local ones for now)
|
||||
let mut shards: Vec<Option<Vec<u8>>> = vec![None; total];
|
||||
let mut fetched = 0usize;
|
||||
|
||||
for placement in &chunk.shard_placements {
|
||||
if offline_nodes.contains(&placement.node_id) {
|
||||
continue; // Skip offline nodes
|
||||
}
|
||||
if fetched >= k {
|
||||
break;
|
||||
}
|
||||
|
||||
if placement.node_id == local_id {
|
||||
let shard_id = ShardId {
|
||||
bucket: manifest.bucket.clone(),
|
||||
key: manifest.key.clone(),
|
||||
chunk_index: chunk.chunk_index,
|
||||
shard_index: placement.shard_index,
|
||||
};
|
||||
let store_idx = placement.drive_id.parse::<usize>().unwrap_or(0);
|
||||
if let Some(store) = self.local_shard_stores.get(store_idx) {
|
||||
if let Ok((data, _)) = store.read_shard(&shard_id).await {
|
||||
shards[placement.shard_index as usize] = Some(data);
|
||||
fetched += 1;
|
||||
}
|
||||
}
|
||||
}
|
||||
// TODO: fetch from other online remote nodes
|
||||
}
|
||||
|
||||
if fetched < k {
|
||||
tracing::warn!(
|
||||
bucket = manifest.bucket,
|
||||
key = manifest.key,
|
||||
chunk = chunk.chunk_index,
|
||||
"Not enough local shards to heal, skipping"
|
||||
);
|
||||
continue;
|
||||
}
|
||||
|
||||
// Reconstruct all shards
|
||||
let reconstructed = match self.erasure_coder.decode_chunk(
|
||||
&mut shards,
|
||||
chunk.data_size,
|
||||
) {
|
||||
Ok(_) => true,
|
||||
Err(e) => {
|
||||
tracing::error!(
|
||||
bucket = manifest.bucket,
|
||||
key = manifest.key,
|
||||
chunk = chunk.chunk_index,
|
||||
error = %e,
|
||||
"Reconstruction failed"
|
||||
);
|
||||
stats.errors += 1;
|
||||
false
|
||||
}
|
||||
};
|
||||
|
||||
if !reconstructed {
|
||||
continue;
|
||||
}
|
||||
|
||||
// Re-encode to get all shards back (including the missing ones)
|
||||
let full_data_size = chunk.data_size;
|
||||
let mut data_buf = Vec::with_capacity(full_data_size);
|
||||
for i in 0..k {
|
||||
if let Some(ref shard) = shards[i] {
|
||||
data_buf.extend_from_slice(shard);
|
||||
}
|
||||
}
|
||||
data_buf.truncate(full_data_size);
|
||||
|
||||
let all_shards = match self.erasure_coder.encode_chunk(&data_buf) {
|
||||
Ok(s) => s,
|
||||
Err(e) => {
|
||||
tracing::error!(error = %e, "Re-encoding for heal failed");
|
||||
stats.errors += 1;
|
||||
continue;
|
||||
}
|
||||
};
|
||||
|
||||
// Verify reconstructed shards are consistent
|
||||
if !self.erasure_coder.verify(&all_shards).unwrap_or(false) {
|
||||
tracing::error!(
|
||||
bucket = manifest.bucket,
|
||||
key = manifest.key,
|
||||
chunk = chunk.chunk_index,
|
||||
"Shard verification failed after reconstruction"
|
||||
);
|
||||
stats.errors += 1;
|
||||
continue;
|
||||
}
|
||||
|
||||
// Write the missing shards to the first available local drive
|
||||
for affected_placement in &affected {
|
||||
let shard_idx = affected_placement.shard_index as usize;
|
||||
if shard_idx < all_shards.len() {
|
||||
let shard_data = &all_shards[shard_idx];
|
||||
let checksum = crc32c::crc32c(shard_data);
|
||||
|
||||
let shard_id = ShardId {
|
||||
bucket: manifest.bucket.clone(),
|
||||
key: manifest.key.clone(),
|
||||
chunk_index: chunk.chunk_index,
|
||||
shard_index: affected_placement.shard_index,
|
||||
};
|
||||
|
||||
// Place on first available local drive
|
||||
if let Some(store) = self.local_shard_stores.first() {
|
||||
match store.write_shard(&shard_id, shard_data, checksum).await {
|
||||
Ok(()) => {
|
||||
stats.shards_healed += 1;
|
||||
tracing::info!(
|
||||
bucket = manifest.bucket,
|
||||
key = manifest.key,
|
||||
chunk = chunk.chunk_index,
|
||||
shard = affected_placement.shard_index,
|
||||
"Shard healed successfully"
|
||||
);
|
||||
}
|
||||
Err(e) => {
|
||||
tracing::error!(error = %e, "Failed to write healed shard");
|
||||
stats.errors += 1;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
tokio::task::yield_now().await;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/// Collect all manifests under a bucket directory.
|
||||
async fn collect_manifests(&self, dir: &std::path::Path) -> Result<Vec<ObjectManifest>> {
|
||||
let mut manifests = Vec::new();
|
||||
self.collect_manifests_recursive(dir, &mut manifests).await?;
|
||||
Ok(manifests)
|
||||
}
|
||||
|
||||
fn collect_manifests_recursive<'a>(
|
||||
&'a self,
|
||||
dir: &'a std::path::Path,
|
||||
manifests: &'a mut Vec<ObjectManifest>,
|
||||
) -> std::pin::Pin<Box<dyn std::future::Future<Output = Result<()>> + Send + 'a>> {
|
||||
Box::pin(async move {
|
||||
let mut entries = match fs::read_dir(dir).await {
|
||||
Ok(e) => e,
|
||||
Err(_) => return Ok(()),
|
||||
};
|
||||
|
||||
while let Some(entry) = entries.next_entry().await? {
|
||||
let meta = entry.metadata().await?;
|
||||
let name = entry.file_name().to_string_lossy().to_string();
|
||||
|
||||
if meta.is_dir() {
|
||||
self.collect_manifests_recursive(&entry.path(), manifests)
|
||||
.await?;
|
||||
} else if name.ends_with(".manifest.json") {
|
||||
if let Ok(content) = fs::read_to_string(entry.path()).await {
|
||||
if let Ok(manifest) = serde_json::from_str::<ObjectManifest>(&content) {
|
||||
manifests.push(manifest);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
Ok(())
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
/// Counters produced by one healing scan; all start at zero.
#[derive(Default, Debug)]
pub struct HealStats {
    /// Shard placements belonging to chunks that had at least one shard on an offline node.
    pub shards_checked: u64,
    /// Shards that were rebuilt and written successfully.
    pub shards_healed: u64,
    /// Failures encountered along the way.
    pub errors: u64,
}
|
||||
226
rust/src/cluster/membership.rs
Normal file
226
rust/src/cluster/membership.rs
Normal file
@@ -0,0 +1,226 @@
|
||||
use anyhow::Result;
|
||||
use std::net::SocketAddr;
|
||||
use std::sync::Arc;
|
||||
use std::time::Duration;
|
||||
|
||||
use tokio::sync::Mutex;
|
||||
|
||||
use super::drive_manager::{DriveManager, DriveStatus};
|
||||
use super::protocol::{
|
||||
ClusterRequest, ClusterResponse, DriveStateInfo, HeartbeatMessage, JoinRequestMessage,
|
||||
NodeInfo,
|
||||
};
|
||||
use super::quic_transport::QuicTransport;
|
||||
use super::state::ClusterState;
|
||||
|
||||
/// Manages cluster membership: heartbeating, joining, failure detection.
|
||||
pub struct MembershipManager {
|
||||
state: Arc<ClusterState>,
|
||||
transport: Arc<QuicTransport>,
|
||||
heartbeat_interval: Duration,
|
||||
local_node_info: NodeInfo,
|
||||
drive_manager: Option<Arc<Mutex<DriveManager>>>,
|
||||
}
|
||||
|
||||
impl MembershipManager {
|
||||
pub fn new(
|
||||
state: Arc<ClusterState>,
|
||||
transport: Arc<QuicTransport>,
|
||||
heartbeat_interval_ms: u64,
|
||||
local_node_info: NodeInfo,
|
||||
) -> Self {
|
||||
Self {
|
||||
state,
|
||||
transport,
|
||||
heartbeat_interval: Duration::from_millis(heartbeat_interval_ms),
|
||||
local_node_info,
|
||||
drive_manager: None,
|
||||
}
|
||||
}
|
||||
|
||||
/// Set the drive manager for health reporting in heartbeats.
|
||||
pub fn with_drive_manager(mut self, dm: Arc<Mutex<DriveManager>>) -> Self {
|
||||
self.drive_manager = Some(dm);
|
||||
self
|
||||
}
|
||||
|
||||
/// Join the cluster by contacting seed nodes.
|
||||
/// Sends a JoinRequest to each seed node until one accepts.
|
||||
pub async fn join_cluster(&self, seed_nodes: &[String]) -> Result<()> {
|
||||
if seed_nodes.is_empty() {
|
||||
tracing::info!("No seed nodes configured, starting as initial cluster node");
|
||||
self.state.add_node(self.local_node_info.clone()).await;
|
||||
return Ok(());
|
||||
}
|
||||
|
||||
for seed in seed_nodes {
|
||||
let addr: SocketAddr = match seed.parse() {
|
||||
Ok(a) => a,
|
||||
Err(e) => {
|
||||
tracing::warn!("Invalid seed node address '{}': {}", seed, e);
|
||||
continue;
|
||||
}
|
||||
};
|
||||
|
||||
tracing::info!("Attempting to join cluster via seed node {}", seed);
|
||||
|
||||
match self.try_join(addr).await {
|
||||
Ok(()) => {
|
||||
tracing::info!("Successfully joined cluster via {}", seed);
|
||||
return Ok(());
|
||||
}
|
||||
Err(e) => {
|
||||
tracing::warn!("Failed to join via {}: {}", seed, e);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// If no seed responded, start as a new cluster
|
||||
tracing::info!("Could not reach any seed nodes, starting as initial cluster node");
|
||||
self.state.add_node(self.local_node_info.clone()).await;
|
||||
Ok(())
|
||||
}
|
||||
|
||||
async fn try_join(&self, addr: SocketAddr) -> Result<()> {
|
||||
let conn = self
|
||||
.transport
|
||||
.get_connection("seed", addr)
|
||||
.await?;
|
||||
|
||||
let request = ClusterRequest::JoinRequest(JoinRequestMessage {
|
||||
node_info: self.local_node_info.clone(),
|
||||
});
|
||||
|
||||
let response = self.transport.send_request(&conn, &request).await?;
|
||||
|
||||
match response {
|
||||
ClusterResponse::JoinResponse(join_resp) => {
|
||||
if join_resp.accepted {
|
||||
if let Some(topology) = &join_resp.topology {
|
||||
self.state.apply_topology(topology).await;
|
||||
// Also register self
|
||||
self.state.add_node(self.local_node_info.clone()).await;
|
||||
tracing::info!(
|
||||
"Applied cluster topology (version {}, {} nodes, {} erasure sets)",
|
||||
topology.version,
|
||||
topology.nodes.len(),
|
||||
topology.erasure_sets.len(),
|
||||
);
|
||||
}
|
||||
Ok(())
|
||||
} else {
|
||||
anyhow::bail!(
|
||||
"Join rejected: {}",
|
||||
join_resp.error.unwrap_or_default()
|
||||
)
|
||||
}
|
||||
}
|
||||
ClusterResponse::Error(e) => {
|
||||
anyhow::bail!("Join error: {} - {}", e.code, e.message)
|
||||
}
|
||||
_ => anyhow::bail!("Unexpected response to join request"),
|
||||
}
|
||||
}
|
||||
|
||||
/// Run the heartbeat loop. Sends heartbeats to all peers periodically.
|
||||
pub async fn heartbeat_loop(self: Arc<Self>, mut shutdown: tokio::sync::watch::Receiver<bool>) {
|
||||
let mut interval = tokio::time::interval(self.heartbeat_interval);
|
||||
|
||||
loop {
|
||||
tokio::select! {
|
||||
_ = interval.tick() => {
|
||||
self.send_heartbeats().await;
|
||||
}
|
||||
_ = shutdown.changed() => break,
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
async fn send_heartbeats(&self) {
|
||||
let peers = self.state.online_peers().await;
|
||||
let topology_version = self.state.version().await;
|
||||
let mut responded = Vec::new();
|
||||
|
||||
// Collect drive health states
|
||||
let drive_states = self.collect_drive_states().await;
|
||||
|
||||
for peer in &peers {
|
||||
let addr: SocketAddr = match peer.quic_addr.parse() {
|
||||
Ok(a) => a,
|
||||
Err(_) => continue,
|
||||
};
|
||||
|
||||
let heartbeat = ClusterRequest::Heartbeat(HeartbeatMessage {
|
||||
node_id: self.local_node_info.node_id.clone(),
|
||||
timestamp: chrono::Utc::now().to_rfc3339(),
|
||||
drive_states: drive_states.clone(),
|
||||
topology_version,
|
||||
});
|
||||
|
||||
match tokio::time::timeout(
|
||||
Duration::from_secs(5),
|
||||
self.send_heartbeat_to_peer(&peer.node_id, addr, &heartbeat),
|
||||
)
|
||||
.await
|
||||
{
|
||||
Ok(Ok(())) => {
|
||||
responded.push(peer.node_id.clone());
|
||||
}
|
||||
Ok(Err(e)) => {
|
||||
tracing::debug!(
|
||||
peer = %peer.node_id,
|
||||
error = %e,
|
||||
"Heartbeat failed"
|
||||
);
|
||||
}
|
||||
Err(_) => {
|
||||
tracing::debug!(peer = %peer.node_id, "Heartbeat timed out");
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Update state based on responses
|
||||
let status_changes = self.state.tick_heartbeats(&responded).await;
|
||||
for (node_id, status) in &status_changes {
|
||||
tracing::info!(node = %node_id, status = ?status, "Node status changed");
|
||||
}
|
||||
}
|
||||
|
||||
async fn send_heartbeat_to_peer(
|
||||
&self,
|
||||
node_id: &str,
|
||||
addr: SocketAddr,
|
||||
heartbeat: &ClusterRequest,
|
||||
) -> Result<()> {
|
||||
let conn = self.transport.get_connection(node_id, addr).await?;
|
||||
let _response = self.transport.send_request(&conn, heartbeat).await?;
|
||||
Ok(())
|
||||
}
|
||||
|
||||
/// Collect drive health states from the DriveManager, if available.
|
||||
async fn collect_drive_states(&self) -> Vec<DriveStateInfo> {
|
||||
let dm = match &self.drive_manager {
|
||||
Some(dm) => dm,
|
||||
None => return Vec::new(),
|
||||
};
|
||||
|
||||
let mut manager = dm.lock().await;
|
||||
let results = manager.check_all_drives().await;
|
||||
|
||||
results
|
||||
.into_iter()
|
||||
.map(|(idx, status)| {
|
||||
let status_str = match status {
|
||||
DriveStatus::Online => "online",
|
||||
DriveStatus::Degraded => "degraded",
|
||||
DriveStatus::Offline => "offline",
|
||||
DriveStatus::Healing => "healing",
|
||||
};
|
||||
DriveStateInfo {
|
||||
drive_index: idx as u32,
|
||||
status: status_str.to_string(),
|
||||
}
|
||||
})
|
||||
.collect()
|
||||
}
|
||||
}
|
||||
85
rust/src/cluster/metadata.rs
Normal file
85
rust/src/cluster/metadata.rs
Normal file
@@ -0,0 +1,85 @@
|
||||
use serde::{Deserialize, Serialize};
|
||||
use std::collections::HashMap;
|
||||
|
||||
/// Full manifest describing how an object is stored across erasure-coded shards.
|
||||
#[derive(Debug, Clone, Serialize, Deserialize)]
|
||||
#[serde(rename_all = "camelCase")]
|
||||
pub struct ObjectManifest {
|
||||
/// Bucket name
|
||||
pub bucket: String,
|
||||
/// Object key
|
||||
pub key: String,
|
||||
/// Unique version ID for this write
|
||||
pub version_id: String,
|
||||
/// Total object size in bytes
|
||||
pub size: u64,
|
||||
/// MD5 hex digest of the complete object
|
||||
pub content_md5: String,
|
||||
/// Content type
|
||||
pub content_type: String,
|
||||
/// User metadata (x-amz-meta-*, content-type, etc.)
|
||||
pub metadata: HashMap<String, String>,
|
||||
/// When the object was created
|
||||
pub created_at: String,
|
||||
/// Last modified timestamp
|
||||
pub last_modified: String,
|
||||
/// Number of data shards used
|
||||
pub data_shards: usize,
|
||||
/// Number of parity shards used
|
||||
pub parity_shards: usize,
|
||||
/// Chunk size in bytes (last chunk may be smaller)
|
||||
pub chunk_size: usize,
|
||||
/// Per-chunk shard placement info
|
||||
pub chunks: Vec<ChunkManifest>,
|
||||
}
|
||||
|
||||
/// Describes the shards for a single chunk of an object.
|
||||
#[derive(Debug, Clone, Serialize, Deserialize)]
|
||||
#[serde(rename_all = "camelCase")]
|
||||
pub struct ChunkManifest {
|
||||
/// Index of this chunk (0-based)
|
||||
pub chunk_index: u32,
|
||||
/// Actual data size of this chunk (before erasure coding)
|
||||
pub data_size: usize,
|
||||
/// Where each shard was placed
|
||||
pub shard_placements: Vec<ShardPlacement>,
|
||||
}
|
||||
|
||||
/// Describes where a specific shard is stored.
|
||||
#[derive(Debug, Clone, Serialize, Deserialize)]
|
||||
#[serde(rename_all = "camelCase")]
|
||||
pub struct ShardPlacement {
|
||||
/// Shard index within the erasure set (0..data_shards+parity_shards)
|
||||
pub shard_index: u32,
|
||||
/// Node that holds this shard
|
||||
pub node_id: String,
|
||||
/// Drive ID on that node
|
||||
pub drive_id: String,
|
||||
/// CRC32C checksum of the shard data
|
||||
pub checksum: u32,
|
||||
/// Size of the shard data in bytes
|
||||
pub shard_size: usize,
|
||||
}
|
||||
|
||||
/// Manifest for a multipart upload in progress.
|
||||
#[derive(Debug, Clone, Serialize, Deserialize)]
|
||||
#[serde(rename_all = "camelCase")]
|
||||
pub struct MultipartUploadManifest {
|
||||
pub upload_id: String,
|
||||
pub bucket: String,
|
||||
pub key: String,
|
||||
pub initiated: String,
|
||||
pub metadata: HashMap<String, String>,
|
||||
/// Per-part manifests, keyed by part number.
|
||||
pub parts: HashMap<u32, PartManifest>,
|
||||
}
|
||||
|
||||
/// Manifest for a single part of a multipart upload.
|
||||
#[derive(Debug, Clone, Serialize, Deserialize)]
|
||||
#[serde(rename_all = "camelCase")]
|
||||
pub struct PartManifest {
|
||||
pub part_number: u32,
|
||||
pub size: u64,
|
||||
pub md5: String,
|
||||
pub chunks: Vec<ChunkManifest>,
|
||||
}
|
||||
16
rust/src/cluster/mod.rs
Normal file
16
rust/src/cluster/mod.rs
Normal file
@@ -0,0 +1,16 @@
|
||||
// Cluster modules contain forward-looking public API that is incrementally wired.
|
||||
// Allow dead_code for methods/structs not yet called from the main server path.
|
||||
#![allow(dead_code)]
|
||||
|
||||
pub mod config;
|
||||
pub mod coordinator;
|
||||
pub mod drive_manager;
|
||||
pub mod erasure;
|
||||
pub mod healing;
|
||||
pub mod membership;
|
||||
pub mod metadata;
|
||||
pub mod placement;
|
||||
pub mod protocol;
|
||||
pub mod quic_transport;
|
||||
pub mod shard_store;
|
||||
pub mod state;
|
||||
140
rust/src/cluster/placement.rs
Normal file
140
rust/src/cluster/placement.rs
Normal file
@@ -0,0 +1,140 @@
|
||||
use xxhash_rust::xxh64::xxh64;
|
||||
|
||||
/// Determines which erasure set an object belongs to, based on consistent hashing.
|
||||
///
|
||||
/// Uses xxhash64 of "{bucket}/{key}" to deterministically map objects to erasure sets.
|
||||
/// This is stateless — any node can independently compute the placement.
|
||||
pub fn erasure_set_for_object(bucket: &str, key: &str, num_erasure_sets: u32) -> u32 {
|
||||
if num_erasure_sets == 0 {
|
||||
return 0;
|
||||
}
|
||||
let hash_input = format!("{}/{}", bucket, key);
|
||||
let hash = xxh64(hash_input.as_bytes(), 0);
|
||||
(hash % num_erasure_sets as u64) as u32
|
||||
}
|
||||
|
||||
/// Represents a drive location within the cluster topology.
#[derive(Debug, Clone)]
pub struct DriveLocation {
    /// Node that owns the drive
    pub node_id: String,
    /// Index of the drive on that node
    pub drive_index: u32,
}

/// An erasure set: a fixed group of drives that together store one complete
/// set of shards for any object placed on them.
#[derive(Debug, Clone)]
pub struct ErasureSet {
    /// Sequential identifier of the set (position in the list returned by
    /// `form_erasure_sets`)
    pub set_id: u32,
    /// Ordered drives: index = shard_index
    pub drives: Vec<DriveLocation>,
}

/// Form erasure sets from the available drives across all nodes.
///
/// Interleaves drives from different nodes for fault isolation:
/// e.g., with 3 nodes x 4 drives and total_shards=6:
///   Set 0: N0-D0, N1-D0, N2-D0, N0-D1, N1-D1, N2-D1
///   Set 1: N0-D2, N1-D2, N2-D2, N0-D3, N1-D3, N2-D3
///
/// `nodes` is a list of `(node_id, drive_count)` pairs and `total_shards` is
/// the number of drives per set. Drives that do not fill a complete set are
/// left unassigned. Returns an empty list when `total_shards` is 0 (no set can
/// be formed) instead of panicking on a division by zero.
pub fn form_erasure_sets(
    nodes: &[(String, u32)], // (node_id, drive_count)
    total_shards: usize,
) -> Vec<ErasureSet> {
    // A set of zero drives is meaningless; bail out before chunking, which
    // would otherwise panic.
    if total_shards == 0 {
        return Vec::new();
    }

    // Collect all drives as (node_id, drive_index), interleaved by node:
    // every node's drive 0 first, then every node's drive 1, and so on.
    let max_drives = nodes.iter().map(|(_, count)| *count).max().unwrap_or(0);
    let mut all_drives: Vec<DriveLocation> = Vec::new();
    for drive_index in 0..max_drives {
        for (node_id, drive_count) in nodes {
            // Nodes with fewer drives simply drop out of later rounds.
            if drive_index < *drive_count {
                all_drives.push(DriveLocation {
                    node_id: node_id.clone(),
                    drive_index,
                });
            }
        }
    }

    // Cut the interleaved list into consecutive groups of `total_shards`;
    // `chunks_exact` discards the incomplete remainder.
    all_drives
        .chunks_exact(total_shards)
        .enumerate()
        .map(|(set_idx, group)| ErasureSet {
            set_id: set_idx as u32,
            drives: group.to_vec(),
        })
        .collect()
}
|
||||
|
||||
#[cfg(test)]
mod tests {
    use super::*;

    /// Node ids of a set's drives, in shard order.
    fn node_ids(set: &ErasureSet) -> Vec<&str> {
        set.drives.iter().map(|d| d.node_id.as_str()).collect()
    }

    /// Drive indices of a set's drives, in shard order.
    fn drive_indices(set: &ErasureSet) -> Vec<u32> {
        set.drives.iter().map(|d| d.drive_index).collect()
    }

    #[test]
    fn test_erasure_set_assignment_deterministic() {
        // The same bucket/key must always land in the same set.
        let first = erasure_set_for_object("mybucket", "mykey", 4);
        let second = erasure_set_for_object("mybucket", "mykey", 4);
        assert_eq!(first, second);
    }

    #[test]
    fn test_erasure_set_distribution() {
        // 1000 distinct keys should spread over all four sets.
        let num_sets = 4u32;
        let mut counts = vec![0u32; num_sets as usize];
        for i in 0..1000 {
            let set = erasure_set_for_object("bucket", &format!("key-{}", i), num_sets);
            assert!(set < num_sets);
            counts[set as usize] += 1;
        }
        // No set may be starved (i.e. not everything hashed into one set).
        for count in counts.iter() {
            assert!(*count > 100, "Expected >100, got {}", count);
        }
    }

    #[test]
    fn test_form_erasure_sets_3x4() {
        // 3 nodes x 4 drives with 6 shards per set yields exactly 2 sets.
        let nodes = vec![
            ("node1".to_string(), 4),
            ("node2".to_string(), 4),
            ("node3".to_string(), 4),
        ];
        let sets = form_erasure_sets(&nodes, 6);
        assert_eq!(sets.len(), 2);

        // Both sets rotate through the nodes in order.
        let expected_nodes = vec!["node1", "node2", "node3", "node1", "node2", "node3"];
        assert_eq!(node_ids(&sets[0]), expected_nodes);
        assert_eq!(node_ids(&sets[1]), expected_nodes);

        // The sets use disjoint drive indices.
        assert_eq!(drive_indices(&sets[0]), vec![0, 0, 0, 1, 1, 1]);
        assert_eq!(drive_indices(&sets[1]), vec![2, 2, 2, 3, 3, 3]);
    }

    #[test]
    fn test_form_erasure_sets_remainder() {
        // 2 nodes x 3 drives with 4 shards per set: one set, 2 drives unused.
        let nodes = vec![("a".to_string(), 3), ("b".to_string(), 3)];
        let sets = form_erasure_sets(&nodes, 4);
        assert_eq!(sets.len(), 1);
        assert_eq!(sets[0].drives.len(), 4);
    }
}
|
||||
384
rust/src/cluster/protocol.rs
Normal file
384
rust/src/cluster/protocol.rs
Normal file
@@ -0,0 +1,384 @@
|
||||
use serde::{Deserialize, Serialize};
|
||||
use std::collections::HashMap;
|
||||
|
||||
use super::metadata::ObjectManifest;
|
||||
|
||||
/// All inter-node cluster messages, serialized with bincode over QUIC streams.
|
||||
///
|
||||
/// Each message type gets its own bidirectional QUIC stream.
|
||||
/// For shard data transfers, the header is sent first (bincode),
|
||||
/// then raw shard bytes follow on the same stream.
|
||||
#[derive(Debug, Clone, Serialize, Deserialize)]
|
||||
pub enum ClusterRequest {
|
||||
// ============================
|
||||
// Shard operations
|
||||
// ============================
|
||||
|
||||
/// Write a shard to a specific drive on the target node.
|
||||
/// Shard data follows after this header on the same stream.
|
||||
ShardWrite(ShardWriteRequest),
|
||||
|
||||
/// Read a shard from the target node.
|
||||
ShardRead(ShardReadRequest),
|
||||
|
||||
/// Delete a shard from the target node.
|
||||
ShardDelete(ShardDeleteRequest),
|
||||
|
||||
/// Check if a shard exists and get its metadata.
|
||||
ShardHead(ShardHeadRequest),
|
||||
|
||||
// ============================
|
||||
// Manifest operations
|
||||
// ============================
|
||||
|
||||
/// Store an object manifest on the target node.
|
||||
ManifestWrite(ManifestWriteRequest),
|
||||
|
||||
/// Retrieve an object manifest from the target node.
|
||||
ManifestRead(ManifestReadRequest),
|
||||
|
||||
/// Delete an object manifest from the target node.
|
||||
ManifestDelete(ManifestDeleteRequest),
|
||||
|
||||
/// List all manifests for a bucket on the target node.
|
||||
ManifestList(ManifestListRequest),
|
||||
|
||||
// ============================
|
||||
// Cluster management
|
||||
// ============================
|
||||
|
||||
/// Periodic heartbeat.
|
||||
Heartbeat(HeartbeatMessage),
|
||||
|
||||
/// Request to join the cluster.
|
||||
JoinRequest(JoinRequestMessage),
|
||||
|
||||
/// Synchronize cluster topology.
|
||||
TopologySync(TopologySyncMessage),
|
||||
|
||||
// ============================
|
||||
// Healing
|
||||
// ============================
|
||||
|
||||
/// Request a shard to be reconstructed and placed on a target drive.
|
||||
HealRequest(HealRequestMessage),
|
||||
}
|
||||
|
||||
/// Responses to cluster requests.
|
||||
#[derive(Debug, Clone, Serialize, Deserialize)]
|
||||
pub enum ClusterResponse {
|
||||
// Shard ops
|
||||
ShardWriteAck(ShardWriteAck),
|
||||
ShardReadResponse(ShardReadResponse),
|
||||
ShardDeleteAck(ShardDeleteAck),
|
||||
ShardHeadResponse(ShardHeadResponse),
|
||||
|
||||
// Manifest ops
|
||||
ManifestWriteAck(ManifestWriteAck),
|
||||
ManifestReadResponse(ManifestReadResponse),
|
||||
ManifestDeleteAck(ManifestDeleteAck),
|
||||
ManifestListResponse(ManifestListResponse),
|
||||
|
||||
// Cluster mgmt
|
||||
HeartbeatAck(HeartbeatAckMessage),
|
||||
JoinResponse(JoinResponseMessage),
|
||||
TopologySyncAck(TopologySyncAckMessage),
|
||||
|
||||
// Healing
|
||||
HealResponse(HealResponseMessage),
|
||||
|
||||
// Error
|
||||
Error(ErrorResponse),
|
||||
}
|
||||
|
||||
// ============================
|
||||
// Shard operation messages
|
||||
// ============================
|
||||
|
||||
#[derive(Debug, Clone, Serialize, Deserialize)]
|
||||
pub struct ShardWriteRequest {
|
||||
pub request_id: String,
|
||||
pub bucket: String,
|
||||
pub key: String,
|
||||
pub chunk_index: u32,
|
||||
pub shard_index: u32,
|
||||
pub shard_data_length: u64,
|
||||
pub checksum: u32, // crc32c of shard data
|
||||
pub object_metadata: HashMap<String, String>,
|
||||
}
|
||||
|
||||
#[derive(Debug, Clone, Serialize, Deserialize)]
|
||||
pub struct ShardWriteAck {
|
||||
pub request_id: String,
|
||||
pub success: bool,
|
||||
pub error: Option<String>,
|
||||
}
|
||||
|
||||
#[derive(Debug, Clone, Serialize, Deserialize)]
|
||||
pub struct ShardReadRequest {
|
||||
pub request_id: String,
|
||||
pub bucket: String,
|
||||
pub key: String,
|
||||
pub chunk_index: u32,
|
||||
pub shard_index: u32,
|
||||
}
|
||||
|
||||
#[derive(Debug, Clone, Serialize, Deserialize)]
|
||||
pub struct ShardReadResponse {
|
||||
pub request_id: String,
|
||||
pub found: bool,
|
||||
pub shard_data_length: u64,
|
||||
pub checksum: u32,
|
||||
// Shard data follows on the stream after this header
|
||||
}
|
||||
|
||||
#[derive(Debug, Clone, Serialize, Deserialize)]
|
||||
pub struct ShardDeleteRequest {
|
||||
pub request_id: String,
|
||||
pub bucket: String,
|
||||
pub key: String,
|
||||
pub chunk_index: u32,
|
||||
pub shard_index: u32,
|
||||
}
|
||||
|
||||
#[derive(Debug, Clone, Serialize, Deserialize)]
|
||||
pub struct ShardDeleteAck {
|
||||
pub request_id: String,
|
||||
pub success: bool,
|
||||
}
|
||||
|
||||
#[derive(Debug, Clone, Serialize, Deserialize)]
|
||||
pub struct ShardHeadRequest {
|
||||
pub request_id: String,
|
||||
pub bucket: String,
|
||||
pub key: String,
|
||||
pub chunk_index: u32,
|
||||
pub shard_index: u32,
|
||||
}
|
||||
|
||||
#[derive(Debug, Clone, Serialize, Deserialize)]
|
||||
pub struct ShardHeadResponse {
|
||||
pub request_id: String,
|
||||
pub found: bool,
|
||||
pub data_size: u64,
|
||||
pub checksum: u32,
|
||||
}
|
||||
|
||||
// ============================
|
||||
// Manifest operation messages
|
||||
// ============================
|
||||
|
||||
#[derive(Debug, Clone, Serialize, Deserialize)]
|
||||
pub struct ManifestWriteRequest {
|
||||
pub request_id: String,
|
||||
pub manifest: ObjectManifest,
|
||||
}
|
||||
|
||||
#[derive(Debug, Clone, Serialize, Deserialize)]
|
||||
pub struct ManifestWriteAck {
|
||||
pub request_id: String,
|
||||
pub success: bool,
|
||||
pub error: Option<String>,
|
||||
}
|
||||
|
||||
#[derive(Debug, Clone, Serialize, Deserialize)]
|
||||
pub struct ManifestReadRequest {
|
||||
pub request_id: String,
|
||||
pub bucket: String,
|
||||
pub key: String,
|
||||
}
|
||||
|
||||
#[derive(Debug, Clone, Serialize, Deserialize)]
|
||||
pub struct ManifestReadResponse {
|
||||
pub request_id: String,
|
||||
pub found: bool,
|
||||
pub manifest: Option<ObjectManifest>,
|
||||
}
|
||||
|
||||
#[derive(Debug, Clone, Serialize, Deserialize)]
|
||||
pub struct ManifestDeleteRequest {
|
||||
pub request_id: String,
|
||||
pub bucket: String,
|
||||
pub key: String,
|
||||
}
|
||||
|
||||
#[derive(Debug, Clone, Serialize, Deserialize)]
|
||||
pub struct ManifestDeleteAck {
|
||||
pub request_id: String,
|
||||
pub success: bool,
|
||||
}
|
||||
|
||||
#[derive(Debug, Clone, Serialize, Deserialize)]
|
||||
pub struct ManifestListRequest {
|
||||
pub request_id: String,
|
||||
pub bucket: String,
|
||||
pub prefix: Option<String>,
|
||||
}
|
||||
|
||||
#[derive(Debug, Clone, Serialize, Deserialize)]
|
||||
pub struct ManifestListResponse {
|
||||
pub request_id: String,
|
||||
pub manifests: Vec<ObjectManifest>,
|
||||
}
|
||||
|
||||
// ============================
|
||||
// Cluster management messages
|
||||
// ============================
|
||||
|
||||
#[derive(Debug, Clone, Serialize, Deserialize)]
|
||||
pub struct DriveStateInfo {
|
||||
pub drive_index: u32,
|
||||
pub status: String, // "online", "degraded", "offline", "healing"
|
||||
}
|
||||
|
||||
#[derive(Debug, Clone, Serialize, Deserialize)]
|
||||
pub struct HeartbeatMessage {
|
||||
pub node_id: String,
|
||||
pub timestamp: String,
|
||||
pub drive_states: Vec<DriveStateInfo>,
|
||||
pub topology_version: u64,
|
||||
}
|
||||
|
||||
#[derive(Debug, Clone, Serialize, Deserialize)]
|
||||
pub struct HeartbeatAckMessage {
|
||||
pub node_id: String,
|
||||
pub timestamp: String,
|
||||
pub topology_version: u64,
|
||||
}
|
||||
|
||||
#[derive(Debug, Clone, Serialize, Deserialize)]
|
||||
pub struct NodeInfo {
|
||||
pub node_id: String,
|
||||
pub quic_addr: String,
|
||||
pub s3_addr: String,
|
||||
pub drive_count: u32,
|
||||
pub status: String,
|
||||
pub version: String,
|
||||
}
|
||||
|
||||
#[derive(Debug, Clone, Serialize, Deserialize)]
|
||||
pub struct JoinRequestMessage {
|
||||
pub node_info: NodeInfo,
|
||||
}
|
||||
|
||||
#[derive(Debug, Clone, Serialize, Deserialize)]
|
||||
pub struct ClusterTopology {
|
||||
pub version: u64,
|
||||
pub cluster_id: String,
|
||||
pub nodes: Vec<NodeInfo>,
|
||||
pub erasure_sets: Vec<ErasureSetInfo>,
|
||||
pub data_shards: usize,
|
||||
pub parity_shards: usize,
|
||||
}
|
||||
|
||||
#[derive(Debug, Clone, Serialize, Deserialize)]
|
||||
pub struct ErasureSetInfo {
|
||||
pub set_id: u32,
|
||||
pub drives: Vec<DriveLocationInfo>,
|
||||
}
|
||||
|
||||
#[derive(Debug, Clone, Serialize, Deserialize)]
|
||||
pub struct DriveLocationInfo {
|
||||
pub node_id: String,
|
||||
pub drive_index: u32,
|
||||
}
|
||||
|
||||
#[derive(Debug, Clone, Serialize, Deserialize)]
|
||||
pub struct JoinResponseMessage {
|
||||
pub accepted: bool,
|
||||
pub topology: Option<ClusterTopology>,
|
||||
pub error: Option<String>,
|
||||
}
|
||||
|
||||
#[derive(Debug, Clone, Serialize, Deserialize)]
|
||||
pub struct TopologySyncMessage {
|
||||
pub topology: ClusterTopology,
|
||||
}
|
||||
|
||||
#[derive(Debug, Clone, Serialize, Deserialize)]
|
||||
pub struct TopologySyncAckMessage {
|
||||
pub accepted: bool,
|
||||
pub current_version: u64,
|
||||
}
|
||||
|
||||
// ============================
|
||||
// Healing messages
|
||||
// ============================
|
||||
|
||||
#[derive(Debug, Clone, Serialize, Deserialize)]
|
||||
pub struct HealRequestMessage {
|
||||
pub request_id: String,
|
||||
pub bucket: String,
|
||||
pub key: String,
|
||||
pub chunk_index: u32,
|
||||
pub shard_index: u32,
|
||||
pub target_node_id: String,
|
||||
pub target_drive_index: u32,
|
||||
}
|
||||
|
||||
#[derive(Debug, Clone, Serialize, Deserialize)]
|
||||
pub struct HealResponseMessage {
|
||||
pub request_id: String,
|
||||
pub success: bool,
|
||||
pub error: Option<String>,
|
||||
}
|
||||
|
||||
// ============================
|
||||
// Error response
|
||||
// ============================
|
||||
|
||||
#[derive(Debug, Clone, Serialize, Deserialize)]
|
||||
pub struct ErrorResponse {
|
||||
pub request_id: String,
|
||||
pub code: String,
|
||||
pub message: String,
|
||||
}
|
||||
|
||||
// ============================
|
||||
// Wire format helpers
|
||||
// ============================
|
||||
|
||||
/// Serialize a request to bincode bytes with a 4-byte length prefix.
|
||||
pub fn encode_request(req: &ClusterRequest) -> anyhow::Result<Vec<u8>> {
|
||||
let payload = bincode::serialize(req)?;
|
||||
let mut buf = Vec::with_capacity(4 + payload.len());
|
||||
buf.extend_from_slice(&(payload.len() as u32).to_le_bytes());
|
||||
buf.extend_from_slice(&payload);
|
||||
Ok(buf)
|
||||
}
|
||||
|
||||
/// Serialize a response to bincode bytes with a 4-byte length prefix.
|
||||
pub fn encode_response(resp: &ClusterResponse) -> anyhow::Result<Vec<u8>> {
|
||||
let payload = bincode::serialize(resp)?;
|
||||
let mut buf = Vec::with_capacity(4 + payload.len());
|
||||
buf.extend_from_slice(&(payload.len() as u32).to_le_bytes());
|
||||
buf.extend_from_slice(&payload);
|
||||
Ok(buf)
|
||||
}
|
||||
|
||||
/// Read a length-prefixed bincode message from raw bytes.
|
||||
/// Returns (decoded message, bytes consumed).
|
||||
pub fn decode_request(data: &[u8]) -> anyhow::Result<(ClusterRequest, usize)> {
|
||||
if data.len() < 4 {
|
||||
anyhow::bail!("Not enough data for length prefix");
|
||||
}
|
||||
let len = u32::from_le_bytes([data[0], data[1], data[2], data[3]]) as usize;
|
||||
if data.len() < 4 + len {
|
||||
anyhow::bail!("Not enough data for message body");
|
||||
}
|
||||
let msg: ClusterRequest = bincode::deserialize(&data[4..4 + len])?;
|
||||
Ok((msg, 4 + len))
|
||||
}
|
||||
|
||||
/// Read a length-prefixed bincode response from raw bytes.
|
||||
pub fn decode_response(data: &[u8]) -> anyhow::Result<(ClusterResponse, usize)> {
|
||||
if data.len() < 4 {
|
||||
anyhow::bail!("Not enough data for length prefix");
|
||||
}
|
||||
let len = u32::from_le_bytes([data[0], data[1], data[2], data[3]]) as usize;
|
||||
if data.len() < 4 + len {
|
||||
anyhow::bail!("Not enough data for message body");
|
||||
}
|
||||
let msg: ClusterResponse = bincode::deserialize(&data[4..4 + len])?;
|
||||
Ok((msg, 4 + len))
|
||||
}
|
||||
445
rust/src/cluster/quic_transport.rs
Normal file
445
rust/src/cluster/quic_transport.rs
Normal file
@@ -0,0 +1,445 @@
|
||||
use anyhow::Result;
|
||||
use dashmap::DashMap;
|
||||
use quinn::{ClientConfig, Endpoint, ServerConfig as QuinnServerConfig};
|
||||
use rustls::pki_types::{CertificateDer, PrivateKeyDer, PrivatePkcs8KeyDer};
|
||||
use std::net::SocketAddr;
|
||||
use std::sync::Arc;
|
||||
use super::protocol::{
|
||||
self, ClusterRequest, ClusterResponse, ShardReadResponse, ShardWriteAck, ShardWriteRequest,
|
||||
};
|
||||
use super::shard_store::{ShardId, ShardStore};
|
||||
|
||||
/// QUIC transport layer for inter-node communication.
|
||||
///
|
||||
/// Manages a QUIC endpoint for both sending and receiving cluster messages.
|
||||
/// Uses self-signed TLS certificates generated at init time.
|
||||
/// Maintains a connection pool to peer nodes.
|
||||
pub struct QuicTransport {
|
||||
endpoint: Endpoint,
|
||||
/// Cached connections to peer nodes: node_id -> Connection
|
||||
connections: Arc<DashMap<String, quinn::Connection>>,
|
||||
local_node_id: String,
|
||||
}
|
||||
|
||||
impl QuicTransport {
|
||||
/// Create a new QUIC transport, binding to the specified address.
|
||||
pub async fn new(bind_addr: SocketAddr, local_node_id: String) -> Result<Self> {
|
||||
let (server_config, client_config) = Self::generate_tls_configs()?;
|
||||
|
||||
let endpoint = Endpoint::server(server_config, bind_addr)?;
|
||||
|
||||
// Also configure the endpoint for client connections
|
||||
let mut endpoint_client = endpoint.clone();
|
||||
endpoint_client.set_default_client_config(client_config);
|
||||
|
||||
Ok(Self {
|
||||
endpoint,
|
||||
connections: Arc::new(DashMap::new()),
|
||||
local_node_id,
|
||||
})
|
||||
}
|
||||
|
||||
/// Get or establish a connection to a peer node.
|
||||
pub async fn get_connection(
|
||||
&self,
|
||||
node_id: &str,
|
||||
addr: SocketAddr,
|
||||
) -> Result<quinn::Connection> {
|
||||
// Check cache first
|
||||
if let Some(conn) = self.connections.get(node_id) {
|
||||
if conn.close_reason().is_none() {
|
||||
return Ok(conn.clone());
|
||||
}
|
||||
// Connection is closed, remove from cache
|
||||
drop(conn);
|
||||
self.connections.remove(node_id);
|
||||
}
|
||||
|
||||
// Establish new connection
|
||||
let conn = self
|
||||
.endpoint
|
||||
.connect(addr, "smartstorage")?
|
||||
.await?;
|
||||
|
||||
self.connections
|
||||
.insert(node_id.to_string(), conn.clone());
|
||||
|
||||
Ok(conn)
|
||||
}
|
||||
|
||||
/// Send a cluster request and receive the response.
|
||||
pub async fn send_request(
|
||||
&self,
|
||||
conn: &quinn::Connection,
|
||||
request: &ClusterRequest,
|
||||
) -> Result<ClusterResponse> {
|
||||
let (mut send, mut recv) = conn.open_bi().await?;
|
||||
|
||||
// Encode and send request
|
||||
let encoded = protocol::encode_request(request)?;
|
||||
send.write_all(&encoded).await?;
|
||||
send.finish()?;
|
||||
|
||||
// Read response
|
||||
let response_data = recv.read_to_end(64 * 1024 * 1024).await?; // 64MB max
|
||||
let (response, _) = protocol::decode_response(&response_data)?;
|
||||
|
||||
Ok(response)
|
||||
}
|
||||
|
||||
/// Send a shard write request with streaming data.
|
||||
///
|
||||
/// Sends the request header first, then streams the shard data bytes.
|
||||
pub async fn send_shard_write(
|
||||
&self,
|
||||
conn: &quinn::Connection,
|
||||
request: ShardWriteRequest,
|
||||
shard_data: &[u8],
|
||||
) -> Result<ShardWriteAck> {
|
||||
let (mut send, mut recv) = conn.open_bi().await?;
|
||||
|
||||
// Send request header
|
||||
let encoded = protocol::encode_request(&ClusterRequest::ShardWrite(request))?;
|
||||
send.write_all(&encoded).await?;
|
||||
|
||||
// Stream shard data
|
||||
send.write_all(shard_data).await?;
|
||||
send.finish()?;
|
||||
|
||||
// Read ack
|
||||
let response_data = recv.read_to_end(1024).await?;
|
||||
let (response, _) = protocol::decode_response(&response_data)?;
|
||||
|
||||
match response {
|
||||
ClusterResponse::ShardWriteAck(ack) => Ok(ack),
|
||||
ClusterResponse::Error(e) => {
|
||||
anyhow::bail!("Shard write error: {} - {}", e.code, e.message)
|
||||
}
|
||||
other => anyhow::bail!("Unexpected response to shard write: {:?}", other),
|
||||
}
|
||||
}
|
||||
|
||||
/// Send a shard read request and receive the shard data.
|
||||
///
|
||||
/// Returns (shard_data, checksum).
|
||||
pub async fn send_shard_read(
|
||||
&self,
|
||||
conn: &quinn::Connection,
|
||||
request: &ClusterRequest,
|
||||
) -> Result<Option<(Vec<u8>, u32)>> {
|
||||
let (mut send, mut recv) = conn.open_bi().await?;
|
||||
|
||||
// Send request
|
||||
let encoded = protocol::encode_request(request)?;
|
||||
send.write_all(&encoded).await?;
|
||||
send.finish()?;
|
||||
|
||||
// Read response header
|
||||
let mut header_len_buf = [0u8; 4];
|
||||
recv.read_exact(&mut header_len_buf).await?;
|
||||
let header_len = u32::from_le_bytes(header_len_buf) as usize;
|
||||
|
||||
let mut header_buf = vec![0u8; header_len];
|
||||
recv.read_exact(&mut header_buf).await?;
|
||||
let response: ClusterResponse = bincode::deserialize(&header_buf)?;
|
||||
|
||||
match response {
|
||||
ClusterResponse::ShardReadResponse(read_resp) => {
|
||||
if !read_resp.found {
|
||||
return Ok(None);
|
||||
}
|
||||
// Read shard data that follows
|
||||
let mut shard_data = vec![0u8; read_resp.shard_data_length as usize];
|
||||
recv.read_exact(&mut shard_data).await?;
|
||||
Ok(Some((shard_data, read_resp.checksum)))
|
||||
}
|
||||
ClusterResponse::Error(e) => {
|
||||
anyhow::bail!("Shard read error: {} - {}", e.code, e.message)
|
||||
}
|
||||
other => anyhow::bail!("Unexpected response to shard read: {:?}", other),
|
||||
}
|
||||
}
|
||||
|
||||
/// Accept incoming connections and dispatch to the handler.
|
||||
pub async fn accept_loop(
|
||||
self: Arc<Self>,
|
||||
shard_store: Arc<ShardStore>,
|
||||
mut shutdown: tokio::sync::watch::Receiver<bool>,
|
||||
) {
|
||||
loop {
|
||||
tokio::select! {
|
||||
incoming = self.endpoint.accept() => {
|
||||
match incoming {
|
||||
Some(incoming_conn) => {
|
||||
let transport = self.clone();
|
||||
let store = shard_store.clone();
|
||||
tokio::spawn(async move {
|
||||
match incoming_conn.await {
|
||||
Ok(conn) => {
|
||||
transport.handle_connection(conn, store).await;
|
||||
}
|
||||
Err(e) => {
|
||||
tracing::error!("Failed to accept QUIC connection: {}", e);
|
||||
}
|
||||
}
|
||||
});
|
||||
}
|
||||
None => break,
|
||||
}
|
||||
}
|
||||
_ = shutdown.changed() => break,
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/// Handle a single QUIC connection (may have multiple streams).
|
||||
async fn handle_connection(
|
||||
&self,
|
||||
conn: quinn::Connection,
|
||||
shard_store: Arc<ShardStore>,
|
||||
) {
|
||||
loop {
|
||||
match conn.accept_bi().await {
|
||||
Ok((send, recv)) => {
|
||||
let store = shard_store.clone();
|
||||
tokio::spawn(async move {
|
||||
if let Err(e) = Self::handle_stream(send, recv, store).await {
|
||||
tracing::error!("Stream handler error: {}", e);
|
||||
}
|
||||
});
|
||||
}
|
||||
Err(quinn::ConnectionError::ApplicationClosed(_)) => break,
|
||||
Err(e) => {
|
||||
tracing::error!("Connection error: {}", e);
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/// Handle a single bidirectional stream (one request-response exchange).
|
||||
async fn handle_stream(
|
||||
mut send: quinn::SendStream,
|
||||
mut recv: quinn::RecvStream,
|
||||
shard_store: Arc<ShardStore>,
|
||||
) -> Result<()> {
|
||||
// Read the full request (length-prefixed bincode + optional trailing data)
|
||||
let raw = recv.read_to_end(64 * 1024 * 1024).await?; // 64MB max
|
||||
let (request, header_len) = protocol::decode_request(&raw)?;
|
||||
|
||||
match request {
|
||||
ClusterRequest::ShardWrite(write_req) => {
|
||||
// Shard data follows the header in the raw buffer
|
||||
let shard_data = &raw[header_len..];
|
||||
|
||||
let shard_id = ShardId {
|
||||
bucket: write_req.bucket,
|
||||
key: write_req.key,
|
||||
chunk_index: write_req.chunk_index,
|
||||
shard_index: write_req.shard_index,
|
||||
};
|
||||
|
||||
let result = shard_store
|
||||
.write_shard(&shard_id, &shard_data, write_req.checksum)
|
||||
.await;
|
||||
|
||||
let ack = ShardWriteAck {
|
||||
request_id: write_req.request_id,
|
||||
success: result.is_ok(),
|
||||
error: result.err().map(|e| e.to_string()),
|
||||
};
|
||||
let response = protocol::encode_response(&ClusterResponse::ShardWriteAck(ack))?;
|
||||
send.write_all(&response).await?;
|
||||
send.finish()?;
|
||||
}
|
||||
|
||||
ClusterRequest::ShardRead(read_req) => {
|
||||
let shard_id = ShardId {
|
||||
bucket: read_req.bucket,
|
||||
key: read_req.key,
|
||||
chunk_index: read_req.chunk_index,
|
||||
shard_index: read_req.shard_index,
|
||||
};
|
||||
|
||||
match shard_store.read_shard(&shard_id).await {
|
||||
Ok((data, checksum)) => {
|
||||
let header = ShardReadResponse {
|
||||
request_id: read_req.request_id,
|
||||
found: true,
|
||||
shard_data_length: data.len() as u64,
|
||||
checksum,
|
||||
};
|
||||
// Send header
|
||||
let header_bytes = bincode::serialize(&ClusterResponse::ShardReadResponse(header))?;
|
||||
send.write_all(&(header_bytes.len() as u32).to_le_bytes()).await?;
|
||||
send.write_all(&header_bytes).await?;
|
||||
// Send shard data
|
||||
send.write_all(&data).await?;
|
||||
send.finish()?;
|
||||
}
|
||||
Err(_) => {
|
||||
let header = ShardReadResponse {
|
||||
request_id: read_req.request_id,
|
||||
found: false,
|
||||
shard_data_length: 0,
|
||||
checksum: 0,
|
||||
};
|
||||
let header_bytes = bincode::serialize(&ClusterResponse::ShardReadResponse(header))?;
|
||||
send.write_all(&(header_bytes.len() as u32).to_le_bytes()).await?;
|
||||
send.write_all(&header_bytes).await?;
|
||||
send.finish()?;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
ClusterRequest::ShardDelete(del_req) => {
|
||||
let shard_id = ShardId {
|
||||
bucket: del_req.bucket,
|
||||
key: del_req.key,
|
||||
chunk_index: del_req.chunk_index,
|
||||
shard_index: del_req.shard_index,
|
||||
};
|
||||
let result = shard_store.delete_shard(&shard_id).await;
|
||||
let ack = protocol::ClusterResponse::ShardDeleteAck(protocol::ShardDeleteAck {
|
||||
request_id: del_req.request_id,
|
||||
success: result.is_ok(),
|
||||
});
|
||||
let response = protocol::encode_response(&ack)?;
|
||||
send.write_all(&response).await?;
|
||||
send.finish()?;
|
||||
}
|
||||
|
||||
ClusterRequest::ShardHead(head_req) => {
|
||||
let shard_id = ShardId {
|
||||
bucket: head_req.bucket,
|
||||
key: head_req.key,
|
||||
chunk_index: head_req.chunk_index,
|
||||
shard_index: head_req.shard_index,
|
||||
};
|
||||
let resp = match shard_store.head_shard(&shard_id).await {
|
||||
Ok(Some(meta)) => protocol::ShardHeadResponse {
|
||||
request_id: head_req.request_id,
|
||||
found: true,
|
||||
data_size: meta.data_size,
|
||||
checksum: meta.checksum,
|
||||
},
|
||||
_ => protocol::ShardHeadResponse {
|
||||
request_id: head_req.request_id,
|
||||
found: false,
|
||||
data_size: 0,
|
||||
checksum: 0,
|
||||
},
|
||||
};
|
||||
let response =
|
||||
protocol::encode_response(&ClusterResponse::ShardHeadResponse(resp))?;
|
||||
send.write_all(&response).await?;
|
||||
send.finish()?;
|
||||
}
|
||||
|
||||
// Heartbeat, Join, TopologySync, Heal, and Manifest operations
|
||||
// will be handled by the membership and coordinator modules.
|
||||
// For now, reply with a NotImplemented error response.
|
||||
_ => {
|
||||
let err = protocol::ErrorResponse {
|
||||
request_id: String::new(),
|
||||
code: "NotImplemented".to_string(),
|
||||
message: "This cluster operation is not yet implemented".to_string(),
|
||||
};
|
||||
let response = protocol::encode_response(&ClusterResponse::Error(err))?;
|
||||
send.write_all(&response).await?;
|
||||
send.finish()?;
|
||||
}
|
||||
}
|
||||
|
||||
Ok(())
|
||||
}
|
||||
|
||||
/// Generate self-signed TLS certificates for cluster-internal communication.
|
||||
fn generate_tls_configs() -> Result<(QuinnServerConfig, ClientConfig)> {
|
||||
// Generate self-signed certificate
|
||||
let cert = rcgen::generate_simple_self_signed(vec!["smartstorage".to_string()])?;
|
||||
let cert_der = CertificateDer::from(cert.cert);
|
||||
let key_der = PrivateKeyDer::Pkcs8(PrivatePkcs8KeyDer::from(cert.key_pair.serialize_der()));
|
||||
|
||||
// Server config
|
||||
let mut server_crypto = rustls::ServerConfig::builder()
|
||||
.with_no_client_auth()
|
||||
.with_single_cert(vec![cert_der.clone()], key_der.clone_key())?;
|
||||
server_crypto.alpn_protocols = vec![b"smartstorage".to_vec()];
|
||||
let server_config = QuinnServerConfig::with_crypto(Arc::new(
|
||||
quinn::crypto::rustls::QuicServerConfig::try_from(server_crypto)?,
|
||||
));
|
||||
|
||||
// Client config: skip server certificate verification (cluster-internal)
|
||||
let mut client_crypto = rustls::ClientConfig::builder()
|
||||
.dangerous()
|
||||
.with_custom_certificate_verifier(Arc::new(SkipServerVerification))
|
||||
.with_no_client_auth();
|
||||
client_crypto.alpn_protocols = vec![b"smartstorage".to_vec()];
|
||||
let client_config = ClientConfig::new(Arc::new(
|
||||
quinn::crypto::rustls::QuicClientConfig::try_from(client_crypto)?,
|
||||
));
|
||||
|
||||
Ok((server_config, client_config))
|
||||
}
|
||||
|
||||
/// Close the QUIC endpoint gracefully.
|
||||
pub fn close(&self) {
|
||||
self.endpoint
|
||||
.close(quinn::VarInt::from_u32(0), b"shutdown");
|
||||
}
|
||||
|
||||
/// Get the local node ID.
|
||||
pub fn local_node_id(&self) -> &str {
|
||||
&self.local_node_id
|
||||
}
|
||||
}
|
||||
|
||||
/// Certificate verifier that skips verification (for cluster-internal self-signed certs).
|
||||
#[derive(Debug)]
|
||||
struct SkipServerVerification;
|
||||
|
||||
impl rustls::client::danger::ServerCertVerifier for SkipServerVerification {
|
||||
fn verify_server_cert(
|
||||
&self,
|
||||
_end_entity: &CertificateDer<'_>,
|
||||
_intermediates: &[CertificateDer<'_>],
|
||||
_server_name: &rustls::pki_types::ServerName<'_>,
|
||||
_ocsp_response: &[u8],
|
||||
_now: rustls::pki_types::UnixTime,
|
||||
) -> Result<rustls::client::danger::ServerCertVerified, rustls::Error> {
|
||||
Ok(rustls::client::danger::ServerCertVerified::assertion())
|
||||
}
|
||||
|
||||
fn verify_tls12_signature(
|
||||
&self,
|
||||
_message: &[u8],
|
||||
_cert: &CertificateDer<'_>,
|
||||
_dss: &rustls::DigitallySignedStruct,
|
||||
) -> Result<rustls::client::danger::HandshakeSignatureValid, rustls::Error> {
|
||||
Ok(rustls::client::danger::HandshakeSignatureValid::assertion())
|
||||
}
|
||||
|
||||
fn verify_tls13_signature(
|
||||
&self,
|
||||
_message: &[u8],
|
||||
_cert: &CertificateDer<'_>,
|
||||
_dss: &rustls::DigitallySignedStruct,
|
||||
) -> Result<rustls::client::danger::HandshakeSignatureValid, rustls::Error> {
|
||||
Ok(rustls::client::danger::HandshakeSignatureValid::assertion())
|
||||
}
|
||||
|
||||
fn supported_verify_schemes(&self) -> Vec<rustls::SignatureScheme> {
|
||||
vec![
|
||||
rustls::SignatureScheme::RSA_PKCS1_SHA256,
|
||||
rustls::SignatureScheme::RSA_PKCS1_SHA384,
|
||||
rustls::SignatureScheme::RSA_PKCS1_SHA512,
|
||||
rustls::SignatureScheme::ECDSA_NISTP256_SHA256,
|
||||
rustls::SignatureScheme::ECDSA_NISTP384_SHA384,
|
||||
rustls::SignatureScheme::ED25519,
|
||||
rustls::SignatureScheme::RSA_PSS_SHA256,
|
||||
rustls::SignatureScheme::RSA_PSS_SHA384,
|
||||
rustls::SignatureScheme::RSA_PSS_SHA512,
|
||||
]
|
||||
}
|
||||
}
|
||||
226
rust/src/cluster/shard_store.rs
Normal file
226
rust/src/cluster/shard_store.rs
Normal file
@@ -0,0 +1,226 @@
|
||||
use anyhow::Result;
|
||||
use serde::{Deserialize, Serialize};
|
||||
use std::path::PathBuf;
|
||||
use tokio::fs;
|
||||
use tokio::io::AsyncWriteExt;
|
||||
|
||||
/// Identifies a specific shard on disk.
|
||||
#[derive(Debug, Clone, Serialize, Deserialize, PartialEq, Eq, Hash)]
|
||||
pub struct ShardId {
|
||||
pub bucket: String,
|
||||
pub key: String,
|
||||
pub chunk_index: u32,
|
||||
pub shard_index: u32,
|
||||
}
|
||||
|
||||
/// Per-shard metadata stored alongside shard data.
|
||||
#[derive(Debug, Clone, Serialize, Deserialize)]
|
||||
pub struct ShardMeta {
|
||||
pub shard_index: u32,
|
||||
pub chunk_index: u32,
|
||||
pub data_size: u64,
|
||||
pub checksum: u32, // crc32c
|
||||
}
|
||||
|
||||
/// Shard storage rooted at a single drive directory.
///
/// On-disk layout:
/// ```text
/// {base_path}/.smartstorage/data/{bucket}/{key_prefix}/{key}/
///     chunk-{N}/shard-{M}.dat    (shard data)
///     chunk-{N}/shard-{M}.meta   (shard metadata JSON)
/// ```
pub struct ShardStore {
    /// Root directory of the drive this store writes to.
    base_path: PathBuf,
}
|
||||
|
||||
impl ShardStore {
|
||||
pub fn new(base_path: PathBuf) -> Self {
|
||||
Self { base_path }
|
||||
}
|
||||
|
||||
/// Write a shard to disk atomically (write to temp file, then rename).
|
||||
pub async fn write_shard(
|
||||
&self,
|
||||
shard_id: &ShardId,
|
||||
data: &[u8],
|
||||
checksum: u32,
|
||||
) -> Result<()> {
|
||||
let shard_path = self.shard_data_path(shard_id);
|
||||
let meta_path = self.shard_meta_path(shard_id);
|
||||
|
||||
// Ensure parent directory exists
|
||||
if let Some(parent) = shard_path.parent() {
|
||||
fs::create_dir_all(parent).await?;
|
||||
}
|
||||
|
||||
// Write data atomically via temp file + rename
|
||||
let temp_data_path = shard_path.with_extension("dat.tmp");
|
||||
{
|
||||
let mut file = fs::File::create(&temp_data_path).await?;
|
||||
file.write_all(data).await?;
|
||||
file.flush().await?;
|
||||
file.sync_all().await?;
|
||||
}
|
||||
fs::rename(&temp_data_path, &shard_path).await?;
|
||||
|
||||
// Write metadata
|
||||
let meta = ShardMeta {
|
||||
shard_index: shard_id.shard_index,
|
||||
chunk_index: shard_id.chunk_index,
|
||||
data_size: data.len() as u64,
|
||||
checksum,
|
||||
};
|
||||
let meta_json = serde_json::to_string(&meta)?;
|
||||
let temp_meta_path = meta_path.with_extension("meta.tmp");
|
||||
fs::write(&temp_meta_path, meta_json).await?;
|
||||
fs::rename(&temp_meta_path, &meta_path).await?;
|
||||
|
||||
Ok(())
|
||||
}
|
||||
|
||||
/// Read a shard's data from disk.
|
||||
pub async fn read_shard(&self, shard_id: &ShardId) -> Result<(Vec<u8>, u32)> {
|
||||
let shard_path = self.shard_data_path(shard_id);
|
||||
let meta_path = self.shard_meta_path(shard_id);
|
||||
|
||||
let data = fs::read(&shard_path).await?;
|
||||
let meta_json = fs::read_to_string(&meta_path).await?;
|
||||
let meta: ShardMeta = serde_json::from_str(&meta_json)?;
|
||||
|
||||
Ok((data, meta.checksum))
|
||||
}
|
||||
|
||||
/// Check if a shard exists and return its metadata.
|
||||
pub async fn head_shard(&self, shard_id: &ShardId) -> Result<Option<ShardMeta>> {
|
||||
let meta_path = self.shard_meta_path(shard_id);
|
||||
if !meta_path.exists() {
|
||||
return Ok(None);
|
||||
}
|
||||
let meta_json = fs::read_to_string(&meta_path).await?;
|
||||
let meta: ShardMeta = serde_json::from_str(&meta_json)?;
|
||||
Ok(Some(meta))
|
||||
}
|
||||
|
||||
/// Delete a shard and its metadata.
|
||||
pub async fn delete_shard(&self, shard_id: &ShardId) -> Result<()> {
|
||||
let shard_path = self.shard_data_path(shard_id);
|
||||
let meta_path = self.shard_meta_path(shard_id);
|
||||
|
||||
let _ = fs::remove_file(&shard_path).await;
|
||||
let _ = fs::remove_file(&meta_path).await;
|
||||
|
||||
// Clean up empty parent directories
|
||||
self.cleanup_empty_dirs(shard_id).await;
|
||||
|
||||
Ok(())
|
||||
}
|
||||
|
||||
/// List all shard IDs for a given bucket and key (across all chunks).
|
||||
pub async fn list_shards_for_object(
|
||||
&self,
|
||||
bucket: &str,
|
||||
key: &str,
|
||||
) -> Result<Vec<ShardId>> {
|
||||
let key_dir = self.key_dir(bucket, key);
|
||||
if !key_dir.exists() {
|
||||
return Ok(Vec::new());
|
||||
}
|
||||
|
||||
let mut result = Vec::new();
|
||||
let mut entries = fs::read_dir(&key_dir).await?;
|
||||
|
||||
while let Some(entry) = entries.next_entry().await? {
|
||||
let name = entry.file_name().to_string_lossy().to_string();
|
||||
if !name.starts_with("chunk-") || !entry.metadata().await?.is_dir() {
|
||||
continue;
|
||||
}
|
||||
|
||||
let chunk_index: u32 = match name.strip_prefix("chunk-").and_then(|s| s.parse().ok()) {
|
||||
Some(idx) => idx,
|
||||
None => continue,
|
||||
};
|
||||
|
||||
let mut chunk_entries = fs::read_dir(entry.path()).await?;
|
||||
while let Some(shard_entry) = chunk_entries.next_entry().await? {
|
||||
let shard_name = shard_entry.file_name().to_string_lossy().to_string();
|
||||
if shard_name.starts_with("shard-") && shard_name.ends_with(".dat") {
|
||||
let shard_index: u32 = match shard_name
|
||||
.strip_prefix("shard-")
|
||||
.and_then(|s| s.strip_suffix(".dat"))
|
||||
.and_then(|s| s.parse().ok())
|
||||
{
|
||||
Some(idx) => idx,
|
||||
None => continue,
|
||||
};
|
||||
|
||||
result.push(ShardId {
|
||||
bucket: bucket.to_string(),
|
||||
key: key.to_string(),
|
||||
chunk_index,
|
||||
shard_index,
|
||||
});
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
result.sort_by(|a, b| {
|
||||
a.chunk_index
|
||||
.cmp(&b.chunk_index)
|
||||
.then(a.shard_index.cmp(&b.shard_index))
|
||||
});
|
||||
|
||||
Ok(result)
|
||||
}
|
||||
|
||||
// ============================
|
||||
// Path helpers
|
||||
// ============================
|
||||
|
||||
fn data_root(&self) -> PathBuf {
|
||||
self.base_path.join(".smartstorage").join("data")
|
||||
}
|
||||
|
||||
fn key_prefix(key: &str) -> String {
|
||||
// Use first 2 hex chars of a simple hash for directory fan-out
|
||||
let hash = xxhash_rust::xxh64::xxh64(key.as_bytes(), 0);
|
||||
format!("{:02x}", hash & 0xFF)
|
||||
}
|
||||
|
||||
fn key_dir(&self, bucket: &str, key: &str) -> PathBuf {
|
||||
self.data_root()
|
||||
.join(bucket)
|
||||
.join(Self::key_prefix(key))
|
||||
.join(key)
|
||||
}
|
||||
|
||||
fn chunk_dir(&self, shard_id: &ShardId) -> PathBuf {
|
||||
self.key_dir(&shard_id.bucket, &shard_id.key)
|
||||
.join(format!("chunk-{}", shard_id.chunk_index))
|
||||
}
|
||||
|
||||
fn shard_data_path(&self, shard_id: &ShardId) -> PathBuf {
|
||||
self.chunk_dir(shard_id)
|
||||
.join(format!("shard-{}.dat", shard_id.shard_index))
|
||||
}
|
||||
|
||||
fn shard_meta_path(&self, shard_id: &ShardId) -> PathBuf {
|
||||
self.chunk_dir(shard_id)
|
||||
.join(format!("shard-{}.meta", shard_id.shard_index))
|
||||
}
|
||||
|
||||
async fn cleanup_empty_dirs(&self, shard_id: &ShardId) {
|
||||
// Try to remove chunk dir if empty
|
||||
let chunk_dir = self.chunk_dir(shard_id);
|
||||
let _ = fs::remove_dir(&chunk_dir).await; // fails silently if not empty
|
||||
|
||||
// Try to remove key dir if empty
|
||||
let key_dir = self.key_dir(&shard_id.bucket, &shard_id.key);
|
||||
let _ = fs::remove_dir(&key_dir).await;
|
||||
|
||||
// Try to remove prefix dir if empty
|
||||
if let Some(prefix_dir) = key_dir.parent() {
|
||||
let _ = fs::remove_dir(prefix_dir).await;
|
||||
}
|
||||
}
|
||||
}
|
||||
291
rust/src/cluster/state.rs
Normal file
291
rust/src/cluster/state.rs
Normal file
@@ -0,0 +1,291 @@
|
||||
use std::collections::HashMap;
|
||||
use std::sync::Arc;
|
||||
use tokio::sync::RwLock;
|
||||
|
||||
use super::placement::{DriveLocation, ErasureSet};
|
||||
use super::protocol::{ClusterTopology, ErasureSetInfo, DriveLocationInfo, NodeInfo};
|
||||
|
||||
/// Liveness classification of a cluster node.
#[derive(Debug, Clone, PartialEq, Eq)]
pub enum NodeStatus {
    /// Responding to heartbeats.
    Online,
    /// Missed 2 or more heartbeats.
    Suspect,
    /// Missed 5 or more heartbeats.
    Offline,
}
|
||||
|
||||
/// Tracked state for a peer node.
|
||||
#[derive(Debug, Clone)]
|
||||
pub struct NodeState {
|
||||
pub info: NodeInfo,
|
||||
pub status: NodeStatus,
|
||||
pub missed_heartbeats: u32,
|
||||
pub last_heartbeat: chrono::DateTime<chrono::Utc>,
|
||||
}
|
||||
|
||||
/// Shared cluster state, protected by RwLock for concurrent access.
|
||||
pub struct ClusterState {
|
||||
inner: Arc<RwLock<ClusterStateInner>>,
|
||||
local_node_id: String,
|
||||
}
|
||||
|
||||
struct ClusterStateInner {
|
||||
cluster_id: String,
|
||||
version: u64,
|
||||
nodes: HashMap<String, NodeState>,
|
||||
erasure_sets: Vec<ErasureSet>,
|
||||
data_shards: usize,
|
||||
parity_shards: usize,
|
||||
}
|
||||
|
||||
impl ClusterState {
|
||||
pub fn new(
|
||||
local_node_id: String,
|
||||
cluster_id: String,
|
||||
data_shards: usize,
|
||||
parity_shards: usize,
|
||||
) -> Self {
|
||||
Self {
|
||||
inner: Arc::new(RwLock::new(ClusterStateInner {
|
||||
cluster_id,
|
||||
version: 0,
|
||||
nodes: HashMap::new(),
|
||||
erasure_sets: Vec::new(),
|
||||
data_shards,
|
||||
parity_shards,
|
||||
})),
|
||||
local_node_id,
|
||||
}
|
||||
}
|
||||
|
||||
pub fn local_node_id(&self) -> &str {
|
||||
&self.local_node_id
|
||||
}
|
||||
|
||||
/// Register a node in the cluster.
|
||||
pub async fn add_node(&self, info: NodeInfo) {
|
||||
let mut inner = self.inner.write().await;
|
||||
let node_id = info.node_id.clone();
|
||||
inner.nodes.insert(
|
||||
node_id,
|
||||
NodeState {
|
||||
info,
|
||||
status: NodeStatus::Online,
|
||||
missed_heartbeats: 0,
|
||||
last_heartbeat: chrono::Utc::now(),
|
||||
},
|
||||
);
|
||||
inner.version += 1;
|
||||
}
|
||||
|
||||
/// Remove a node from the cluster.
|
||||
pub async fn remove_node(&self, node_id: &str) {
|
||||
let mut inner = self.inner.write().await;
|
||||
inner.nodes.remove(node_id);
|
||||
inner.version += 1;
|
||||
}
|
||||
|
||||
/// Update heartbeat for a node (reset missed count).
|
||||
pub async fn record_heartbeat(&self, node_id: &str) {
|
||||
let mut inner = self.inner.write().await;
|
||||
if let Some(node) = inner.nodes.get_mut(node_id) {
|
||||
node.missed_heartbeats = 0;
|
||||
node.status = NodeStatus::Online;
|
||||
node.last_heartbeat = chrono::Utc::now();
|
||||
}
|
||||
}
|
||||
|
||||
/// Increment missed heartbeat count for all nodes, updating status.
|
||||
/// Called by the heartbeat checker when a round completes.
|
||||
pub async fn tick_heartbeats(&self, responded_nodes: &[String]) -> Vec<(String, NodeStatus)> {
|
||||
let mut inner = self.inner.write().await;
|
||||
let mut status_changes = Vec::new();
|
||||
|
||||
for (node_id, node) in inner.nodes.iter_mut() {
|
||||
if *node_id == self.local_node_id {
|
||||
continue; // Don't track self
|
||||
}
|
||||
|
||||
if responded_nodes.contains(node_id) {
|
||||
node.missed_heartbeats = 0;
|
||||
if node.status != NodeStatus::Online {
|
||||
node.status = NodeStatus::Online;
|
||||
status_changes.push((node_id.clone(), NodeStatus::Online));
|
||||
}
|
||||
} else {
|
||||
node.missed_heartbeats += 1;
|
||||
let new_status = if node.missed_heartbeats >= 5 {
|
||||
NodeStatus::Offline
|
||||
} else if node.missed_heartbeats >= 2 {
|
||||
NodeStatus::Suspect
|
||||
} else {
|
||||
NodeStatus::Online
|
||||
};
|
||||
|
||||
if new_status != node.status {
|
||||
node.status = new_status.clone();
|
||||
status_changes.push((node_id.clone(), new_status));
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
status_changes
|
||||
}
|
||||
|
||||
/// Set erasure sets (typically done once during cluster formation).
|
||||
pub async fn set_erasure_sets(&self, sets: Vec<ErasureSet>) {
|
||||
let mut inner = self.inner.write().await;
|
||||
inner.erasure_sets = sets;
|
||||
inner.version += 1;
|
||||
}
|
||||
|
||||
/// Get the erasure set for a given object based on consistent hashing.
|
||||
pub async fn get_erasure_set_for_object(&self, bucket: &str, key: &str) -> Option<ErasureSet> {
|
||||
let inner = self.inner.read().await;
|
||||
if inner.erasure_sets.is_empty() {
|
||||
return None;
|
||||
}
|
||||
let set_idx = super::placement::erasure_set_for_object(
|
||||
bucket,
|
||||
key,
|
||||
inner.erasure_sets.len() as u32,
|
||||
);
|
||||
inner.erasure_sets.get(set_idx as usize).cloned()
|
||||
}
|
||||
|
||||
/// Get all erasure sets.
|
||||
pub async fn erasure_sets(&self) -> Vec<ErasureSet> {
|
||||
self.inner.read().await.erasure_sets.clone()
|
||||
}
|
||||
|
||||
/// Get current topology version.
|
||||
pub async fn version(&self) -> u64 {
|
||||
self.inner.read().await.version
|
||||
}
|
||||
|
||||
/// Get all online node IDs (excluding self).
|
||||
pub async fn online_peers(&self) -> Vec<NodeInfo> {
|
||||
let inner = self.inner.read().await;
|
||||
inner
|
||||
.nodes
|
||||
.values()
|
||||
.filter(|n| n.status == NodeStatus::Online && n.info.node_id != self.local_node_id)
|
||||
.map(|n| n.info.clone())
|
||||
.collect()
|
||||
}
|
||||
|
||||
/// Get all nodes.
|
||||
pub async fn all_nodes(&self) -> Vec<NodeState> {
|
||||
self.inner.read().await.nodes.values().cloned().collect()
|
||||
}
|
||||
|
||||
/// Get node info by ID.
|
||||
pub async fn get_node(&self, node_id: &str) -> Option<NodeInfo> {
|
||||
self.inner
|
||||
.read()
|
||||
.await
|
||||
.nodes
|
||||
.get(node_id)
|
||||
.map(|n| n.info.clone())
|
||||
}
|
||||
|
||||
/// Get offline node IDs.
|
||||
pub async fn offline_nodes(&self) -> Vec<String> {
|
||||
self.inner
|
||||
.read()
|
||||
.await
|
||||
.nodes
|
||||
.values()
|
||||
.filter(|n| n.status == NodeStatus::Offline)
|
||||
.map(|n| n.info.node_id.clone())
|
||||
.collect()
|
||||
}
|
||||
|
||||
/// Check if a majority of nodes are reachable (for split-brain prevention).
|
||||
pub async fn has_majority(&self) -> bool {
|
||||
let inner = self.inner.read().await;
|
||||
let total = inner.nodes.len();
|
||||
if total == 0 {
|
||||
return true;
|
||||
}
|
||||
let online = inner
|
||||
.nodes
|
||||
.values()
|
||||
.filter(|n| n.status == NodeStatus::Online)
|
||||
.count();
|
||||
online > total / 2
|
||||
}
|
||||
|
||||
/// Export the current topology as a protocol message.
|
||||
pub async fn to_topology(&self) -> ClusterTopology {
|
||||
let inner = self.inner.read().await;
|
||||
ClusterTopology {
|
||||
version: inner.version,
|
||||
cluster_id: inner.cluster_id.clone(),
|
||||
nodes: inner.nodes.values().map(|n| n.info.clone()).collect(),
|
||||
erasure_sets: inner
|
||||
.erasure_sets
|
||||
.iter()
|
||||
.map(|set| ErasureSetInfo {
|
||||
set_id: set.set_id,
|
||||
drives: set
|
||||
.drives
|
||||
.iter()
|
||||
.map(|d| DriveLocationInfo {
|
||||
node_id: d.node_id.clone(),
|
||||
drive_index: d.drive_index,
|
||||
})
|
||||
.collect(),
|
||||
})
|
||||
.collect(),
|
||||
data_shards: inner.data_shards,
|
||||
parity_shards: inner.parity_shards,
|
||||
}
|
||||
}
|
||||
|
||||
/// Import topology from a protocol message (e.g., received from a peer during join).
|
||||
pub async fn apply_topology(&self, topology: &ClusterTopology) {
|
||||
let mut inner = self.inner.write().await;
|
||||
|
||||
// Only apply if newer
|
||||
if topology.version <= inner.version {
|
||||
return;
|
||||
}
|
||||
|
||||
inner.cluster_id = topology.cluster_id.clone();
|
||||
inner.version = topology.version;
|
||||
inner.data_shards = topology.data_shards;
|
||||
inner.parity_shards = topology.parity_shards;
|
||||
|
||||
// Update nodes
|
||||
for node_info in &topology.nodes {
|
||||
if !inner.nodes.contains_key(&node_info.node_id) {
|
||||
inner.nodes.insert(
|
||||
node_info.node_id.clone(),
|
||||
NodeState {
|
||||
info: node_info.clone(),
|
||||
status: NodeStatus::Online,
|
||||
missed_heartbeats: 0,
|
||||
last_heartbeat: chrono::Utc::now(),
|
||||
},
|
||||
);
|
||||
}
|
||||
}
|
||||
|
||||
// Update erasure sets
|
||||
inner.erasure_sets = topology
|
||||
.erasure_sets
|
||||
.iter()
|
||||
.map(|set| ErasureSet {
|
||||
set_id: set.set_id,
|
||||
drives: set
|
||||
.drives
|
||||
.iter()
|
||||
.map(|d| DriveLocation {
|
||||
node_id: d.node_id.clone(),
|
||||
drive_index: d.drive_index,
|
||||
})
|
||||
.collect(),
|
||||
})
|
||||
.collect();
|
||||
}
|
||||
}
|
||||
@@ -1,8 +1,10 @@
|
||||
use serde::{Deserialize, Serialize};
|
||||
|
||||
use crate::cluster::config::ClusterConfig;
|
||||
|
||||
#[derive(Debug, Clone, Serialize, Deserialize)]
|
||||
#[serde(rename_all = "camelCase")]
|
||||
pub struct S3Config {
|
||||
pub struct SmartStorageConfig {
|
||||
pub server: ServerConfig,
|
||||
pub storage: StorageConfig,
|
||||
pub auth: AuthConfig,
|
||||
@@ -10,6 +12,8 @@ pub struct S3Config {
|
||||
pub logging: LoggingConfig,
|
||||
pub limits: LimitsConfig,
|
||||
pub multipart: MultipartConfig,
|
||||
#[serde(default)]
|
||||
pub cluster: Option<ClusterConfig>,
|
||||
}
|
||||
|
||||
#[derive(Debug, Clone, Serialize, Deserialize)]
|
||||
|
||||
@@ -1,16 +1,14 @@
|
||||
use hyper::{Response, StatusCode};
|
||||
use http_body_util::Full;
|
||||
use bytes::Bytes;
|
||||
use hyper::StatusCode;
|
||||
|
||||
#[derive(Debug, thiserror::Error)]
|
||||
#[error("S3Error({code}): {message}")]
|
||||
pub struct S3Error {
|
||||
#[error("StorageError({code}): {message}")]
|
||||
pub struct StorageError {
|
||||
pub code: String,
|
||||
pub message: String,
|
||||
pub status: StatusCode,
|
||||
}
|
||||
|
||||
impl S3Error {
|
||||
impl StorageError {
|
||||
pub fn new(code: &str, message: &str, status: StatusCode) -> Self {
|
||||
Self {
|
||||
code: code.to_string(),
|
||||
@@ -105,14 +103,4 @@ impl S3Error {
|
||||
self.code, self.message
|
||||
)
|
||||
}
|
||||
|
||||
pub fn to_response(&self, request_id: &str) -> Response<Full<Bytes>> {
|
||||
let xml = self.to_xml();
|
||||
Response::builder()
|
||||
.status(self.status)
|
||||
.header("content-type", "application/xml")
|
||||
.header("x-amz-request-id", request_id)
|
||||
.body(Full::new(Bytes::from(xml)))
|
||||
.unwrap()
|
||||
}
|
||||
}
|
||||
@@ -1,9 +1,10 @@
|
||||
mod action;
|
||||
mod auth;
|
||||
mod cluster;
|
||||
mod config;
|
||||
mod management;
|
||||
mod policy;
|
||||
mod s3_error;
|
||||
mod error;
|
||||
mod server;
|
||||
mod storage;
|
||||
mod xml_response;
|
||||
@@ -11,7 +12,7 @@ mod xml_response;
|
||||
use clap::Parser;
|
||||
|
||||
#[derive(Parser)]
|
||||
#[command(name = "rusts3", about = "High-performance S3-compatible server")]
|
||||
#[command(name = "ruststorage", about = "High-performance S3-compatible storage server")]
|
||||
struct Cli {
|
||||
/// Run in management mode (IPC via stdin/stdout)
|
||||
#[arg(long)]
|
||||
@@ -38,7 +39,7 @@ async fn main() -> anyhow::Result<()> {
|
||||
|
||||
management::management_loop().await?;
|
||||
} else {
|
||||
eprintln!("rusts3: use --management flag for IPC mode");
|
||||
eprintln!("ruststorage: use --management flag for IPC mode");
|
||||
std::process::exit(1);
|
||||
}
|
||||
|
||||
|
||||
@@ -4,8 +4,8 @@ use serde_json::Value;
|
||||
use std::io::Write;
|
||||
use tokio::io::{AsyncBufReadExt, BufReader};
|
||||
|
||||
use crate::config::S3Config;
|
||||
use crate::server::S3Server;
|
||||
use crate::config::SmartStorageConfig;
|
||||
use crate::server::StorageServer;
|
||||
|
||||
#[derive(Deserialize)]
|
||||
struct IpcRequest {
|
||||
@@ -62,7 +62,7 @@ pub async fn management_loop() -> Result<()> {
|
||||
data: serde_json::json!({}),
|
||||
});
|
||||
|
||||
let mut server: Option<S3Server> = None;
|
||||
let mut server: Option<StorageServer> = None;
|
||||
let stdin = BufReader::new(tokio::io::stdin());
|
||||
let mut lines = stdin.lines();
|
||||
|
||||
@@ -87,11 +87,11 @@ pub async fn management_loop() -> Result<()> {
|
||||
"start" => {
|
||||
#[derive(Deserialize)]
|
||||
struct StartParams {
|
||||
config: S3Config,
|
||||
config: SmartStorageConfig,
|
||||
}
|
||||
match serde_json::from_value::<StartParams>(req.params) {
|
||||
Ok(params) => {
|
||||
match S3Server::start(params.config).await {
|
||||
match StorageServer::start(params.config).await {
|
||||
Ok(s) => {
|
||||
server = Some(s);
|
||||
send_response(id, serde_json::json!({}));
|
||||
@@ -140,6 +140,15 @@ pub async fn management_loop() -> Result<()> {
|
||||
}
|
||||
}
|
||||
}
|
||||
"clusterStatus" => {
|
||||
send_response(
|
||||
id,
|
||||
serde_json::json!({
|
||||
"status": "ok",
|
||||
"message": "Cluster status endpoint ready"
|
||||
}),
|
||||
);
|
||||
}
|
||||
_ => {
|
||||
send_error(id, format!("Unknown method: {}", method));
|
||||
}
|
||||
|
||||
@@ -6,7 +6,7 @@ use tokio::sync::RwLock;
|
||||
|
||||
use crate::action::RequestContext;
|
||||
use crate::auth::AuthenticatedIdentity;
|
||||
use crate::s3_error::S3Error;
|
||||
use crate::error::StorageError;
|
||||
|
||||
// ============================
|
||||
// Policy data model
|
||||
@@ -284,50 +284,50 @@ fn simple_wildcard_match(pattern: &str, value: &str) -> bool {
|
||||
|
||||
const MAX_POLICY_SIZE: usize = 20 * 1024; // 20 KB
|
||||
|
||||
pub fn validate_policy(json: &str) -> Result<BucketPolicy, S3Error> {
|
||||
pub fn validate_policy(json: &str) -> Result<BucketPolicy, StorageError> {
|
||||
if json.len() > MAX_POLICY_SIZE {
|
||||
return Err(S3Error::malformed_policy("Policy exceeds maximum size of 20KB"));
|
||||
return Err(StorageError::malformed_policy("Policy exceeds maximum size of 20KB"));
|
||||
}
|
||||
|
||||
let policy: BucketPolicy =
|
||||
serde_json::from_str(json).map_err(|e| S3Error::malformed_policy(&e.to_string()))?;
|
||||
serde_json::from_str(json).map_err(|e| StorageError::malformed_policy(&e.to_string()))?;
|
||||
|
||||
if policy.version != "2012-10-17" {
|
||||
return Err(S3Error::malformed_policy(
|
||||
return Err(StorageError::malformed_policy(
|
||||
"Policy version must be \"2012-10-17\"",
|
||||
));
|
||||
}
|
||||
|
||||
if policy.statements.is_empty() {
|
||||
return Err(S3Error::malformed_policy(
|
||||
return Err(StorageError::malformed_policy(
|
||||
"Policy must contain at least one statement",
|
||||
));
|
||||
}
|
||||
|
||||
for (i, stmt) in policy.statements.iter().enumerate() {
|
||||
if stmt.action.is_empty() {
|
||||
return Err(S3Error::malformed_policy(&format!(
|
||||
return Err(StorageError::malformed_policy(&format!(
|
||||
"Statement {} has no actions",
|
||||
i
|
||||
)));
|
||||
}
|
||||
for action in &stmt.action {
|
||||
if action != "*" && !action.starts_with("s3:") {
|
||||
return Err(S3Error::malformed_policy(&format!(
|
||||
return Err(StorageError::malformed_policy(&format!(
|
||||
"Action \"{}\" must start with \"s3:\"",
|
||||
action
|
||||
)));
|
||||
}
|
||||
}
|
||||
if stmt.resource.is_empty() {
|
||||
return Err(S3Error::malformed_policy(&format!(
|
||||
return Err(StorageError::malformed_policy(&format!(
|
||||
"Statement {} has no resources",
|
||||
i
|
||||
)));
|
||||
}
|
||||
for resource in &stmt.resource {
|
||||
if resource != "*" && !resource.starts_with("arn:aws:s3:::") {
|
||||
return Err(S3Error::malformed_policy(&format!(
|
||||
return Err(StorageError::malformed_policy(&format!(
|
||||
"Resource \"{}\" must start with \"arn:aws:s3:::\"",
|
||||
resource
|
||||
)));
|
||||
|
||||
@@ -18,31 +18,40 @@ use tokio::sync::watch;
|
||||
use tokio_util::io::ReaderStream;
|
||||
use uuid::Uuid;
|
||||
|
||||
use crate::action::{self, RequestContext, S3Action};
|
||||
use crate::action::{self, RequestContext, StorageAction};
|
||||
use crate::auth::{self, AuthenticatedIdentity};
|
||||
use crate::config::S3Config;
|
||||
use crate::config::SmartStorageConfig;
|
||||
use crate::policy::{self, PolicyDecision, PolicyStore};
|
||||
use crate::s3_error::S3Error;
|
||||
use crate::storage::FileStore;
|
||||
use crate::error::StorageError;
|
||||
use crate::cluster::coordinator::DistributedStore;
|
||||
use crate::cluster::drive_manager::DriveManager;
|
||||
use crate::cluster::healing::HealingService;
|
||||
use crate::cluster::membership::MembershipManager;
|
||||
use crate::cluster::placement;
|
||||
use crate::cluster::protocol::NodeInfo;
|
||||
use crate::cluster::quic_transport::QuicTransport;
|
||||
use crate::cluster::shard_store::ShardStore;
|
||||
use crate::cluster::state::ClusterState;
|
||||
use crate::storage::{FileStore, StorageBackend};
|
||||
use crate::xml_response;
|
||||
|
||||
pub struct S3Server {
|
||||
store: Arc<FileStore>,
|
||||
config: S3Config,
|
||||
pub struct StorageServer {
|
||||
store: Arc<StorageBackend>,
|
||||
shutdown_tx: watch::Sender<bool>,
|
||||
server_handle: tokio::task::JoinHandle<()>,
|
||||
}
|
||||
|
||||
impl S3Server {
|
||||
pub async fn start(config: S3Config) -> Result<Self> {
|
||||
let store = Arc::new(FileStore::new(config.storage.directory.clone().into()));
|
||||
|
||||
// Initialize or reset storage
|
||||
if config.storage.clean_slate {
|
||||
store.reset().await?;
|
||||
impl StorageServer {
|
||||
pub async fn start(config: SmartStorageConfig) -> Result<Self> {
|
||||
let store: Arc<StorageBackend> = if let Some(ref cluster_config) = config.cluster {
|
||||
if cluster_config.enabled {
|
||||
Self::start_clustered(&config, cluster_config).await?
|
||||
} else {
|
||||
Self::start_standalone(&config).await?
|
||||
}
|
||||
} else {
|
||||
store.initialize().await?;
|
||||
}
|
||||
Self::start_standalone(&config).await?
|
||||
};
|
||||
|
||||
// Initialize policy store
|
||||
let policy_store = Arc::new(PolicyStore::new(store.policies_dir()));
|
||||
@@ -105,12 +114,11 @@ impl S3Server {
|
||||
});
|
||||
|
||||
if !config.server.silent {
|
||||
tracing::info!("S3 server listening on {}", addr);
|
||||
tracing::info!("Storage server listening on {}", addr);
|
||||
}
|
||||
|
||||
Ok(Self {
|
||||
store,
|
||||
config,
|
||||
shutdown_tx,
|
||||
server_handle,
|
||||
})
|
||||
@@ -121,12 +129,175 @@ impl S3Server {
|
||||
let _ = self.server_handle.await;
|
||||
}
|
||||
|
||||
pub fn store(&self) -> &FileStore {
|
||||
pub fn store(&self) -> &StorageBackend {
|
||||
&self.store
|
||||
}
|
||||
|
||||
async fn start_standalone(config: &SmartStorageConfig) -> Result<Arc<StorageBackend>> {
|
||||
let store = Arc::new(StorageBackend::Standalone(
|
||||
FileStore::new(config.storage.directory.clone().into()),
|
||||
));
|
||||
if config.storage.clean_slate {
|
||||
store.reset().await?;
|
||||
} else {
|
||||
store.initialize().await?;
|
||||
}
|
||||
Ok(store)
|
||||
}
|
||||
|
||||
async fn start_clustered(
|
||||
config: &SmartStorageConfig,
|
||||
cluster_config: &crate::cluster::config::ClusterConfig,
|
||||
) -> Result<Arc<StorageBackend>> {
|
||||
let erasure_config = cluster_config.erasure.clone();
|
||||
let node_id = cluster_config
|
||||
.node_id
|
||||
.clone()
|
||||
.unwrap_or_else(|| uuid::Uuid::new_v4().to_string());
|
||||
|
||||
// Determine drive paths
|
||||
let drive_paths: Vec<std::path::PathBuf> = if cluster_config.drives.paths.is_empty() {
|
||||
// Default: use storage directory as a single drive
|
||||
vec![std::path::PathBuf::from(&config.storage.directory)]
|
||||
} else {
|
||||
cluster_config
|
||||
.drives
|
||||
.paths
|
||||
.iter()
|
||||
.map(std::path::PathBuf::from)
|
||||
.collect()
|
||||
};
|
||||
|
||||
// Ensure directories exist
|
||||
let manifest_dir = std::path::PathBuf::from(&config.storage.directory).join(".manifests");
|
||||
let buckets_dir = std::path::PathBuf::from(&config.storage.directory).join(".buckets");
|
||||
tokio::fs::create_dir_all(&manifest_dir).await?;
|
||||
tokio::fs::create_dir_all(&buckets_dir).await?;
|
||||
for path in &drive_paths {
|
||||
tokio::fs::create_dir_all(path.join(".smartstorage")).await?;
|
||||
}
|
||||
|
||||
// Initialize QUIC transport
|
||||
let quic_addr: SocketAddr =
|
||||
format!("{}:{}", config.server.address, cluster_config.quic_port).parse()?;
|
||||
let transport = Arc::new(QuicTransport::new(quic_addr, node_id.clone()).await?);
|
||||
|
||||
// Initialize cluster state
|
||||
let cluster_state = Arc::new(ClusterState::new(
|
||||
node_id.clone(),
|
||||
uuid::Uuid::new_v4().to_string(),
|
||||
erasure_config.data_shards,
|
||||
erasure_config.parity_shards,
|
||||
));
|
||||
|
||||
// Form erasure sets from local drives (single-node for now)
|
||||
let nodes = vec![(node_id.clone(), drive_paths.len() as u32)];
|
||||
let erasure_sets =
|
||||
placement::form_erasure_sets(&nodes, erasure_config.total_shards());
|
||||
|
||||
if erasure_sets.is_empty() {
|
||||
tracing::warn!(
|
||||
"Not enough drives ({}) for erasure set size ({}). \
|
||||
Need at least {} drives.",
|
||||
drive_paths.len(),
|
||||
erasure_config.total_shards(),
|
||||
erasure_config.total_shards(),
|
||||
);
|
||||
}
|
||||
|
||||
cluster_state.set_erasure_sets(erasure_sets).await;
|
||||
|
||||
// Register self as a node
|
||||
let local_node_info = NodeInfo {
|
||||
node_id: node_id.clone(),
|
||||
quic_addr: quic_addr.to_string(),
|
||||
s3_addr: format!("{}:{}", config.server.address, config.server.port),
|
||||
drive_count: drive_paths.len() as u32,
|
||||
status: "online".to_string(),
|
||||
version: env!("CARGO_PKG_VERSION").to_string(),
|
||||
};
|
||||
cluster_state.add_node(local_node_info.clone()).await;
|
||||
|
||||
// Initialize drive manager for health monitoring
|
||||
let drive_manager = Arc::new(tokio::sync::Mutex::new(
|
||||
DriveManager::new(&cluster_config.drives).await?,
|
||||
));
|
||||
|
||||
// Join cluster if seed nodes are configured
|
||||
let membership = Arc::new(
|
||||
MembershipManager::new(
|
||||
cluster_state.clone(),
|
||||
transport.clone(),
|
||||
cluster_config.heartbeat_interval_ms,
|
||||
local_node_info,
|
||||
)
|
||||
.with_drive_manager(drive_manager),
|
||||
);
|
||||
membership
|
||||
.join_cluster(&cluster_config.seed_nodes)
|
||||
.await?;
|
||||
|
||||
// Build local shard stores (one per drive) for shared use
|
||||
let local_shard_stores: Vec<Arc<ShardStore>> = drive_paths
|
||||
.iter()
|
||||
.map(|p| Arc::new(ShardStore::new(p.clone())))
|
||||
.collect();
|
||||
|
||||
// Start QUIC accept loop for incoming connections
|
||||
let shard_store_for_accept = local_shard_stores[0].clone();
|
||||
let (_quic_shutdown_tx, quic_shutdown_rx) = watch::channel(false);
|
||||
let transport_clone = transport.clone();
|
||||
tokio::spawn(async move {
|
||||
transport_clone
|
||||
.accept_loop(shard_store_for_accept, quic_shutdown_rx)
|
||||
.await;
|
||||
});
|
||||
|
||||
// Start heartbeat loop
|
||||
let membership_clone = membership.clone();
|
||||
let (_hb_shutdown_tx, hb_shutdown_rx) = watch::channel(false);
|
||||
tokio::spawn(async move {
|
||||
membership_clone.heartbeat_loop(hb_shutdown_rx).await;
|
||||
});
|
||||
|
||||
// Start healing service
|
||||
let healing_service = HealingService::new(
|
||||
cluster_state.clone(),
|
||||
&erasure_config,
|
||||
local_shard_stores.clone(),
|
||||
manifest_dir.clone(),
|
||||
24, // scan every 24 hours
|
||||
)?;
|
||||
let (_heal_shutdown_tx, heal_shutdown_rx) = watch::channel(false);
|
||||
tokio::spawn(async move {
|
||||
healing_service.run(heal_shutdown_rx).await;
|
||||
});
|
||||
|
||||
// Create distributed store
|
||||
let distributed_store = DistributedStore::new(
|
||||
cluster_state,
|
||||
transport,
|
||||
erasure_config,
|
||||
drive_paths,
|
||||
manifest_dir,
|
||||
buckets_dir,
|
||||
)?;
|
||||
|
||||
let store = Arc::new(StorageBackend::Clustered(distributed_store));
|
||||
|
||||
if !config.server.silent {
|
||||
tracing::info!(
|
||||
"Cluster mode enabled (node_id={}, quic_port={})",
|
||||
node_id,
|
||||
cluster_config.quic_port
|
||||
);
|
||||
}
|
||||
|
||||
Ok(store)
|
||||
}
|
||||
}
|
||||
|
||||
impl S3Config {
|
||||
impl SmartStorageConfig {
|
||||
fn address(&self) -> &str {
|
||||
&self.server.address
|
||||
}
|
||||
@@ -194,7 +365,7 @@ fn empty_response(status: StatusCode, request_id: &str) -> Response<BoxBody> {
|
||||
.unwrap()
|
||||
}
|
||||
|
||||
fn s3_error_response(err: &S3Error, request_id: &str) -> Response<BoxBody> {
|
||||
fn storage_error_response(err: &StorageError, request_id: &str) -> Response<BoxBody> {
|
||||
let xml = err.to_xml();
|
||||
Response::builder()
|
||||
.status(err.status)
|
||||
@@ -206,8 +377,8 @@ fn s3_error_response(err: &S3Error, request_id: &str) -> Response<BoxBody> {
|
||||
|
||||
async fn handle_request(
|
||||
req: Request<Incoming>,
|
||||
store: Arc<FileStore>,
|
||||
config: S3Config,
|
||||
store: Arc<StorageBackend>,
|
||||
config: SmartStorageConfig,
|
||||
policy_store: Arc<PolicyStore>,
|
||||
) -> Result<Response<BoxBody>, std::convert::Infallible> {
|
||||
let request_id = Uuid::new_v4().to_string();
|
||||
@@ -221,7 +392,7 @@ async fn handle_request(
|
||||
return Ok(resp);
|
||||
}
|
||||
|
||||
// Step 1: Resolve S3 action from request
|
||||
// Step 1: Resolve storage action from request
|
||||
let request_ctx = action::resolve_action(&req);
|
||||
|
||||
// Step 2: Auth + policy pipeline
|
||||
@@ -240,7 +411,7 @@ async fn handle_request(
|
||||
Ok(id) => Some(id),
|
||||
Err(e) => {
|
||||
tracing::warn!("Auth failed: {}", e.message);
|
||||
return Ok(s3_error_response(&e, &request_id));
|
||||
return Ok(storage_error_response(&e, &request_id));
|
||||
}
|
||||
}
|
||||
} else {
|
||||
@@ -250,7 +421,7 @@ async fn handle_request(
|
||||
|
||||
// Step 3: Authorization (policy evaluation)
|
||||
if let Err(e) = authorize_request(&request_ctx, identity.as_ref(), &policy_store).await {
|
||||
return Ok(s3_error_response(&e, &request_id));
|
||||
return Ok(storage_error_response(&e, &request_id));
|
||||
}
|
||||
}
|
||||
|
||||
@@ -258,12 +429,12 @@ async fn handle_request(
|
||||
let mut response = match route_request(req, store, &config, &request_id, &policy_store).await {
|
||||
Ok(resp) => resp,
|
||||
Err(err) => {
|
||||
if let Some(s3err) = err.downcast_ref::<S3Error>() {
|
||||
s3_error_response(s3err, &request_id)
|
||||
if let Some(s3err) = err.downcast_ref::<StorageError>() {
|
||||
storage_error_response(s3err, &request_id)
|
||||
} else {
|
||||
tracing::error!("Internal error: {}", err);
|
||||
let s3err = S3Error::internal_error(&err.to_string());
|
||||
s3_error_response(&s3err, &request_id)
|
||||
let s3err = StorageError::internal_error(&err.to_string());
|
||||
storage_error_response(&s3err, &request_id)
|
||||
}
|
||||
}
|
||||
};
|
||||
@@ -290,11 +461,11 @@ async fn authorize_request(
|
||||
ctx: &RequestContext,
|
||||
identity: Option<&AuthenticatedIdentity>,
|
||||
policy_store: &PolicyStore,
|
||||
) -> Result<(), S3Error> {
|
||||
) -> Result<(), StorageError> {
|
||||
// ListAllMyBuckets requires authentication (no bucket to apply policy to)
|
||||
if ctx.action == S3Action::ListAllMyBuckets {
|
||||
if ctx.action == StorageAction::ListAllMyBuckets {
|
||||
if identity.is_none() {
|
||||
return Err(S3Error::access_denied());
|
||||
return Err(StorageError::access_denied());
|
||||
}
|
||||
return Ok(());
|
||||
}
|
||||
@@ -304,7 +475,7 @@ async fn authorize_request(
|
||||
if let Some(bucket_policy) = policy_store.get_policy(bucket).await {
|
||||
let decision = policy::evaluate_policy(&bucket_policy, ctx, identity);
|
||||
match decision {
|
||||
PolicyDecision::Deny => return Err(S3Error::access_denied()),
|
||||
PolicyDecision::Deny => return Err(StorageError::access_denied()),
|
||||
PolicyDecision::Allow => return Ok(()),
|
||||
PolicyDecision::NoOpinion => {
|
||||
// Fall through to default behavior
|
||||
@@ -315,7 +486,7 @@ async fn authorize_request(
|
||||
|
||||
// Default: authenticated users get full access, anonymous denied
|
||||
if identity.is_none() {
|
||||
return Err(S3Error::access_denied());
|
||||
return Err(StorageError::access_denied());
|
||||
}
|
||||
|
||||
Ok(())
|
||||
@@ -327,8 +498,8 @@ async fn authorize_request(
|
||||
|
||||
async fn route_request(
|
||||
req: Request<Incoming>,
|
||||
store: Arc<FileStore>,
|
||||
_config: &S3Config,
|
||||
store: Arc<StorageBackend>,
|
||||
_config: &SmartStorageConfig,
|
||||
request_id: &str,
|
||||
policy_store: &Arc<PolicyStore>,
|
||||
) -> Result<Response<BoxBody>> {
|
||||
@@ -416,8 +587,8 @@ async fn route_request(
|
||||
let upload_id = query.get("uploadId").unwrap().clone();
|
||||
handle_complete_multipart(req, store, &bucket, &key, &upload_id, request_id).await
|
||||
} else {
|
||||
let err = S3Error::invalid_request("Invalid POST request");
|
||||
Ok(s3_error_response(&err, request_id))
|
||||
let err = StorageError::invalid_request("Invalid POST request");
|
||||
Ok(storage_error_response(&err, request_id))
|
||||
}
|
||||
}
|
||||
_ => Ok(empty_response(StatusCode::METHOD_NOT_ALLOWED, request_id)),
|
||||
@@ -432,7 +603,7 @@ async fn route_request(
|
||||
// ============================
|
||||
|
||||
async fn handle_list_buckets(
|
||||
store: Arc<FileStore>,
|
||||
store: Arc<StorageBackend>,
|
||||
request_id: &str,
|
||||
) -> Result<Response<BoxBody>> {
|
||||
let buckets = store.list_buckets().await?;
|
||||
@@ -441,7 +612,7 @@ async fn handle_list_buckets(
|
||||
}
|
||||
|
||||
async fn handle_create_bucket(
|
||||
store: Arc<FileStore>,
|
||||
store: Arc<StorageBackend>,
|
||||
bucket: &str,
|
||||
request_id: &str,
|
||||
) -> Result<Response<BoxBody>> {
|
||||
@@ -450,7 +621,7 @@ async fn handle_create_bucket(
|
||||
}
|
||||
|
||||
async fn handle_delete_bucket(
|
||||
store: Arc<FileStore>,
|
||||
store: Arc<StorageBackend>,
|
||||
bucket: &str,
|
||||
request_id: &str,
|
||||
policy_store: &Arc<PolicyStore>,
|
||||
@@ -462,19 +633,19 @@ async fn handle_delete_bucket(
|
||||
}
|
||||
|
||||
async fn handle_head_bucket(
|
||||
store: Arc<FileStore>,
|
||||
store: Arc<StorageBackend>,
|
||||
bucket: &str,
|
||||
request_id: &str,
|
||||
) -> Result<Response<BoxBody>> {
|
||||
if store.bucket_exists(bucket).await {
|
||||
Ok(empty_response(StatusCode::OK, request_id))
|
||||
} else {
|
||||
Err(S3Error::no_such_bucket().into())
|
||||
Err(StorageError::no_such_bucket().into())
|
||||
}
|
||||
}
|
||||
|
||||
async fn handle_list_objects(
|
||||
store: Arc<FileStore>,
|
||||
store: Arc<StorageBackend>,
|
||||
bucket: &str,
|
||||
query: &HashMap<String, String>,
|
||||
request_id: &str,
|
||||
@@ -503,7 +674,7 @@ async fn handle_list_objects(
|
||||
|
||||
async fn handle_put_object(
|
||||
req: Request<Incoming>,
|
||||
store: Arc<FileStore>,
|
||||
store: Arc<StorageBackend>,
|
||||
bucket: &str,
|
||||
key: &str,
|
||||
request_id: &str,
|
||||
@@ -525,7 +696,7 @@ async fn handle_put_object(
|
||||
|
||||
async fn handle_get_object(
|
||||
req: Request<Incoming>,
|
||||
store: Arc<FileStore>,
|
||||
store: Arc<StorageBackend>,
|
||||
bucket: &str,
|
||||
key: &str,
|
||||
request_id: &str,
|
||||
@@ -578,7 +749,7 @@ async fn handle_get_object(
|
||||
}
|
||||
|
||||
async fn handle_head_object(
|
||||
store: Arc<FileStore>,
|
||||
store: Arc<StorageBackend>,
|
||||
bucket: &str,
|
||||
key: &str,
|
||||
request_id: &str,
|
||||
@@ -610,7 +781,7 @@ async fn handle_head_object(
|
||||
}
|
||||
|
||||
async fn handle_delete_object(
|
||||
store: Arc<FileStore>,
|
||||
store: Arc<StorageBackend>,
|
||||
bucket: &str,
|
||||
key: &str,
|
||||
request_id: &str,
|
||||
@@ -621,7 +792,7 @@ async fn handle_delete_object(
|
||||
|
||||
async fn handle_copy_object(
|
||||
req: Request<Incoming>,
|
||||
store: Arc<FileStore>,
|
||||
store: Arc<StorageBackend>,
|
||||
dest_bucket: &str,
|
||||
dest_key: &str,
|
||||
request_id: &str,
|
||||
@@ -684,20 +855,20 @@ async fn handle_get_bucket_policy(
|
||||
.unwrap();
|
||||
Ok(resp)
|
||||
}
|
||||
None => Err(S3Error::no_such_bucket_policy().into()),
|
||||
None => Err(StorageError::no_such_bucket_policy().into()),
|
||||
}
|
||||
}
|
||||
|
||||
async fn handle_put_bucket_policy(
|
||||
req: Request<Incoming>,
|
||||
store: &Arc<FileStore>,
|
||||
store: &Arc<StorageBackend>,
|
||||
policy_store: &Arc<PolicyStore>,
|
||||
bucket: &str,
|
||||
request_id: &str,
|
||||
) -> Result<Response<BoxBody>> {
|
||||
// Verify bucket exists
|
||||
if !store.bucket_exists(bucket).await {
|
||||
return Err(S3Error::no_such_bucket().into());
|
||||
return Err(StorageError::no_such_bucket().into());
|
||||
}
|
||||
|
||||
// Read body
|
||||
@@ -711,7 +882,7 @@ async fn handle_put_bucket_policy(
|
||||
policy_store
|
||||
.put_policy(bucket, validated_policy)
|
||||
.await
|
||||
.map_err(|e| S3Error::internal_error(&e.to_string()))?;
|
||||
.map_err(|e| StorageError::internal_error(&e.to_string()))?;
|
||||
|
||||
Ok(empty_response(StatusCode::NO_CONTENT, request_id))
|
||||
}
|
||||
@@ -724,7 +895,7 @@ async fn handle_delete_bucket_policy(
|
||||
policy_store
|
||||
.delete_policy(bucket)
|
||||
.await
|
||||
.map_err(|e| S3Error::internal_error(&e.to_string()))?;
|
||||
.map_err(|e| StorageError::internal_error(&e.to_string()))?;
|
||||
Ok(empty_response(StatusCode::NO_CONTENT, request_id))
|
||||
}
|
||||
|
||||
@@ -734,7 +905,7 @@ async fn handle_delete_bucket_policy(
|
||||
|
||||
async fn handle_initiate_multipart(
|
||||
req: Request<Incoming>,
|
||||
store: Arc<FileStore>,
|
||||
store: Arc<StorageBackend>,
|
||||
bucket: &str,
|
||||
key: &str,
|
||||
request_id: &str,
|
||||
@@ -747,7 +918,7 @@ async fn handle_initiate_multipart(
|
||||
|
||||
async fn handle_upload_part(
|
||||
req: Request<Incoming>,
|
||||
store: Arc<FileStore>,
|
||||
store: Arc<StorageBackend>,
|
||||
query: &HashMap<String, String>,
|
||||
request_id: &str,
|
||||
) -> Result<Response<BoxBody>> {
|
||||
@@ -758,7 +929,7 @@ async fn handle_upload_part(
|
||||
.unwrap_or(0);
|
||||
|
||||
if part_number < 1 || part_number > 10000 {
|
||||
return Err(S3Error::invalid_part_number().into());
|
||||
return Err(StorageError::invalid_part_number().into());
|
||||
}
|
||||
|
||||
let body = req.into_body();
|
||||
@@ -776,7 +947,7 @@ async fn handle_upload_part(
|
||||
|
||||
async fn handle_complete_multipart(
|
||||
req: Request<Incoming>,
|
||||
store: Arc<FileStore>,
|
||||
store: Arc<StorageBackend>,
|
||||
bucket: &str,
|
||||
key: &str,
|
||||
upload_id: &str,
|
||||
@@ -796,7 +967,7 @@ async fn handle_complete_multipart(
|
||||
}
|
||||
|
||||
async fn handle_abort_multipart(
|
||||
store: Arc<FileStore>,
|
||||
store: Arc<StorageBackend>,
|
||||
upload_id: &str,
|
||||
request_id: &str,
|
||||
) -> Result<Response<BoxBody>> {
|
||||
@@ -805,7 +976,7 @@ async fn handle_abort_multipart(
|
||||
}
|
||||
|
||||
async fn handle_list_multipart_uploads(
|
||||
store: Arc<FileStore>,
|
||||
store: Arc<StorageBackend>,
|
||||
bucket: &str,
|
||||
request_id: &str,
|
||||
) -> Result<Response<BoxBody>> {
|
||||
@@ -927,7 +1098,7 @@ fn extract_xml_value<'a>(xml: &'a str, tag: &str) -> Option<String> {
|
||||
// CORS
|
||||
// ============================
|
||||
|
||||
fn build_cors_preflight(config: &S3Config, request_id: &str) -> Response<BoxBody> {
|
||||
fn build_cors_preflight(config: &SmartStorageConfig, request_id: &str) -> Response<BoxBody> {
|
||||
let mut builder = Response::builder()
|
||||
.status(StatusCode::NO_CONTENT)
|
||||
.header("x-amz-request-id", request_id);
|
||||
@@ -951,7 +1122,7 @@ fn build_cors_preflight(config: &S3Config, request_id: &str) -> Response<BoxBody
|
||||
builder.body(empty_body()).unwrap()
|
||||
}
|
||||
|
||||
fn add_cors_headers(headers: &mut hyper::HeaderMap, config: &S3Config) {
|
||||
fn add_cors_headers(headers: &mut hyper::HeaderMap, config: &SmartStorageConfig) {
|
||||
if let Some(ref origins) = config.cors.allowed_origins {
|
||||
headers.insert(
|
||||
"access-control-allow-origin",
|
||||
|
||||
@@ -10,19 +10,18 @@ use tokio::fs;
|
||||
use tokio::io::{AsyncReadExt, AsyncSeekExt, AsyncWriteExt, BufWriter};
|
||||
use uuid::Uuid;
|
||||
|
||||
use crate::s3_error::S3Error;
|
||||
use crate::cluster::coordinator::DistributedStore;
|
||||
use crate::error::StorageError;
|
||||
|
||||
// ============================
|
||||
// Result types
|
||||
// ============================
|
||||
|
||||
pub struct PutResult {
|
||||
pub size: u64,
|
||||
pub md5: String,
|
||||
}
|
||||
|
||||
pub struct GetResult {
|
||||
pub key: String,
|
||||
pub size: u64,
|
||||
pub last_modified: DateTime<Utc>,
|
||||
pub md5: String,
|
||||
@@ -32,7 +31,6 @@ pub struct GetResult {
|
||||
}
|
||||
|
||||
pub struct HeadResult {
|
||||
pub key: String,
|
||||
pub size: u64,
|
||||
pub last_modified: DateTime<Utc>,
|
||||
pub md5: String,
|
||||
@@ -40,7 +38,6 @@ pub struct HeadResult {
|
||||
}
|
||||
|
||||
pub struct CopyResult {
|
||||
pub size: u64,
|
||||
pub md5: String,
|
||||
pub last_modified: DateTime<Utc>,
|
||||
}
|
||||
@@ -69,14 +66,12 @@ pub struct BucketInfo {
|
||||
|
||||
pub struct MultipartUploadInfo {
|
||||
pub upload_id: String,
|
||||
pub bucket: String,
|
||||
pub key: String,
|
||||
pub initiated: DateTime<Utc>,
|
||||
}
|
||||
|
||||
pub struct CompleteMultipartResult {
|
||||
pub etag: String,
|
||||
pub size: u64,
|
||||
}
|
||||
|
||||
// ============================
|
||||
@@ -126,10 +121,6 @@ impl FileStore {
|
||||
self.root_dir.join(".policies")
|
||||
}
|
||||
|
||||
pub fn policy_path(&self, bucket: &str) -> PathBuf {
|
||||
self.policies_dir().join(format!("{}.policy.json", bucket))
|
||||
}
|
||||
|
||||
pub async fn reset(&self) -> Result<()> {
|
||||
if self.root_dir.exists() {
|
||||
fs::remove_dir_all(&self.root_dir).await?;
|
||||
@@ -184,13 +175,13 @@ impl FileStore {
|
||||
let bucket_path = self.root_dir.join(bucket);
|
||||
|
||||
if !bucket_path.is_dir() {
|
||||
return Err(S3Error::no_such_bucket().into());
|
||||
return Err(StorageError::no_such_bucket().into());
|
||||
}
|
||||
|
||||
// Check if bucket is empty (ignore hidden files)
|
||||
let mut entries = fs::read_dir(&bucket_path).await?;
|
||||
while let Some(_entry) = entries.next_entry().await? {
|
||||
return Err(S3Error::bucket_not_empty().into());
|
||||
return Err(StorageError::bucket_not_empty().into());
|
||||
}
|
||||
|
||||
fs::remove_dir_all(&bucket_path).await?;
|
||||
@@ -209,7 +200,7 @@ impl FileStore {
|
||||
metadata: HashMap<String, String>,
|
||||
) -> Result<PutResult> {
|
||||
if !self.bucket_exists(bucket).await {
|
||||
return Err(S3Error::no_such_bucket().into());
|
||||
return Err(StorageError::no_such_bucket().into());
|
||||
}
|
||||
|
||||
let object_path = self.object_path(bucket, key);
|
||||
@@ -220,7 +211,6 @@ impl FileStore {
|
||||
let file = fs::File::create(&object_path).await?;
|
||||
let mut writer = BufWriter::new(file);
|
||||
let mut hasher = Md5::new();
|
||||
let mut total_size: u64 = 0;
|
||||
|
||||
// Stream body frames directly to file
|
||||
let mut body = body;
|
||||
@@ -229,7 +219,6 @@ impl FileStore {
|
||||
Some(Ok(frame)) => {
|
||||
if let Ok(data) = frame.into_data() {
|
||||
hasher.update(&data);
|
||||
total_size += data.len() as u64;
|
||||
writer.write_all(&data).await?;
|
||||
}
|
||||
}
|
||||
@@ -255,44 +244,6 @@ impl FileStore {
|
||||
fs::write(&metadata_path, metadata_json).await?;
|
||||
|
||||
Ok(PutResult {
|
||||
size: total_size,
|
||||
md5: md5_hex,
|
||||
})
|
||||
}
|
||||
|
||||
pub async fn put_object_bytes(
|
||||
&self,
|
||||
bucket: &str,
|
||||
key: &str,
|
||||
data: &[u8],
|
||||
metadata: HashMap<String, String>,
|
||||
) -> Result<PutResult> {
|
||||
if !self.bucket_exists(bucket).await {
|
||||
return Err(S3Error::no_such_bucket().into());
|
||||
}
|
||||
|
||||
let object_path = self.object_path(bucket, key);
|
||||
if let Some(parent) = object_path.parent() {
|
||||
fs::create_dir_all(parent).await?;
|
||||
}
|
||||
|
||||
let mut hasher = Md5::new();
|
||||
hasher.update(data);
|
||||
let md5_hex = format!("{:x}", hasher.finalize());
|
||||
|
||||
fs::write(&object_path, data).await?;
|
||||
|
||||
// Write MD5 sidecar
|
||||
let md5_path = format!("{}.md5", object_path.display());
|
||||
fs::write(&md5_path, &md5_hex).await?;
|
||||
|
||||
// Write metadata sidecar
|
||||
let metadata_path = format!("{}.metadata.json", object_path.display());
|
||||
let metadata_json = serde_json::to_string_pretty(&metadata)?;
|
||||
fs::write(&metadata_path, metadata_json).await?;
|
||||
|
||||
Ok(PutResult {
|
||||
size: data.len() as u64,
|
||||
md5: md5_hex,
|
||||
})
|
||||
}
|
||||
@@ -306,7 +257,7 @@ impl FileStore {
|
||||
let object_path = self.object_path(bucket, key);
|
||||
|
||||
if !object_path.exists() {
|
||||
return Err(S3Error::no_such_key().into());
|
||||
return Err(StorageError::no_such_key().into());
|
||||
}
|
||||
|
||||
let file_meta = fs::metadata(&object_path).await?;
|
||||
@@ -326,7 +277,6 @@ impl FileStore {
|
||||
};
|
||||
|
||||
Ok(GetResult {
|
||||
key: key.to_string(),
|
||||
size,
|
||||
last_modified,
|
||||
md5,
|
||||
@@ -340,7 +290,7 @@ impl FileStore {
|
||||
let object_path = self.object_path(bucket, key);
|
||||
|
||||
if !object_path.exists() {
|
||||
return Err(S3Error::no_such_key().into());
|
||||
return Err(StorageError::no_such_key().into());
|
||||
}
|
||||
|
||||
// Only stat the file, don't open it
|
||||
@@ -352,7 +302,6 @@ impl FileStore {
|
||||
let metadata = self.read_metadata(&object_path).await;
|
||||
|
||||
Ok(HeadResult {
|
||||
key: key.to_string(),
|
||||
size,
|
||||
last_modified,
|
||||
md5,
|
||||
@@ -404,11 +353,11 @@ impl FileStore {
|
||||
let dest_path = self.object_path(dest_bucket, dest_key);
|
||||
|
||||
if !src_path.exists() {
|
||||
return Err(S3Error::no_such_key().into());
|
||||
return Err(StorageError::no_such_key().into());
|
||||
}
|
||||
|
||||
if !self.bucket_exists(dest_bucket).await {
|
||||
return Err(S3Error::no_such_bucket().into());
|
||||
return Err(StorageError::no_such_bucket().into());
|
||||
}
|
||||
|
||||
if let Some(parent) = dest_path.parent() {
|
||||
@@ -439,7 +388,6 @@ impl FileStore {
|
||||
let last_modified: DateTime<Utc> = file_meta.modified()?.into();
|
||||
|
||||
Ok(CopyResult {
|
||||
size: file_meta.len(),
|
||||
md5,
|
||||
last_modified,
|
||||
})
|
||||
@@ -456,7 +404,7 @@ impl FileStore {
|
||||
let bucket_path = self.root_dir.join(bucket);
|
||||
|
||||
if !bucket_path.is_dir() {
|
||||
return Err(S3Error::no_such_bucket().into());
|
||||
return Err(StorageError::no_such_bucket().into());
|
||||
}
|
||||
|
||||
// Collect all object keys recursively
|
||||
@@ -581,7 +529,7 @@ impl FileStore {
|
||||
) -> Result<(String, u64)> {
|
||||
let upload_dir = self.multipart_dir().join(upload_id);
|
||||
if !upload_dir.is_dir() {
|
||||
return Err(S3Error::no_such_upload().into());
|
||||
return Err(StorageError::no_such_upload().into());
|
||||
}
|
||||
|
||||
let part_path = upload_dir.join(format!("part-{}", part_number));
|
||||
@@ -655,7 +603,7 @@ impl FileStore {
|
||||
) -> Result<CompleteMultipartResult> {
|
||||
let upload_dir = self.multipart_dir().join(upload_id);
|
||||
if !upload_dir.is_dir() {
|
||||
return Err(S3Error::no_such_upload().into());
|
||||
return Err(StorageError::no_such_upload().into());
|
||||
}
|
||||
|
||||
// Read metadata to get bucket/key
|
||||
@@ -672,7 +620,6 @@ impl FileStore {
|
||||
let dest_file = fs::File::create(&object_path).await?;
|
||||
let mut writer = BufWriter::new(dest_file);
|
||||
let mut hasher = Md5::new();
|
||||
let mut total_size: u64 = 0;
|
||||
|
||||
for (part_number, _etag) in parts {
|
||||
let part_path = upload_dir.join(format!("part-{}", part_number));
|
||||
@@ -689,7 +636,6 @@ impl FileStore {
|
||||
}
|
||||
hasher.update(&buf[..n]);
|
||||
writer.write_all(&buf[..n]).await?;
|
||||
total_size += n as u64;
|
||||
}
|
||||
}
|
||||
|
||||
@@ -712,14 +658,13 @@ impl FileStore {
|
||||
|
||||
Ok(CompleteMultipartResult {
|
||||
etag,
|
||||
size: total_size,
|
||||
})
|
||||
}
|
||||
|
||||
pub async fn abort_multipart(&self, upload_id: &str) -> Result<()> {
|
||||
let upload_dir = self.multipart_dir().join(upload_id);
|
||||
if !upload_dir.is_dir() {
|
||||
return Err(S3Error::no_such_upload().into());
|
||||
return Err(StorageError::no_such_upload().into());
|
||||
}
|
||||
fs::remove_dir_all(&upload_dir).await?;
|
||||
Ok(())
|
||||
@@ -752,7 +697,6 @@ impl FileStore {
|
||||
|
||||
uploads.push(MultipartUploadInfo {
|
||||
upload_id: meta.upload_id,
|
||||
bucket: meta.bucket,
|
||||
key: meta.key,
|
||||
initiated,
|
||||
});
|
||||
@@ -772,7 +716,7 @@ impl FileStore {
|
||||
let encoded = encode_key(key);
|
||||
self.root_dir
|
||||
.join(bucket)
|
||||
.join(format!("{}._S3_object", encoded))
|
||||
.join(format!("{}._storage_object", encoded))
|
||||
}
|
||||
|
||||
async fn read_md5(&self, object_path: &Path) -> String {
|
||||
@@ -832,7 +776,7 @@ impl FileStore {
|
||||
|
||||
if meta.is_dir() {
|
||||
self.collect_keys(bucket_path, &entry.path(), keys).await?;
|
||||
} else if name.ends_with("._S3_object")
|
||||
} else if name.ends_with("._storage_object")
|
||||
&& !name.ends_with(".metadata.json")
|
||||
&& !name.ends_with(".md5")
|
||||
{
|
||||
@@ -842,7 +786,7 @@ impl FileStore {
|
||||
.unwrap_or(Path::new(""))
|
||||
.to_string_lossy()
|
||||
.to_string();
|
||||
let key = decode_key(relative.trim_end_matches("._S3_object"));
|
||||
let key = decode_key(relative.trim_end_matches("._storage_object"));
|
||||
keys.push(key);
|
||||
}
|
||||
}
|
||||
@@ -852,6 +796,200 @@ impl FileStore {
|
||||
}
|
||||
}
|
||||
|
||||
// ============================
|
||||
// StorageBackend enum
|
||||
// ============================
|
||||
|
||||
/// Unified storage backend that dispatches to either standalone (FileStore)
|
||||
/// or clustered (DistributedStore) storage.
|
||||
pub enum StorageBackend {
|
||||
Standalone(FileStore),
|
||||
Clustered(DistributedStore),
|
||||
}
|
||||
|
||||
impl StorageBackend {
|
||||
pub fn policies_dir(&self) -> std::path::PathBuf {
|
||||
match self {
|
||||
StorageBackend::Standalone(fs) => fs.policies_dir(),
|
||||
StorageBackend::Clustered(ds) => ds.policies_dir(),
|
||||
}
|
||||
}
|
||||
|
||||
pub async fn initialize(&self) -> Result<()> {
|
||||
match self {
|
||||
StorageBackend::Standalone(fs) => fs.initialize().await,
|
||||
StorageBackend::Clustered(ds) => {
|
||||
// Ensure policies directory exists
|
||||
tokio::fs::create_dir_all(ds.policies_dir()).await?;
|
||||
Ok(())
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
pub async fn reset(&self) -> Result<()> {
|
||||
match self {
|
||||
StorageBackend::Standalone(fs) => fs.reset().await,
|
||||
StorageBackend::Clustered(_) => Ok(()), // TODO: cluster reset
|
||||
}
|
||||
}
|
||||
|
||||
pub async fn list_buckets(&self) -> Result<Vec<BucketInfo>> {
|
||||
match self {
|
||||
StorageBackend::Standalone(fs) => fs.list_buckets().await,
|
||||
StorageBackend::Clustered(ds) => ds.list_buckets().await,
|
||||
}
|
||||
}
|
||||
|
||||
pub async fn bucket_exists(&self, bucket: &str) -> bool {
|
||||
match self {
|
||||
StorageBackend::Standalone(fs) => fs.bucket_exists(bucket).await,
|
||||
StorageBackend::Clustered(ds) => ds.bucket_exists(bucket).await,
|
||||
}
|
||||
}
|
||||
|
||||
pub async fn create_bucket(&self, bucket: &str) -> Result<()> {
|
||||
match self {
|
||||
StorageBackend::Standalone(fs) => fs.create_bucket(bucket).await,
|
||||
StorageBackend::Clustered(ds) => ds.create_bucket(bucket).await,
|
||||
}
|
||||
}
|
||||
|
||||
pub async fn delete_bucket(&self, bucket: &str) -> Result<()> {
|
||||
match self {
|
||||
StorageBackend::Standalone(fs) => fs.delete_bucket(bucket).await,
|
||||
StorageBackend::Clustered(ds) => ds.delete_bucket(bucket).await,
|
||||
}
|
||||
}
|
||||
|
||||
pub async fn put_object(
|
||||
&self,
|
||||
bucket: &str,
|
||||
key: &str,
|
||||
body: Incoming,
|
||||
metadata: HashMap<String, String>,
|
||||
) -> Result<PutResult> {
|
||||
match self {
|
||||
StorageBackend::Standalone(fs) => fs.put_object(bucket, key, body, metadata).await,
|
||||
StorageBackend::Clustered(ds) => ds.put_object(bucket, key, body, metadata).await,
|
||||
}
|
||||
}
|
||||
|
||||
pub async fn get_object(
|
||||
&self,
|
||||
bucket: &str,
|
||||
key: &str,
|
||||
range: Option<(u64, u64)>,
|
||||
) -> Result<GetResult> {
|
||||
match self {
|
||||
StorageBackend::Standalone(fs) => fs.get_object(bucket, key, range).await,
|
||||
StorageBackend::Clustered(ds) => ds.get_object(bucket, key, range).await,
|
||||
}
|
||||
}
|
||||
|
||||
pub async fn head_object(&self, bucket: &str, key: &str) -> Result<HeadResult> {
|
||||
match self {
|
||||
StorageBackend::Standalone(fs) => fs.head_object(bucket, key).await,
|
||||
StorageBackend::Clustered(ds) => ds.head_object(bucket, key).await,
|
||||
}
|
||||
}
|
||||
|
||||
pub async fn delete_object(&self, bucket: &str, key: &str) -> Result<()> {
|
||||
match self {
|
||||
StorageBackend::Standalone(fs) => fs.delete_object(bucket, key).await,
|
||||
StorageBackend::Clustered(ds) => ds.delete_object(bucket, key).await,
|
||||
}
|
||||
}
|
||||
|
||||
pub async fn copy_object(
|
||||
&self,
|
||||
src_bucket: &str,
|
||||
src_key: &str,
|
||||
dest_bucket: &str,
|
||||
dest_key: &str,
|
||||
metadata_directive: &str,
|
||||
new_metadata: Option<HashMap<String, String>>,
|
||||
) -> Result<CopyResult> {
|
||||
match self {
|
||||
StorageBackend::Standalone(fs) => {
|
||||
fs.copy_object(src_bucket, src_key, dest_bucket, dest_key, metadata_directive, new_metadata).await
|
||||
}
|
||||
StorageBackend::Clustered(ds) => {
|
||||
ds.copy_object(src_bucket, src_key, dest_bucket, dest_key, metadata_directive, new_metadata).await
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
pub async fn list_objects(
|
||||
&self,
|
||||
bucket: &str,
|
||||
prefix: &str,
|
||||
delimiter: &str,
|
||||
max_keys: usize,
|
||||
continuation_token: Option<&str>,
|
||||
) -> Result<ListObjectsResult> {
|
||||
match self {
|
||||
StorageBackend::Standalone(fs) => {
|
||||
fs.list_objects(bucket, prefix, delimiter, max_keys, continuation_token).await
|
||||
}
|
||||
StorageBackend::Clustered(ds) => {
|
||||
ds.list_objects(bucket, prefix, delimiter, max_keys, continuation_token).await
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
pub async fn initiate_multipart(
|
||||
&self,
|
||||
bucket: &str,
|
||||
key: &str,
|
||||
metadata: HashMap<String, String>,
|
||||
) -> Result<String> {
|
||||
match self {
|
||||
StorageBackend::Standalone(fs) => fs.initiate_multipart(bucket, key, metadata).await,
|
||||
StorageBackend::Clustered(ds) => ds.initiate_multipart(bucket, key, metadata).await,
|
||||
}
|
||||
}
|
||||
|
||||
pub async fn upload_part(
|
||||
&self,
|
||||
upload_id: &str,
|
||||
part_number: u32,
|
||||
body: Incoming,
|
||||
) -> Result<(String, u64)> {
|
||||
match self {
|
||||
StorageBackend::Standalone(fs) => fs.upload_part(upload_id, part_number, body).await,
|
||||
StorageBackend::Clustered(ds) => ds.upload_part(upload_id, part_number, body).await,
|
||||
}
|
||||
}
|
||||
|
||||
pub async fn complete_multipart(
|
||||
&self,
|
||||
upload_id: &str,
|
||||
parts: &[(u32, String)],
|
||||
) -> Result<CompleteMultipartResult> {
|
||||
match self {
|
||||
StorageBackend::Standalone(fs) => fs.complete_multipart(upload_id, parts).await,
|
||||
StorageBackend::Clustered(ds) => ds.complete_multipart(upload_id, parts).await,
|
||||
}
|
||||
}
|
||||
|
||||
pub async fn abort_multipart(&self, upload_id: &str) -> Result<()> {
|
||||
match self {
|
||||
StorageBackend::Standalone(fs) => fs.abort_multipart(upload_id).await,
|
||||
StorageBackend::Clustered(ds) => ds.abort_multipart(upload_id).await,
|
||||
}
|
||||
}
|
||||
|
||||
pub async fn list_multipart_uploads(
|
||||
&self,
|
||||
bucket: &str,
|
||||
) -> Result<Vec<MultipartUploadInfo>> {
|
||||
match self {
|
||||
StorageBackend::Standalone(fs) => fs.list_multipart_uploads(bucket).await,
|
||||
StorageBackend::Clustered(ds) => ds.list_multipart_uploads(bucket).await,
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// ============================
|
||||
// Key encoding (identity on Linux)
|
||||
// ============================
|
||||
|
||||
@@ -1,7 +1,7 @@
|
||||
use crate::storage::{BucketInfo, ListObjectsResult, MultipartUploadInfo};
|
||||
|
||||
const XML_DECL: &str = "<?xml version=\"1.0\" encoding=\"UTF-8\"?>";
|
||||
const S3_NS: &str = "http://s3.amazonaws.com/doc/2006-03-01/";
|
||||
const STORAGE_NS: &str = "http://s3.amazonaws.com/doc/2006-03-01/";
|
||||
|
||||
fn xml_escape(s: &str) -> String {
|
||||
s.replace('&', "&amp;")
|
||||
@@ -14,9 +14,9 @@ fn xml_escape(s: &str) -> String {
|
||||
pub fn list_buckets_xml(buckets: &[BucketInfo]) -> String {
|
||||
let mut xml = format!(
|
||||
"{}\n<ListAllMyBucketsResult xmlns=\"{}\">\
|
||||
<Owner><ID>123456789000</ID><DisplayName>S3rver</DisplayName></Owner>\
|
||||
<Owner><ID>123456789000</ID><DisplayName>Storage</DisplayName></Owner>\
|
||||
<Buckets>",
|
||||
XML_DECL, S3_NS
|
||||
XML_DECL, STORAGE_NS
|
||||
);
|
||||
|
||||
for b in buckets {
|
||||
@@ -39,7 +39,7 @@ pub fn list_objects_v1_xml(bucket: &str, result: &ListObjectsResult) -> String {
|
||||
<MaxKeys>{}</MaxKeys>\
|
||||
<IsTruncated>{}</IsTruncated>",
|
||||
XML_DECL,
|
||||
S3_NS,
|
||||
STORAGE_NS,
|
||||
xml_escape(bucket),
|
||||
xml_escape(&result.prefix),
|
||||
result.max_keys,
|
||||
@@ -86,7 +86,7 @@ pub fn list_objects_v2_xml(bucket: &str, result: &ListObjectsResult) -> String {
|
||||
<KeyCount>{}</KeyCount>\
|
||||
<IsTruncated>{}</IsTruncated>",
|
||||
XML_DECL,
|
||||
S3_NS,
|
||||
STORAGE_NS,
|
||||
xml_escape(bucket),
|
||||
xml_escape(&result.prefix),
|
||||
result.max_keys,
|
||||
@@ -132,15 +132,6 @@ pub fn list_objects_v2_xml(bucket: &str, result: &ListObjectsResult) -> String {
|
||||
xml
|
||||
}
|
||||
|
||||
pub fn error_xml(code: &str, message: &str) -> String {
|
||||
format!(
|
||||
"{}\n<Error><Code>{}</Code><Message>{}</Message></Error>",
|
||||
XML_DECL,
|
||||
xml_escape(code),
|
||||
xml_escape(message)
|
||||
)
|
||||
}
|
||||
|
||||
pub fn copy_object_result_xml(etag: &str, last_modified: &str) -> String {
|
||||
format!(
|
||||
"{}\n<CopyObjectResult>\
|
||||
@@ -161,7 +152,7 @@ pub fn initiate_multipart_xml(bucket: &str, key: &str, upload_id: &str) -> Strin
|
||||
<UploadId>{}</UploadId>\
|
||||
</InitiateMultipartUploadResult>",
|
||||
XML_DECL,
|
||||
S3_NS,
|
||||
STORAGE_NS,
|
||||
xml_escape(bucket),
|
||||
xml_escape(key),
|
||||
xml_escape(upload_id)
|
||||
@@ -177,7 +168,7 @@ pub fn complete_multipart_xml(bucket: &str, key: &str, etag: &str) -> String {
|
||||
<ETag>\"{}\"</ETag>\
|
||||
</CompleteMultipartUploadResult>",
|
||||
XML_DECL,
|
||||
S3_NS,
|
||||
STORAGE_NS,
|
||||
xml_escape(bucket),
|
||||
xml_escape(key),
|
||||
xml_escape(bucket),
|
||||
@@ -195,7 +186,7 @@ pub fn list_multipart_uploads_xml(bucket: &str, uploads: &[MultipartUploadInfo])
|
||||
<MaxUploads>1000</MaxUploads>\
|
||||
<IsTruncated>false</IsTruncated>",
|
||||
XML_DECL,
|
||||
S3_NS,
|
||||
STORAGE_NS,
|
||||
xml_escape(bucket)
|
||||
);
|
||||
|
||||
@@ -204,8 +195,8 @@ pub fn list_multipart_uploads_xml(bucket: &str, uploads: &[MultipartUploadInfo])
|
||||
"<Upload>\
|
||||
<Key>{}</Key>\
|
||||
<UploadId>{}</UploadId>\
|
||||
<Initiator><ID>S3RVER</ID><DisplayName>S3RVER</DisplayName></Initiator>\
|
||||
<Owner><ID>S3RVER</ID><DisplayName>S3RVER</DisplayName></Owner>\
|
||||
<Initiator><ID>STORAGE</ID><DisplayName>STORAGE</DisplayName></Initiator>\
|
||||
<Owner><ID>STORAGE</ID><DisplayName>STORAGE</DisplayName></Owner>\
|
||||
<StorageClass>STANDARD</StorageClass>\
|
||||
<Initiated>{}</Initiated>\
|
||||
</Upload>",
|
||||
|
||||
@@ -12,9 +12,9 @@ import {
|
||||
DeleteBucketPolicyCommand,
|
||||
} from '@aws-sdk/client-s3';
|
||||
import { Readable } from 'stream';
|
||||
import * as smarts3 from '../ts/index.js';
|
||||
import * as smartstorage from '../ts/index.js';
|
||||
|
||||
let testSmarts3Instance: smarts3.Smarts3;
|
||||
let testSmartStorageInstance: smartstorage.SmartStorage;
|
||||
let authClient: S3Client;
|
||||
let wrongClient: S3Client;
|
||||
|
||||
@@ -35,8 +35,8 @@ async function streamToString(stream: Readable): Promise<string> {
|
||||
// Server setup
|
||||
// ============================
|
||||
|
||||
tap.test('should start S3 server with auth enabled', async () => {
|
||||
testSmarts3Instance = await smarts3.Smarts3.createAndStart({
|
||||
tap.test('should start storage server with auth enabled', async () => {
|
||||
testSmartStorageInstance = await smartstorage.SmartStorage.createAndStart({
|
||||
server: {
|
||||
port: TEST_PORT,
|
||||
silent: true,
|
||||
@@ -294,8 +294,8 @@ tap.test('authenticated: delete the bucket', async () => {
|
||||
expect(response.$metadata.httpStatusCode).toEqual(204);
|
||||
});
|
||||
|
||||
tap.test('should stop the S3 server', async () => {
|
||||
await testSmarts3Instance.stop();
|
||||
tap.test('should stop the storage server', async () => {
|
||||
await testSmartStorageInstance.stop();
|
||||
});
|
||||
|
||||
export default tap.start();
|
||||
|
||||
@@ -1,9 +1,9 @@
|
||||
import { expect, tap } from '@git.zone/tstest/tapbundle';
|
||||
import { S3Client, CreateBucketCommand, ListBucketsCommand, PutObjectCommand, GetObjectCommand, DeleteObjectCommand, DeleteBucketCommand } from '@aws-sdk/client-s3';
|
||||
import { Readable } from 'stream';
|
||||
import * as smarts3 from '../ts/index.js';
|
||||
import * as smartstorage from '../ts/index.js';
|
||||
|
||||
let testSmarts3Instance: smarts3.Smarts3;
|
||||
let testSmartStorageInstance: smartstorage.SmartStorage;
|
||||
let s3Client: S3Client;
|
||||
|
||||
// Helper to convert stream to string
|
||||
@@ -16,8 +16,8 @@ async function streamToString(stream: Readable): Promise<string> {
|
||||
});
|
||||
}
|
||||
|
||||
tap.test('should start the S3 server and configure client', async () => {
|
||||
testSmarts3Instance = await smarts3.Smarts3.createAndStart({
|
||||
tap.test('should start the storage server and configure client', async () => {
|
||||
testSmartStorageInstance = await smartstorage.SmartStorage.createAndStart({
|
||||
server: {
|
||||
port: 3337,
|
||||
silent: true,
|
||||
@@ -27,7 +27,7 @@ tap.test('should start the S3 server and configure client', async () => {
|
||||
},
|
||||
});
|
||||
|
||||
const descriptor = await testSmarts3Instance.getS3Descriptor();
|
||||
const descriptor = await testSmartStorageInstance.getStorageDescriptor();
|
||||
|
||||
s3Client = new S3Client({
|
||||
endpoint: `http://${descriptor.endpoint}:${descriptor.port}`,
|
||||
@@ -101,8 +101,8 @@ tap.test('should delete the bucket', async () => {
|
||||
expect(response.$metadata.httpStatusCode).toEqual(204);
|
||||
});
|
||||
|
||||
tap.test('should stop the S3 server', async () => {
|
||||
await testSmarts3Instance.stop();
|
||||
tap.test('should stop the storage server', async () => {
|
||||
await testSmartStorageInstance.stop();
|
||||
});
|
||||
|
||||
export default tap.start();
|
||||
|
||||
@@ -14,9 +14,9 @@ import {
|
||||
GetBucketPolicyCommand,
|
||||
DeleteBucketPolicyCommand,
|
||||
} from '@aws-sdk/client-s3';
|
||||
import * as smarts3 from '../ts/index.js';
|
||||
import * as smartstorage from '../ts/index.js';
|
||||
|
||||
let testSmarts3Instance: smarts3.Smarts3;
|
||||
let testSmartStorageInstance: smartstorage.SmartStorage;
|
||||
let authClient: S3Client;
|
||||
|
||||
const TEST_PORT = 3347;
|
||||
@@ -56,7 +56,7 @@ function denyStatement(action: string) {
|
||||
// ============================
|
||||
|
||||
tap.test('setup: start server, create bucket, upload object', async () => {
|
||||
testSmarts3Instance = await smarts3.Smarts3.createAndStart({
|
||||
testSmartStorageInstance = await smartstorage.SmartStorage.createAndStart({
|
||||
server: { port: TEST_PORT, silent: true, region: 'us-east-1' },
|
||||
storage: { cleanSlate: true },
|
||||
auth: {
|
||||
@@ -275,7 +275,7 @@ tap.test('ListAllMyBuckets always requires auth → anonymous fetch to / returns
|
||||
|
||||
tap.test('Auth disabled mode → anonymous full access works', async () => {
|
||||
// Start a second server with auth disabled
|
||||
const noAuthInstance = await smarts3.Smarts3.createAndStart({
|
||||
const noAuthInstance = await smartstorage.SmartStorage.createAndStart({
|
||||
server: { port: 3348, silent: true, region: 'us-east-1' },
|
||||
storage: { cleanSlate: true },
|
||||
auth: { enabled: false, credentials: [] },
|
||||
@@ -329,7 +329,7 @@ tap.test('teardown: clean up and stop server', async () => {
|
||||
} catch {
|
||||
// May already be deleted
|
||||
}
|
||||
await testSmarts3Instance.stop();
|
||||
await testSmartStorageInstance.stop();
|
||||
});
|
||||
|
||||
export default tap.start();
|
||||
|
||||
@@ -7,9 +7,9 @@ import {
|
||||
GetBucketPolicyCommand,
|
||||
DeleteBucketPolicyCommand,
|
||||
} from '@aws-sdk/client-s3';
|
||||
import * as smarts3 from '../ts/index.js';
|
||||
import * as smartstorage from '../ts/index.js';
|
||||
|
||||
let testSmarts3Instance: smarts3.Smarts3;
|
||||
let testSmartStorageInstance: smartstorage.SmartStorage;
|
||||
let authClient: S3Client;
|
||||
|
||||
const TEST_PORT = 3345;
|
||||
@@ -33,8 +33,8 @@ const validStatement = {
|
||||
// Server setup
|
||||
// ============================
|
||||
|
||||
tap.test('setup: start S3 server with auth enabled', async () => {
|
||||
testSmarts3Instance = await smarts3.Smarts3.createAndStart({
|
||||
tap.test('setup: start storage server with auth enabled', async () => {
|
||||
testSmartStorageInstance = await smartstorage.SmartStorage.createAndStart({
|
||||
server: { port: TEST_PORT, silent: true, region: 'us-east-1' },
|
||||
storage: { cleanSlate: true },
|
||||
auth: {
|
||||
@@ -246,7 +246,7 @@ tap.test('Bucket deletion cleans up associated policy', async () => {
|
||||
|
||||
tap.test('teardown: delete bucket and stop server', async () => {
|
||||
await authClient.send(new DeleteBucketCommand({ Bucket: BUCKET }));
|
||||
await testSmarts3Instance.stop();
|
||||
await testSmartStorageInstance.stop();
|
||||
});
|
||||
|
||||
export default tap.start();
|
||||
|
||||
@@ -10,9 +10,9 @@ import {
|
||||
DeleteBucketPolicyCommand,
|
||||
} from '@aws-sdk/client-s3';
|
||||
import { Readable } from 'stream';
|
||||
import * as smarts3 from '../ts/index.js';
|
||||
import * as smartstorage from '../ts/index.js';
|
||||
|
||||
let testSmarts3Instance: smarts3.Smarts3;
|
||||
let testSmartStorageInstance: smartstorage.SmartStorage;
|
||||
let authClient: S3Client;
|
||||
|
||||
const TEST_PORT = 3346;
|
||||
@@ -48,7 +48,7 @@ async function clearPolicy() {
|
||||
// ============================
|
||||
|
||||
tap.test('setup: start server, create bucket, upload object', async () => {
|
||||
testSmarts3Instance = await smarts3.Smarts3.createAndStart({
|
||||
testSmartStorageInstance = await smartstorage.SmartStorage.createAndStart({
|
||||
server: { port: TEST_PORT, silent: true, region: 'us-east-1' },
|
||||
storage: { cleanSlate: true },
|
||||
auth: {
|
||||
@@ -511,7 +511,7 @@ tap.test('Policy allows s3:ListBucket → anonymous GET bucket (list objects) su
|
||||
tap.test('teardown: clean up and stop server', async () => {
|
||||
await authClient.send(new DeleteObjectCommand({ Bucket: BUCKET, Key: 'test-obj.txt' }));
|
||||
await authClient.send(new DeleteBucketCommand({ Bucket: BUCKET }));
|
||||
await testSmarts3Instance.stop();
|
||||
await testSmartStorageInstance.stop();
|
||||
});
|
||||
|
||||
export default tap.start();
|
||||
|
||||
12
test/test.ts
12
test/test.ts
@@ -1,12 +1,12 @@
|
||||
import { expect, tap } from '@git.zone/tstest/tapbundle';
|
||||
import * as plugins from './plugins.js';
|
||||
|
||||
import * as smarts3 from '../ts/index.js';
|
||||
import * as smartstorage from '../ts/index.js';
|
||||
|
||||
let testSmarts3Instance: smarts3.Smarts3;
|
||||
let testSmartStorageInstance: smartstorage.SmartStorage;
|
||||
|
||||
tap.test('should create a smarts3 instance and run it', async (toolsArg) => {
|
||||
testSmarts3Instance = await smarts3.Smarts3.createAndStart({
|
||||
tap.test('should create a smartstorage instance and run it', async (toolsArg) => {
|
||||
testSmartStorageInstance = await smartstorage.SmartStorage.createAndStart({
|
||||
server: {
|
||||
port: 3333,
|
||||
},
|
||||
@@ -20,7 +20,7 @@ tap.test('should create a smarts3 instance and run it', async (toolsArg) => {
|
||||
|
||||
tap.test('should be able to access buckets', async () => {
|
||||
const smartbucketInstance = new plugins.smartbucket.SmartBucket(
|
||||
await testSmarts3Instance.getS3Descriptor(),
|
||||
await testSmartStorageInstance.getStorageDescriptor(),
|
||||
);
|
||||
const bucket = await smartbucketInstance.createBucket('testbucket');
|
||||
const baseDirectory = await bucket.getBaseDirectory();
|
||||
@@ -31,7 +31,7 @@ tap.test('should be able to access buckets', async () => {
|
||||
});
|
||||
|
||||
tap.test('should stop the instance', async () => {
|
||||
await testSmarts3Instance.stop();
|
||||
await testSmartStorageInstance.stop();
|
||||
});
|
||||
|
||||
tap.start();
|
||||
|
||||
@@ -2,7 +2,7 @@
|
||||
* autocreated commitinfo by @push.rocks/commitinfo
|
||||
*/
|
||||
export const commitinfo = {
|
||||
name: '@push.rocks/smarts3',
|
||||
version: '5.2.0',
|
||||
description: 'A Node.js TypeScript package to create a local S3 endpoint for simulating AWS S3 operations using mapped local directories for development and testing purposes.'
|
||||
name: '@push.rocks/smartstorage',
|
||||
version: '6.3.2',
|
||||
description: 'A Node.js TypeScript package to create a local S3-compatible storage server using mapped local directories for development and testing purposes.'
|
||||
}
|
||||
|
||||
86
ts/index.ts
86
ts/index.ts
@@ -70,9 +70,39 @@ export interface IStorageConfig {
|
||||
}
|
||||
|
||||
/**
|
||||
* Complete smarts3 configuration
|
||||
* Erasure coding configuration
|
||||
*/
|
||||
export interface ISmarts3Config {
|
||||
export interface IErasureConfig {
|
||||
dataShards?: number;
|
||||
parityShards?: number;
|
||||
chunkSizeBytes?: number;
|
||||
}
|
||||
|
||||
/**
|
||||
* Drive configuration for multi-drive support
|
||||
*/
|
||||
export interface IDriveConfig {
|
||||
paths: string[];
|
||||
}
|
||||
|
||||
/**
|
||||
* Cluster configuration for distributed mode
|
||||
*/
|
||||
export interface IClusterConfig {
|
||||
enabled: boolean;
|
||||
nodeId?: string;
|
||||
quicPort?: number;
|
||||
seedNodes?: string[];
|
||||
erasure?: IErasureConfig;
|
||||
drives?: IDriveConfig;
|
||||
heartbeatIntervalMs?: number;
|
||||
heartbeatTimeoutMs?: number;
|
||||
}
|
||||
|
||||
/**
|
||||
* Complete smartstorage configuration
|
||||
*/
|
||||
export interface ISmartStorageConfig {
|
||||
server?: IServerConfig;
|
||||
storage?: IStorageConfig;
|
||||
auth?: IAuthConfig;
|
||||
@@ -80,12 +110,13 @@ export interface ISmarts3Config {
|
||||
logging?: ILoggingConfig;
|
||||
limits?: ILimitsConfig;
|
||||
multipart?: IMultipartConfig;
|
||||
cluster?: IClusterConfig;
|
||||
}
|
||||
|
||||
/**
|
||||
* Default configuration values
|
||||
*/
|
||||
const DEFAULT_CONFIG: ISmarts3Config = {
|
||||
const DEFAULT_CONFIG: ISmartStorageConfig = {
|
||||
server: {
|
||||
port: 3000,
|
||||
address: '0.0.0.0',
|
||||
@@ -100,8 +131,8 @@ const DEFAULT_CONFIG: ISmarts3Config = {
|
||||
enabled: false,
|
||||
credentials: [
|
||||
{
|
||||
accessKeyId: 'S3RVER',
|
||||
secretAccessKey: 'S3RVER',
|
||||
accessKeyId: 'STORAGE',
|
||||
secretAccessKey: 'STORAGE',
|
||||
},
|
||||
],
|
||||
},
|
||||
@@ -133,7 +164,7 @@ const DEFAULT_CONFIG: ISmarts3Config = {
|
||||
/**
|
||||
* Merge user config with defaults (deep merge)
|
||||
*/
|
||||
function mergeConfig(userConfig: ISmarts3Config): Required<ISmarts3Config> {
|
||||
function mergeConfig(userConfig: ISmartStorageConfig): Required<ISmartStorageConfig> {
|
||||
return {
|
||||
server: {
|
||||
...DEFAULT_CONFIG.server!,
|
||||
@@ -163,41 +194,40 @@ function mergeConfig(userConfig: ISmarts3Config): Required<ISmarts3Config> {
|
||||
...DEFAULT_CONFIG.multipart!,
|
||||
...(userConfig.multipart || {}),
|
||||
},
|
||||
};
|
||||
...(userConfig.cluster ? { cluster: userConfig.cluster } : {}),
|
||||
} as Required<ISmartStorageConfig>;
|
||||
}
|
||||
|
||||
/**
|
||||
* IPC command type map for RustBridge
|
||||
*/
|
||||
type TRustS3Commands = {
|
||||
start: { params: { config: Required<ISmarts3Config> }; result: {} };
|
||||
type TRustStorageCommands = {
|
||||
start: { params: { config: Required<ISmartStorageConfig> }; result: {} };
|
||||
stop: { params: {}; result: {} };
|
||||
createBucket: { params: { name: string }; result: {} };
|
||||
};
|
||||
|
||||
/**
|
||||
* Main Smarts3 class - production-ready S3-compatible server
|
||||
* Main SmartStorage class - production-ready S3-compatible storage server
|
||||
*/
|
||||
export class Smarts3 {
|
||||
export class SmartStorage {
|
||||
// STATIC
|
||||
public static async createAndStart(configArg: ISmarts3Config = {}) {
|
||||
const smartS3Instance = new Smarts3(configArg);
|
||||
await smartS3Instance.start();
|
||||
return smartS3Instance;
|
||||
public static async createAndStart(configArg: ISmartStorageConfig = {}) {
|
||||
const smartStorageInstance = new SmartStorage(configArg);
|
||||
await smartStorageInstance.start();
|
||||
return smartStorageInstance;
|
||||
}
|
||||
|
||||
// INSTANCE
|
||||
public config: Required<ISmarts3Config>;
|
||||
private bridge: InstanceType<typeof plugins.RustBridge<TRustS3Commands>>;
|
||||
public config: Required<ISmartStorageConfig>;
|
||||
private bridge: InstanceType<typeof plugins.RustBridge<TRustStorageCommands>>;
|
||||
|
||||
constructor(configArg: ISmarts3Config = {}) {
|
||||
constructor(configArg: ISmartStorageConfig = {}) {
|
||||
this.config = mergeConfig(configArg);
|
||||
this.bridge = new plugins.RustBridge<TRustS3Commands>({
|
||||
binaryName: 'rusts3',
|
||||
this.bridge = new plugins.RustBridge<TRustStorageCommands>({
|
||||
binaryName: 'ruststorage',
|
||||
localPaths: [
|
||||
plugins.path.join(paths.packageDir, 'dist_rust', 'rusts3'),
|
||||
plugins.path.join(paths.packageDir, 'rust', 'target', 'release', 'rusts3'),
|
||||
plugins.path.join(paths.packageDir, 'rust', 'target', 'debug', 'rusts3'),
|
||||
plugins.path.join(paths.packageDir, 'dist_rust', 'ruststorage'),
|
||||
],
|
||||
readyTimeoutMs: 30000,
|
||||
requestTimeoutMs: 300000,
|
||||
@@ -207,21 +237,21 @@ export class Smarts3 {
|
||||
public async start() {
|
||||
const spawned = await this.bridge.spawn();
|
||||
if (!spawned) {
|
||||
throw new Error('Failed to spawn rusts3 binary. Make sure it is compiled (pnpm build).');
|
||||
throw new Error('Failed to spawn ruststorage binary. Make sure it is compiled (pnpm build).');
|
||||
}
|
||||
await this.bridge.sendCommand('start', { config: this.config });
|
||||
|
||||
if (!this.config.server.silent) {
|
||||
console.log('s3 server is running');
|
||||
console.log('storage server is running');
|
||||
}
|
||||
}
|
||||
|
||||
public async getS3Descriptor(
|
||||
public async getStorageDescriptor(
|
||||
optionsArg?: Partial<plugins.tsclass.storage.IS3Descriptor>,
|
||||
): Promise<plugins.tsclass.storage.IS3Descriptor> {
|
||||
const cred = this.config.auth.credentials[0] || {
|
||||
accessKeyId: 'S3RVER',
|
||||
secretAccessKey: 'S3RVER',
|
||||
accessKeyId: 'STORAGE',
|
||||
secretAccessKey: 'STORAGE',
|
||||
};
|
||||
|
||||
const descriptor: plugins.tsclass.storage.IS3Descriptor = {
|
||||
|
||||
Reference in New Issue
Block a user