Compare commits

...

9 Commits

Author SHA1 Message Date
a009d990d0 v6.3.0
Some checks failed
Default (tags) / security (push) Failing after 0s
Default (tags) / test (push) Failing after 0s
Default (tags) / release (push) Has been skipped
Default (tags) / metadata (push) Has been skipped
2026-03-21 22:04:36 +00:00
08d545f5db feat(readme): document distributed cluster mode, erasure coding, and QUIC-based architecture 2026-03-21 22:04:36 +00:00
a0a282c712 v6.2.0
Some checks failed
Default (tags) / security (push) Failing after 0s
Default (tags) / test (push) Failing after 0s
Default (tags) / release (push) Has been skipped
Default (tags) / metadata (push) Has been skipped
2026-03-21 22:00:41 +00:00
3eb0045676 feat(cluster): add shard healing, drive health heartbeats, and clustered policy directory support 2026-03-21 22:00:41 +00:00
639eb5d36c v6.1.0
Some checks failed
Default (tags) / security (push) Failing after 0s
Default (tags) / test (push) Failing after 0s
Default (tags) / release (push) Has been skipped
Default (tags) / metadata (push) Has been skipped
2026-03-21 21:50:42 +00:00
d12d321079 feat(cluster): add clustered storage backend with QUIC transport, erasure coding, and shard management 2026-03-21 21:50:42 +00:00
4fcd05d3c6 v6.0.1
Some checks failed
Default (tags) / security (push) Successful in 34s
Default (tags) / test (push) Failing after 35s
Default (tags) / release (push) Has been skipped
Default (tags) / metadata (push) Has been skipped
2026-03-14 23:46:12 +00:00
503e25ff98 fix(rust-bridge): update smartrust and limit RustBridge binary lookup to dist_rust 2026-03-14 23:46:12 +00:00
bba0855218 BREAKING CHANGE(core): rebrand from smarts3 to smartstorage
Some checks failed
Default (tags) / security (push) Successful in 43s
Default (tags) / test (push) Failing after 26s
Default (tags) / release (push) Has been skipped
Default (tags) / metadata (push) Has been skipped
- Package renamed from @push.rocks/smarts3 to @push.rocks/smartstorage
- Class: Smarts3 → SmartStorage, Interface: ISmarts3Config → ISmartStorageConfig
- Method: getS3Descriptor → getStorageDescriptor
- Rust binary: rusts3 → ruststorage
- Rust types: S3Error→StorageError, S3Action→StorageAction, S3Config→SmartStorageConfig, S3Server→StorageServer
- On-disk file extension: ._S3_object → ._storage_object
- Default credentials: S3RVER → STORAGE
- All internal S3 branding removed; AWS S3 protocol compatibility fully maintained
2026-03-14 15:20:30 +00:00
40 changed files with 8782 additions and 3810 deletions

View File

@@ -1,5 +1,48 @@
# Changelog # Changelog
## 2026-03-21 - 6.3.0 - feat(readme)
document distributed cluster mode, erasure coding, and QUIC-based architecture
- Expand README overview and feature matrix to highlight clustering, multi-drive awareness, and distributed storage capabilities
- Add standalone and cluster mode usage examples plus cluster configuration options
- Document clustering internals including erasure coding, quorum behavior, QUIC transport, self-healing, and on-disk layout
## 2026-03-21 - 6.2.0 - feat(cluster)
add shard healing, drive health heartbeats, and clustered policy directory support
- implements manifest-based healing that scans affected shards on offline nodes, reconstructs data with erasure coding, and rewrites recovered shards to local storage
- includes drive status reporting in membership heartbeats by wiring DriveManager health checks into cluster heartbeat messages
- adds clustered policies directory initialization and exposes policy storage paths from the distributed coordinator
- extends distributed coordinator support for remote shard read and delete operations plus multipart upload session metadata
## 2026-03-21 - 6.1.0 - feat(cluster)
add clustered storage backend with QUIC transport, erasure coding, and shard management
- introduces cluster configuration in Rust and TypeScript, including seed nodes, drive paths, heartbeat settings, and erasure coding options
- adds core cluster modules for membership, topology state, object manifests, placement, shard storage, drive management, healing scaffolding, and inter-node protocol handling
- adds QUIC-based transport for cluster communication and integrates a distributed storage backend alongside the existing standalone FileStore
- updates the server startup path to initialize standalone or clustered storage based on configuration and exposes a basic clusterStatus management endpoint
- refreshes build and dependency versions to support the new clustered storage implementation
## 2026-03-14 - 6.0.1 - fix(rust-bridge)
update smartrust and limit RustBridge binary lookup to dist_rust
- Bumps @push.rocks/smartrust from ^1.0.0 to ^1.3.2.
- Removes rust target debug and release fallback paths from RustBridge local binary resolution, relying on dist_rust/ruststorage.
## 2026-03-14 - 6.0.0 - BREAKING CHANGE(core)
Rebrand from smarts3 to smartstorage
- Package renamed from @push.rocks/smarts3 to @push.rocks/smartstorage
- Class renamed from Smarts3 to SmartStorage (no backward-compatible re-export)
- Interface renamed from ISmarts3Config to ISmartStorageConfig
- Method renamed from getS3Descriptor to getStorageDescriptor
- Rust binary renamed from rusts3 to ruststorage
- Rust types renamed: S3Error→StorageError, S3Action→StorageAction, S3Config→SmartStorageConfig, S3Server→StorageServer
- On-disk file extension changed from ._S3_object to ._storage_object (BREAKING for existing stored data)
- Default credentials changed from S3RVER to STORAGE
- All internal S3 branding removed; AWS S3 protocol compatibility (IAM actions, ARNs, SigV4) fully maintained
## 2026-02-17 - 5.3.0 - feat(auth) ## 2026-02-17 - 5.3.0 - feat(auth)
add AWS SigV4 authentication and bucket policy support add AWS SigV4 authentication and bucket policy support

View File

@@ -10,14 +10,15 @@
"module": { "module": {
"githost": "code.foss.global", "githost": "code.foss.global",
"gitscope": "push.rocks", "gitscope": "push.rocks",
"gitrepo": "smarts3", "gitrepo": "smartstorage",
"description": "A Node.js TypeScript package to create a local S3 endpoint for simulating AWS S3 operations using mapped local directories for development and testing purposes.", "description": "A Node.js TypeScript package to create a local S3-compatible storage server using mapped local directories for development and testing purposes.",
"npmPackagename": "@push.rocks/smarts3", "npmPackagename": "@push.rocks/smartstorage",
"license": "MIT", "license": "MIT",
"projectDomain": "push.rocks", "projectDomain": "push.rocks",
"keywords": [ "keywords": [
"S3 Mock Server", "smartstorage",
"Local S3", "S3 Compatible",
"Local Storage Server",
"Node.js", "Node.js",
"TypeScript", "TypeScript",
"Local Development", "Local Development",
@@ -26,8 +27,8 @@
"File Storage", "File Storage",
"AWS S3 Compatibility", "AWS S3 Compatibility",
"Development Tool", "Development Tool",
"S3 Endpoint", "Storage Endpoint",
"S3 Simulation", "Storage Simulation",
"Bucket Management", "Bucket Management",
"File Upload", "File Upload",
"CI/CD Integration", "CI/CD Integration",

View File

@@ -1,27 +1,28 @@
{ {
"name": "@push.rocks/smarts3", "name": "@push.rocks/smartstorage",
"version": "5.3.0", "version": "6.3.0",
"private": false, "private": false,
"description": "A Node.js TypeScript package to create a local S3 endpoint for simulating AWS S3 operations using mapped local directories for development and testing purposes.", "description": "A Node.js TypeScript package to create a local S3-compatible storage server using mapped local directories for development and testing purposes.",
"main": "dist_ts/index.js", "main": "dist_ts/index.js",
"typings": "dist_ts/index.d.ts", "typings": "dist_ts/index.d.ts",
"type": "module", "type": "module",
"author": "Lossless GmbH", "author": "Lossless GmbH",
"license": "MIT", "license": "MIT",
"scripts": { "scripts": {
"test:before": "(tsrust)",
"test": "(tstest test/ --web --verbose --logfile --timeout 60)", "test": "(tstest test/ --web --verbose --logfile --timeout 60)",
"build": "(tsrust && tsbuild --web --allowimplicitany)", "build": "(tsrust && tsbuild tsfolders --allowimplicitany)",
"buildDocs": "tsdoc" "buildDocs": "tsdoc"
}, },
"devDependencies": { "devDependencies": {
"@aws-sdk/client-s3": "^3.937.0", "@aws-sdk/client-s3": "^3.1014.0",
"@git.zone/tsbuild": "^3.1.0", "@git.zone/tsbuild": "^4.3.0",
"@git.zone/tsbundle": "^2.5.2", "@git.zone/tsbundle": "^2.9.1",
"@git.zone/tsrun": "^2.0.0", "@git.zone/tsrun": "^2.0.1",
"@git.zone/tstest": "^3.1.0",
"@push.rocks/smartbucket": "^4.3.0",
"@git.zone/tsrust": "^1.3.0", "@git.zone/tsrust": "^1.3.0",
"@types/node": "^22.9.0" "@git.zone/tstest": "^3.5.0",
"@push.rocks/smartbucket": "^4.5.1",
"@types/node": "^25.5.0"
}, },
"browserslist": [ "browserslist": [
"last 1 chrome versions" "last 1 chrome versions"
@@ -41,12 +42,13 @@
], ],
"dependencies": { "dependencies": {
"@push.rocks/smartpath": "^6.0.0", "@push.rocks/smartpath": "^6.0.0",
"@push.rocks/smartrust": "^1.0.0", "@push.rocks/smartrust": "^1.3.2",
"@tsclass/tsclass": "^9.3.0" "@tsclass/tsclass": "^9.5.0"
}, },
"keywords": [ "keywords": [
"S3 Mock Server", "smartstorage",
"Local S3", "S3 Compatible",
"Local Storage Server",
"Node.js", "Node.js",
"TypeScript", "TypeScript",
"Local Development", "Local Development",
@@ -55,20 +57,20 @@
"File Storage", "File Storage",
"AWS S3 Compatibility", "AWS S3 Compatibility",
"Development Tool", "Development Tool",
"S3 Endpoint", "Storage Endpoint",
"S3 Simulation", "Storage Simulation",
"Bucket Management", "Bucket Management",
"File Upload", "File Upload",
"CI/CD Integration", "CI/CD Integration",
"Developer Onboarding" "Developer Onboarding"
], ],
"homepage": "https://code.foss.global/push.rocks/smarts3#readme", "homepage": "https://code.foss.global/push.rocks/smartstorage#readme",
"repository": { "repository": {
"type": "git", "type": "git",
"url": "https://code.foss.global/push.rocks/smarts3.git" "url": "ssh://git@code.foss.global:29419/push.rocks/smartstorage.git"
}, },
"bugs": { "bugs": {
"url": "https://code.foss.global/push.rocks/smarts3/issues" "url": "https://code.foss.global/push.rocks/smartstorage/issues"
}, },
"packageManager": "pnpm@10.14.0+sha512.ad27a79641b49c3e481a16a805baa71817a04bbe06a38d17e60e2eaee83f6a146c6a688125f5792e48dd5ba30e7da52a5cda4c3992b9ccf333f9ce223af84748", "packageManager": "pnpm@10.14.0+sha512.ad27a79641b49c3e481a16a805baa71817a04bbe06a38d17e60e2eaee83f6a146c6a688125f5792e48dd5ba30e7da52a5cda4c3992b9ccf333f9ce223af84748",
"pnpm": { "pnpm": {

6243
pnpm-lock.yaml generated

File diff suppressed because it is too large Load Diff

View File

@@ -1,6 +1,6 @@
# Production-Readiness Plan for smarts3 # Production-Readiness Plan for smartstorage
**Goal:** Make smarts3 production-ready as a MinIO alternative for use cases where: **Goal:** Make smartstorage production-ready as a MinIO alternative for use cases where:
- Running MinIO is out of scope - Running MinIO is out of scope
- You have a program written for S3 and want to use the local filesystem - You have a program written for S3 and want to use the local filesystem
- You need a lightweight, zero-dependency S3-compatible server - You need a lightweight, zero-dependency S3-compatible server
@@ -31,7 +31,7 @@
### 1. Multipart Upload Support 🚀 **HIGHEST PRIORITY** ### 1. Multipart Upload Support 🚀 **HIGHEST PRIORITY**
**Why:** Essential for uploading files >5MB efficiently. Without this, smarts3 can't handle real-world production workloads. **Why:** Essential for uploading files >5MB efficiently. Without this, smartstorage can't handle real-world production workloads.
**Implementation Required:** **Implementation Required:**
- `POST /:bucket/:key?uploads` - CreateMultipartUpload - `POST /:bucket/:key?uploads` - CreateMultipartUpload
@@ -46,13 +46,13 @@
**Files to Create/Modify:** **Files to Create/Modify:**
- `ts/controllers/multipart.controller.ts` (new) - `ts/controllers/multipart.controller.ts` (new)
- `ts/classes/filesystem-store.ts` (add multipart methods) - `ts/classes/filesystem-store.ts` (add multipart methods)
- `ts/classes/smarts3-server.ts` (add multipart routes) - `ts/classes/smartstorage-server.ts` (add multipart routes)
--- ---
### 2. Configurable Authentication 🔐 ### 2. Configurable Authentication 🔐
**Why:** Credentials are currently hardcoded ('S3RVER'/'S3RVER'). Production needs custom credentials. **Why:** Credentials are currently hardcoded ('STORAGE'/'STORAGE'). Production needs custom credentials.
**Implementation Required:** **Implementation Required:**
- Support custom access keys and secrets via configuration - Support custom access keys and secrets via configuration
@@ -75,7 +75,7 @@ interface IAuthConfig {
**Files to Create/Modify:** **Files to Create/Modify:**
- `ts/classes/auth-middleware.ts` (new) - `ts/classes/auth-middleware.ts` (new)
- `ts/classes/signature-validator.ts` (new) - `ts/classes/signature-validator.ts` (new)
- `ts/classes/smarts3-server.ts` (integrate auth middleware) - `ts/classes/smartstorage-server.ts` (integrate auth middleware)
- `ts/index.ts` (add auth config options) - `ts/index.ts` (add auth config options)
--- ---
@@ -105,7 +105,7 @@ interface ICorsConfig {
**Files to Create/Modify:** **Files to Create/Modify:**
- `ts/classes/cors-middleware.ts` (new) - `ts/classes/cors-middleware.ts` (new)
- `ts/classes/smarts3-server.ts` (integrate CORS middleware) - `ts/classes/smartstorage-server.ts` (integrate CORS middleware)
- `ts/index.ts` (add CORS config options) - `ts/index.ts` (add CORS config options)
--- ---
@@ -131,7 +131,7 @@ interface ISslConfig {
``` ```
**Files to Create/Modify:** **Files to Create/Modify:**
- `ts/classes/smarts3-server.ts` (add HTTPS server creation) - `ts/classes/smartstorage-server.ts` (add HTTPS server creation)
- `ts/index.ts` (add SSL config options) - `ts/index.ts` (add SSL config options)
--- ---
@@ -147,7 +147,7 @@ interface ISslConfig {
- Sensible production defaults - Sensible production defaults
- Example configurations for common use cases - Example configurations for common use cases
**Configuration File Example (`smarts3.config.json`):** **Configuration File Example (`smartstorage.config.json`):**
```json ```json
{ {
"server": { "server": {
@@ -220,7 +220,7 @@ interface ISslConfig {
**Files to Create/Modify:** **Files to Create/Modify:**
- `ts/classes/logger.ts` (new - use @push.rocks/smartlog?) - `ts/classes/logger.ts` (new - use @push.rocks/smartlog?)
- `ts/classes/access-logger-middleware.ts` (new) - `ts/classes/access-logger-middleware.ts` (new)
- `ts/classes/smarts3-server.ts` (replace console.log with logger) - `ts/classes/smartstorage-server.ts` (replace console.log with logger)
- All controller files (use structured logging) - All controller files (use structured logging)
--- ---
@@ -238,7 +238,7 @@ interface ISslConfig {
**Files to Create/Modify:** **Files to Create/Modify:**
- `ts/controllers/health.controller.ts` (new) - `ts/controllers/health.controller.ts` (new)
- `ts/classes/metrics-collector.ts` (new) - `ts/classes/metrics-collector.ts` (new)
- `ts/classes/smarts3-server.ts` (add health routes) - `ts/classes/smartstorage-server.ts` (add health routes)
--- ---
@@ -266,7 +266,7 @@ interface ISslConfig {
**Files to Create/Modify:** **Files to Create/Modify:**
- `ts/classes/validation-middleware.ts` (new) - `ts/classes/validation-middleware.ts` (new)
- `ts/utils/validators.ts` (new) - `ts/utils/validators.ts` (new)
- `ts/classes/smarts3-server.ts` (integrate validation middleware) - `ts/classes/smartstorage-server.ts` (integrate validation middleware)
--- ---
@@ -291,7 +291,7 @@ interface ISslConfig {
- SIGTERM/SIGINT handling - SIGTERM/SIGINT handling
**Files to Create/Modify:** **Files to Create/Modify:**
- `ts/classes/smarts3-server.ts` (add graceful shutdown logic) - `ts/classes/smartstorage-server.ts` (add graceful shutdown logic)
- `ts/index.ts` (add signal handlers) - `ts/index.ts` (add signal handlers)
--- ---
@@ -336,7 +336,7 @@ interface ISslConfig {
4. ✅ Production configuration system 4. ✅ Production configuration system
5. ✅ Production logging 5. ✅ Production logging
**Outcome:** smarts3 can handle real production workloads **Outcome:** smartstorage can handle real production workloads
--- ---
@@ -350,7 +350,7 @@ interface ISslConfig {
9. ✅ Graceful shutdown 9. ✅ Graceful shutdown
10. ✅ Batch operations 10. ✅ Batch operations
**Outcome:** smarts3 is operationally mature **Outcome:** smartstorage is operationally mature
--- ---
@@ -363,7 +363,7 @@ interface ISslConfig {
13. ✅ Comprehensive test suite 13. ✅ Comprehensive test suite
14. ✅ Documentation updates 14. ✅ Documentation updates
**Outcome:** smarts3 has broad S3 API compatibility **Outcome:** smartstorage has broad S3 API compatibility
--- ---
@@ -375,7 +375,7 @@ interface ISslConfig {
16. ✅ Performance optimization 16. ✅ Performance optimization
17. ✅ Advanced features based on user feedback 17. ✅ Advanced features based on user feedback
**Outcome:** smarts3 is a complete MinIO alternative **Outcome:** smartstorage is a complete MinIO alternative
--- ---
@@ -392,7 +392,7 @@ interface ISslConfig {
## 🎯 Target Use Cases ## 🎯 Target Use Cases
**With this plan implemented, smarts3 will be a solid MinIO alternative for:** **With this plan implemented, smartstorage will be a solid MinIO alternative for:**
**Local S3 development** - Fast, simple, no Docker required **Local S3 development** - Fast, simple, no Docker required
**Testing S3 integrations** - Reliable, repeatable tests **Testing S3 integrations** - Reliable, repeatable tests

View File

@@ -1,10 +1,10 @@
# Project Hints for smarts3 # Project Hints for smartstorage
## Current State (v6.0.0-dev) ## Current State (v6.0.0)
- **Rust-powered S3 server** via `@push.rocks/smartrust` IPC bridge - **Rust-powered S3-compatible storage server** via `@push.rocks/smartrust` IPC bridge
- High-performance: streaming I/O, zero-copy, backpressure, range seek - High-performance: streaming I/O, zero-copy, backpressure, range seek
- TypeScript is thin IPC wrapper; all HTTP/storage/routing in Rust binary `rusts3` - TypeScript is thin IPC wrapper; all HTTP/storage/routing in Rust binary `ruststorage`
- Full S3 compatibility: PUT, GET, HEAD, DELETE for objects and buckets - Full S3 compatibility: PUT, GET, HEAD, DELETE for objects and buckets
- Multipart upload support (streaming, no OOM) - Multipart upload support (streaming, no OOM)
- **Real AWS SigV4 authentication** (cryptographic signature verification) - **Real AWS SigV4 authentication** (cryptographic signature verification)
@@ -18,37 +18,37 @@
- `main.rs` - Clap CLI, management mode entry - `main.rs` - Clap CLI, management mode entry
- `config.rs` - Serde config structs matching TS interfaces (includes `region`) - `config.rs` - Serde config structs matching TS interfaces (includes `region`)
- `management.rs` - IPC loop (newline-delimited JSON over stdin/stdout) - `management.rs` - IPC loop (newline-delimited JSON over stdin/stdout)
- `server.rs` - hyper 1.x HTTP server, routing, CORS, auth+policy pipeline, all S3 handlers - `server.rs` - hyper 1.x HTTP server, routing, CORS, auth+policy pipeline, all S3-compatible handlers
- `storage.rs` - FileStore: filesystem-backed storage, multipart manager, `.policies/` dir - `storage.rs` - FileStore: filesystem-backed storage, multipart manager, `.policies/` dir
- `xml_response.rs` - S3 XML response builders - `xml_response.rs` - S3-compatible XML response builders
- `s3_error.rs` - S3 error codes with HTTP status mapping - `error.rs` - StorageError codes with HTTP status mapping
- `auth.rs` - AWS SigV4 signature verification (HMAC-SHA256, clock skew, constant-time compare) - `auth.rs` - AWS SigV4 signature verification (HMAC-SHA256, clock skew, constant-time compare)
- `action.rs` - S3Action enum + request-to-IAM-action resolver + RequestContext - `action.rs` - StorageAction enum + request-to-IAM-action resolver + RequestContext
- `policy.rs` - BucketPolicy model, evaluation engine (Deny > Allow > NoOpinion), PolicyStore (RwLock cache + disk) - `policy.rs` - BucketPolicy model, evaluation engine (Deny > Allow > NoOpinion), PolicyStore (RwLock cache + disk)
### TypeScript Bridge (`ts/`) ### TypeScript Bridge (`ts/`)
- `ts/index.ts` - Smarts3 class with RustBridge<TRustS3Commands> - `ts/index.ts` - SmartStorage class with RustBridge<TRustStorageCommands>
- `ts/plugins.ts` - path, smartpath, RustBridge, tsclass - `ts/plugins.ts` - path, smartpath, RustBridge, tsclass
- `ts/paths.ts` - packageDir, bucketsDir defaults - `ts/paths.ts` - packageDir, bucketsDir defaults
### IPC Commands ### IPC Commands
| Command | Params | Action | | Command | Params | Action |
|---------|--------|--------| |---------|--------|--------|
| `start` | `{ config: ISmarts3Config }` | Init storage + HTTP server | | `start` | `{ config: ISmartStorageConfig }` | Init storage + HTTP server |
| `stop` | `{}` | Graceful shutdown | | `stop` | `{}` | Graceful shutdown |
| `createBucket` | `{ name: string }` | Create bucket directory | | `createBucket` | `{ name: string }` | Create bucket directory |
### Storage Layout (backward-compatible) ### Storage Layout
- Objects: `{root}/{bucket}/{key}._S3_object` - Objects: `{root}/{bucket}/{key}._storage_object`
- Metadata: `{root}/{bucket}/{key}._S3_object.metadata.json` - Metadata: `{root}/{bucket}/{key}._storage_object.metadata.json`
- MD5: `{root}/{bucket}/{key}._S3_object.md5` - MD5: `{root}/{bucket}/{key}._storage_object.md5`
- Multipart: `{root}/.multipart/{upload_id}/part-{N}` - Multipart: `{root}/.multipart/{upload_id}/part-{N}`
- Policies: `{root}/.policies/{bucket}.policy.json` - Policies: `{root}/.policies/{bucket}.policy.json`
## Build ## Build
- `pnpm build` runs `tsrust && tsbuild tsfolders --allowimplicitany`
- `tsrust` compiles Rust to `dist_rust/rusts3` - `tsrust` compiles Rust to `dist_rust/ruststorage`
- Targets: linux_amd64, linux_arm64 (configured in npmextra.json) - Targets: linux_amd64, linux_arm64 (configured in npmextra.json)
## Dependencies ## Dependencies

335
readme.md
View File

@@ -1,81 +1,119 @@
# @push.rocks/smarts3 🚀 # @push.rocks/smartstorage
A high-performance, S3-compatible local server powered by a **Rust core** with a clean TypeScript API. Drop-in replacement for AWS S3 during development and testing — no cloud, no Docker, no MinIO. Just `npm install` and go. A high-performance, S3-compatible storage server powered by a **Rust core** with a clean TypeScript API. Runs standalone for dev/test — or scales out as a **distributed, erasure-coded cluster** with QUIC-based inter-node communication. No cloud, no Docker. Just `npm install` and go. 🚀
## Issue Reporting and Security ## Issue Reporting and Security
For reporting bugs, issues, or security vulnerabilities, please visit [community.foss.global/](https://community.foss.global/). This is the central community hub for all issue reporting. Developers who sign and comply with our contribution agreement and go through identification can also get a [code.foss.global/](https://code.foss.global/) account to submit Pull Requests directly. For reporting bugs, issues, or security vulnerabilities, please visit [community.foss.global/](https://community.foss.global/). This is the central community hub for all issue reporting. Developers who sign and comply with our contribution agreement and go through identification can also get a [code.foss.global/](https://code.foss.global/) account to submit Pull Requests directly.
## 🌟 Why smarts3? ## Why smartstorage?
| Feature | smarts3 | MinIO | s3rver | | Feature | smartstorage | MinIO | s3rver |
|---------|---------|-------|--------| |---------|-------------|-------|--------|
| Install | `pnpm add` | Docker / binary | `npm install` | | Install | `pnpm add` | Docker / binary | `npm install` |
| Startup time | ~20ms | seconds | ~200ms | | Startup time | ~20ms | seconds | ~200ms |
| Large file uploads | Streaming, zero-copy | ✅ | ❌ OOM risk | | Large file uploads | Streaming, zero-copy | Yes | OOM risk |
| Range requests | Seek-based | ✅ | ❌ Full read | | Range requests | Seek-based | Yes | Full read |
| Language | Rust + TypeScript | Go | JavaScript | | Language | Rust + TypeScript | Go | JavaScript |
| Multipart uploads | ✅ Full support | ✅ | ❌ | | Multipart uploads | ✅ Full support | Yes | No |
| Auth | AWS SigV4 (full verification) | Full IAM | Basic | | Auth | AWS SigV4 (full verification) | Full IAM | Basic |
| Bucket policies | IAM-style evaluation | ✅ | ❌ | | Bucket policies | IAM-style evaluation | Yes | No |
| Clustering | ✅ Erasure-coded, QUIC | Yes | No |
| Multi-drive awareness | ✅ Per-drive health | Yes | No |
### Core Features ### Core Features
- **Rust-powered HTTP server** — hyper 1.x with streaming I/O, zero-copy, backpressure - 🦀 **Rust-powered HTTP server** — hyper 1.x with streaming I/O, zero-copy, backpressure
- 🔄 **Full S3 API compatibility** — works with AWS SDK v3, SmartBucket, any S3 client - 📦 **Full S3-compatible API** — works with AWS SDK v3, SmartBucket, any S3 client
- 📂 **Filesystem-backed storage** — buckets map to directories, objects to files - 💾 **Filesystem-backed storage** — buckets map to directories, objects to files
- 📤 **Streaming multipart uploads** — large files without memory pressure - 📤 **Streaming multipart uploads** — large files without memory pressure
- 🎯 **Byte-range requests** — `seek()` directly to the requested byte offset - 📐 **Byte-range requests** — `seek()` directly to the requested byte offset
- 🔐 **AWS SigV4 authentication** — full signature verification with constant-time comparison and 15-min clock skew enforcement - 🔐 **AWS SigV4 authentication** — full signature verification with constant-time comparison
- 📜 **Bucket policies** — IAM-style JSON policies with Allow/Deny evaluation, wildcard matching, and anonymous access support - 📋 **Bucket policies** — IAM-style JSON policies with Allow/Deny evaluation and wildcard matching
- 🌐 **CORS middleware** — configurable cross-origin support - 🌐 **CORS middleware** — configurable cross-origin support
- 📊 **Structured logging** — tracing-based, error through debug levels
- 🧹 **Clean slate mode** — wipe storage on startup for test isolation - 🧹 **Clean slate mode** — wipe storage on startup for test isolation
- 🧪 **Test-first design** — start/stop in milliseconds, no port conflicts - **Test-first design** — start/stop in milliseconds, no port conflicts
## 📦 Installation ### Clustering Features
- 🔗 **Erasure coding** — Reed-Solomon (configurable k data + m parity shards) for storage efficiency and fault tolerance
- 🚄 **QUIC transport** — multiplexed, encrypted inter-node communication via `quinn` with zero head-of-line blocking
- 💽 **Multi-drive awareness** — each node manages multiple independent storage paths with health monitoring
- 🤝 **Cluster membership** — static seed config + runtime join, heartbeat-based failure detection
- ✍️ **Quorum writes** — data is only acknowledged after k+1 shards are persisted
- 📖 **Quorum reads** — reconstruct from any k available shards, local-first fast path
- 🩹 **Self-healing** — background scanner detects and reconstructs missing/corrupt shards
## Installation
```bash ```bash
pnpm add @push.rocks/smarts3 -D pnpm add @push.rocks/smartstorage -D
``` ```
> **Note:** The package ships with precompiled Rust binaries for `linux_amd64` and `linux_arm64`. No Rust toolchain needed on your machine. > **Note:** The package ships with precompiled Rust binaries for `linux_amd64` and `linux_arm64`. No Rust toolchain needed on your machine.
## 🚀 Quick Start ## Quick Start
### Standalone Mode (Dev & Test)
```typescript ```typescript
import { Smarts3 } from '@push.rocks/smarts3'; import { SmartStorage } from '@push.rocks/smartstorage';
// Start a local S3 server // Start a local S3-compatible storage server
const s3 = await Smarts3.createAndStart({ const storage = await SmartStorage.createAndStart({
server: { port: 3000 }, server: { port: 3000 },
storage: { cleanSlate: true }, storage: { cleanSlate: true },
}); });
// Create a bucket // Create a bucket
await s3.createBucket('my-bucket'); await storage.createBucket('my-bucket');
// Get connection details for any S3 client // Get connection details for any S3 client
const descriptor = await s3.getS3Descriptor(); const descriptor = await storage.getStorageDescriptor();
// → { endpoint: 'localhost', port: 3000, accessKey: 'S3RVER', accessSecret: 'S3RVER', useSsl: false } // → { endpoint: 'localhost', port: 3000, accessKey: 'STORAGE', accessSecret: 'STORAGE', useSsl: false }
// When done // When done
await s3.stop(); await storage.stop();
``` ```
## 📖 Configuration ### Cluster Mode (Distributed)
```typescript
import { SmartStorage } from '@push.rocks/smartstorage';
const storage = await SmartStorage.createAndStart({
server: { port: 3000 },
cluster: {
enabled: true,
nodeId: 'node-1',
quicPort: 4000,
seedNodes: ['192.168.1.11:4000', '192.168.1.12:4000'],
erasure: {
dataShards: 4, // k: minimum shards to reconstruct data
parityShards: 2, // m: fault tolerance (can lose up to m shards)
},
drives: {
paths: ['/mnt/disk1', '/mnt/disk2', '/mnt/disk3'],
},
},
});
```
Objects are automatically split into chunks (default 4 MB), erasure-coded into 6 shards (4 data + 2 parity), and distributed across drives/nodes. Any 4 of 6 shards can reconstruct the original data.
## Configuration
All config fields are optional — sensible defaults are applied automatically. All config fields are optional — sensible defaults are applied automatically.
```typescript ```typescript
import { Smarts3, ISmarts3Config } from '@push.rocks/smarts3'; import { SmartStorage, ISmartStorageConfig } from '@push.rocks/smartstorage';
const config: ISmarts3Config = { const config: ISmartStorageConfig = {
server: { server: {
port: 3000, // Default: 3000 port: 3000, // Default: 3000
address: '0.0.0.0', // Default: '0.0.0.0' address: '0.0.0.0', // Default: '0.0.0.0'
silent: false, // Default: false silent: false, // Default: false
region: 'us-east-1', // Default: 'us-east-1' — used for SigV4 signing region: 'us-east-1', // Default: 'us-east-1' — used for SigV4 signing
}, },
storage: { storage: {
directory: './my-data', // Default: .nogit/bucketsDir directory: './my-data', // Default: .nogit/bucketsDir
@@ -111,16 +149,32 @@ const config: ISmarts3Config = {
expirationDays: 7, expirationDays: 7,
cleanupIntervalMinutes: 60, cleanupIntervalMinutes: 60,
}, },
cluster: { // Optional — omit for standalone mode
enabled: true,
nodeId: 'node-1', // Auto-generated UUID if omitted
quicPort: 4000, // Default: 4000
seedNodes: [], // Addresses of existing cluster members
erasure: {
dataShards: 4, // Default: 4
parityShards: 2, // Default: 2
chunkSizeBytes: 4194304, // Default: 4 MB
},
drives: {
paths: ['/mnt/disk1', '/mnt/disk2'],
},
heartbeatIntervalMs: 5000, // Default: 5000
heartbeatTimeoutMs: 30000, // Default: 30000
},
}; };
const s3 = await Smarts3.createAndStart(config); const storage = await SmartStorage.createAndStart(config);
``` ```
### Common Configurations ### Common Configurations
**CI/CD testing** — silent, clean, fast: **CI/CD testing** — silent, clean, fast:
```typescript ```typescript
const s3 = await Smarts3.createAndStart({ const storage = await SmartStorage.createAndStart({
server: { port: 9999, silent: true }, server: { port: 9999, silent: true },
storage: { cleanSlate: true }, storage: { cleanSlate: true },
}); });
@@ -128,7 +182,7 @@ const s3 = await Smarts3.createAndStart({
**Auth enabled:** **Auth enabled:**
```typescript ```typescript
const s3 = await Smarts3.createAndStart({ const storage = await SmartStorage.createAndStart({
auth: { auth: {
enabled: true, enabled: true,
credentials: [{ accessKeyId: 'test', secretAccessKey: 'test123' }], credentials: [{ accessKeyId: 'test', secretAccessKey: 'test123' }],
@@ -138,7 +192,7 @@ const s3 = await Smarts3.createAndStart({
**CORS for local web dev:** **CORS for local web dev:**
```typescript ```typescript
const s3 = await Smarts3.createAndStart({ const storage = await SmartStorage.createAndStart({
cors: { cors: {
enabled: true, enabled: true,
allowedOrigins: ['http://localhost:5173'], allowedOrigins: ['http://localhost:5173'],
@@ -147,12 +201,12 @@ const s3 = await Smarts3.createAndStart({
}); });
``` ```
## 📤 Usage with AWS SDK v3 ## Usage with AWS SDK v3
```typescript ```typescript
import { S3Client, PutObjectCommand, GetObjectCommand, DeleteObjectCommand } from '@aws-sdk/client-s3'; import { S3Client, PutObjectCommand, GetObjectCommand, DeleteObjectCommand } from '@aws-sdk/client-s3';
const descriptor = await s3.getS3Descriptor(); const descriptor = await storage.getStorageDescriptor();
const client = new S3Client({ const client = new S3Client({
endpoint: `http://${descriptor.endpoint}:${descriptor.port}`, endpoint: `http://${descriptor.endpoint}:${descriptor.port}`,
@@ -161,14 +215,14 @@ const client = new S3Client({
accessKeyId: descriptor.accessKey, accessKeyId: descriptor.accessKey,
secretAccessKey: descriptor.accessSecret, secretAccessKey: descriptor.accessSecret,
}, },
forcePathStyle: true, // Required for path-style S3 forcePathStyle: true, // Required for path-style access
}); });
// Upload // Upload
await client.send(new PutObjectCommand({ await client.send(new PutObjectCommand({
Bucket: 'my-bucket', Bucket: 'my-bucket',
Key: 'hello.txt', Key: 'hello.txt',
Body: 'Hello, S3!', Body: 'Hello, Storage!',
ContentType: 'text/plain', ContentType: 'text/plain',
})); }));
@@ -177,7 +231,7 @@ const { Body } = await client.send(new GetObjectCommand({
Bucket: 'my-bucket', Bucket: 'my-bucket',
Key: 'hello.txt', Key: 'hello.txt',
})); }));
const content = await Body.transformToString(); // "Hello, S3!" const content = await Body.transformToString(); // "Hello, Storage!"
// Delete // Delete
await client.send(new DeleteObjectCommand({ await client.send(new DeleteObjectCommand({
@@ -186,12 +240,12 @@ await client.send(new DeleteObjectCommand({
})); }));
``` ```
## 🪣 Usage with SmartBucket ## Usage with SmartBucket
```typescript ```typescript
import { SmartBucket } from '@push.rocks/smartbucket'; import { SmartBucket } from '@push.rocks/smartbucket';
const smartbucket = new SmartBucket(await s3.getS3Descriptor()); const smartbucket = new SmartBucket(await storage.getStorageDescriptor());
const bucket = await smartbucket.createBucket('my-bucket'); const bucket = await smartbucket.createBucket('my-bucket');
const dir = await bucket.getBaseDirectory(); const dir = await bucket.getBaseDirectory();
@@ -205,9 +259,9 @@ const content = await dir.fastGet('docs/readme.txt');
const files = await dir.listFiles(); const files = await dir.listFiles();
``` ```
## 📤 Multipart Uploads ## Multipart Uploads
For files larger than 5 MB, use multipart uploads. smarts3 handles them with **streaming I/O** — parts are written directly to disk, never buffered in memory. For files larger than 5 MB, use multipart uploads. smartstorage handles them with **streaming I/O** — parts are written directly to disk, never buffered in memory. In cluster mode, each part is independently erasure-coded and distributed.
```typescript ```typescript
import { import {
@@ -244,9 +298,9 @@ await client.send(new CompleteMultipartUploadCommand({
})); }));
``` ```
## 📜 Bucket Policies ## Bucket Policies
smarts3 supports AWS-style bucket policies for fine-grained access control. Policies use the same IAM JSON format as real S3 — so you can develop and test your policy logic locally before deploying. smartstorage supports AWS-style bucket policies for fine-grained access control. Policies use the same IAM JSON format as real S3 — so you can develop and test your policy logic locally before deploying.
When `auth.enabled` is `true`, the auth pipeline works as follows: When `auth.enabled` is `true`, the auth pipeline works as follows:
1. **Authenticate** — verify the AWS SigV4 signature (anonymous requests skip this step) 1. **Authenticate** — verify the AWS SigV4 signature (anonymous requests skip this step)
@@ -255,8 +309,6 @@ When `auth.enabled` is `true`, the auth pipeline works as follows:
### Setting a Bucket Policy ### Setting a Bucket Policy
Use the S3 `PutBucketPolicy` API (or any S3 client that supports it):
```typescript ```typescript
import { PutBucketPolicyCommand } from '@aws-sdk/client-s3'; import { PutBucketPolicyCommand } from '@aws-sdk/client-s3';
@@ -294,38 +346,113 @@ await client.send(new PutBucketPolicyCommand({
Deleting a bucket automatically removes its associated policy. Deleting a bucket automatically removes its associated policy.
## 🧪 Testing Integration ## Clustering Deep Dive 🔗
smartstorage can run as a distributed storage cluster where multiple nodes cooperate to store and retrieve data with built-in redundancy.
### How It Works
```
Client ──HTTP PUT──▶ Node A (coordinator)
├─ Split object into 4 MB chunks
├─ Erasure-code each chunk (4 data + 2 parity = 6 shards)
├──QUIC──▶ Node B (shard writes)
├──QUIC──▶ Node C (shard writes)
└─ Local disk (shard writes)
```
1. **Any node can coordinate** — the client connects to any cluster member
2. **Objects are chunked** — large objects split into fixed-size pieces (default 4 MB)
3. **Each chunk is erasure-coded** — Reed-Solomon produces k data + m parity shards
4. **Shards are distributed** — placed across different nodes and drives for fault isolation
5. **Quorum guarantees consistency** — writes need k+1 acks, reads need k shards
### Erasure Coding
With the default `4+2` configuration:
- Storage overhead: **50%** (vs. 200% for 3x replication)
- Fault tolerance: **any 2 drives/nodes can fail** simultaneously
- Read efficiency: only **4 of 6 shards** needed to reconstruct data
| Config | Total Shards | Overhead | Tolerance | Min Nodes |
|--------|-------------|----------|-----------|-----------|
| 4+2 | 6 | 50% | 2 failures | 3 |
| 6+3 | 9 | 50% | 3 failures | 5 |
| 2+1 | 3 | 50% | 1 failure | 2 |
### QUIC Transport
Inter-node communication uses [QUIC](https://en.wikipedia.org/wiki/QUIC) via the `quinn` library:
- 🔒 **Built-in TLS** — self-signed certs auto-generated at cluster init
- 🔀 **Multiplexed streams** — concurrent shard transfers without head-of-line blocking
- **Connection pooling** — persistent connections to peer nodes
- 🌊 **Natural backpressure** — QUIC flow control prevents overloading slow peers
### Cluster Membership
- **Static seed nodes** — initial cluster defined in config
- **Runtime join** — new nodes can join a running cluster
- **Heartbeat monitoring** — every 5s (configurable), with suspect/offline detection
- **Split-brain prevention** — nodes only mark peers offline when they have majority
### Self-Healing
A background scanner periodically (default: every 24h):
1. Checks shard checksums (CRC32C) for bit-rot detection
2. Identifies shards on offline nodes
3. Reconstructs missing shards from remaining data using Reed-Solomon
4. Places healed shards on healthy drives
Healing runs at low priority to avoid impacting foreground I/O.
### Erasure Set Formation
Drives are organized into fixed **erasure sets** at cluster initialization:
```
3 nodes × 4 drives each = 12 drives total
With 6-shard erasure sets → 2 erasure sets
Set 0: Node1-Disk0, Node2-Disk0, Node3-Disk0, Node1-Disk1, Node2-Disk1, Node3-Disk1
Set 1: Node1-Disk2, Node2-Disk2, Node3-Disk2, Node1-Disk3, Node2-Disk3, Node3-Disk3
```
Drives are interleaved across nodes for maximum fault isolation. New nodes form new erasure sets — existing data is never rebalanced.
## Testing Integration
```typescript ```typescript
import { Smarts3 } from '@push.rocks/smarts3'; import { SmartStorage } from '@push.rocks/smartstorage';
import { tap, expect } from '@git.zone/tstest/tapbundle'; import { tap, expect } from '@git.zone/tstest/tapbundle';
let s3: Smarts3; let storage: SmartStorage;
tap.test('setup', async () => { tap.test('setup', async () => {
s3 = await Smarts3.createAndStart({ storage = await SmartStorage.createAndStart({
server: { port: 4567, silent: true }, server: { port: 4567, silent: true },
storage: { cleanSlate: true }, storage: { cleanSlate: true },
}); });
}); });
tap.test('should store and retrieve objects', async () => { tap.test('should store and retrieve objects', async () => {
await s3.createBucket('test'); await storage.createBucket('test');
// ... your test logic using AWS SDK or SmartBucket // ... your test logic using AWS SDK or SmartBucket
}); });
tap.test('teardown', async () => { tap.test('teardown', async () => {
await s3.stop(); await storage.stop();
}); });
export default tap.start(); export default tap.start();
``` ```
## 🔧 API Reference ## API Reference
### `Smarts3` Class ### `SmartStorage` Class
#### `static createAndStart(config?: ISmarts3Config): Promise<Smarts3>` #### `static createAndStart(config?: ISmartStorageConfig): Promise<SmartStorage>`
Create and start a server in one call. Create and start a server in one call.
@@ -339,11 +466,11 @@ Gracefully stop the server and kill the Rust process.
#### `createBucket(name: string): Promise<{ name: string }>` #### `createBucket(name: string): Promise<{ name: string }>`
Create an S3 bucket. Create a storage bucket.
#### `getS3Descriptor(options?): Promise<IS3Descriptor>` #### `getStorageDescriptor(options?): Promise<IS3Descriptor>`
Get connection details for S3 clients. Returns: Get connection details for S3-compatible clients. Returns:
| Field | Type | Description | | Field | Type | Description |
|-------|------|-------------| |-------|------|-------------|
@@ -353,36 +480,42 @@ Get connection details for S3 clients. Returns:
| `accessSecret` | `string` | Secret key from first configured credential | | `accessSecret` | `string` | Secret key from first configured credential |
| `useSsl` | `boolean` | Always `false` (plain HTTP) | | `useSsl` | `boolean` | Always `false` (plain HTTP) |
## 🏗️ Architecture ## Architecture
smarts3 uses a **hybrid Rust + TypeScript** architecture: smartstorage uses a **hybrid Rust + TypeScript** architecture:
``` ```
┌─────────────────────────────────┐ ┌──────────────────────────────────────────────
│ Your Code (AWS SDK, etc.) │ │ Your Code (AWS SDK, SmartBucket, etc.)
│ ↕ HTTP (localhost:3000) │ │ ↕ HTTP (localhost:3000)
├─────────────────────────────────┤ ├──────────────────────────────────────────────
│ rusts3 binary (Rust) │ │ ruststorage binary (Rust)
│ ├─ hyper 1.x HTTP server │ │ ├─ hyper 1.x HTTP server
│ ├─ S3 path-style routing │ │ ├─ S3 path-style routing
│ ├─ Streaming storage layer │ ├─ StorageBackend (Standalone or Clustered)
├─ Multipart manager │ ├─ FileStore (single-node mode)
├─ SigV4 auth + policy engine │ └─ DistributedStore (cluster mode)
├─ CORS middleware │ ├─ ErasureCoder (Reed-Solomon)
└─ S3 XML response builder │ ├─ ShardStore (per-drive storage)
├─────────────────────────────────┤ │ │ ├─ QuicTransport (quinn) │
TypeScript (thin IPC wrapper) │ ├─ ClusterState & Membership
├─ Smarts3 class │ └─ HealingService
│ ├─ RustBridge (stdin/stdout) │ ├─ SigV4 auth + policy engine
─ Config & S3 descriptor ─ CORS middleware
└─────────────────────────────────┘ │ └─ S3 XML response builder │
├──────────────────────────────────────────────┤
│ TypeScript (thin IPC wrapper) │
│ ├─ SmartStorage class │
│ ├─ RustBridge (stdin/stdout JSON IPC) │
│ └─ Config & S3 descriptor │
└──────────────────────────────────────────────┘
``` ```
**Why Rust?** The TypeScript implementation had critical perf issues: OOM on multipart uploads (parts buffered in memory), double stream copying, file descriptor leaks on HEAD requests, full-file reads for range requests, and no backpressure. The Rust binary solves all of these with streaming I/O, zero-copy, and direct `seek()` for range requests. **Why Rust?** The original TypeScript implementation had critical perf issues: OOM on multipart uploads (parts buffered in memory), double stream copying, file descriptor leaks on HEAD requests, full-file reads for range requests, and no backpressure. The Rust binary solves all of these with streaming I/O, zero-copy, and direct `seek()` for range requests.
**IPC Protocol:** TypeScript spawns the `rusts3` binary with `--management` and communicates via newline-delimited JSON over stdin/stdout. Commands: `start`, `stop`, `createBucket`. **IPC Protocol:** TypeScript spawns the `ruststorage` binary with `--management` and communicates via newline-delimited JSON over stdin/stdout. Commands: `start`, `stop`, `createBucket`, `clusterStatus`.
### S3 Operations Supported ### S3-Compatible Operations
| Operation | Method | Path | | Operation | Method | Path |
|-----------|--------|------| |-----------|--------|------|
@@ -407,25 +540,39 @@ smarts3 uses a **hybrid Rust + TypeScript** architecture:
### On-Disk Format ### On-Disk Format
**Standalone mode:**
``` ```
{storage.directory}/ {storage.directory}/
{bucket}/ {bucket}/
{key}._S3_object # Object data {key}._storage_object # Object data
{key}._S3_object.metadata.json # Metadata (content-type, x-amz-meta-*, etc.) {key}._storage_object.metadata.json # Metadata (content-type, x-amz-meta-*, etc.)
{key}._S3_object.md5 # Cached MD5 hash {key}._storage_object.md5 # Cached MD5 hash
.multipart/ .multipart/
{upload-id}/ {upload-id}/
metadata.json # Upload metadata (bucket, key, parts) metadata.json # Upload metadata
part-1 # Part data files part-1, part-2, ... # Part data files
part-2
...
.policies/ .policies/
{bucket}.policy.json # Bucket policy (IAM JSON format) {bucket}.policy.json # Bucket policy (IAM JSON format)
``` ```
## 🔗 Related Packages **Cluster mode:**
```
{drive_path}/.smartstorage/
format.json # Drive metadata (cluster ID, erasure set)
data/{bucket}/{key_hash}/{key}/
chunk-{N}/shard-{M}.dat # Erasure-coded shard data
chunk-{N}/shard-{M}.meta # Shard metadata (checksum, size)
- [`@push.rocks/smartbucket`](https://code.foss.global/push.rocks/smartbucket) — High-level S3 abstraction layer {storage.directory}/
.manifests/{bucket}/
{key}.manifest.json # Object manifest (shard placements, checksums)
.buckets/{bucket}/ # Bucket metadata
.policies/{bucket}.policy.json # Bucket policies
```
## Related Packages
- [`@push.rocks/smartbucket`](https://code.foss.global/push.rocks/smartbucket) — High-level S3-compatible abstraction layer
- [`@push.rocks/smartrust`](https://code.foss.global/push.rocks/smartrust) — TypeScript ↔ Rust IPC bridge - [`@push.rocks/smartrust`](https://code.foss.global/push.rocks/smartrust) — TypeScript ↔ Rust IPC bridge
- [`@git.zone/tsrust`](https://code.foss.global/git.zone/tsrust) — Rust cross-compilation for npm packages - [`@git.zone/tsrust`](https://code.foss.global/git.zone/tsrust) — Rust cross-compilation for npm packages

979
rust/Cargo.lock generated

File diff suppressed because it is too large Load Diff

View File

@@ -1,10 +1,10 @@
[package] [package]
name = "rusts3" name = "ruststorage"
version = "0.1.0" version = "0.1.0"
edition = "2021" edition = "2021"
[[bin]] [[bin]]
name = "rusts3" name = "ruststorage"
path = "src/main.rs" path = "src/main.rs"
[dependencies] [dependencies]
@@ -28,6 +28,16 @@ percent-encoding = "2"
url = "2" url = "2"
chrono = { version = "0.4", features = ["serde"] } chrono = { version = "0.4", features = ["serde"] }
futures-core = "0.3" futures-core = "0.3"
futures = "0.3"
async-trait = "0.1"
reed-solomon-erasure = { version = "6", features = ["simd-accel"] }
xxhash-rust = { version = "0.8", features = ["xxh64"] }
crc32c = "0.6"
bincode = "1"
quinn = "0.11"
rustls = { version = "0.23", default-features = false, features = ["ring", "std"] }
rcgen = "0.13"
dashmap = "6"
hmac = "0.12" hmac = "0.12"
sha2 = "0.10" sha2 = "0.10"
hex = "0.4" hex = "0.4"

View File

@@ -2,9 +2,9 @@ use hyper::body::Incoming;
use hyper::{Method, Request}; use hyper::{Method, Request};
use std::collections::HashMap; use std::collections::HashMap;
/// S3 actions that map to IAM permission strings. /// Storage actions that map to IAM permission strings.
#[derive(Debug, Clone, PartialEq, Eq)] #[derive(Debug, Clone, PartialEq, Eq)]
pub enum S3Action { pub enum StorageAction {
ListAllMyBuckets, ListAllMyBuckets,
CreateBucket, CreateBucket,
DeleteBucket, DeleteBucket,
@@ -25,28 +25,28 @@ pub enum S3Action {
DeleteBucketPolicy, DeleteBucketPolicy,
} }
impl S3Action { impl StorageAction {
/// Return the IAM-style action string (e.g. "s3:GetObject"). /// Return the IAM-style action string (e.g. "s3:GetObject").
pub fn iam_action(&self) -> &'static str { pub fn iam_action(&self) -> &'static str {
match self { match self {
S3Action::ListAllMyBuckets => "s3:ListAllMyBuckets", StorageAction::ListAllMyBuckets => "s3:ListAllMyBuckets",
S3Action::CreateBucket => "s3:CreateBucket", StorageAction::CreateBucket => "s3:CreateBucket",
S3Action::DeleteBucket => "s3:DeleteBucket", StorageAction::DeleteBucket => "s3:DeleteBucket",
S3Action::HeadBucket => "s3:ListBucket", StorageAction::HeadBucket => "s3:ListBucket",
S3Action::ListBucket => "s3:ListBucket", StorageAction::ListBucket => "s3:ListBucket",
S3Action::GetObject => "s3:GetObject", StorageAction::GetObject => "s3:GetObject",
S3Action::HeadObject => "s3:GetObject", StorageAction::HeadObject => "s3:GetObject",
S3Action::PutObject => "s3:PutObject", StorageAction::PutObject => "s3:PutObject",
S3Action::DeleteObject => "s3:DeleteObject", StorageAction::DeleteObject => "s3:DeleteObject",
S3Action::CopyObject => "s3:PutObject", StorageAction::CopyObject => "s3:PutObject",
S3Action::ListBucketMultipartUploads => "s3:ListBucketMultipartUploads", StorageAction::ListBucketMultipartUploads => "s3:ListBucketMultipartUploads",
S3Action::AbortMultipartUpload => "s3:AbortMultipartUpload", StorageAction::AbortMultipartUpload => "s3:AbortMultipartUpload",
S3Action::InitiateMultipartUpload => "s3:PutObject", StorageAction::InitiateMultipartUpload => "s3:PutObject",
S3Action::UploadPart => "s3:PutObject", StorageAction::UploadPart => "s3:PutObject",
S3Action::CompleteMultipartUpload => "s3:PutObject", StorageAction::CompleteMultipartUpload => "s3:PutObject",
S3Action::GetBucketPolicy => "s3:GetBucketPolicy", StorageAction::GetBucketPolicy => "s3:GetBucketPolicy",
S3Action::PutBucketPolicy => "s3:PutBucketPolicy", StorageAction::PutBucketPolicy => "s3:PutBucketPolicy",
S3Action::DeleteBucketPolicy => "s3:DeleteBucketPolicy", StorageAction::DeleteBucketPolicy => "s3:DeleteBucketPolicy",
} }
} }
} }
@@ -54,7 +54,7 @@ impl S3Action {
/// Context extracted from a request, used for policy evaluation. /// Context extracted from a request, used for policy evaluation.
#[derive(Debug, Clone)] #[derive(Debug, Clone)]
pub struct RequestContext { pub struct RequestContext {
pub action: S3Action, pub action: StorageAction,
pub bucket: Option<String>, pub bucket: Option<String>,
pub key: Option<String>, pub key: Option<String>,
} }
@@ -70,7 +70,7 @@ impl RequestContext {
} }
} }
/// Resolve the S3 action from an incoming HTTP request. /// Resolve the storage action from an incoming HTTP request.
pub fn resolve_action(req: &Request<Incoming>) -> RequestContext { pub fn resolve_action(req: &Request<Incoming>) -> RequestContext {
let method = req.method().clone(); let method = req.method().clone();
let path = req.uri().path().to_string(); let path = req.uri().path().to_string();
@@ -87,7 +87,7 @@ pub fn resolve_action(req: &Request<Incoming>) -> RequestContext {
0 => { 0 => {
// Root: GET / -> ListBuckets // Root: GET / -> ListBuckets
RequestContext { RequestContext {
action: S3Action::ListAllMyBuckets, action: StorageAction::ListAllMyBuckets,
bucket: None, bucket: None,
key: None, key: None,
} }
@@ -98,15 +98,15 @@ pub fn resolve_action(req: &Request<Incoming>) -> RequestContext {
let has_uploads = query.contains_key("uploads"); let has_uploads = query.contains_key("uploads");
let action = match (&method, has_policy, has_uploads) { let action = match (&method, has_policy, has_uploads) {
(&Method::GET, true, _) => S3Action::GetBucketPolicy, (&Method::GET, true, _) => StorageAction::GetBucketPolicy,
(&Method::PUT, true, _) => S3Action::PutBucketPolicy, (&Method::PUT, true, _) => StorageAction::PutBucketPolicy,
(&Method::DELETE, true, _) => S3Action::DeleteBucketPolicy, (&Method::DELETE, true, _) => StorageAction::DeleteBucketPolicy,
(&Method::GET, _, true) => S3Action::ListBucketMultipartUploads, (&Method::GET, _, true) => StorageAction::ListBucketMultipartUploads,
(&Method::GET, _, _) => S3Action::ListBucket, (&Method::GET, _, _) => StorageAction::ListBucket,
(&Method::PUT, _, _) => S3Action::CreateBucket, (&Method::PUT, _, _) => StorageAction::CreateBucket,
(&Method::DELETE, _, _) => S3Action::DeleteBucket, (&Method::DELETE, _, _) => StorageAction::DeleteBucket,
(&Method::HEAD, _, _) => S3Action::HeadBucket, (&Method::HEAD, _, _) => StorageAction::HeadBucket,
_ => S3Action::ListBucket, _ => StorageAction::ListBucket,
}; };
RequestContext { RequestContext {
@@ -125,16 +125,16 @@ pub fn resolve_action(req: &Request<Incoming>) -> RequestContext {
let has_uploads = query.contains_key("uploads"); let has_uploads = query.contains_key("uploads");
let action = match &method { let action = match &method {
&Method::PUT if has_part_number && has_upload_id => S3Action::UploadPart, &Method::PUT if has_part_number && has_upload_id => StorageAction::UploadPart,
&Method::PUT if has_copy_source => S3Action::CopyObject, &Method::PUT if has_copy_source => StorageAction::CopyObject,
&Method::PUT => S3Action::PutObject, &Method::PUT => StorageAction::PutObject,
&Method::GET => S3Action::GetObject, &Method::GET => StorageAction::GetObject,
&Method::HEAD => S3Action::HeadObject, &Method::HEAD => StorageAction::HeadObject,
&Method::DELETE if has_upload_id => S3Action::AbortMultipartUpload, &Method::DELETE if has_upload_id => StorageAction::AbortMultipartUpload,
&Method::DELETE => S3Action::DeleteObject, &Method::DELETE => StorageAction::DeleteObject,
&Method::POST if has_uploads => S3Action::InitiateMultipartUpload, &Method::POST if has_uploads => StorageAction::InitiateMultipartUpload,
&Method::POST if has_upload_id => S3Action::CompleteMultipartUpload, &Method::POST if has_upload_id => StorageAction::CompleteMultipartUpload,
_ => S3Action::GetObject, _ => StorageAction::GetObject,
}; };
RequestContext { RequestContext {
@@ -144,7 +144,7 @@ pub fn resolve_action(req: &Request<Incoming>) -> RequestContext {
} }
} }
_ => RequestContext { _ => RequestContext {
action: S3Action::ListAllMyBuckets, action: StorageAction::ListAllMyBuckets,
bucket: None, bucket: None,
key: None, key: None,
}, },

View File

@@ -4,8 +4,8 @@ use hyper::Request;
use sha2::{Digest, Sha256}; use sha2::{Digest, Sha256};
use std::collections::HashMap; use std::collections::HashMap;
use crate::config::{Credential, S3Config}; use crate::config::{Credential, SmartStorageConfig};
use crate::s3_error::S3Error; use crate::error::StorageError;
type HmacSha256 = Hmac<Sha256>; type HmacSha256 = Hmac<Sha256>;
@@ -27,8 +27,8 @@ struct SigV4Header {
/// Verify the request's SigV4 signature. Returns the caller identity on success. /// Verify the request's SigV4 signature. Returns the caller identity on success.
pub fn verify_request( pub fn verify_request(
req: &Request<Incoming>, req: &Request<Incoming>,
config: &S3Config, config: &SmartStorageConfig,
) -> Result<AuthenticatedIdentity, S3Error> { ) -> Result<AuthenticatedIdentity, StorageError> {
let auth_header = req let auth_header = req
.headers() .headers()
.get("authorization") .get("authorization")
@@ -37,18 +37,18 @@ pub fn verify_request(
// Reject SigV2 // Reject SigV2
if auth_header.starts_with("AWS ") { if auth_header.starts_with("AWS ") {
return Err(S3Error::authorization_header_malformed()); return Err(StorageError::authorization_header_malformed());
} }
if !auth_header.starts_with("AWS4-HMAC-SHA256") { if !auth_header.starts_with("AWS4-HMAC-SHA256") {
return Err(S3Error::authorization_header_malformed()); return Err(StorageError::authorization_header_malformed());
} }
let parsed = parse_auth_header(auth_header)?; let parsed = parse_auth_header(auth_header)?;
// Look up credential // Look up credential
let credential = find_credential(&parsed.access_key_id, config) let credential = find_credential(&parsed.access_key_id, config)
.ok_or_else(S3Error::invalid_access_key_id)?; .ok_or_else(StorageError::invalid_access_key_id)?;
// Get x-amz-date // Get x-amz-date
let amz_date = req let amz_date = req
@@ -60,7 +60,7 @@ pub fn verify_request(
.get("date") .get("date")
.and_then(|v| v.to_str().ok()) .and_then(|v| v.to_str().ok())
}) })
.ok_or_else(|| S3Error::missing_security_header("Missing x-amz-date header"))?; .ok_or_else(|| StorageError::missing_security_header("Missing x-amz-date header"))?;
// Enforce 15-min clock skew // Enforce 15-min clock skew
check_clock_skew(amz_date)?; check_clock_skew(amz_date)?;
@@ -99,7 +99,7 @@ pub fn verify_request(
// Constant-time comparison // Constant-time comparison
if !constant_time_eq(computed_hex.as_bytes(), parsed.signature.as_bytes()) { if !constant_time_eq(computed_hex.as_bytes(), parsed.signature.as_bytes()) {
return Err(S3Error::signature_does_not_match()); return Err(StorageError::signature_does_not_match());
} }
Ok(AuthenticatedIdentity { Ok(AuthenticatedIdentity {
@@ -108,11 +108,11 @@ pub fn verify_request(
} }
/// Parse the Authorization header into its components. /// Parse the Authorization header into its components.
fn parse_auth_header(header: &str) -> Result<SigV4Header, S3Error> { fn parse_auth_header(header: &str) -> Result<SigV4Header, StorageError> {
// Format: AWS4-HMAC-SHA256 Credential=KEY/YYYYMMDD/region/s3/aws4_request, SignedHeaders=h1;h2, Signature=hex // Format: AWS4-HMAC-SHA256 Credential=KEY/YYYYMMDD/region/s3/aws4_request, SignedHeaders=h1;h2, Signature=hex
let after_algo = header let after_algo = header
.strip_prefix("AWS4-HMAC-SHA256") .strip_prefix("AWS4-HMAC-SHA256")
.ok_or_else(S3Error::authorization_header_malformed)? .ok_or_else(StorageError::authorization_header_malformed)?
.trim(); .trim();
let mut credential_str = None; let mut credential_str = None;
@@ -131,17 +131,17 @@ fn parse_auth_header(header: &str) -> Result<SigV4Header, S3Error> {
} }
let credential_str = credential_str let credential_str = credential_str
.ok_or_else(S3Error::authorization_header_malformed)?; .ok_or_else(StorageError::authorization_header_malformed)?;
let signed_headers_str = signed_headers_str let signed_headers_str = signed_headers_str
.ok_or_else(S3Error::authorization_header_malformed)?; .ok_or_else(StorageError::authorization_header_malformed)?;
let signature = signature_str let signature = signature_str
.ok_or_else(S3Error::authorization_header_malformed)? .ok_or_else(StorageError::authorization_header_malformed)?
.to_string(); .to_string();
// Parse credential: KEY/YYYYMMDD/region/s3/aws4_request // Parse credential: KEY/YYYYMMDD/region/s3/aws4_request
let cred_parts: Vec<&str> = credential_str.splitn(5, '/').collect(); let cred_parts: Vec<&str> = credential_str.splitn(5, '/').collect();
if cred_parts.len() < 5 { if cred_parts.len() < 5 {
return Err(S3Error::authorization_header_malformed()); return Err(StorageError::authorization_header_malformed());
} }
let access_key_id = cred_parts[0].to_string(); let access_key_id = cred_parts[0].to_string();
@@ -163,7 +163,7 @@ fn parse_auth_header(header: &str) -> Result<SigV4Header, S3Error> {
} }
/// Find a credential by access key ID. /// Find a credential by access key ID.
fn find_credential<'a>(access_key_id: &str, config: &'a S3Config) -> Option<&'a Credential> { fn find_credential<'a>(access_key_id: &str, config: &'a SmartStorageConfig) -> Option<&'a Credential> {
config config
.auth .auth
.credentials .credentials
@@ -172,17 +172,17 @@ fn find_credential<'a>(access_key_id: &str, config: &'a S3Config) -> Option<&'a
} }
/// Check clock skew (15 minutes max). /// Check clock skew (15 minutes max).
fn check_clock_skew(amz_date: &str) -> Result<(), S3Error> { fn check_clock_skew(amz_date: &str) -> Result<(), StorageError> {
// Parse ISO 8601 basic format: YYYYMMDDTHHMMSSZ // Parse ISO 8601 basic format: YYYYMMDDTHHMMSSZ
let parsed = chrono::NaiveDateTime::parse_from_str(amz_date, "%Y%m%dT%H%M%SZ") let parsed = chrono::NaiveDateTime::parse_from_str(amz_date, "%Y%m%dT%H%M%SZ")
.map_err(|_| S3Error::authorization_header_malformed())?; .map_err(|_| StorageError::authorization_header_malformed())?;
let request_time = chrono::DateTime::<chrono::Utc>::from_naive_utc_and_offset(parsed, chrono::Utc); let request_time = chrono::DateTime::<chrono::Utc>::from_naive_utc_and_offset(parsed, chrono::Utc);
let now = chrono::Utc::now(); let now = chrono::Utc::now();
let diff = (now - request_time).num_seconds().unsigned_abs(); let diff = (now - request_time).num_seconds().unsigned_abs();
if diff > 15 * 60 { if diff > 15 * 60 {
return Err(S3Error::request_time_too_skewed()); return Err(StorageError::request_time_too_skewed());
} }
Ok(()) Ok(())

View File

@@ -0,0 +1,95 @@
use serde::{Deserialize, Serialize};
/// Cluster-mode settings.
///
/// (De)serialized with camelCase field names (e.g. `quicPort`, `seedNodes`).
/// Every field except `enabled` carries a serde default and may be omitted.
#[derive(Debug, Clone, Serialize, Deserialize)]
#[serde(rename_all = "camelCase")]
pub struct ClusterConfig {
/// Cluster on/off switch. No serde default, so it must be present in the input.
pub enabled: bool,
/// Optional node identifier; `None` when omitted from the input.
#[serde(default)]
pub node_id: Option<String>,
/// QUIC port; falls back to `default_quic_port()` (4000) when omitted.
#[serde(default = "default_quic_port")]
pub quic_port: u16,
/// Seed node entries; empty list when omitted.
#[serde(default)]
pub seed_nodes: Vec<String>,
/// Erasure-coding parameters; `ErasureConfig::default()` when omitted.
#[serde(default)]
pub erasure: ErasureConfig,
/// Drive paths for this node; `DriveConfig::default()` (no paths) when omitted.
#[serde(default)]
pub drives: DriveConfig,
/// Heartbeat interval in milliseconds; `default_heartbeat_interval()` (5000) when omitted.
#[serde(default = "default_heartbeat_interval")]
pub heartbeat_interval_ms: u64,
/// Heartbeat timeout in milliseconds; `default_heartbeat_timeout()` (30000) when omitted.
#[serde(default = "default_heartbeat_timeout")]
pub heartbeat_timeout_ms: u64,
}
/// Erasure-coding parameters: how many data and parity shards each chunk is
/// split into, and how large a chunk is.
///
/// (De)serialized with camelCase field names; every field has a serde default.
#[derive(Debug, Clone, Serialize, Deserialize)]
#[serde(rename_all = "camelCase")]
pub struct ErasureConfig {
/// Number of data shards; `default_data_shards()` (4) when omitted.
#[serde(default = "default_data_shards")]
pub data_shards: usize,
/// Number of parity shards; `default_parity_shards()` (2) when omitted.
#[serde(default = "default_parity_shards")]
pub parity_shards: usize,
/// Chunk size in bytes; `default_chunk_size()` (4 * 1024 * 1024) when omitted.
#[serde(default = "default_chunk_size")]
pub chunk_size_bytes: usize,
}
impl ErasureConfig {
    /// Total number of shards per chunk (`data_shards + parity_shards`).
    pub fn total_shards(&self) -> usize {
        self.data_shards + self.parity_shards
    }

    /// Minimum number of shards that must be written for a write to succeed.
    ///
    /// This is `data_shards + 1`, capped at `total_shards()`. Without the cap a
    /// configuration with `parity_shards == 0` would require more shards than
    /// exist, so no write could ever reach quorum. For every configuration with
    /// at least one parity shard the result is unchanged.
    pub fn write_quorum(&self) -> usize {
        self.data_shards
            .saturating_add(1)
            .min(self.total_shards())
    }

    /// Minimum shards needed to reconstruct data (`data_shards`).
    pub fn read_quorum(&self) -> usize {
        self.data_shards
    }
}
impl Default for ErasureConfig {
fn default() -> Self {
Self {
data_shards: default_data_shards(),
parity_shards: default_parity_shards(),
chunk_size_bytes: default_chunk_size(),
}
}
}
/// Drive configuration: the list of drive paths for this node.
///
/// (De)serialized with camelCase field names.
#[derive(Debug, Clone, Serialize, Deserialize)]
#[serde(rename_all = "camelCase")]
pub struct DriveConfig {
/// Drive paths as strings; empty list when omitted from the input.
#[serde(default)]
pub paths: Vec<String>,
}
impl Default for DriveConfig {
fn default() -> Self {
Self { paths: Vec::new() }
}
}
/// Serde default for `ClusterConfig::quic_port`.
fn default_quic_port() -> u16 {
    const DEFAULT_QUIC_PORT: u16 = 4_000;
    DEFAULT_QUIC_PORT
}
/// Serde default for `ClusterConfig::heartbeat_interval_ms` (milliseconds).
fn default_heartbeat_interval() -> u64 {
    const DEFAULT_HEARTBEAT_INTERVAL_MS: u64 = 5_000;
    DEFAULT_HEARTBEAT_INTERVAL_MS
}
/// Serde default for `ClusterConfig::heartbeat_timeout_ms` (milliseconds).
fn default_heartbeat_timeout() -> u64 {
    const DEFAULT_HEARTBEAT_TIMEOUT_MS: u64 = 30_000;
    DEFAULT_HEARTBEAT_TIMEOUT_MS
}
/// Serde default for `ErasureConfig::data_shards`.
fn default_data_shards() -> usize {
    const DEFAULT_DATA_SHARDS: usize = 4;
    DEFAULT_DATA_SHARDS
}
/// Serde default for `ErasureConfig::parity_shards`.
fn default_parity_shards() -> usize {
    const DEFAULT_PARITY_SHARDS: usize = 2;
    DEFAULT_PARITY_SHARDS
}
/// Serde default for `ErasureConfig::chunk_size_bytes`: 4 MiB.
fn default_chunk_size() -> usize {
    const MIB: usize = 1 << 20;
    4 * MIB
}

File diff suppressed because it is too large Load Diff

View File

@@ -0,0 +1,242 @@
use anyhow::Result;
use chrono::{DateTime, Utc};
use serde::{Deserialize, Serialize};
use std::path::{Path, PathBuf};
use tokio::fs;
use super::config::DriveConfig;
// ============================
// Drive format (on-disk metadata)
// ============================
/// Per-drive identity record.
///
/// `DriveManager::format_drives` serializes this as pretty-printed camelCase
/// JSON into `<drive>/.smartstorage/format.json`.
#[derive(Debug, Clone, Serialize, Deserialize)]
#[serde(rename_all = "camelCase")]
pub struct DriveFormat {
/// Identifier of the cluster this drive was formatted for.
pub cluster_id: String,
/// Erasure set this drive is assigned to.
pub erasure_set_id: u32,
/// Position of this drive within its erasure set.
pub drive_index_in_set: u32,
/// Format version; `format_drives` writes `1`.
pub format_version: u32,
}
// ============================
// Drive state tracking
// ============================
/// Status tracked for a single drive.
///
/// `DriveManager::new` chooses between `Online` and `Offline`. Where `Degraded`
/// and `Healing` are assigned is not visible in this part of the file.
#[derive(Debug, Clone, PartialEq, Eq)]
pub enum DriveStatus {
/// Drive is considered usable.
Online,
/// NOTE(review): presumably usable but impaired; confirm against the code that sets it.
Degraded,
/// Drive is considered unavailable.
Offline,
/// NOTE(review): presumably being repaired by the healing service; confirm against the code that sets it.
Healing,
}
/// Capacity, latency and error counters kept per drive.
///
/// `Default` zeroes every counter and stamps `last_check` with the current UTC time.
#[derive(Debug, Clone)]
pub struct DriveStats {
/// Total capacity in bytes.
pub total_bytes: u64,
/// Used capacity in bytes.
pub used_bytes: u64,
/// Average write latency (presumably microseconds, per the `_us` suffix; confirm at the update site).
pub avg_write_latency_us: u64,
/// Average read latency (presumably microseconds, per the `_us` suffix; confirm at the update site).
pub avg_read_latency_us: u64,
/// Number of errors recorded for this drive.
pub error_count: u64,
/// Text of the most recent error, if any.
pub last_error: Option<String>,
/// UTC timestamp of the last check; initialised to the creation time by `Default`.
pub last_check: DateTime<Utc>,
}
impl Default for DriveStats {
fn default() -> Self {
Self {
total_bytes: 0,
used_bytes: 0,
avg_write_latency_us: 0,
avg_read_latency_us: 0,
error_count: 0,
last_error: None,
last_check: Utc::now(),
}
}
}
/// In-memory state of one configured drive.
#[derive(Debug)]
pub struct DriveState {
/// Drive root path as configured (the `.smartstorage` directory lives beneath it).
pub path: PathBuf,
/// Format record read at startup or written by `format_drives`; `None` if there is none yet.
pub format: Option<DriveFormat>,
/// Current status of the drive.
pub status: DriveStatus,
/// Counters for the drive.
pub stats: DriveStats,
}
// ============================
// Drive manager
// ============================
/// Owns the state of every configured drive, in configuration order.
/// Drive indices used by the methods of this type are positions in that order.
pub struct DriveManager {
drives: Vec<DriveState>,
}
impl DriveManager {
/// Initialize drive manager with configured drive paths.
pub async fn new(config: &DriveConfig) -> Result<Self> {
let mut drives = Vec::with_capacity(config.paths.len());
for path_str in &config.paths {
let path = PathBuf::from(path_str);
let storage_dir = path.join(".smartstorage");
// Ensure the drive directory exists
fs::create_dir_all(&storage_dir).await?;
// Try to read existing format
let format = Self::read_format(&storage_dir).await;
let status = if path.exists() {
DriveStatus::Online
} else {
DriveStatus::Offline
};
drives.push(DriveState {
path,
format,
status,
stats: DriveStats::default(),
});
}
Ok(Self { drives })
}
/// Stamp every managed drive for a new cluster.
///
/// `erasure_set_assignments[i]` is `(erasure_set_id, drive_index_in_set)` for
/// drive `i`; the slice must have exactly one entry per managed drive,
/// otherwise an error is returned before anything is written. Each drive
/// gets a `format.json` (format version 1) under `.smartstorage`, and the
/// in-memory state is updated after the write succeeds.
pub async fn format_drives(
    &mut self,
    cluster_id: &str,
    erasure_set_assignments: &[(u32, u32)], // (erasure_set_id, drive_index_in_set)
) -> Result<()> {
    let assignment_count = erasure_set_assignments.len();
    let drive_count = self.drives.len();
    if assignment_count != drive_count {
        anyhow::bail!(
            "Erasure set assignments count ({}) doesn't match drive count ({})",
            assignment_count,
            drive_count
        );
    }

    for (slot, &(set_id, drive_idx)) in erasure_set_assignments.iter().enumerate() {
        let drive = &mut self.drives[slot];
        let stamp = DriveFormat {
            cluster_id: cluster_id.to_string(),
            erasure_set_id: set_id,
            drive_index_in_set: drive_idx,
            format_version: 1,
        };

        let storage_dir = drive.path.join(".smartstorage");
        fs::create_dir_all(&storage_dir).await?;

        let serialized = serde_json::to_string_pretty(&stamp)?;
        fs::write(storage_dir.join("format.json"), serialized).await?;

        drive.format = Some(stamp);
    }

    Ok(())
}
/// Number of drives under management.
pub fn drive_count(&self) -> usize {
    self.drives.as_slice().len()
}

/// State of the drive at `index`, or `None` when the index is out of range.
pub fn drive(&self, index: usize) -> Option<&DriveState> {
    self.drives.as_slice().get(index)
}

/// All drive states, in configuration order.
pub fn drives(&self) -> &[DriveState] {
    self.drives.as_slice()
}
/// Indices of all drives whose status is currently `Online`, in ascending order.
pub fn online_drives(&self) -> Vec<usize> {
    let mut online = Vec::new();
    for (idx, drive) in self.drives.iter().enumerate() {
        if drive.status == DriveStatus::Online {
            online.push(idx);
        }
    }
    online
}
/// Check health of a specific drive by writing and reading a probe file.
pub async fn check_drive_health(&mut self, index: usize) -> Result<DriveStatus> {
let drive = self
.drives
.get_mut(index)
.ok_or_else(|| anyhow::anyhow!("Drive index {} out of range", index))?;
let probe_path = drive.path.join(".smartstorage").join(".health_probe");
let start = std::time::Instant::now();
// Write probe
match fs::write(&probe_path, b"health_check").await {
Ok(()) => {}
Err(e) => {
drive.stats.error_count += 1;
drive.stats.last_error = Some(e.to_string());
drive.status = DriveStatus::Offline;
drive.stats.last_check = Utc::now();
return Ok(DriveStatus::Offline);
}
}
// Read probe
match fs::read(&probe_path).await {
Ok(_) => {}
Err(e) => {
drive.stats.error_count += 1;
drive.stats.last_error = Some(e.to_string());
drive.status = DriveStatus::Offline;
drive.stats.last_check = Utc::now();
return Ok(DriveStatus::Offline);
}
}
// Clean up probe
let _ = fs::remove_file(&probe_path).await;
let latency = start.elapsed();
drive.stats.avg_write_latency_us = latency.as_micros() as u64;
drive.stats.last_check = Utc::now();
// Mark degraded if latency is too high (>5 seconds)
if latency.as_secs() > 5 {
drive.status = DriveStatus::Degraded;
} else {
drive.status = DriveStatus::Online;
}
Ok(drive.status.clone())
}
/// Run health checks on all drives.
pub async fn check_all_drives(&mut self) -> Vec<(usize, DriveStatus)> {
let mut results = Vec::new();
let count = self.drives.len();
for i in 0..count {
match self.check_drive_health(i).await {
Ok(status) => results.push((i, status)),
Err(e) => {
tracing::error!(drive = i, error = %e, "Drive health check failed");
results.push((i, DriveStatus::Offline));
}
}
}
results
}
// Internal helpers
/// Load `format.json` from `storage_dir`.
///
/// Returns `None` when the file cannot be read or does not parse as a
/// `DriveFormat`; the two cases are not distinguished.
async fn read_format(storage_dir: &Path) -> Option<DriveFormat> {
    let raw = match fs::read_to_string(storage_dir.join("format.json")).await {
        Ok(text) => text,
        Err(_) => return None,
    };
    match serde_json::from_str::<DriveFormat>(&raw) {
        Ok(format) => Some(format),
        Err(_) => None,
    }
}
}

246
rust/src/cluster/erasure.rs Normal file
View File

@@ -0,0 +1,246 @@
use anyhow::Result;
use reed_solomon_erasure::galois_8::ReedSolomon;
use super::config::ErasureConfig;
/// Erasure coder that splits data into data+parity shards using Reed-Solomon.
///
/// Objects are processed in fixed-size chunks (stripes). Each chunk is independently
/// erasure-coded, enabling streaming encode/decode without buffering entire objects.
pub struct ErasureCoder {
/// Reed-Solomon codec built from `config.data_shards` / `config.parity_shards`.
rs: ReedSolomon,
/// Copy of the configuration the coder was created with.
config: ErasureConfig,
}
impl ErasureCoder {
/// Build a coder for the shard counts in `config`.
///
/// Fails when the Reed-Solomon codec rejects the data/parity shard counts.
pub fn new(config: &ErasureConfig) -> Result<Self> {
    match ReedSolomon::new(config.data_shards, config.parity_shards) {
        Ok(rs) => Ok(Self {
            rs,
            config: config.clone(),
        }),
        Err(e) => Err(anyhow::anyhow!(
            "Failed to create Reed-Solomon encoder: {:?}",
            e
        )),
    }
}
pub fn config(&self) -> &ErasureConfig {
&self.config
}
/// Erasure-code one chunk into `data_shards + parity_shards` equal-size shards.
///
/// The input is zero-padded up to a multiple of `data_shards` bytes and cut
/// into `data_shards` pieces of `ceil(len / data_shards)` bytes each; the
/// parity pieces are then computed over them.
///
/// The returned vector is ordered as:
/// - `0..data_shards`: data shards
/// - `data_shards..total`: parity shards
///
/// Empty input is rejected with an error.
pub fn encode_chunk(&self, data: &[u8]) -> Result<Vec<Vec<u8>>> {
    let data_count = self.config.data_shards;
    let parity_count = self.config.parity_shards;

    // Bytes per shard: ceil(data_len / data_count)
    let shard_len = (data.len() + data_count - 1) / data_count;
    if shard_len == 0 {
        anyhow::bail!("Cannot encode empty data");
    }

    // Zero-pad so the buffer divides evenly into data_count shards
    let mut buffer = Vec::from(data);
    buffer.resize(shard_len * data_count, 0);

    // Data shards first...
    let mut shards: Vec<Vec<u8>> = Vec::new();
    for piece in buffer.chunks(shard_len) {
        shards.push(piece.to_vec());
    }
    // ...followed by zeroed placeholders for the parity shards
    shards.extend((0..parity_count).map(|_| vec![0u8; shard_len]));

    // Fill in the parity shards in place
    match self.rs.encode(&mut shards) {
        Ok(()) => Ok(shards),
        Err(e) => Err(anyhow::anyhow!("Reed-Solomon encoding failed: {:?}", e)),
    }
}
/// Decode (reconstruct) original data from a partial set of shards.
///
/// `shards` must have length == total_shards (data + parity).
/// At least `data_shards` entries must be `Some`. Missing shards are `None`
/// and are filled in place by the reconstruction.
/// `original_size` is the original data size before padding, used to truncate.
///
/// Returns the reconstructed original data, exactly `original_size` bytes long.
///
/// # Errors
/// - wrong number of shard slots, or fewer than `data_shards` present;
/// - the Reed-Solomon reconstruction fails;
/// - `original_size` exceeds the bytes held by the data shards. This used to
///   return silently shortened data (and pre-allocated `original_size` bytes
///   from an unvalidated value).
pub fn decode_chunk(
    &self,
    shards: &mut Vec<Option<Vec<u8>>>,
    original_size: usize,
) -> Result<Vec<u8>> {
    let k = self.config.data_shards;
    let total = self.config.total_shards();

    if shards.len() != total {
        anyhow::bail!("Expected {} shards, got {}", total, shards.len());
    }

    let available = shards.iter().filter(|s| s.is_some()).count();
    if available < k {
        anyhow::bail!(
            "Need at least {} shards for reconstruction, only {} available",
            k,
            available
        );
    }

    // Reconstruct missing shards
    self.rs
        .reconstruct(shards)
        .map_err(|e| anyhow::anyhow!("Reed-Solomon reconstruction failed: {:?}", e))?;

    // Validate before allocating: every data shard must be present and together
    // they must hold at least `original_size` bytes.
    let mut data_len = 0usize;
    for (i, slot) in shards.iter().take(k).enumerate() {
        match slot {
            Some(shard) => data_len += shard.len(),
            None => anyhow::bail!("Data shard {} missing after reconstruction", i),
        }
    }
    if original_size > data_len {
        anyhow::bail!(
            "Original size {} exceeds reconstructed data length {}",
            original_size,
            data_len
        );
    }

    // Concatenate data shards (first k), stopping at the original size so the
    // zero padding added during encoding is dropped.
    let mut result = Vec::with_capacity(original_size);
    for shard in shards.iter().take(k).flatten() {
        let remaining = original_size - result.len();
        let take = remaining.min(shard.len());
        result.extend_from_slice(&shard[..take]);
    }

    Ok(result)
}
/// Check whether the parity shards are consistent with the data shards.
///
/// Returns `Ok(false)` when the shards disagree, and an error when the
/// codec rejects the shard set.
pub fn verify(&self, shards: &[Vec<u8>]) -> Result<bool> {
    let mut views: Vec<&[u8]> = Vec::with_capacity(shards.len());
    for shard in shards {
        views.push(shard.as_slice());
    }
    match self.rs.verify(&views) {
        Ok(consistent) => Ok(consistent),
        Err(e) => Err(anyhow::anyhow!(
            "Reed-Solomon verification failed: {:?}",
            e
        )),
    }
}
}
// Unit tests for ErasureCoder: encode/decode round trips, reconstruction with
// missing shards, the failure case with too few shards, and corruption
// detection via verify().
#[cfg(test)]
mod tests {
use super::*;
// Shared 4 data + 2 parity configuration used by most tests.
fn test_config() -> ErasureConfig {
ErasureConfig {
data_shards: 4,
parity_shards: 2,
chunk_size_bytes: 4 * 1024 * 1024,
}
}
#[test]
fn test_encode_decode_roundtrip() {
let coder = ErasureCoder::new(&test_config()).unwrap();
let original = b"Hello, erasure coding! This is a test of the Reed-Solomon implementation.";
let shards = coder.encode_chunk(original).unwrap();
assert_eq!(shards.len(), 6); // 4 data + 2 parity
// All shards should be the same size
let shard_size = shards[0].len();
for s in &shards {
assert_eq!(s.len(), shard_size);
}
// Reconstruct with all shards present
let mut shard_opts: Vec<Option<Vec<u8>>> = shards.iter().map(|s| Some(s.clone())).collect();
let recovered = coder.decode_chunk(&mut shard_opts, original.len()).unwrap();
assert_eq!(&recovered, original);
}
#[test]
fn test_decode_with_missing_shards() {
let coder = ErasureCoder::new(&test_config()).unwrap();
let original = b"Testing reconstruction with missing shards - this should work with 4 of 6.";
let shards = coder.encode_chunk(original).unwrap();
// Remove 2 shards (the maximum we can tolerate with 2 parity)
let mut shard_opts: Vec<Option<Vec<u8>>> = shards.iter().map(|s| Some(s.clone())).collect();
shard_opts[1] = None; // Remove data shard 1
shard_opts[4] = None; // Remove parity shard 0
let recovered = coder.decode_chunk(&mut shard_opts, original.len()).unwrap();
assert_eq!(&recovered, original);
}
#[test]
fn test_decode_with_too_many_missing() {
let coder = ErasureCoder::new(&test_config()).unwrap();
let original = b"This should fail with 3 missing shards.";
let shards = coder.encode_chunk(original).unwrap();
// Remove 3 shards (more than parity count of 2)
let mut shard_opts: Vec<Option<Vec<u8>>> = shards.iter().map(|s| Some(s.clone())).collect();
shard_opts[0] = None;
shard_opts[2] = None;
shard_opts[5] = None;
let result = coder.decode_chunk(&mut shard_opts, original.len());
assert!(result.is_err());
}
#[test]
fn test_encode_large_data() {
let coder = ErasureCoder::new(&test_config()).unwrap();
// 1 MB of data
let original: Vec<u8> = (0..1_000_000).map(|i| (i % 256) as u8).collect();
let shards = coder.encode_chunk(&original).unwrap();
assert_eq!(shards.len(), 6);
// Each shard should be ~250KB (1MB / 4 data shards, rounded up)
let expected_shard_size = (original.len() + 3) / 4;
assert_eq!(shards[0].len(), expected_shard_size);
// Verify roundtrip
let mut shard_opts: Vec<Option<Vec<u8>>> = shards.iter().map(|s| Some(s.clone())).collect();
let recovered = coder.decode_chunk(&mut shard_opts, original.len()).unwrap();
assert_eq!(recovered, original);
}
#[test]
fn test_verify_shards() {
let coder = ErasureCoder::new(&test_config()).unwrap();
let original = b"Verify test data";
let shards = coder.encode_chunk(original).unwrap();
assert!(coder.verify(&shards).unwrap());
// Corrupt a shard
let mut corrupted = shards.clone();
corrupted[0][0] ^= 0xFF;
assert!(!coder.verify(&corrupted).unwrap());
}
#[test]
fn test_small_config() {
// Minimum viable: 2 data + 1 parity
let config = ErasureConfig {
data_shards: 2,
parity_shards: 1,
chunk_size_bytes: 1024,
};
let coder = ErasureCoder::new(&config).unwrap();
let original = b"Small config test";
let shards = coder.encode_chunk(original).unwrap();
assert_eq!(shards.len(), 3);
// Remove 1 shard
let mut shard_opts: Vec<Option<Vec<u8>>> = shards.iter().map(|s| Some(s.clone())).collect();
shard_opts[0] = None;
let recovered = coder.decode_chunk(&mut shard_opts, original.len()).unwrap();
assert_eq!(&recovered, original);
}
}

344
rust/src/cluster/healing.rs Normal file
View File

@@ -0,0 +1,344 @@
use anyhow::Result;
use std::path::PathBuf;
use std::sync::Arc;
use std::time::Duration;
use tokio::fs;
use super::config::ErasureConfig;
use super::erasure::ErasureCoder;
use super::metadata::ObjectManifest;
use super::shard_store::{ShardId, ShardStore};
use super::state::ClusterState;
/// Background healing service that scans for under-replicated shards
/// and reconstructs them.
pub struct HealingService {
/// Shared cluster state; queried for offline nodes, quorum and the local node id.
state: Arc<ClusterState>,
/// Coder used to reconstruct shards.
erasure_coder: ErasureCoder,
/// Shard stores of the local drives, indexed by the numeric drive id found
/// in shard placements.
local_shard_stores: Vec<Arc<ShardStore>>,
/// Root directory holding one sub-directory of manifests per bucket.
manifest_dir: PathBuf,
/// Time between two healing scans.
scan_interval: Duration,
}
impl HealingService {
/// Create a healing service.
///
/// `scan_interval_hours` is converted to a `Duration`; the erasure coder is
/// built from `erasure_config`, and its construction error is propagated.
pub fn new(
    state: Arc<ClusterState>,
    erasure_config: &ErasureConfig,
    local_shard_stores: Vec<Arc<ShardStore>>,
    manifest_dir: PathBuf,
    scan_interval_hours: u64,
) -> Result<Self> {
    let erasure_coder = ErasureCoder::new(erasure_config)?;
    let scan_interval = Duration::from_secs(scan_interval_hours * 3600);
    Ok(Self {
        state,
        erasure_coder,
        local_shard_stores,
        manifest_dir,
        scan_interval,
    })
}
/// Run the healing loop as a background task.
pub async fn run(&self, mut shutdown: tokio::sync::watch::Receiver<bool>) {
let mut interval = tokio::time::interval(self.scan_interval);
// Skip the first immediate tick
interval.tick().await;
loop {
tokio::select! {
_ = interval.tick() => {
tracing::info!("Starting healing scan");
match self.heal_scan().await {
Ok(stats) => {
tracing::info!(
checked = stats.shards_checked,
healed = stats.shards_healed,
errors = stats.errors,
"Healing scan completed"
);
}
Err(e) => {
tracing::error!("Healing scan failed: {}", e);
}
}
}
_ = shutdown.changed() => {
tracing::info!("Healing service shutting down");
break;
}
}
}
}
/// Walk every bucket under `manifest_dir` and heal the shards that live on
/// offline nodes.
///
/// Returns early with empty stats when no node is offline, when this node
/// does not see a majority of the cluster (to avoid healing during a
/// split-brain), or when the manifest directory cannot be listed. Entries
/// that are not directories, and names starting with `.`, are skipped.
async fn heal_scan(&self) -> Result<HealStats> {
    let mut stats = HealStats::default();

    let offline_nodes = self.state.offline_nodes().await;
    if offline_nodes.is_empty() {
        tracing::debug!("No offline nodes, skipping heal scan");
        return Ok(stats);
    }

    // Check that we have majority before healing (split-brain prevention)
    let has_quorum = self.state.has_majority().await;
    if !has_quorum {
        tracing::warn!("No majority quorum, skipping heal to prevent split-brain");
        return Ok(stats);
    }

    tracing::info!(
        "Found {} offline nodes, scanning for affected shards",
        offline_nodes.len()
    );

    // One directory per bucket under manifest_dir
    let mut bucket_entries = match fs::read_dir(&self.manifest_dir).await {
        Err(_) => return Ok(stats),
        Ok(entries) => entries,
    };

    loop {
        let bucket_entry = match bucket_entries.next_entry().await? {
            Some(entry) => entry,
            None => break,
        };

        let is_dir = bucket_entry.metadata().await?.is_dir();
        if !is_dir {
            continue;
        }
        let bucket_name = bucket_entry.file_name().to_string_lossy().to_string();
        if !bucket_name.starts_with('.') {
            // Scan manifests in this bucket
            self.heal_bucket(&bucket_name, &offline_nodes, &mut stats)
                .await;

            // Yield to avoid starving foreground I/O
            tokio::task::yield_now().await;
        }
    }

    Ok(stats)
}
async fn heal_bucket(
&self,
bucket: &str,
offline_nodes: &[String],
stats: &mut HealStats,
) {
let bucket_dir = self.manifest_dir.join(bucket);
let manifests = match self.collect_manifests(&bucket_dir).await {
Ok(m) => m,
Err(e) => {
tracing::warn!(bucket = bucket, error = %e, "Failed to list manifests");
stats.errors += 1;
return;
}
};
let local_id = self.state.local_node_id().to_string();
for manifest in &manifests {
for chunk in &manifest.chunks {
// Check if any shard in this chunk is on an offline node
let affected: Vec<_> = chunk
.shard_placements
.iter()
.filter(|p| offline_nodes.contains(&p.node_id))
.collect();
if affected.is_empty() {
continue;
}
stats.shards_checked += chunk.shard_placements.len() as u64;
// Try to reconstruct missing shards from available ones
let k = manifest.data_shards;
let total = manifest.data_shards + manifest.parity_shards;
// Count available shards (those NOT on offline nodes)
let available_count = chunk
.shard_placements
.iter()
.filter(|p| !offline_nodes.contains(&p.node_id))
.count();
if available_count < k {
tracing::error!(
bucket = manifest.bucket,
key = manifest.key,
chunk = chunk.chunk_index,
available = available_count,
needed = k,
"Cannot heal chunk: not enough available shards"
);
stats.errors += 1;
continue;
}
// Fetch available shards (only local ones for now)
let mut shards: Vec<Option<Vec<u8>>> = vec![None; total];
let mut fetched = 0usize;
for placement in &chunk.shard_placements {
if offline_nodes.contains(&placement.node_id) {
continue; // Skip offline nodes
}
if fetched >= k {
break;
}
if placement.node_id == local_id {
let shard_id = ShardId {
bucket: manifest.bucket.clone(),
key: manifest.key.clone(),
chunk_index: chunk.chunk_index,
shard_index: placement.shard_index,
};
let store_idx = placement.drive_id.parse::<usize>().unwrap_or(0);
if let Some(store) = self.local_shard_stores.get(store_idx) {
if let Ok((data, _)) = store.read_shard(&shard_id).await {
shards[placement.shard_index as usize] = Some(data);
fetched += 1;
}
}
}
// TODO: fetch from other online remote nodes
}
if fetched < k {
tracing::warn!(
bucket = manifest.bucket,
key = manifest.key,
chunk = chunk.chunk_index,
"Not enough local shards to heal, skipping"
);
continue;
}
// Reconstruct all shards
let reconstructed = match self.erasure_coder.decode_chunk(
&mut shards,
chunk.data_size,
) {
Ok(_) => true,
Err(e) => {
tracing::error!(
bucket = manifest.bucket,
key = manifest.key,
chunk = chunk.chunk_index,
error = %e,
"Reconstruction failed"
);
stats.errors += 1;
false
}
};
if !reconstructed {
continue;
}
// Re-encode to get all shards back (including the missing ones)
let full_data_size = chunk.data_size;
let mut data_buf = Vec::with_capacity(full_data_size);
for i in 0..k {
if let Some(ref shard) = shards[i] {
data_buf.extend_from_slice(shard);
}
}
data_buf.truncate(full_data_size);
let all_shards = match self.erasure_coder.encode_chunk(&data_buf) {
Ok(s) => s,
Err(e) => {
tracing::error!(error = %e, "Re-encoding for heal failed");
stats.errors += 1;
continue;
}
};
// Write the missing shards to the first available local drive
for affected_placement in &affected {
let shard_idx = affected_placement.shard_index as usize;
if shard_idx < all_shards.len() {
let shard_data = &all_shards[shard_idx];
let checksum = crc32c::crc32c(shard_data);
let shard_id = ShardId {
bucket: manifest.bucket.clone(),
key: manifest.key.clone(),
chunk_index: chunk.chunk_index,
shard_index: affected_placement.shard_index,
};
// Place on first available local drive
if let Some(store) = self.local_shard_stores.first() {
match store.write_shard(&shard_id, shard_data, checksum).await {
Ok(()) => {
stats.shards_healed += 1;
tracing::info!(
bucket = manifest.bucket,
key = manifest.key,
chunk = chunk.chunk_index,
shard = affected_placement.shard_index,
"Shard healed successfully"
);
}
Err(e) => {
tracing::error!(error = %e, "Failed to write healed shard");
stats.errors += 1;
}
}
}
}
}
tokio::task::yield_now().await;
}
}
}
/// Gather every object manifest found under `dir`, descending into
/// sub-directories.
async fn collect_manifests(&self, dir: &std::path::Path) -> Result<Vec<ObjectManifest>> {
    let mut found = Vec::new();
    match self.collect_manifests_recursive(dir, &mut found).await {
        Ok(()) => Ok(found),
        Err(e) => Err(e),
    }
}
fn collect_manifests_recursive<'a>(
&'a self,
dir: &'a std::path::Path,
manifests: &'a mut Vec<ObjectManifest>,
) -> std::pin::Pin<Box<dyn std::future::Future<Output = Result<()>> + Send + 'a>> {
Box::pin(async move {
let mut entries = match fs::read_dir(dir).await {
Ok(e) => e,
Err(_) => return Ok(()),
};
while let Some(entry) = entries.next_entry().await? {
let meta = entry.metadata().await?;
let name = entry.file_name().to_string_lossy().to_string();
if meta.is_dir() {
self.collect_manifests_recursive(&entry.path(), manifests)
.await?;
} else if name.ends_with(".manifest.json") {
if let Ok(content) = fs::read_to_string(entry.path()).await {
if let Ok(manifest) = serde_json::from_str::<ObjectManifest>(&content) {
manifests.push(manifest);
}
}
}
}
Ok(())
})
}
}
/// Counters accumulated over one healing scan.
#[derive(Debug, Default)]
pub struct HealStats {
/// Shard placements belonging to chunks that had at least one shard on an
/// offline node.
pub shards_checked: u64,
/// Shards successfully rewritten.
pub shards_healed: u64,
/// Failures encountered (listing, reconstruction, writes, unhealable chunks).
pub errors: u64,
}

View File

@@ -0,0 +1,226 @@
use anyhow::Result;
use std::net::SocketAddr;
use std::sync::Arc;
use std::time::Duration;
use tokio::sync::Mutex;
use super::drive_manager::{DriveManager, DriveStatus};
use super::protocol::{
ClusterRequest, ClusterResponse, DriveStateInfo, HeartbeatMessage, JoinRequestMessage,
NodeInfo,
};
use super::quic_transport::QuicTransport;
use super::state::ClusterState;
/// Manages cluster membership: heartbeating, joining, failure detection.
pub struct MembershipManager {
/// Shared cluster state updated by joins and heartbeat results.
state: Arc<ClusterState>,
/// Transport used to reach seed nodes and peers.
transport: Arc<QuicTransport>,
/// Time between two heartbeat rounds.
heartbeat_interval: Duration,
/// Description of this node, sent in join requests and registered in the state.
local_node_info: NodeInfo,
/// Optional drive manager; when set, drive health is included in heartbeats.
drive_manager: Option<Arc<Mutex<DriveManager>>>,
}
impl MembershipManager {
/// Create a membership manager without a drive manager attached.
///
/// `heartbeat_interval_ms` is the period between heartbeat rounds. A value of
/// 0 is raised to 1 ms: `heartbeat_loop` feeds the interval to
/// `tokio::time::interval`, which panics on a zero period.
pub fn new(
    state: Arc<ClusterState>,
    transport: Arc<QuicTransport>,
    heartbeat_interval_ms: u64,
    local_node_info: NodeInfo,
) -> Self {
    Self {
        state,
        transport,
        heartbeat_interval: Duration::from_millis(heartbeat_interval_ms.max(1)),
        local_node_info,
        drive_manager: None,
    }
}
/// Builder-style setter: attach the drive manager whose health results are
/// reported in heartbeats.
pub fn with_drive_manager(self, dm: Arc<Mutex<DriveManager>>) -> Self {
    let mut configured = self;
    configured.drive_manager = Some(dm);
    configured
}
/// Join the cluster through the first seed node that accepts us.
///
/// Seeds are tried in order; unparsable addresses and failed attempts are
/// logged and skipped. With no seeds configured, or when no seed could be
/// joined, this node registers itself and starts as the initial cluster node.
/// Always returns `Ok`.
pub async fn join_cluster(&self, seed_nodes: &[String]) -> Result<()> {
    if seed_nodes.is_empty() {
        tracing::info!("No seed nodes configured, starting as initial cluster node");
        self.state.add_node(self.local_node_info.clone()).await;
        return Ok(());
    }

    for seed in seed_nodes {
        let addr = match seed.parse::<SocketAddr>() {
            Ok(parsed) => parsed,
            Err(e) => {
                tracing::warn!("Invalid seed node address '{}': {}", seed, e);
                continue;
            }
        };

        tracing::info!("Attempting to join cluster via seed node {}", seed);
        if let Err(e) = self.try_join(addr).await {
            tracing::warn!("Failed to join via {}: {}", seed, e);
            continue;
        }

        tracing::info!("Successfully joined cluster via {}", seed);
        return Ok(());
    }

    // If no seed responded, start as a new cluster
    tracing::info!("Could not reach any seed nodes, starting as initial cluster node");
    self.state.add_node(self.local_node_info.clone()).await;
    Ok(())
}
/// Send a join request to the seed node at `addr`.
///
/// On acceptance the returned topology (if any) is applied and the local node
/// is registered in the cluster state. The local node is now registered even
/// when the response carries no topology; previously that case returned `Ok`
/// while leaving the state without the local node.
///
/// # Errors
/// Connection or request failure, a rejected join, an error response, or an
/// unexpected response type.
async fn try_join(&self, addr: SocketAddr) -> Result<()> {
    // NOTE(review): every seed connection is requested under the same "seed"
    // id; if the transport caches connections by id, a later seed could reuse
    // an earlier seed's connection — confirm against QuicTransport.
    let conn = self.transport.get_connection("seed", addr).await?;

    let request = ClusterRequest::JoinRequest(JoinRequestMessage {
        node_info: self.local_node_info.clone(),
    });

    let response = self.transport.send_request(&conn, &request).await?;

    match response {
        ClusterResponse::JoinResponse(join_resp) => {
            if !join_resp.accepted {
                anyhow::bail!(
                    "Join rejected: {}",
                    join_resp.error.unwrap_or_default()
                );
            }

            match &join_resp.topology {
                Some(topology) => {
                    self.state.apply_topology(topology).await;
                    tracing::info!(
                        "Applied cluster topology (version {}, {} nodes, {} erasure sets)",
                        topology.version,
                        topology.nodes.len(),
                        topology.erasure_sets.len(),
                    );
                }
                None => {
                    tracing::warn!(
                        "Join accepted without a topology, registering local node only"
                    );
                }
            }

            // Register self in every accepted case.
            self.state.add_node(self.local_node_info.clone()).await;
            Ok(())
        }
        ClusterResponse::Error(e) => {
            anyhow::bail!("Join error: {} - {}", e.code, e.message)
        }
        _ => anyhow::bail!("Unexpected response to join request"),
    }
}
/// Heartbeat driver: sends one round of heartbeats per `heartbeat_interval`
/// tick and stops as soon as `shutdown` changes.
pub async fn heartbeat_loop(self: Arc<Self>, mut shutdown: tokio::sync::watch::Receiver<bool>) {
    let mut ticker = tokio::time::interval(self.heartbeat_interval);

    loop {
        let stop_requested = tokio::select! {
            _ = ticker.tick() => false,
            _ = shutdown.changed() => true,
        };
        if stop_requested {
            break;
        }
        self.send_heartbeats().await;
    }
}
/// Run one heartbeat round.
///
/// Sends a heartbeat (carrying the local drive states and topology version)
/// to each online peer in turn, with a 5-second timeout per peer. Peers with
/// an unparsable address are skipped. The ids of the peers that answered are
/// handed to the cluster state, and resulting status changes are logged.
async fn send_heartbeats(&self) {
    let peers = self.state.online_peers().await;
    let topology_version = self.state.version().await;
    let mut responded = Vec::new();

    // Collect drive health states
    let drive_states = self.collect_drive_states().await;

    for peer in &peers {
        let addr = match peer.quic_addr.parse::<SocketAddr>() {
            Err(_) => continue,
            Ok(parsed) => parsed,
        };

        let message = HeartbeatMessage {
            node_id: self.local_node_info.node_id.clone(),
            timestamp: chrono::Utc::now().to_rfc3339(),
            drive_states: drive_states.clone(),
            topology_version,
        };
        let heartbeat = ClusterRequest::Heartbeat(message);

        let attempt = self.send_heartbeat_to_peer(&peer.node_id, addr, &heartbeat);
        match tokio::time::timeout(Duration::from_secs(5), attempt).await {
            Err(_) => {
                tracing::debug!(peer = %peer.node_id, "Heartbeat timed out");
            }
            Ok(Err(e)) => {
                tracing::debug!(
                    peer = %peer.node_id,
                    error = %e,
                    "Heartbeat failed"
                );
            }
            Ok(Ok(())) => {
                responded.push(peer.node_id.clone());
            }
        }
    }

    // Update state based on responses
    let status_changes = self.state.tick_heartbeats(&responded).await;
    for (node_id, status) in &status_changes {
        tracing::info!(node = %node_id, status = ?status, "Node status changed");
    }
}
/// Deliver `heartbeat` to the peer `node_id` at `addr`.
///
/// Succeeds when any response is received; the response content is ignored.
async fn send_heartbeat_to_peer(
    &self,
    node_id: &str,
    addr: SocketAddr,
    heartbeat: &ClusterRequest,
) -> Result<()> {
    let conn = self.transport.get_connection(node_id, addr).await?;
    self.transport.send_request(&conn, heartbeat).await?;
    Ok(())
}
/// Run a health check on every local drive and convert the results into the
/// wire representation used in heartbeats.
///
/// Returns an empty list when no drive manager is attached. The drive
/// manager lock is held for the duration of the checks.
async fn collect_drive_states(&self) -> Vec<DriveStateInfo> {
    let dm = match self.drive_manager.as_ref() {
        None => return Vec::new(),
        Some(dm) => dm,
    };

    let mut manager = dm.lock().await;
    let checks = manager.check_all_drives().await;

    let mut infos = Vec::new();
    for (idx, status) in checks {
        let label = match status {
            DriveStatus::Online => "online",
            DriveStatus::Degraded => "degraded",
            DriveStatus::Offline => "offline",
            DriveStatus::Healing => "healing",
        };
        infos.push(DriveStateInfo {
            drive_index: idx as u32,
            status: label.to_string(),
        });
    }
    infos
}
}

View File

@@ -0,0 +1,85 @@
use serde::{Deserialize, Serialize};
use std::collections::HashMap;
/// Full manifest describing how an object is stored across erasure-coded shards.
///
/// Serialized with camelCase keys.
#[derive(Debug, Clone, Serialize, Deserialize)]
#[serde(rename_all = "camelCase")]
pub struct ObjectManifest {
/// Bucket name
pub bucket: String,
/// Object key
pub key: String,
/// Unique version ID for this write
pub version_id: String,
/// Total object size in bytes
pub size: u64,
/// MD5 hex digest of the complete object
pub content_md5: String,
/// Content type
pub content_type: String,
/// User metadata (x-amz-meta-*, content-type, etc.)
pub metadata: HashMap<String, String>,
/// When the object was created, as a string.
/// NOTE(review): timestamp format is not visible here — confirm with the writer.
pub created_at: String,
/// Last modified timestamp, as a string (same format caveat as `created_at`).
pub last_modified: String,
/// Number of data shards used
pub data_shards: usize,
/// Number of parity shards used
pub parity_shards: usize,
/// Chunk size in bytes (last chunk may be smaller)
pub chunk_size: usize,
/// Per-chunk shard placement info
pub chunks: Vec<ChunkManifest>,
}
/// Describes the shards for a single chunk of an object.
///
/// Serialized with camelCase keys.
#[derive(Debug, Clone, Serialize, Deserialize)]
#[serde(rename_all = "camelCase")]
pub struct ChunkManifest {
/// Index of this chunk (0-based)
pub chunk_index: u32,
/// Actual data size of this chunk in bytes (before erasure coding and padding)
pub data_size: usize,
/// Where each shard was placed
pub shard_placements: Vec<ShardPlacement>,
}
/// Describes where a specific shard is stored.
///
/// Serialized with camelCase keys.
#[derive(Debug, Clone, Serialize, Deserialize)]
#[serde(rename_all = "camelCase")]
pub struct ShardPlacement {
/// Shard index within the erasure set (0..data_shards+parity_shards)
pub shard_index: u32,
/// Node that holds this shard
pub node_id: String,
/// Drive ID on that node, stored as a string
pub drive_id: String,
/// CRC32C checksum of the shard data
pub checksum: u32,
/// Size of the shard data in bytes
pub shard_size: usize,
}
/// Manifest for a multipart upload in progress.
///
/// Serialized with camelCase keys.
#[derive(Debug, Clone, Serialize, Deserialize)]
#[serde(rename_all = "camelCase")]
pub struct MultipartUploadManifest {
/// Identifier of the upload.
pub upload_id: String,
/// Target bucket.
pub bucket: String,
/// Target object key.
pub key: String,
/// When the upload was initiated, as a string.
pub initiated: String,
/// Metadata supplied for the object.
pub metadata: HashMap<String, String>,
/// Per-part manifests, keyed by part number.
pub parts: HashMap<u32, PartManifest>,
}
/// Manifest for a single part of a multipart upload.
///
/// Serialized with camelCase keys.
#[derive(Debug, Clone, Serialize, Deserialize)]
#[serde(rename_all = "camelCase")]
pub struct PartManifest {
/// Part number within the upload.
pub part_number: u32,
/// Size of the part in bytes.
pub size: u64,
/// MD5 digest of the part.
pub md5: String,
/// Erasure-coded chunks that make up this part.
pub chunks: Vec<ChunkManifest>,
}

12
rust/src/cluster/mod.rs Normal file
View File

@@ -0,0 +1,12 @@
pub mod config;
pub mod coordinator;
pub mod drive_manager;
pub mod erasure;
pub mod healing;
pub mod membership;
pub mod metadata;
pub mod placement;
pub mod protocol;
pub mod quic_transport;
pub mod shard_store;
pub mod state;

View File

@@ -0,0 +1,140 @@
use xxhash_rust::xxh64::xxh64;
/// Map an object to an erasure set index in `0..num_erasure_sets`.
///
/// The index is the xxhash64 (seed 0) of `"{bucket}/{key}"` modulo the number
/// of sets, so every node computes the same placement without shared state.
/// Returns 0 when `num_erasure_sets` is 0.
pub fn erasure_set_for_object(bucket: &str, key: &str, num_erasure_sets: u32) -> u32 {
    match num_erasure_sets {
        0 => 0,
        set_count => {
            let mut hash_input = String::with_capacity(bucket.len() + key.len() + 1);
            hash_input.push_str(bucket);
            hash_input.push('/');
            hash_input.push_str(key);

            let digest = xxh64(hash_input.as_bytes(), 0);
            (digest % u64::from(set_count)) as u32
        }
    }
}
/// Represents a drive location within the cluster topology.
#[derive(Debug, Clone)]
pub struct DriveLocation {
    /// Node that owns the drive.
    pub node_id: String,
    /// Index of the drive on that node.
    pub drive_index: u32,
}

/// An erasure set: a fixed group of drives that together store one complete
/// set of shards for any object placed on them.
#[derive(Debug, Clone)]
pub struct ErasureSet {
    /// Sequential id of the set, starting at 0.
    pub set_id: u32,
    /// Ordered drives: index = shard_index
    pub drives: Vec<DriveLocation>,
}

/// Form erasure sets from the available drives across all nodes.
///
/// Interleaves drives from different nodes for fault isolation:
/// e.g., with 3 nodes x 4 drives and total_shards=6:
///   Set 0: N0-D0, N1-D0, N2-D0, N0-D1, N1-D1, N2-D1
///   Set 1: N0-D2, N1-D2, N2-D2, N0-D3, N1-D3, N2-D3
///
/// `nodes` is a list of `(node_id, drive_count)`. Drives that do not fill a
/// complete set of `total_shards` are left unassigned.
///
/// Returns an empty list when `total_shards` is 0 (this used to panic with a
/// division by zero) or when there are fewer than `total_shards` drives.
pub fn form_erasure_sets(
    nodes: &[(String, u32)], // (node_id, drive_count)
    total_shards: usize,
) -> Vec<ErasureSet> {
    if total_shards == 0 {
        return Vec::new();
    }

    // Collect all drives as (node_id, drive_index), interleaved by node:
    // first drive 0 of every node, then drive 1 of every node, and so on.
    let max_drives = nodes.iter().map(|(_, count)| *count).max().unwrap_or(0);
    let mut all_drives: Vec<DriveLocation> = Vec::new();
    for drive_index in 0..max_drives {
        for (node_id, drive_count) in nodes {
            if drive_index < *drive_count {
                all_drives.push(DriveLocation {
                    node_id: node_id.clone(),
                    drive_index,
                });
            }
        }
    }

    // Form sets of total_shards drives each; chunks_exact drops the remainder.
    all_drives
        .chunks_exact(total_shards)
        .enumerate()
        .map(|(set_idx, drives)| ErasureSet {
            set_id: set_idx as u32,
            drives: drives.to_vec(),
        })
        .collect()
}
// Unit tests for object-to-set hashing and erasure set formation.
#[cfg(test)]
mod tests {
use super::*;
#[test]
fn test_erasure_set_assignment_deterministic() {
let set_a = erasure_set_for_object("mybucket", "mykey", 4);
let set_b = erasure_set_for_object("mybucket", "mykey", 4);
assert_eq!(set_a, set_b);
}
#[test]
fn test_erasure_set_distribution() {
// Check that objects are distributed across sets
let num_sets = 4u32;
let mut counts = vec![0u32; num_sets as usize];
for i in 0..1000 {
let key = format!("key-{}", i);
let set = erasure_set_for_object("bucket", &key, num_sets);
assert!(set < num_sets);
counts[set as usize] += 1;
}
// Each set should have some objects (not all in one set)
for count in &counts {
assert!(*count > 100, "Expected >100, got {}", count);
}
}
#[test]
fn test_form_erasure_sets_3x4() {
// 3 nodes, 4 drives each, 6 shards per set => 2 sets
let nodes = vec![
("node1".to_string(), 4),
("node2".to_string(), 4),
("node3".to_string(), 4),
];
let sets = form_erasure_sets(&nodes, 6);
assert_eq!(sets.len(), 2);
// Set 0 should interleave across nodes
let set0_nodes: Vec<&str> = sets[0].drives.iter().map(|d| d.node_id.as_str()).collect();
assert_eq!(set0_nodes, vec!["node1", "node2", "node3", "node1", "node2", "node3"]);
// Set 1 should also interleave
let set1_nodes: Vec<&str> = sets[1].drives.iter().map(|d| d.node_id.as_str()).collect();
assert_eq!(set1_nodes, vec!["node1", "node2", "node3", "node1", "node2", "node3"]);
// Drive indices should be different between sets
let set0_drives: Vec<u32> = sets[0].drives.iter().map(|d| d.drive_index).collect();
let set1_drives: Vec<u32> = sets[1].drives.iter().map(|d| d.drive_index).collect();
assert_eq!(set0_drives, vec![0, 0, 0, 1, 1, 1]);
assert_eq!(set1_drives, vec![2, 2, 2, 3, 3, 3]);
}
#[test]
fn test_form_erasure_sets_remainder() {
// 2 nodes, 3 drives each, 4 shards => 1 set (2 drives left over)
let nodes = vec![
("a".to_string(), 3),
("b".to_string(), 3),
];
let sets = form_erasure_sets(&nodes, 4);
assert_eq!(sets.len(), 1);
assert_eq!(sets[0].drives.len(), 4);
}
}

View File

@@ -0,0 +1,384 @@
use serde::{Deserialize, Serialize};
use std::collections::HashMap;
use super::metadata::ObjectManifest;
/// All inter-node cluster messages, serialized with bincode over QUIC streams.
///
/// Each message type gets its own bidirectional QUIC stream.
/// For shard data transfers, the header is sent first (bincode),
/// then raw shard bytes follow on the same stream.
///
/// Variants are grouped by purpose: shard operations, manifest operations,
/// cluster management, and healing.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub enum ClusterRequest {
// ============================
// Shard operations
// ============================
/// Write a shard to a specific drive on the target node.
/// Shard data follows after this header on the same stream.
ShardWrite(ShardWriteRequest),
/// Read a shard from the target node.
ShardRead(ShardReadRequest),
/// Delete a shard from the target node.
ShardDelete(ShardDeleteRequest),
/// Check if a shard exists and get its metadata.
ShardHead(ShardHeadRequest),
// ============================
// Manifest operations
// ============================
/// Store an object manifest on the target node.
ManifestWrite(ManifestWriteRequest),
/// Retrieve an object manifest from the target node.
ManifestRead(ManifestReadRequest),
/// Delete an object manifest from the target node.
ManifestDelete(ManifestDeleteRequest),
/// List all manifests for a bucket on the target node.
ManifestList(ManifestListRequest),
// ============================
// Cluster management
// ============================
/// Periodic heartbeat.
Heartbeat(HeartbeatMessage),
/// Request to join the cluster.
JoinRequest(JoinRequestMessage),
/// Synchronize cluster topology.
TopologySync(TopologySyncMessage),
// ============================
// Healing
// ============================
/// Request a shard to be reconstructed and placed on a target drive.
HealRequest(HealRequestMessage),
}
/// Responses to cluster requests.
///
/// Each request variant has a matching response variant; `Error` can be
/// returned in place of any of them. Responses are framed by
/// `encode_response` (4-byte little-endian length prefix + bincode payload).
// NOTE(review): the variant order presumably determines how bincode encodes
// the discriminant -- confirm before reordering or inserting variants.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub enum ClusterResponse {
// Shard ops
/// Result of a `ShardWrite` request.
ShardWriteAck(ShardWriteAck),
/// Header answering a `ShardRead` request; raw shard bytes follow it on the stream when found.
ShardReadResponse(ShardReadResponse),
/// Result of a `ShardDelete` request.
ShardDeleteAck(ShardDeleteAck),
/// Answer to a `ShardHead` request.
ShardHeadResponse(ShardHeadResponse),
// Manifest ops
/// Result of a `ManifestWrite` request.
ManifestWriteAck(ManifestWriteAck),
/// Answer to a `ManifestRead` request.
ManifestReadResponse(ManifestReadResponse),
/// Result of a `ManifestDelete` request.
ManifestDeleteAck(ManifestDeleteAck),
/// Answer to a `ManifestList` request.
ManifestListResponse(ManifestListResponse),
// Cluster mgmt
/// Answer to a `Heartbeat`.
HeartbeatAck(HeartbeatAckMessage),
/// Answer to a `JoinRequest`.
JoinResponse(JoinResponseMessage),
/// Answer to a `TopologySync`.
TopologySyncAck(TopologySyncAckMessage),
// Healing
/// Result of a `HealRequest`.
HealResponse(HealResponseMessage),
// Error
/// Generic failure carrying an error code and message.
Error(ErrorResponse),
}
// ============================
// Shard operation messages
// ============================
/// Header of a shard write. The raw shard bytes follow this header on the
/// same stream; the receiver reads exactly `shard_data_length` of them.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct ShardWriteRequest {
/// Caller-chosen identifier, echoed back in the `ShardWriteAck`.
pub request_id: String,
/// Bucket of the object the shard belongs to.
pub bucket: String,
/// Object key the shard belongs to.
pub key: String,
/// Index of the chunk within the object.
pub chunk_index: u32,
/// Index of the shard within the chunk.
pub shard_index: u32,
/// Number of raw shard bytes that follow the header.
pub shard_data_length: u64,
pub checksum: u32, // crc32c of shard data
// NOTE(review): not consulted by the stream handler in this change set --
// confirm who is expected to consume it.
pub object_metadata: HashMap<String, String>,
}
/// Acknowledgement of a shard write.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct ShardWriteAck {
/// Identifier copied from the originating request.
pub request_id: String,
/// `true` when the shard store accepted the write.
pub success: bool,
/// Textual form of the store error when `success` is `false`.
pub error: Option<String>,
}
/// Request for the data of one shard, addressed by
/// (bucket, key, chunk index, shard index).
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct ShardReadRequest {
/// Caller-chosen identifier, echoed back in the response header.
pub request_id: String,
pub bucket: String,
pub key: String,
pub chunk_index: u32,
pub shard_index: u32,
}
/// Header answering a shard read. When `found` is `true`, exactly
/// `shard_data_length` raw shard bytes follow on the same stream.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct ShardReadResponse {
/// Identifier copied from the originating request.
pub request_id: String,
/// `false` when the shard could not be read; the other fields are then zero.
pub found: bool,
/// Number of raw shard bytes that follow the header.
pub shard_data_length: u64,
/// Checksum taken from the stored shard metadata.
pub checksum: u32,
// Shard data follows on the stream after this header
}
/// Request to delete one shard, addressed by
/// (bucket, key, chunk index, shard index).
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct ShardDeleteRequest {
/// Caller-chosen identifier, echoed back in the `ShardDeleteAck`.
pub request_id: String,
pub bucket: String,
pub key: String,
pub chunk_index: u32,
pub shard_index: u32,
}
/// Acknowledgement of a shard delete.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct ShardDeleteAck {
/// Identifier copied from the originating request.
pub request_id: String,
/// `true` when the shard store reported the delete as successful.
pub success: bool,
}
/// Request for the metadata of one shard without transferring its data.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct ShardHeadRequest {
/// Caller-chosen identifier, echoed back in the `ShardHeadResponse`.
pub request_id: String,
pub bucket: String,
pub key: String,
pub chunk_index: u32,
pub shard_index: u32,
}
/// Answer to a shard head request.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct ShardHeadResponse {
/// Identifier copied from the originating request.
pub request_id: String,
/// `false` when no metadata was available; the other fields are then zero.
pub found: bool,
/// Size of the shard data as recorded in its metadata.
pub data_size: u64,
/// Checksum as recorded in the shard metadata.
pub checksum: u32,
}
// ============================
// Manifest operation messages
// ============================
/// Request to store an object manifest on the target node.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct ManifestWriteRequest {
/// Caller-chosen identifier for correlating the acknowledgement.
pub request_id: String,
/// The manifest to store.
pub manifest: ObjectManifest,
}
/// Acknowledgement of a manifest write.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct ManifestWriteAck {
/// Identifier of the request being acknowledged.
pub request_id: String,
/// Whether the manifest was stored.
pub success: bool,
/// Failure description, if any.
pub error: Option<String>,
}
/// Request for the manifest of the object identified by (bucket, key).
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct ManifestReadRequest {
/// Caller-chosen identifier for correlating the response.
pub request_id: String,
pub bucket: String,
pub key: String,
}
/// Answer to a manifest read.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct ManifestReadResponse {
/// Identifier of the request being answered.
pub request_id: String,
/// Whether a manifest exists for the requested object.
pub found: bool,
// NOTE(review): presumably `Some` exactly when `found` is true -- confirm with the producer.
pub manifest: Option<ObjectManifest>,
}
/// Request to delete the manifest of the object identified by (bucket, key).
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct ManifestDeleteRequest {
/// Caller-chosen identifier for correlating the acknowledgement.
pub request_id: String,
pub bucket: String,
pub key: String,
}
/// Acknowledgement of a manifest delete.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct ManifestDeleteAck {
/// Identifier of the request being acknowledged.
pub request_id: String,
/// Whether the delete succeeded.
pub success: bool,
}
/// Request to list the manifests stored for a bucket on the target node.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct ManifestListRequest {
/// Caller-chosen identifier for correlating the response.
pub request_id: String,
pub bucket: String,
// NOTE(review): presumably restricts the listing to keys starting with
// this prefix when set -- confirm with the handler.
pub prefix: Option<String>,
}
/// Answer to a manifest list request.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct ManifestListResponse {
/// Identifier of the request being answered.
pub request_id: String,
/// The manifests returned by the target node.
pub manifests: Vec<ObjectManifest>,
}
// ============================
// Cluster management messages
// ============================
/// State of a single drive as reported in a heartbeat.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct DriveStateInfo {
/// Index of the drive on the reporting node.
pub drive_index: u32,
pub status: String, // "online", "degraded", "offline", "healing"
}
/// Periodic liveness message sent from one node to a peer.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct HeartbeatMessage {
/// Identifier of the sending node.
pub node_id: String,
// NOTE(review): string timestamp; format is not fixed by this file
// (presumably RFC 3339) -- confirm with the sender.
pub timestamp: String,
/// Per-drive status of the sending node.
pub drive_states: Vec<DriveStateInfo>,
/// Topology version known to the sender.
pub topology_version: u64,
}
/// Reply to a heartbeat.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct HeartbeatAckMessage {
/// Identifier of the replying node.
pub node_id: String,
// NOTE(review): string timestamp; format is not fixed by this file -- confirm with the sender.
pub timestamp: String,
/// Topology version known to the replying node.
pub topology_version: u64,
}
/// Static description of a cluster node as exchanged between peers.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct NodeInfo {
/// Unique identifier of the node; used as the key in the cluster state map.
pub node_id: String,
/// Address of the node's QUIC endpoint (kept as a string).
pub quic_addr: String,
/// Address of the node's S3 endpoint (kept as a string).
pub s3_addr: String,
/// Number of drives the node contributes.
pub drive_count: u32,
// NOTE(review): free-form status string; allowed values are not defined in this file.
pub status: String,
// NOTE(review): presumably the software version of the node -- confirm.
pub version: String,
}
/// Request by a node to join the cluster.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct JoinRequestMessage {
/// Description of the node asking to join.
pub node_info: NodeInfo,
}
/// Snapshot of the cluster layout as exchanged between nodes.
///
/// Produced by `ClusterState::to_topology`; a received snapshot is only
/// applied when its `version` is greater than the local one.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct ClusterTopology {
/// Monotonically increasing topology version.
pub version: u64,
/// Identifier of the cluster this topology belongs to.
pub cluster_id: String,
/// All known nodes.
pub nodes: Vec<NodeInfo>,
/// All erasure sets with their drive locations.
pub erasure_sets: Vec<ErasureSetInfo>,
/// Number of data shards per erasure-coded chunk.
pub data_shards: usize,
/// Number of parity shards per erasure-coded chunk.
pub parity_shards: usize,
}
/// Wire representation of one erasure set.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct ErasureSetInfo {
/// Identifier of the set.
pub set_id: u32,
/// Drives that make up the set, in set order.
pub drives: Vec<DriveLocationInfo>,
}
/// Wire representation of a drive location: a drive index on a given node.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct DriveLocationInfo {
/// Node that hosts the drive.
pub node_id: String,
/// Index of the drive on that node.
pub drive_index: u32,
}
/// Answer to a join request.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct JoinResponseMessage {
/// Whether the node was admitted to the cluster.
pub accepted: bool,
// NOTE(review): presumably the current topology when accepted -- confirm with the producer.
pub topology: Option<ClusterTopology>,
/// Failure description, if any.
pub error: Option<String>,
}
/// Pushes a topology snapshot to a peer.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct TopologySyncMessage {
/// The topology being distributed.
pub topology: ClusterTopology,
}
/// Answer to a topology sync.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct TopologySyncAckMessage {
/// Whether the receiver applied the pushed topology.
pub accepted: bool,
/// Topology version of the receiver.
// NOTE(review): presumably the version after handling the sync -- confirm.
pub current_version: u64,
}
// ============================
// Healing messages
// ============================
/// Request to reconstruct one shard and place it on a target drive.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct HealRequestMessage {
/// Caller-chosen identifier for correlating the response.
pub request_id: String,
pub bucket: String,
pub key: String,
/// Chunk the shard belongs to.
pub chunk_index: u32,
/// Shard to reconstruct.
pub shard_index: u32,
/// Node that should receive the reconstructed shard.
pub target_node_id: String,
/// Drive on the target node that should receive the shard.
pub target_drive_index: u32,
}
/// Result of a heal request.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct HealResponseMessage {
/// Identifier of the request being answered.
pub request_id: String,
/// Whether the shard was healed.
pub success: bool,
/// Failure description, if any.
pub error: Option<String>,
}
// ============================
// Error response
// ============================
/// Generic error reply that can be sent instead of any typed response.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct ErrorResponse {
/// Identifier of the failed request; may be empty when the responder has none.
pub request_id: String,
/// Short machine-readable code, e.g. "NotImplemented".
pub code: String,
/// Human-readable description of the failure.
pub message: String,
}
// ============================
// Wire format helpers
// ============================
/// Serialize a request to bincode bytes with a 4-byte length prefix.
///
/// The prefix is the payload length as a little-endian `u32`.
///
/// # Errors
/// Returns an error if bincode serialization fails, or if the payload is too
/// large to be described by the `u32` prefix (previously the length was
/// silently truncated, producing a frame the peer could not decode).
pub fn encode_request(req: &ClusterRequest) -> anyhow::Result<Vec<u8>> {
    let payload = bincode::serialize(req)?;
    if payload.len() > u32::MAX as usize {
        anyhow::bail!(
            "Request payload of {} bytes exceeds the u32 length prefix",
            payload.len()
        );
    }
    let mut buf = Vec::with_capacity(4 + payload.len());
    // Safe: the bound was checked just above.
    buf.extend_from_slice(&(payload.len() as u32).to_le_bytes());
    buf.extend_from_slice(&payload);
    Ok(buf)
}
/// Serialize a response to bincode bytes with a 4-byte length prefix.
///
/// The prefix is the payload length as a little-endian `u32`.
///
/// # Errors
/// Returns an error if bincode serialization fails, or if the payload is too
/// large to be described by the `u32` prefix (previously the length was
/// silently truncated, producing a frame the peer could not decode).
pub fn encode_response(resp: &ClusterResponse) -> anyhow::Result<Vec<u8>> {
    let payload = bincode::serialize(resp)?;
    if payload.len() > u32::MAX as usize {
        anyhow::bail!(
            "Response payload of {} bytes exceeds the u32 length prefix",
            payload.len()
        );
    }
    let mut buf = Vec::with_capacity(4 + payload.len());
    // Safe: the bound was checked just above.
    buf.extend_from_slice(&(payload.len() as u32).to_le_bytes());
    buf.extend_from_slice(&payload);
    Ok(buf)
}
/// Read a length-prefixed bincode message from raw bytes.
/// Returns (decoded message, bytes consumed).
///
/// `data` must start with the 4-byte little-endian length prefix written by
/// `encode_request`. Bytes after the framed message are ignored, which lets
/// callers continue parsing at the returned offset.
///
/// # Errors
/// Fails when `data` is shorter than the prefix or the announced body, or
/// when the body is not a valid bincode-encoded `ClusterRequest`.
pub fn decode_request(data: &[u8]) -> anyhow::Result<(ClusterRequest, usize)> {
    if data.len() < 4 {
        anyhow::bail!("Not enough data for length prefix");
    }
    let (prefix, body) = data.split_at(4);
    let len = u32::from_le_bytes([prefix[0], prefix[1], prefix[2], prefix[3]]) as usize;
    // Compare against the remaining bytes instead of computing `4 + len`
    // up front, which could overflow `usize` on 32-bit targets.
    if body.len() < len {
        anyhow::bail!("Not enough data for message body");
    }
    let msg: ClusterRequest = bincode::deserialize(&body[..len])?;
    // Cannot overflow: `len <= data.len() - 4`.
    Ok((msg, 4 + len))
}
/// Read a length-prefixed bincode response from raw bytes.
/// Returns (decoded response, bytes consumed).
///
/// `data` must start with the 4-byte little-endian length prefix written by
/// `encode_response`. Bytes after the framed message are ignored.
///
/// # Errors
/// Fails when `data` is shorter than the prefix or the announced body, or
/// when the body is not a valid bincode-encoded `ClusterResponse`.
pub fn decode_response(data: &[u8]) -> anyhow::Result<(ClusterResponse, usize)> {
    if data.len() < 4 {
        anyhow::bail!("Not enough data for length prefix");
    }
    let (prefix, body) = data.split_at(4);
    let len = u32::from_le_bytes([prefix[0], prefix[1], prefix[2], prefix[3]]) as usize;
    // Compare against the remaining bytes instead of computing `4 + len`
    // up front, which could overflow `usize` on 32-bit targets.
    if body.len() < len {
        anyhow::bail!("Not enough data for message body");
    }
    let msg: ClusterResponse = bincode::deserialize(&body[..len])?;
    // Cannot overflow: `len <= data.len() - 4`.
    Ok((msg, 4 + len))
}

View File

@@ -0,0 +1,455 @@
use anyhow::Result;
use dashmap::DashMap;
use quinn::{ClientConfig, Endpoint, ServerConfig as QuinnServerConfig};
use rustls::pki_types::{CertificateDer, PrivateKeyDer, PrivatePkcs8KeyDer};
use std::net::SocketAddr;
use std::sync::Arc;
use tokio::io::{AsyncReadExt, AsyncWriteExt};
use super::protocol::{
self, ClusterRequest, ClusterResponse, ShardReadResponse, ShardWriteAck, ShardWriteRequest,
};
use super::shard_store::{ShardId, ShardStore};
/// QUIC transport layer for inter-node communication.
///
/// Manages a QUIC endpoint for both sending and receiving cluster messages.
/// Uses self-signed TLS certificates generated at init time.
/// Maintains a connection pool to peer nodes.
pub struct QuicTransport {
/// The QUIC endpoint; used to accept incoming connections and to dial peers.
endpoint: Endpoint,
/// Cached connections to peer nodes: node_id -> Connection
connections: Arc<DashMap<String, quinn::Connection>>,
/// Identifier of the node this transport belongs to.
local_node_id: String,
}
impl QuicTransport {
/// Create a new QUIC transport, binding to the specified address.
///
/// The endpoint acts as a server (accepting peers) and as a client
/// (dialing peers), using freshly generated self-signed TLS material.
///
/// # Errors
/// Fails if the TLS configuration cannot be generated or the endpoint
/// cannot bind to `bind_addr`.
pub async fn new(bind_addr: SocketAddr, local_node_id: String) -> Result<Self> {
    let (server_config, client_config) = Self::generate_tls_configs()?;
    let mut endpoint = Endpoint::server(server_config, bind_addr)?;
    // The default client config is stored on the `Endpoint` handle itself,
    // not in the state shared between clones. It therefore has to be set on
    // the handle that is kept; setting it on a throw-away clone would leave
    // `self.endpoint.connect()` without any client configuration.
    endpoint.set_default_client_config(client_config);
    Ok(Self {
        endpoint,
        connections: Arc::new(DashMap::new()),
        local_node_id,
    })
}
/// Get or establish a connection to a peer node.
///
/// A cached connection is reused as long as it has no close reason; a closed
/// one is evicted and replaced by a freshly dialed connection to `addr`.
pub async fn get_connection(
    &self,
    node_id: &str,
    addr: SocketAddr,
) -> Result<quinn::Connection> {
    // Clone the cached handle inside a single expression so the map guard is
    // released before the map is touched again.
    let cached = self
        .connections
        .get(node_id)
        .map(|entry| entry.value().clone());
    match cached {
        Some(live) if live.close_reason().is_none() => return Ok(live),
        Some(_closed) => {
            // Stale entry: evict it and fall through to a fresh dial.
            self.connections.remove(node_id);
        }
        None => {}
    }

    let connecting = self.endpoint.connect(addr, "smartstorage")?;
    let fresh = connecting.await?;
    self.connections.insert(node_id.to_string(), fresh.clone());
    Ok(fresh)
}
/// Send a cluster request and receive the response.
///
/// Opens a dedicated bidirectional stream, writes the framed request,
/// finishes the send side and then reads the whole response (at most 64 MB).
pub async fn send_request(
    &self,
    conn: &quinn::Connection,
    request: &ClusterRequest,
) -> Result<ClusterResponse> {
    // Upper bound for a complete response.
    const MAX_RESPONSE_BYTES: usize = 64 * 1024 * 1024; // 64MB max

    let (mut tx, mut rx) = conn.open_bi().await?;

    let frame = protocol::encode_request(request)?;
    tx.write_all(&frame).await?;
    tx.finish()?;

    let raw = rx.read_to_end(MAX_RESPONSE_BYTES).await?;
    let (decoded, _consumed) = protocol::decode_response(&raw)?;
    Ok(decoded)
}
/// Send a shard write request with streaming data.
///
/// Sends the request header first, then streams the shard data bytes, and
/// finally waits for the acknowledgement.
///
/// # Errors
/// Fails if `request.shard_data_length` does not match `shard_data.len()`
/// (the receiver reads exactly the declared number of bytes, so a mismatch
/// would either stall the write or store a truncated shard), on any
/// transport error, or when the peer answers with an error or an unexpected
/// response type. A returned `ShardWriteAck` may still carry
/// `success == false`.
pub async fn send_shard_write(
    &self,
    conn: &quinn::Connection,
    request: ShardWriteRequest,
    shard_data: &[u8],
) -> Result<ShardWriteAck> {
    let supplied_len = shard_data.len() as u64;
    if request.shard_data_length != supplied_len {
        anyhow::bail!(
            "Shard write length mismatch: header declares {} bytes but {} bytes were supplied",
            request.shard_data_length,
            supplied_len
        );
    }

    let (mut send, mut recv) = conn.open_bi().await?;
    // Send request header
    let encoded = protocol::encode_request(&ClusterRequest::ShardWrite(request))?;
    send.write_all(&encoded).await?;
    // Stream shard data
    send.write_all(shard_data).await?;
    send.finish()?;
    // Read ack (small, hence the tight size limit)
    let response_data = recv.read_to_end(1024).await?;
    let (response, _) = protocol::decode_response(&response_data)?;
    match response {
        ClusterResponse::ShardWriteAck(ack) => Ok(ack),
        ClusterResponse::Error(e) => {
            anyhow::bail!("Shard write error: {} - {}", e.code, e.message)
        }
        other => anyhow::bail!("Unexpected response to shard write: {:?}", other),
    }
}
/// Send a shard read request and receive the shard data.
///
/// Returns `Some((shard_data, checksum))`, or `None` when the peer reports
/// the shard as not found.
///
/// # Errors
/// Fails on transport errors, when the response header is implausibly
/// large, when fewer shard bytes arrive than the header announced, or when
/// the peer answers with an error or an unexpected response type.
pub async fn send_shard_read(
    &self,
    conn: &quinn::Connection,
    request: &ClusterRequest,
) -> Result<Option<(Vec<u8>, u32)>> {
    // The response header only carries a few scalar fields (or an error
    // message); anything beyond this bound is treated as a protocol error
    // instead of being allocated.
    const MAX_HEADER_LEN: usize = 1024 * 1024;

    let (mut send, mut recv) = conn.open_bi().await?;
    // Send request
    let encoded = protocol::encode_request(request)?;
    send.write_all(&encoded).await?;
    send.finish()?;
    // Read response header
    let mut header_len_buf = [0u8; 4];
    recv.read_exact(&mut header_len_buf).await?;
    let header_len = u32::from_le_bytes(header_len_buf) as usize;
    if header_len > MAX_HEADER_LEN {
        anyhow::bail!(
            "Shard read response header too large: {} bytes (max {})",
            header_len,
            MAX_HEADER_LEN
        );
    }
    let mut header_buf = vec![0u8; header_len];
    recv.read_exact(&mut header_buf).await?;
    let response: ClusterResponse = bincode::deserialize(&header_buf)?;
    match response {
        ClusterResponse::ShardReadResponse(read_resp) => {
            if !read_resp.found {
                return Ok(None);
            }
            // Read the shard data that follows. The buffer grows with the
            // bytes actually received rather than being pre-allocated from
            // the peer-supplied length, so a bogus length cannot force a
            // huge allocation.
            let mut shard_data = Vec::new();
            let received = (&mut recv)
                .take(read_resp.shard_data_length)
                .read_to_end(&mut shard_data)
                .await?;
            if received as u64 != read_resp.shard_data_length {
                anyhow::bail!(
                    "Shard read truncated: expected {} bytes, received {}",
                    read_resp.shard_data_length,
                    received
                );
            }
            Ok(Some((shard_data, read_resp.checksum)))
        }
        ClusterResponse::Error(e) => {
            anyhow::bail!("Shard read error: {} - {}", e.code, e.message)
        }
        other => anyhow::bail!("Unexpected response to shard read: {:?}", other),
    }
}
/// Accept incoming connections and dispatch to the handler.
pub async fn accept_loop(
self: Arc<Self>,
shard_store: Arc<ShardStore>,
mut shutdown: tokio::sync::watch::Receiver<bool>,
) {
loop {
tokio::select! {
incoming = self.endpoint.accept() => {
match incoming {
Some(incoming_conn) => {
let transport = self.clone();
let store = shard_store.clone();
tokio::spawn(async move {
match incoming_conn.await {
Ok(conn) => {
transport.handle_connection(conn, store).await;
}
Err(e) => {
tracing::error!("Failed to accept QUIC connection: {}", e);
}
}
});
}
None => break,
}
}
_ = shutdown.changed() => break,
}
}
}
/// Handle a single QUIC connection (may have multiple streams).
///
/// Accepts bidirectional streams until the connection ends and serves each
/// stream on its own task. An application-level close ends the loop quietly;
/// any other connection error is logged first.
async fn handle_connection(
    &self,
    conn: quinn::Connection,
    shard_store: Arc<ShardStore>,
) {
    loop {
        let (send, recv) = match conn.accept_bi().await {
            Ok(streams) => streams,
            Err(quinn::ConnectionError::ApplicationClosed(_)) => return,
            Err(e) => {
                tracing::error!("Connection error: {}", e);
                return;
            }
        };

        let store = shard_store.clone();
        tokio::spawn(async move {
            let outcome = Self::handle_stream(send, recv, store).await;
            if let Err(e) = outcome {
                tracing::error!("Stream handler error: {}", e);
            }
        });
    }
}
/// Handle a single bidirectional stream (one request-response exchange).
async fn handle_stream(
mut send: quinn::SendStream,
mut recv: quinn::RecvStream,
shard_store: Arc<ShardStore>,
) -> Result<()> {
// Read the length-prefixed request header
let mut len_buf = [0u8; 4];
recv.read_exact(&mut len_buf).await?;
let msg_len = u32::from_le_bytes(len_buf) as usize;
let mut msg_buf = vec![0u8; msg_len];
recv.read_exact(&mut msg_buf).await?;
let request: ClusterRequest = bincode::deserialize(&msg_buf)?;
match request {
ClusterRequest::ShardWrite(write_req) => {
// Read shard data from the stream
let mut shard_data = vec![0u8; write_req.shard_data_length as usize];
recv.read_exact(&mut shard_data).await?;
let shard_id = ShardId {
bucket: write_req.bucket,
key: write_req.key,
chunk_index: write_req.chunk_index,
shard_index: write_req.shard_index,
};
let result = shard_store
.write_shard(&shard_id, &shard_data, write_req.checksum)
.await;
let ack = ShardWriteAck {
request_id: write_req.request_id,
success: result.is_ok(),
error: result.err().map(|e| e.to_string()),
};
let response = protocol::encode_response(&ClusterResponse::ShardWriteAck(ack))?;
send.write_all(&response).await?;
send.finish()?;
}
ClusterRequest::ShardRead(read_req) => {
let shard_id = ShardId {
bucket: read_req.bucket,
key: read_req.key,
chunk_index: read_req.chunk_index,
shard_index: read_req.shard_index,
};
match shard_store.read_shard(&shard_id).await {
Ok((data, checksum)) => {
let header = ShardReadResponse {
request_id: read_req.request_id,
found: true,
shard_data_length: data.len() as u64,
checksum,
};
// Send header
let header_bytes = bincode::serialize(&ClusterResponse::ShardReadResponse(header))?;
send.write_all(&(header_bytes.len() as u32).to_le_bytes()).await?;
send.write_all(&header_bytes).await?;
// Send shard data
send.write_all(&data).await?;
send.finish()?;
}
Err(_) => {
let header = ShardReadResponse {
request_id: read_req.request_id,
found: false,
shard_data_length: 0,
checksum: 0,
};
let header_bytes = bincode::serialize(&ClusterResponse::ShardReadResponse(header))?;
send.write_all(&(header_bytes.len() as u32).to_le_bytes()).await?;
send.write_all(&header_bytes).await?;
send.finish()?;
}
}
}
ClusterRequest::ShardDelete(del_req) => {
let shard_id = ShardId {
bucket: del_req.bucket,
key: del_req.key,
chunk_index: del_req.chunk_index,
shard_index: del_req.shard_index,
};
let result = shard_store.delete_shard(&shard_id).await;
let ack = protocol::ClusterResponse::ShardDeleteAck(protocol::ShardDeleteAck {
request_id: del_req.request_id,
success: result.is_ok(),
});
let response = protocol::encode_response(&ack)?;
send.write_all(&response).await?;
send.finish()?;
}
ClusterRequest::ShardHead(head_req) => {
let shard_id = ShardId {
bucket: head_req.bucket,
key: head_req.key,
chunk_index: head_req.chunk_index,
shard_index: head_req.shard_index,
};
let resp = match shard_store.head_shard(&shard_id).await {
Ok(Some(meta)) => protocol::ShardHeadResponse {
request_id: head_req.request_id,
found: true,
data_size: meta.data_size,
checksum: meta.checksum,
},
_ => protocol::ShardHeadResponse {
request_id: head_req.request_id,
found: false,
data_size: 0,
checksum: 0,
},
};
let response =
protocol::encode_response(&ClusterResponse::ShardHeadResponse(resp))?;
send.write_all(&response).await?;
send.finish()?;
}
// Heartbeat, Join, TopologySync, Heal, and Manifest operations
// will be handled by the membership and coordinator modules.
// For now, send a generic ack.
_ => {
let response_data = recv.read_to_end(0).await.unwrap_or_default();
drop(response_data);
let err = protocol::ErrorResponse {
request_id: String::new(),
code: "NotImplemented".to_string(),
message: "This cluster operation is not yet implemented".to_string(),
};
let response = protocol::encode_response(&ClusterResponse::Error(err))?;
send.write_all(&response).await?;
send.finish()?;
}
}
Ok(())
}
/// Generate self-signed TLS certificates for cluster-internal communication.
///
/// Returns the server and client configuration. Both advertise the ALPN
/// protocol `smartstorage`. The client side installs
/// `SkipServerVerification`, i.e. it does not verify the peer's certificate.
fn generate_tls_configs() -> Result<(QuinnServerConfig, ClientConfig)> {
    const ALPN: &[u8] = b"smartstorage";

    // Self-signed certificate and key for the name "smartstorage".
    let generated = rcgen::generate_simple_self_signed(vec!["smartstorage".to_string()])?;
    let certificate = CertificateDer::from(generated.cert);
    let private_key =
        PrivateKeyDer::Pkcs8(PrivatePkcs8KeyDer::from(generated.key_pair.serialize_der()));

    // Server side: present the self-signed certificate, no client auth.
    let mut server_tls = rustls::ServerConfig::builder()
        .with_no_client_auth()
        .with_single_cert(vec![certificate], private_key)?;
    server_tls.alpn_protocols = vec![ALPN.to_vec()];
    let quic_server = quinn::crypto::rustls::QuicServerConfig::try_from(server_tls)?;
    let server_config = QuinnServerConfig::with_crypto(Arc::new(quic_server));

    // Client side: skip server certificate verification (cluster-internal).
    let mut client_tls = rustls::ClientConfig::builder()
        .dangerous()
        .with_custom_certificate_verifier(Arc::new(SkipServerVerification))
        .with_no_client_auth();
    client_tls.alpn_protocols = vec![ALPN.to_vec()];
    let quic_client = quinn::crypto::rustls::QuicClientConfig::try_from(client_tls)?;
    let client_config = ClientConfig::new(Arc::new(quic_client));

    Ok((server_config, client_config))
}
/// Close the QUIC endpoint gracefully.
///
/// Closes with error code 0 and the reason `shutdown`.
pub fn close(&self) {
    let code = quinn::VarInt::from_u32(0);
    self.endpoint.close(code, b"shutdown");
}
/// Get the local node ID.
///
/// This is the identifier passed to `new`.
pub fn local_node_id(&self) -> &str {
    self.local_node_id.as_str()
}
}
/// Certificate verifier that skips verification (for cluster-internal self-signed certs).
///
/// Every certificate and handshake signature is accepted, so a connection
/// using this verifier is encrypted but the peer's identity is NOT
/// authenticated.
// NOTE(review): flagging for a security decision -- with this verifier any
// host that can reach the QUIC port can impersonate a cluster node.
#[derive(Debug)]
struct SkipServerVerification;
impl rustls::client::danger::ServerCertVerifier for SkipServerVerification {
fn verify_server_cert(
&self,
_end_entity: &CertificateDer<'_>,
_intermediates: &[CertificateDer<'_>],
_server_name: &rustls::pki_types::ServerName<'_>,
_ocsp_response: &[u8],
_now: rustls::pki_types::UnixTime,
) -> Result<rustls::client::danger::ServerCertVerified, rustls::Error> {
Ok(rustls::client::danger::ServerCertVerified::assertion())
}
fn verify_tls12_signature(
&self,
_message: &[u8],
_cert: &CertificateDer<'_>,
_dss: &rustls::DigitallySignedStruct,
) -> Result<rustls::client::danger::HandshakeSignatureValid, rustls::Error> {
Ok(rustls::client::danger::HandshakeSignatureValid::assertion())
}
fn verify_tls13_signature(
&self,
_message: &[u8],
_cert: &CertificateDer<'_>,
_dss: &rustls::DigitallySignedStruct,
) -> Result<rustls::client::danger::HandshakeSignatureValid, rustls::Error> {
Ok(rustls::client::danger::HandshakeSignatureValid::assertion())
}
fn supported_verify_schemes(&self) -> Vec<rustls::SignatureScheme> {
vec![
rustls::SignatureScheme::RSA_PKCS1_SHA256,
rustls::SignatureScheme::RSA_PKCS1_SHA384,
rustls::SignatureScheme::RSA_PKCS1_SHA512,
rustls::SignatureScheme::ECDSA_NISTP256_SHA256,
rustls::SignatureScheme::ECDSA_NISTP384_SHA384,
rustls::SignatureScheme::ED25519,
rustls::SignatureScheme::RSA_PSS_SHA256,
rustls::SignatureScheme::RSA_PSS_SHA384,
rustls::SignatureScheme::RSA_PSS_SHA512,
]
}
}

View File

@@ -0,0 +1,226 @@
use anyhow::Result;
use serde::{Deserialize, Serialize};
use std::path::{Path, PathBuf};
use tokio::fs;
use tokio::io::AsyncWriteExt;
/// Identifies a specific shard on disk.
///
/// The four fields together determine the shard's file path inside a
/// `ShardStore`.
#[derive(Debug, Clone, Serialize, Deserialize, PartialEq, Eq, Hash)]
pub struct ShardId {
/// Bucket of the object the shard belongs to.
pub bucket: String,
/// Object key the shard belongs to.
pub key: String,
/// Index of the chunk within the object (directory `chunk-{N}`).
pub chunk_index: u32,
/// Index of the shard within the chunk (file `shard-{M}.dat`).
pub shard_index: u32,
}
/// Per-shard metadata stored alongside shard data.
///
/// Serialized as JSON into the `shard-{M}.meta` file next to the data file.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct ShardMeta {
/// Index of the shard within its chunk.
pub shard_index: u32,
/// Index of the chunk within the object.
pub chunk_index: u32,
/// Length of the shard data in bytes at write time.
pub data_size: u64,
pub checksum: u32, // crc32c
}
/// Manages shard storage on a single drive.
///
/// Layout on disk:
/// ```text
/// {base_path}/.smartstorage/data/{bucket}/{key_prefix}/{key}/
/// chunk-{N}/shard-{M}.dat (shard data)
/// chunk-{N}/shard-{M}.meta (shard metadata JSON)
/// ```
///
/// `{key_prefix}` is a two-digit hex value derived from a hash of the key
/// and serves as directory fan-out.
pub struct ShardStore {
/// Root directory of the drive this store manages.
base_path: PathBuf,
}
impl ShardStore {
pub fn new(base_path: PathBuf) -> Self {
Self { base_path }
}
/// Write a shard to disk atomically (write to temp file, then rename).
pub async fn write_shard(
&self,
shard_id: &ShardId,
data: &[u8],
checksum: u32,
) -> Result<()> {
let shard_path = self.shard_data_path(shard_id);
let meta_path = self.shard_meta_path(shard_id);
// Ensure parent directory exists
if let Some(parent) = shard_path.parent() {
fs::create_dir_all(parent).await?;
}
// Write data atomically via temp file + rename
let temp_data_path = shard_path.with_extension("dat.tmp");
{
let mut file = fs::File::create(&temp_data_path).await?;
file.write_all(data).await?;
file.flush().await?;
file.sync_all().await?;
}
fs::rename(&temp_data_path, &shard_path).await?;
// Write metadata
let meta = ShardMeta {
shard_index: shard_id.shard_index,
chunk_index: shard_id.chunk_index,
data_size: data.len() as u64,
checksum,
};
let meta_json = serde_json::to_string(&meta)?;
let temp_meta_path = meta_path.with_extension("meta.tmp");
fs::write(&temp_meta_path, meta_json).await?;
fs::rename(&temp_meta_path, &meta_path).await?;
Ok(())
}
/// Read a shard's data from disk.
pub async fn read_shard(&self, shard_id: &ShardId) -> Result<(Vec<u8>, u32)> {
let shard_path = self.shard_data_path(shard_id);
let meta_path = self.shard_meta_path(shard_id);
let data = fs::read(&shard_path).await?;
let meta_json = fs::read_to_string(&meta_path).await?;
let meta: ShardMeta = serde_json::from_str(&meta_json)?;
Ok((data, meta.checksum))
}
/// Check if a shard exists and return its metadata.
///
/// Returns `Ok(None)` when the metadata file does not exist.
///
/// # Errors
/// Fails when the metadata file exists but cannot be read or parsed.
pub async fn head_shard(&self, shard_id: &ShardId) -> Result<Option<ShardMeta>> {
    let meta_path = self.shard_meta_path(shard_id);
    // Read directly and map "not found" to `None`: this avoids a blocking
    // `Path::exists` call on the async runtime and the race between checking
    // for the file and reading it.
    let meta_json = match fs::read_to_string(&meta_path).await {
        Ok(json) => json,
        Err(e) if e.kind() == std::io::ErrorKind::NotFound => return Ok(None),
        Err(e) => return Err(e.into()),
    };
    let meta: ShardMeta = serde_json::from_str(&meta_json)?;
    Ok(Some(meta))
}
/// Delete a shard and its metadata.
///
/// Deleting is idempotent: files that are already absent are not an error.
///
/// # Errors
/// Any other removal failure (e.g. missing permissions) is returned instead
/// of being silently ignored, so callers no longer report success while the
/// shard is still on disk. The data file is removed first; if that fails the
/// metadata is left in place so the shard remains visible.
pub async fn delete_shard(&self, shard_id: &ShardId) -> Result<()> {
    let targets = [
        self.shard_data_path(shard_id),
        self.shard_meta_path(shard_id),
    ];
    for path in targets.iter() {
        match fs::remove_file(path).await {
            Ok(()) => {}
            Err(e) if e.kind() == std::io::ErrorKind::NotFound => {}
            Err(e) => return Err(e.into()),
        }
    }
    // Clean up empty parent directories
    self.cleanup_empty_dirs(shard_id).await;
    Ok(())
}
/// List all shard IDs for a given bucket and key (across all chunks).
///
/// Scans the object's directory for `chunk-{N}` sub-directories and, inside
/// each, for `shard-{M}.dat` files. Entries whose names do not parse are
/// skipped. The result is ordered by chunk index, then shard index; a
/// missing object directory yields an empty list.
pub async fn list_shards_for_object(
    &self,
    bucket: &str,
    key: &str,
) -> Result<Vec<ShardId>> {
    let object_dir = self.key_dir(bucket, key);
    let mut shards = Vec::new();
    if !object_dir.exists() {
        return Ok(shards);
    }

    let mut chunk_dirs = fs::read_dir(&object_dir).await?;
    while let Some(chunk_entry) = chunk_dirs.next_entry().await? {
        let chunk_name = chunk_entry.file_name().to_string_lossy().to_string();
        // Only directories named `chunk-<u32>` are of interest.
        let index_text = match chunk_name.strip_prefix("chunk-") {
            Some(text) => text,
            None => continue,
        };
        if !chunk_entry.metadata().await?.is_dir() {
            continue;
        }
        let chunk_index: u32 = match index_text.parse() {
            Ok(idx) => idx,
            Err(_) => continue,
        };

        let mut files = fs::read_dir(chunk_entry.path()).await?;
        while let Some(file) = files.next_entry().await? {
            let file_name = file.file_name().to_string_lossy().to_string();
            // Only files named `shard-<u32>.dat` describe a shard.
            let parsed: Option<u32> = file_name
                .strip_prefix("shard-")
                .and_then(|rest| rest.strip_suffix(".dat"))
                .and_then(|digits| digits.parse().ok());
            if let Some(shard_index) = parsed {
                shards.push(ShardId {
                    bucket: bucket.to_string(),
                    key: key.to_string(),
                    chunk_index,
                    shard_index,
                });
            }
        }
    }

    shards.sort_by_key(|shard| (shard.chunk_index, shard.shard_index));
    Ok(shards)
}
// ============================
// Path helpers
// ============================
/// Root directory for all shard data: `{base_path}/.smartstorage/data`.
fn data_root(&self) -> PathBuf {
    let mut root = self.base_path.clone();
    root.push(".smartstorage");
    root.push("data");
    root
}
/// Fan-out directory name for a key: the lowest byte of the key's xxh64
/// hash (seed 0), rendered as two lowercase hex digits.
fn key_prefix(key: &str) -> String {
    let digest = xxhash_rust::xxh64::xxh64(key.as_bytes(), 0);
    let low_byte = digest & 0xFF;
    format!("{:02x}", low_byte)
}
fn key_dir(&self, bucket: &str, key: &str) -> PathBuf {
self.data_root()
.join(bucket)
.join(Self::key_prefix(key))
.join(key)
}
/// Directory of the chunk the shard belongs to: `{key_dir}/chunk-{N}`.
fn chunk_dir(&self, shard_id: &ShardId) -> PathBuf {
    let mut dir = self.key_dir(&shard_id.bucket, &shard_id.key);
    dir.push(format!("chunk-{}", shard_id.chunk_index));
    dir
}
/// Path of the shard's data file: `{chunk_dir}/shard-{M}.dat`.
fn shard_data_path(&self, shard_id: &ShardId) -> PathBuf {
    let mut path = self.chunk_dir(shard_id);
    path.push(format!("shard-{}.dat", shard_id.shard_index));
    path
}
/// Path of the shard's metadata file: `{chunk_dir}/shard-{M}.meta`.
fn shard_meta_path(&self, shard_id: &ShardId) -> PathBuf {
    let mut path = self.chunk_dir(shard_id);
    path.push(format!("shard-{}.meta", shard_id.shard_index));
    path
}
async fn cleanup_empty_dirs(&self, shard_id: &ShardId) {
// Try to remove chunk dir if empty
let chunk_dir = self.chunk_dir(shard_id);
let _ = fs::remove_dir(&chunk_dir).await; // fails silently if not empty
// Try to remove key dir if empty
let key_dir = self.key_dir(&shard_id.bucket, &shard_id.key);
let _ = fs::remove_dir(&key_dir).await;
// Try to remove prefix dir if empty
if let Some(prefix_dir) = key_dir.parent() {
let _ = fs::remove_dir(prefix_dir).await;
}
}
}

291
rust/src/cluster/state.rs Normal file
View File

@@ -0,0 +1,291 @@
use std::collections::HashMap;
use std::sync::Arc;
use tokio::sync::RwLock;
use super::placement::{DriveLocation, ErasureSet};
use super::protocol::{ClusterTopology, ErasureSetInfo, DriveLocationInfo, NodeInfo};
/// Node status for tracking liveness.
///
/// Derived from the number of consecutive missed heartbeats in
/// `ClusterState::tick_heartbeats`.
#[derive(Debug, Clone, PartialEq, Eq)]
pub enum NodeStatus {
/// Fewer than 2 consecutive heartbeats missed.
Online,
Suspect, // missed 2+ heartbeats
Offline, // missed 5+ heartbeats
}
/// Tracked state for a peer node.
#[derive(Debug, Clone)]
pub struct NodeState {
/// Static description of the node.
pub info: NodeInfo,
/// Current liveness classification.
pub status: NodeStatus,
/// Consecutive heartbeat rounds the node has not responded to.
pub missed_heartbeats: u32,
/// Time the node was registered or last recorded a heartbeat via `record_heartbeat`.
pub last_heartbeat: chrono::DateTime<chrono::Utc>,
}
/// Shared cluster state, protected by RwLock for concurrent access.
pub struct ClusterState {
/// The mutable state; every accessor takes the lock for the duration of its call.
inner: Arc<RwLock<ClusterStateInner>>,
/// Identifier of the local node; excluded from peer queries and heartbeat tracking.
local_node_id: String,
}
/// Lock-protected part of `ClusterState`.
struct ClusterStateInner {
/// Identifier of the cluster.
cluster_id: String,
/// Topology version; incremented on node or erasure-set changes and
/// overwritten when a newer topology is applied.
version: u64,
/// Known nodes keyed by node id.
nodes: HashMap<String, NodeState>,
/// Erasure sets objects are placed on.
erasure_sets: Vec<ErasureSet>,
/// Number of data shards per chunk.
data_shards: usize,
/// Number of parity shards per chunk.
parity_shards: usize,
}
impl ClusterState {
/// Create an empty cluster state: no nodes, no erasure sets, version 0.
pub fn new(
    local_node_id: String,
    cluster_id: String,
    data_shards: usize,
    parity_shards: usize,
) -> Self {
    let initial = ClusterStateInner {
        cluster_id,
        version: 0,
        nodes: HashMap::new(),
        erasure_sets: Vec::new(),
        data_shards,
        parity_shards,
    };
    Self {
        inner: Arc::new(RwLock::new(initial)),
        local_node_id,
    }
}
/// Identifier of the local node, as passed to `new`.
pub fn local_node_id(&self) -> &str {
    self.local_node_id.as_str()
}
/// Register a node in the cluster.
///
/// The node starts as `Online` with no missed heartbeats. An existing entry
/// with the same node id is replaced. The topology version is incremented.
pub async fn add_node(&self, info: NodeInfo) {
    let mut guard = self.inner.write().await;
    let key = info.node_id.clone();
    let fresh_state = NodeState {
        info,
        status: NodeStatus::Online,
        missed_heartbeats: 0,
        last_heartbeat: chrono::Utc::now(),
    };
    guard.nodes.insert(key, fresh_state);
    guard.version += 1;
}
/// Remove a node from the cluster.
///
/// The topology version is incremented only when a node was actually
/// removed. Removing an unknown node is a no-op, so it no longer inflates
/// the version without a topology change.
pub async fn remove_node(&self, node_id: &str) {
    let mut inner = self.inner.write().await;
    if inner.nodes.remove(node_id).is_some() {
        inner.version += 1;
    }
}
/// Update heartbeat for a node (reset missed count).
///
/// Marks the node `Online` and stamps the current time. Unknown node ids
/// are ignored.
pub async fn record_heartbeat(&self, node_id: &str) {
    let mut guard = self.inner.write().await;
    let entry = match guard.nodes.get_mut(node_id) {
        Some(entry) => entry,
        None => return,
    };
    entry.missed_heartbeats = 0;
    entry.status = NodeStatus::Online;
    entry.last_heartbeat = chrono::Utc::now();
}
/// Increment missed heartbeat count for all nodes, updating status.
/// Called by the heartbeat checker when a round completes.
///
/// Nodes listed in `responded_nodes` are reset to zero missed heartbeats and
/// `Online`. Every other node (except the local one) has its counter
/// incremented and is classified as `Suspect` from 2 and `Offline` from 5
/// consecutive misses.
///
/// Returns the `(node_id, new_status)` pairs of all nodes whose status
/// changed in this round.
pub async fn tick_heartbeats(&self, responded_nodes: &[String]) -> Vec<(String, NodeStatus)> {
    // Index the responders once instead of scanning the slice per node.
    let responded: std::collections::HashSet<&str> =
        responded_nodes.iter().map(String::as_str).collect();

    let mut inner = self.inner.write().await;
    let mut status_changes = Vec::new();
    for (node_id, node) in inner.nodes.iter_mut() {
        if *node_id == self.local_node_id {
            continue; // Don't track self
        }
        let new_status = if responded.contains(node_id.as_str()) {
            node.missed_heartbeats = 0;
            NodeStatus::Online
        } else {
            // Saturate so a node that stays offline for a very long time
            // can never overflow the counter.
            node.missed_heartbeats = node.missed_heartbeats.saturating_add(1);
            if node.missed_heartbeats >= 5 {
                NodeStatus::Offline
            } else if node.missed_heartbeats >= 2 {
                NodeStatus::Suspect
            } else {
                NodeStatus::Online
            }
        };
        if new_status != node.status {
            node.status = new_status.clone();
            status_changes.push((node_id.clone(), new_status));
        }
    }
    status_changes
}
/// Set erasure sets (typically done once during cluster formation).
///
/// Replaces the current sets and increments the topology version.
pub async fn set_erasure_sets(&self, sets: Vec<ErasureSet>) {
    let mut guard = self.inner.write().await;
    guard.version += 1;
    guard.erasure_sets = sets;
}
/// Get the erasure set for a given object based on consistent hashing.
///
/// Returns `None` when no erasure sets are configured; otherwise a clone of
/// the set selected by `placement::erasure_set_for_object`.
pub async fn get_erasure_set_for_object(&self, bucket: &str, key: &str) -> Option<ErasureSet> {
    let guard = self.inner.read().await;
    let set_count = guard.erasure_sets.len();
    if set_count == 0 {
        return None;
    }
    let chosen = super::placement::erasure_set_for_object(bucket, key, set_count as u32);
    guard.erasure_sets.get(chosen as usize).cloned()
}
/// Get all erasure sets.
///
/// Returns a snapshot (clone) taken under the read lock.
pub async fn erasure_sets(&self) -> Vec<ErasureSet> {
    let guard = self.inner.read().await;
    guard.erasure_sets.clone()
}
/// Get current topology version.
pub async fn version(&self) -> u64 {
    let guard = self.inner.read().await;
    guard.version
}
/// Get the `NodeInfo` of all online nodes (excluding self).
pub async fn online_peers(&self) -> Vec<NodeInfo> {
    let guard = self.inner.read().await;
    let mut peers = Vec::new();
    for state in guard.nodes.values() {
        let is_self = state.info.node_id == self.local_node_id;
        if state.status == NodeStatus::Online && !is_self {
            peers.push(state.info.clone());
        }
    }
    peers
}
/// Return a cloned `NodeState` for every node in the node map, whatever its
/// status; the local node is included if it is present in the map.
pub async fn all_nodes(&self) -> Vec<NodeState> {
    let state = self.inner.read().await;
    state.nodes.values().cloned().collect()
}
/// Return a clone of the `NodeInfo` stored for `node_id`, or `None` when the
/// node map has no entry for that ID.
pub async fn get_node(&self, node_id: &str) -> Option<NodeInfo> {
    let state = self.inner.read().await;
    let node = state.nodes.get(node_id)?;
    Some(node.info.clone())
}
/// Return the IDs of all nodes whose status is currently `Offline`.
pub async fn offline_nodes(&self) -> Vec<String> {
    let state = self.inner.read().await;
    state
        .nodes
        .values()
        .filter_map(|node| (node.status == NodeStatus::Offline).then(|| node.info.node_id.clone()))
        .collect()
}
/// Report whether strictly more than half of the nodes in the node map are
/// `Online` (intended for split-brain prevention).
///
/// Every entry in the node map is counted; no entry is excluded here. An
/// empty node map is treated as having a majority.
pub async fn has_majority(&self) -> bool {
    let state = self.inner.read().await;

    if state.nodes.is_empty() {
        return true;
    }

    let reachable = state
        .nodes
        .values()
        .filter(|node| node.status == NodeStatus::Online)
        .count();
    let half = state.nodes.len() / 2;
    reachable > half
}
/// Snapshot the current cluster state as a `ClusterTopology` protocol message.
///
/// The message carries the version, cluster ID, shard counts, the `NodeInfo`
/// of every node in the node map, and each erasure set converted to its
/// protocol form (`ErasureSetInfo` / `DriveLocationInfo`). Node health
/// (status, missed heartbeats) is not part of the exported message.
pub async fn to_topology(&self) -> ClusterTopology {
    let state = self.inner.read().await;

    let nodes = state.nodes.values().map(|node| node.info.clone()).collect();

    let erasure_sets = state
        .erasure_sets
        .iter()
        .map(|set| {
            let drives = set
                .drives
                .iter()
                .map(|drive| DriveLocationInfo {
                    node_id: drive.node_id.clone(),
                    drive_index: drive.drive_index,
                })
                .collect();
            ErasureSetInfo {
                set_id: set.set_id,
                drives,
            }
        })
        .collect();

    ClusterTopology {
        version: state.version,
        cluster_id: state.cluster_id.clone(),
        nodes,
        erasure_sets,
        data_shards: state.data_shards,
        parity_shards: state.parity_shards,
    }
}
/// Merge a `ClusterTopology` received from a peer (e.g. during join) into the
/// local state.
///
/// The message is ignored unless its version is strictly greater than the
/// local one. When applied, the cluster ID, version and shard counts are
/// overwritten, and the erasure sets are replaced wholesale. Nodes that are
/// not yet known are inserted as `Online` with zero missed heartbeats and
/// `last_heartbeat` set to the current time. Nodes that are already known are
/// left untouched, and nodes missing from the message are not removed.
pub async fn apply_topology(&self, topology: &ClusterTopology) {
    let mut state = self.inner.write().await;

    // Stale or equal versions carry nothing new.
    if topology.version <= state.version {
        return;
    }

    state.cluster_id = topology.cluster_id.clone();
    state.version = topology.version;
    state.data_shards = topology.data_shards;
    state.parity_shards = topology.parity_shards;

    // Insert previously unknown nodes; existing entries keep their state.
    for incoming in &topology.nodes {
        state
            .nodes
            .entry(incoming.node_id.clone())
            .or_insert_with(|| NodeState {
                info: incoming.clone(),
                status: NodeStatus::Online,
                missed_heartbeats: 0,
                last_heartbeat: chrono::Utc::now(),
            });
    }

    // Rebuild the erasure sets from their protocol representation.
    let mut rebuilt = Vec::with_capacity(topology.erasure_sets.len());
    for set in topology.erasure_sets.iter() {
        let drives = set
            .drives
            .iter()
            .map(|drive| DriveLocation {
                node_id: drive.node_id.clone(),
                drive_index: drive.drive_index,
            })
            .collect();
        rebuilt.push(ErasureSet {
            set_id: set.set_id,
            drives,
        });
    }
    state.erasure_sets = rebuilt.into_iter().collect();
}
}

View File

@@ -1,8 +1,10 @@
use serde::{Deserialize, Serialize}; use serde::{Deserialize, Serialize};
use crate::cluster::config::ClusterConfig;
#[derive(Debug, Clone, Serialize, Deserialize)] #[derive(Debug, Clone, Serialize, Deserialize)]
#[serde(rename_all = "camelCase")] #[serde(rename_all = "camelCase")]
pub struct S3Config { pub struct SmartStorageConfig {
pub server: ServerConfig, pub server: ServerConfig,
pub storage: StorageConfig, pub storage: StorageConfig,
pub auth: AuthConfig, pub auth: AuthConfig,
@@ -10,6 +12,8 @@ pub struct S3Config {
pub logging: LoggingConfig, pub logging: LoggingConfig,
pub limits: LimitsConfig, pub limits: LimitsConfig,
pub multipart: MultipartConfig, pub multipart: MultipartConfig,
#[serde(default)]
pub cluster: Option<ClusterConfig>,
} }
#[derive(Debug, Clone, Serialize, Deserialize)] #[derive(Debug, Clone, Serialize, Deserialize)]

View File

@@ -1,14 +1,14 @@
use hyper::StatusCode; use hyper::StatusCode;
#[derive(Debug, thiserror::Error)] #[derive(Debug, thiserror::Error)]
#[error("S3Error({code}): {message}")] #[error("StorageError({code}): {message}")]
pub struct S3Error { pub struct StorageError {
pub code: String, pub code: String,
pub message: String, pub message: String,
pub status: StatusCode, pub status: StatusCode,
} }
impl S3Error { impl StorageError {
pub fn new(code: &str, message: &str, status: StatusCode) -> Self { pub fn new(code: &str, message: &str, status: StatusCode) -> Self {
Self { Self {
code: code.to_string(), code: code.to_string(),

View File

@@ -1,9 +1,10 @@
mod action; mod action;
mod auth; mod auth;
mod cluster;
mod config; mod config;
mod management; mod management;
mod policy; mod policy;
mod s3_error; mod error;
mod server; mod server;
mod storage; mod storage;
mod xml_response; mod xml_response;
@@ -11,7 +12,7 @@ mod xml_response;
use clap::Parser; use clap::Parser;
#[derive(Parser)] #[derive(Parser)]
#[command(name = "rusts3", about = "High-performance S3-compatible server")] #[command(name = "ruststorage", about = "High-performance S3-compatible storage server")]
struct Cli { struct Cli {
/// Run in management mode (IPC via stdin/stdout) /// Run in management mode (IPC via stdin/stdout)
#[arg(long)] #[arg(long)]
@@ -38,7 +39,7 @@ async fn main() -> anyhow::Result<()> {
management::management_loop().await?; management::management_loop().await?;
} else { } else {
eprintln!("rusts3: use --management flag for IPC mode"); eprintln!("ruststorage: use --management flag for IPC mode");
std::process::exit(1); std::process::exit(1);
} }

View File

@@ -4,8 +4,8 @@ use serde_json::Value;
use std::io::Write; use std::io::Write;
use tokio::io::{AsyncBufReadExt, BufReader}; use tokio::io::{AsyncBufReadExt, BufReader};
use crate::config::S3Config; use crate::config::SmartStorageConfig;
use crate::server::S3Server; use crate::server::StorageServer;
#[derive(Deserialize)] #[derive(Deserialize)]
struct IpcRequest { struct IpcRequest {
@@ -62,7 +62,7 @@ pub async fn management_loop() -> Result<()> {
data: serde_json::json!({}), data: serde_json::json!({}),
}); });
let mut server: Option<S3Server> = None; let mut server: Option<StorageServer> = None;
let stdin = BufReader::new(tokio::io::stdin()); let stdin = BufReader::new(tokio::io::stdin());
let mut lines = stdin.lines(); let mut lines = stdin.lines();
@@ -87,11 +87,11 @@ pub async fn management_loop() -> Result<()> {
"start" => { "start" => {
#[derive(Deserialize)] #[derive(Deserialize)]
struct StartParams { struct StartParams {
config: S3Config, config: SmartStorageConfig,
} }
match serde_json::from_value::<StartParams>(req.params) { match serde_json::from_value::<StartParams>(req.params) {
Ok(params) => { Ok(params) => {
match S3Server::start(params.config).await { match StorageServer::start(params.config).await {
Ok(s) => { Ok(s) => {
server = Some(s); server = Some(s);
send_response(id, serde_json::json!({})); send_response(id, serde_json::json!({}));
@@ -140,6 +140,15 @@ pub async fn management_loop() -> Result<()> {
} }
} }
} }
"clusterStatus" => {
send_response(
id,
serde_json::json!({
"status": "ok",
"message": "Cluster status endpoint ready"
}),
);
}
_ => { _ => {
send_error(id, format!("Unknown method: {}", method)); send_error(id, format!("Unknown method: {}", method));
} }

View File

@@ -6,7 +6,7 @@ use tokio::sync::RwLock;
use crate::action::RequestContext; use crate::action::RequestContext;
use crate::auth::AuthenticatedIdentity; use crate::auth::AuthenticatedIdentity;
use crate::s3_error::S3Error; use crate::error::StorageError;
// ============================ // ============================
// Policy data model // Policy data model
@@ -284,50 +284,50 @@ fn simple_wildcard_match(pattern: &str, value: &str) -> bool {
const MAX_POLICY_SIZE: usize = 20 * 1024; // 20 KB const MAX_POLICY_SIZE: usize = 20 * 1024; // 20 KB
pub fn validate_policy(json: &str) -> Result<BucketPolicy, S3Error> { pub fn validate_policy(json: &str) -> Result<BucketPolicy, StorageError> {
if json.len() > MAX_POLICY_SIZE { if json.len() > MAX_POLICY_SIZE {
return Err(S3Error::malformed_policy("Policy exceeds maximum size of 20KB")); return Err(StorageError::malformed_policy("Policy exceeds maximum size of 20KB"));
} }
let policy: BucketPolicy = let policy: BucketPolicy =
serde_json::from_str(json).map_err(|e| S3Error::malformed_policy(&e.to_string()))?; serde_json::from_str(json).map_err(|e| StorageError::malformed_policy(&e.to_string()))?;
if policy.version != "2012-10-17" { if policy.version != "2012-10-17" {
return Err(S3Error::malformed_policy( return Err(StorageError::malformed_policy(
"Policy version must be \"2012-10-17\"", "Policy version must be \"2012-10-17\"",
)); ));
} }
if policy.statements.is_empty() { if policy.statements.is_empty() {
return Err(S3Error::malformed_policy( return Err(StorageError::malformed_policy(
"Policy must contain at least one statement", "Policy must contain at least one statement",
)); ));
} }
for (i, stmt) in policy.statements.iter().enumerate() { for (i, stmt) in policy.statements.iter().enumerate() {
if stmt.action.is_empty() { if stmt.action.is_empty() {
return Err(S3Error::malformed_policy(&format!( return Err(StorageError::malformed_policy(&format!(
"Statement {} has no actions", "Statement {} has no actions",
i i
))); )));
} }
for action in &stmt.action { for action in &stmt.action {
if action != "*" && !action.starts_with("s3:") { if action != "*" && !action.starts_with("s3:") {
return Err(S3Error::malformed_policy(&format!( return Err(StorageError::malformed_policy(&format!(
"Action \"{}\" must start with \"s3:\"", "Action \"{}\" must start with \"s3:\"",
action action
))); )));
} }
} }
if stmt.resource.is_empty() { if stmt.resource.is_empty() {
return Err(S3Error::malformed_policy(&format!( return Err(StorageError::malformed_policy(&format!(
"Statement {} has no resources", "Statement {} has no resources",
i i
))); )));
} }
for resource in &stmt.resource { for resource in &stmt.resource {
if resource != "*" && !resource.starts_with("arn:aws:s3:::") { if resource != "*" && !resource.starts_with("arn:aws:s3:::") {
return Err(S3Error::malformed_policy(&format!( return Err(StorageError::malformed_policy(&format!(
"Resource \"{}\" must start with \"arn:aws:s3:::\"", "Resource \"{}\" must start with \"arn:aws:s3:::\"",
resource resource
))); )));

View File

@@ -18,30 +18,40 @@ use tokio::sync::watch;
use tokio_util::io::ReaderStream; use tokio_util::io::ReaderStream;
use uuid::Uuid; use uuid::Uuid;
use crate::action::{self, RequestContext, S3Action}; use crate::action::{self, RequestContext, StorageAction};
use crate::auth::{self, AuthenticatedIdentity}; use crate::auth::{self, AuthenticatedIdentity};
use crate::config::S3Config; use crate::config::SmartStorageConfig;
use crate::policy::{self, PolicyDecision, PolicyStore}; use crate::policy::{self, PolicyDecision, PolicyStore};
use crate::s3_error::S3Error; use crate::error::StorageError;
use crate::storage::FileStore; use crate::cluster::coordinator::DistributedStore;
use crate::cluster::config::ErasureConfig;
use crate::cluster::drive_manager::DriveManager;
use crate::cluster::membership::MembershipManager;
use crate::cluster::placement;
use crate::cluster::protocol::NodeInfo;
use crate::cluster::quic_transport::QuicTransport;
use crate::cluster::shard_store::ShardStore;
use crate::cluster::state::ClusterState;
use crate::storage::{FileStore, StorageBackend};
use crate::xml_response; use crate::xml_response;
pub struct S3Server { pub struct StorageServer {
store: Arc<FileStore>, store: Arc<StorageBackend>,
shutdown_tx: watch::Sender<bool>, shutdown_tx: watch::Sender<bool>,
server_handle: tokio::task::JoinHandle<()>, server_handle: tokio::task::JoinHandle<()>,
} }
impl S3Server { impl StorageServer {
pub async fn start(config: S3Config) -> Result<Self> { pub async fn start(config: SmartStorageConfig) -> Result<Self> {
let store = Arc::new(FileStore::new(config.storage.directory.clone().into())); let store: Arc<StorageBackend> = if let Some(ref cluster_config) = config.cluster {
if cluster_config.enabled {
// Initialize or reset storage Self::start_clustered(&config, cluster_config).await?
if config.storage.clean_slate { } else {
store.reset().await?; Self::start_standalone(&config).await?
}
} else { } else {
store.initialize().await?; Self::start_standalone(&config).await?
} };
// Initialize policy store // Initialize policy store
let policy_store = Arc::new(PolicyStore::new(store.policies_dir())); let policy_store = Arc::new(PolicyStore::new(store.policies_dir()));
@@ -104,7 +114,7 @@ impl S3Server {
}); });
if !config.server.silent { if !config.server.silent {
tracing::info!("S3 server listening on {}", addr); tracing::info!("Storage server listening on {}", addr);
} }
Ok(Self { Ok(Self {
@@ -119,12 +129,156 @@ impl S3Server {
let _ = self.server_handle.await; let _ = self.server_handle.await;
} }
pub fn store(&self) -> &FileStore { pub fn store(&self) -> &StorageBackend {
&self.store &self.store
} }
/// Build the standalone backend: a `FileStore` rooted at
/// `config.storage.directory`, wrapped in `StorageBackend::Standalone`.
///
/// When `config.storage.clean_slate` is set the store is `reset()`;
/// otherwise it is `initialize()`d. Errors from either call are returned
/// to the caller.
async fn start_standalone(config: &SmartStorageConfig) -> Result<Arc<StorageBackend>> {
    let file_store = FileStore::new(config.storage.directory.clone().into());
    let backend = Arc::new(StorageBackend::Standalone(file_store));

    if !config.storage.clean_slate {
        backend.initialize().await?;
    } else {
        backend.reset().await?;
    }

    Ok(backend)
}
/// Build the clustered backend and start its background tasks.
///
/// Steps, in order: resolve the node ID and drive paths, create the on-disk
/// directories, bind the QUIC transport, create the cluster state, form
/// erasure sets from the local drives, register the local node, create the
/// drive manager, join the cluster via the configured seed nodes, spawn the
/// QUIC accept loop and the heartbeat loop, and finally wrap a
/// `DistributedStore` in `StorageBackend::Clustered`.
///
/// Any error from directory creation, address parsing, transport or drive
/// manager construction, cluster join, or store construction is returned.
async fn start_clustered(
config: &SmartStorageConfig,
cluster_config: &crate::cluster::config::ClusterConfig,
) -> Result<Arc<StorageBackend>> {
let erasure_config = cluster_config.erasure.clone();
// Use the configured node ID, or generate a random UUID when none is set.
let node_id = cluster_config
.node_id
.clone()
.unwrap_or_else(|| uuid::Uuid::new_v4().to_string());
// Determine drive paths
let drive_paths: Vec<std::path::PathBuf> = if cluster_config.drives.paths.is_empty() {
// Default: use storage directory as a single drive
vec![std::path::PathBuf::from(&config.storage.directory)]
} else {
cluster_config
.drives
.paths
.iter()
.map(std::path::PathBuf::from)
.collect()
};
// Ensure directories exist
// Manifests and bucket metadata live under the storage directory, regardless
// of where the drives are.
let manifest_dir = std::path::PathBuf::from(&config.storage.directory).join(".manifests");
let buckets_dir = std::path::PathBuf::from(&config.storage.directory).join(".buckets");
tokio::fs::create_dir_all(&manifest_dir).await?;
tokio::fs::create_dir_all(&buckets_dir).await?;
for path in &drive_paths {
tokio::fs::create_dir_all(path.join(".smartstorage")).await?;
}
// Initialize QUIC transport
// The QUIC address reuses the server address with the cluster's QUIC port.
let quic_addr: SocketAddr =
format!("{}:{}", config.server.address, cluster_config.quic_port).parse()?;
let transport = Arc::new(QuicTransport::new(quic_addr, node_id.clone()).await?);
// Initialize cluster state
// NOTE(review): the second argument is a freshly generated UUID on every
// start — presumably the cluster ID; confirm whether it should be persisted
// or taken from configuration.
let cluster_state = Arc::new(ClusterState::new(
node_id.clone(),
uuid::Uuid::new_v4().to_string(),
erasure_config.data_shards,
erasure_config.parity_shards,
));
// Form erasure sets from local drives (single-node for now)
let nodes = vec![(node_id.clone(), drive_paths.len() as u32)];
let erasure_sets =
placement::form_erasure_sets(&nodes, erasure_config.total_shards());
// An empty result only logs a warning; startup continues with no erasure sets.
if erasure_sets.is_empty() {
tracing::warn!(
"Not enough drives ({}) for erasure set size ({}). \
Need at least {} drives.",
drive_paths.len(),
erasure_config.total_shards(),
erasure_config.total_shards(),
);
}
cluster_state.set_erasure_sets(erasure_sets).await;
// Register self as a node
let local_node_info = NodeInfo {
node_id: node_id.clone(),
quic_addr: quic_addr.to_string(),
s3_addr: format!("{}:{}", config.server.address, config.server.port),
drive_count: drive_paths.len() as u32,
status: "online".to_string(),
version: env!("CARGO_PKG_VERSION").to_string(),
};
cluster_state.add_node(local_node_info.clone()).await;
// Initialize drive manager for health monitoring
// NOTE(review): the drive manager is built from `cluster_config.drives`, not
// from `drive_paths`; when no paths are configured the storage-directory
// fallback above is not passed in here — confirm DriveManager handles that.
let drive_manager = Arc::new(tokio::sync::Mutex::new(
DriveManager::new(&cluster_config.drives).await?,
));
// Join cluster if seed nodes are configured
let membership = Arc::new(
MembershipManager::new(
cluster_state.clone(),
transport.clone(),
cluster_config.heartbeat_interval_ms,
local_node_info,
)
.with_drive_manager(drive_manager),
);
membership
.join_cluster(&cluster_config.seed_nodes)
.await?;
// Start QUIC accept loop for incoming connections
// NOTE(review): the accept loop is given a ShardStore for the first drive
// path only — confirm how shards on the other drives are served to peers.
let shard_store_for_accept = Arc::new(ShardStore::new(drive_paths[0].clone()));
// NOTE(review): `quic_shutdown_tx` is not used again in this function, so it
// is dropped when the function returns — confirm accept_loop tolerates a
// closed watch channel and that shutdown is signalled some other way.
let (quic_shutdown_tx, quic_shutdown_rx) = watch::channel(false);
let transport_clone = transport.clone();
tokio::spawn(async move {
transport_clone
.accept_loop(shard_store_for_accept, quic_shutdown_rx)
.await;
});
// Start heartbeat loop
let membership_clone = membership.clone();
// NOTE(review): `hb_shutdown_tx` is likewise dropped at the end of this
// function — confirm heartbeat_loop behaves correctly once the sender is gone.
let (hb_shutdown_tx, hb_shutdown_rx) = watch::channel(false);
tokio::spawn(async move {
membership_clone.heartbeat_loop(hb_shutdown_rx).await;
});
// Create distributed store
let distributed_store = DistributedStore::new(
cluster_state,
transport,
erasure_config,
drive_paths,
manifest_dir,
buckets_dir,
)?;
let store = Arc::new(StorageBackend::Clustered(distributed_store));
if !config.server.silent {
tracing::info!(
"Cluster mode enabled (node_id={}, quic_port={})",
node_id,
cluster_config.quic_port
);
}
Ok(store)
}
} }
impl S3Config { impl SmartStorageConfig {
fn address(&self) -> &str { fn address(&self) -> &str {
&self.server.address &self.server.address
} }
@@ -192,7 +346,7 @@ fn empty_response(status: StatusCode, request_id: &str) -> Response<BoxBody> {
.unwrap() .unwrap()
} }
fn s3_error_response(err: &S3Error, request_id: &str) -> Response<BoxBody> { fn storage_error_response(err: &StorageError, request_id: &str) -> Response<BoxBody> {
let xml = err.to_xml(); let xml = err.to_xml();
Response::builder() Response::builder()
.status(err.status) .status(err.status)
@@ -204,8 +358,8 @@ fn s3_error_response(err: &S3Error, request_id: &str) -> Response<BoxBody> {
async fn handle_request( async fn handle_request(
req: Request<Incoming>, req: Request<Incoming>,
store: Arc<FileStore>, store: Arc<StorageBackend>,
config: S3Config, config: SmartStorageConfig,
policy_store: Arc<PolicyStore>, policy_store: Arc<PolicyStore>,
) -> Result<Response<BoxBody>, std::convert::Infallible> { ) -> Result<Response<BoxBody>, std::convert::Infallible> {
let request_id = Uuid::new_v4().to_string(); let request_id = Uuid::new_v4().to_string();
@@ -219,7 +373,7 @@ async fn handle_request(
return Ok(resp); return Ok(resp);
} }
// Step 1: Resolve S3 action from request // Step 1: Resolve storage action from request
let request_ctx = action::resolve_action(&req); let request_ctx = action::resolve_action(&req);
// Step 2: Auth + policy pipeline // Step 2: Auth + policy pipeline
@@ -238,7 +392,7 @@ async fn handle_request(
Ok(id) => Some(id), Ok(id) => Some(id),
Err(e) => { Err(e) => {
tracing::warn!("Auth failed: {}", e.message); tracing::warn!("Auth failed: {}", e.message);
return Ok(s3_error_response(&e, &request_id)); return Ok(storage_error_response(&e, &request_id));
} }
} }
} else { } else {
@@ -248,7 +402,7 @@ async fn handle_request(
// Step 3: Authorization (policy evaluation) // Step 3: Authorization (policy evaluation)
if let Err(e) = authorize_request(&request_ctx, identity.as_ref(), &policy_store).await { if let Err(e) = authorize_request(&request_ctx, identity.as_ref(), &policy_store).await {
return Ok(s3_error_response(&e, &request_id)); return Ok(storage_error_response(&e, &request_id));
} }
} }
@@ -256,12 +410,12 @@ async fn handle_request(
let mut response = match route_request(req, store, &config, &request_id, &policy_store).await { let mut response = match route_request(req, store, &config, &request_id, &policy_store).await {
Ok(resp) => resp, Ok(resp) => resp,
Err(err) => { Err(err) => {
if let Some(s3err) = err.downcast_ref::<S3Error>() { if let Some(s3err) = err.downcast_ref::<StorageError>() {
s3_error_response(s3err, &request_id) storage_error_response(s3err, &request_id)
} else { } else {
tracing::error!("Internal error: {}", err); tracing::error!("Internal error: {}", err);
let s3err = S3Error::internal_error(&err.to_string()); let s3err = StorageError::internal_error(&err.to_string());
s3_error_response(&s3err, &request_id) storage_error_response(&s3err, &request_id)
} }
} }
}; };
@@ -288,11 +442,11 @@ async fn authorize_request(
ctx: &RequestContext, ctx: &RequestContext,
identity: Option<&AuthenticatedIdentity>, identity: Option<&AuthenticatedIdentity>,
policy_store: &PolicyStore, policy_store: &PolicyStore,
) -> Result<(), S3Error> { ) -> Result<(), StorageError> {
// ListAllMyBuckets requires authentication (no bucket to apply policy to) // ListAllMyBuckets requires authentication (no bucket to apply policy to)
if ctx.action == S3Action::ListAllMyBuckets { if ctx.action == StorageAction::ListAllMyBuckets {
if identity.is_none() { if identity.is_none() {
return Err(S3Error::access_denied()); return Err(StorageError::access_denied());
} }
return Ok(()); return Ok(());
} }
@@ -302,7 +456,7 @@ async fn authorize_request(
if let Some(bucket_policy) = policy_store.get_policy(bucket).await { if let Some(bucket_policy) = policy_store.get_policy(bucket).await {
let decision = policy::evaluate_policy(&bucket_policy, ctx, identity); let decision = policy::evaluate_policy(&bucket_policy, ctx, identity);
match decision { match decision {
PolicyDecision::Deny => return Err(S3Error::access_denied()), PolicyDecision::Deny => return Err(StorageError::access_denied()),
PolicyDecision::Allow => return Ok(()), PolicyDecision::Allow => return Ok(()),
PolicyDecision::NoOpinion => { PolicyDecision::NoOpinion => {
// Fall through to default behavior // Fall through to default behavior
@@ -313,7 +467,7 @@ async fn authorize_request(
// Default: authenticated users get full access, anonymous denied // Default: authenticated users get full access, anonymous denied
if identity.is_none() { if identity.is_none() {
return Err(S3Error::access_denied()); return Err(StorageError::access_denied());
} }
Ok(()) Ok(())
@@ -325,8 +479,8 @@ async fn authorize_request(
async fn route_request( async fn route_request(
req: Request<Incoming>, req: Request<Incoming>,
store: Arc<FileStore>, store: Arc<StorageBackend>,
_config: &S3Config, _config: &SmartStorageConfig,
request_id: &str, request_id: &str,
policy_store: &Arc<PolicyStore>, policy_store: &Arc<PolicyStore>,
) -> Result<Response<BoxBody>> { ) -> Result<Response<BoxBody>> {
@@ -414,8 +568,8 @@ async fn route_request(
let upload_id = query.get("uploadId").unwrap().clone(); let upload_id = query.get("uploadId").unwrap().clone();
handle_complete_multipart(req, store, &bucket, &key, &upload_id, request_id).await handle_complete_multipart(req, store, &bucket, &key, &upload_id, request_id).await
} else { } else {
let err = S3Error::invalid_request("Invalid POST request"); let err = StorageError::invalid_request("Invalid POST request");
Ok(s3_error_response(&err, request_id)) Ok(storage_error_response(&err, request_id))
} }
} }
_ => Ok(empty_response(StatusCode::METHOD_NOT_ALLOWED, request_id)), _ => Ok(empty_response(StatusCode::METHOD_NOT_ALLOWED, request_id)),
@@ -430,7 +584,7 @@ async fn route_request(
// ============================ // ============================
async fn handle_list_buckets( async fn handle_list_buckets(
store: Arc<FileStore>, store: Arc<StorageBackend>,
request_id: &str, request_id: &str,
) -> Result<Response<BoxBody>> { ) -> Result<Response<BoxBody>> {
let buckets = store.list_buckets().await?; let buckets = store.list_buckets().await?;
@@ -439,7 +593,7 @@ async fn handle_list_buckets(
} }
async fn handle_create_bucket( async fn handle_create_bucket(
store: Arc<FileStore>, store: Arc<StorageBackend>,
bucket: &str, bucket: &str,
request_id: &str, request_id: &str,
) -> Result<Response<BoxBody>> { ) -> Result<Response<BoxBody>> {
@@ -448,7 +602,7 @@ async fn handle_create_bucket(
} }
async fn handle_delete_bucket( async fn handle_delete_bucket(
store: Arc<FileStore>, store: Arc<StorageBackend>,
bucket: &str, bucket: &str,
request_id: &str, request_id: &str,
policy_store: &Arc<PolicyStore>, policy_store: &Arc<PolicyStore>,
@@ -460,19 +614,19 @@ async fn handle_delete_bucket(
} }
async fn handle_head_bucket( async fn handle_head_bucket(
store: Arc<FileStore>, store: Arc<StorageBackend>,
bucket: &str, bucket: &str,
request_id: &str, request_id: &str,
) -> Result<Response<BoxBody>> { ) -> Result<Response<BoxBody>> {
if store.bucket_exists(bucket).await { if store.bucket_exists(bucket).await {
Ok(empty_response(StatusCode::OK, request_id)) Ok(empty_response(StatusCode::OK, request_id))
} else { } else {
Err(S3Error::no_such_bucket().into()) Err(StorageError::no_such_bucket().into())
} }
} }
async fn handle_list_objects( async fn handle_list_objects(
store: Arc<FileStore>, store: Arc<StorageBackend>,
bucket: &str, bucket: &str,
query: &HashMap<String, String>, query: &HashMap<String, String>,
request_id: &str, request_id: &str,
@@ -501,7 +655,7 @@ async fn handle_list_objects(
async fn handle_put_object( async fn handle_put_object(
req: Request<Incoming>, req: Request<Incoming>,
store: Arc<FileStore>, store: Arc<StorageBackend>,
bucket: &str, bucket: &str,
key: &str, key: &str,
request_id: &str, request_id: &str,
@@ -523,7 +677,7 @@ async fn handle_put_object(
async fn handle_get_object( async fn handle_get_object(
req: Request<Incoming>, req: Request<Incoming>,
store: Arc<FileStore>, store: Arc<StorageBackend>,
bucket: &str, bucket: &str,
key: &str, key: &str,
request_id: &str, request_id: &str,
@@ -576,7 +730,7 @@ async fn handle_get_object(
} }
async fn handle_head_object( async fn handle_head_object(
store: Arc<FileStore>, store: Arc<StorageBackend>,
bucket: &str, bucket: &str,
key: &str, key: &str,
request_id: &str, request_id: &str,
@@ -608,7 +762,7 @@ async fn handle_head_object(
} }
async fn handle_delete_object( async fn handle_delete_object(
store: Arc<FileStore>, store: Arc<StorageBackend>,
bucket: &str, bucket: &str,
key: &str, key: &str,
request_id: &str, request_id: &str,
@@ -619,7 +773,7 @@ async fn handle_delete_object(
async fn handle_copy_object( async fn handle_copy_object(
req: Request<Incoming>, req: Request<Incoming>,
store: Arc<FileStore>, store: Arc<StorageBackend>,
dest_bucket: &str, dest_bucket: &str,
dest_key: &str, dest_key: &str,
request_id: &str, request_id: &str,
@@ -682,20 +836,20 @@ async fn handle_get_bucket_policy(
.unwrap(); .unwrap();
Ok(resp) Ok(resp)
} }
None => Err(S3Error::no_such_bucket_policy().into()), None => Err(StorageError::no_such_bucket_policy().into()),
} }
} }
async fn handle_put_bucket_policy( async fn handle_put_bucket_policy(
req: Request<Incoming>, req: Request<Incoming>,
store: &Arc<FileStore>, store: &Arc<StorageBackend>,
policy_store: &Arc<PolicyStore>, policy_store: &Arc<PolicyStore>,
bucket: &str, bucket: &str,
request_id: &str, request_id: &str,
) -> Result<Response<BoxBody>> { ) -> Result<Response<BoxBody>> {
// Verify bucket exists // Verify bucket exists
if !store.bucket_exists(bucket).await { if !store.bucket_exists(bucket).await {
return Err(S3Error::no_such_bucket().into()); return Err(StorageError::no_such_bucket().into());
} }
// Read body // Read body
@@ -709,7 +863,7 @@ async fn handle_put_bucket_policy(
policy_store policy_store
.put_policy(bucket, validated_policy) .put_policy(bucket, validated_policy)
.await .await
.map_err(|e| S3Error::internal_error(&e.to_string()))?; .map_err(|e| StorageError::internal_error(&e.to_string()))?;
Ok(empty_response(StatusCode::NO_CONTENT, request_id)) Ok(empty_response(StatusCode::NO_CONTENT, request_id))
} }
@@ -722,7 +876,7 @@ async fn handle_delete_bucket_policy(
policy_store policy_store
.delete_policy(bucket) .delete_policy(bucket)
.await .await
.map_err(|e| S3Error::internal_error(&e.to_string()))?; .map_err(|e| StorageError::internal_error(&e.to_string()))?;
Ok(empty_response(StatusCode::NO_CONTENT, request_id)) Ok(empty_response(StatusCode::NO_CONTENT, request_id))
} }
@@ -732,7 +886,7 @@ async fn handle_delete_bucket_policy(
async fn handle_initiate_multipart( async fn handle_initiate_multipart(
req: Request<Incoming>, req: Request<Incoming>,
store: Arc<FileStore>, store: Arc<StorageBackend>,
bucket: &str, bucket: &str,
key: &str, key: &str,
request_id: &str, request_id: &str,
@@ -745,7 +899,7 @@ async fn handle_initiate_multipart(
async fn handle_upload_part( async fn handle_upload_part(
req: Request<Incoming>, req: Request<Incoming>,
store: Arc<FileStore>, store: Arc<StorageBackend>,
query: &HashMap<String, String>, query: &HashMap<String, String>,
request_id: &str, request_id: &str,
) -> Result<Response<BoxBody>> { ) -> Result<Response<BoxBody>> {
@@ -756,7 +910,7 @@ async fn handle_upload_part(
.unwrap_or(0); .unwrap_or(0);
if part_number < 1 || part_number > 10000 { if part_number < 1 || part_number > 10000 {
return Err(S3Error::invalid_part_number().into()); return Err(StorageError::invalid_part_number().into());
} }
let body = req.into_body(); let body = req.into_body();
@@ -774,7 +928,7 @@ async fn handle_upload_part(
async fn handle_complete_multipart( async fn handle_complete_multipart(
req: Request<Incoming>, req: Request<Incoming>,
store: Arc<FileStore>, store: Arc<StorageBackend>,
bucket: &str, bucket: &str,
key: &str, key: &str,
upload_id: &str, upload_id: &str,
@@ -794,7 +948,7 @@ async fn handle_complete_multipart(
} }
async fn handle_abort_multipart( async fn handle_abort_multipart(
store: Arc<FileStore>, store: Arc<StorageBackend>,
upload_id: &str, upload_id: &str,
request_id: &str, request_id: &str,
) -> Result<Response<BoxBody>> { ) -> Result<Response<BoxBody>> {
@@ -803,7 +957,7 @@ async fn handle_abort_multipart(
} }
async fn handle_list_multipart_uploads( async fn handle_list_multipart_uploads(
store: Arc<FileStore>, store: Arc<StorageBackend>,
bucket: &str, bucket: &str,
request_id: &str, request_id: &str,
) -> Result<Response<BoxBody>> { ) -> Result<Response<BoxBody>> {
@@ -925,7 +1079,7 @@ fn extract_xml_value<'a>(xml: &'a str, tag: &str) -> Option<String> {
// CORS // CORS
// ============================ // ============================
fn build_cors_preflight(config: &S3Config, request_id: &str) -> Response<BoxBody> { fn build_cors_preflight(config: &SmartStorageConfig, request_id: &str) -> Response<BoxBody> {
let mut builder = Response::builder() let mut builder = Response::builder()
.status(StatusCode::NO_CONTENT) .status(StatusCode::NO_CONTENT)
.header("x-amz-request-id", request_id); .header("x-amz-request-id", request_id);
@@ -949,7 +1103,7 @@ fn build_cors_preflight(config: &S3Config, request_id: &str) -> Response<BoxBody
builder.body(empty_body()).unwrap() builder.body(empty_body()).unwrap()
} }
fn add_cors_headers(headers: &mut hyper::HeaderMap, config: &S3Config) { fn add_cors_headers(headers: &mut hyper::HeaderMap, config: &SmartStorageConfig) {
if let Some(ref origins) = config.cors.allowed_origins { if let Some(ref origins) = config.cors.allowed_origins {
headers.insert( headers.insert(
"access-control-allow-origin", "access-control-allow-origin",

View File

@@ -10,7 +10,8 @@ use tokio::fs;
use tokio::io::{AsyncReadExt, AsyncSeekExt, AsyncWriteExt, BufWriter}; use tokio::io::{AsyncReadExt, AsyncSeekExt, AsyncWriteExt, BufWriter};
use uuid::Uuid; use uuid::Uuid;
use crate::s3_error::S3Error; use crate::cluster::coordinator::DistributedStore;
use crate::error::StorageError;
// ============================ // ============================
// Result types // Result types
@@ -174,13 +175,13 @@ impl FileStore {
let bucket_path = self.root_dir.join(bucket); let bucket_path = self.root_dir.join(bucket);
if !bucket_path.is_dir() { if !bucket_path.is_dir() {
return Err(S3Error::no_such_bucket().into()); return Err(StorageError::no_such_bucket().into());
} }
// Check if bucket is empty (ignore hidden files) // Check if bucket is empty (ignore hidden files)
let mut entries = fs::read_dir(&bucket_path).await?; let mut entries = fs::read_dir(&bucket_path).await?;
while let Some(_entry) = entries.next_entry().await? { while let Some(_entry) = entries.next_entry().await? {
return Err(S3Error::bucket_not_empty().into()); return Err(StorageError::bucket_not_empty().into());
} }
fs::remove_dir_all(&bucket_path).await?; fs::remove_dir_all(&bucket_path).await?;
@@ -199,7 +200,7 @@ impl FileStore {
metadata: HashMap<String, String>, metadata: HashMap<String, String>,
) -> Result<PutResult> { ) -> Result<PutResult> {
if !self.bucket_exists(bucket).await { if !self.bucket_exists(bucket).await {
return Err(S3Error::no_such_bucket().into()); return Err(StorageError::no_such_bucket().into());
} }
let object_path = self.object_path(bucket, key); let object_path = self.object_path(bucket, key);
@@ -256,7 +257,7 @@ impl FileStore {
let object_path = self.object_path(bucket, key); let object_path = self.object_path(bucket, key);
if !object_path.exists() { if !object_path.exists() {
return Err(S3Error::no_such_key().into()); return Err(StorageError::no_such_key().into());
} }
let file_meta = fs::metadata(&object_path).await?; let file_meta = fs::metadata(&object_path).await?;
@@ -289,7 +290,7 @@ impl FileStore {
let object_path = self.object_path(bucket, key); let object_path = self.object_path(bucket, key);
if !object_path.exists() { if !object_path.exists() {
return Err(S3Error::no_such_key().into()); return Err(StorageError::no_such_key().into());
} }
// Only stat the file, don't open it // Only stat the file, don't open it
@@ -352,11 +353,11 @@ impl FileStore {
let dest_path = self.object_path(dest_bucket, dest_key); let dest_path = self.object_path(dest_bucket, dest_key);
if !src_path.exists() { if !src_path.exists() {
return Err(S3Error::no_such_key().into()); return Err(StorageError::no_such_key().into());
} }
if !self.bucket_exists(dest_bucket).await { if !self.bucket_exists(dest_bucket).await {
return Err(S3Error::no_such_bucket().into()); return Err(StorageError::no_such_bucket().into());
} }
if let Some(parent) = dest_path.parent() { if let Some(parent) = dest_path.parent() {
@@ -403,7 +404,7 @@ impl FileStore {
let bucket_path = self.root_dir.join(bucket); let bucket_path = self.root_dir.join(bucket);
if !bucket_path.is_dir() { if !bucket_path.is_dir() {
return Err(S3Error::no_such_bucket().into()); return Err(StorageError::no_such_bucket().into());
} }
// Collect all object keys recursively // Collect all object keys recursively
@@ -528,7 +529,7 @@ impl FileStore {
) -> Result<(String, u64)> { ) -> Result<(String, u64)> {
let upload_dir = self.multipart_dir().join(upload_id); let upload_dir = self.multipart_dir().join(upload_id);
if !upload_dir.is_dir() { if !upload_dir.is_dir() {
return Err(S3Error::no_such_upload().into()); return Err(StorageError::no_such_upload().into());
} }
let part_path = upload_dir.join(format!("part-{}", part_number)); let part_path = upload_dir.join(format!("part-{}", part_number));
@@ -602,7 +603,7 @@ impl FileStore {
) -> Result<CompleteMultipartResult> { ) -> Result<CompleteMultipartResult> {
let upload_dir = self.multipart_dir().join(upload_id); let upload_dir = self.multipart_dir().join(upload_id);
if !upload_dir.is_dir() { if !upload_dir.is_dir() {
return Err(S3Error::no_such_upload().into()); return Err(StorageError::no_such_upload().into());
} }
// Read metadata to get bucket/key // Read metadata to get bucket/key
@@ -663,7 +664,7 @@ impl FileStore {
pub async fn abort_multipart(&self, upload_id: &str) -> Result<()> { pub async fn abort_multipart(&self, upload_id: &str) -> Result<()> {
let upload_dir = self.multipart_dir().join(upload_id); let upload_dir = self.multipart_dir().join(upload_id);
if !upload_dir.is_dir() { if !upload_dir.is_dir() {
return Err(S3Error::no_such_upload().into()); return Err(StorageError::no_such_upload().into());
} }
fs::remove_dir_all(&upload_dir).await?; fs::remove_dir_all(&upload_dir).await?;
Ok(()) Ok(())
@@ -715,7 +716,7 @@ impl FileStore {
let encoded = encode_key(key); let encoded = encode_key(key);
self.root_dir self.root_dir
.join(bucket) .join(bucket)
.join(format!("{}._S3_object", encoded)) .join(format!("{}._storage_object", encoded))
} }
async fn read_md5(&self, object_path: &Path) -> String { async fn read_md5(&self, object_path: &Path) -> String {
@@ -775,7 +776,7 @@ impl FileStore {
if meta.is_dir() { if meta.is_dir() {
self.collect_keys(bucket_path, &entry.path(), keys).await?; self.collect_keys(bucket_path, &entry.path(), keys).await?;
} else if name.ends_with("._S3_object") } else if name.ends_with("._storage_object")
&& !name.ends_with(".metadata.json") && !name.ends_with(".metadata.json")
&& !name.ends_with(".md5") && !name.ends_with(".md5")
{ {
@@ -785,7 +786,7 @@ impl FileStore {
.unwrap_or(Path::new("")) .unwrap_or(Path::new(""))
.to_string_lossy() .to_string_lossy()
.to_string(); .to_string();
let key = decode_key(relative.trim_end_matches("._S3_object")); let key = decode_key(relative.trim_end_matches("._storage_object"));
keys.push(key); keys.push(key);
} }
} }
@@ -795,6 +796,200 @@ impl FileStore {
} }
} }
// ============================
// StorageBackend enum
// ============================
/// Unified storage backend that dispatches to either standalone (FileStore)
/// or clustered (DistributedStore) storage.
///
/// Callers hold one `StorageBackend` value and invoke the same method set
/// regardless of which variant is active.
pub enum StorageBackend {
    /// Standalone mode: every operation is served by the wrapped `FileStore`.
    Standalone(FileStore),
    /// Clustered mode: every operation is served by the wrapped `DistributedStore`.
    Clustered(DistributedStore),
}
impl StorageBackend {
    /// Directory holding bucket policies, as reported by the active store.
    pub fn policies_dir(&self) -> std::path::PathBuf {
        match self {
            Self::Standalone(local) => local.policies_dir(),
            Self::Clustered(cluster) => cluster.policies_dir(),
        }
    }

    /// Prepares the backend for use.
    ///
    /// Standalone mode delegates to `FileStore::initialize`. Clustered mode
    /// only makes sure the policies directory exists; no other call is made
    /// on the `DistributedStore` here.
    pub async fn initialize(&self) -> Result<()> {
        match self {
            Self::Standalone(local) => local.initialize().await,
            Self::Clustered(cluster) => {
                // Ensure policies directory exists
                let policies = cluster.policies_dir();
                tokio::fs::create_dir_all(policies).await?;
                Ok(())
            }
        }
    }

    /// Resets the backend.
    ///
    /// Standalone mode delegates to `FileStore::reset`. Clustered mode is
    /// currently a no-op that reports success, because cluster reset has not
    /// been implemented yet.
    pub async fn reset(&self) -> Result<()> {
        match self {
            Self::Standalone(local) => local.reset().await,
            // TODO: cluster reset
            Self::Clustered(_) => Ok(()),
        }
    }

    /// Lists buckets by forwarding to the active store's `list_buckets`.
    pub async fn list_buckets(&self) -> Result<Vec<BucketInfo>> {
        match self {
            Self::Standalone(local) => local.list_buckets().await,
            Self::Clustered(cluster) => cluster.list_buckets().await,
        }
    }

    /// Reports whether `bucket` exists according to the active store.
    pub async fn bucket_exists(&self, bucket: &str) -> bool {
        match self {
            Self::Standalone(local) => local.bucket_exists(bucket).await,
            Self::Clustered(cluster) => cluster.bucket_exists(bucket).await,
        }
    }

    /// Forwards bucket creation to the active store's `create_bucket`.
    pub async fn create_bucket(&self, bucket: &str) -> Result<()> {
        match self {
            Self::Standalone(local) => local.create_bucket(bucket).await,
            Self::Clustered(cluster) => cluster.create_bucket(bucket).await,
        }
    }

    /// Forwards bucket deletion to the active store's `delete_bucket`.
    pub async fn delete_bucket(&self, bucket: &str) -> Result<()> {
        match self {
            Self::Standalone(local) => local.delete_bucket(bucket).await,
            Self::Clustered(cluster) => cluster.delete_bucket(bucket).await,
        }
    }

    /// Forwards an object upload (`body` plus `metadata`) to the active
    /// store's `put_object`. All arguments are passed through untouched.
    pub async fn put_object(
        &self,
        bucket: &str,
        key: &str,
        body: Incoming,
        metadata: HashMap<String, String>,
    ) -> Result<PutResult> {
        match self {
            Self::Standalone(local) => local.put_object(bucket, key, body, metadata).await,
            Self::Clustered(cluster) => cluster.put_object(bucket, key, body, metadata).await,
        }
    }

    /// Forwards an object read to the active store's `get_object`.
    ///
    /// `range` is handed over as-is; how the two bounds are interpreted is
    /// decided by the underlying store.
    pub async fn get_object(
        &self,
        bucket: &str,
        key: &str,
        range: Option<(u64, u64)>,
    ) -> Result<GetResult> {
        match self {
            Self::Standalone(local) => local.get_object(bucket, key, range).await,
            Self::Clustered(cluster) => cluster.get_object(bucket, key, range).await,
        }
    }

    /// Forwards a metadata-only lookup to the active store's `head_object`.
    pub async fn head_object(&self, bucket: &str, key: &str) -> Result<HeadResult> {
        match self {
            Self::Standalone(local) => local.head_object(bucket, key).await,
            Self::Clustered(cluster) => cluster.head_object(bucket, key).await,
        }
    }

    /// Forwards object deletion to the active store's `delete_object`.
    pub async fn delete_object(&self, bucket: &str, key: &str) -> Result<()> {
        match self {
            Self::Standalone(local) => local.delete_object(bucket, key).await,
            Self::Clustered(cluster) => cluster.delete_object(bucket, key).await,
        }
    }

    /// Forwards an object copy to the active store's `copy_object`.
    ///
    /// `metadata_directive` and `new_metadata` are passed through unchanged;
    /// their handling is left to the underlying store.
    pub async fn copy_object(
        &self,
        src_bucket: &str,
        src_key: &str,
        dest_bucket: &str,
        dest_key: &str,
        metadata_directive: &str,
        new_metadata: Option<HashMap<String, String>>,
    ) -> Result<CopyResult> {
        match self {
            Self::Standalone(local) => {
                local
                    .copy_object(
                        src_bucket,
                        src_key,
                        dest_bucket,
                        dest_key,
                        metadata_directive,
                        new_metadata,
                    )
                    .await
            }
            Self::Clustered(cluster) => {
                cluster
                    .copy_object(
                        src_bucket,
                        src_key,
                        dest_bucket,
                        dest_key,
                        metadata_directive,
                        new_metadata,
                    )
                    .await
            }
        }
    }

    /// Forwards an object listing to the active store's `list_objects`.
    ///
    /// Filtering and pagination arguments are passed through unchanged.
    pub async fn list_objects(
        &self,
        bucket: &str,
        prefix: &str,
        delimiter: &str,
        max_keys: usize,
        continuation_token: Option<&str>,
    ) -> Result<ListObjectsResult> {
        match self {
            Self::Standalone(local) => {
                local
                    .list_objects(bucket, prefix, delimiter, max_keys, continuation_token)
                    .await
            }
            Self::Clustered(cluster) => {
                cluster
                    .list_objects(bucket, prefix, delimiter, max_keys, continuation_token)
                    .await
            }
        }
    }

    /// Starts a multipart upload via the active store's `initiate_multipart`
    /// and returns the `String` it produces.
    pub async fn initiate_multipart(
        &self,
        bucket: &str,
        key: &str,
        metadata: HashMap<String, String>,
    ) -> Result<String> {
        match self {
            Self::Standalone(local) => local.initiate_multipart(bucket, key, metadata).await,
            Self::Clustered(cluster) => cluster.initiate_multipart(bucket, key, metadata).await,
        }
    }

    /// Forwards one part of a multipart upload to the active store's
    /// `upload_part` and returns its `(String, u64)` result unchanged.
    pub async fn upload_part(
        &self,
        upload_id: &str,
        part_number: u32,
        body: Incoming,
    ) -> Result<(String, u64)> {
        match self {
            Self::Standalone(local) => local.upload_part(upload_id, part_number, body).await,
            Self::Clustered(cluster) => cluster.upload_part(upload_id, part_number, body).await,
        }
    }

    /// Forwards multipart completion to the active store's
    /// `complete_multipart`, handing over `parts` as given.
    pub async fn complete_multipart(
        &self,
        upload_id: &str,
        parts: &[(u32, String)],
    ) -> Result<CompleteMultipartResult> {
        match self {
            Self::Standalone(local) => local.complete_multipart(upload_id, parts).await,
            Self::Clustered(cluster) => cluster.complete_multipart(upload_id, parts).await,
        }
    }

    /// Forwards a multipart abort to the active store's `abort_multipart`.
    pub async fn abort_multipart(&self, upload_id: &str) -> Result<()> {
        match self {
            Self::Standalone(local) => local.abort_multipart(upload_id).await,
            Self::Clustered(cluster) => cluster.abort_multipart(upload_id).await,
        }
    }

    /// Lists multipart uploads for `bucket` via the active store's
    /// `list_multipart_uploads`.
    pub async fn list_multipart_uploads(
        &self,
        bucket: &str,
    ) -> Result<Vec<MultipartUploadInfo>> {
        match self {
            Self::Standalone(local) => local.list_multipart_uploads(bucket).await,
            Self::Clustered(cluster) => cluster.list_multipart_uploads(bucket).await,
        }
    }
}
// ============================ // ============================
// Key encoding (identity on Linux) // Key encoding (identity on Linux)
// ============================ // ============================

View File

@@ -1,7 +1,7 @@
use crate::storage::{BucketInfo, ListObjectsResult, MultipartUploadInfo}; use crate::storage::{BucketInfo, ListObjectsResult, MultipartUploadInfo};
const XML_DECL: &str = "<?xml version=\"1.0\" encoding=\"UTF-8\"?>"; const XML_DECL: &str = "<?xml version=\"1.0\" encoding=\"UTF-8\"?>";
const S3_NS: &str = "http://s3.amazonaws.com/doc/2006-03-01/"; const STORAGE_NS: &str = "http://s3.amazonaws.com/doc/2006-03-01/";
fn xml_escape(s: &str) -> String { fn xml_escape(s: &str) -> String {
s.replace('&', "&amp;") s.replace('&', "&amp;")
@@ -14,9 +14,9 @@ fn xml_escape(s: &str) -> String {
pub fn list_buckets_xml(buckets: &[BucketInfo]) -> String { pub fn list_buckets_xml(buckets: &[BucketInfo]) -> String {
let mut xml = format!( let mut xml = format!(
"{}\n<ListAllMyBucketsResult xmlns=\"{}\">\ "{}\n<ListAllMyBucketsResult xmlns=\"{}\">\
<Owner><ID>123456789000</ID><DisplayName>S3rver</DisplayName></Owner>\ <Owner><ID>123456789000</ID><DisplayName>Storage</DisplayName></Owner>\
<Buckets>", <Buckets>",
XML_DECL, S3_NS XML_DECL, STORAGE_NS
); );
for b in buckets { for b in buckets {
@@ -39,7 +39,7 @@ pub fn list_objects_v1_xml(bucket: &str, result: &ListObjectsResult) -> String {
<MaxKeys>{}</MaxKeys>\ <MaxKeys>{}</MaxKeys>\
<IsTruncated>{}</IsTruncated>", <IsTruncated>{}</IsTruncated>",
XML_DECL, XML_DECL,
S3_NS, STORAGE_NS,
xml_escape(bucket), xml_escape(bucket),
xml_escape(&result.prefix), xml_escape(&result.prefix),
result.max_keys, result.max_keys,
@@ -86,7 +86,7 @@ pub fn list_objects_v2_xml(bucket: &str, result: &ListObjectsResult) -> String {
<KeyCount>{}</KeyCount>\ <KeyCount>{}</KeyCount>\
<IsTruncated>{}</IsTruncated>", <IsTruncated>{}</IsTruncated>",
XML_DECL, XML_DECL,
S3_NS, STORAGE_NS,
xml_escape(bucket), xml_escape(bucket),
xml_escape(&result.prefix), xml_escape(&result.prefix),
result.max_keys, result.max_keys,
@@ -152,7 +152,7 @@ pub fn initiate_multipart_xml(bucket: &str, key: &str, upload_id: &str) -> Strin
<UploadId>{}</UploadId>\ <UploadId>{}</UploadId>\
</InitiateMultipartUploadResult>", </InitiateMultipartUploadResult>",
XML_DECL, XML_DECL,
S3_NS, STORAGE_NS,
xml_escape(bucket), xml_escape(bucket),
xml_escape(key), xml_escape(key),
xml_escape(upload_id) xml_escape(upload_id)
@@ -168,7 +168,7 @@ pub fn complete_multipart_xml(bucket: &str, key: &str, etag: &str) -> String {
<ETag>\"{}\"</ETag>\ <ETag>\"{}\"</ETag>\
</CompleteMultipartUploadResult>", </CompleteMultipartUploadResult>",
XML_DECL, XML_DECL,
S3_NS, STORAGE_NS,
xml_escape(bucket), xml_escape(bucket),
xml_escape(key), xml_escape(key),
xml_escape(bucket), xml_escape(bucket),
@@ -186,7 +186,7 @@ pub fn list_multipart_uploads_xml(bucket: &str, uploads: &[MultipartUploadInfo])
<MaxUploads>1000</MaxUploads>\ <MaxUploads>1000</MaxUploads>\
<IsTruncated>false</IsTruncated>", <IsTruncated>false</IsTruncated>",
XML_DECL, XML_DECL,
S3_NS, STORAGE_NS,
xml_escape(bucket) xml_escape(bucket)
); );
@@ -195,8 +195,8 @@ pub fn list_multipart_uploads_xml(bucket: &str, uploads: &[MultipartUploadInfo])
"<Upload>\ "<Upload>\
<Key>{}</Key>\ <Key>{}</Key>\
<UploadId>{}</UploadId>\ <UploadId>{}</UploadId>\
<Initiator><ID>S3RVER</ID><DisplayName>S3RVER</DisplayName></Initiator>\ <Initiator><ID>STORAGE</ID><DisplayName>STORAGE</DisplayName></Initiator>\
<Owner><ID>S3RVER</ID><DisplayName>S3RVER</DisplayName></Owner>\ <Owner><ID>STORAGE</ID><DisplayName>STORAGE</DisplayName></Owner>\
<StorageClass>STANDARD</StorageClass>\ <StorageClass>STANDARD</StorageClass>\
<Initiated>{}</Initiated>\ <Initiated>{}</Initiated>\
</Upload>", </Upload>",

View File

@@ -12,9 +12,9 @@ import {
DeleteBucketPolicyCommand, DeleteBucketPolicyCommand,
} from '@aws-sdk/client-s3'; } from '@aws-sdk/client-s3';
import { Readable } from 'stream'; import { Readable } from 'stream';
import * as smarts3 from '../ts/index.js'; import * as smartstorage from '../ts/index.js';
let testSmarts3Instance: smarts3.Smarts3; let testSmartStorageInstance: smartstorage.SmartStorage;
let authClient: S3Client; let authClient: S3Client;
let wrongClient: S3Client; let wrongClient: S3Client;
@@ -35,8 +35,8 @@ async function streamToString(stream: Readable): Promise<string> {
// Server setup // Server setup
// ============================ // ============================
tap.test('should start S3 server with auth enabled', async () => { tap.test('should start storage server with auth enabled', async () => {
testSmarts3Instance = await smarts3.Smarts3.createAndStart({ testSmartStorageInstance = await smartstorage.SmartStorage.createAndStart({
server: { server: {
port: TEST_PORT, port: TEST_PORT,
silent: true, silent: true,
@@ -294,8 +294,8 @@ tap.test('authenticated: delete the bucket', async () => {
expect(response.$metadata.httpStatusCode).toEqual(204); expect(response.$metadata.httpStatusCode).toEqual(204);
}); });
tap.test('should stop the S3 server', async () => { tap.test('should stop the storage server', async () => {
await testSmarts3Instance.stop(); await testSmartStorageInstance.stop();
}); });
export default tap.start(); export default tap.start();

View File

@@ -1,9 +1,9 @@
import { expect, tap } from '@git.zone/tstest/tapbundle'; import { expect, tap } from '@git.zone/tstest/tapbundle';
import { S3Client, CreateBucketCommand, ListBucketsCommand, PutObjectCommand, GetObjectCommand, DeleteObjectCommand, DeleteBucketCommand } from '@aws-sdk/client-s3'; import { S3Client, CreateBucketCommand, ListBucketsCommand, PutObjectCommand, GetObjectCommand, DeleteObjectCommand, DeleteBucketCommand } from '@aws-sdk/client-s3';
import { Readable } from 'stream'; import { Readable } from 'stream';
import * as smarts3 from '../ts/index.js'; import * as smartstorage from '../ts/index.js';
let testSmarts3Instance: smarts3.Smarts3; let testSmartStorageInstance: smartstorage.SmartStorage;
let s3Client: S3Client; let s3Client: S3Client;
// Helper to convert stream to string // Helper to convert stream to string
@@ -16,8 +16,8 @@ async function streamToString(stream: Readable): Promise<string> {
}); });
} }
tap.test('should start the S3 server and configure client', async () => { tap.test('should start the storage server and configure client', async () => {
testSmarts3Instance = await smarts3.Smarts3.createAndStart({ testSmartStorageInstance = await smartstorage.SmartStorage.createAndStart({
server: { server: {
port: 3337, port: 3337,
silent: true, silent: true,
@@ -27,7 +27,7 @@ tap.test('should start the S3 server and configure client', async () => {
}, },
}); });
const descriptor = await testSmarts3Instance.getS3Descriptor(); const descriptor = await testSmartStorageInstance.getStorageDescriptor();
s3Client = new S3Client({ s3Client = new S3Client({
endpoint: `http://${descriptor.endpoint}:${descriptor.port}`, endpoint: `http://${descriptor.endpoint}:${descriptor.port}`,
@@ -101,8 +101,8 @@ tap.test('should delete the bucket', async () => {
expect(response.$metadata.httpStatusCode).toEqual(204); expect(response.$metadata.httpStatusCode).toEqual(204);
}); });
tap.test('should stop the S3 server', async () => { tap.test('should stop the storage server', async () => {
await testSmarts3Instance.stop(); await testSmartStorageInstance.stop();
}); });
export default tap.start(); export default tap.start();

View File

@@ -14,9 +14,9 @@ import {
GetBucketPolicyCommand, GetBucketPolicyCommand,
DeleteBucketPolicyCommand, DeleteBucketPolicyCommand,
} from '@aws-sdk/client-s3'; } from '@aws-sdk/client-s3';
import * as smarts3 from '../ts/index.js'; import * as smartstorage from '../ts/index.js';
let testSmarts3Instance: smarts3.Smarts3; let testSmartStorageInstance: smartstorage.SmartStorage;
let authClient: S3Client; let authClient: S3Client;
const TEST_PORT = 3347; const TEST_PORT = 3347;
@@ -56,7 +56,7 @@ function denyStatement(action: string) {
// ============================ // ============================
tap.test('setup: start server, create bucket, upload object', async () => { tap.test('setup: start server, create bucket, upload object', async () => {
testSmarts3Instance = await smarts3.Smarts3.createAndStart({ testSmartStorageInstance = await smartstorage.SmartStorage.createAndStart({
server: { port: TEST_PORT, silent: true, region: 'us-east-1' }, server: { port: TEST_PORT, silent: true, region: 'us-east-1' },
storage: { cleanSlate: true }, storage: { cleanSlate: true },
auth: { auth: {
@@ -275,7 +275,7 @@ tap.test('ListAllMyBuckets always requires auth → anonymous fetch to / returns
tap.test('Auth disabled mode → anonymous full access works', async () => { tap.test('Auth disabled mode → anonymous full access works', async () => {
// Start a second server with auth disabled // Start a second server with auth disabled
const noAuthInstance = await smarts3.Smarts3.createAndStart({ const noAuthInstance = await smartstorage.SmartStorage.createAndStart({
server: { port: 3348, silent: true, region: 'us-east-1' }, server: { port: 3348, silent: true, region: 'us-east-1' },
storage: { cleanSlate: true }, storage: { cleanSlate: true },
auth: { enabled: false, credentials: [] }, auth: { enabled: false, credentials: [] },
@@ -329,7 +329,7 @@ tap.test('teardown: clean up and stop server', async () => {
} catch { } catch {
// May already be deleted // May already be deleted
} }
await testSmarts3Instance.stop(); await testSmartStorageInstance.stop();
}); });
export default tap.start(); export default tap.start();

View File

@@ -7,9 +7,9 @@ import {
GetBucketPolicyCommand, GetBucketPolicyCommand,
DeleteBucketPolicyCommand, DeleteBucketPolicyCommand,
} from '@aws-sdk/client-s3'; } from '@aws-sdk/client-s3';
import * as smarts3 from '../ts/index.js'; import * as smartstorage from '../ts/index.js';
let testSmarts3Instance: smarts3.Smarts3; let testSmartStorageInstance: smartstorage.SmartStorage;
let authClient: S3Client; let authClient: S3Client;
const TEST_PORT = 3345; const TEST_PORT = 3345;
@@ -33,8 +33,8 @@ const validStatement = {
// Server setup // Server setup
// ============================ // ============================
tap.test('setup: start S3 server with auth enabled', async () => { tap.test('setup: start storage server with auth enabled', async () => {
testSmarts3Instance = await smarts3.Smarts3.createAndStart({ testSmartStorageInstance = await smartstorage.SmartStorage.createAndStart({
server: { port: TEST_PORT, silent: true, region: 'us-east-1' }, server: { port: TEST_PORT, silent: true, region: 'us-east-1' },
storage: { cleanSlate: true }, storage: { cleanSlate: true },
auth: { auth: {
@@ -246,7 +246,7 @@ tap.test('Bucket deletion cleans up associated policy', async () => {
tap.test('teardown: delete bucket and stop server', async () => { tap.test('teardown: delete bucket and stop server', async () => {
await authClient.send(new DeleteBucketCommand({ Bucket: BUCKET })); await authClient.send(new DeleteBucketCommand({ Bucket: BUCKET }));
await testSmarts3Instance.stop(); await testSmartStorageInstance.stop();
}); });
export default tap.start(); export default tap.start();

View File

@@ -10,9 +10,9 @@ import {
DeleteBucketPolicyCommand, DeleteBucketPolicyCommand,
} from '@aws-sdk/client-s3'; } from '@aws-sdk/client-s3';
import { Readable } from 'stream'; import { Readable } from 'stream';
import * as smarts3 from '../ts/index.js'; import * as smartstorage from '../ts/index.js';
let testSmarts3Instance: smarts3.Smarts3; let testSmartStorageInstance: smartstorage.SmartStorage;
let authClient: S3Client; let authClient: S3Client;
const TEST_PORT = 3346; const TEST_PORT = 3346;
@@ -48,7 +48,7 @@ async function clearPolicy() {
// ============================ // ============================
tap.test('setup: start server, create bucket, upload object', async () => { tap.test('setup: start server, create bucket, upload object', async () => {
testSmarts3Instance = await smarts3.Smarts3.createAndStart({ testSmartStorageInstance = await smartstorage.SmartStorage.createAndStart({
server: { port: TEST_PORT, silent: true, region: 'us-east-1' }, server: { port: TEST_PORT, silent: true, region: 'us-east-1' },
storage: { cleanSlate: true }, storage: { cleanSlate: true },
auth: { auth: {
@@ -511,7 +511,7 @@ tap.test('Policy allows s3:ListBucket → anonymous GET bucket (list objects) su
tap.test('teardown: clean up and stop server', async () => { tap.test('teardown: clean up and stop server', async () => {
await authClient.send(new DeleteObjectCommand({ Bucket: BUCKET, Key: 'test-obj.txt' })); await authClient.send(new DeleteObjectCommand({ Bucket: BUCKET, Key: 'test-obj.txt' }));
await authClient.send(new DeleteBucketCommand({ Bucket: BUCKET })); await authClient.send(new DeleteBucketCommand({ Bucket: BUCKET }));
await testSmarts3Instance.stop(); await testSmartStorageInstance.stop();
}); });
export default tap.start(); export default tap.start();

View File

@@ -1,12 +1,12 @@
import { expect, tap } from '@git.zone/tstest/tapbundle'; import { expect, tap } from '@git.zone/tstest/tapbundle';
import * as plugins from './plugins.js'; import * as plugins from './plugins.js';
import * as smarts3 from '../ts/index.js'; import * as smartstorage from '../ts/index.js';
let testSmarts3Instance: smarts3.Smarts3; let testSmartStorageInstance: smartstorage.SmartStorage;
tap.test('should create a smarts3 instance and run it', async (toolsArg) => { tap.test('should create a smartstorage instance and run it', async (toolsArg) => {
testSmarts3Instance = await smarts3.Smarts3.createAndStart({ testSmartStorageInstance = await smartstorage.SmartStorage.createAndStart({
server: { server: {
port: 3333, port: 3333,
}, },
@@ -20,7 +20,7 @@ tap.test('should create a smarts3 instance and run it', async (toolsArg) => {
tap.test('should be able to access buckets', async () => { tap.test('should be able to access buckets', async () => {
const smartbucketInstance = new plugins.smartbucket.SmartBucket( const smartbucketInstance = new plugins.smartbucket.SmartBucket(
await testSmarts3Instance.getS3Descriptor(), await testSmartStorageInstance.getStorageDescriptor(),
); );
const bucket = await smartbucketInstance.createBucket('testbucket'); const bucket = await smartbucketInstance.createBucket('testbucket');
const baseDirectory = await bucket.getBaseDirectory(); const baseDirectory = await bucket.getBaseDirectory();
@@ -31,7 +31,7 @@ tap.test('should be able to access buckets', async () => {
}); });
tap.test('should stop the instance', async () => { tap.test('should stop the instance', async () => {
await testSmarts3Instance.stop(); await testSmartStorageInstance.stop();
}); });
tap.start(); tap.start();

View File

@@ -2,7 +2,7 @@
* autocreated commitinfo by @push.rocks/commitinfo * autocreated commitinfo by @push.rocks/commitinfo
*/ */
export const commitinfo = { export const commitinfo = {
name: '@push.rocks/smarts3', name: '@push.rocks/smartstorage',
version: '5.3.0', version: '6.3.0',
description: 'A Node.js TypeScript package to create a local S3 endpoint for simulating AWS S3 operations using mapped local directories for development and testing purposes.' description: 'A Node.js TypeScript package to create a local S3-compatible storage server using mapped local directories for development and testing purposes.'
} }

View File

@@ -70,9 +70,39 @@ export interface IStorageConfig {
} }
/** /**
* Complete smarts3 configuration * Erasure coding configuration
*/ */
export interface ISmarts3Config { export interface IErasureConfig {
dataShards?: number;
parityShards?: number;
chunkSizeBytes?: number;
}
/**
 * Drive configuration for multi-drive support.
 */
export interface IDriveConfig {
  /**
   * Required list of drive paths.
   * NOTE(review): presumably one filesystem location per drive; the expected
   * format is not visible in this file, so confirm against the consumer.
   */
  paths: string[];
}
/**
 * Cluster configuration for distributed mode.
 *
 * Only `enabled` is required; every other field may be omitted.
 * NOTE(review): what happens for omitted fields is not visible in this
 * section; confirm where their defaults are applied.
 */
export interface IClusterConfig {
  /** Switches distributed mode on or off. */
  enabled: boolean;
  /** Identifier for this node. NOTE(review): behaviour when omitted is not shown here. */
  nodeId?: string;
  /** Port used for QUIC traffic. NOTE(review): presumably inter-node transport; confirm. */
  quicPort?: number;
  /**
   * Seed node entries.
   * NOTE(review): presumably addresses of existing nodes used to join the
   * cluster; the expected string format is not visible here.
   */
  seedNodes?: string[];
  /** Erasure coding settings; see `IErasureConfig`. */
  erasure?: IErasureConfig;
  /** Drive settings; see `IDriveConfig`. */
  drives?: IDriveConfig;
  /** Heartbeat interval, in milliseconds per the field name. */
  heartbeatIntervalMs?: number;
  /** Heartbeat timeout, in milliseconds per the field name. */
  heartbeatTimeoutMs?: number;
}
/**
* Complete smartstorage configuration
*/
export interface ISmartStorageConfig {
server?: IServerConfig; server?: IServerConfig;
storage?: IStorageConfig; storage?: IStorageConfig;
auth?: IAuthConfig; auth?: IAuthConfig;
@@ -80,12 +110,13 @@ export interface ISmarts3Config {
logging?: ILoggingConfig; logging?: ILoggingConfig;
limits?: ILimitsConfig; limits?: ILimitsConfig;
multipart?: IMultipartConfig; multipart?: IMultipartConfig;
cluster?: IClusterConfig;
} }
/** /**
* Default configuration values * Default configuration values
*/ */
const DEFAULT_CONFIG: ISmarts3Config = { const DEFAULT_CONFIG: ISmartStorageConfig = {
server: { server: {
port: 3000, port: 3000,
address: '0.0.0.0', address: '0.0.0.0',
@@ -100,8 +131,8 @@ const DEFAULT_CONFIG: ISmarts3Config = {
enabled: false, enabled: false,
credentials: [ credentials: [
{ {
accessKeyId: 'S3RVER', accessKeyId: 'STORAGE',
secretAccessKey: 'S3RVER', secretAccessKey: 'STORAGE',
}, },
], ],
}, },
@@ -133,7 +164,7 @@ const DEFAULT_CONFIG: ISmarts3Config = {
/** /**
* Merge user config with defaults (deep merge) * Merge user config with defaults (deep merge)
*/ */
function mergeConfig(userConfig: ISmarts3Config): Required<ISmarts3Config> { function mergeConfig(userConfig: ISmartStorageConfig): Required<ISmartStorageConfig> {
return { return {
server: { server: {
...DEFAULT_CONFIG.server!, ...DEFAULT_CONFIG.server!,
@@ -163,41 +194,40 @@ function mergeConfig(userConfig: ISmarts3Config): Required<ISmarts3Config> {
...DEFAULT_CONFIG.multipart!, ...DEFAULT_CONFIG.multipart!,
...(userConfig.multipart || {}), ...(userConfig.multipart || {}),
}, },
}; ...(userConfig.cluster ? { cluster: userConfig.cluster } : {}),
} as Required<ISmartStorageConfig>;
} }
/** /**
* IPC command type map for RustBridge * IPC command type map for RustBridge
*/ */
type TRustS3Commands = { type TRustStorageCommands = {
start: { params: { config: Required<ISmarts3Config> }; result: {} }; start: { params: { config: Required<ISmartStorageConfig> }; result: {} };
stop: { params: {}; result: {} }; stop: { params: {}; result: {} };
createBucket: { params: { name: string }; result: {} }; createBucket: { params: { name: string }; result: {} };
}; };
/** /**
* Main Smarts3 class - production-ready S3-compatible server * Main SmartStorage class - production-ready S3-compatible storage server
*/ */
export class Smarts3 { export class SmartStorage {
// STATIC // STATIC
public static async createAndStart(configArg: ISmarts3Config = {}) { public static async createAndStart(configArg: ISmartStorageConfig = {}) {
const smartS3Instance = new Smarts3(configArg); const smartStorageInstance = new SmartStorage(configArg);
await smartS3Instance.start(); await smartStorageInstance.start();
return smartS3Instance; return smartStorageInstance;
} }
// INSTANCE // INSTANCE
public config: Required<ISmarts3Config>; public config: Required<ISmartStorageConfig>;
private bridge: InstanceType<typeof plugins.RustBridge<TRustS3Commands>>; private bridge: InstanceType<typeof plugins.RustBridge<TRustStorageCommands>>;
constructor(configArg: ISmarts3Config = {}) { constructor(configArg: ISmartStorageConfig = {}) {
this.config = mergeConfig(configArg); this.config = mergeConfig(configArg);
this.bridge = new plugins.RustBridge<TRustS3Commands>({ this.bridge = new plugins.RustBridge<TRustStorageCommands>({
binaryName: 'rusts3', binaryName: 'ruststorage',
localPaths: [ localPaths: [
plugins.path.join(paths.packageDir, 'dist_rust', 'rusts3'), plugins.path.join(paths.packageDir, 'dist_rust', 'ruststorage'),
plugins.path.join(paths.packageDir, 'rust', 'target', 'release', 'rusts3'),
plugins.path.join(paths.packageDir, 'rust', 'target', 'debug', 'rusts3'),
], ],
readyTimeoutMs: 30000, readyTimeoutMs: 30000,
requestTimeoutMs: 300000, requestTimeoutMs: 300000,
@@ -207,21 +237,21 @@ export class Smarts3 {
public async start() { public async start() {
const spawned = await this.bridge.spawn(); const spawned = await this.bridge.spawn();
if (!spawned) { if (!spawned) {
throw new Error('Failed to spawn rusts3 binary. Make sure it is compiled (pnpm build).'); throw new Error('Failed to spawn ruststorage binary. Make sure it is compiled (pnpm build).');
} }
await this.bridge.sendCommand('start', { config: this.config }); await this.bridge.sendCommand('start', { config: this.config });
if (!this.config.server.silent) { if (!this.config.server.silent) {
console.log('s3 server is running'); console.log('storage server is running');
} }
} }
public async getS3Descriptor( public async getStorageDescriptor(
optionsArg?: Partial<plugins.tsclass.storage.IS3Descriptor>, optionsArg?: Partial<plugins.tsclass.storage.IS3Descriptor>,
): Promise<plugins.tsclass.storage.IS3Descriptor> { ): Promise<plugins.tsclass.storage.IS3Descriptor> {
const cred = this.config.auth.credentials[0] || { const cred = this.config.auth.credentials[0] || {
accessKeyId: 'S3RVER', accessKeyId: 'STORAGE',
secretAccessKey: 'S3RVER', secretAccessKey: 'STORAGE',
}; };
const descriptor: plugins.tsclass.storage.IS3Descriptor = { const descriptor: plugins.tsclass.storage.IS3Descriptor = {