Compare commits

..

13 Commits

Author SHA1 Message Date
a009d990d0 v6.3.0
Some checks failed
Default (tags) / security (push) Failing after 0s
Default (tags) / test (push) Failing after 0s
Default (tags) / release (push) Has been skipped
Default (tags) / metadata (push) Has been skipped
2026-03-21 22:04:36 +00:00
08d545f5db feat(readme): document distributed cluster mode, erasure coding, and QUIC-based architecture 2026-03-21 22:04:36 +00:00
a0a282c712 v6.2.0
Some checks failed
Default (tags) / security (push) Failing after 0s
Default (tags) / test (push) Failing after 0s
Default (tags) / release (push) Has been skipped
Default (tags) / metadata (push) Has been skipped
2026-03-21 22:00:41 +00:00
3eb0045676 feat(cluster): add shard healing, drive health heartbeats, and clustered policy directory support 2026-03-21 22:00:41 +00:00
639eb5d36c v6.1.0
Some checks failed
Default (tags) / security (push) Failing after 0s
Default (tags) / test (push) Failing after 0s
Default (tags) / release (push) Has been skipped
Default (tags) / metadata (push) Has been skipped
2026-03-21 21:50:42 +00:00
d12d321079 feat(cluster): add clustered storage backend with QUIC transport, erasure coding, and shard management 2026-03-21 21:50:42 +00:00
4fcd05d3c6 v6.0.1
Some checks failed
Default (tags) / security (push) Successful in 34s
Default (tags) / test (push) Failing after 35s
Default (tags) / release (push) Has been skipped
Default (tags) / metadata (push) Has been skipped
2026-03-14 23:46:12 +00:00
503e25ff98 fix(rust-bridge): update smartrust and limit RustBridge binary lookup to dist_rust 2026-03-14 23:46:12 +00:00
bba0855218 BREAKING CHANGE(core): rebrand from smarts3 to smartstorage
Some checks failed
Default (tags) / security (push) Successful in 43s
Default (tags) / test (push) Failing after 26s
Default (tags) / release (push) Has been skipped
Default (tags) / metadata (push) Has been skipped
- Package renamed from @push.rocks/smarts3 to @push.rocks/smartstorage
- Class: Smarts3 → SmartStorage, Interface: ISmarts3Config → ISmartStorageConfig
- Method: getS3Descriptor → getStorageDescriptor
- Rust binary: rusts3 → ruststorage
- Rust types: S3Error→StorageError, S3Action→StorageAction, S3Config→SmartStorageConfig, S3Server→StorageServer
- On-disk file extension: ._S3_object → ._storage_object
- Default credentials: S3RVER → STORAGE
- All internal S3 branding removed; AWS S3 protocol compatibility fully maintained
2026-03-14 15:20:30 +00:00
d437ffc226 v5.3.0
Some checks failed
Default (tags) / security (push) Successful in 37s
Default (tags) / test (push) Failing after 26s
Default (tags) / release (push) Has been skipped
Default (tags) / metadata (push) Has been skipped
2026-02-17 16:50:04 +00:00
e36758f183 feat(auth): add AWS SigV4 authentication and bucket policy support 2026-02-17 16:50:04 +00:00
adf45dce2d v5.2.0
Some checks failed
Default (tags) / security (push) Successful in 40s
Default (tags) / test (push) Failing after 27s
Default (tags) / release (push) Has been skipped
Default (tags) / metadata (push) Has been skipped
2026-02-17 16:28:50 +00:00
eb232b6e8e feat(auth,policy): add AWS SigV4 authentication and S3 bucket policy support 2026-02-17 16:28:50 +00:00
40 changed files with 11344 additions and 3824 deletions

View File

@@ -1,5 +1,67 @@
# Changelog # Changelog
## 2026-03-21 - 6.3.0 - feat(readme)
document distributed cluster mode, erasure coding, and QUIC-based architecture
- Expand README overview and feature matrix to highlight clustering, multi-drive awareness, and distributed storage capabilities
- Add standalone and cluster mode usage examples plus cluster configuration options
- Document clustering internals including erasure coding, quorum behavior, QUIC transport, self-healing, and on-disk layout
## 2026-03-21 - 6.2.0 - feat(cluster)
add shard healing, drive health heartbeats, and clustered policy directory support
- implements manifest-based healing that scans affected shards on offline nodes, reconstructs data with erasure coding, and rewrites recovered shards to local storage
- includes drive status reporting in membership heartbeats by wiring DriveManager health checks into cluster heartbeat messages
- adds clustered policies directory initialization and exposes policy storage paths from the distributed coordinator
- extends distributed coordinator support for remote shard read and delete operations plus multipart upload session metadata
## 2026-03-21 - 6.1.0 - feat(cluster)
add clustered storage backend with QUIC transport, erasure coding, and shard management
- introduces cluster configuration in Rust and TypeScript, including seed nodes, drive paths, heartbeat settings, and erasure coding options
- adds core cluster modules for membership, topology state, object manifests, placement, shard storage, drive management, healing scaffolding, and inter-node protocol handling
- adds QUIC-based transport for cluster communication and integrates a distributed storage backend alongside the existing standalone FileStore
- updates the server startup path to initialize standalone or clustered storage based on configuration and exposes a basic clusterStatus management endpoint
- refreshes build and dependency versions to support the new clustered storage implementation
## 2026-03-14 - 6.0.1 - fix(rust-bridge)
update smartrust and limit RustBridge binary lookup to dist_rust
- Bumps @push.rocks/smartrust from ^1.0.0 to ^1.3.2.
- Removes rust target debug and release fallback paths from RustBridge local binary resolution, relying on dist_rust/ruststorage.
## 2026-03-14 - 6.0.0 - BREAKING CHANGE(core)
Rebrand from smarts3 to smartstorage
- Package renamed from @push.rocks/smarts3 to @push.rocks/smartstorage
- Class renamed from Smarts3 to SmartStorage (no backward-compatible re-export)
- Interface renamed from ISmarts3Config to ISmartStorageConfig
- Method renamed from getS3Descriptor to getStorageDescriptor
- Rust binary renamed from rusts3 to ruststorage
- Rust types renamed: S3Error→StorageError, S3Action→StorageAction, S3Config→SmartStorageConfig, S3Server→StorageServer
- On-disk file extension changed from ._S3_object to ._storage_object (BREAKING for existing stored data)
- Default credentials changed from S3RVER to STORAGE
- All internal S3 branding removed; AWS S3 protocol compatibility (IAM actions, ARNs, SigV4) fully maintained
## 2026-02-17 - 5.3.0 - feat(auth)
add AWS SigV4 authentication and bucket policy support
- Implement AWS SigV4 full verification (constant-time comparison, 15-minute clock skew enforcement) and expose default signing region (server.region = 'us-east-1').
- Add IAM-style bucket policy engine with Put/Get/Delete policy APIs (GetBucketPolicy/PutBucketPolicy/DeleteBucketPolicy), wildcard action/resource matching, Allow/Deny evaluation, and on-disk persistence under .policies/{bucket}.policy.json.
- Documentation and README expanded with policy usage, examples, API table entries, and notes about policy CRUD and behavior for anonymous/authenticated requests.
- Rust code refactors: simplify storage/server result structs and multipart handling (removed several unused size/key/bucket fields), remove S3Error::to_response and error_xml helpers, and other internal cleanup to support new auth/policy features.
## 2026-02-17 - 5.2.0 - feat(auth,policy)
add AWS SigV4 authentication and S3 bucket policy support
- Implemented real AWS SigV4 verification (HMAC-SHA256), including x-amz-date handling, clock skew enforcement and constant-time signature comparison
- Added bucket policy model, validator and evaluation engine (Deny > Allow > NoOpinion) with a PolicyStore (RwLock cache + disk-backed .policies/*.policy.json)
- Integrated action resolution and auth+policy pipeline into the HTTP server: authorization checks run per-request, anonymous requests are denied by default, ListAllMyBuckets requires authentication
- Added bucket policy CRUD handlers via ?policy query parameter (GET/PUT/DELETE) and cleanup of policies on bucket deletion
- Storage and config updates: created .policies dir and policy path helpers; default region added to server config (TS + Rust)
- Added comprehensive tests for auth and policy behavior (policy CRUD, evaluation, per-action enforcement, auth integration)
- Updated Rust dependencies and Cargo.toml/Cargo.lock to include hmac, sha2, hex, subtle, cpufeatures
## 2026-02-13 - 5.1.1 - fix(smarts3) ## 2026-02-13 - 5.1.1 - fix(smarts3)
replace TypeScript server with Rust-powered core and IPC bridge replace TypeScript server with Rust-powered core and IPC bridge

View File

@@ -10,14 +10,15 @@
"module": { "module": {
"githost": "code.foss.global", "githost": "code.foss.global",
"gitscope": "push.rocks", "gitscope": "push.rocks",
"gitrepo": "smarts3", "gitrepo": "smartstorage",
"description": "A Node.js TypeScript package to create a local S3 endpoint for simulating AWS S3 operations using mapped local directories for development and testing purposes.", "description": "A Node.js TypeScript package to create a local S3-compatible storage server using mapped local directories for development and testing purposes.",
"npmPackagename": "@push.rocks/smarts3", "npmPackagename": "@push.rocks/smartstorage",
"license": "MIT", "license": "MIT",
"projectDomain": "push.rocks", "projectDomain": "push.rocks",
"keywords": [ "keywords": [
"S3 Mock Server", "smartstorage",
"Local S3", "S3 Compatible",
"Local Storage Server",
"Node.js", "Node.js",
"TypeScript", "TypeScript",
"Local Development", "Local Development",
@@ -26,8 +27,8 @@
"File Storage", "File Storage",
"AWS S3 Compatibility", "AWS S3 Compatibility",
"Development Tool", "Development Tool",
"S3 Endpoint", "Storage Endpoint",
"S3 Simulation", "Storage Simulation",
"Bucket Management", "Bucket Management",
"File Upload", "File Upload",
"CI/CD Integration", "CI/CD Integration",

View File

@@ -1,27 +1,28 @@
{ {
"name": "@push.rocks/smarts3", "name": "@push.rocks/smartstorage",
"version": "5.1.1", "version": "6.3.0",
"private": false, "private": false,
"description": "A Node.js TypeScript package to create a local S3 endpoint for simulating AWS S3 operations using mapped local directories for development and testing purposes.", "description": "A Node.js TypeScript package to create a local S3-compatible storage server using mapped local directories for development and testing purposes.",
"main": "dist_ts/index.js", "main": "dist_ts/index.js",
"typings": "dist_ts/index.d.ts", "typings": "dist_ts/index.d.ts",
"type": "module", "type": "module",
"author": "Lossless GmbH", "author": "Lossless GmbH",
"license": "MIT", "license": "MIT",
"scripts": { "scripts": {
"test:before": "(tsrust)",
"test": "(tstest test/ --web --verbose --logfile --timeout 60)", "test": "(tstest test/ --web --verbose --logfile --timeout 60)",
"build": "(tsrust && tsbuild --web --allowimplicitany)", "build": "(tsrust && tsbuild tsfolders --allowimplicitany)",
"buildDocs": "tsdoc" "buildDocs": "tsdoc"
}, },
"devDependencies": { "devDependencies": {
"@aws-sdk/client-s3": "^3.937.0", "@aws-sdk/client-s3": "^3.1014.0",
"@git.zone/tsbuild": "^3.1.0", "@git.zone/tsbuild": "^4.3.0",
"@git.zone/tsbundle": "^2.5.2", "@git.zone/tsbundle": "^2.9.1",
"@git.zone/tsrun": "^2.0.0", "@git.zone/tsrun": "^2.0.1",
"@git.zone/tstest": "^3.1.0",
"@push.rocks/smartbucket": "^4.3.0",
"@git.zone/tsrust": "^1.3.0", "@git.zone/tsrust": "^1.3.0",
"@types/node": "^22.9.0" "@git.zone/tstest": "^3.5.0",
"@push.rocks/smartbucket": "^4.5.1",
"@types/node": "^25.5.0"
}, },
"browserslist": [ "browserslist": [
"last 1 chrome versions" "last 1 chrome versions"
@@ -41,12 +42,13 @@
], ],
"dependencies": { "dependencies": {
"@push.rocks/smartpath": "^6.0.0", "@push.rocks/smartpath": "^6.0.0",
"@push.rocks/smartrust": "^1.0.0", "@push.rocks/smartrust": "^1.3.2",
"@tsclass/tsclass": "^9.3.0" "@tsclass/tsclass": "^9.5.0"
}, },
"keywords": [ "keywords": [
"S3 Mock Server", "smartstorage",
"Local S3", "S3 Compatible",
"Local Storage Server",
"Node.js", "Node.js",
"TypeScript", "TypeScript",
"Local Development", "Local Development",
@@ -55,20 +57,20 @@
"File Storage", "File Storage",
"AWS S3 Compatibility", "AWS S3 Compatibility",
"Development Tool", "Development Tool",
"S3 Endpoint", "Storage Endpoint",
"S3 Simulation", "Storage Simulation",
"Bucket Management", "Bucket Management",
"File Upload", "File Upload",
"CI/CD Integration", "CI/CD Integration",
"Developer Onboarding" "Developer Onboarding"
], ],
"homepage": "https://code.foss.global/push.rocks/smarts3#readme", "homepage": "https://code.foss.global/push.rocks/smartstorage#readme",
"repository": { "repository": {
"type": "git", "type": "git",
"url": "https://code.foss.global/push.rocks/smarts3.git" "url": "ssh://git@code.foss.global:29419/push.rocks/smartstorage.git"
}, },
"bugs": { "bugs": {
"url": "https://code.foss.global/push.rocks/smarts3/issues" "url": "https://code.foss.global/push.rocks/smartstorage/issues"
}, },
"packageManager": "pnpm@10.14.0+sha512.ad27a79641b49c3e481a16a805baa71817a04bbe06a38d17e60e2eaee83f6a146c6a688125f5792e48dd5ba30e7da52a5cda4c3992b9ccf333f9ce223af84748", "packageManager": "pnpm@10.14.0+sha512.ad27a79641b49c3e481a16a805baa71817a04bbe06a38d17e60e2eaee83f6a146c6a688125f5792e48dd5ba30e7da52a5cda4c3992b9ccf333f9ce223af84748",
"pnpm": { "pnpm": {

6243
pnpm-lock.yaml generated

File diff suppressed because it is too large Load Diff

View File

@@ -1,6 +1,6 @@
# Production-Readiness Plan for smarts3 # Production-Readiness Plan for smartstorage
**Goal:** Make smarts3 production-ready as a MinIO alternative for use cases where: **Goal:** Make smartstorage production-ready as a MinIO alternative for use cases where:
- Running MinIO is out of scope - Running MinIO is out of scope
- You have a program written for S3 and want to use the local filesystem - You have a program written for S3 and want to use the local filesystem
- You need a lightweight, zero-dependency S3-compatible server - You need a lightweight, zero-dependency S3-compatible server
@@ -31,7 +31,7 @@
### 1. Multipart Upload Support 🚀 **HIGHEST PRIORITY** ### 1. Multipart Upload Support 🚀 **HIGHEST PRIORITY**
**Why:** Essential for uploading files >5MB efficiently. Without this, smarts3 can't handle real-world production workloads. **Why:** Essential for uploading files >5MB efficiently. Without this, smartstorage can't handle real-world production workloads.
**Implementation Required:** **Implementation Required:**
- `POST /:bucket/:key?uploads` - CreateMultipartUpload - `POST /:bucket/:key?uploads` - CreateMultipartUpload
@@ -46,13 +46,13 @@
**Files to Create/Modify:** **Files to Create/Modify:**
- `ts/controllers/multipart.controller.ts` (new) - `ts/controllers/multipart.controller.ts` (new)
- `ts/classes/filesystem-store.ts` (add multipart methods) - `ts/classes/filesystem-store.ts` (add multipart methods)
- `ts/classes/smarts3-server.ts` (add multipart routes) - `ts/classes/smartstorage-server.ts` (add multipart routes)
--- ---
### 2. Configurable Authentication 🔐 ### 2. Configurable Authentication 🔐
**Why:** Currently hardcoded credentials ('S3RVER'/'S3RVER'). Production needs custom credentials. **Why:** Currently hardcoded credentials ('STORAGE'/'STORAGE'). Production needs custom credentials.
**Implementation Required:** **Implementation Required:**
- Support custom access keys and secrets via configuration - Support custom access keys and secrets via configuration
@@ -75,7 +75,7 @@ interface IAuthConfig {
**Files to Create/Modify:** **Files to Create/Modify:**
- `ts/classes/auth-middleware.ts` (new) - `ts/classes/auth-middleware.ts` (new)
- `ts/classes/signature-validator.ts` (new) - `ts/classes/signature-validator.ts` (new)
- `ts/classes/smarts3-server.ts` (integrate auth middleware) - `ts/classes/smartstorage-server.ts` (integrate auth middleware)
- `ts/index.ts` (add auth config options) - `ts/index.ts` (add auth config options)
--- ---
@@ -105,7 +105,7 @@ interface ICorsConfig {
**Files to Create/Modify:** **Files to Create/Modify:**
- `ts/classes/cors-middleware.ts` (new) - `ts/classes/cors-middleware.ts` (new)
- `ts/classes/smarts3-server.ts` (integrate CORS middleware) - `ts/classes/smartstorage-server.ts` (integrate CORS middleware)
- `ts/index.ts` (add CORS config options) - `ts/index.ts` (add CORS config options)
--- ---
@@ -131,7 +131,7 @@ interface ISslConfig {
``` ```
**Files to Create/Modify:** **Files to Create/Modify:**
- `ts/classes/smarts3-server.ts` (add HTTPS server creation) - `ts/classes/smartstorage-server.ts` (add HTTPS server creation)
- `ts/index.ts` (add SSL config options) - `ts/index.ts` (add SSL config options)
--- ---
@@ -147,7 +147,7 @@ interface ISslConfig {
- Sensible production defaults - Sensible production defaults
- Example configurations for common use cases - Example configurations for common use cases
**Configuration File Example (`smarts3.config.json`):** **Configuration File Example (`smartstorage.config.json`):**
```json ```json
{ {
"server": { "server": {
@@ -220,7 +220,7 @@ interface ISslConfig {
**Files to Create/Modify:** **Files to Create/Modify:**
- `ts/classes/logger.ts` (new - use @push.rocks/smartlog?) - `ts/classes/logger.ts` (new - use @push.rocks/smartlog?)
- `ts/classes/access-logger-middleware.ts` (new) - `ts/classes/access-logger-middleware.ts` (new)
- `ts/classes/smarts3-server.ts` (replace console.log with logger) - `ts/classes/smartstorage-server.ts` (replace console.log with logger)
- All controller files (use structured logging) - All controller files (use structured logging)
--- ---
@@ -238,7 +238,7 @@ interface ISslConfig {
**Files to Create/Modify:** **Files to Create/Modify:**
- `ts/controllers/health.controller.ts` (new) - `ts/controllers/health.controller.ts` (new)
- `ts/classes/metrics-collector.ts` (new) - `ts/classes/metrics-collector.ts` (new)
- `ts/classes/smarts3-server.ts` (add health routes) - `ts/classes/smartstorage-server.ts` (add health routes)
--- ---
@@ -266,7 +266,7 @@ interface ISslConfig {
**Files to Create/Modify:** **Files to Create/Modify:**
- `ts/classes/validation-middleware.ts` (new) - `ts/classes/validation-middleware.ts` (new)
- `ts/utils/validators.ts` (new) - `ts/utils/validators.ts` (new)
- `ts/classes/smarts3-server.ts` (integrate validation middleware) - `ts/classes/smartstorage-server.ts` (integrate validation middleware)
--- ---
@@ -291,7 +291,7 @@ interface ISslConfig {
- SIGTERM/SIGINT handling - SIGTERM/SIGINT handling
**Files to Create/Modify:** **Files to Create/Modify:**
- `ts/classes/smarts3-server.ts` (add graceful shutdown logic) - `ts/classes/smartstorage-server.ts` (add graceful shutdown logic)
- `ts/index.ts` (add signal handlers) - `ts/index.ts` (add signal handlers)
--- ---
@@ -336,7 +336,7 @@ interface ISslConfig {
4. ✅ Production configuration system 4. ✅ Production configuration system
5. ✅ Production logging 5. ✅ Production logging
**Outcome:** smarts3 can handle real production workloads **Outcome:** smartstorage can handle real production workloads
--- ---
@@ -350,7 +350,7 @@ interface ISslConfig {
9. ✅ Graceful shutdown 9. ✅ Graceful shutdown
10. ✅ Batch operations 10. ✅ Batch operations
**Outcome:** smarts3 is operationally mature **Outcome:** smartstorage is operationally mature
--- ---
@@ -363,7 +363,7 @@ interface ISslConfig {
13. ✅ Comprehensive test suite 13. ✅ Comprehensive test suite
14. ✅ Documentation updates 14. ✅ Documentation updates
**Outcome:** smarts3 has broad S3 API compatibility **Outcome:** smartstorage has broad S3 API compatibility
--- ---
@@ -375,7 +375,7 @@ interface ISslConfig {
16. ✅ Performance optimization 16. ✅ Performance optimization
17. ✅ Advanced features based on user feedback 17. ✅ Advanced features based on user feedback
**Outcome:** smarts3 is a complete MinIO alternative **Outcome:** smartstorage is a complete MinIO alternative
--- ---
@@ -392,7 +392,7 @@ interface ISslConfig {
## 🎯 Target Use Cases ## 🎯 Target Use Cases
**With this plan implemented, smarts3 will be a solid MinIO alternative for:** **With this plan implemented, smartstorage will be a solid MinIO alternative for:**
**Local S3 development** - Fast, simple, no Docker required **Local S3 development** - Fast, simple, no Docker required
**Testing S3 integrations** - Reliable, repeatable tests **Testing S3 integrations** - Reliable, repeatable tests

View File

@@ -1,13 +1,14 @@
# Project Hints for smarts3 # Project Hints for smartstorage
## Current State (v6.0.0-dev) ## Current State (v6.0.0)
- **Rust-powered S3 server** via `@push.rocks/smartrust` IPC bridge - **Rust-powered S3-compatible storage server** via `@push.rocks/smartrust` IPC bridge
- High-performance: streaming I/O, zero-copy, backpressure, range seek - High-performance: streaming I/O, zero-copy, backpressure, range seek
- TypeScript is thin IPC wrapper; all HTTP/storage/routing in Rust binary `rusts3` - TypeScript is thin IPC wrapper; all HTTP/storage/routing in Rust binary `ruststorage`
- Full S3 compatibility: PUT, GET, HEAD, DELETE for objects and buckets - Full S3 compatibility: PUT, GET, HEAD, DELETE for objects and buckets
- Multipart upload support (streaming, no OOM) - Multipart upload support (streaming, no OOM)
- Authentication (AWS v2/v4 signature key extraction) - **Real AWS SigV4 authentication** (cryptographic signature verification)
- **Bucket policies** (AWS/MinIO-compatible JSON policies, public access support)
- CORS support - CORS support
- ListBuckets, ListObjects (v1/v2), CopyObject - ListBuckets, ListObjects (v1/v2), CopyObject
@@ -15,35 +16,39 @@
### Rust Binary (`rust/src/`) ### Rust Binary (`rust/src/`)
- `main.rs` - Clap CLI, management mode entry - `main.rs` - Clap CLI, management mode entry
- `config.rs` - Serde config structs matching TS interfaces - `config.rs` - Serde config structs matching TS interfaces (includes `region`)
- `management.rs` - IPC loop (newline-delimited JSON over stdin/stdout) - `management.rs` - IPC loop (newline-delimited JSON over stdin/stdout)
- `server.rs` - hyper 1.x HTTP server, routing, CORS, auth, all S3 handlers - `server.rs` - hyper 1.x HTTP server, routing, CORS, auth+policy pipeline, all S3-compatible handlers
- `storage.rs` - FileStore: filesystem-backed storage, multipart manager - `storage.rs` - FileStore: filesystem-backed storage, multipart manager, `.policies/` dir
- `xml_response.rs` - S3 XML response builders - `xml_response.rs` - S3-compatible XML response builders
- `s3_error.rs` - S3 error codes with HTTP status mapping - `error.rs` - StorageError codes with HTTP status mapping
- `auth.rs` - AWS SigV4 signature verification (HMAC-SHA256, clock skew, constant-time compare)
- `action.rs` - StorageAction enum + request-to-IAM-action resolver + RequestContext
- `policy.rs` - BucketPolicy model, evaluation engine (Deny > Allow > NoOpinion), PolicyStore (RwLock cache + disk)
### TypeScript Bridge (`ts/`) ### TypeScript Bridge (`ts/`)
- `ts/index.ts` - Smarts3 class with RustBridge<TRustS3Commands> - `ts/index.ts` - SmartStorage class with RustBridge<TRustStorageCommands>
- `ts/plugins.ts` - path, smartpath, RustBridge, tsclass - `ts/plugins.ts` - path, smartpath, RustBridge, tsclass
- `ts/paths.ts` - packageDir, bucketsDir defaults - `ts/paths.ts` - packageDir, bucketsDir defaults
### IPC Commands ### IPC Commands
| Command | Params | Action | | Command | Params | Action |
|---------|--------|--------| |---------|--------|--------|
| `start` | `{ config: ISmarts3Config }` | Init storage + HTTP server | | `start` | `{ config: ISmartStorageConfig }` | Init storage + HTTP server |
| `stop` | `{}` | Graceful shutdown | | `stop` | `{}` | Graceful shutdown |
| `createBucket` | `{ name: string }` | Create bucket directory | | `createBucket` | `{ name: string }` | Create bucket directory |
### Storage Layout (backward-compatible) ### Storage Layout
- Objects: `{root}/{bucket}/{key}._S3_object` - Objects: `{root}/{bucket}/{key}._storage_object`
- Metadata: `{root}/{bucket}/{key}._S3_object.metadata.json` - Metadata: `{root}/{bucket}/{key}._storage_object.metadata.json`
- MD5: `{root}/{bucket}/{key}._S3_object.md5` - MD5: `{root}/{bucket}/{key}._storage_object.md5`
- Multipart: `{root}/.multipart/{upload_id}/part-{N}` - Multipart: `{root}/.multipart/{upload_id}/part-{N}`
- Policies: `{root}/.policies/{bucket}.policy.json`
## Build ## Build
- `pnpm build` runs `tsrust && tsbuild --web --allowimplicitany` - `pnpm build` runs `tsrust && tsbuild --web --allowimplicitany`
- `tsrust` compiles Rust to `dist_rust/rusts3` - `tsrust` compiles Rust to `dist_rust/ruststorage`
- Targets: linux_amd64, linux_arm64 (configured in npmextra.json) - Targets: linux_amd64, linux_arm64 (configured in npmextra.json)
## Dependencies ## Dependencies
@@ -55,6 +60,10 @@
## Testing ## Testing
- `test/test.aws-sdk.node.ts` - AWS SDK v3 compatibility (10 tests) - `test/test.aws-sdk.node.ts` - AWS SDK v3 compatibility (10 tests, auth disabled, port 3337)
- `test/test.auth.node.ts` - Auth + bucket policy integration (20 tests, auth enabled, port 3344)
- `test/test.policy-crud.node.ts` - Policy API CRUD + validation edge cases (17 tests, port 3345)
- `test/test.policy-eval.node.ts` - Policy evaluation: principals, actions, resources, deny-vs-allow (22 tests, port 3346)
- `test/test.policy-actions.node.ts` - Per-action policy enforcement (15 tests, port 3347)
- `test/test.ts` - SmartBucket integration (3 tests) - `test/test.ts` - SmartBucket integration (3 tests)
- Run: `pnpm test` or `tstest test/test.aws-sdk.node.ts --verbose` - Run: `pnpm test` or `tstest test/test.aws-sdk.node.ts --verbose`

376
readme.md
View File

@@ -1,78 +1,119 @@
# @push.rocks/smarts3 🚀 # @push.rocks/smartstorage
A high-performance, S3-compatible local server powered by a **Rust core** with a clean TypeScript API. Drop-in replacement for AWS S3 during development and testing — no cloud, no Docker, no MinIO. Just `npm install` and go. A high-performance, S3-compatible storage server powered by a **Rust core** with a clean TypeScript API. Runs standalone for dev/test — or scales out as a **distributed, erasure-coded cluster** with QUIC-based inter-node communication. No cloud, no Docker. Just `npm install` and go. 🚀
## Issue Reporting and Security ## Issue Reporting and Security
For reporting bugs, issues, or security vulnerabilities, please visit [community.foss.global/](https://community.foss.global/). This is the central community hub for all issue reporting. Developers who sign and comply with our contribution agreement and go through identification can also get a [code.foss.global/](https://code.foss.global/) account to submit Pull Requests directly. For reporting bugs, issues, or security vulnerabilities, please visit [community.foss.global/](https://community.foss.global/). This is the central community hub for all issue reporting. Developers who sign and comply with our contribution agreement and go through identification can also get a [code.foss.global/](https://code.foss.global/) account to submit Pull Requests directly.
## 🌟 Why smarts3? ## Why smartstorage?
| Feature | smarts3 | MinIO | s3rver | | Feature | smartstorage | MinIO | s3rver |
|---------|---------|-------|--------| |---------|-------------|-------|--------|
| Install | `pnpm add` | Docker / binary | `npm install` | | Install | `pnpm add` | Docker / binary | `npm install` |
| Startup time | ~20ms | seconds | ~200ms | | Startup time | ~20ms | seconds | ~200ms |
| Large file uploads | Streaming, zero-copy | ✅ | ❌ OOM risk | | Large file uploads | Streaming, zero-copy | Yes | OOM risk |
| Range requests | Seek-based | ✅ | ❌ Full read | | Range requests | Seek-based | Yes | Full read |
| Language | Rust + TypeScript | Go | JavaScript | | Language | Rust + TypeScript | Go | JavaScript |
| Multipart uploads | ✅ Full support | ✅ | ❌ | | Multipart uploads | ✅ Full support | Yes | No |
| Auth | AWS v2/v4 key extraction | Full IAM | Basic | | Auth | AWS SigV4 (full verification) | Full IAM | Basic |
| Bucket policies | IAM-style evaluation | Yes | No |
| Clustering | ✅ Erasure-coded, QUIC | Yes | No |
| Multi-drive awareness | ✅ Per-drive health | Yes | No |
### Core Features ### Core Features
- **Rust-powered HTTP server** — hyper 1.x with streaming I/O, zero-copy, backpressure - 🦀 **Rust-powered HTTP server** — hyper 1.x with streaming I/O, zero-copy, backpressure
- 🔄 **Full S3 API compatibility** — works with AWS SDK v3, SmartBucket, any S3 client - 📦 **Full S3-compatible API** — works with AWS SDK v3, SmartBucket, any S3 client
- 📂 **Filesystem-backed storage** — buckets map to directories, objects to files - 💾 **Filesystem-backed storage** — buckets map to directories, objects to files
- 📤 **Streaming multipart uploads** — large files without memory pressure - 📤 **Streaming multipart uploads** — large files without memory pressure
- 🎯 **Byte-range requests** — `seek()` directly to the requested byte offset - 📐 **Byte-range requests** — `seek()` directly to the requested byte offset
- 🔐 **Authentication** — AWS v2/v4 signature key extraction - 🔐 **AWS SigV4 authentication** — full signature verification with constant-time comparison
- 📋 **Bucket policies** — IAM-style JSON policies with Allow/Deny evaluation and wildcard matching
- 🌐 **CORS middleware** — configurable cross-origin support - 🌐 **CORS middleware** — configurable cross-origin support
- 📊 **Structured logging** — tracing-based, error through debug levels
- 🧹 **Clean slate mode** — wipe storage on startup for test isolation - 🧹 **Clean slate mode** — wipe storage on startup for test isolation
- 🧪 **Test-first design** — start/stop in milliseconds, no port conflicts - **Test-first design** — start/stop in milliseconds, no port conflicts
## 📦 Installation ### Clustering Features
- 🔗 **Erasure coding** — Reed-Solomon (configurable k data + m parity shards) for storage efficiency and fault tolerance
- 🚄 **QUIC transport** — multiplexed, encrypted inter-node communication via `quinn` with zero head-of-line blocking
- 💽 **Multi-drive awareness** — each node manages multiple independent storage paths with health monitoring
- 🤝 **Cluster membership** — static seed config + runtime join, heartbeat-based failure detection
- ✍️ **Quorum writes** — data is only acknowledged after k+1 shards are persisted
- 📖 **Quorum reads** — reconstruct from any k available shards, local-first fast path
- 🩹 **Self-healing** — background scanner detects and reconstructs missing/corrupt shards
## Installation
```bash ```bash
pnpm add @push.rocks/smarts3 -D pnpm add @push.rocks/smartstorage -D
``` ```
> **Note:** The package ships with precompiled Rust binaries for `linux_amd64` and `linux_arm64`. No Rust toolchain needed on your machine. > **Note:** The package ships with precompiled Rust binaries for `linux_amd64` and `linux_arm64`. No Rust toolchain needed on your machine.
## 🚀 Quick Start ## Quick Start
### Standalone Mode (Dev & Test)
```typescript ```typescript
import { Smarts3 } from '@push.rocks/smarts3'; import { SmartStorage } from '@push.rocks/smartstorage';
// Start a local S3 server // Start a local S3-compatible storage server
const s3 = await Smarts3.createAndStart({ const storage = await SmartStorage.createAndStart({
server: { port: 3000 }, server: { port: 3000 },
storage: { cleanSlate: true }, storage: { cleanSlate: true },
}); });
// Create a bucket // Create a bucket
await s3.createBucket('my-bucket'); await storage.createBucket('my-bucket');
// Get connection details for any S3 client // Get connection details for any S3 client
const descriptor = await s3.getS3Descriptor(); const descriptor = await storage.getStorageDescriptor();
// → { endpoint: 'localhost', port: 3000, accessKey: 'S3RVER', accessSecret: 'S3RVER', useSsl: false } // → { endpoint: 'localhost', port: 3000, accessKey: 'STORAGE', accessSecret: 'STORAGE', useSsl: false }
// When done // When done
await s3.stop(); await storage.stop();
``` ```
## 📖 Configuration ### Cluster Mode (Distributed)
```typescript
import { SmartStorage } from '@push.rocks/smartstorage';
const storage = await SmartStorage.createAndStart({
server: { port: 3000 },
cluster: {
enabled: true,
nodeId: 'node-1',
quicPort: 4000,
seedNodes: ['192.168.1.11:4000', '192.168.1.12:4000'],
erasure: {
dataShards: 4, // k: minimum shards to reconstruct data
parityShards: 2, // m: fault tolerance (can lose up to m shards)
},
drives: {
paths: ['/mnt/disk1', '/mnt/disk2', '/mnt/disk3'],
},
},
});
```
Objects are automatically split into chunks (default 4 MB), erasure-coded into 6 shards (4 data + 2 parity), and distributed across drives/nodes. Any 4 of 6 shards can reconstruct the original data.
## Configuration
All config fields are optional — sensible defaults are applied automatically. All config fields are optional — sensible defaults are applied automatically.
```typescript ```typescript
import { Smarts3, ISmarts3Config } from '@push.rocks/smarts3'; import { SmartStorage, ISmartStorageConfig } from '@push.rocks/smartstorage';
const config: ISmarts3Config = { const config: ISmartStorageConfig = {
server: { server: {
port: 3000, // Default: 3000 port: 3000, // Default: 3000
address: '0.0.0.0', // Default: '0.0.0.0' address: '0.0.0.0', // Default: '0.0.0.0'
silent: false, // Default: false silent: false, // Default: false
region: 'us-east-1', // Default: 'us-east-1' — used for SigV4 signing
}, },
storage: { storage: {
directory: './my-data', // Default: .nogit/bucketsDir directory: './my-data', // Default: .nogit/bucketsDir
@@ -108,16 +149,32 @@ const config: ISmarts3Config = {
expirationDays: 7, expirationDays: 7,
cleanupIntervalMinutes: 60, cleanupIntervalMinutes: 60,
}, },
cluster: { // Optional — omit for standalone mode
enabled: true,
nodeId: 'node-1', // Auto-generated UUID if omitted
quicPort: 4000, // Default: 4000
seedNodes: [], // Addresses of existing cluster members
erasure: {
dataShards: 4, // Default: 4
parityShards: 2, // Default: 2
chunkSizeBytes: 4194304, // Default: 4 MB
},
drives: {
paths: ['/mnt/disk1', '/mnt/disk2'],
},
heartbeatIntervalMs: 5000, // Default: 5000
heartbeatTimeoutMs: 30000, // Default: 30000
},
}; };
const s3 = await Smarts3.createAndStart(config); const storage = await SmartStorage.createAndStart(config);
``` ```
### Common Configurations ### Common Configurations
**CI/CD testing** — silent, clean, fast: **CI/CD testing** — silent, clean, fast:
```typescript ```typescript
const s3 = await Smarts3.createAndStart({ const storage = await SmartStorage.createAndStart({
server: { port: 9999, silent: true }, server: { port: 9999, silent: true },
storage: { cleanSlate: true }, storage: { cleanSlate: true },
}); });
@@ -125,7 +182,7 @@ const s3 = await Smarts3.createAndStart({
**Auth enabled:** **Auth enabled:**
```typescript ```typescript
const s3 = await Smarts3.createAndStart({ const storage = await SmartStorage.createAndStart({
auth: { auth: {
enabled: true, enabled: true,
credentials: [{ accessKeyId: 'test', secretAccessKey: 'test123' }], credentials: [{ accessKeyId: 'test', secretAccessKey: 'test123' }],
@@ -135,7 +192,7 @@ const s3 = await Smarts3.createAndStart({
**CORS for local web dev:** **CORS for local web dev:**
```typescript ```typescript
const s3 = await Smarts3.createAndStart({ const storage = await SmartStorage.createAndStart({
cors: { cors: {
enabled: true, enabled: true,
allowedOrigins: ['http://localhost:5173'], allowedOrigins: ['http://localhost:5173'],
@@ -144,12 +201,12 @@ const s3 = await Smarts3.createAndStart({
}); });
``` ```
## 📤 Usage with AWS SDK v3 ## Usage with AWS SDK v3
```typescript ```typescript
import { S3Client, PutObjectCommand, GetObjectCommand, DeleteObjectCommand } from '@aws-sdk/client-s3'; import { S3Client, PutObjectCommand, GetObjectCommand, DeleteObjectCommand } from '@aws-sdk/client-s3';
const descriptor = await s3.getS3Descriptor(); const descriptor = await storage.getStorageDescriptor();
const client = new S3Client({ const client = new S3Client({
endpoint: `http://${descriptor.endpoint}:${descriptor.port}`, endpoint: `http://${descriptor.endpoint}:${descriptor.port}`,
@@ -158,14 +215,14 @@ const client = new S3Client({
accessKeyId: descriptor.accessKey, accessKeyId: descriptor.accessKey,
secretAccessKey: descriptor.accessSecret, secretAccessKey: descriptor.accessSecret,
}, },
forcePathStyle: true, // Required for path-style S3 forcePathStyle: true, // Required for path-style access
}); });
// Upload // Upload
await client.send(new PutObjectCommand({ await client.send(new PutObjectCommand({
Bucket: 'my-bucket', Bucket: 'my-bucket',
Key: 'hello.txt', Key: 'hello.txt',
Body: 'Hello, S3!', Body: 'Hello, Storage!',
ContentType: 'text/plain', ContentType: 'text/plain',
})); }));
@@ -174,7 +231,7 @@ const { Body } = await client.send(new GetObjectCommand({
Bucket: 'my-bucket', Bucket: 'my-bucket',
Key: 'hello.txt', Key: 'hello.txt',
})); }));
const content = await Body.transformToString(); // "Hello, S3!" const content = await Body.transformToString(); // "Hello, Storage!"
// Delete // Delete
await client.send(new DeleteObjectCommand({ await client.send(new DeleteObjectCommand({
@@ -183,12 +240,12 @@ await client.send(new DeleteObjectCommand({
})); }));
``` ```
## 🪣 Usage with SmartBucket ## Usage with SmartBucket
```typescript ```typescript
import { SmartBucket } from '@push.rocks/smartbucket'; import { SmartBucket } from '@push.rocks/smartbucket';
const smartbucket = new SmartBucket(await s3.getS3Descriptor()); const smartbucket = new SmartBucket(await storage.getStorageDescriptor());
const bucket = await smartbucket.createBucket('my-bucket'); const bucket = await smartbucket.createBucket('my-bucket');
const dir = await bucket.getBaseDirectory(); const dir = await bucket.getBaseDirectory();
@@ -202,9 +259,9 @@ const content = await dir.fastGet('docs/readme.txt');
const files = await dir.listFiles(); const files = await dir.listFiles();
``` ```
## 📤 Multipart Uploads ## Multipart Uploads
For files larger than 5 MB, use multipart uploads. smarts3 handles them with **streaming I/O** — parts are written directly to disk, never buffered in memory. For files larger than 5 MB, use multipart uploads. smartstorage handles them with **streaming I/O** — parts are written directly to disk, never buffered in memory. In cluster mode, each part is independently erasure-coded and distributed.
```typescript ```typescript
import { import {
@@ -241,38 +298,161 @@ await client.send(new CompleteMultipartUploadCommand({
})); }));
``` ```
## 🧪 Testing Integration ## Bucket Policies
smartstorage supports AWS-style bucket policies for fine-grained access control. Policies use the same IAM JSON format as real S3 — so you can develop and test your policy logic locally before deploying.
When `auth.enabled` is `true`, the auth pipeline works as follows:
1. **Authenticate** — verify the AWS SigV4 signature (anonymous requests skip this step)
2. **Authorize** — evaluate bucket policies against the request action, resource, and caller identity
3. **Default** — authenticated users get full access; anonymous requests are denied unless a policy explicitly allows them
### Setting a Bucket Policy
```typescript ```typescript
import { Smarts3 } from '@push.rocks/smarts3'; import { PutBucketPolicyCommand } from '@aws-sdk/client-s3';
// Allow anonymous read access to all objects in a bucket
await client.send(new PutBucketPolicyCommand({
Bucket: 'public-assets',
Policy: JSON.stringify({
Version: '2012-10-17',
Statement: [{
Sid: 'PublicRead',
Effect: 'Allow',
Principal: '*',
Action: ['s3:GetObject'],
Resource: ['arn:aws:s3:::public-assets/*'],
}],
}),
}));
```
### Policy Features
- **Effect**: `Allow` and `Deny` (explicit Deny always wins)
- **Principal**: `"*"` (everyone) or `{ "AWS": ["arn:..."] }` for specific identities
- **Action**: IAM-style actions like `s3:GetObject`, `s3:PutObject`, `s3:*`, or prefix wildcards like `s3:Get*`
- **Resource**: ARN patterns with `*` and `?` wildcards (e.g. `arn:aws:s3:::my-bucket/*`)
- **Persistence**: Policies survive server restarts — stored as JSON on disk alongside your data
### Policy CRUD Operations
| Operation | AWS SDK Command | HTTP |
|-----------|----------------|------|
| Get policy | `GetBucketPolicyCommand` | `GET /{bucket}?policy` |
| Set policy | `PutBucketPolicyCommand` | `PUT /{bucket}?policy` |
| Delete policy | `DeleteBucketPolicyCommand` | `DELETE /{bucket}?policy` |
Deleting a bucket automatically removes its associated policy.
## Clustering Deep Dive 🔗
smartstorage can run as a distributed storage cluster where multiple nodes cooperate to store and retrieve data with built-in redundancy.
### How It Works
```
Client ──HTTP PUT──▶ Node A (coordinator)
├─ Split object into 4 MB chunks
├─ Erasure-code each chunk (4 data + 2 parity = 6 shards)
├──QUIC──▶ Node B (shard writes)
├──QUIC──▶ Node C (shard writes)
└─ Local disk (shard writes)
```
1. **Any node can coordinate** — the client connects to any cluster member
2. **Objects are chunked** — large objects split into fixed-size pieces (default 4 MB)
3. **Each chunk is erasure-coded** — Reed-Solomon produces k data + m parity shards
4. **Shards are distributed** — placed across different nodes and drives for fault isolation
5. **Quorum guarantees consistency** — writes need k+1 acks, reads need k shards
### Erasure Coding
With the default `4+2` configuration:
- Storage overhead: **50%** (vs. 200% for 3x replication)
- Fault tolerance: **any 2 drives/nodes can fail** simultaneously
- Read efficiency: only **4 of 6 shards** needed to reconstruct data
| Config | Total Shards | Overhead | Tolerance | Min Nodes |
|--------|-------------|----------|-----------|-----------|
| 4+2 | 6 | 50% | 2 failures | 3 |
| 6+3 | 9 | 50% | 3 failures | 5 |
| 2+1 | 3 | 50% | 1 failure | 2 |
### QUIC Transport
Inter-node communication uses [QUIC](https://en.wikipedia.org/wiki/QUIC) via the `quinn` library:
- 🔒 **Built-in TLS** — self-signed certs auto-generated at cluster init
- 🔀 **Multiplexed streams** — concurrent shard transfers without head-of-line blocking
- **Connection pooling** — persistent connections to peer nodes
- 🌊 **Natural backpressure** — QUIC flow control prevents overloading slow peers
### Cluster Membership
- **Static seed nodes** — initial cluster defined in config
- **Runtime join** — new nodes can join a running cluster
- **Heartbeat monitoring** — every 5s (configurable), with suspect/offline detection
- **Split-brain prevention** — nodes only mark peers offline when they have majority
### Self-Healing
A background scanner runs periodically (default: every 24h) and:
1. Checks shard checksums (CRC32C) for bit-rot detection
2. Identifies shards on offline nodes
3. Reconstructs missing shards from remaining data using Reed-Solomon
4. Places healed shards on healthy drives
Healing runs at low priority to avoid impacting foreground I/O.
### Erasure Set Formation
Drives are organized into fixed **erasure sets** at cluster initialization:
```
3 nodes × 4 drives each = 12 drives total
With 6-shard erasure sets → 2 erasure sets
Set 0: Node1-Disk0, Node2-Disk0, Node3-Disk0, Node1-Disk1, Node2-Disk1, Node3-Disk1
Set 1: Node1-Disk2, Node2-Disk2, Node3-Disk2, Node1-Disk3, Node2-Disk3, Node3-Disk3
```
Drives are interleaved across nodes for maximum fault isolation. New nodes form new erasure sets — existing data is never rebalanced.
## Testing Integration
```typescript
import { SmartStorage } from '@push.rocks/smartstorage';
import { tap, expect } from '@git.zone/tstest/tapbundle'; import { tap, expect } from '@git.zone/tstest/tapbundle';
let s3: Smarts3; let storage: SmartStorage;
tap.test('setup', async () => { tap.test('setup', async () => {
s3 = await Smarts3.createAndStart({ storage = await SmartStorage.createAndStart({
server: { port: 4567, silent: true }, server: { port: 4567, silent: true },
storage: { cleanSlate: true }, storage: { cleanSlate: true },
}); });
}); });
tap.test('should store and retrieve objects', async () => { tap.test('should store and retrieve objects', async () => {
await s3.createBucket('test'); await storage.createBucket('test');
// ... your test logic using AWS SDK or SmartBucket // ... your test logic using AWS SDK or SmartBucket
}); });
tap.test('teardown', async () => { tap.test('teardown', async () => {
await s3.stop(); await storage.stop();
}); });
export default tap.start(); export default tap.start();
``` ```
## 🔧 API Reference ## API Reference
### `Smarts3` Class ### `SmartStorage` Class
#### `static createAndStart(config?: ISmarts3Config): Promise<Smarts3>` #### `static createAndStart(config?: ISmartStorageConfig): Promise<SmartStorage>`
Create and start a server in one call. Create and start a server in one call.
@@ -286,11 +466,11 @@ Gracefully stop the server and kill the Rust process.
#### `createBucket(name: string): Promise<{ name: string }>` #### `createBucket(name: string): Promise<{ name: string }>`
Create an S3 bucket. Create a storage bucket.
#### `getS3Descriptor(options?): Promise<IS3Descriptor>` #### `getStorageDescriptor(options?): Promise<IS3Descriptor>`
Get connection details for S3 clients. Returns: Get connection details for S3-compatible clients. Returns:
| Field | Type | Description | | Field | Type | Description |
|-------|------|-------------| |-------|------|-------------|
@@ -300,35 +480,42 @@ Get connection details for S3 clients. Returns:
| `accessSecret` | `string` | Secret key from first configured credential | | `accessSecret` | `string` | Secret key from first configured credential |
| `useSsl` | `boolean` | Always `false` (plain HTTP) | | `useSsl` | `boolean` | Always `false` (plain HTTP) |
## 🏗️ Architecture ## Architecture
smarts3 uses a **hybrid Rust + TypeScript** architecture: smartstorage uses a **hybrid Rust + TypeScript** architecture:
``` ```
┌─────────────────────────────────┐ ┌──────────────────────────────────────────────
│ Your Code (AWS SDK, etc.) │ │ Your Code (AWS SDK, SmartBucket, etc.)
│ ↕ HTTP (localhost:3000) │ │ ↕ HTTP (localhost:3000)
├─────────────────────────────────┤ ├──────────────────────────────────────────────
│ rusts3 binary (Rust) │ │ ruststorage binary (Rust)
│ ├─ hyper 1.x HTTP server │ │ ├─ hyper 1.x HTTP server
│ ├─ S3 path-style routing │ │ ├─ S3 path-style routing
│ ├─ Streaming storage layer │ ├─ StorageBackend (Standalone or Clustered)
├─ Multipart manager │ ├─ FileStore (single-node mode)
├─ CORS / Auth middleware │ └─ DistributedStore (cluster mode)
└─ S3 XML response builder │ ├─ ErasureCoder (Reed-Solomon)
├─────────────────────────────────┤ │ │ ├─ ShardStore (per-drive storage) │
TypeScript (thin IPC wrapper) │ ├─ QuicTransport (quinn)
├─ Smarts3 class │ ├─ ClusterState & Membership
├─ RustBridge (stdin/stdout) │ └─ HealingService
Config & S3 descriptor SigV4 auth + policy engine
└─────────────────────────────────┘ │ ├─ CORS middleware │
│ └─ S3 XML response builder │
├──────────────────────────────────────────────┤
│ TypeScript (thin IPC wrapper) │
│ ├─ SmartStorage class │
│ ├─ RustBridge (stdin/stdout JSON IPC) │
│ └─ Config & S3 descriptor │
└──────────────────────────────────────────────┘
``` ```
**Why Rust?** The TypeScript implementation had critical perf issues: OOM on multipart uploads (parts buffered in memory), double stream copying, file descriptor leaks on HEAD requests, full-file reads for range requests, and no backpressure. The Rust binary solves all of these with streaming I/O, zero-copy, and direct `seek()` for range requests. **Why Rust?** The original TypeScript implementation had critical perf issues: OOM on multipart uploads (parts buffered in memory), double stream copying, file descriptor leaks on HEAD requests, full-file reads for range requests, and no backpressure. The Rust binary solves all of these with streaming I/O, zero-copy, and direct `seek()` for range requests.
**IPC Protocol:** TypeScript spawns the `rusts3` binary with `--management` and communicates via newline-delimited JSON over stdin/stdout. Commands: `start`, `stop`, `createBucket`. **IPC Protocol:** TypeScript spawns the `ruststorage` binary with `--management` and communicates via newline-delimited JSON over stdin/stdout. Commands: `start`, `stop`, `createBucket`, `clusterStatus`.
### S3 Operations Supported ### S3-Compatible Operations
| Operation | Method | Path | | Operation | Method | Path |
|-----------|--------|------| |-----------|--------|------|
@@ -347,26 +534,45 @@ smarts3 uses a **hybrid Rust + TypeScript** architecture:
| CompleteMultipartUpload | `POST /{bucket}/{key}?uploadId` | | | CompleteMultipartUpload | `POST /{bucket}/{key}?uploadId` | |
| AbortMultipartUpload | `DELETE /{bucket}/{key}?uploadId` | | | AbortMultipartUpload | `DELETE /{bucket}/{key}?uploadId` | |
| ListMultipartUploads | `GET /{bucket}?uploads` | | | ListMultipartUploads | `GET /{bucket}?uploads` | |
| GetBucketPolicy | `GET /{bucket}?policy` | |
| PutBucketPolicy | `PUT /{bucket}?policy` | |
| DeleteBucketPolicy | `DELETE /{bucket}?policy` | |
### On-Disk Format ### On-Disk Format
**Standalone mode:**
``` ```
{storage.directory}/ {storage.directory}/
{bucket}/ {bucket}/
{key}._S3_object # Object data {key}._storage_object # Object data
{key}._S3_object.metadata.json # Metadata (content-type, x-amz-meta-*, etc.) {key}._storage_object.metadata.json # Metadata (content-type, x-amz-meta-*, etc.)
{key}._S3_object.md5 # Cached MD5 hash {key}._storage_object.md5 # Cached MD5 hash
.multipart/ .multipart/
{upload-id}/ {upload-id}/
metadata.json # Upload metadata (bucket, key, parts) metadata.json # Upload metadata
part-1 # Part data files part-1, part-2, ... # Part data files
part-2 .policies/
... {bucket}.policy.json # Bucket policy (IAM JSON format)
``` ```
## 🔗 Related Packages **Cluster mode:**
```
{drive_path}/.smartstorage/
format.json # Drive metadata (cluster ID, erasure set)
data/{bucket}/{key_hash}/{key}/
chunk-{N}/shard-{M}.dat # Erasure-coded shard data
chunk-{N}/shard-{M}.meta # Shard metadata (checksum, size)
- [`@push.rocks/smartbucket`](https://code.foss.global/push.rocks/smartbucket) — High-level S3 abstraction layer {storage.directory}/
.manifests/{bucket}/
{key}.manifest.json # Object manifest (shard placements, checksums)
.buckets/{bucket}/ # Bucket metadata
.policies/{bucket}.policy.json # Bucket policies
```
## Related Packages
- [`@push.rocks/smartbucket`](https://code.foss.global/push.rocks/smartbucket) — High-level S3-compatible abstraction layer
- [`@push.rocks/smartrust`](https://code.foss.global/push.rocks/smartrust) — TypeScript ↔ Rust IPC bridge - [`@push.rocks/smartrust`](https://code.foss.global/push.rocks/smartrust) — TypeScript ↔ Rust IPC bridge
- [`@git.zone/tsrust`](https://code.foss.global/git.zone/tsrust) — Rust cross-compilation for npm packages - [`@git.zone/tsrust`](https://code.foss.global/git.zone/tsrust) — Rust cross-compilation for npm packages

1024
rust/Cargo.lock generated

File diff suppressed because it is too large Load Diff

View File

@@ -1,10 +1,10 @@
[package] [package]
name = "rusts3" name = "ruststorage"
version = "0.1.0" version = "0.1.0"
edition = "2021" edition = "2021"
[[bin]] [[bin]]
name = "rusts3" name = "ruststorage"
path = "src/main.rs" path = "src/main.rs"
[dependencies] [dependencies]
@@ -28,3 +28,16 @@ percent-encoding = "2"
url = "2" url = "2"
chrono = { version = "0.4", features = ["serde"] } chrono = { version = "0.4", features = ["serde"] }
futures-core = "0.3" futures-core = "0.3"
futures = "0.3"
async-trait = "0.1"
reed-solomon-erasure = { version = "6", features = ["simd-accel"] }
xxhash-rust = { version = "0.8", features = ["xxh64"] }
crc32c = "0.6"
bincode = "1"
quinn = "0.11"
rustls = { version = "0.23", default-features = false, features = ["ring", "std"] }
rcgen = "0.13"
dashmap = "6"
hmac = "0.12"
sha2 = "0.10"
hex = "0.4"

172
rust/src/action.rs Normal file
View File

@@ -0,0 +1,172 @@
use hyper::body::Incoming;
use hyper::{Method, Request};
use std::collections::HashMap;
/// Every kind of request the server tells apart when authorizing a call.
///
/// Several variants deliberately share one IAM permission string; the mapping
/// lives in [`StorageAction::iam_action`].
#[derive(Debug, Clone, PartialEq, Eq)]
pub enum StorageAction {
    ListAllMyBuckets,
    CreateBucket,
    DeleteBucket,
    HeadBucket,
    ListBucket,
    GetObject,
    HeadObject,
    PutObject,
    DeleteObject,
    CopyObject,
    ListBucketMultipartUploads,
    AbortMultipartUpload,
    InitiateMultipartUpload,
    UploadPart,
    CompleteMultipartUpload,
    GetBucketPolicy,
    PutBucketPolicy,
    DeleteBucketPolicy,
}

impl StorageAction {
    /// IAM-style permission string for this action (for example `"s3:GetObject"`).
    ///
    /// Actions that have no permission of their own are folded into the
    /// permission they are checked against: HEAD requests use the matching
    /// read permission, and copy / multipart-write steps use `s3:PutObject`.
    pub fn iam_action(&self) -> &'static str {
        match self {
            // Service- and bucket-level operations.
            Self::ListAllMyBuckets => "s3:ListAllMyBuckets",
            Self::CreateBucket => "s3:CreateBucket",
            Self::DeleteBucket => "s3:DeleteBucket",
            Self::HeadBucket | Self::ListBucket => "s3:ListBucket",

            // Object reads.
            Self::GetObject | Self::HeadObject => "s3:GetObject",

            // Anything that ends up writing object data.
            Self::PutObject
            | Self::CopyObject
            | Self::InitiateMultipartUpload
            | Self::UploadPart
            | Self::CompleteMultipartUpload => "s3:PutObject",

            Self::DeleteObject => "s3:DeleteObject",

            // Multipart housekeeping.
            Self::ListBucketMultipartUploads => "s3:ListBucketMultipartUploads",
            Self::AbortMultipartUpload => "s3:AbortMultipartUpload",

            // Bucket policy management.
            Self::GetBucketPolicy => "s3:GetBucketPolicy",
            Self::PutBucketPolicy => "s3:PutBucketPolicy",
            Self::DeleteBucketPolicy => "s3:DeleteBucketPolicy",
        }
    }
}
/// What a request is trying to do and which bucket/object it targets.
///
/// This is the input handed to policy evaluation.
#[derive(Debug, Clone)]
pub struct RequestContext {
    /// The operation being attempted.
    pub action: StorageAction,
    /// Target bucket, when the request path names one.
    pub bucket: Option<String>,
    /// Target object key, when the request path names one.
    pub key: Option<String>,
}

impl RequestContext {
    /// Render the S3 ARN of the resource this request addresses.
    ///
    /// * bucket and key  -> `arn:aws:s3:::{bucket}/{key}`
    /// * bucket only     -> `arn:aws:s3:::{bucket}`
    /// * no bucket       -> `arn:aws:s3:::*` (the key, if any, is ignored)
    pub fn resource_arn(&self) -> String {
        let bucket = match self.bucket.as_deref() {
            Some(name) => name,
            None => return "arn:aws:s3:::*".to_string(),
        };

        match self.key.as_deref() {
            Some(key) => format!("arn:aws:s3:::{}/{}", bucket, key),
            None => format!("arn:aws:s3:::{}", bucket),
        }
    }
}
/// Classify an incoming HTTP request as a [`StorageAction`] plus its target.
///
/// The path is read path-style: the first non-empty segment is the bucket and
/// everything after the following `/` is the object key (both percent-decoded).
/// The HTTP method, the presence of the `policy`, `uploads`, `uploadId` and
/// `partNumber` query parameters, and the `x-amz-copy-source` header select the
/// action. Method/parameter combinations that match nothing fall back to
/// `ListBucket` (bucket paths) or `GetObject` (object paths).
pub fn resolve_action(req: &Request<Incoming>) -> RequestContext {
    let verb = req.method();
    let uri = req.uri();
    let params = parse_query_simple(uri.query().unwrap_or(""));

    // At most two pieces: the bucket, and everything after the first '/'.
    let pieces: Vec<&str> = uri
        .path()
        .trim_start_matches('/')
        .splitn(2, '/')
        .filter(|piece| !piece.is_empty())
        .collect();

    match pieces.as_slice() {
        // Bucket-level request: /{bucket}
        [bucket] => {
            let has_policy = params.contains_key("policy");
            let has_uploads = params.contains_key("uploads");

            // `?policy` takes precedence over `?uploads` for GET.
            let action = match *verb {
                Method::GET if has_policy => StorageAction::GetBucketPolicy,
                Method::GET if has_uploads => StorageAction::ListBucketMultipartUploads,
                Method::GET => StorageAction::ListBucket,
                Method::PUT if has_policy => StorageAction::PutBucketPolicy,
                Method::PUT => StorageAction::CreateBucket,
                Method::DELETE if has_policy => StorageAction::DeleteBucketPolicy,
                Method::DELETE => StorageAction::DeleteBucket,
                Method::HEAD => StorageAction::HeadBucket,
                _ => StorageAction::ListBucket,
            };

            RequestContext {
                action,
                bucket: Some(percent_decode(bucket)),
                key: None,
            }
        }

        // Object-level request: /{bucket}/{key...}
        [bucket, key] => {
            let has_copy_source = req.headers().contains_key("x-amz-copy-source");
            let has_part_number = params.contains_key("partNumber");
            let has_upload_id = params.contains_key("uploadId");
            let has_uploads = params.contains_key("uploads");

            let action = match *verb {
                Method::PUT => {
                    // A part upload wins over a copy when both markers are present.
                    if has_part_number && has_upload_id {
                        StorageAction::UploadPart
                    } else if has_copy_source {
                        StorageAction::CopyObject
                    } else {
                        StorageAction::PutObject
                    }
                }
                Method::GET => StorageAction::GetObject,
                Method::HEAD => StorageAction::HeadObject,
                Method::DELETE => {
                    if has_upload_id {
                        StorageAction::AbortMultipartUpload
                    } else {
                        StorageAction::DeleteObject
                    }
                }
                Method::POST if has_uploads => StorageAction::InitiateMultipartUpload,
                Method::POST if has_upload_id => StorageAction::CompleteMultipartUpload,
                _ => StorageAction::GetObject,
            };

            RequestContext {
                action,
                bucket: Some(percent_decode(bucket)),
                key: Some(percent_decode(key)),
            }
        }

        // Empty path (service root) -> ListBuckets. Also the defensive fallback.
        _ => RequestContext {
            action: StorageAction::ListAllMyBuckets,
            bucket: None,
            key: None,
        },
    }
}
/// Split a raw query string into a name -> value map.
///
/// Pairs are separated by `&` and split at the first `=`. A parameter without
/// `=` maps to the empty string, a repeated name keeps its last value, and
/// nothing is percent-decoded. An empty query yields an empty map.
fn parse_query_simple(query_string: &str) -> HashMap<String, String> {
    // Guard first: splitting "" would otherwise produce a single ("", "") entry.
    if query_string.is_empty() {
        return HashMap::new();
    }

    query_string
        .split('&')
        .map(|pair| {
            let (name, value) = pair.split_once('=').unwrap_or((pair, ""));
            (name.to_string(), value.to_string())
        })
        .collect()
}
/// Percent-decode a path segment into an owned string.
///
/// Decoding is lossy: the result of `decode_utf8_lossy` is returned, so the
/// function never fails.
fn percent_decode(s: &str) -> String {
    let decoded = percent_encoding::percent_decode_str(s).decode_utf8_lossy();
    decoded.into_owned()
}

310
rust/src/auth.rs Normal file
View File

@@ -0,0 +1,310 @@
use hmac::{Hmac, Mac};
use hyper::body::Incoming;
use hyper::Request;
use sha2::{Digest, Sha256};
use std::collections::HashMap;
use crate::config::{Credential, SmartStorageConfig};
use crate::error::StorageError;
type HmacSha256 = Hmac<Sha256>;
/// The identity of an authenticated caller.
///
/// Returned by `verify_request` once the request signature has been verified.
#[derive(Debug, Clone)]
pub struct AuthenticatedIdentity {
/// Access key ID taken from the `Credential=` field of the request's
/// Authorization header.
pub access_key_id: String,
}
/// Parsed components of an AWS4-HMAC-SHA256 Authorization header.
///
/// Built by `parse_auth_header`; every field is copied from the header text
/// supplied by the client.
struct SigV4Header {
/// First `/`-separated element of the `Credential=` value.
access_key_id: String,
/// Second element of the `Credential=` value (the scope date, `YYYYMMDD`).
date_stamp: String,
/// Third element of the `Credential=` value (the scope region).
region: String,
/// `SignedHeaders=` entries split on `;`, trimmed and lowercased, in the
/// order the client listed them.
signed_headers: Vec<String>,
/// Trimmed `Signature=` value; compared against the hex-encoded signature
/// the server computes.
signature: String,
}
/// Authenticate a request by recomputing its AWS Signature Version 4 signature.
///
/// Steps, in order (the first failure is returned):
/// 1. the `Authorization` header must use the `AWS4-HMAC-SHA256` scheme,
/// 2. its access key must match a configured credential,
/// 3. a request timestamp (`x-amz-date`, else `date`) must be present and
///    pass the clock-skew check,
/// 4. the signature derived from the canonical request must equal the one the
///    client sent.
///
/// On success the caller's access key ID is returned.
pub fn verify_request(
    req: &Request<Incoming>,
    config: &SmartStorageConfig,
) -> Result<AuthenticatedIdentity, StorageError> {
    /// Fetch a header as text; absent and non-textual values both yield `None`.
    fn header_text<'r>(req: &'r Request<Incoming>, name: &str) -> Option<&'r str> {
        req.headers().get(name).and_then(|value| value.to_str().ok())
    }

    // Only SigV4 is accepted. A missing header and legacy SigV2 ("AWS ...")
    // are both reported as a malformed Authorization header.
    let auth_header = header_text(req, "authorization").unwrap_or("");
    if auth_header.starts_with("AWS ") || !auth_header.starts_with("AWS4-HMAC-SHA256") {
        return Err(StorageError::authorization_header_malformed());
    }
    let parsed = parse_auth_header(auth_header)?;

    // Resolve the secret that belongs to the presented access key.
    let credential = find_credential(&parsed.access_key_id, config)
        .ok_or_else(StorageError::invalid_access_key_id)?;

    // Request timestamp: prefer x-amz-date, fall back to Date.
    let amz_date = header_text(req, "x-amz-date")
        .or_else(|| header_text(req, "date"))
        .ok_or_else(|| StorageError::missing_security_header("Missing x-amz-date header"))?;
    check_clock_skew(amz_date)?;

    // Payload hash as declared by the client; unsigned when not declared.
    let content_sha256 = header_text(req, "x-amz-content-sha256").unwrap_or("UNSIGNED-PAYLOAD");

    // String to sign = algorithm, timestamp, credential scope, hash of the
    // canonical request.
    let canonical_request = build_canonical_request(req, &parsed.signed_headers, content_sha256);
    let canonical_hash = hex::encode(Sha256::digest(canonical_request.as_bytes()));
    let scope = format!(
        "{}/{}/s3/aws4_request",
        parsed.date_stamp, parsed.region
    );
    let string_to_sign = format!(
        "AWS4-HMAC-SHA256\n{}\n{}\n{}",
        amz_date, scope, canonical_hash
    );

    // Sign with the key derived from the stored secret and the client's scope.
    let signing_key = derive_signing_key(
        &credential.secret_access_key,
        &parsed.date_stamp,
        &parsed.region,
    );
    let expected_signature = hex::encode(hmac_sha256(&signing_key, string_to_sign.as_bytes()));

    // Compare without short-circuiting on the first differing byte.
    if constant_time_eq(expected_signature.as_bytes(), parsed.signature.as_bytes()) {
        Ok(AuthenticatedIdentity {
            access_key_id: parsed.access_key_id,
        })
    } else {
        Err(StorageError::signature_does_not_match())
    }
}
/// Break a SigV4 `Authorization` header into its fields.
///
/// Expected shape:
/// `AWS4-HMAC-SHA256 Credential=KEY/YYYYMMDD/region/s3/aws4_request, SignedHeaders=h1;h2, Signature=hex`
///
/// Fields may appear in any order, unknown fields are ignored, and a repeated
/// field keeps its last value. Any missing field, or a credential with fewer
/// than five `/`-separated elements, yields the "authorization header
/// malformed" error.
fn parse_auth_header(header: &str) -> Result<SigV4Header, StorageError> {
    let fields = header
        .strip_prefix("AWS4-HMAC-SHA256")
        .ok_or_else(StorageError::authorization_header_malformed)?
        .trim();

    let mut credential: Option<&str> = None;
    let mut signed: Option<&str> = None;
    let mut signature: Option<&str> = None;

    for field in fields.split(',') {
        if let Some((name, value)) = field.trim().split_once('=') {
            let slot = match name {
                "Credential" => &mut credential,
                "SignedHeaders" => &mut signed,
                "Signature" => &mut signature,
                _ => continue,
            };
            *slot = Some(value.trim());
        }
    }

    let credential = credential.ok_or_else(StorageError::authorization_header_malformed)?;
    let signed = signed.ok_or_else(StorageError::authorization_header_malformed)?;
    let signature = signature.ok_or_else(StorageError::authorization_header_malformed)?;

    // Credential scope: KEY/YYYYMMDD/region/service/terminator. Only the
    // first three elements are kept.
    let scope: Vec<&str> = credential.splitn(5, '/').collect();
    let (access_key_id, date_stamp, region) = match scope.as_slice() {
        [key, date, region, _service, _terminator] => (*key, *date, *region),
        _ => return Err(StorageError::authorization_header_malformed()),
    };

    let signed_headers: Vec<String> = signed
        .split(';')
        .map(|name| name.trim().to_lowercase())
        .collect();

    Ok(SigV4Header {
        access_key_id: access_key_id.to_string(),
        date_stamp: date_stamp.to_string(),
        region: region.to_string(),
        signed_headers,
        signature: signature.to_string(),
    })
}
/// Look up the configured credential whose access key ID equals
/// `access_key_id`; the first match wins, `None` when there is none.
fn find_credential<'a>(access_key_id: &str, config: &'a SmartStorageConfig) -> Option<&'a Credential> {
    for candidate in config.auth.credentials.iter() {
        if candidate.access_key_id == access_key_id {
            return Some(candidate);
        }
    }
    None
}
/// Reject requests whose timestamp is more than 15 minutes away from the
/// server clock, in either direction.
///
/// `amz_date` must be in ISO 8601 basic format (`YYYYMMDDTHHMMSSZ`); any other
/// format is reported as a malformed authorization header.
fn check_clock_skew(amz_date: &str) -> Result<(), StorageError> {
    const MAX_SKEW_SECS: u64 = 15 * 60;

    let naive = match chrono::NaiveDateTime::parse_from_str(amz_date, "%Y%m%dT%H%M%SZ") {
        Ok(timestamp) => timestamp,
        Err(_) => return Err(StorageError::authorization_header_malformed()),
    };
    let request_time = chrono::DateTime::<chrono::Utc>::from_naive_utc_and_offset(naive, chrono::Utc);

    // Absolute distance, so requests from the future are rejected as well.
    let skew_secs = (chrono::Utc::now() - request_time)
        .num_seconds()
        .unsigned_abs();

    if skew_secs <= MAX_SKEW_SECS {
        Ok(())
    } else {
        Err(StorageError::request_time_too_skewed())
    }
}
/// Build the SigV4 canonical request string.
///
/// The result is six newline-separated parts: HTTP method, canonical URI,
/// canonical query string, canonical headers, the `;`-joined signed header
/// names, and the payload hash.
///
/// * `req` - the request being verified.
/// * `signed_headers` - lowercased header names from the `SignedHeaders=`
///   field, in the order the client listed them.
/// * `payload_hash` - the value to place in the final line. It is used
///   verbatim, so sentinel values such as `UNSIGNED-PAYLOAD` and
///   `STREAMING-AWS4-HMAC-SHA256-PAYLOAD` pass through exactly like a hex
///   digest.
fn build_canonical_request(
    req: &Request<Incoming>,
    signed_headers: &[String],
    payload_hash: &str,
) -> String {
    // Canonical URI: the request path exactly as received (it is not
    // re-encoded here); an empty path is normalised to "/".
    let uri_path = req.uri().path();
    let canonical_uri = if uri_path.is_empty() { "/" } else { uri_path };

    // Canonical query string: sorted key=value pairs.
    let canonical_query = build_canonical_query(req.uri().query().unwrap_or(""));

    // Canonical headers: one "name:value\n" line per signed header. Each line
    // already ends in '\n', which supplies the blank separator line required
    // before the signed-header list.
    let canonical_headers = build_canonical_headers(req, signed_headers);

    format!(
        "{}\n{}\n{}\n{}\n{}\n{}",
        req.method().as_str(),
        canonical_uri,
        canonical_query,
        canonical_headers,
        signed_headers.join(";"),
        payload_hash
    )
}
/// Produce the canonical form of a raw query string.
///
/// Parameters are split on `&`, each split at its first `=`, sorted by name
/// and then value, and re-joined as `name=value` with `&`. A parameter without
/// `=` is emitted with an empty value (`uploads` -> `uploads=`). Names and
/// values are not re-encoded. An empty query yields an empty string.
fn build_canonical_query(query: &str) -> String {
    if query.is_empty() {
        return String::new();
    }

    let mut params: Vec<(&str, &str)> = query
        .split('&')
        .map(|pair| pair.split_once('=').unwrap_or((pair, "")))
        .collect();
    params.sort();

    let mut canonical = String::with_capacity(query.len() + params.len());
    for (index, (name, value)) in params.iter().enumerate() {
        if index > 0 {
            canonical.push('&');
        }
        canonical.push_str(name);
        canonical.push('=');
        canonical.push_str(value);
    }
    canonical
}
/// Render the canonical-headers section of a SigV4 canonical request.
///
/// Emits one `name:value\n` line per entry of `signed_headers`, in the order
/// given. Values are trimmed; when a header occurs several times its values
/// are joined with `,`. A signed header that is absent from the request (or
/// whose value is not valid header text) is emitted with an empty value.
fn build_canonical_headers(req: &Request<Incoming>, signed_headers: &[String]) -> String {
    // Collect the trimmed values of every signed header, grouped by
    // lowercase name and kept in arrival order within each name.
    let mut values_by_name: HashMap<String, Vec<String>> = HashMap::new();
    for (name, value) in req.headers() {
        let lowered = name.as_str().to_lowercase();
        if !signed_headers.contains(&lowered) {
            continue;
        }
        match value.to_str() {
            Ok(text) => values_by_name
                .entry(lowered)
                .or_default()
                .push(text.trim().to_string()),
            Err(_) => {}
        }
    }

    signed_headers
        .iter()
        .map(|name| {
            let joined = match values_by_name.get(name) {
                Some(values) => values.join(","),
                None => String::new(),
            };
            format!("{}:{}\n", name, joined)
        })
        .collect()
}
/// Derive the signing key via 4-step HMAC chain.
fn derive_signing_key(secret_key: &str, date_stamp: &str, region: &str) -> Vec<u8> {
let k_secret = format!("AWS4{}", secret_key);
let k_date = hmac_sha256(k_secret.as_bytes(), date_stamp.as_bytes());
let k_region = hmac_sha256(&k_date, region.as_bytes());
let k_service = hmac_sha256(&k_region, b"s3");
hmac_sha256(&k_service, b"aws4_request")
}
/// Return the HMAC-SHA256 tag of `data` under `key` as an owned byte vector.
fn hmac_sha256(key: &[u8], data: &[u8]) -> Vec<u8> {
    let mut hasher =
        HmacSha256::new_from_slice(key).expect("HMAC key length is always valid");
    hasher.update(data);
    let tag = hasher.finalize().into_bytes();
    tag.to_vec()
}
/// Compare two byte slices without short-circuiting on the first differing byte.
///
/// Slices of different length compare unequal immediately; for equal lengths
/// every byte pair is XOR-ed into an accumulator that is checked once at the end.
fn constant_time_eq(a: &[u8], b: &[u8]) -> bool {
    a.len() == b.len()
        && a.iter()
            .zip(b)
            .fold(0u8, |acc, (lhs, rhs)| acc | (lhs ^ rhs))
            == 0
}

View File

@@ -0,0 +1,95 @@
use serde::{Deserialize, Serialize};
/// Cluster-mode settings, (de)serialized with camelCase keys (e.g. `quicPort`).
///
/// Only `enabled` lacks a serde default and must therefore be present in the
/// input; every other field falls back to the default described on it.
#[derive(Debug, Clone, Serialize, Deserialize)]
#[serde(rename_all = "camelCase")]
pub struct ClusterConfig {
/// Cluster-mode switch. NOTE(review): how this flag is consumed is not visible here.
pub enabled: bool,
/// Optional identifier for this node; `None` when omitted.
#[serde(default)]
pub node_id: Option<String>,
/// QUIC port; defaults to `default_quic_port()` (4000).
#[serde(default = "default_quic_port")]
pub quic_port: u16,
/// Seed node addresses; empty when omitted.
#[serde(default)]
pub seed_nodes: Vec<String>,
/// Erasure-coding parameters; defaults to `ErasureConfig::default()`.
#[serde(default)]
pub erasure: ErasureConfig,
/// Local drive paths; defaults to `DriveConfig::default()` (no drives).
#[serde(default)]
pub drives: DriveConfig,
/// Heartbeat period in milliseconds; defaults to 5000.
#[serde(default = "default_heartbeat_interval")]
pub heartbeat_interval_ms: u64,
/// Heartbeat timeout in milliseconds; defaults to 30000.
#[serde(default = "default_heartbeat_timeout")]
pub heartbeat_timeout_ms: u64,
}
/// Erasure-coding parameters, (de)serialized with camelCase keys.
///
/// Every field has a serde default, so an empty object deserializes to the
/// same values as `ErasureConfig::default()`.
#[derive(Debug, Clone, Serialize, Deserialize)]
#[serde(rename_all = "camelCase")]
pub struct ErasureConfig {
/// Number of data shards per chunk; defaults to 4.
#[serde(default = "default_data_shards")]
pub data_shards: usize,
/// Number of parity shards per chunk; defaults to 2.
#[serde(default = "default_parity_shards")]
pub parity_shards: usize,
/// Chunk size in bytes; defaults to 4 * 1024 * 1024.
#[serde(default = "default_chunk_size")]
pub chunk_size_bytes: usize,
}
impl ErasureConfig {
    /// Shards produced per chunk: data shards plus parity shards.
    pub fn total_shards(&self) -> usize {
        let Self {
            data_shards,
            parity_shards,
            ..
        } = self;
        data_shards + parity_shards
    }

    /// Shards that must be written for a write to succeed: one more than the
    /// number of data shards.
    pub fn write_quorum(&self) -> usize {
        self.read_quorum() + 1
    }

    /// Shards required to reconstruct a chunk: the number of data shards.
    pub fn read_quorum(&self) -> usize {
        self.data_shards
    }
}
impl Default for ErasureConfig {
fn default() -> Self {
Self {
data_shards: default_data_shards(),
parity_shards: default_parity_shards(),
chunk_size_bytes: default_chunk_size(),
}
}
}
/// Local drive configuration, (de)serialized with camelCase keys.
#[derive(Debug, Clone, Serialize, Deserialize)]
#[serde(rename_all = "camelCase")]
pub struct DriveConfig {
/// Drive paths as strings; empty when omitted.
#[serde(default)]
pub paths: Vec<String>,
}
impl Default for DriveConfig {
fn default() -> Self {
Self { paths: Vec::new() }
}
}
/// Serde default for `ClusterConfig::quic_port`.
fn default_quic_port() -> u16 {
    4_000
}

/// Serde default for `ClusterConfig::heartbeat_interval_ms`.
fn default_heartbeat_interval() -> u64 {
    5_000
}

/// Serde default for `ClusterConfig::heartbeat_timeout_ms`.
fn default_heartbeat_timeout() -> u64 {
    30_000
}

/// Serde default for `ErasureConfig::data_shards`.
fn default_data_shards() -> usize {
    4
}

/// Serde default for `ErasureConfig::parity_shards`.
fn default_parity_shards() -> usize {
    2
}

/// Serde default for `ErasureConfig::chunk_size_bytes`: 4 MiB.
fn default_chunk_size() -> usize {
    const MIB: usize = 1024 * 1024;
    4 * MIB
}

File diff suppressed because it is too large Load Diff

View File

@@ -0,0 +1,242 @@
use anyhow::Result;
use chrono::{DateTime, Utc};
use serde::{Deserialize, Serialize};
use std::path::{Path, PathBuf};
use tokio::fs;
use super::config::DriveConfig;
// ============================
// Drive format (on-disk metadata)
// ============================
/// Identity stamp stored on each drive as `.smartstorage/format.json`
/// (camelCase JSON keys).
#[derive(Debug, Clone, Serialize, Deserialize)]
#[serde(rename_all = "camelCase")]
pub struct DriveFormat {
/// Identifier of the cluster the drive was formatted for.
pub cluster_id: String,
/// Erasure set this drive is assigned to.
pub erasure_set_id: u32,
/// Position of this drive within its erasure set.
pub drive_index_in_set: u32,
/// Version of the on-disk format; `DriveManager::format_drives` writes 1.
pub format_version: u32,
}
// ============================
// Drive state tracking
// ============================
/// Health state of a single drive as tracked by `DriveManager`.
#[derive(Debug, Clone, PartialEq, Eq)]
pub enum DriveStatus {
/// Last health probe succeeded within the latency threshold.
Online,
/// Last health probe succeeded but was slower than the latency threshold.
Degraded,
/// Last health probe failed to write or read the probe file.
Offline,
/// NOTE(review): not assigned anywhere in this file; presumably set while
/// shards are being rebuilt onto the drive — confirm.
Healing,
}
/// Runtime counters for one drive.
///
/// `DriveStats::default()` zeroes every counter and stamps `last_check` with
/// the current time.
#[derive(Debug, Clone)]
pub struct DriveStats {
/// Capacity in bytes. NOTE(review): never updated in the code visible here.
pub total_bytes: u64,
/// Used space in bytes. NOTE(review): never updated in the code visible here.
pub used_bytes: u64,
/// Write latency in microseconds, refreshed by `DriveManager::check_drive_health`
/// after a successful probe.
pub avg_write_latency_us: u64,
/// Read latency in microseconds.
pub avg_read_latency_us: u64,
/// Number of failed health probes recorded so far.
pub error_count: u64,
/// Text of the most recent probe error, if any.
pub last_error: Option<String>,
/// Time of the most recent health probe (or of construction).
pub last_check: DateTime<Utc>,
}
impl Default for DriveStats {
fn default() -> Self {
Self {
total_bytes: 0,
used_bytes: 0,
avg_write_latency_us: 0,
avg_read_latency_us: 0,
error_count: 0,
last_error: None,
last_check: Utc::now(),
}
}
}
/// Everything `DriveManager` tracks about one configured drive.
#[derive(Debug)]
pub struct DriveState {
/// Root path of the drive as given in the configuration.
pub path: PathBuf,
/// Parsed `.smartstorage/format.json`, or `None` if missing or unreadable.
pub format: Option<DriveFormat>,
/// Current health state.
pub status: DriveStatus,
/// Health counters for this drive.
pub stats: DriveStats,
}
// ============================
// Drive manager
// ============================
/// Owns the state of all configured drives, indexed in configuration order.
pub struct DriveManager {
// One entry per configured path; indices are what the public API exposes.
drives: Vec<DriveState>,
}
impl DriveManager {
/// Initialize drive manager with configured drive paths.
pub async fn new(config: &DriveConfig) -> Result<Self> {
let mut drives = Vec::with_capacity(config.paths.len());
for path_str in &config.paths {
let path = PathBuf::from(path_str);
let storage_dir = path.join(".smartstorage");
// Ensure the drive directory exists
fs::create_dir_all(&storage_dir).await?;
// Try to read existing format
let format = Self::read_format(&storage_dir).await;
let status = if path.exists() {
DriveStatus::Online
} else {
DriveStatus::Offline
};
drives.push(DriveState {
path,
format,
status,
stats: DriveStats::default(),
});
}
Ok(Self { drives })
}
/// Stamp every drive with its cluster and erasure-set identity.
///
/// `erasure_set_assignments[i]` is `(erasure_set_id, drive_index_in_set)` for
/// drive `i`; the slice must have exactly one entry per managed drive. Each
/// drive gets a pretty-printed `.smartstorage/format.json` (format version 1),
/// and the in-memory state is updated once the file is written.
///
/// Fails if the counts differ or on the first I/O or serialization error;
/// drives processed before the failure stay formatted.
pub async fn format_drives(
    &mut self,
    cluster_id: &str,
    erasure_set_assignments: &[(u32, u32)], // (erasure_set_id, drive_index_in_set)
) -> Result<()> {
    let provided = erasure_set_assignments.len();
    let expected = self.drives.len();
    if provided != expected {
        anyhow::bail!(
            "Erasure set assignments count ({}) doesn't match drive count ({})",
            provided,
            expected
        );
    }

    for (slot, &(set_id, drive_idx)) in self.drives.iter_mut().zip(erasure_set_assignments) {
        let stamp = DriveFormat {
            cluster_id: cluster_id.to_string(),
            erasure_set_id: set_id,
            drive_index_in_set: drive_idx,
            format_version: 1,
        };

        let meta_dir = slot.path.join(".smartstorage");
        fs::create_dir_all(&meta_dir).await?;

        let serialized = serde_json::to_string_pretty(&stamp)?;
        fs::write(meta_dir.join("format.json"), serialized).await?;

        slot.format = Some(stamp);
    }

    Ok(())
}
/// Number of drives under management.
pub fn drive_count(&self) -> usize {
    self.drives().len()
}

/// State of the drive at `index`, or `None` if the index is out of range.
pub fn drive(&self, index: usize) -> Option<&DriveState> {
    self.drives.as_slice().get(index)
}

/// All drive states, in configuration order.
pub fn drives(&self) -> &[DriveState] {
    self.drives.as_slice()
}
/// Indices of the drives whose status is exactly `DriveStatus::Online`.
pub fn online_drives(&self) -> Vec<usize> {
    let mut online = Vec::new();
    for (idx, drive) in self.drives.iter().enumerate() {
        if drive.status == DriveStatus::Online {
            online.push(idx);
        }
    }
    online
}
/// Check health of a specific drive by writing and reading a probe file.
pub async fn check_drive_health(&mut self, index: usize) -> Result<DriveStatus> {
let drive = self
.drives
.get_mut(index)
.ok_or_else(|| anyhow::anyhow!("Drive index {} out of range", index))?;
let probe_path = drive.path.join(".smartstorage").join(".health_probe");
let start = std::time::Instant::now();
// Write probe
match fs::write(&probe_path, b"health_check").await {
Ok(()) => {}
Err(e) => {
drive.stats.error_count += 1;
drive.stats.last_error = Some(e.to_string());
drive.status = DriveStatus::Offline;
drive.stats.last_check = Utc::now();
return Ok(DriveStatus::Offline);
}
}
// Read probe
match fs::read(&probe_path).await {
Ok(_) => {}
Err(e) => {
drive.stats.error_count += 1;
drive.stats.last_error = Some(e.to_string());
drive.status = DriveStatus::Offline;
drive.stats.last_check = Utc::now();
return Ok(DriveStatus::Offline);
}
}
// Clean up probe
let _ = fs::remove_file(&probe_path).await;
let latency = start.elapsed();
drive.stats.avg_write_latency_us = latency.as_micros() as u64;
drive.stats.last_check = Utc::now();
// Mark degraded if latency is too high (>5 seconds)
if latency.as_secs() > 5 {
drive.status = DriveStatus::Degraded;
} else {
drive.status = DriveStatus::Online;
}
Ok(drive.status.clone())
}
/// Probe every drive in index order and return `(index, status)` pairs.
///
/// A probe that returns an error is logged and reported as `Offline`.
pub async fn check_all_drives(&mut self) -> Vec<(usize, DriveStatus)> {
    let total = self.drives.len();
    let mut report = Vec::new();

    for i in 0..total {
        let status = self.check_drive_health(i).await.unwrap_or_else(|e| {
            tracing::error!(drive = i, error = %e, "Drive health check failed");
            DriveStatus::Offline
        });
        report.push((i, status));
    }

    report
}
// Internal helpers

/// Load `format.json` from `storage_dir`; `None` if it is missing, unreadable,
/// or not valid `DriveFormat` JSON.
async fn read_format(storage_dir: &Path) -> Option<DriveFormat> {
    match fs::read_to_string(storage_dir.join("format.json")).await {
        Ok(raw) => serde_json::from_str(&raw).ok(),
        Err(_) => None,
    }
}
}

246
rust/src/cluster/erasure.rs Normal file
View File

@@ -0,0 +1,246 @@
use anyhow::Result;
use reed_solomon_erasure::galois_8::ReedSolomon;
use super::config::ErasureConfig;
/// Erasure coder that splits data into data+parity shards using Reed-Solomon.
///
/// Objects are processed in fixed-size chunks (stripes). Each chunk is independently
/// erasure-coded, enabling streaming encode/decode without buffering entire objects.
pub struct ErasureCoder {
// Reed-Solomon codec built from `config.data_shards` / `config.parity_shards`.
rs: ReedSolomon,
// Copy of the configuration the codec was built from.
config: ErasureConfig,
}
impl ErasureCoder {
pub fn new(config: &ErasureConfig) -> Result<Self> {
let rs = ReedSolomon::new(config.data_shards, config.parity_shards)
.map_err(|e| anyhow::anyhow!("Failed to create Reed-Solomon encoder: {:?}", e))?;
Ok(Self {
rs,
config: config.clone(),
})
}
pub fn config(&self) -> &ErasureConfig {
&self.config
}
/// Erasure-code one chunk into `data_shards + parity_shards` equally sized shards.
///
/// Every shard is `ceil(data.len() / data_shards)` bytes long. The data shards
/// (indices `0..data_shards`) hold the input in order, zero-padded at the end;
/// the parity shards follow at indices `data_shards..total`.
///
/// Fails for empty input or when the codec reports an encoding error.
pub fn encode_chunk(&self, data: &[u8]) -> Result<Vec<Vec<u8>>> {
    let k = self.config.data_shards;
    let m = self.config.parity_shards;

    // ceil(len / k): bytes carried by each data shard.
    let shard_size = (data.len() + k - 1) / k;
    if shard_size == 0 {
        anyhow::bail!("Cannot encode empty data");
    }

    // Slice the input into data shards, zero-padding the tail of the last one.
    let mut shards: Vec<Vec<u8>> = Vec::with_capacity(k + m);
    for piece in data.chunks(shard_size) {
        let mut shard = piece.to_vec();
        shard.resize(shard_size, 0);
        shards.push(shard);
    }
    // Short inputs may not reach all k data shards; the rest are all padding.
    // The same zero-filled buffers also serve as the parity shard slots.
    shards.resize(k + m, vec![0u8; shard_size]);

    // Parity is computed in place over the trailing m shards.
    match self.rs.encode(&mut shards) {
        Ok(()) => Ok(shards),
        Err(e) => Err(anyhow::anyhow!("Reed-Solomon encoding failed: {:?}", e)),
    }
}
/// Rebuild a chunk's original bytes from a possibly incomplete shard set.
///
/// `shards` must contain exactly `total_shards` slots, with `None` marking a
/// missing shard and at least `data_shards` slots present. Missing shards are
/// reconstructed in place, so `shards` is modified. `original_size` is the
/// chunk length before padding; the concatenated data shards are truncated to it.
///
/// Fails on a wrong slot count, too few present shards, or a codec error.
pub fn decode_chunk(
    &self,
    shards: &mut Vec<Option<Vec<u8>>>,
    original_size: usize,
) -> Result<Vec<u8>> {
    let data_count = self.config.data_shards;
    let expected = self.config.total_shards();

    if shards.len() != expected {
        anyhow::bail!(
            "Expected {} shards, got {}",
            expected,
            shards.len()
        );
    }

    let present = shards.iter().flatten().count();
    if present < data_count {
        anyhow::bail!(
            "Need at least {} shards for reconstruction, only {} available",
            data_count,
            present
        );
    }

    // Fill in whatever is missing.
    if let Err(e) = self.rs.reconstruct(shards) {
        return Err(anyhow::anyhow!(
            "Reed-Solomon reconstruction failed: {:?}",
            e
        ));
    }

    // The first `data_count` shards, in order, are the padded original data.
    let mut assembled = Vec::with_capacity(original_size);
    for (idx, slot) in shards.iter().take(data_count).enumerate() {
        match slot {
            Some(bytes) => assembled.extend_from_slice(bytes),
            None => anyhow::bail!("Data shard {} missing after reconstruction", idx),
        }
    }
    assembled.truncate(original_size);
    Ok(assembled)
}
/// Check that the parity shards match the data shards.
///
/// Returns `Ok(false)` when the set is inconsistent and `Err` when the codec
/// cannot run the check at all.
pub fn verify(&self, shards: &[Vec<u8>]) -> Result<bool> {
    let mut views: Vec<&[u8]> = Vec::with_capacity(shards.len());
    for shard in shards {
        views.push(shard.as_slice());
    }

    match self.rs.verify(&views) {
        Ok(consistent) => Ok(consistent),
        Err(e) => Err(anyhow::anyhow!(
            "Reed-Solomon verification failed: {:?}",
            e
        )),
    }
}
}
#[cfg(test)]
mod tests {
    use super::*;

    /// 4 data + 2 parity shards, 4 MiB chunks.
    fn test_config() -> ErasureConfig {
        ErasureConfig {
            data_shards: 4,
            parity_shards: 2,
            chunk_size_bytes: 4 * 1024 * 1024,
        }
    }

    /// Wrap every shard in `Some`, the input form `decode_chunk` expects.
    fn all_present(shards: &[Vec<u8>]) -> Vec<Option<Vec<u8>>> {
        shards.iter().cloned().map(Some).collect()
    }

    #[test]
    fn test_encode_decode_roundtrip() {
        let coder = ErasureCoder::new(&test_config()).unwrap();
        let original = b"Hello, erasure coding! This is a test of the Reed-Solomon implementation.";

        let shards = coder.encode_chunk(original).unwrap();
        assert_eq!(shards.len(), 6); // 4 data + 2 parity

        // Every shard has the same length as the first one.
        let shard_size = shards[0].len();
        assert!(shards.iter().all(|s| s.len() == shard_size));

        // Nothing missing: decoding must return the input unchanged.
        let mut slots = all_present(&shards);
        let recovered = coder.decode_chunk(&mut slots, original.len()).unwrap();
        assert_eq!(&recovered, original);
    }

    #[test]
    fn test_decode_with_missing_shards() {
        let coder = ErasureCoder::new(&test_config()).unwrap();
        let original = b"Testing reconstruction with missing shards - this should work with 4 of 6.";
        let shards = coder.encode_chunk(original).unwrap();

        // Drop as many shards as there are parity shards: data shard 1 and
        // parity shard 0.
        let mut slots = all_present(&shards);
        for lost in [1usize, 4] {
            slots[lost] = None;
        }

        let recovered = coder.decode_chunk(&mut slots, original.len()).unwrap();
        assert_eq!(&recovered, original);
    }

    #[test]
    fn test_decode_with_too_many_missing() {
        let coder = ErasureCoder::new(&test_config()).unwrap();
        let original = b"This should fail with 3 missing shards.";
        let shards = coder.encode_chunk(original).unwrap();

        // Three losses exceed the two parity shards.
        let mut slots = all_present(&shards);
        for lost in [0usize, 2, 5] {
            slots[lost] = None;
        }

        let result = coder.decode_chunk(&mut slots, original.len());
        assert!(result.is_err());
    }

    #[test]
    fn test_encode_large_data() {
        let coder = ErasureCoder::new(&test_config()).unwrap();
        // 1 MB of a repeating 0..=255 pattern.
        let original: Vec<u8> = (0..1_000_000).map(|i| (i % 256) as u8).collect();

        let shards = coder.encode_chunk(&original).unwrap();
        assert_eq!(shards.len(), 6);

        // ceil(len / 4) bytes per shard.
        let expected_shard_size = (original.len() + 3) / 4;
        assert_eq!(shards[0].len(), expected_shard_size);

        let mut slots = all_present(&shards);
        let recovered = coder.decode_chunk(&mut slots, original.len()).unwrap();
        assert_eq!(recovered, original);
    }

    #[test]
    fn test_verify_shards() {
        let coder = ErasureCoder::new(&test_config()).unwrap();
        let original = b"Verify test data";
        let shards = coder.encode_chunk(original).unwrap();
        assert!(coder.verify(&shards).unwrap());

        // Flip every bit of the first byte of the first shard.
        let mut corrupted = shards.clone();
        corrupted[0][0] ^= 0xFF;
        assert!(!coder.verify(&corrupted).unwrap());
    }

    #[test]
    fn test_small_config() {
        // 2 data + 1 parity
        let config = ErasureConfig {
            data_shards: 2,
            parity_shards: 1,
            chunk_size_bytes: 1024,
        };
        let coder = ErasureCoder::new(&config).unwrap();
        let original = b"Small config test";

        let shards = coder.encode_chunk(original).unwrap();
        assert_eq!(shards.len(), 3);

        // Losing a single shard is recoverable.
        let mut slots = all_present(&shards);
        slots[0] = None;
        let recovered = coder.decode_chunk(&mut slots, original.len()).unwrap();
        assert_eq!(&recovered, original);
    }
}

344
rust/src/cluster/healing.rs Normal file
View File

@@ -0,0 +1,344 @@
use anyhow::Result;
use std::path::PathBuf;
use std::sync::Arc;
use std::time::Duration;
use tokio::fs;
use super::config::ErasureConfig;
use super::erasure::ErasureCoder;
use super::metadata::ObjectManifest;
use super::shard_store::{ShardId, ShardStore};
use super::state::ClusterState;
/// Background healing service that scans for under-replicated shards
/// and reconstructs them.
pub struct HealingService {
// Cluster view used to find offline nodes and to check for a majority.
state: Arc<ClusterState>,
// Coder used to reconstruct and re-encode affected chunks.
erasure_coder: ErasureCoder,
// Shard stores on this node; healed shards are written to the first one.
local_shard_stores: Vec<Arc<ShardStore>>,
// Root directory holding one sub-directory of manifests per bucket.
manifest_dir: PathBuf,
// Period between two healing scans.
scan_interval: Duration,
}
impl HealingService {
pub fn new(
state: Arc<ClusterState>,
erasure_config: &ErasureConfig,
local_shard_stores: Vec<Arc<ShardStore>>,
manifest_dir: PathBuf,
scan_interval_hours: u64,
) -> Result<Self> {
Ok(Self {
state,
erasure_coder: ErasureCoder::new(erasure_config)?,
local_shard_stores,
manifest_dir,
scan_interval: Duration::from_secs(scan_interval_hours * 3600),
})
}
/// Run the healing loop as a background task.
pub async fn run(&self, mut shutdown: tokio::sync::watch::Receiver<bool>) {
let mut interval = tokio::time::interval(self.scan_interval);
// Skip the first immediate tick
interval.tick().await;
loop {
tokio::select! {
_ = interval.tick() => {
tracing::info!("Starting healing scan");
match self.heal_scan().await {
Ok(stats) => {
tracing::info!(
checked = stats.shards_checked,
healed = stats.shards_healed,
errors = stats.errors,
"Healing scan completed"
);
}
Err(e) => {
tracing::error!("Healing scan failed: {}", e);
}
}
}
_ = shutdown.changed() => {
tracing::info!("Healing service shutting down");
break;
}
}
}
}
/// Walk every bucket's manifests and heal chunks with shards on offline nodes.
///
/// Returns empty stats without scanning when no node is offline, when this
/// node does not see a majority (to avoid healing on the minority side of a
/// partition), or when `manifest_dir` cannot be listed. Directory entries
/// that are not directories, or whose name starts with `.`, are ignored.
async fn heal_scan(&self) -> Result<HealStats> {
    let mut stats = HealStats::default();

    let offline_nodes = self.state.offline_nodes().await;
    if offline_nodes.is_empty() {
        tracing::debug!("No offline nodes, skipping heal scan");
        return Ok(stats);
    }

    // Split-brain prevention: only the majority side may heal.
    let majority = self.state.has_majority().await;
    if !majority {
        tracing::warn!("No majority quorum, skipping heal to prevent split-brain");
        return Ok(stats);
    }

    tracing::info!(
        "Found {} offline nodes, scanning for affected shards",
        offline_nodes.len()
    );

    // Each sub-directory of manifest_dir is one bucket.
    let mut bucket_entries = match fs::read_dir(&self.manifest_dir).await {
        Err(_) => return Ok(stats),
        Ok(listing) => listing,
    };

    loop {
        let bucket_entry = match bucket_entries.next_entry().await? {
            Some(entry) => entry,
            None => break,
        };

        let is_dir = bucket_entry.metadata().await?.is_dir();
        let bucket_name = bucket_entry.file_name().to_string_lossy().to_string();
        if !is_dir || bucket_name.starts_with('.') {
            continue;
        }

        self.heal_bucket(&bucket_name, &offline_nodes, &mut stats)
            .await;

        // Give foreground I/O a chance between buckets.
        tokio::task::yield_now().await;
    }

    Ok(stats)
}
async fn heal_bucket(
&self,
bucket: &str,
offline_nodes: &[String],
stats: &mut HealStats,
) {
let bucket_dir = self.manifest_dir.join(bucket);
let manifests = match self.collect_manifests(&bucket_dir).await {
Ok(m) => m,
Err(e) => {
tracing::warn!(bucket = bucket, error = %e, "Failed to list manifests");
stats.errors += 1;
return;
}
};
let local_id = self.state.local_node_id().to_string();
for manifest in &manifests {
for chunk in &manifest.chunks {
// Check if any shard in this chunk is on an offline node
let affected: Vec<_> = chunk
.shard_placements
.iter()
.filter(|p| offline_nodes.contains(&p.node_id))
.collect();
if affected.is_empty() {
continue;
}
stats.shards_checked += chunk.shard_placements.len() as u64;
// Try to reconstruct missing shards from available ones
let k = manifest.data_shards;
let total = manifest.data_shards + manifest.parity_shards;
// Count available shards (those NOT on offline nodes)
let available_count = chunk
.shard_placements
.iter()
.filter(|p| !offline_nodes.contains(&p.node_id))
.count();
if available_count < k {
tracing::error!(
bucket = manifest.bucket,
key = manifest.key,
chunk = chunk.chunk_index,
available = available_count,
needed = k,
"Cannot heal chunk: not enough available shards"
);
stats.errors += 1;
continue;
}
// Fetch available shards (only local ones for now)
let mut shards: Vec<Option<Vec<u8>>> = vec![None; total];
let mut fetched = 0usize;
for placement in &chunk.shard_placements {
if offline_nodes.contains(&placement.node_id) {
continue; // Skip offline nodes
}
if fetched >= k {
break;
}
if placement.node_id == local_id {
let shard_id = ShardId {
bucket: manifest.bucket.clone(),
key: manifest.key.clone(),
chunk_index: chunk.chunk_index,
shard_index: placement.shard_index,
};
let store_idx = placement.drive_id.parse::<usize>().unwrap_or(0);
if let Some(store) = self.local_shard_stores.get(store_idx) {
if let Ok((data, _)) = store.read_shard(&shard_id).await {
shards[placement.shard_index as usize] = Some(data);
fetched += 1;
}
}
}
// TODO: fetch from other online remote nodes
}
if fetched < k {
tracing::warn!(
bucket = manifest.bucket,
key = manifest.key,
chunk = chunk.chunk_index,
"Not enough local shards to heal, skipping"
);
continue;
}
// Reconstruct all shards
let reconstructed = match self.erasure_coder.decode_chunk(
&mut shards,
chunk.data_size,
) {
Ok(_) => true,
Err(e) => {
tracing::error!(
bucket = manifest.bucket,
key = manifest.key,
chunk = chunk.chunk_index,
error = %e,
"Reconstruction failed"
);
stats.errors += 1;
false
}
};
if !reconstructed {
continue;
}
// Re-encode to get all shards back (including the missing ones)
let full_data_size = chunk.data_size;
let mut data_buf = Vec::with_capacity(full_data_size);
for i in 0..k {
if let Some(ref shard) = shards[i] {
data_buf.extend_from_slice(shard);
}
}
data_buf.truncate(full_data_size);
let all_shards = match self.erasure_coder.encode_chunk(&data_buf) {
Ok(s) => s,
Err(e) => {
tracing::error!(error = %e, "Re-encoding for heal failed");
stats.errors += 1;
continue;
}
};
// Write the missing shards to the first available local drive
for affected_placement in &affected {
let shard_idx = affected_placement.shard_index as usize;
if shard_idx < all_shards.len() {
let shard_data = &all_shards[shard_idx];
let checksum = crc32c::crc32c(shard_data);
let shard_id = ShardId {
bucket: manifest.bucket.clone(),
key: manifest.key.clone(),
chunk_index: chunk.chunk_index,
shard_index: affected_placement.shard_index,
};
// Place on first available local drive
if let Some(store) = self.local_shard_stores.first() {
match store.write_shard(&shard_id, shard_data, checksum).await {
Ok(()) => {
stats.shards_healed += 1;
tracing::info!(
bucket = manifest.bucket,
key = manifest.key,
chunk = chunk.chunk_index,
shard = affected_placement.shard_index,
"Shard healed successfully"
);
}
Err(e) => {
tracing::error!(error = %e, "Failed to write healed shard");
stats.errors += 1;
}
}
}
}
}
tokio::task::yield_now().await;
}
}
}
/// Gather every object manifest found anywhere below `dir`.
async fn collect_manifests(&self, dir: &std::path::Path) -> Result<Vec<ObjectManifest>> {
    let mut found = Vec::new();
    match self.collect_manifests_recursive(dir, &mut found).await {
        Ok(()) => Ok(found),
        Err(e) => Err(e),
    }
}
/// Depth-first walk of `dir`, appending every parsed `*.manifest.json` to `manifests`.
///
/// Returns a boxed future because an `async fn` cannot call itself directly.
/// A directory that cannot be opened is treated as empty, and manifest files
/// that cannot be read or parsed are skipped; errors while iterating entries
/// or reading their metadata are propagated.
fn collect_manifests_recursive<'a>(
    &'a self,
    dir: &'a std::path::Path,
    manifests: &'a mut Vec<ObjectManifest>,
) -> std::pin::Pin<Box<dyn std::future::Future<Output = Result<()>> + Send + 'a>> {
    Box::pin(async move {
        let mut listing = match fs::read_dir(dir).await {
            Err(_) => return Ok(()),
            Ok(l) => l,
        };

        while let Some(item) = listing.next_entry().await? {
            let is_dir = item.metadata().await?.is_dir();
            let file_name = item.file_name().to_string_lossy().to_string();

            if is_dir {
                self.collect_manifests_recursive(&item.path(), manifests)
                    .await?;
                continue;
            }
            if !file_name.ends_with(".manifest.json") {
                continue;
            }

            let parsed = match fs::read_to_string(item.path()).await {
                Ok(raw) => serde_json::from_str::<ObjectManifest>(&raw).ok(),
                Err(_) => None,
            };
            if let Some(manifest) = parsed {
                manifests.push(manifest);
            }
        }

        Ok(())
    })
}
}
/// Counters accumulated over one healing scan; all start at zero.
#[derive(Debug, Default)]
pub struct HealStats {
/// Shard placements belonging to chunks that had at least one shard on an
/// offline node.
pub shards_checked: u64,
/// Shards successfully rewritten to a local store.
pub shards_healed: u64,
/// Failures encountered (listing, reconstruction, re-encoding, writing).
pub errors: u64,
}

View File

@@ -0,0 +1,226 @@
use anyhow::Result;
use std::net::SocketAddr;
use std::sync::Arc;
use std::time::Duration;
use tokio::sync::Mutex;
use super::drive_manager::{DriveManager, DriveStatus};
use super::protocol::{
ClusterRequest, ClusterResponse, DriveStateInfo, HeartbeatMessage, JoinRequestMessage,
NodeInfo,
};
use super::quic_transport::QuicTransport;
use super::state::ClusterState;
/// Manages cluster membership: heartbeating, joining, failure detection.
pub struct MembershipManager {
// Shared cluster view updated on join and after each heartbeat round.
state: Arc<ClusterState>,
// Transport used for join requests and heartbeats.
transport: Arc<QuicTransport>,
// Period between heartbeat rounds.
heartbeat_interval: Duration,
// This node's identity, sent in join requests and heartbeats.
local_node_info: NodeInfo,
// Optional source of drive health for heartbeats; `None` sends no drive states.
drive_manager: Option<Arc<Mutex<DriveManager>>>,
}
impl MembershipManager {
pub fn new(
state: Arc<ClusterState>,
transport: Arc<QuicTransport>,
heartbeat_interval_ms: u64,
local_node_info: NodeInfo,
) -> Self {
Self {
state,
transport,
heartbeat_interval: Duration::from_millis(heartbeat_interval_ms),
local_node_info,
drive_manager: None,
}
}
/// Builder-style setter: attach the drive manager whose health results are
/// included in heartbeats.
pub fn with_drive_manager(self, dm: Arc<Mutex<DriveManager>>) -> Self {
    let mut configured = self;
    configured.drive_manager = Some(dm);
    configured
}
/// Join an existing cluster through the first seed node that accepts us.
///
/// Seeds are tried in order; entries that do not parse as a socket address
/// and seeds that fail or reject the join are logged and skipped. With no
/// seeds configured, or when every seed fails, this node registers itself in
/// the cluster state and starts as the initial node. Always returns `Ok(())`.
pub async fn join_cluster(&self, seed_nodes: &[String]) -> Result<()> {
    if seed_nodes.is_empty() {
        tracing::info!("No seed nodes configured, starting as initial cluster node");
        self.state.add_node(self.local_node_info.clone()).await;
        return Ok(());
    }

    for seed in seed_nodes {
        let addr = match seed.parse::<SocketAddr>() {
            Err(e) => {
                tracing::warn!("Invalid seed node address '{}': {}", seed, e);
                continue;
            }
            Ok(parsed) => parsed,
        };

        tracing::info!("Attempting to join cluster via seed node {}", seed);
        if let Err(e) = self.try_join(addr).await {
            tracing::warn!("Failed to join via {}: {}", seed, e);
            continue;
        }

        tracing::info!("Successfully joined cluster via {}", seed);
        return Ok(());
    }

    // No seed accepted the join: bootstrap a cluster of our own.
    tracing::info!("Could not reach any seed nodes, starting as initial cluster node");
    self.state.add_node(self.local_node_info.clone()).await;
    Ok(())
}
/// Send a join request to the seed at `addr` and apply the returned topology.
///
/// On acceptance with a topology, the topology is applied and this node is
/// then added to the local state. An accepted join without a topology is
/// still `Ok(())`. Rejections, error responses, unexpected response kinds and
/// transport failures are returned as errors.
async fn try_join(&self, addr: SocketAddr) -> Result<()> {
    let conn = self.transport.get_connection("seed", addr).await?;

    let request = ClusterRequest::JoinRequest(JoinRequestMessage {
        node_info: self.local_node_info.clone(),
    });
    let response = self.transport.send_request(&conn, &request).await?;

    let join_resp = match response {
        ClusterResponse::JoinResponse(resp) => resp,
        ClusterResponse::Error(e) => {
            anyhow::bail!("Join error: {} - {}", e.code, e.message)
        }
        _ => anyhow::bail!("Unexpected response to join request"),
    };

    if !join_resp.accepted {
        anyhow::bail!(
            "Join rejected: {}",
            join_resp.error.unwrap_or_default()
        )
    }

    if let Some(topology) = &join_resp.topology {
        self.state.apply_topology(topology).await;
        // The received topology may not list us yet, so register ourselves too.
        self.state.add_node(self.local_node_info.clone()).await;
        tracing::info!(
            "Applied cluster topology (version {}, {} nodes, {} erasure sets)",
            topology.version,
            topology.nodes.len(),
            topology.erasure_sets.len(),
        );
    }
    Ok(())
}
/// Send a heartbeat round every `heartbeat_interval` until `shutdown` fires.
///
/// The first round is sent immediately. A round in progress is not
/// interrupted by shutdown.
///
/// NOTE(review): `tokio::time::interval` rejects a zero period — confirm the
/// configured interval is validated before it reaches this loop.
pub async fn heartbeat_loop(self: Arc<Self>, mut shutdown: tokio::sync::watch::Receiver<bool>) {
    let mut ticker = tokio::time::interval(self.heartbeat_interval);
    loop {
        let stop_requested = tokio::select! {
            _ = ticker.tick() => false,
            _ = shutdown.changed() => true,
        };
        if stop_requested {
            break;
        }
        self.send_heartbeats().await;
    }
}
/// Run one heartbeat round against all currently online peers.
///
/// Peers are contacted one after another, each with a 5-second timeout; peers
/// whose address does not parse are skipped. The ids of the peers that
/// answered are passed to the cluster state, and every resulting status
/// change is logged.
async fn send_heartbeats(&self) {
    let peers = self.state.online_peers().await;
    let topology_version = self.state.version().await;
    // Drive health is gathered once and reused for every peer in this round.
    let drive_states = self.collect_drive_states().await;

    let per_peer_timeout = Duration::from_secs(5);
    let mut responded = Vec::new();

    for peer in &peers {
        let addr = match peer.quic_addr.parse::<SocketAddr>() {
            Err(_) => continue,
            Ok(parsed) => parsed,
        };

        let heartbeat = ClusterRequest::Heartbeat(HeartbeatMessage {
            node_id: self.local_node_info.node_id.clone(),
            timestamp: chrono::Utc::now().to_rfc3339(),
            drive_states: drive_states.clone(),
            topology_version,
        });

        let attempt = self.send_heartbeat_to_peer(&peer.node_id, addr, &heartbeat);
        match tokio::time::timeout(per_peer_timeout, attempt).await {
            Err(_) => {
                tracing::debug!(peer = %peer.node_id, "Heartbeat timed out");
            }
            Ok(Err(e)) => {
                tracing::debug!(
                    peer = %peer.node_id,
                    error = %e,
                    "Heartbeat failed"
                );
            }
            Ok(Ok(())) => {
                responded.push(peer.node_id.clone());
            }
        }
    }

    // Let the cluster state derive liveness from who answered.
    let status_changes = self.state.tick_heartbeats(&responded).await;
    for (node_id, status) in &status_changes {
        tracing::info!(node = %node_id, status = ?status, "Node status changed");
    }
}
/// Deliver one heartbeat to the peer `node_id` at `addr`.
///
/// Succeeds when a connection is obtained and any response comes back; the
/// response content is ignored.
async fn send_heartbeat_to_peer(
    &self,
    node_id: &str,
    addr: SocketAddr,
    heartbeat: &ClusterRequest,
) -> Result<()> {
    let conn = self.transport.get_connection(node_id, addr).await?;
    self.transport
        .send_request(&conn, heartbeat)
        .await
        .map(|_response| ())
}
/// Probe all local drives and convert the results for a heartbeat message.
///
/// Returns an empty list when no drive manager is attached. The drive
/// manager's mutex is held for the whole call, including while every drive
/// is being probed.
async fn collect_drive_states(&self) -> Vec<DriveStateInfo> {
    let shared = match self.drive_manager.as_ref() {
        None => return Vec::new(),
        Some(dm) => dm,
    };

    let mut manager = shared.lock().await;
    let probe_results = manager.check_all_drives().await;

    let mut infos = Vec::with_capacity(probe_results.len());
    for (idx, status) in probe_results {
        let label = match status {
            DriveStatus::Online => "online",
            DriveStatus::Degraded => "degraded",
            DriveStatus::Offline => "offline",
            DriveStatus::Healing => "healing",
        };
        infos.push(DriveStateInfo {
            drive_index: idx as u32,
            status: label.to_string(),
        });
    }
    infos
}
}

View File

@@ -0,0 +1,85 @@
use serde::{Deserialize, Serialize};
use std::collections::HashMap;
/// Full manifest describing how an object is stored across erasure-coded shards.
///
/// (De)serialized with camelCase keys (e.g. `versionId`, `dataShards`). No
/// field has a serde default, so all of them must be present when parsing.
#[derive(Debug, Clone, Serialize, Deserialize)]
#[serde(rename_all = "camelCase")]
pub struct ObjectManifest {
/// Bucket name
pub bucket: String,
/// Object key
pub key: String,
/// Unique version ID for this write
pub version_id: String,
/// Total object size in bytes
pub size: u64,
/// MD5 hex digest of the complete object
pub content_md5: String,
/// Content type
pub content_type: String,
/// User metadata (x-amz-meta-*, content-type, etc.)
pub metadata: HashMap<String, String>,
/// When the object was created (stored as a string; format not fixed here)
pub created_at: String,
/// Last modified timestamp (stored as a string; format not fixed here)
pub last_modified: String,
/// Number of data shards used
pub data_shards: usize,
/// Number of parity shards used
pub parity_shards: usize,
/// Chunk size in bytes (last chunk may be smaller)
pub chunk_size: usize,
/// Per-chunk shard placement info
pub chunks: Vec<ChunkManifest>,
}
/// Describes the shards for a single chunk of an object.
///
/// (De)serialized with camelCase keys.
#[derive(Debug, Clone, Serialize, Deserialize)]
#[serde(rename_all = "camelCase")]
pub struct ChunkManifest {
/// Index of this chunk (0-based)
pub chunk_index: u32,
/// Actual data size of this chunk (before erasure coding and its padding)
pub data_size: usize,
/// Where each shard was placed
pub shard_placements: Vec<ShardPlacement>,
}
/// Describes where a specific shard is stored.
///
/// (De)serialized with camelCase keys.
#[derive(Debug, Clone, Serialize, Deserialize)]
#[serde(rename_all = "camelCase")]
pub struct ShardPlacement {
/// Shard index within the erasure set (0..data_shards+parity_shards)
pub shard_index: u32,
/// Node that holds this shard
pub node_id: String,
/// Drive ID on that node, stored as a string
pub drive_id: String,
/// CRC32C checksum of the shard data
pub checksum: u32,
/// Size of the shard data in bytes
pub shard_size: usize,
}
/// Manifest for a multipart upload in progress.
///
/// (De)serialized with camelCase keys (e.g. `uploadId`).
#[derive(Debug, Clone, Serialize, Deserialize)]
#[serde(rename_all = "camelCase")]
pub struct MultipartUploadManifest {
/// Identifier of the upload.
pub upload_id: String,
/// Bucket the finished object will belong to.
pub bucket: String,
/// Key of the object being uploaded.
pub key: String,
/// When the upload was initiated (stored as a string; format not fixed here).
pub initiated: String,
/// Metadata key/value pairs recorded for the upload.
pub metadata: HashMap<String, String>,
/// Per-part manifests, keyed by part number.
pub parts: HashMap<u32, PartManifest>,
}
/// Manifest for a single part of a multipart upload.
///
/// (De)serialized with camelCase keys (e.g. `partNumber`).
#[derive(Debug, Clone, Serialize, Deserialize)]
#[serde(rename_all = "camelCase")]
pub struct PartManifest {
/// Number of this part within the upload.
pub part_number: u32,
/// Size of the part in bytes.
pub size: u64,
/// MD5 of the part. NOTE(review): presumably a hex digest like
/// `ObjectManifest::content_md5` — confirm.
pub md5: String,
/// Chunks the part was split into, with their shard placements.
pub chunks: Vec<ChunkManifest>,
}

12
rust/src/cluster/mod.rs Normal file
View File

@@ -0,0 +1,12 @@
pub mod config;
pub mod coordinator;
pub mod drive_manager;
pub mod erasure;
pub mod healing;
pub mod membership;
pub mod metadata;
pub mod placement;
pub mod protocol;
pub mod quic_transport;
pub mod shard_store;
pub mod state;

View File

@@ -0,0 +1,140 @@
use xxhash_rust::xxh64::xxh64;
/// Map an object to an erasure set index in `0..num_erasure_sets`.
///
/// The index is the xxhash64 (seed 0) of `"{bucket}/{key}"` modulo the number
/// of sets, so it depends only on the arguments and any node computes the same
/// answer. With zero erasure sets the result is 0.
pub fn erasure_set_for_object(bucket: &str, key: &str, num_erasure_sets: u32) -> u32 {
    match num_erasure_sets {
        0 => 0,
        set_count => {
            let object_path = [bucket, key].join("/");
            let digest = xxh64(object_path.as_bytes(), 0);
            (digest % u64::from(set_count)) as u32
        }
    }
}
/// Represents a drive location within the cluster topology.
#[derive(Debug, Clone)]
pub struct DriveLocation {
/// Id of the node that owns the drive.
pub node_id: String,
/// Index of the drive on that node (0-based, as produced by `form_erasure_sets`).
pub drive_index: u32,
}
/// An erasure set: a fixed group of drives that together store one complete
/// set of shards for any object placed on them.
#[derive(Debug, Clone)]
pub struct ErasureSet {
/// Index of this set; `form_erasure_sets` assigns ids sequentially from 0.
pub set_id: u32,
/// Ordered drives: index = shard_index
pub drives: Vec<DriveLocation>,
}
/// Form erasure sets from the available drives across all nodes.
///
/// Drives are first laid out interleaved by node (drive 0 of every node, then
/// drive 1 of every node, ...) for fault isolation, then cut into consecutive
/// groups of `total_shards` drives. For example, with 3 nodes x 4 drives and
/// `total_shards = 6`:
///   Set 0: N0-D0, N1-D0, N2-D0, N0-D1, N1-D1, N2-D1
///   Set 1: N0-D2, N1-D2, N2-D2, N0-D3, N1-D3, N2-D3
///
/// * `nodes` - `(node_id, drive_count)` pairs; nodes may have different drive counts.
/// * `total_shards` - number of drives per erasure set.
///
/// Returns the complete sets only: drives left over after the last full group
/// are not assigned to any set. Returns an empty vector when `total_shards`
/// is `0` (a set of zero drives is meaningless, and dividing by it would panic).
pub fn form_erasure_sets(
    nodes: &[(String, u32)], // (node_id, drive_count)
    total_shards: usize,
) -> Vec<ErasureSet> {
    if total_shards == 0 {
        return Vec::new();
    }

    // Collect all drives as (node_id, drive_index), interleaved by node.
    let max_drives = nodes.iter().map(|(_, count)| *count).max().unwrap_or(0);
    let mut all_drives: Vec<DriveLocation> = Vec::new();
    for drive_index in 0..max_drives {
        for (node_id, drive_count) in nodes {
            // Nodes with fewer drives simply drop out of the later rounds.
            if drive_index < *drive_count {
                all_drives.push(DriveLocation {
                    node_id: node_id.clone(),
                    drive_index,
                });
            }
        }
    }

    // `chunks_exact` yields only full groups, so the remainder is ignored.
    all_drives
        .chunks_exact(total_shards)
        .enumerate()
        .map(|(set_idx, drives)| ErasureSet {
            set_id: set_idx as u32,
            drives: drives.to_vec(),
        })
        .collect()
}
#[cfg(test)]
mod tests {
    use super::*;

    /// Node id of every drive in `set`, in shard order.
    fn node_ids(set: &ErasureSet) -> Vec<&str> {
        set.drives.iter().map(|d| d.node_id.as_str()).collect()
    }

    /// Drive index of every drive in `set`, in shard order.
    fn drive_indices(set: &ErasureSet) -> Vec<u32> {
        set.drives.iter().map(|d| d.drive_index).collect()
    }

    /// The same bucket/key must always map to the same set.
    #[test]
    fn test_erasure_set_assignment_deterministic() {
        let first = erasure_set_for_object("mybucket", "mykey", 4);
        let second = erasure_set_for_object("mybucket", "mykey", 4);
        assert_eq!(first, second);
    }

    /// 1000 keys over 4 sets: every result is in range and no set is starved.
    #[test]
    fn test_erasure_set_distribution() {
        let num_sets = 4u32;
        let mut counts = vec![0u32; num_sets as usize];
        for i in 0..1000 {
            let set = erasure_set_for_object("bucket", &format!("key-{}", i), num_sets);
            assert!(set < num_sets);
            counts[set as usize] += 1;
        }
        // Each set should have some objects (not all in one set)
        for count in &counts {
            assert!(*count > 100, "Expected >100, got {}", count);
        }
    }

    /// 3 nodes x 4 drives with 6 shards per set gives 2 interleaved sets.
    #[test]
    fn test_form_erasure_sets_3x4() {
        let nodes: Vec<(String, u32)> = ["node1", "node2", "node3"]
            .iter()
            .map(|name| (name.to_string(), 4))
            .collect();
        let sets = form_erasure_sets(&nodes, 6);
        assert_eq!(sets.len(), 2);

        // Both sets interleave across the three nodes.
        let interleaved = vec!["node1", "node2", "node3", "node1", "node2", "node3"];
        assert_eq!(node_ids(&sets[0]), interleaved);
        assert_eq!(node_ids(&sets[1]), interleaved);

        // The sets use disjoint drive indices.
        assert_eq!(drive_indices(&sets[0]), vec![0, 0, 0, 1, 1, 1]);
        assert_eq!(drive_indices(&sets[1]), vec![2, 2, 2, 3, 3, 3]);
    }

    /// 2 nodes x 3 drives with 4 shards per set gives 1 set; 2 drives are left over.
    #[test]
    fn test_form_erasure_sets_remainder() {
        let nodes = vec![("a".to_string(), 3), ("b".to_string(), 3)];
        let sets = form_erasure_sets(&nodes, 4);
        assert_eq!(sets.len(), 1);
        assert_eq!(sets[0].drives.len(), 4);
    }
}

View File

@@ -0,0 +1,384 @@
use serde::{Deserialize, Serialize};
use std::collections::HashMap;
use super::metadata::ObjectManifest;
/// All inter-node cluster messages, serialized with bincode over QUIC streams.
///
/// Each message type gets its own bidirectional QUIC stream.
/// For shard data transfers, the header is sent first (bincode),
/// then raw shard bytes follow on the same stream.
///
/// On the wire every message is preceded by a 4-byte little-endian length
/// (see `encode_request`). The enum is encoded with bincode, so reordering
/// or inserting variants changes the wire format.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub enum ClusterRequest {
// ============================
// Shard operations
// ============================
/// Write a shard to a specific drive on the target node.
/// Shard data follows after this header on the same stream.
ShardWrite(ShardWriteRequest),
/// Read a shard from the target node.
ShardRead(ShardReadRequest),
/// Delete a shard from the target node.
ShardDelete(ShardDeleteRequest),
/// Check if a shard exists and get its metadata.
ShardHead(ShardHeadRequest),
// ============================
// Manifest operations
// ============================
/// Store an object manifest on the target node.
ManifestWrite(ManifestWriteRequest),
/// Retrieve an object manifest from the target node.
ManifestRead(ManifestReadRequest),
/// Delete an object manifest from the target node.
ManifestDelete(ManifestDeleteRequest),
/// List all manifests for a bucket on the target node.
ManifestList(ManifestListRequest),
// ============================
// Cluster management
// ============================
/// Periodic heartbeat.
Heartbeat(HeartbeatMessage),
/// Request to join the cluster.
JoinRequest(JoinRequestMessage),
/// Synchronize cluster topology.
TopologySync(TopologySyncMessage),
// ============================
// Healing
// ============================
/// Request a shard to be reconstructed and placed on a target drive.
HealRequest(HealRequestMessage),
}
/// Responses to cluster requests.
///
/// One variant per `ClusterRequest` variant, plus `Error` for failures that
/// are not tied to a specific response type. Encoded with bincode, so the
/// variant order is part of the wire format.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub enum ClusterResponse {
// Shard ops
ShardWriteAck(ShardWriteAck),
/// Header only; the shard bytes follow on the same stream when `found` is true.
ShardReadResponse(ShardReadResponse),
ShardDeleteAck(ShardDeleteAck),
ShardHeadResponse(ShardHeadResponse),
// Manifest ops
ManifestWriteAck(ManifestWriteAck),
ManifestReadResponse(ManifestReadResponse),
ManifestDeleteAck(ManifestDeleteAck),
ManifestListResponse(ManifestListResponse),
// Cluster mgmt
HeartbeatAck(HeartbeatAckMessage),
JoinResponse(JoinResponseMessage),
TopologySyncAck(TopologySyncAckMessage),
// Healing
HealResponse(HealResponseMessage),
// Error
Error(ErrorResponse),
}
// ============================
// Shard operation messages
// ============================
/// Header of a shard write; the raw shard bytes follow it on the same stream.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct ShardWriteRequest {
/// Caller-chosen id, echoed back in the `ShardWriteAck`.
pub request_id: String,
pub bucket: String,
pub key: String,
pub chunk_index: u32,
pub shard_index: u32,
/// Number of raw shard bytes that follow this header on the stream.
pub shard_data_length: u64,
pub checksum: u32, // crc32c of shard data
/// Object metadata sent along with the shard.
// NOTE(review): the stream handler in quic_transport does not read this field — confirm intended use.
pub object_metadata: HashMap<String, String>,
}
/// Acknowledgement of a `ShardWriteRequest`.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct ShardWriteAck {
/// Copied from the request.
pub request_id: String,
/// True when the shard store reported a successful write.
pub success: bool,
/// Error text of the failed write; `None` on success.
pub error: Option<String>,
}
/// Asks the target node for one shard, identified by bucket, key, chunk and shard index.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct ShardReadRequest {
/// Caller-chosen id, echoed back in the `ShardReadResponse`.
pub request_id: String,
pub bucket: String,
pub key: String,
pub chunk_index: u32,
pub shard_index: u32,
}
/// Header of a shard read reply.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct ShardReadResponse {
/// Copied from the request.
pub request_id: String,
/// False when the shard could not be read; length and checksum are then 0.
pub found: bool,
/// Number of raw shard bytes that follow this header on the stream.
pub shard_data_length: u64,
/// Checksum stored alongside the shard (crc32c per `ShardMeta`).
pub checksum: u32,
// Shard data follows on the stream after this header
}
/// Asks the target node to delete one shard.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct ShardDeleteRequest {
/// Caller-chosen id, echoed back in the `ShardDeleteAck`.
pub request_id: String,
pub bucket: String,
pub key: String,
pub chunk_index: u32,
pub shard_index: u32,
}
/// Acknowledgement of a `ShardDeleteRequest`.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct ShardDeleteAck {
/// Copied from the request.
pub request_id: String,
/// True when the shard store's delete call returned `Ok`.
pub success: bool,
}
/// Asks whether a shard exists on the target node, without transferring its data.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct ShardHeadRequest {
/// Caller-chosen id, echoed back in the `ShardHeadResponse`.
pub request_id: String,
pub bucket: String,
pub key: String,
pub chunk_index: u32,
pub shard_index: u32,
}
/// Reply to a `ShardHeadRequest`.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct ShardHeadResponse {
/// Copied from the request.
pub request_id: String,
/// False when no shard metadata was found; size and checksum are then 0.
pub found: bool,
/// `data_size` from the stored shard metadata.
pub data_size: u64,
/// `checksum` from the stored shard metadata.
pub checksum: u32,
}
// ============================
// Manifest operation messages
// ============================
/// Asks the target node to store an object manifest.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct ManifestWriteRequest {
/// Caller-chosen request id.
pub request_id: String,
/// The manifest to store.
pub manifest: ObjectManifest,
}
/// Acknowledgement of a `ManifestWriteRequest`.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct ManifestWriteAck {
pub request_id: String,
pub success: bool,
/// Presumably the failure description when `success` is false — TODO confirm with the handler.
pub error: Option<String>,
}
/// Asks the target node for the manifest of one object.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct ManifestReadRequest {
/// Caller-chosen request id.
pub request_id: String,
pub bucket: String,
pub key: String,
}
/// Reply to a `ManifestReadRequest`.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct ManifestReadResponse {
pub request_id: String,
pub found: bool,
/// Presumably `Some` exactly when `found` is true — TODO confirm with the handler.
pub manifest: Option<ObjectManifest>,
}
/// Asks the target node to delete the manifest of one object.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct ManifestDeleteRequest {
/// Caller-chosen request id.
pub request_id: String,
pub bucket: String,
pub key: String,
}
/// Acknowledgement of a `ManifestDeleteRequest`.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct ManifestDeleteAck {
pub request_id: String,
pub success: bool,
}
/// Asks the target node to list the manifests it holds for a bucket.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct ManifestListRequest {
/// Caller-chosen request id.
pub request_id: String,
pub bucket: String,
/// Presumably restricts the listing to keys starting with this prefix — TODO confirm with the handler.
pub prefix: Option<String>,
}
/// Reply to a `ManifestListRequest`, carrying the full manifests inline.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct ManifestListResponse {
pub request_id: String,
pub manifests: Vec<ObjectManifest>,
}
// ============================
// Cluster management messages
// ============================
/// Status of one drive, as reported in a heartbeat.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct DriveStateInfo {
/// Index of the drive on the reporting node.
pub drive_index: u32,
pub status: String, // "online", "degraded", "offline", "healing"
}
/// Periodic heartbeat carrying the sender's drive states and topology version.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct HeartbeatMessage {
/// Presumably the id of the sending node — TODO confirm with the membership module.
pub node_id: String,
/// Time of the heartbeat as a string — format not visible here; TODO confirm.
pub timestamp: String,
pub drive_states: Vec<DriveStateInfo>,
pub topology_version: u64,
}
/// Reply to a `HeartbeatMessage`.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct HeartbeatAckMessage {
/// Presumably the id of the replying node — TODO confirm with the membership module.
pub node_id: String,
/// Time of the reply as a string — format not visible here; TODO confirm.
pub timestamp: String,
pub topology_version: u64,
}
/// Static description of a cluster node, exchanged on join and in the topology.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct NodeInfo {
/// Unique id of the node; used as the key in the cluster state's node map.
pub node_id: String,
/// Address of the node's QUIC endpoint, as a string.
pub quic_addr: String,
/// Address of the node's S3 endpoint, as a string.
pub s3_addr: String,
/// Number of drives the node contributes.
pub drive_count: u32,
/// Node status as free-form text — allowed values are not visible here; TODO confirm.
pub status: String,
/// Version string — presumably the software version; TODO confirm.
pub version: String,
}
/// Request to join the cluster, describing the joining node.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct JoinRequestMessage {
pub node_info: NodeInfo,
}
/// Serializable snapshot of the cluster layout: nodes, erasure sets and shard counts.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct ClusterTopology {
/// Topology version number.
pub version: u64,
pub cluster_id: String,
pub nodes: Vec<NodeInfo>,
pub erasure_sets: Vec<ErasureSetInfo>,
/// Number of data shards per erasure-coded chunk.
pub data_shards: usize,
/// Number of parity shards per erasure-coded chunk.
pub parity_shards: usize,
}
/// Wire representation of an erasure set (same fields as `placement::ErasureSet`).
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct ErasureSetInfo {
pub set_id: u32,
pub drives: Vec<DriveLocationInfo>,
}
/// Wire representation of a drive location (same fields as `placement::DriveLocation`).
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct DriveLocationInfo {
pub node_id: String,
pub drive_index: u32,
}
/// Reply to a `JoinRequestMessage`.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct JoinResponseMessage {
pub accepted: bool,
/// Presumably the current topology when the join is accepted — TODO confirm with the handler.
pub topology: Option<ClusterTopology>,
/// Presumably the rejection reason when `accepted` is false — TODO confirm with the handler.
pub error: Option<String>,
}
/// Carries a full topology snapshot to a peer.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct TopologySyncMessage {
pub topology: ClusterTopology,
}
/// Reply to a `TopologySyncMessage`.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct TopologySyncAckMessage {
pub accepted: bool,
/// Presumably the topology version the replying node holds — TODO confirm with the handler.
pub current_version: u64,
}
// ============================
// Healing messages
// ============================
/// Asks for one shard to be reconstructed and placed on a target drive.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct HealRequestMessage {
/// Caller-chosen request id.
pub request_id: String,
pub bucket: String,
pub key: String,
pub chunk_index: u32,
pub shard_index: u32,
/// Node that should receive the reconstructed shard.
pub target_node_id: String,
/// Drive on the target node that should receive the reconstructed shard.
pub target_drive_index: u32,
}
/// Reply to a `HealRequestMessage`.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct HealResponseMessage {
pub request_id: String,
pub success: bool,
/// Presumably the failure description when `success` is false — TODO confirm with the handler.
pub error: Option<String>,
}
// ============================
// Error response
// ============================
/// Generic failure reply, usable in place of any typed response.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct ErrorResponse {
/// Id of the failed request; the QUIC stream handler sends an empty string
/// for operations it does not implement.
pub request_id: String,
/// Short machine-readable code, e.g. "NotImplemented".
pub code: String,
/// Human-readable description.
pub message: String,
}
// ============================
// Wire format helpers
// ============================
/// Serialize a request to bincode bytes with a 4-byte length prefix.
///
/// Layout: `[payload length as little-endian u32][bincode payload]`.
///
/// # Errors
/// Returns an error if bincode serialization fails, or if the payload is
/// larger than `u32::MAX` bytes and therefore cannot be described by the
/// prefix (a plain `as u32` cast would silently write a truncated length).
pub fn encode_request(req: &ClusterRequest) -> anyhow::Result<Vec<u8>> {
    let payload = bincode::serialize(req)?;
    if payload.len() > u32::MAX as usize {
        anyhow::bail!(
            "Request of {} bytes does not fit the 4-byte length prefix",
            payload.len()
        );
    }
    let mut buf = Vec::with_capacity(4 + payload.len());
    buf.extend_from_slice(&(payload.len() as u32).to_le_bytes());
    buf.extend_from_slice(&payload);
    Ok(buf)
}
/// Serialize a response to bincode bytes with a 4-byte length prefix.
///
/// Layout: `[payload length as little-endian u32][bincode payload]`.
///
/// # Errors
/// Returns an error if bincode serialization fails, or if the payload is
/// larger than `u32::MAX` bytes and therefore cannot be described by the
/// prefix (a plain `as u32` cast would silently write a truncated length).
pub fn encode_response(resp: &ClusterResponse) -> anyhow::Result<Vec<u8>> {
    let payload = bincode::serialize(resp)?;
    if payload.len() > u32::MAX as usize {
        anyhow::bail!(
            "Response of {} bytes does not fit the 4-byte length prefix",
            payload.len()
        );
    }
    let mut buf = Vec::with_capacity(4 + payload.len());
    buf.extend_from_slice(&(payload.len() as u32).to_le_bytes());
    buf.extend_from_slice(&payload);
    Ok(buf)
}
/// Read a length-prefixed bincode message from raw bytes.
/// Returns (decoded message, bytes consumed).
///
/// `data` must start with the 4-byte little-endian length written by
/// `encode_request`; bytes after the announced body are ignored and are not
/// counted in the returned size.
///
/// # Errors
/// Fails when `data` is shorter than the prefix or the announced body, or
/// when the body is not a valid bincode `ClusterRequest`.
pub fn decode_request(data: &[u8]) -> anyhow::Result<(ClusterRequest, usize)> {
    if data.len() < 4 {
        anyhow::bail!("Not enough data for length prefix");
    }
    let (prefix, body) = data.split_at(4);
    let len = u32::from_le_bytes([prefix[0], prefix[1], prefix[2], prefix[3]]) as usize;
    // Compare against the remaining bytes rather than computing `4 + len`
    // first: that sum can overflow `usize` on 32-bit targets.
    if body.len() < len {
        anyhow::bail!("Not enough data for message body");
    }
    let msg: ClusterRequest = bincode::deserialize(&body[..len])?;
    // Cannot overflow here: `len <= data.len() - 4`.
    Ok((msg, 4 + len))
}
/// Read a length-prefixed bincode response from raw bytes.
/// Returns (decoded response, bytes consumed).
///
/// `data` must start with the 4-byte little-endian length written by
/// `encode_response`; bytes after the announced body are ignored and are not
/// counted in the returned size.
///
/// # Errors
/// Fails when `data` is shorter than the prefix or the announced body, or
/// when the body is not a valid bincode `ClusterResponse`.
pub fn decode_response(data: &[u8]) -> anyhow::Result<(ClusterResponse, usize)> {
    if data.len() < 4 {
        anyhow::bail!("Not enough data for length prefix");
    }
    let (prefix, body) = data.split_at(4);
    let len = u32::from_le_bytes([prefix[0], prefix[1], prefix[2], prefix[3]]) as usize;
    // Compare against the remaining bytes rather than computing `4 + len`
    // first: that sum can overflow `usize` on 32-bit targets.
    if body.len() < len {
        anyhow::bail!("Not enough data for message body");
    }
    let msg: ClusterResponse = bincode::deserialize(&body[..len])?;
    // Cannot overflow here: `len <= data.len() - 4`.
    Ok((msg, 4 + len))
}

View File

@@ -0,0 +1,455 @@
use anyhow::Result;
use dashmap::DashMap;
use quinn::{ClientConfig, Endpoint, ServerConfig as QuinnServerConfig};
use rustls::pki_types::{CertificateDer, PrivateKeyDer, PrivatePkcs8KeyDer};
use std::net::SocketAddr;
use std::sync::Arc;
use tokio::io::{AsyncReadExt, AsyncWriteExt};
use super::protocol::{
self, ClusterRequest, ClusterResponse, ShardReadResponse, ShardWriteAck, ShardWriteRequest,
};
use super::shard_store::{ShardId, ShardStore};
/// QUIC transport layer for inter-node communication.
///
/// Manages a QUIC endpoint for both sending and receiving cluster messages.
/// Uses self-signed TLS certificates generated at init time.
/// Maintains a connection pool to peer nodes.
pub struct QuicTransport {
/// Endpoint used both to accept incoming connections and to dial peers.
endpoint: Endpoint,
/// Cached connections to peer nodes: node_id -> Connection
connections: Arc<DashMap<String, quinn::Connection>>,
/// Id of the node this transport belongs to.
local_node_id: String,
}
impl QuicTransport {
/// Create a new QUIC transport, binding to the specified address.
///
/// The endpoint accepts incoming connections with a freshly generated
/// self-signed certificate and is also configured as a client so that
/// `get_connection` can dial peers.
///
/// # Errors
/// Fails if the TLS configuration cannot be generated or the endpoint
/// cannot bind to `bind_addr`.
pub async fn new(bind_addr: SocketAddr, local_node_id: String) -> Result<Self> {
    let (server_config, client_config) = Self::generate_tls_configs()?;
    // The default client config is stored on the `Endpoint` handle itself,
    // not in the state shared between clones. It must therefore be set on
    // the handle kept in `self`; setting it on a temporary clone leaves the
    // stored endpoint without a client config, so every `connect` fails.
    let mut endpoint = Endpoint::server(server_config, bind_addr)?;
    endpoint.set_default_client_config(client_config);
    Ok(Self {
        endpoint,
        connections: Arc::new(DashMap::new()),
        local_node_id,
    })
}
/// Return a live connection to `node_id`, dialing `addr` if none is cached.
///
/// A cached connection is reused while it has no close reason. A closed one
/// is evicted and replaced by a fresh connection, which is then cached
/// under `node_id`.
pub async fn get_connection(
    &self,
    node_id: &str,
    addr: SocketAddr,
) -> Result<quinn::Connection> {
    // Clone the handle out of the map so no map guard is held while the
    // entry is removed or a new connection is awaited.
    let cached = self
        .connections
        .get(node_id)
        .map(|entry| entry.value().clone());
    if let Some(existing) = cached {
        if existing.close_reason().is_none() {
            return Ok(existing);
        }
        // Stale entry: drop it before dialing again.
        self.connections.remove(node_id);
    }

    let connecting = self.endpoint.connect(addr, "smartstorage")?;
    let fresh = connecting.await?;
    self.connections.insert(node_id.to_string(), fresh.clone());
    Ok(fresh)
}
/// Send one request on a new bidirectional stream and decode the reply.
///
/// The request is written length-prefixed and the send side is finished.
/// The whole reply (at most 64 MiB) is then read and decoded as a
/// length-prefixed `ClusterResponse`.
pub async fn send_request(
    &self,
    conn: &quinn::Connection,
    request: &ClusterRequest,
) -> Result<ClusterResponse> {
    let (mut send, mut recv) = conn.open_bi().await?;

    let wire_request = protocol::encode_request(request)?;
    send.write_all(&wire_request).await?;
    send.finish()?;

    let wire_response = recv.read_to_end(64 * 1024 * 1024).await?; // 64MB max
    protocol::decode_response(&wire_response).map(|(response, _consumed)| response)
}
/// Write one shard to a peer: header first, then the raw shard bytes.
///
/// Opens a new bidirectional stream, sends the length-prefixed
/// `ShardWrite` header followed by `shard_data`, finishes the send side and
/// waits for the acknowledgement (read limit: 1024 bytes).
///
/// Returns the peer's `ShardWriteAck`. An `Error` reply or any other
/// response type is turned into an error.
pub async fn send_shard_write(
    &self,
    conn: &quinn::Connection,
    request: ShardWriteRequest,
    shard_data: &[u8],
) -> Result<ShardWriteAck> {
    let (mut send, mut recv) = conn.open_bi().await?;

    let header = protocol::encode_request(&ClusterRequest::ShardWrite(request))?;
    send.write_all(&header).await?;
    send.write_all(shard_data).await?;
    send.finish()?;

    let raw_ack = recv.read_to_end(1024).await?;
    let (reply, _consumed) = protocol::decode_response(&raw_ack)?;
    let ack = match reply {
        ClusterResponse::ShardWriteAck(ack) => ack,
        ClusterResponse::Error(e) => {
            anyhow::bail!("Shard write error: {} - {}", e.code, e.message)
        }
        other => anyhow::bail!("Unexpected response to shard write: {:?}", other),
    };
    Ok(ack)
}
/// Request a shard from a peer and read it back from the same stream.
///
/// The reply is a length-prefixed bincode header followed, when the shard
/// was found, by `shard_data_length` raw bytes.
///
/// Returns `Some((shard_data, checksum))`, or `None` when the peer reports
/// the shard as not found. The checksum is returned as sent by the peer; it
/// is not verified here. An `Error` reply or any other response type is
/// turned into an error.
pub async fn send_shard_read(
    &self,
    conn: &quinn::Connection,
    request: &ClusterRequest,
) -> Result<Option<(Vec<u8>, u32)>> {
    let (mut send, mut recv) = conn.open_bi().await?;

    let wire_request = protocol::encode_request(request)?;
    send.write_all(&wire_request).await?;
    send.finish()?;

    // Header: 4-byte little-endian length, then the bincode message.
    let mut prefix = [0u8; 4];
    recv.read_exact(&mut prefix).await?;
    let mut header = vec![0u8; u32::from_le_bytes(prefix) as usize];
    recv.read_exact(&mut header).await?;

    let read_resp = match bincode::deserialize::<ClusterResponse>(&header)? {
        ClusterResponse::ShardReadResponse(resp) => resp,
        ClusterResponse::Error(e) => {
            anyhow::bail!("Shard read error: {} - {}", e.code, e.message)
        }
        other => anyhow::bail!("Unexpected response to shard read: {:?}", other),
    };
    if !read_resp.found {
        return Ok(None);
    }

    // The shard bytes follow the header on the same stream.
    let mut payload = vec![0u8; read_resp.shard_data_length as usize];
    recv.read_exact(&mut payload).await?;
    Ok(Some((payload, read_resp.checksum)))
}
/// Accept incoming connections and dispatch to the handler.
pub async fn accept_loop(
self: Arc<Self>,
shard_store: Arc<ShardStore>,
mut shutdown: tokio::sync::watch::Receiver<bool>,
) {
loop {
tokio::select! {
incoming = self.endpoint.accept() => {
match incoming {
Some(incoming_conn) => {
let transport = self.clone();
let store = shard_store.clone();
tokio::spawn(async move {
match incoming_conn.await {
Ok(conn) => {
transport.handle_connection(conn, store).await;
}
Err(e) => {
tracing::error!("Failed to accept QUIC connection: {}", e);
}
}
});
}
None => break,
}
}
_ = shutdown.changed() => break,
}
}
}
/// Serve one QUIC connection, spawning a task per incoming bidirectional stream.
///
/// Returns when accepting a stream fails. A close initiated by the peer
/// application ends the loop silently; any other connection error is logged
/// first. Errors from individual stream handlers are logged and do not
/// affect other streams.
async fn handle_connection(
    &self,
    conn: quinn::Connection,
    shard_store: Arc<ShardStore>,
) {
    loop {
        let (send, recv) = match conn.accept_bi().await {
            Ok(streams) => streams,
            Err(quinn::ConnectionError::ApplicationClosed(_)) => return,
            Err(e) => {
                tracing::error!("Connection error: {}", e);
                return;
            }
        };

        let store = Arc::clone(&shard_store);
        tokio::spawn(async move {
            let outcome = Self::handle_stream(send, recv, store).await;
            if let Err(e) = outcome {
                tracing::error!("Stream handler error: {}", e);
            }
        });
    }
}
/// Handle a single bidirectional stream (one request-response exchange).
async fn handle_stream(
mut send: quinn::SendStream,
mut recv: quinn::RecvStream,
shard_store: Arc<ShardStore>,
) -> Result<()> {
// Read the length-prefixed request header
let mut len_buf = [0u8; 4];
recv.read_exact(&mut len_buf).await?;
let msg_len = u32::from_le_bytes(len_buf) as usize;
let mut msg_buf = vec![0u8; msg_len];
recv.read_exact(&mut msg_buf).await?;
let request: ClusterRequest = bincode::deserialize(&msg_buf)?;
match request {
ClusterRequest::ShardWrite(write_req) => {
// Read shard data from the stream
let mut shard_data = vec![0u8; write_req.shard_data_length as usize];
recv.read_exact(&mut shard_data).await?;
let shard_id = ShardId {
bucket: write_req.bucket,
key: write_req.key,
chunk_index: write_req.chunk_index,
shard_index: write_req.shard_index,
};
let result = shard_store
.write_shard(&shard_id, &shard_data, write_req.checksum)
.await;
let ack = ShardWriteAck {
request_id: write_req.request_id,
success: result.is_ok(),
error: result.err().map(|e| e.to_string()),
};
let response = protocol::encode_response(&ClusterResponse::ShardWriteAck(ack))?;
send.write_all(&response).await?;
send.finish()?;
}
ClusterRequest::ShardRead(read_req) => {
let shard_id = ShardId {
bucket: read_req.bucket,
key: read_req.key,
chunk_index: read_req.chunk_index,
shard_index: read_req.shard_index,
};
match shard_store.read_shard(&shard_id).await {
Ok((data, checksum)) => {
let header = ShardReadResponse {
request_id: read_req.request_id,
found: true,
shard_data_length: data.len() as u64,
checksum,
};
// Send header
let header_bytes = bincode::serialize(&ClusterResponse::ShardReadResponse(header))?;
send.write_all(&(header_bytes.len() as u32).to_le_bytes()).await?;
send.write_all(&header_bytes).await?;
// Send shard data
send.write_all(&data).await?;
send.finish()?;
}
Err(_) => {
let header = ShardReadResponse {
request_id: read_req.request_id,
found: false,
shard_data_length: 0,
checksum: 0,
};
let header_bytes = bincode::serialize(&ClusterResponse::ShardReadResponse(header))?;
send.write_all(&(header_bytes.len() as u32).to_le_bytes()).await?;
send.write_all(&header_bytes).await?;
send.finish()?;
}
}
}
ClusterRequest::ShardDelete(del_req) => {
let shard_id = ShardId {
bucket: del_req.bucket,
key: del_req.key,
chunk_index: del_req.chunk_index,
shard_index: del_req.shard_index,
};
let result = shard_store.delete_shard(&shard_id).await;
let ack = protocol::ClusterResponse::ShardDeleteAck(protocol::ShardDeleteAck {
request_id: del_req.request_id,
success: result.is_ok(),
});
let response = protocol::encode_response(&ack)?;
send.write_all(&response).await?;
send.finish()?;
}
ClusterRequest::ShardHead(head_req) => {
let shard_id = ShardId {
bucket: head_req.bucket,
key: head_req.key,
chunk_index: head_req.chunk_index,
shard_index: head_req.shard_index,
};
let resp = match shard_store.head_shard(&shard_id).await {
Ok(Some(meta)) => protocol::ShardHeadResponse {
request_id: head_req.request_id,
found: true,
data_size: meta.data_size,
checksum: meta.checksum,
},
_ => protocol::ShardHeadResponse {
request_id: head_req.request_id,
found: false,
data_size: 0,
checksum: 0,
},
};
let response =
protocol::encode_response(&ClusterResponse::ShardHeadResponse(resp))?;
send.write_all(&response).await?;
send.finish()?;
}
// Heartbeat, Join, TopologySync, Heal, and Manifest operations
// will be handled by the membership and coordinator modules.
// For now, send a generic ack.
_ => {
let response_data = recv.read_to_end(0).await.unwrap_or_default();
drop(response_data);
let err = protocol::ErrorResponse {
request_id: String::new(),
code: "NotImplemented".to_string(),
message: "This cluster operation is not yet implemented".to_string(),
};
let response = protocol::encode_response(&ClusterResponse::Error(err))?;
send.write_all(&response).await?;
send.finish()?;
}
}
Ok(())
}
/// Generate self-signed TLS certificates for cluster-internal communication.
///
/// Returns `(server_config, client_config)` for the QUIC endpoint. A new
/// certificate and key pair is generated on every call. Both configs
/// advertise the ALPN protocol `smartstorage`.
///
/// NOTE(review): the client config installs `SkipServerVerification`, so
/// peers' certificates are not checked, and the server config is built
/// with `with_no_client_auth()`. TLS therefore encrypts the traffic but
/// does not authenticate either side.
fn generate_tls_configs() -> Result<(QuinnServerConfig, ClientConfig)> {
// Generate self-signed certificate
let cert = rcgen::generate_simple_self_signed(vec!["smartstorage".to_string()])?;
let cert_der = CertificateDer::from(cert.cert);
let key_der = PrivateKeyDer::Pkcs8(PrivatePkcs8KeyDer::from(cert.key_pair.serialize_der()));
// Server config
let mut server_crypto = rustls::ServerConfig::builder()
.with_no_client_auth()
.with_single_cert(vec![cert_der.clone()], key_der.clone_key())?;
server_crypto.alpn_protocols = vec![b"smartstorage".to_vec()];
let server_config = QuinnServerConfig::with_crypto(Arc::new(
quinn::crypto::rustls::QuicServerConfig::try_from(server_crypto)?,
));
// Client config: skip server certificate verification (cluster-internal)
let mut client_crypto = rustls::ClientConfig::builder()
.dangerous()
.with_custom_certificate_verifier(Arc::new(SkipServerVerification))
.with_no_client_auth();
client_crypto.alpn_protocols = vec![b"smartstorage".to_vec()];
let client_config = ClientConfig::new(Arc::new(
quinn::crypto::rustls::QuicClientConfig::try_from(client_crypto)?,
));
Ok((server_config, client_config))
}
/// Close the QUIC endpoint with error code 0 and the reason `shutdown`.
pub fn close(&self) {
    let code = quinn::VarInt::from_u32(0);
    let reason: &[u8] = b"shutdown";
    self.endpoint.close(code, reason);
}
/// Id of the node this transport belongs to.
pub fn local_node_id(&self) -> &str {
    self.local_node_id.as_str()
}
}
/// Certificate verifier that skips verification (for cluster-internal self-signed certs).
///
/// NOTE(review): with this verifier a client accepts any server
/// certificate, so the identity of the peer is not authenticated.
#[derive(Debug)]
struct SkipServerVerification;
/// Accept-everything implementation: every check returns its success
/// assertion without inspecting the arguments.
impl rustls::client::danger::ServerCertVerifier for SkipServerVerification {
/// Accepts any certificate chain for any server name.
fn verify_server_cert(
&self,
_end_entity: &CertificateDer<'_>,
_intermediates: &[CertificateDer<'_>],
_server_name: &rustls::pki_types::ServerName<'_>,
_ocsp_response: &[u8],
_now: rustls::pki_types::UnixTime,
) -> Result<rustls::client::danger::ServerCertVerified, rustls::Error> {
Ok(rustls::client::danger::ServerCertVerified::assertion())
}
/// Accepts any TLS 1.2 handshake signature without checking it.
fn verify_tls12_signature(
&self,
_message: &[u8],
_cert: &CertificateDer<'_>,
_dss: &rustls::DigitallySignedStruct,
) -> Result<rustls::client::danger::HandshakeSignatureValid, rustls::Error> {
Ok(rustls::client::danger::HandshakeSignatureValid::assertion())
}
/// Accepts any TLS 1.3 handshake signature without checking it.
fn verify_tls13_signature(
&self,
_message: &[u8],
_cert: &CertificateDer<'_>,
_dss: &rustls::DigitallySignedStruct,
) -> Result<rustls::client::danger::HandshakeSignatureValid, rustls::Error> {
Ok(rustls::client::danger::HandshakeSignatureValid::assertion())
}
/// Signature schemes reported as supported. Signatures are never actually
/// verified, so this list only affects what is offered during the handshake.
fn supported_verify_schemes(&self) -> Vec<rustls::SignatureScheme> {
vec![
rustls::SignatureScheme::RSA_PKCS1_SHA256,
rustls::SignatureScheme::RSA_PKCS1_SHA384,
rustls::SignatureScheme::RSA_PKCS1_SHA512,
rustls::SignatureScheme::ECDSA_NISTP256_SHA256,
rustls::SignatureScheme::ECDSA_NISTP384_SHA384,
rustls::SignatureScheme::ED25519,
rustls::SignatureScheme::RSA_PSS_SHA256,
rustls::SignatureScheme::RSA_PSS_SHA384,
rustls::SignatureScheme::RSA_PSS_SHA512,
]
}
}

View File

@@ -0,0 +1,226 @@
use anyhow::Result;
use serde::{Deserialize, Serialize};
use std::path::{Path, PathBuf};
use tokio::fs;
use tokio::io::AsyncWriteExt;
/// Identifies a specific shard on disk.
///
/// All four fields together determine the shard's file path in `ShardStore`.
#[derive(Debug, Clone, Serialize, Deserialize, PartialEq, Eq, Hash)]
pub struct ShardId {
/// Bucket of the object the shard belongs to.
pub bucket: String,
/// Key of the object the shard belongs to.
pub key: String,
/// Chunk of the object; maps to the `chunk-{N}` directory.
pub chunk_index: u32,
/// Shard within the chunk; maps to the `shard-{M}` file name.
pub shard_index: u32,
}
/// Per-shard metadata stored alongside shard data.
///
/// Written as JSON to the shard's `.meta` file by `ShardStore::write_shard`.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct ShardMeta {
pub shard_index: u32,
pub chunk_index: u32,
/// Length of the shard data in bytes, as written.
pub data_size: u64,
pub checksum: u32, // crc32c
}
/// Manages shard storage on a single drive.
///
/// Layout on disk:
/// ```text
/// {base_path}/.smartstorage/data/{bucket}/{key_prefix}/{key}/
/// chunk-{N}/shard-{M}.dat (shard data)
/// chunk-{N}/shard-{M}.meta (shard metadata JSON)
/// ```
///
/// `{key_prefix}` is a two-digit hex value derived from a hash of the key,
/// used to fan objects out over subdirectories.
pub struct ShardStore {
/// Root directory of the drive; all shard files live below it.
base_path: PathBuf,
}
impl ShardStore {
pub fn new(base_path: PathBuf) -> Self {
Self { base_path }
}
/// Store a shard and its metadata, each via a temp file followed by a rename.
///
/// The data file is written, flushed and synced before being renamed into
/// place. The metadata (`ShardMeta` as JSON, including the caller-supplied
/// `checksum`) is written and renamed afterwards. Missing parent
/// directories are created.
///
/// # Errors
/// Returns the first I/O or serialization error encountered.
pub async fn write_shard(
    &self,
    shard_id: &ShardId,
    data: &[u8],
    checksum: u32,
) -> Result<()> {
    let data_target = self.shard_data_path(shard_id);
    let meta_target = self.shard_meta_path(shard_id);

    if let Some(chunk_dir) = data_target.parent() {
        fs::create_dir_all(chunk_dir).await?;
    }

    // Data: stage under "<name>.dat.tmp", sync, then rename into place.
    let data_staging = data_target.with_extension("dat.tmp");
    let mut staged = fs::File::create(&data_staging).await?;
    staged.write_all(data).await?;
    staged.flush().await?;
    staged.sync_all().await?;
    drop(staged);
    fs::rename(&data_staging, &data_target).await?;

    // Metadata: same staging scheme under "<name>.meta.tmp".
    let meta_json = serde_json::to_string(&ShardMeta {
        shard_index: shard_id.shard_index,
        chunk_index: shard_id.chunk_index,
        data_size: data.len() as u64,
        checksum,
    })?;
    let meta_staging = meta_target.with_extension("meta.tmp");
    fs::write(&meta_staging, meta_json).await?;
    fs::rename(&meta_staging, &meta_target).await?;

    Ok(())
}
/// Read a shard's data from disk.
pub async fn read_shard(&self, shard_id: &ShardId) -> Result<(Vec<u8>, u32)> {
let shard_path = self.shard_data_path(shard_id);
let meta_path = self.shard_meta_path(shard_id);
let data = fs::read(&shard_path).await?;
let meta_json = fs::read_to_string(&meta_path).await?;
let meta: ShardMeta = serde_json::from_str(&meta_json)?;
Ok((data, meta.checksum))
}
/// Check if a shard exists and return its metadata.
///
/// Returns `Ok(None)` when the shard's `.meta` file does not exist.
///
/// The file is read directly instead of being probed with `Path::exists`
/// first. That avoids a blocking filesystem call on the async executor, and
/// a shard deleted between the probe and the read is reported as `None`
/// rather than as an error.
///
/// # Errors
/// Fails if the metadata file exists but cannot be read or parsed.
pub async fn head_shard(&self, shard_id: &ShardId) -> Result<Option<ShardMeta>> {
    let meta_path = self.shard_meta_path(shard_id);
    let meta_json = match fs::read_to_string(&meta_path).await {
        Ok(text) => text,
        Err(e) if e.kind() == std::io::ErrorKind::NotFound => return Ok(None),
        Err(e) => return Err(e.into()),
    };
    let meta: ShardMeta = serde_json::from_str(&meta_json)?;
    Ok(Some(meta))
}
/// Delete a shard and its metadata.
pub async fn delete_shard(&self, shard_id: &ShardId) -> Result<()> {
let shard_path = self.shard_data_path(shard_id);
let meta_path = self.shard_meta_path(shard_id);
let _ = fs::remove_file(&shard_path).await;
let _ = fs::remove_file(&meta_path).await;
// Clean up empty parent directories
self.cleanup_empty_dirs(shard_id).await;
Ok(())
}
/// Enumerate the shards stored for one object, across all of its chunks.
///
/// Scans the object's directory for `chunk-{N}` subdirectories and, inside
/// each, for `shard-{M}.dat` files. Entries whose names do not parse are
/// skipped. The result is ordered by chunk index, then shard index, and is
/// empty when the object directory does not exist.
///
/// # Errors
/// Fails if a directory cannot be read.
pub async fn list_shards_for_object(
    &self,
    bucket: &str,
    key: &str,
) -> Result<Vec<ShardId>> {
    let object_dir = self.key_dir(bucket, key);
    let mut found = Vec::new();
    if !object_dir.exists() {
        return Ok(found);
    }

    let mut chunk_iter = fs::read_dir(&object_dir).await?;
    while let Some(chunk_entry) = chunk_iter.next_entry().await? {
        let chunk_name = chunk_entry.file_name().to_string_lossy().to_string();
        // Only directories named "chunk-<number>" are of interest.
        let chunk_suffix = match chunk_name.strip_prefix("chunk-") {
            Some(rest) => rest,
            None => continue,
        };
        if !chunk_entry.metadata().await?.is_dir() {
            continue;
        }
        let chunk_index = match chunk_suffix.parse::<u32>() {
            Ok(idx) => idx,
            Err(_) => continue,
        };

        let mut shard_iter = fs::read_dir(chunk_entry.path()).await?;
        while let Some(shard_entry) = shard_iter.next_entry().await? {
            let shard_name = shard_entry.file_name().to_string_lossy().to_string();
            // Only files named "shard-<number>.dat" are of interest.
            let parsed = shard_name
                .strip_prefix("shard-")
                .and_then(|rest| rest.strip_suffix(".dat"))
                .and_then(|digits| digits.parse::<u32>().ok());
            if let Some(shard_index) = parsed {
                found.push(ShardId {
                    bucket: bucket.to_string(),
                    key: key.to_string(),
                    chunk_index,
                    shard_index,
                });
            }
        }
    }

    found.sort_by_key(|shard| (shard.chunk_index, shard.shard_index));
    Ok(found)
}
// ============================
// Path helpers
// ============================
/// Directory below which all shard data lives: `{base_path}/.smartstorage/data`.
fn data_root(&self) -> PathBuf {
    let mut root = self.base_path.clone();
    root.push(".smartstorage");
    root.push("data");
    root
}
/// Two-digit lowercase hex fan-out directory name for `key`.
///
/// Uses the lowest byte of the key's xxHash64 digest (seed 0), giving 256
/// possible prefixes.
fn key_prefix(key: &str) -> String {
    let digest = xxhash_rust::xxh64::xxh64(key.as_bytes(), 0);
    let low_byte = digest as u8;
    format!("{:02x}", low_byte)
}
/// Directory holding all chunks of one object:
/// `{data_root}/{bucket}/{key_prefix}/{key}`.
///
/// `bucket` and `key` arrive over the network, so they are never joined
/// into the path verbatim: a leading `/` would replace the whole base path
/// and `..` segments would climb out of the data root. Ordinary names,
/// including keys with `/` separators, produce the same path as a plain join.
fn key_dir(&self, bucket: &str, key: &str) -> PathBuf {
    let mut dir = self.data_root();
    Self::push_confined(&mut dir, bucket);
    dir.push(Self::key_prefix(key));
    Self::push_confined(&mut dir, key);
    dir
}

/// Append `raw` to `dir` one `/`-separated segment at a time so the result
/// stays below `dir`.
///
/// Empty segments (leading, trailing or doubled `/`) are dropped, which is
/// what path normalization would do anyway. `.` and `..` are stored
/// percent-encoded instead of being interpreted.
// NOTE(review): only `/` is treated as a separator; Windows-specific forms
// (backslashes, drive prefixes) are not handled here.
fn push_confined(dir: &mut PathBuf, raw: &str) {
    for segment in raw.split('/') {
        match segment {
            "" => {}
            "." => dir.push("%2E"),
            ".." => dir.push("%2E%2E"),
            plain => dir.push(plain),
        }
    }
}
/// Directory of the chunk a shard belongs to: `{key_dir}/chunk-{chunk_index}`.
fn chunk_dir(&self, shard_id: &ShardId) -> PathBuf {
    let mut dir = self.key_dir(&shard_id.bucket, &shard_id.key);
    dir.push(format!("chunk-{}", shard_id.chunk_index));
    dir
}
/// Path of a shard's data file: `{chunk_dir}/shard-{shard_index}.dat`.
fn shard_data_path(&self, shard_id: &ShardId) -> PathBuf {
    let mut path = self.chunk_dir(shard_id);
    path.push(format!("shard-{}.dat", shard_id.shard_index));
    path
}
/// Path of a shard's metadata file: `{chunk_dir}/shard-{shard_index}.meta`.
fn shard_meta_path(&self, shard_id: &ShardId) -> PathBuf {
    let mut path = self.chunk_dir(shard_id);
    path.push(format!("shard-{}.meta", shard_id.shard_index));
    path
}
/// Remove the directories left empty by deleting a shard (best effort).
///
/// Walks upwards from the shard's chunk directory and removes each
/// directory that is empty, stopping at the first one that is not. The
/// bucket directory itself is never removed.
///
/// Walking every ancestor matters for keys containing `/`: such keys create
/// several nested directories, and removing only the innermost ones would
/// leave the outer key directories and the fan-out prefix directory behind.
async fn cleanup_empty_dirs(&self, shard_id: &ShardId) {
    let bucket_dir = self.data_root().join(&shard_id.bucket);
    let chunk_dir = self.chunk_dir(shard_id);
    for dir in chunk_dir.ancestors() {
        // Never touch the bucket directory or anything outside it.
        if dir == bucket_dir.as_path() || !dir.starts_with(&bucket_dir) {
            break;
        }
        match fs::remove_dir(dir).await {
            Ok(()) => {}
            // Already gone: its parent may still be empty, so keep going.
            Err(e) if e.kind() == std::io::ErrorKind::NotFound => {}
            // Not empty (or not removable): the parents cannot be empty either.
            Err(_) => break,
        }
    }
}
}

291
rust/src/cluster/state.rs Normal file
View File

@@ -0,0 +1,291 @@
use std::collections::HashMap;
use std::sync::Arc;
use tokio::sync::RwLock;
use super::placement::{DriveLocation, ErasureSet};
use super::protocol::{ClusterTopology, ErasureSetInfo, DriveLocationInfo, NodeInfo};
/// Liveness classification of a cluster node, derived from how many
/// consecutive heartbeat rounds it has missed (see `tick_heartbeats`).
#[derive(Debug, Clone, PartialEq, Eq)]
pub enum NodeStatus {
// Fewer than 2 consecutive missed heartbeats.
Online,
Suspect, // missed 2+ heartbeats
Offline, // missed 5+ heartbeats
}
/// Tracked state for a node: its advertised info plus locally observed liveness.
#[derive(Debug, Clone)]
pub struct NodeState {
// Node description as registered locally or received in a topology message.
pub info: NodeInfo,
// Current liveness classification.
pub status: NodeStatus,
// Consecutive heartbeat rounds without a response; reset to 0 on a heartbeat.
pub missed_heartbeats: u32,
// Wall-clock (UTC) time the node was registered or last recorded a heartbeat.
pub last_heartbeat: chrono::DateTime<chrono::Utc>,
}
/// Shared cluster state, protected by RwLock for concurrent access.
///
/// The mutable topology lives behind `Arc<RwLock<..>>`; the local node ID is
/// stored outside the lock and is fixed at construction.
pub struct ClusterState {
// All mutable cluster data (nodes, erasure sets, version).
inner: Arc<RwLock<ClusterStateInner>>,
// ID of the node this process runs as; used to exclude "self" from peer queries.
local_node_id: String,
}
/// Lock-protected portion of `ClusterState`.
struct ClusterStateInner {
// Identifier of the cluster; replaced when a newer topology is applied.
cluster_id: String,
// Topology version: incremented on node add/remove and when erasure sets are
// set, and overwritten by `apply_topology` with the incoming version.
version: u64,
// Known nodes keyed by node ID.
nodes: HashMap<String, NodeState>,
// Erasure sets used for object placement.
erasure_sets: Vec<ErasureSet>,
// Number of data shards, as passed to `new` or received in a topology.
data_shards: usize,
// Number of parity shards, as passed to `new` or received in a topology.
parity_shards: usize,
}
impl ClusterState {
    /// Create an empty cluster state (version 0, no nodes, no erasure sets)
    /// for the node identified by `local_node_id`.
    pub fn new(
        local_node_id: String,
        cluster_id: String,
        data_shards: usize,
        parity_shards: usize,
    ) -> Self {
        Self {
            inner: Arc::new(RwLock::new(ClusterStateInner {
                cluster_id,
                version: 0,
                nodes: HashMap::new(),
                erasure_sets: Vec::new(),
                data_shards,
                parity_shards,
            })),
            local_node_id,
        }
    }

    /// ID of the node this state belongs to.
    pub fn local_node_id(&self) -> &str {
        &self.local_node_id
    }

    /// Register a node in the cluster.
    ///
    /// The node starts `Online` with zero missed heartbeats. Registering an
    /// ID that already exists replaces its tracked state. Always bumps the
    /// topology version.
    pub async fn add_node(&self, info: NodeInfo) {
        let mut inner = self.inner.write().await;
        let node_id = info.node_id.clone();
        inner.nodes.insert(
            node_id,
            NodeState {
                info,
                status: NodeStatus::Online,
                missed_heartbeats: 0,
                last_heartbeat: chrono::Utc::now(),
            },
        );
        inner.version += 1;
    }

    /// Remove a node from the cluster.
    ///
    /// The topology version is bumped only when a node was actually removed,
    /// so removing an unknown ID does not produce a spurious new version.
    pub async fn remove_node(&self, node_id: &str) {
        let mut inner = self.inner.write().await;
        if inner.nodes.remove(node_id).is_some() {
            inner.version += 1;
        }
    }

    /// Update heartbeat for a node (reset missed count, mark it `Online`,
    /// refresh `last_heartbeat`). Unknown node IDs are ignored.
    pub async fn record_heartbeat(&self, node_id: &str) {
        let mut inner = self.inner.write().await;
        if let Some(node) = inner.nodes.get_mut(node_id) {
            node.missed_heartbeats = 0;
            node.status = NodeStatus::Online;
            node.last_heartbeat = chrono::Utc::now();
        }
    }

    /// Advance one heartbeat round.
    ///
    /// Every node except the local one is checked against `responded_nodes`:
    /// responders have their miss counter reset and become `Online`;
    /// non-responders have the counter incremented and become `Suspect` at 2
    /// misses and `Offline` at 5. Returns the `(node_id, new_status)` pairs
    /// for nodes whose status changed in this round.
    /// Called by the heartbeat checker when a round completes.
    pub async fn tick_heartbeats(&self, responded_nodes: &[String]) -> Vec<(String, NodeStatus)> {
        let mut inner = self.inner.write().await;
        let mut status_changes = Vec::new();

        for (node_id, node) in inner.nodes.iter_mut() {
            if *node_id == self.local_node_id {
                continue; // Don't track self
            }

            if responded_nodes.contains(node_id) {
                node.missed_heartbeats = 0;
                if node.status != NodeStatus::Online {
                    node.status = NodeStatus::Online;
                    status_changes.push((node_id.clone(), NodeStatus::Online));
                }
                continue;
            }

            // Saturate instead of overflowing for a node that stays down
            // for a very long time.
            node.missed_heartbeats = node.missed_heartbeats.saturating_add(1);
            let new_status = if node.missed_heartbeats >= 5 {
                NodeStatus::Offline
            } else if node.missed_heartbeats >= 2 {
                NodeStatus::Suspect
            } else {
                NodeStatus::Online
            };
            if new_status != node.status {
                node.status = new_status.clone();
                status_changes.push((node_id.clone(), new_status));
            }
        }

        status_changes
    }

    /// Set erasure sets (typically done once during cluster formation).
    /// Bumps the topology version.
    pub async fn set_erasure_sets(&self, sets: Vec<ErasureSet>) {
        let mut inner = self.inner.write().await;
        inner.erasure_sets = sets;
        inner.version += 1;
    }

    /// Get the erasure set for a given object.
    ///
    /// The index is chosen by `placement::erasure_set_for_object` from the
    /// bucket, key and current number of sets. Returns `None` when no erasure
    /// sets are configured.
    pub async fn get_erasure_set_for_object(&self, bucket: &str, key: &str) -> Option<ErasureSet> {
        let inner = self.inner.read().await;
        if inner.erasure_sets.is_empty() {
            return None;
        }
        let set_idx = super::placement::erasure_set_for_object(
            bucket,
            key,
            inner.erasure_sets.len() as u32,
        );
        inner.erasure_sets.get(set_idx as usize).cloned()
    }

    /// Get a copy of all erasure sets.
    pub async fn erasure_sets(&self) -> Vec<ErasureSet> {
        self.inner.read().await.erasure_sets.clone()
    }

    /// Get current topology version.
    pub async fn version(&self) -> u64 {
        self.inner.read().await.version
    }

    /// Get the info of all nodes currently `Online`, excluding the local node.
    pub async fn online_peers(&self) -> Vec<NodeInfo> {
        let inner = self.inner.read().await;
        inner
            .nodes
            .values()
            .filter(|n| n.status == NodeStatus::Online && n.info.node_id != self.local_node_id)
            .map(|n| n.info.clone())
            .collect()
    }

    /// Get a snapshot of all tracked nodes (including the local one if registered).
    pub async fn all_nodes(&self) -> Vec<NodeState> {
        self.inner.read().await.nodes.values().cloned().collect()
    }

    /// Get node info by ID, or `None` if the node is unknown.
    pub async fn get_node(&self, node_id: &str) -> Option<NodeInfo> {
        self.inner
            .read()
            .await
            .nodes
            .get(node_id)
            .map(|n| n.info.clone())
    }

    /// Get the IDs of all nodes currently classified `Offline`.
    pub async fn offline_nodes(&self) -> Vec<String> {
        self.inner
            .read()
            .await
            .nodes
            .values()
            .filter(|n| n.status == NodeStatus::Offline)
            .map(|n| n.info.node_id.clone())
            .collect()
    }

    /// Check if a majority of nodes are reachable (for split-brain prevention).
    ///
    /// Counts every tracked node with status `Online` and requires strictly
    /// more than half of all tracked nodes. An empty node table counts as
    /// having a majority.
    pub async fn has_majority(&self) -> bool {
        let inner = self.inner.read().await;
        let total = inner.nodes.len();
        if total == 0 {
            return true;
        }
        let online = inner
            .nodes
            .values()
            .filter(|n| n.status == NodeStatus::Online)
            .count();
        online > total / 2
    }

    /// Export the current topology as a protocol message.
    pub async fn to_topology(&self) -> ClusterTopology {
        let inner = self.inner.read().await;
        ClusterTopology {
            version: inner.version,
            cluster_id: inner.cluster_id.clone(),
            nodes: inner.nodes.values().map(|n| n.info.clone()).collect(),
            erasure_sets: inner
                .erasure_sets
                .iter()
                .map(|set| ErasureSetInfo {
                    set_id: set.set_id,
                    drives: set
                        .drives
                        .iter()
                        .map(|d| DriveLocationInfo {
                            node_id: d.node_id.clone(),
                            drive_index: d.drive_index,
                        })
                        .collect(),
                })
                .collect(),
            data_shards: inner.data_shards,
            parity_shards: inner.parity_shards,
        }
    }

    /// Import topology from a protocol message (e.g., received from a peer during join).
    ///
    /// Ignored unless `topology.version` is strictly newer than the local
    /// version. Otherwise the cluster ID, version, shard counts and erasure
    /// sets are replaced. Unknown nodes are added as `Online`; already-known
    /// peers keep their locally observed liveness state but have their
    /// `NodeInfo` refreshed so address/drive changes propagate. The local
    /// node's own entry is never overwritten by a peer's view of it. Nodes
    /// missing from the incoming topology are not removed.
    pub async fn apply_topology(&self, topology: &ClusterTopology) {
        let mut inner = self.inner.write().await;

        // Only apply if newer
        if topology.version <= inner.version {
            return;
        }

        inner.cluster_id = topology.cluster_id.clone();
        inner.version = topology.version;
        inner.data_shards = topology.data_shards;
        inner.parity_shards = topology.parity_shards;

        // Update nodes
        for node_info in &topology.nodes {
            match inner.nodes.entry(node_info.node_id.clone()) {
                std::collections::hash_map::Entry::Occupied(mut known) => {
                    if node_info.node_id != self.local_node_id {
                        known.get_mut().info = node_info.clone();
                    }
                }
                std::collections::hash_map::Entry::Vacant(slot) => {
                    slot.insert(NodeState {
                        info: node_info.clone(),
                        status: NodeStatus::Online,
                        missed_heartbeats: 0,
                        last_heartbeat: chrono::Utc::now(),
                    });
                }
            }
        }

        // Update erasure sets
        inner.erasure_sets = topology
            .erasure_sets
            .iter()
            .map(|set| ErasureSet {
                set_id: set.set_id,
                drives: set
                    .drives
                    .iter()
                    .map(|d| DriveLocation {
                        node_id: d.node_id.clone(),
                        drive_index: d.drive_index,
                    })
                    .collect(),
            })
            .collect();
    }
}

View File

@@ -1,8 +1,10 @@
use serde::{Deserialize, Serialize}; use serde::{Deserialize, Serialize};
use crate::cluster::config::ClusterConfig;
#[derive(Debug, Clone, Serialize, Deserialize)] #[derive(Debug, Clone, Serialize, Deserialize)]
#[serde(rename_all = "camelCase")] #[serde(rename_all = "camelCase")]
pub struct S3Config { pub struct SmartStorageConfig {
pub server: ServerConfig, pub server: ServerConfig,
pub storage: StorageConfig, pub storage: StorageConfig,
pub auth: AuthConfig, pub auth: AuthConfig,
@@ -10,6 +12,8 @@ pub struct S3Config {
pub logging: LoggingConfig, pub logging: LoggingConfig,
pub limits: LimitsConfig, pub limits: LimitsConfig,
pub multipart: MultipartConfig, pub multipart: MultipartConfig,
#[serde(default)]
pub cluster: Option<ClusterConfig>,
} }
#[derive(Debug, Clone, Serialize, Deserialize)] #[derive(Debug, Clone, Serialize, Deserialize)]
@@ -18,6 +22,12 @@ pub struct ServerConfig {
pub port: u16, pub port: u16,
pub address: String, pub address: String,
pub silent: bool, pub silent: bool,
#[serde(default = "default_region")]
pub region: String,
}
fn default_region() -> String {
"us-east-1".to_string()
} }
#[derive(Debug, Clone, Serialize, Deserialize)] #[derive(Debug, Clone, Serialize, Deserialize)]

View File

@@ -1,16 +1,14 @@
use hyper::{Response, StatusCode}; use hyper::StatusCode;
use http_body_util::Full;
use bytes::Bytes;
#[derive(Debug, thiserror::Error)] #[derive(Debug, thiserror::Error)]
#[error("S3Error({code}): {message}")] #[error("StorageError({code}): {message}")]
pub struct S3Error { pub struct StorageError {
pub code: String, pub code: String,
pub message: String, pub message: String,
pub status: StatusCode, pub status: StatusCode,
} }
impl S3Error { impl StorageError {
pub fn new(code: &str, message: &str, status: StatusCode) -> Self { pub fn new(code: &str, message: &str, status: StatusCode) -> Self {
Self { Self {
code: code.to_string(), code: code.to_string(),
@@ -51,20 +49,58 @@ impl S3Error {
Self::new("InvalidRequest", msg, StatusCode::BAD_REQUEST) Self::new("InvalidRequest", msg, StatusCode::BAD_REQUEST)
} }
/// Build the `SignatureDoesNotMatch` error (HTTP 403 Forbidden).
pub fn signature_does_not_match() -> Self {
Self::new(
"SignatureDoesNotMatch",
"The request signature we calculated does not match the signature you provided.",
StatusCode::FORBIDDEN,
)
}
/// Build the `InvalidAccessKeyId` error (HTTP 403 Forbidden).
pub fn invalid_access_key_id() -> Self {
Self::new(
"InvalidAccessKeyId",
"The AWS Access Key Id you provided does not exist in our records.",
StatusCode::FORBIDDEN,
)
}
/// Build the `RequestTimeTooSkewed` error (HTTP 403 Forbidden).
pub fn request_time_too_skewed() -> Self {
Self::new(
"RequestTimeTooSkewed",
"The difference between the request time and the current time is too large.",
StatusCode::FORBIDDEN,
)
}
/// Build the `AuthorizationHeaderMalformed` error (HTTP 400 Bad Request).
pub fn authorization_header_malformed() -> Self {
Self::new(
"AuthorizationHeaderMalformed",
"The authorization header is malformed.",
StatusCode::BAD_REQUEST,
)
}
/// Build the `MissingSecurityHeader` error (HTTP 400 Bad Request) with a
/// caller-supplied message.
pub fn missing_security_header(msg: &str) -> Self {
Self::new("MissingSecurityHeader", msg, StatusCode::BAD_REQUEST)
}
/// Build the `NoSuchBucketPolicy` error (HTTP 404 Not Found).
pub fn no_such_bucket_policy() -> Self {
Self::new(
"NoSuchBucketPolicy",
"The bucket policy does not exist.",
StatusCode::NOT_FOUND,
)
}
/// Build the `MalformedPolicy` error (HTTP 400 Bad Request) with a
/// caller-supplied message.
pub fn malformed_policy(msg: &str) -> Self {
Self::new("MalformedPolicy", msg, StatusCode::BAD_REQUEST)
}
pub fn to_xml(&self) -> String { pub fn to_xml(&self) -> String {
format!( format!(
"<?xml version=\"1.0\" encoding=\"UTF-8\"?>\n<Error><Code>{}</Code><Message>{}</Message></Error>", "<?xml version=\"1.0\" encoding=\"UTF-8\"?>\n<Error><Code>{}</Code><Message>{}</Message></Error>",
self.code, self.message self.code, self.message
) )
} }
pub fn to_response(&self, request_id: &str) -> Response<Full<Bytes>> {
let xml = self.to_xml();
Response::builder()
.status(self.status)
.header("content-type", "application/xml")
.header("x-amz-request-id", request_id)
.body(Full::new(Bytes::from(xml)))
.unwrap()
}
} }

View File

@@ -1,6 +1,10 @@
mod action;
mod auth;
mod cluster;
mod config; mod config;
mod management; mod management;
mod s3_error; mod policy;
mod error;
mod server; mod server;
mod storage; mod storage;
mod xml_response; mod xml_response;
@@ -8,7 +12,7 @@ mod xml_response;
use clap::Parser; use clap::Parser;
#[derive(Parser)] #[derive(Parser)]
#[command(name = "rusts3", about = "High-performance S3-compatible server")] #[command(name = "ruststorage", about = "High-performance S3-compatible storage server")]
struct Cli { struct Cli {
/// Run in management mode (IPC via stdin/stdout) /// Run in management mode (IPC via stdin/stdout)
#[arg(long)] #[arg(long)]
@@ -35,7 +39,7 @@ async fn main() -> anyhow::Result<()> {
management::management_loop().await?; management::management_loop().await?;
} else { } else {
eprintln!("rusts3: use --management flag for IPC mode"); eprintln!("ruststorage: use --management flag for IPC mode");
std::process::exit(1); std::process::exit(1);
} }

View File

@@ -4,8 +4,8 @@ use serde_json::Value;
use std::io::Write; use std::io::Write;
use tokio::io::{AsyncBufReadExt, BufReader}; use tokio::io::{AsyncBufReadExt, BufReader};
use crate::config::S3Config; use crate::config::SmartStorageConfig;
use crate::server::S3Server; use crate::server::StorageServer;
#[derive(Deserialize)] #[derive(Deserialize)]
struct IpcRequest { struct IpcRequest {
@@ -62,7 +62,7 @@ pub async fn management_loop() -> Result<()> {
data: serde_json::json!({}), data: serde_json::json!({}),
}); });
let mut server: Option<S3Server> = None; let mut server: Option<StorageServer> = None;
let stdin = BufReader::new(tokio::io::stdin()); let stdin = BufReader::new(tokio::io::stdin());
let mut lines = stdin.lines(); let mut lines = stdin.lines();
@@ -87,11 +87,11 @@ pub async fn management_loop() -> Result<()> {
"start" => { "start" => {
#[derive(Deserialize)] #[derive(Deserialize)]
struct StartParams { struct StartParams {
config: S3Config, config: SmartStorageConfig,
} }
match serde_json::from_value::<StartParams>(req.params) { match serde_json::from_value::<StartParams>(req.params) {
Ok(params) => { Ok(params) => {
match S3Server::start(params.config).await { match StorageServer::start(params.config).await {
Ok(s) => { Ok(s) => {
server = Some(s); server = Some(s);
send_response(id, serde_json::json!({})); send_response(id, serde_json::json!({}));
@@ -140,6 +140,15 @@ pub async fn management_loop() -> Result<()> {
} }
} }
} }
"clusterStatus" => {
send_response(
id,
serde_json::json!({
"status": "ok",
"message": "Cluster status endpoint ready"
}),
);
}
_ => { _ => {
send_error(id, format!("Unknown method: {}", method)); send_error(id, format!("Unknown method: {}", method));
} }

429
rust/src/policy.rs Normal file
View File

@@ -0,0 +1,429 @@
use serde::{Deserialize, Deserializer, Serialize};
use std::collections::HashMap;
use std::path::PathBuf;
use tokio::fs;
use tokio::sync::RwLock;
use crate::action::RequestContext;
use crate::auth::AuthenticatedIdentity;
use crate::error::StorageError;
// ============================
// Policy data model
// ============================
/// A bucket policy document, (de)serialized with the JSON field names
/// `Version` and `Statement`.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct BucketPolicy {
// Policy language version string; `validate_policy` requires "2012-10-17".
#[serde(rename = "Version")]
pub version: String,
// Statements evaluated by `evaluate_policy`.
#[serde(rename = "Statement")]
pub statements: Vec<PolicyStatement>,
}
/// One statement of a bucket policy: who (`Principal`) gets which `Effect`
/// for which `Action`s on which `Resource`s.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct PolicyStatement {
// Optional statement identifier; omitted from output when absent.
#[serde(rename = "Sid", default, skip_serializing_if = "Option::is_none")]
pub sid: Option<String>,
#[serde(rename = "Effect")]
pub effect: PolicyEffect,
// Accepts either the string "*" or an object with an "AWS" key.
#[serde(rename = "Principal", deserialize_with = "deserialize_principal")]
pub principal: Principal,
// Accepts a single string or an array of strings.
#[serde(rename = "Action", deserialize_with = "deserialize_string_or_vec")]
pub action: Vec<String>,
// Accepts a single string or an array of strings.
#[serde(rename = "Resource", deserialize_with = "deserialize_string_or_vec")]
pub resource: Vec<String>,
}
/// Effect of a policy statement when it matches a request.
#[derive(Debug, Clone, Serialize, Deserialize, PartialEq)]
pub enum PolicyEffect {
// A matching statement grants the request (unless some statement denies it).
Allow,
// A matching statement rejects the request immediately.
Deny,
}
/// The principal a statement applies to.
#[derive(Debug, Clone)]
pub enum Principal {
// JSON `"*"`: matches every caller, including anonymous requests.
Wildcard,
// JSON `{"AWS": ...}`: list of principal identifiers (one or many).
Aws(Vec<String>),
}
impl Serialize for Principal {
    /// Serialize in policy JSON form: `"*"` for the wildcard, otherwise a
    /// one-entry map `{"AWS": ...}` whose value is a bare string when there is
    /// exactly one identifier and an array in every other case.
    fn serialize<S>(&self, serializer: S) -> Result<S::Ok, S::Error>
    where
        S: serde::Serializer,
    {
        use serde::ser::SerializeMap;

        let identifiers = match self {
            Principal::Wildcard => return serializer.serialize_str("*"),
            Principal::Aws(identifiers) => identifiers,
        };

        let mut object = serializer.serialize_map(Some(1))?;
        match identifiers.as_slice() {
            [only] => object.serialize_entry("AWS", only)?,
            _ => object.serialize_entry("AWS", identifiers)?,
        }
        object.end()
    }
}
fn deserialize_principal<'de, D>(deserializer: D) -> Result<Principal, D::Error>
where
D: Deserializer<'de>,
{
#[derive(Deserialize)]
#[serde(untagged)]
enum PrincipalRaw {
Star(String),
Map(HashMap<String, StringOrVec>),
}
let raw = PrincipalRaw::deserialize(deserializer)?;
match raw {
PrincipalRaw::Star(s) if s == "*" => Ok(Principal::Wildcard),
PrincipalRaw::Star(_) => Err(serde::de::Error::custom(
"Principal string must be \"*\"",
)),
PrincipalRaw::Map(map) => {
if let Some(aws) = map.get("AWS") {
Ok(Principal::Aws(aws.clone().into_vec()))
} else {
Err(serde::de::Error::custom("Principal map must contain \"AWS\" key"))
}
}
}
}
/// Deserialization helper for JSON values that may be either a single string
/// or an array of strings (serde tries the variants in declaration order).
#[derive(Debug, Clone, Deserialize)]
#[serde(untagged)]
enum StringOrVec {
// A bare JSON string.
Single(String),
// A JSON array of strings.
Multiple(Vec<String>),
}
impl StringOrVec {
    /// Flatten into a vector: an array is returned as-is, a single string
    /// becomes a one-element vector.
    fn into_vec(self) -> Vec<String> {
        match self {
            StringOrVec::Multiple(items) => items,
            StringOrVec::Single(item) => vec![item],
        }
    }
}
/// Deserialize a field that may be a single string or an array of strings
/// into a `Vec<String>`.
fn deserialize_string_or_vec<'de, D>(deserializer: D) -> Result<Vec<String>, D::Error>
where
    D: Deserializer<'de>,
{
    StringOrVec::deserialize(deserializer).map(StringOrVec::into_vec)
}
// ============================
// Policy evaluation
// ============================
/// Outcome of evaluating a bucket policy against one request.
#[derive(Debug, Clone, PartialEq)]
pub enum PolicyDecision {
// At least one matching Allow statement and no matching Deny.
Allow,
// A matching Deny statement was found.
Deny,
// No statement matched the request.
NoOpinion,
}
/// Evaluate a bucket policy against a request context and caller identity.
///
/// A statement applies when its principal, action and resource all match the
/// request. The first applicable `Deny` wins immediately; otherwise the
/// result is `Allow` if any applicable statement allows, and `NoOpinion`
/// when nothing applied.
pub fn evaluate_policy(
    policy: &BucketPolicy,
    ctx: &RequestContext,
    identity: Option<&AuthenticatedIdentity>,
) -> PolicyDecision {
    let resource_arn = ctx.resource_arn();
    let iam_action = ctx.action.iam_action();

    let applicable = policy.statements.iter().filter(|stmt| {
        principal_matches(&stmt.principal, identity)
            && action_matches(&stmt.action, iam_action)
            && resource_matches(&stmt.resource, &resource_arn, ctx.bucket.as_deref())
    });

    let mut decision = PolicyDecision::NoOpinion;
    for stmt in applicable {
        if stmt.effect == PolicyEffect::Deny {
            // Explicit deny overrides any allow, seen or not yet seen.
            return PolicyDecision::Deny;
        }
        decision = PolicyDecision::Allow;
    }
    decision
}
/// Check if the principal matches the caller.
///
/// * `Principal::Wildcard` matches everyone, including anonymous callers.
/// * `Principal::Aws` never matches an anonymous caller. For an authenticated
///   caller, an entry matches when it is `"*"`, when it equals the caller's
///   access key ID, or when its final segment (the text after the last `/`
///   or `:`) equals the access key ID — e.g. `arn:aws:iam::123:user/KEY`.
///
/// The comparison is on whole segments rather than a raw suffix, so a caller
/// whose key ID merely happens to be a suffix of another principal (or is
/// empty) is not granted that principal's permissions.
fn principal_matches(principal: &Principal, identity: Option<&AuthenticatedIdentity>) -> bool {
    let entries = match principal {
        Principal::Wildcard => return true,
        Principal::Aws(entries) => entries,
    };

    let caller_key: &str = match identity {
        Some(id) => &id.access_key_id,
        None => return false,
    };

    entries.iter().any(|entry| {
        let entry = entry.as_str();
        if entry == "*" {
            return true;
        }
        if caller_key.is_empty() {
            // An empty key ID must never match a named principal.
            return false;
        }
        // `rsplit` always yields at least one item, so `next()` is the last
        // segment (or the whole string when there is no separator).
        entry == caller_key
            || entry.rsplit(|c: char| c == '/' || c == ':').next() == Some(caller_key)
    })
}
/// Check if the action matches. Supports wildcard `s3:*` and `*`.
///
/// A policy action matches when it is `*` or `s3:*`, when it equals the
/// request action ignoring ASCII case, or when it ends in `*` and the part
/// before the `*` is a case-insensitive prefix of the request action
/// (e.g. `s3:Get*` matches `s3:GetObject`).
fn action_matches(policy_actions: &[String], request_action: &str) -> bool {
    let requested_lower = request_action.to_lowercase();

    policy_actions.iter().any(|candidate| match candidate.as_str() {
        "*" | "s3:*" => true,
        exact if exact.eq_ignore_ascii_case(request_action) => true,
        pattern => pattern
            .strip_suffix('*')
            .map_or(false, |stem| requested_lower.starts_with(&stem.to_lowercase())),
    })
}
/// Check if the resource matches. Supports wildcard patterns.
///
/// A policy resource matches when it is `*`, when it matches the request ARN
/// as a pattern, or — if the request names a bucket — when it matches the
/// bucket-level ARN `arn:aws:s3:::<bucket>`.
fn resource_matches(policy_resources: &[String], request_arn: &str, bucket: Option<&str>) -> bool {
    // Bucket-level ARN, built once; checked in addition to the request ARN.
    let bucket_arn = bucket.map(|name| format!("arn:aws:s3:::{}", name));

    policy_resources.iter().any(|pattern| {
        if pattern == "*" || arn_pattern_matches(pattern, request_arn) {
            return true;
        }
        match &bucket_arn {
            Some(arn) => arn_pattern_matches(pattern, arn),
            None => false,
        }
    })
}
/// Simple ARN pattern matching with `*` and `?` wildcards.
///
/// A pattern ending in `/*` is special-cased: with `base` being the pattern
/// minus that suffix, it matches `base` itself and anything starting with
/// `base/`. Everything else is delegated to `simple_wildcard_match`.
fn arn_pattern_matches(pattern: &str, value: &str) -> bool {
    if let Some(base) = pattern.strip_suffix("/*") {
        if let Some(rest) = value.strip_prefix(base) {
            // Empty rest: the bare bucket ARN. Leading '/': something under it.
            if rest.is_empty() || rest.starts_with('/') {
                return true;
            }
        }
    }
    simple_wildcard_match(pattern, value)
}
/// Glob-style match of `value` against `pattern`, byte by byte.
///
/// `*` matches any run of bytes (including none) and `?` matches exactly one
/// byte; every other pattern byte must equal the value byte. The whole value
/// must be consumed for a match.
///
/// Uses the single-backtrack-point algorithm: the position of the most
/// recent `*` is remembered, and on a mismatch the `*` is made to absorb one
/// more value byte before retrying.
///
/// A `*` in the pattern is always treated as a wildcard, even when the value
/// has a literal `*` at the same position; it is therefore tested before the
/// literal comparison.
fn simple_wildcard_match(pattern: &str, value: &str) -> bool {
    let pat_bytes = pattern.as_bytes();
    let val_bytes = value.as_bytes();
    let mut pi = 0;
    let mut vi = 0;
    // Index of the last `*` seen in the pattern (MAX = none yet) and the
    // value index that `*` currently starts matching from.
    let mut star_pi = usize::MAX;
    let mut star_vi = 0;

    while vi < val_bytes.len() {
        if pi < pat_bytes.len() && pat_bytes[pi] == b'*' {
            // Record the backtrack point; initially the star matches nothing.
            star_pi = pi;
            star_vi = vi;
            pi += 1;
        } else if pi < pat_bytes.len()
            && (pat_bytes[pi] == b'?' || pat_bytes[pi] == val_bytes[vi])
        {
            pi += 1;
            vi += 1;
        } else if star_pi != usize::MAX {
            // Mismatch after a star: let the star swallow one more byte.
            pi = star_pi + 1;
            star_vi += 1;
            vi = star_vi;
        } else {
            return false;
        }
    }

    // Value exhausted: only trailing stars may remain in the pattern.
    while pi < pat_bytes.len() && pat_bytes[pi] == b'*' {
        pi += 1;
    }
    pi == pat_bytes.len()
}
// ============================
// Policy validation
// ============================
/// Upper bound on the policy document size, in bytes.
const MAX_POLICY_SIZE: usize = 20 * 1024; // 20 KB

/// Parse and validate a bucket policy document.
///
/// Checks, in order: the document is at most `MAX_POLICY_SIZE` bytes, it
/// parses as a `BucketPolicy`, its version is `2012-10-17`, and it has at
/// least one statement. Every statement must then have at least one action
/// (each `*` or prefixed `s3:`) and at least one resource (each `*` or
/// prefixed `arn:aws:s3:::`).
///
/// Returns the parsed policy, or a `MalformedPolicy` error describing the
/// first violation found.
pub fn validate_policy(json: &str) -> Result<BucketPolicy, StorageError> {
    let reject = |reason: &str| StorageError::malformed_policy(reason);

    if json.len() > MAX_POLICY_SIZE {
        return Err(reject("Policy exceeds maximum size of 20KB"));
    }

    let policy: BucketPolicy = match serde_json::from_str(json) {
        Ok(parsed) => parsed,
        Err(parse_err) => return Err(reject(&parse_err.to_string())),
    };

    if policy.version != "2012-10-17" {
        return Err(reject("Policy version must be \"2012-10-17\""));
    }
    if policy.statements.is_empty() {
        return Err(reject("Policy must contain at least one statement"));
    }

    for (index, statement) in policy.statements.iter().enumerate() {
        if statement.action.is_empty() {
            return Err(reject(&format!("Statement {} has no actions", index)));
        }
        let bad_action = statement
            .action
            .iter()
            .find(|name| name.as_str() != "*" && !name.starts_with("s3:"));
        if let Some(action) = bad_action {
            return Err(reject(&format!(
                "Action \"{}\" must start with \"s3:\"",
                action
            )));
        }

        if statement.resource.is_empty() {
            return Err(reject(&format!("Statement {} has no resources", index)));
        }
        let bad_resource = statement
            .resource
            .iter()
            .find(|arn| arn.as_str() != "*" && !arn.starts_with("arn:aws:s3:::"));
        if let Some(resource) = bad_resource {
            return Err(reject(&format!(
                "Resource \"{}\" must start with \"arn:aws:s3:::\"",
                resource
            )));
        }
    }

    Ok(policy)
}
// ============================
// PolicyStore — in-memory cache + disk
// ============================
/// Bucket policy storage: an in-memory cache backed by one
/// `<bucket>.policy.json` file per bucket inside `policies_dir`.
pub struct PolicyStore {
// Cached policies keyed by bucket name.
policies: RwLock<HashMap<String, BucketPolicy>>,
// Directory containing the on-disk policy files.
policies_dir: PathBuf,
}
impl PolicyStore {
    /// Create a store with an empty cache over `policies_dir`.
    /// Nothing is read from or written to disk here.
    pub fn new(policies_dir: PathBuf) -> Self {
        Self {
            policies: RwLock::new(HashMap::new()),
            policies_dir,
        }
    }

    /// Load all policies from disk into cache.
    ///
    /// Every `<bucket>.policy.json` file in the policies directory is parsed;
    /// files that cannot be read or parsed are logged and skipped. The cache
    /// is replaced wholesale with what was loaded. A missing directory is not
    /// an error and leaves the cache untouched.
    pub async fn load_from_disk(&self) -> anyhow::Result<()> {
        let dir = &self.policies_dir;
        if !dir.exists() {
            return Ok(());
        }

        let mut entries = fs::read_dir(dir).await?;
        let mut policies = HashMap::new();

        while let Some(entry) = entries.next_entry().await? {
            let name = entry.file_name().to_string_lossy().to_string();
            let bucket = match name.strip_suffix(".policy.json") {
                Some(bucket) => bucket,
                None => continue, // not a policy file (e.g. a leftover .tmp)
            };
            match fs::read_to_string(entry.path()).await {
                Ok(json) => match serde_json::from_str::<BucketPolicy>(&json) {
                    Ok(policy) => {
                        tracing::info!("Loaded policy for bucket: {}", bucket);
                        policies.insert(bucket.to_string(), policy);
                    }
                    Err(e) => {
                        tracing::warn!("Failed to parse policy for {}: {}", bucket, e);
                    }
                },
                Err(e) => {
                    tracing::warn!("Failed to read policy file {}: {}", name, e);
                }
            }
        }

        let mut cache = self.policies.write().await;
        *cache = policies;
        Ok(())
    }

    /// Get a copy of the cached policy for a bucket, if any.
    pub async fn get_policy(&self, bucket: &str) -> Option<BucketPolicy> {
        let cache = self.policies.read().await;
        cache.get(bucket).cloned()
    }

    /// Store a policy for a bucket (atomic write + cache update).
    ///
    /// The policy is written to a temp file and renamed over the final path.
    /// The policies directory is created if it does not exist yet, and the
    /// temp file is removed again if the rename fails. The cache is updated
    /// only after the file is in place.
    pub async fn put_policy(&self, bucket: &str, policy: BucketPolicy) -> anyhow::Result<()> {
        let json = serde_json::to_string_pretty(&policy)?;

        // `load_from_disk` tolerates a missing directory, so it may not exist.
        fs::create_dir_all(&self.policies_dir).await?;

        // Atomic write: temp file + rename
        let policy_path = self.policies_dir.join(format!("{}.policy.json", bucket));
        let temp_path = self
            .policies_dir
            .join(format!("{}.policy.json.tmp", bucket));

        fs::write(&temp_path, &json).await?;
        if let Err(e) = fs::rename(&temp_path, &policy_path).await {
            // Best effort: do not leave a stray temp file behind.
            let _ = fs::remove_file(&temp_path).await;
            return Err(e.into());
        }

        // Update cache
        let mut cache = self.policies.write().await;
        cache.insert(bucket.to_string(), policy);
        Ok(())
    }

    /// Delete a policy for a bucket.
    ///
    /// A policy file that does not exist is fine. Any other I/O error is
    /// returned and the cache entry is kept, so the cache never claims a
    /// policy is gone while it is still on disk (where the next
    /// `load_from_disk` would bring it back).
    pub async fn delete_policy(&self, bucket: &str) -> anyhow::Result<()> {
        let policy_path = self.policies_dir.join(format!("{}.policy.json", bucket));
        match fs::remove_file(&policy_path).await {
            Ok(()) => {}
            Err(e) if e.kind() == std::io::ErrorKind::NotFound => {}
            Err(e) => return Err(e.into()),
        }

        let mut cache = self.policies.write().await;
        cache.remove(bucket);
        Ok(())
    }
}

View File

@@ -18,28 +18,44 @@ use tokio::sync::watch;
use tokio_util::io::ReaderStream; use tokio_util::io::ReaderStream;
use uuid::Uuid; use uuid::Uuid;
use crate::config::S3Config; use crate::action::{self, RequestContext, StorageAction};
use crate::s3_error::S3Error; use crate::auth::{self, AuthenticatedIdentity};
use crate::storage::FileStore; use crate::config::SmartStorageConfig;
use crate::policy::{self, PolicyDecision, PolicyStore};
use crate::error::StorageError;
use crate::cluster::coordinator::DistributedStore;
use crate::cluster::config::ErasureConfig;
use crate::cluster::drive_manager::DriveManager;
use crate::cluster::membership::MembershipManager;
use crate::cluster::placement;
use crate::cluster::protocol::NodeInfo;
use crate::cluster::quic_transport::QuicTransport;
use crate::cluster::shard_store::ShardStore;
use crate::cluster::state::ClusterState;
use crate::storage::{FileStore, StorageBackend};
use crate::xml_response; use crate::xml_response;
pub struct S3Server { pub struct StorageServer {
store: Arc<FileStore>, store: Arc<StorageBackend>,
config: S3Config,
shutdown_tx: watch::Sender<bool>, shutdown_tx: watch::Sender<bool>,
server_handle: tokio::task::JoinHandle<()>, server_handle: tokio::task::JoinHandle<()>,
} }
impl S3Server { impl StorageServer {
pub async fn start(config: S3Config) -> Result<Self> { pub async fn start(config: SmartStorageConfig) -> Result<Self> {
let store = Arc::new(FileStore::new(config.storage.directory.clone().into())); let store: Arc<StorageBackend> = if let Some(ref cluster_config) = config.cluster {
if cluster_config.enabled {
// Initialize or reset storage Self::start_clustered(&config, cluster_config).await?
if config.storage.clean_slate { } else {
store.reset().await?; Self::start_standalone(&config).await?
}
} else { } else {
store.initialize().await?; Self::start_standalone(&config).await?
} };
// Initialize policy store
let policy_store = Arc::new(PolicyStore::new(store.policies_dir()));
policy_store.load_from_disk().await?;
let addr: SocketAddr = format!("{}:{}", config.address(), config.server.port) let addr: SocketAddr = format!("{}:{}", config.address(), config.server.port)
.parse()?; .parse()?;
@@ -49,6 +65,7 @@ impl S3Server {
let server_store = store.clone(); let server_store = store.clone();
let server_config = config.clone(); let server_config = config.clone();
let server_policy_store = policy_store.clone();
let server_handle = tokio::spawn(async move { let server_handle = tokio::spawn(async move {
loop { loop {
@@ -61,13 +78,15 @@ impl S3Server {
let io = TokioIo::new(stream); let io = TokioIo::new(stream);
let store = server_store.clone(); let store = server_store.clone();
let cfg = server_config.clone(); let cfg = server_config.clone();
let ps = server_policy_store.clone();
tokio::spawn(async move { tokio::spawn(async move {
let svc = service_fn(move |req: Request<Incoming>| { let svc = service_fn(move |req: Request<Incoming>| {
let store = store.clone(); let store = store.clone();
let cfg = cfg.clone(); let cfg = cfg.clone();
let ps = ps.clone();
async move { async move {
handle_request(req, store, cfg).await handle_request(req, store, cfg, ps).await
} }
}); });
@@ -95,12 +114,11 @@ impl S3Server {
}); });
if !config.server.silent { if !config.server.silent {
tracing::info!("S3 server listening on {}", addr); tracing::info!("Storage server listening on {}", addr);
} }
Ok(Self { Ok(Self {
store, store,
config,
shutdown_tx, shutdown_tx,
server_handle, server_handle,
}) })
@@ -111,12 +129,156 @@ impl S3Server {
let _ = self.server_handle.await; let _ = self.server_handle.await;
} }
pub fn store(&self) -> &FileStore { pub fn store(&self) -> &StorageBackend {
&self.store &self.store
} }
/// Build the single-node backend: a `FileStore` rooted at the configured
/// storage directory, wrapped in `StorageBackend::Standalone`.
///
/// With `clean_slate` set the store is reset, otherwise it is initialized.
async fn start_standalone(config: &SmartStorageConfig) -> Result<Arc<StorageBackend>> {
    let file_store = FileStore::new(config.storage.directory.clone().into());
    let store = Arc::new(StorageBackend::Standalone(file_store));

    if !config.storage.clean_slate {
        store.initialize().await?;
    } else {
        store.reset().await?;
    }

    Ok(store)
}
/// Build the clustered backend.
///
/// Steps, in order: resolve the node ID (configured or a fresh UUID) and the
/// drive paths (configured, or the storage directory as a single drive);
/// create the manifest, bucket and per-drive directories; bind the QUIC
/// transport; create the cluster state and form erasure sets from the local
/// drives; register the local node; set up drive health monitoring and
/// membership; join the cluster via the seed nodes; spawn the QUIC accept
/// loop and the heartbeat loop; finally construct the `DistributedStore`.
///
/// The shutdown senders of the two background loops are moved into their
/// tasks. Dropping them when this function returns would close the watch
/// channels immediately, and a receiver waiting on `changed()` would then
/// get an error right away instead of running until told to stop.
/// NOTE(review): nothing signals these channels yet, so both loops run for
/// the lifetime of the process — wire the senders into server shutdown.
async fn start_clustered(
    config: &SmartStorageConfig,
    cluster_config: &crate::cluster::config::ClusterConfig,
) -> Result<Arc<StorageBackend>> {
    let erasure_config = cluster_config.erasure.clone();
    let node_id = cluster_config
        .node_id
        .clone()
        .unwrap_or_else(|| uuid::Uuid::new_v4().to_string());

    // Determine drive paths
    let drive_paths: Vec<std::path::PathBuf> = if cluster_config.drives.paths.is_empty() {
        // Default: use storage directory as a single drive
        vec![std::path::PathBuf::from(&config.storage.directory)]
    } else {
        cluster_config
            .drives
            .paths
            .iter()
            .map(std::path::PathBuf::from)
            .collect()
    };

    // Ensure directories exist
    let manifest_dir = std::path::PathBuf::from(&config.storage.directory).join(".manifests");
    let buckets_dir = std::path::PathBuf::from(&config.storage.directory).join(".buckets");
    tokio::fs::create_dir_all(&manifest_dir).await?;
    tokio::fs::create_dir_all(&buckets_dir).await?;
    for path in &drive_paths {
        tokio::fs::create_dir_all(path.join(".smartstorage")).await?;
    }

    // Initialize QUIC transport
    let quic_addr: SocketAddr =
        format!("{}:{}", config.server.address, cluster_config.quic_port).parse()?;
    let transport = Arc::new(QuicTransport::new(quic_addr, node_id.clone()).await?);

    // Initialize cluster state
    let cluster_state = Arc::new(ClusterState::new(
        node_id.clone(),
        uuid::Uuid::new_v4().to_string(),
        erasure_config.data_shards,
        erasure_config.parity_shards,
    ));

    // Form erasure sets from local drives (single-node for now)
    let nodes = vec![(node_id.clone(), drive_paths.len() as u32)];
    let erasure_sets =
        placement::form_erasure_sets(&nodes, erasure_config.total_shards());
    if erasure_sets.is_empty() {
        tracing::warn!(
            "Not enough drives ({}) for erasure set size ({}). \
             Need at least {} drives.",
            drive_paths.len(),
            erasure_config.total_shards(),
            erasure_config.total_shards(),
        );
    }
    cluster_state.set_erasure_sets(erasure_sets).await;

    // Register self as a node
    let local_node_info = NodeInfo {
        node_id: node_id.clone(),
        quic_addr: quic_addr.to_string(),
        s3_addr: format!("{}:{}", config.server.address, config.server.port),
        drive_count: drive_paths.len() as u32,
        status: "online".to_string(),
        version: env!("CARGO_PKG_VERSION").to_string(),
    };
    cluster_state.add_node(local_node_info.clone()).await;

    // Initialize drive manager for health monitoring
    let drive_manager = Arc::new(tokio::sync::Mutex::new(
        DriveManager::new(&cluster_config.drives).await?,
    ));

    // Join cluster if seed nodes are configured
    let membership = Arc::new(
        MembershipManager::new(
            cluster_state.clone(),
            transport.clone(),
            cluster_config.heartbeat_interval_ms,
            local_node_info,
        )
        .with_drive_manager(drive_manager),
    );
    membership
        .join_cluster(&cluster_config.seed_nodes)
        .await?;

    // Start QUIC accept loop for incoming connections
    let shard_store_for_accept = Arc::new(ShardStore::new(drive_paths[0].clone()));
    let (quic_shutdown_tx, quic_shutdown_rx) = watch::channel(false);
    let transport_clone = transport.clone();
    tokio::spawn(async move {
        // Keep the sender alive for as long as the loop runs so the
        // receiver never observes a closed channel.
        let _quic_shutdown_tx = quic_shutdown_tx;
        transport_clone
            .accept_loop(shard_store_for_accept, quic_shutdown_rx)
            .await;
    });

    // Start heartbeat loop
    let membership_clone = membership.clone();
    let (hb_shutdown_tx, hb_shutdown_rx) = watch::channel(false);
    tokio::spawn(async move {
        // Same as above: hold the sender so the channel stays open.
        let _hb_shutdown_tx = hb_shutdown_tx;
        membership_clone.heartbeat_loop(hb_shutdown_rx).await;
    });

    // Create distributed store
    let distributed_store = DistributedStore::new(
        cluster_state,
        transport,
        erasure_config,
        drive_paths,
        manifest_dir,
        buckets_dir,
    )?;

    let store = Arc::new(StorageBackend::Clustered(distributed_store));

    if !config.server.silent {
        tracing::info!(
            "Cluster mode enabled (node_id={}, quic_port={})",
            node_id,
            cluster_config.quic_port
        );
    }

    Ok(store)
}
} }
impl S3Config { impl SmartStorageConfig {
fn address(&self) -> &str { fn address(&self) -> &str {
&self.server.address &self.server.address
} }
@@ -184,7 +346,7 @@ fn empty_response(status: StatusCode, request_id: &str) -> Response<BoxBody> {
.unwrap() .unwrap()
} }
fn s3_error_response(err: &S3Error, request_id: &str) -> Response<BoxBody> { fn storage_error_response(err: &StorageError, request_id: &str) -> Response<BoxBody> {
let xml = err.to_xml(); let xml = err.to_xml();
Response::builder() Response::builder()
.status(err.status) .status(err.status)
@@ -196,8 +358,9 @@ fn s3_error_response(err: &S3Error, request_id: &str) -> Response<BoxBody> {
async fn handle_request( async fn handle_request(
req: Request<Incoming>, req: Request<Incoming>,
store: Arc<FileStore>, store: Arc<StorageBackend>,
config: S3Config, config: SmartStorageConfig,
policy_store: Arc<PolicyStore>,
) -> Result<Response<BoxBody>, std::convert::Infallible> { ) -> Result<Response<BoxBody>, std::convert::Infallible> {
let request_id = Uuid::new_v4().to_string(); let request_id = Uuid::new_v4().to_string();
let method = req.method().clone(); let method = req.method().clone();
@@ -210,24 +373,49 @@ async fn handle_request(
return Ok(resp); return Ok(resp);
} }
// Auth check // Step 1: Resolve storage action from request
let request_ctx = action::resolve_action(&req);
// Step 2: Auth + policy pipeline
if config.auth.enabled { if config.auth.enabled {
if let Err(e) = check_auth(&req, &config) { // Attempt authentication
tracing::warn!("Auth failed: {}", e.message); let identity = {
return Ok(s3_error_response(&e, &request_id)); let has_auth_header = req
.headers()
.get("authorization")
.and_then(|v| v.to_str().ok())
.map(|s| !s.is_empty())
.unwrap_or(false);
if has_auth_header {
match auth::verify_request(&req, &config) {
Ok(id) => Some(id),
Err(e) => {
tracing::warn!("Auth failed: {}", e.message);
return Ok(storage_error_response(&e, &request_id));
}
}
} else {
None // Anonymous request
}
};
// Step 3: Authorization (policy evaluation)
if let Err(e) = authorize_request(&request_ctx, identity.as_ref(), &policy_store).await {
return Ok(storage_error_response(&e, &request_id));
} }
} }
// Route and handle // Route and handle
let mut response = match route_request(req, store, &config, &request_id).await { let mut response = match route_request(req, store, &config, &request_id, &policy_store).await {
Ok(resp) => resp, Ok(resp) => resp,
Err(err) => { Err(err) => {
if let Some(s3err) = err.downcast_ref::<S3Error>() { if let Some(s3err) = err.downcast_ref::<StorageError>() {
s3_error_response(s3err, &request_id) storage_error_response(s3err, &request_id)
} else { } else {
tracing::error!("Internal error: {}", err); tracing::error!("Internal error: {}", err);
let s3err = S3Error::internal_error(&err.to_string()); let s3err = StorageError::internal_error(&err.to_string());
s3_error_response(&s3err, &request_id) storage_error_response(&s3err, &request_id)
} }
} }
}; };
@@ -249,15 +437,52 @@ async fn handle_request(
Ok(response) Ok(response)
} }
/// Decide whether a request may proceed, based on its resolved action, the
/// caller's identity (if any) and the policy attached to the target bucket.
///
/// Evaluation order:
/// 1. `ListAllMyBuckets` is decided purely on whether the caller is authenticated.
/// 2. If the request targets a bucket that has a policy, an explicit `Allow`
///    or `Deny` from that policy is final.
/// 3. Otherwise authenticated callers are allowed and anonymous callers denied.
///
/// Returns `Ok(())` when the request is permitted and an access-denied
/// `StorageError` when it is not.
async fn authorize_request(
    ctx: &RequestContext,
    identity: Option<&AuthenticatedIdentity>,
    policy_store: &PolicyStore,
) -> Result<(), StorageError> {
    let is_authenticated = identity.is_some();

    // Listing all buckets has no bucket whose policy could apply.
    if ctx.action == StorageAction::ListAllMyBuckets {
        return if is_authenticated {
            Ok(())
        } else {
            Err(StorageError::access_denied())
        };
    }

    // Consult the bucket policy, if the request names a bucket and one is stored.
    if let Some(bucket) = ctx.bucket.as_ref() {
        if let Some(bucket_policy) = policy_store.get_policy(bucket).await {
            match policy::evaluate_policy(&bucket_policy, ctx, identity) {
                PolicyDecision::Allow => return Ok(()),
                PolicyDecision::Deny => return Err(StorageError::access_denied()),
                // The policy is silent on this request: use the default rule below.
                PolicyDecision::NoOpinion => {}
            }
        }
    }

    // Default rule: full access for authenticated callers, none for anonymous.
    if is_authenticated {
        Ok(())
    } else {
        Err(StorageError::access_denied())
    }
}
// ============================ // ============================
// Routing // Routing
// ============================ // ============================
async fn route_request( async fn route_request(
req: Request<Incoming>, req: Request<Incoming>,
store: Arc<FileStore>, store: Arc<StorageBackend>,
_config: &S3Config, _config: &SmartStorageConfig,
request_id: &str, request_id: &str,
policy_store: &Arc<PolicyStore>,
) -> Result<Response<BoxBody>> { ) -> Result<Response<BoxBody>> {
let method = req.method().clone(); let method = req.method().clone();
let path = req.uri().path().to_string(); let path = req.uri().path().to_string();
@@ -282,6 +507,17 @@ async fn route_request(
1 => { 1 => {
// Bucket level: /{bucket} // Bucket level: /{bucket}
let bucket = percent_decode(segments[0]); let bucket = percent_decode(segments[0]);
// Check for ?policy query parameter
if query.contains_key("policy") {
return match method {
Method::GET => handle_get_bucket_policy(policy_store, &bucket, request_id).await,
Method::PUT => handle_put_bucket_policy(req, &store, policy_store, &bucket, request_id).await,
Method::DELETE => handle_delete_bucket_policy(policy_store, &bucket, request_id).await,
_ => Ok(empty_response(StatusCode::METHOD_NOT_ALLOWED, request_id)),
};
}
match method { match method {
Method::GET => { Method::GET => {
if query.contains_key("uploads") { if query.contains_key("uploads") {
@@ -291,7 +527,7 @@ async fn route_request(
} }
} }
Method::PUT => handle_create_bucket(store, &bucket, request_id).await, Method::PUT => handle_create_bucket(store, &bucket, request_id).await,
Method::DELETE => handle_delete_bucket(store, &bucket, request_id).await, Method::DELETE => handle_delete_bucket(store, &bucket, request_id, policy_store).await,
Method::HEAD => handle_head_bucket(store, &bucket, request_id).await, Method::HEAD => handle_head_bucket(store, &bucket, request_id).await,
_ => Ok(empty_response(StatusCode::METHOD_NOT_ALLOWED, request_id)), _ => Ok(empty_response(StatusCode::METHOD_NOT_ALLOWED, request_id)),
} }
@@ -332,8 +568,8 @@ async fn route_request(
let upload_id = query.get("uploadId").unwrap().clone(); let upload_id = query.get("uploadId").unwrap().clone();
handle_complete_multipart(req, store, &bucket, &key, &upload_id, request_id).await handle_complete_multipart(req, store, &bucket, &key, &upload_id, request_id).await
} else { } else {
let err = S3Error::invalid_request("Invalid POST request"); let err = StorageError::invalid_request("Invalid POST request");
Ok(s3_error_response(&err, request_id)) Ok(storage_error_response(&err, request_id))
} }
} }
_ => Ok(empty_response(StatusCode::METHOD_NOT_ALLOWED, request_id)), _ => Ok(empty_response(StatusCode::METHOD_NOT_ALLOWED, request_id)),
@@ -348,7 +584,7 @@ async fn route_request(
// ============================ // ============================
async fn handle_list_buckets( async fn handle_list_buckets(
store: Arc<FileStore>, store: Arc<StorageBackend>,
request_id: &str, request_id: &str,
) -> Result<Response<BoxBody>> { ) -> Result<Response<BoxBody>> {
let buckets = store.list_buckets().await?; let buckets = store.list_buckets().await?;
@@ -357,7 +593,7 @@ async fn handle_list_buckets(
} }
async fn handle_create_bucket( async fn handle_create_bucket(
store: Arc<FileStore>, store: Arc<StorageBackend>,
bucket: &str, bucket: &str,
request_id: &str, request_id: &str,
) -> Result<Response<BoxBody>> { ) -> Result<Response<BoxBody>> {
@@ -366,28 +602,31 @@ async fn handle_create_bucket(
} }
async fn handle_delete_bucket( async fn handle_delete_bucket(
store: Arc<FileStore>, store: Arc<StorageBackend>,
bucket: &str, bucket: &str,
request_id: &str, request_id: &str,
policy_store: &Arc<PolicyStore>,
) -> Result<Response<BoxBody>> { ) -> Result<Response<BoxBody>> {
store.delete_bucket(bucket).await?; store.delete_bucket(bucket).await?;
// Clean up bucket policy on deletion
let _ = policy_store.delete_policy(bucket).await;
Ok(empty_response(StatusCode::NO_CONTENT, request_id)) Ok(empty_response(StatusCode::NO_CONTENT, request_id))
} }
async fn handle_head_bucket( async fn handle_head_bucket(
store: Arc<FileStore>, store: Arc<StorageBackend>,
bucket: &str, bucket: &str,
request_id: &str, request_id: &str,
) -> Result<Response<BoxBody>> { ) -> Result<Response<BoxBody>> {
if store.bucket_exists(bucket).await { if store.bucket_exists(bucket).await {
Ok(empty_response(StatusCode::OK, request_id)) Ok(empty_response(StatusCode::OK, request_id))
} else { } else {
Err(S3Error::no_such_bucket().into()) Err(StorageError::no_such_bucket().into())
} }
} }
async fn handle_list_objects( async fn handle_list_objects(
store: Arc<FileStore>, store: Arc<StorageBackend>,
bucket: &str, bucket: &str,
query: &HashMap<String, String>, query: &HashMap<String, String>,
request_id: &str, request_id: &str,
@@ -416,7 +655,7 @@ async fn handle_list_objects(
async fn handle_put_object( async fn handle_put_object(
req: Request<Incoming>, req: Request<Incoming>,
store: Arc<FileStore>, store: Arc<StorageBackend>,
bucket: &str, bucket: &str,
key: &str, key: &str,
request_id: &str, request_id: &str,
@@ -438,7 +677,7 @@ async fn handle_put_object(
async fn handle_get_object( async fn handle_get_object(
req: Request<Incoming>, req: Request<Incoming>,
store: Arc<FileStore>, store: Arc<StorageBackend>,
bucket: &str, bucket: &str,
key: &str, key: &str,
request_id: &str, request_id: &str,
@@ -491,7 +730,7 @@ async fn handle_get_object(
} }
async fn handle_head_object( async fn handle_head_object(
store: Arc<FileStore>, store: Arc<StorageBackend>,
bucket: &str, bucket: &str,
key: &str, key: &str,
request_id: &str, request_id: &str,
@@ -523,7 +762,7 @@ async fn handle_head_object(
} }
async fn handle_delete_object( async fn handle_delete_object(
store: Arc<FileStore>, store: Arc<StorageBackend>,
bucket: &str, bucket: &str,
key: &str, key: &str,
request_id: &str, request_id: &str,
@@ -534,7 +773,7 @@ async fn handle_delete_object(
async fn handle_copy_object( async fn handle_copy_object(
req: Request<Incoming>, req: Request<Incoming>,
store: Arc<FileStore>, store: Arc<StorageBackend>,
dest_bucket: &str, dest_bucket: &str,
dest_key: &str, dest_key: &str,
request_id: &str, request_id: &str,
@@ -577,13 +816,77 @@ async fn handle_copy_object(
Ok(xml_response(StatusCode::OK, xml, request_id)) Ok(xml_response(StatusCode::OK, xml, request_id))
} }
// ============================
// Policy handlers
// ============================
/// Handle `GET /{bucket}?policy`.
///
/// Responds with `200 OK` and the bucket's stored policy serialized as
/// pretty-printed JSON. Fails with a no-such-bucket-policy error when the
/// policy store has no entry for `bucket`.
async fn handle_get_bucket_policy(
    policy_store: &Arc<PolicyStore>,
    bucket: &str,
    request_id: &str,
) -> Result<Response<BoxBody>> {
    let stored_policy = match policy_store.get_policy(bucket).await {
        Some(found) => found,
        None => return Err(StorageError::no_such_bucket_policy().into()),
    };

    let policy_json = serde_json::to_string_pretty(&stored_policy)?;

    let response = Response::builder()
        .status(StatusCode::OK)
        .header("content-type", "application/json")
        .header("x-amz-request-id", request_id)
        .body(full_body(policy_json))
        .unwrap();

    Ok(response)
}
async fn handle_put_bucket_policy(
req: Request<Incoming>,
store: &Arc<StorageBackend>,
policy_store: &Arc<PolicyStore>,
bucket: &str,
request_id: &str,
) -> Result<Response<BoxBody>> {
// Verify bucket exists
if !store.bucket_exists(bucket).await {
return Err(StorageError::no_such_bucket().into());
}
// Read body
let body_bytes = req.collect().await.map_err(|e| anyhow::anyhow!("Body error: {}", e))?.to_bytes();
let body_str = String::from_utf8_lossy(&body_bytes);
// Validate and parse
let validated_policy = policy::validate_policy(&body_str)?;
// Store
policy_store
.put_policy(bucket, validated_policy)
.await
.map_err(|e| StorageError::internal_error(&e.to_string()))?;
Ok(empty_response(StatusCode::NO_CONTENT, request_id))
}
/// Handle `DELETE /{bucket}?policy`.
///
/// Asks the policy store to delete the policy for `bucket` and responds with
/// `204 No Content`. A failure reported by the policy store is surfaced as an
/// internal error carrying the underlying message.
async fn handle_delete_bucket_policy(
    policy_store: &Arc<PolicyStore>,
    bucket: &str,
    request_id: &str,
) -> Result<Response<BoxBody>> {
    if let Err(delete_err) = policy_store.delete_policy(bucket).await {
        return Err(StorageError::internal_error(&delete_err.to_string()).into());
    }

    Ok(empty_response(StatusCode::NO_CONTENT, request_id))
}
// ============================ // ============================
// Multipart handlers // Multipart handlers
// ============================ // ============================
async fn handle_initiate_multipart( async fn handle_initiate_multipart(
req: Request<Incoming>, req: Request<Incoming>,
store: Arc<FileStore>, store: Arc<StorageBackend>,
bucket: &str, bucket: &str,
key: &str, key: &str,
request_id: &str, request_id: &str,
@@ -596,7 +899,7 @@ async fn handle_initiate_multipart(
async fn handle_upload_part( async fn handle_upload_part(
req: Request<Incoming>, req: Request<Incoming>,
store: Arc<FileStore>, store: Arc<StorageBackend>,
query: &HashMap<String, String>, query: &HashMap<String, String>,
request_id: &str, request_id: &str,
) -> Result<Response<BoxBody>> { ) -> Result<Response<BoxBody>> {
@@ -607,7 +910,7 @@ async fn handle_upload_part(
.unwrap_or(0); .unwrap_or(0);
if part_number < 1 || part_number > 10000 { if part_number < 1 || part_number > 10000 {
return Err(S3Error::invalid_part_number().into()); return Err(StorageError::invalid_part_number().into());
} }
let body = req.into_body(); let body = req.into_body();
@@ -625,7 +928,7 @@ async fn handle_upload_part(
async fn handle_complete_multipart( async fn handle_complete_multipart(
req: Request<Incoming>, req: Request<Incoming>,
store: Arc<FileStore>, store: Arc<StorageBackend>,
bucket: &str, bucket: &str,
key: &str, key: &str,
upload_id: &str, upload_id: &str,
@@ -645,7 +948,7 @@ async fn handle_complete_multipart(
} }
async fn handle_abort_multipart( async fn handle_abort_multipart(
store: Arc<FileStore>, store: Arc<StorageBackend>,
upload_id: &str, upload_id: &str,
request_id: &str, request_id: &str,
) -> Result<Response<BoxBody>> { ) -> Result<Response<BoxBody>> {
@@ -654,7 +957,7 @@ async fn handle_abort_multipart(
} }
async fn handle_list_multipart_uploads( async fn handle_list_multipart_uploads(
store: Arc<FileStore>, store: Arc<StorageBackend>,
bucket: &str, bucket: &str,
request_id: &str, request_id: &str,
) -> Result<Response<BoxBody>> { ) -> Result<Response<BoxBody>> {
@@ -776,7 +1079,7 @@ fn extract_xml_value<'a>(xml: &'a str, tag: &str) -> Option<String> {
// CORS // CORS
// ============================ // ============================
fn build_cors_preflight(config: &S3Config, request_id: &str) -> Response<BoxBody> { fn build_cors_preflight(config: &SmartStorageConfig, request_id: &str) -> Response<BoxBody> {
let mut builder = Response::builder() let mut builder = Response::builder()
.status(StatusCode::NO_CONTENT) .status(StatusCode::NO_CONTENT)
.header("x-amz-request-id", request_id); .header("x-amz-request-id", request_id);
@@ -800,7 +1103,7 @@ fn build_cors_preflight(config: &S3Config, request_id: &str) -> Response<BoxBody
builder.body(empty_body()).unwrap() builder.body(empty_body()).unwrap()
} }
fn add_cors_headers(headers: &mut hyper::HeaderMap, config: &S3Config) { fn add_cors_headers(headers: &mut hyper::HeaderMap, config: &SmartStorageConfig) {
if let Some(ref origins) = config.cors.allowed_origins { if let Some(ref origins) = config.cors.allowed_origins {
headers.insert( headers.insert(
"access-control-allow-origin", "access-control-allow-origin",
@@ -820,46 +1123,3 @@ fn add_cors_headers(headers: &mut hyper::HeaderMap, config: &S3Config) {
); );
} }
} }
// ============================
// Auth
// ============================
/// Check that the request's `Authorization` header names a configured access key.
///
/// Supports the AWS v4 form (`AWS4-HMAC-SHA256 Credential=KEY/...`) and the
/// v2 form (`AWS KEY:signature`). Only the access key id is compared against
/// `config.auth.credentials`; the signature itself is NOT verified here.
///
/// Returns `Ok(())` when the key matches a configured credential and an
/// access-denied error when the header is missing, malformed, carries an
/// empty key, or names an unknown key.
fn check_auth(req: &Request<Incoming>, config: &S3Config) -> Result<(), S3Error> {
    let auth_header = req
        .headers()
        .get("authorization")
        .and_then(|v| v.to_str().ok())
        .unwrap_or("");

    if auth_header.is_empty() {
        return Err(S3Error::access_denied());
    }

    // Extract access key from AWS v2 or v4 signature
    let access_key = if auth_header.starts_with("AWS4-HMAC-SHA256") {
        // v4: AWS4-HMAC-SHA256 Credential=KEY/date/region/s3/aws4_request, ...
        auth_header
            .split("Credential=")
            .nth(1)
            .and_then(|s| s.split('/').next())
    } else if auth_header.starts_with("AWS ") {
        // v2: AWS KEY:signature
        auth_header
            .strip_prefix("AWS ")
            .and_then(|s| s.split(':').next())
    } else {
        None
    };

    // Deny outright when no key could be extracted. Falling back to "" would
    // let a malformed header match a credential configured with an empty id.
    let access_key = match access_key {
        Some(key) if !key.is_empty() => key,
        _ => return Err(S3Error::access_denied()),
    };

    // Check against configured credentials
    let is_known_key = config
        .auth
        .credentials
        .iter()
        .any(|cred| cred.access_key_id == access_key);

    if is_known_key {
        Ok(())
    } else {
        Err(S3Error::access_denied())
    }
}

View File

@@ -10,19 +10,18 @@ use tokio::fs;
use tokio::io::{AsyncReadExt, AsyncSeekExt, AsyncWriteExt, BufWriter}; use tokio::io::{AsyncReadExt, AsyncSeekExt, AsyncWriteExt, BufWriter};
use uuid::Uuid; use uuid::Uuid;
use crate::s3_error::S3Error; use crate::cluster::coordinator::DistributedStore;
use crate::error::StorageError;
// ============================ // ============================
// Result types // Result types
// ============================ // ============================
pub struct PutResult { pub struct PutResult {
pub size: u64,
pub md5: String, pub md5: String,
} }
pub struct GetResult { pub struct GetResult {
pub key: String,
pub size: u64, pub size: u64,
pub last_modified: DateTime<Utc>, pub last_modified: DateTime<Utc>,
pub md5: String, pub md5: String,
@@ -32,7 +31,6 @@ pub struct GetResult {
} }
pub struct HeadResult { pub struct HeadResult {
pub key: String,
pub size: u64, pub size: u64,
pub last_modified: DateTime<Utc>, pub last_modified: DateTime<Utc>,
pub md5: String, pub md5: String,
@@ -40,7 +38,6 @@ pub struct HeadResult {
} }
pub struct CopyResult { pub struct CopyResult {
pub size: u64,
pub md5: String, pub md5: String,
pub last_modified: DateTime<Utc>, pub last_modified: DateTime<Utc>,
} }
@@ -69,14 +66,12 @@ pub struct BucketInfo {
pub struct MultipartUploadInfo { pub struct MultipartUploadInfo {
pub upload_id: String, pub upload_id: String,
pub bucket: String,
pub key: String, pub key: String,
pub initiated: DateTime<Utc>, pub initiated: DateTime<Utc>,
} }
pub struct CompleteMultipartResult { pub struct CompleteMultipartResult {
pub etag: String, pub etag: String,
pub size: u64,
} }
// ============================ // ============================
@@ -118,14 +113,20 @@ impl FileStore {
pub async fn initialize(&self) -> Result<()> { pub async fn initialize(&self) -> Result<()> {
fs::create_dir_all(&self.root_dir).await?; fs::create_dir_all(&self.root_dir).await?;
fs::create_dir_all(self.policies_dir()).await?;
Ok(()) Ok(())
} }
pub fn policies_dir(&self) -> PathBuf {
self.root_dir.join(".policies")
}
pub async fn reset(&self) -> Result<()> { pub async fn reset(&self) -> Result<()> {
if self.root_dir.exists() { if self.root_dir.exists() {
fs::remove_dir_all(&self.root_dir).await?; fs::remove_dir_all(&self.root_dir).await?;
} }
fs::create_dir_all(&self.root_dir).await?; fs::create_dir_all(&self.root_dir).await?;
fs::create_dir_all(self.policies_dir()).await?;
Ok(()) Ok(())
} }
@@ -174,13 +175,13 @@ impl FileStore {
let bucket_path = self.root_dir.join(bucket); let bucket_path = self.root_dir.join(bucket);
if !bucket_path.is_dir() { if !bucket_path.is_dir() {
return Err(S3Error::no_such_bucket().into()); return Err(StorageError::no_such_bucket().into());
} }
// Check if bucket is empty (ignore hidden files) // Check if bucket is empty (ignore hidden files)
let mut entries = fs::read_dir(&bucket_path).await?; let mut entries = fs::read_dir(&bucket_path).await?;
while let Some(_entry) = entries.next_entry().await? { while let Some(_entry) = entries.next_entry().await? {
return Err(S3Error::bucket_not_empty().into()); return Err(StorageError::bucket_not_empty().into());
} }
fs::remove_dir_all(&bucket_path).await?; fs::remove_dir_all(&bucket_path).await?;
@@ -199,7 +200,7 @@ impl FileStore {
metadata: HashMap<String, String>, metadata: HashMap<String, String>,
) -> Result<PutResult> { ) -> Result<PutResult> {
if !self.bucket_exists(bucket).await { if !self.bucket_exists(bucket).await {
return Err(S3Error::no_such_bucket().into()); return Err(StorageError::no_such_bucket().into());
} }
let object_path = self.object_path(bucket, key); let object_path = self.object_path(bucket, key);
@@ -210,7 +211,6 @@ impl FileStore {
let file = fs::File::create(&object_path).await?; let file = fs::File::create(&object_path).await?;
let mut writer = BufWriter::new(file); let mut writer = BufWriter::new(file);
let mut hasher = Md5::new(); let mut hasher = Md5::new();
let mut total_size: u64 = 0;
// Stream body frames directly to file // Stream body frames directly to file
let mut body = body; let mut body = body;
@@ -219,7 +219,6 @@ impl FileStore {
Some(Ok(frame)) => { Some(Ok(frame)) => {
if let Ok(data) = frame.into_data() { if let Ok(data) = frame.into_data() {
hasher.update(&data); hasher.update(&data);
total_size += data.len() as u64;
writer.write_all(&data).await?; writer.write_all(&data).await?;
} }
} }
@@ -245,44 +244,6 @@ impl FileStore {
fs::write(&metadata_path, metadata_json).await?; fs::write(&metadata_path, metadata_json).await?;
Ok(PutResult { Ok(PutResult {
size: total_size,
md5: md5_hex,
})
}
pub async fn put_object_bytes(
&self,
bucket: &str,
key: &str,
data: &[u8],
metadata: HashMap<String, String>,
) -> Result<PutResult> {
if !self.bucket_exists(bucket).await {
return Err(S3Error::no_such_bucket().into());
}
let object_path = self.object_path(bucket, key);
if let Some(parent) = object_path.parent() {
fs::create_dir_all(parent).await?;
}
let mut hasher = Md5::new();
hasher.update(data);
let md5_hex = format!("{:x}", hasher.finalize());
fs::write(&object_path, data).await?;
// Write MD5 sidecar
let md5_path = format!("{}.md5", object_path.display());
fs::write(&md5_path, &md5_hex).await?;
// Write metadata sidecar
let metadata_path = format!("{}.metadata.json", object_path.display());
let metadata_json = serde_json::to_string_pretty(&metadata)?;
fs::write(&metadata_path, metadata_json).await?;
Ok(PutResult {
size: data.len() as u64,
md5: md5_hex, md5: md5_hex,
}) })
} }
@@ -296,7 +257,7 @@ impl FileStore {
let object_path = self.object_path(bucket, key); let object_path = self.object_path(bucket, key);
if !object_path.exists() { if !object_path.exists() {
return Err(S3Error::no_such_key().into()); return Err(StorageError::no_such_key().into());
} }
let file_meta = fs::metadata(&object_path).await?; let file_meta = fs::metadata(&object_path).await?;
@@ -316,7 +277,6 @@ impl FileStore {
}; };
Ok(GetResult { Ok(GetResult {
key: key.to_string(),
size, size,
last_modified, last_modified,
md5, md5,
@@ -330,7 +290,7 @@ impl FileStore {
let object_path = self.object_path(bucket, key); let object_path = self.object_path(bucket, key);
if !object_path.exists() { if !object_path.exists() {
return Err(S3Error::no_such_key().into()); return Err(StorageError::no_such_key().into());
} }
// Only stat the file, don't open it // Only stat the file, don't open it
@@ -342,7 +302,6 @@ impl FileStore {
let metadata = self.read_metadata(&object_path).await; let metadata = self.read_metadata(&object_path).await;
Ok(HeadResult { Ok(HeadResult {
key: key.to_string(),
size, size,
last_modified, last_modified,
md5, md5,
@@ -394,11 +353,11 @@ impl FileStore {
let dest_path = self.object_path(dest_bucket, dest_key); let dest_path = self.object_path(dest_bucket, dest_key);
if !src_path.exists() { if !src_path.exists() {
return Err(S3Error::no_such_key().into()); return Err(StorageError::no_such_key().into());
} }
if !self.bucket_exists(dest_bucket).await { if !self.bucket_exists(dest_bucket).await {
return Err(S3Error::no_such_bucket().into()); return Err(StorageError::no_such_bucket().into());
} }
if let Some(parent) = dest_path.parent() { if let Some(parent) = dest_path.parent() {
@@ -429,7 +388,6 @@ impl FileStore {
let last_modified: DateTime<Utc> = file_meta.modified()?.into(); let last_modified: DateTime<Utc> = file_meta.modified()?.into();
Ok(CopyResult { Ok(CopyResult {
size: file_meta.len(),
md5, md5,
last_modified, last_modified,
}) })
@@ -446,7 +404,7 @@ impl FileStore {
let bucket_path = self.root_dir.join(bucket); let bucket_path = self.root_dir.join(bucket);
if !bucket_path.is_dir() { if !bucket_path.is_dir() {
return Err(S3Error::no_such_bucket().into()); return Err(StorageError::no_such_bucket().into());
} }
// Collect all object keys recursively // Collect all object keys recursively
@@ -571,7 +529,7 @@ impl FileStore {
) -> Result<(String, u64)> { ) -> Result<(String, u64)> {
let upload_dir = self.multipart_dir().join(upload_id); let upload_dir = self.multipart_dir().join(upload_id);
if !upload_dir.is_dir() { if !upload_dir.is_dir() {
return Err(S3Error::no_such_upload().into()); return Err(StorageError::no_such_upload().into());
} }
let part_path = upload_dir.join(format!("part-{}", part_number)); let part_path = upload_dir.join(format!("part-{}", part_number));
@@ -645,7 +603,7 @@ impl FileStore {
) -> Result<CompleteMultipartResult> { ) -> Result<CompleteMultipartResult> {
let upload_dir = self.multipart_dir().join(upload_id); let upload_dir = self.multipart_dir().join(upload_id);
if !upload_dir.is_dir() { if !upload_dir.is_dir() {
return Err(S3Error::no_such_upload().into()); return Err(StorageError::no_such_upload().into());
} }
// Read metadata to get bucket/key // Read metadata to get bucket/key
@@ -662,7 +620,6 @@ impl FileStore {
let dest_file = fs::File::create(&object_path).await?; let dest_file = fs::File::create(&object_path).await?;
let mut writer = BufWriter::new(dest_file); let mut writer = BufWriter::new(dest_file);
let mut hasher = Md5::new(); let mut hasher = Md5::new();
let mut total_size: u64 = 0;
for (part_number, _etag) in parts { for (part_number, _etag) in parts {
let part_path = upload_dir.join(format!("part-{}", part_number)); let part_path = upload_dir.join(format!("part-{}", part_number));
@@ -679,7 +636,6 @@ impl FileStore {
} }
hasher.update(&buf[..n]); hasher.update(&buf[..n]);
writer.write_all(&buf[..n]).await?; writer.write_all(&buf[..n]).await?;
total_size += n as u64;
} }
} }
@@ -702,14 +658,13 @@ impl FileStore {
Ok(CompleteMultipartResult { Ok(CompleteMultipartResult {
etag, etag,
size: total_size,
}) })
} }
pub async fn abort_multipart(&self, upload_id: &str) -> Result<()> { pub async fn abort_multipart(&self, upload_id: &str) -> Result<()> {
let upload_dir = self.multipart_dir().join(upload_id); let upload_dir = self.multipart_dir().join(upload_id);
if !upload_dir.is_dir() { if !upload_dir.is_dir() {
return Err(S3Error::no_such_upload().into()); return Err(StorageError::no_such_upload().into());
} }
fs::remove_dir_all(&upload_dir).await?; fs::remove_dir_all(&upload_dir).await?;
Ok(()) Ok(())
@@ -742,7 +697,6 @@ impl FileStore {
uploads.push(MultipartUploadInfo { uploads.push(MultipartUploadInfo {
upload_id: meta.upload_id, upload_id: meta.upload_id,
bucket: meta.bucket,
key: meta.key, key: meta.key,
initiated, initiated,
}); });
@@ -762,7 +716,7 @@ impl FileStore {
let encoded = encode_key(key); let encoded = encode_key(key);
self.root_dir self.root_dir
.join(bucket) .join(bucket)
.join(format!("{}._S3_object", encoded)) .join(format!("{}._storage_object", encoded))
} }
async fn read_md5(&self, object_path: &Path) -> String { async fn read_md5(&self, object_path: &Path) -> String {
@@ -822,7 +776,7 @@ impl FileStore {
if meta.is_dir() { if meta.is_dir() {
self.collect_keys(bucket_path, &entry.path(), keys).await?; self.collect_keys(bucket_path, &entry.path(), keys).await?;
} else if name.ends_with("._S3_object") } else if name.ends_with("._storage_object")
&& !name.ends_with(".metadata.json") && !name.ends_with(".metadata.json")
&& !name.ends_with(".md5") && !name.ends_with(".md5")
{ {
@@ -832,7 +786,7 @@ impl FileStore {
.unwrap_or(Path::new("")) .unwrap_or(Path::new(""))
.to_string_lossy() .to_string_lossy()
.to_string(); .to_string();
let key = decode_key(relative.trim_end_matches("._S3_object")); let key = decode_key(relative.trim_end_matches("._storage_object"));
keys.push(key); keys.push(key);
} }
} }
@@ -842,6 +796,200 @@ impl FileStore {
} }
} }
// ============================
// StorageBackend enum
// ============================
/// Unified storage backend that dispatches to either standalone (FileStore)
/// or clustered (DistributedStore) storage.
///
/// Callers hold a single `StorageBackend` value and call its methods; each
/// method matches on the variant and delegates to the wrapped store.
pub enum StorageBackend {
/// Standalone mode: wraps a `FileStore`.
Standalone(FileStore),
/// Cluster mode: wraps a `DistributedStore`.
Clustered(DistributedStore),
}
impl StorageBackend {
    /// Directory in which bucket policies are kept, as reported by the
    /// wrapped store.
    pub fn policies_dir(&self) -> std::path::PathBuf {
        match self {
            Self::Standalone(file_store) => file_store.policies_dir(),
            Self::Clustered(cluster) => cluster.policies_dir(),
        }
    }

    /// Prepare the backend for use.
    ///
    /// Standalone mode delegates to `FileStore::initialize`; cluster mode only
    /// makes sure the policies directory exists.
    pub async fn initialize(&self) -> Result<()> {
        match self {
            Self::Standalone(file_store) => file_store.initialize().await,
            Self::Clustered(cluster) => {
                // Ensure policies directory exists
                let policies_path = cluster.policies_dir();
                tokio::fs::create_dir_all(policies_path).await?;
                Ok(())
            }
        }
    }

    /// Reset the backend.
    ///
    /// Standalone mode delegates to `FileStore::reset`. Cluster mode currently
    /// does nothing and reports success.
    pub async fn reset(&self) -> Result<()> {
        match self {
            Self::Standalone(file_store) => file_store.reset().await,
            // TODO: cluster reset
            Self::Clustered(_) => Ok(()),
        }
    }

    /// List all buckets known to the wrapped store.
    pub async fn list_buckets(&self) -> Result<Vec<BucketInfo>> {
        match self {
            Self::Standalone(file_store) => file_store.list_buckets().await,
            Self::Clustered(cluster) => cluster.list_buckets().await,
        }
    }

    /// Report whether `bucket` exists in the wrapped store.
    pub async fn bucket_exists(&self, bucket: &str) -> bool {
        match self {
            Self::Standalone(file_store) => file_store.bucket_exists(bucket).await,
            Self::Clustered(cluster) => cluster.bucket_exists(bucket).await,
        }
    }

    /// Create `bucket` in the wrapped store.
    pub async fn create_bucket(&self, bucket: &str) -> Result<()> {
        match self {
            Self::Standalone(file_store) => file_store.create_bucket(bucket).await,
            Self::Clustered(cluster) => cluster.create_bucket(bucket).await,
        }
    }

    /// Delete `bucket` from the wrapped store.
    pub async fn delete_bucket(&self, bucket: &str) -> Result<()> {
        match self {
            Self::Standalone(file_store) => file_store.delete_bucket(bucket).await,
            Self::Clustered(cluster) => cluster.delete_bucket(bucket).await,
        }
    }

    /// Store the streamed request `body` as object `key` in `bucket`, together
    /// with its user `metadata`.
    pub async fn put_object(
        &self,
        bucket: &str,
        key: &str,
        body: Incoming,
        metadata: HashMap<String, String>,
    ) -> Result<PutResult> {
        match self {
            Self::Standalone(file_store) => {
                file_store.put_object(bucket, key, body, metadata).await
            }
            Self::Clustered(cluster) => cluster.put_object(bucket, key, body, metadata).await,
        }
    }

    /// Fetch object `key` from `bucket`, optionally restricted to `range`.
    pub async fn get_object(
        &self,
        bucket: &str,
        key: &str,
        range: Option<(u64, u64)>,
    ) -> Result<GetResult> {
        match self {
            Self::Standalone(file_store) => file_store.get_object(bucket, key, range).await,
            Self::Clustered(cluster) => cluster.get_object(bucket, key, range).await,
        }
    }

    /// Fetch the metadata of object `key` in `bucket`.
    pub async fn head_object(&self, bucket: &str, key: &str) -> Result<HeadResult> {
        match self {
            Self::Standalone(file_store) => file_store.head_object(bucket, key).await,
            Self::Clustered(cluster) => cluster.head_object(bucket, key).await,
        }
    }

    /// Delete object `key` from `bucket`.
    pub async fn delete_object(&self, bucket: &str, key: &str) -> Result<()> {
        match self {
            Self::Standalone(file_store) => file_store.delete_object(bucket, key).await,
            Self::Clustered(cluster) => cluster.delete_object(bucket, key).await,
        }
    }

    /// Copy an object from `src_bucket`/`src_key` to `dest_bucket`/`dest_key`.
    ///
    /// `metadata_directive` and `new_metadata` are passed through unchanged to
    /// the wrapped store.
    pub async fn copy_object(
        &self,
        src_bucket: &str,
        src_key: &str,
        dest_bucket: &str,
        dest_key: &str,
        metadata_directive: &str,
        new_metadata: Option<HashMap<String, String>>,
    ) -> Result<CopyResult> {
        match self {
            Self::Standalone(file_store) => {
                file_store
                    .copy_object(
                        src_bucket,
                        src_key,
                        dest_bucket,
                        dest_key,
                        metadata_directive,
                        new_metadata,
                    )
                    .await
            }
            Self::Clustered(cluster) => {
                cluster
                    .copy_object(
                        src_bucket,
                        src_key,
                        dest_bucket,
                        dest_key,
                        metadata_directive,
                        new_metadata,
                    )
                    .await
            }
        }
    }

    /// List objects in `bucket`, forwarding the listing parameters unchanged
    /// to the wrapped store.
    pub async fn list_objects(
        &self,
        bucket: &str,
        prefix: &str,
        delimiter: &str,
        max_keys: usize,
        continuation_token: Option<&str>,
    ) -> Result<ListObjectsResult> {
        match self {
            Self::Standalone(file_store) => {
                file_store
                    .list_objects(bucket, prefix, delimiter, max_keys, continuation_token)
                    .await
            }
            Self::Clustered(cluster) => {
                cluster
                    .list_objects(bucket, prefix, delimiter, max_keys, continuation_token)
                    .await
            }
        }
    }

    /// Begin a multipart upload for `key` in `bucket`; the wrapped store
    /// returns a `String` (presumably the upload id).
    pub async fn initiate_multipart(
        &self,
        bucket: &str,
        key: &str,
        metadata: HashMap<String, String>,
    ) -> Result<String> {
        match self {
            Self::Standalone(file_store) => {
                file_store.initiate_multipart(bucket, key, metadata).await
            }
            Self::Clustered(cluster) => cluster.initiate_multipart(bucket, key, metadata).await,
        }
    }

    /// Store part `part_number` of upload `upload_id` from the streamed `body`.
    pub async fn upload_part(
        &self,
        upload_id: &str,
        part_number: u32,
        body: Incoming,
    ) -> Result<(String, u64)> {
        match self {
            Self::Standalone(file_store) => {
                file_store.upload_part(upload_id, part_number, body).await
            }
            Self::Clustered(cluster) => cluster.upload_part(upload_id, part_number, body).await,
        }
    }

    /// Complete upload `upload_id` from the given `parts` list.
    pub async fn complete_multipart(
        &self,
        upload_id: &str,
        parts: &[(u32, String)],
    ) -> Result<CompleteMultipartResult> {
        match self {
            Self::Standalone(file_store) => file_store.complete_multipart(upload_id, parts).await,
            Self::Clustered(cluster) => cluster.complete_multipart(upload_id, parts).await,
        }
    }

    /// Abort upload `upload_id`.
    pub async fn abort_multipart(&self, upload_id: &str) -> Result<()> {
        match self {
            Self::Standalone(file_store) => file_store.abort_multipart(upload_id).await,
            Self::Clustered(cluster) => cluster.abort_multipart(upload_id).await,
        }
    }

    /// List the multipart uploads the wrapped store reports for `bucket`.
    pub async fn list_multipart_uploads(
        &self,
        bucket: &str,
    ) -> Result<Vec<MultipartUploadInfo>> {
        match self {
            Self::Standalone(file_store) => file_store.list_multipart_uploads(bucket).await,
            Self::Clustered(cluster) => cluster.list_multipart_uploads(bucket).await,
        }
    }
}
// ============================ // ============================
// Key encoding (identity on Linux) // Key encoding (identity on Linux)
// ============================ // ============================

View File

@@ -1,7 +1,7 @@
use crate::storage::{BucketInfo, ListObjectsResult, MultipartUploadInfo}; use crate::storage::{BucketInfo, ListObjectsResult, MultipartUploadInfo};
const XML_DECL: &str = "<?xml version=\"1.0\" encoding=\"UTF-8\"?>"; const XML_DECL: &str = "<?xml version=\"1.0\" encoding=\"UTF-8\"?>";
const S3_NS: &str = "http://s3.amazonaws.com/doc/2006-03-01/"; const STORAGE_NS: &str = "http://s3.amazonaws.com/doc/2006-03-01/";
fn xml_escape(s: &str) -> String { fn xml_escape(s: &str) -> String {
s.replace('&', "&amp;") s.replace('&', "&amp;")
@@ -14,9 +14,9 @@ fn xml_escape(s: &str) -> String {
pub fn list_buckets_xml(buckets: &[BucketInfo]) -> String { pub fn list_buckets_xml(buckets: &[BucketInfo]) -> String {
let mut xml = format!( let mut xml = format!(
"{}\n<ListAllMyBucketsResult xmlns=\"{}\">\ "{}\n<ListAllMyBucketsResult xmlns=\"{}\">\
<Owner><ID>123456789000</ID><DisplayName>S3rver</DisplayName></Owner>\ <Owner><ID>123456789000</ID><DisplayName>Storage</DisplayName></Owner>\
<Buckets>", <Buckets>",
XML_DECL, S3_NS XML_DECL, STORAGE_NS
); );
for b in buckets { for b in buckets {
@@ -39,7 +39,7 @@ pub fn list_objects_v1_xml(bucket: &str, result: &ListObjectsResult) -> String {
<MaxKeys>{}</MaxKeys>\ <MaxKeys>{}</MaxKeys>\
<IsTruncated>{}</IsTruncated>", <IsTruncated>{}</IsTruncated>",
XML_DECL, XML_DECL,
S3_NS, STORAGE_NS,
xml_escape(bucket), xml_escape(bucket),
xml_escape(&result.prefix), xml_escape(&result.prefix),
result.max_keys, result.max_keys,
@@ -86,7 +86,7 @@ pub fn list_objects_v2_xml(bucket: &str, result: &ListObjectsResult) -> String {
<KeyCount>{}</KeyCount>\ <KeyCount>{}</KeyCount>\
<IsTruncated>{}</IsTruncated>", <IsTruncated>{}</IsTruncated>",
XML_DECL, XML_DECL,
S3_NS, STORAGE_NS,
xml_escape(bucket), xml_escape(bucket),
xml_escape(&result.prefix), xml_escape(&result.prefix),
result.max_keys, result.max_keys,
@@ -132,15 +132,6 @@ pub fn list_objects_v2_xml(bucket: &str, result: &ListObjectsResult) -> String {
xml xml
} }
/// Builds an `<Error>` XML document: `XML_DECL`, a newline, then `<Code>` and
/// `<Message>` elements holding `code` and `message` after `xml_escape`.
pub fn error_xml(code: &str, message: &str) -> String {
format!(
"{}\n<Error><Code>{}</Code><Message>{}</Message></Error>",
XML_DECL,
xml_escape(code),
xml_escape(message)
)
}
pub fn copy_object_result_xml(etag: &str, last_modified: &str) -> String { pub fn copy_object_result_xml(etag: &str, last_modified: &str) -> String {
format!( format!(
"{}\n<CopyObjectResult>\ "{}\n<CopyObjectResult>\
@@ -161,7 +152,7 @@ pub fn initiate_multipart_xml(bucket: &str, key: &str, upload_id: &str) -> Strin
<UploadId>{}</UploadId>\ <UploadId>{}</UploadId>\
</InitiateMultipartUploadResult>", </InitiateMultipartUploadResult>",
XML_DECL, XML_DECL,
S3_NS, STORAGE_NS,
xml_escape(bucket), xml_escape(bucket),
xml_escape(key), xml_escape(key),
xml_escape(upload_id) xml_escape(upload_id)
@@ -177,7 +168,7 @@ pub fn complete_multipart_xml(bucket: &str, key: &str, etag: &str) -> String {
<ETag>\"{}\"</ETag>\ <ETag>\"{}\"</ETag>\
</CompleteMultipartUploadResult>", </CompleteMultipartUploadResult>",
XML_DECL, XML_DECL,
S3_NS, STORAGE_NS,
xml_escape(bucket), xml_escape(bucket),
xml_escape(key), xml_escape(key),
xml_escape(bucket), xml_escape(bucket),
@@ -195,7 +186,7 @@ pub fn list_multipart_uploads_xml(bucket: &str, uploads: &[MultipartUploadInfo])
<MaxUploads>1000</MaxUploads>\ <MaxUploads>1000</MaxUploads>\
<IsTruncated>false</IsTruncated>", <IsTruncated>false</IsTruncated>",
XML_DECL, XML_DECL,
S3_NS, STORAGE_NS,
xml_escape(bucket) xml_escape(bucket)
); );
@@ -204,8 +195,8 @@ pub fn list_multipart_uploads_xml(bucket: &str, uploads: &[MultipartUploadInfo])
"<Upload>\ "<Upload>\
<Key>{}</Key>\ <Key>{}</Key>\
<UploadId>{}</UploadId>\ <UploadId>{}</UploadId>\
<Initiator><ID>S3RVER</ID><DisplayName>S3RVER</DisplayName></Initiator>\ <Initiator><ID>STORAGE</ID><DisplayName>STORAGE</DisplayName></Initiator>\
<Owner><ID>S3RVER</ID><DisplayName>S3RVER</DisplayName></Owner>\ <Owner><ID>STORAGE</ID><DisplayName>STORAGE</DisplayName></Owner>\
<StorageClass>STANDARD</StorageClass>\ <StorageClass>STANDARD</StorageClass>\
<Initiated>{}</Initiated>\ <Initiated>{}</Initiated>\
</Upload>", </Upload>",

301
test/test.auth.node.ts Normal file
View File

@@ -0,0 +1,301 @@
import { expect, tap } from '@git.zone/tstest/tapbundle';
import {
S3Client,
CreateBucketCommand,
ListBucketsCommand,
PutObjectCommand,
GetObjectCommand,
DeleteObjectCommand,
DeleteBucketCommand,
PutBucketPolicyCommand,
GetBucketPolicyCommand,
DeleteBucketPolicyCommand,
} from '@aws-sdk/client-s3';
import { Readable } from 'stream';
import * as smartstorage from '../ts/index.js';
// Server under test; assigned by the first test and stopped by the last one.
let testSmartStorageInstance: smartstorage.SmartStorage;
// Client signing with the credential pair registered on the server.
let authClient: S3Client;
// Client signing with a credential pair the server was not configured with.
let wrongClient: S3Client;
// Port the test server is started on.
const TEST_PORT = 3344;
// Credential pair passed to the server's auth configuration.
const ACCESS_KEY = 'TESTAKID';
const SECRET_KEY = 'TESTSECRETKEY123';
/**
 * Drains a readable stream and returns its full contents decoded as UTF-8.
 *
 * @param stream - Stream to consume; it is read to the end.
 * @returns The concatenated chunks as a UTF-8 string ('' for an empty stream).
 * @throws Rejects with the stream's error if reading fails.
 */
async function streamToString(stream: Readable): Promise<string> {
  const chunks: Buffer[] = [];
  // Async iteration replaces hand-wired 'data'/'end'/'error' listeners and
  // surfaces stream errors as a rejection of the returned promise.
  for await (const chunk of stream) {
    // Reuse chunks that are already Buffers instead of copying them again.
    chunks.push(Buffer.isBuffer(chunk) ? chunk : Buffer.from(chunk));
  }
  return Buffer.concat(chunks).toString('utf8');
}
// ============================
// Server setup
// ============================
tap.test('should start storage server with auth enabled', async () => {
  const endpoint = `http://localhost:${TEST_PORT}`;
  const region = 'us-east-1';

  // Boot a fresh server that only knows the ACCESS_KEY / SECRET_KEY pair.
  testSmartStorageInstance = await smartstorage.SmartStorage.createAndStart({
    server: { port: TEST_PORT, silent: true, region },
    storage: { cleanSlate: true },
    auth: {
      enabled: true,
      credentials: [{ accessKeyId: ACCESS_KEY, secretAccessKey: SECRET_KEY }],
    },
  });

  // Both clients are identical except for the credentials they sign with.
  const clientFor = (accessKeyId: string, secretAccessKey: string): S3Client =>
    new S3Client({
      endpoint,
      region,
      credentials: { accessKeyId, secretAccessKey },
      forcePathStyle: true,
    });

  authClient = clientFor(ACCESS_KEY, SECRET_KEY);
  wrongClient = clientFor('WRONGKEY', 'WRONGSECRET');
});
// ============================
// Authenticated CRUD
// ============================
tap.test('authenticated: should list buckets', async () => {
  const listing = await authClient.send(new ListBucketsCommand({}));
  expect(listing.$metadata.httpStatusCode).toEqual(200);
  expect(Array.isArray(listing.Buckets)).toEqual(true);
});

tap.test('authenticated: should create a bucket', async () => {
  const createBucket = new CreateBucketCommand({ Bucket: 'auth-test-bucket' });
  const created = await authClient.send(createBucket);
  expect(created.$metadata.httpStatusCode).toEqual(200);
});

tap.test('authenticated: should upload an object', async () => {
  const upload = new PutObjectCommand({
    Bucket: 'auth-test-bucket',
    Key: 'hello.txt',
    Body: 'Hello authenticated world!',
    ContentType: 'text/plain',
  });
  const uploaded = await authClient.send(upload);
  expect(uploaded.$metadata.httpStatusCode).toEqual(200);
});

tap.test('authenticated: should download the object', async () => {
  const download = new GetObjectCommand({ Bucket: 'auth-test-bucket', Key: 'hello.txt' });
  const downloaded = await authClient.send(download);
  expect(downloaded.$metadata.httpStatusCode).toEqual(200);

  // The body must round-trip exactly what the upload test stored.
  const body = await streamToString(downloaded.Body as Readable);
  expect(body).toEqual('Hello authenticated world!');
});
// ============================
// Wrong credentials → 403
// ============================
tap.test('wrong credentials: should fail to list buckets', async () => {
  const attempt = wrongClient.send(new ListBucketsCommand({}));
  await expect(attempt).rejects.toThrow();
});

tap.test('wrong credentials: should fail to get object', async () => {
  const getHello = new GetObjectCommand({ Bucket: 'auth-test-bucket', Key: 'hello.txt' });
  const attempt = wrongClient.send(getHello);
  await expect(attempt).rejects.toThrow();
});
// ============================
// Anonymous → 403 (no policy yet)
// ============================
tap.test('anonymous: should fail to list buckets', async () => {
  // Unsigned request against the service root.
  const { status } = await fetch(`http://localhost:${TEST_PORT}/`);
  expect(status).toEqual(403);
});

tap.test('anonymous: should fail to get object (no policy)', async () => {
  // Unsigned request for an object in a bucket that has no policy yet.
  const { status } = await fetch(`http://localhost:${TEST_PORT}/auth-test-bucket/hello.txt`);
  expect(status).toEqual(403);
});
// ============================
// Bucket policy: public read
// ============================
tap.test('should PUT a public-read bucket policy', async () => {
  // Grants s3:GetObject on every object of the bucket to any principal.
  const publicRead = {
    Sid: 'PublicRead',
    Effect: 'Allow',
    Principal: '*',
    Action: ['s3:GetObject'],
    Resource: [`arn:aws:s3:::auth-test-bucket/*`],
  };
  const document = JSON.stringify({ Version: '2012-10-17', Statement: [publicRead] });

  const putResult = await authClient.send(
    new PutBucketPolicyCommand({ Bucket: 'auth-test-bucket', Policy: document }),
  );
  expect(putResult.$metadata.httpStatusCode).toEqual(204);
});

tap.test('should GET the bucket policy', async () => {
  const getPolicy = new GetBucketPolicyCommand({ Bucket: 'auth-test-bucket' });
  const fetched = await authClient.send(getPolicy);
  expect(fetched.$metadata.httpStatusCode).toEqual(200);

  const parsed = JSON.parse(fetched.Policy!);
  expect(parsed.Statement[0].Sid).toEqual('PublicRead');
});

tap.test('anonymous: should GET object after public-read policy', async () => {
  const anonResponse = await fetch(`http://localhost:${TEST_PORT}/auth-test-bucket/hello.txt`);
  expect(anonResponse.status).toEqual(200);

  const body = await anonResponse.text();
  expect(body).toEqual('Hello authenticated world!');
});

tap.test('anonymous: should still fail to PUT object (policy only allows GET)', async () => {
  const target = `http://localhost:${TEST_PORT}/auth-test-bucket/anon-file.txt`;
  const { status } = await fetch(target, { method: 'PUT', body: 'should fail' });
  expect(status).toEqual(403);
});
// ============================
// Deny policy
// ============================
tap.test('should PUT a deny policy that blocks authenticated delete', async () => {
  const objectsArn = `arn:aws:s3:::auth-test-bucket/*`;

  // Keep public read, and add an explicit deny for deletes by any principal.
  const allowPublicRead = {
    Sid: 'PublicRead',
    Effect: 'Allow',
    Principal: '*',
    Action: ['s3:GetObject'],
    Resource: [objectsArn],
  };
  const denyDelete = {
    Sid: 'DenyDelete',
    Effect: 'Deny',
    Principal: '*',
    Action: ['s3:DeleteObject'],
    Resource: [objectsArn],
  };
  const document = JSON.stringify({
    Version: '2012-10-17',
    Statement: [allowPublicRead, denyDelete],
  });

  const putResult = await authClient.send(
    new PutBucketPolicyCommand({ Bucket: 'auth-test-bucket', Policy: document }),
  );
  expect(putResult.$metadata.httpStatusCode).toEqual(204);
});

tap.test('authenticated: should be denied delete by policy', async () => {
  const deleteHello = new DeleteObjectCommand({ Bucket: 'auth-test-bucket', Key: 'hello.txt' });
  const attempt = authClient.send(deleteHello);
  await expect(attempt).rejects.toThrow();
});
// ============================
// DELETE bucket policy
// ============================
tap.test('should DELETE the bucket policy', async () => {
  const dropPolicy = new DeleteBucketPolicyCommand({ Bucket: 'auth-test-bucket' });
  const dropped = await authClient.send(dropPolicy);
  expect(dropped.$metadata.httpStatusCode).toEqual(204);
});

tap.test('should GET policy → 404 after deletion', async () => {
  // With the policy gone, fetching it is expected to reject.
  const attempt = authClient.send(new GetBucketPolicyCommand({ Bucket: 'auth-test-bucket' }));
  await expect(attempt).rejects.toThrow();
});
// ============================
// Cleanup
// ============================
tap.test('authenticated: delete object after policy removed', async () => {
  const deleteHello = new DeleteObjectCommand({ Bucket: 'auth-test-bucket', Key: 'hello.txt' });
  const deleted = await authClient.send(deleteHello);
  expect(deleted.$metadata.httpStatusCode).toEqual(204);
});

tap.test('authenticated: delete the bucket', async () => {
  const deleteBucket = new DeleteBucketCommand({ Bucket: 'auth-test-bucket' });
  const deleted = await authClient.send(deleteBucket);
  expect(deleted.$metadata.httpStatusCode).toEqual(204);
});

tap.test('should stop the storage server', async () => {
  // Shuts down the instance started by the first test.
  await testSmartStorageInstance.stop();
});
export default tap.start();

View File

@@ -1,9 +1,9 @@
import { expect, tap } from '@git.zone/tstest/tapbundle'; import { expect, tap } from '@git.zone/tstest/tapbundle';
import { S3Client, CreateBucketCommand, ListBucketsCommand, PutObjectCommand, GetObjectCommand, DeleteObjectCommand, DeleteBucketCommand } from '@aws-sdk/client-s3'; import { S3Client, CreateBucketCommand, ListBucketsCommand, PutObjectCommand, GetObjectCommand, DeleteObjectCommand, DeleteBucketCommand } from '@aws-sdk/client-s3';
import { Readable } from 'stream'; import { Readable } from 'stream';
import * as smarts3 from '../ts/index.js'; import * as smartstorage from '../ts/index.js';
let testSmarts3Instance: smarts3.Smarts3; let testSmartStorageInstance: smartstorage.SmartStorage;
let s3Client: S3Client; let s3Client: S3Client;
// Helper to convert stream to string // Helper to convert stream to string
@@ -16,8 +16,8 @@ async function streamToString(stream: Readable): Promise<string> {
}); });
} }
tap.test('should start the S3 server and configure client', async () => { tap.test('should start the storage server and configure client', async () => {
testSmarts3Instance = await smarts3.Smarts3.createAndStart({ testSmartStorageInstance = await smartstorage.SmartStorage.createAndStart({
server: { server: {
port: 3337, port: 3337,
silent: true, silent: true,
@@ -27,7 +27,7 @@ tap.test('should start the S3 server and configure client', async () => {
}, },
}); });
const descriptor = await testSmarts3Instance.getS3Descriptor(); const descriptor = await testSmartStorageInstance.getStorageDescriptor();
s3Client = new S3Client({ s3Client = new S3Client({
endpoint: `http://${descriptor.endpoint}:${descriptor.port}`, endpoint: `http://${descriptor.endpoint}:${descriptor.port}`,
@@ -101,8 +101,8 @@ tap.test('should delete the bucket', async () => {
expect(response.$metadata.httpStatusCode).toEqual(204); expect(response.$metadata.httpStatusCode).toEqual(204);
}); });
tap.test('should stop the S3 server', async () => { tap.test('should stop the storage server', async () => {
await testSmarts3Instance.stop(); await testSmartStorageInstance.stop();
}); });
export default tap.start(); export default tap.start();

View File

@@ -0,0 +1,335 @@
import { expect, tap } from '@git.zone/tstest/tapbundle';
import {
S3Client,
CreateBucketCommand,
DeleteBucketCommand,
ListBucketsCommand,
ListObjectsV2Command,
PutObjectCommand,
GetObjectCommand,
DeleteObjectCommand,
CopyObjectCommand,
HeadBucketCommand,
PutBucketPolicyCommand,
GetBucketPolicyCommand,
DeleteBucketPolicyCommand,
} from '@aws-sdk/client-s3';
import * as smartstorage from '../ts/index.js';
// Server under test; assigned in the setup test and stopped in teardown.
let testSmartStorageInstance: smartstorage.SmartStorage;
// Client signing with the credential pair registered on the server.
let authClient: S3Client;
// Port the main (auth-enabled) test server is started on.
const TEST_PORT = 3347;
// Credential pair passed to the server's auth configuration.
const ACCESS_KEY = 'TESTAKID';
const SECRET_KEY = 'TESTSECRETKEY123';
// Bucket every policy in this file is attached to.
const BUCKET = 'actions-bucket';
// Base URL used by the SDK client and by anonymous fetch requests.
const BASE_URL = `http://localhost:${TEST_PORT}`;
/** Puts a Version 2012-10-17 policy made of `statements` on BUCKET via authClient. */
async function putPolicy(statements: any[]) {
  const policyDocument = { Version: '2012-10-17', Statement: statements };
  const command = new PutBucketPolicyCommand({
    Bucket: BUCKET,
    Policy: JSON.stringify(policyDocument),
  });
  await authClient.send(command);
}
/** Sends DeleteBucketPolicy for BUCKET via authClient. */
async function clearPolicy() {
  const command = new DeleteBucketPolicyCommand({ Bucket: BUCKET });
  await authClient.send(command);
}
/**
 * Builds a Deny statement for `action` that applies to any principal and
 * covers both BUCKET itself and every object inside it.
 */
function denyStatement(action: string) {
  // Only the first ':' is replaced, matching String.prototype.replace with a string pattern.
  const sid = `Deny_${action.replace(':', '_')}`;
  const bucketArn = `arn:aws:s3:::${BUCKET}`;
  const objectsArn = `arn:aws:s3:::${BUCKET}/*`;
  return {
    Sid: sid,
    Effect: 'Deny' as const,
    Principal: '*',
    Action: action,
    Resource: [bucketArn, objectsArn],
  };
}
// ============================
// Server setup
// ============================
tap.test('setup: start server, create bucket, upload object', async () => {
  const region = 'us-east-1';
  // Fresh object per use so the server config and the client never share a reference.
  const credentialPair = () => ({ accessKeyId: ACCESS_KEY, secretAccessKey: SECRET_KEY });

  testSmartStorageInstance = await smartstorage.SmartStorage.createAndStart({
    server: { port: TEST_PORT, silent: true, region },
    storage: { cleanSlate: true },
    auth: { enabled: true, credentials: [credentialPair()] },
  });

  authClient = new S3Client({
    endpoint: BASE_URL,
    region,
    credentials: credentialPair(),
    forcePathStyle: true,
  });

  // Seed the bucket and the single object the deny tests operate on.
  const createBucket = new CreateBucketCommand({ Bucket: BUCKET });
  await authClient.send(createBucket);

  const seedObject = new PutObjectCommand({
    Bucket: BUCKET,
    Key: 'obj.txt',
    Body: 'test content for actions',
    ContentType: 'text/plain',
  });
  await authClient.send(seedObject);
});
// ============================
// Per-action deny enforcement
// ============================
tap.test('Deny s3:ListBucket → authenticated ListObjects fails', async () => {
  await putPolicy([denyStatement('s3:ListBucket')]);
  try {
    await expect(
      authClient.send(new ListObjectsV2Command({ Bucket: BUCKET }))
    ).rejects.toThrow();
  } finally {
    // Remove the deny policy even when the assertion fails, so it cannot
    // leak into the tests that follow.
    await clearPolicy();
  }
});
tap.test('Deny s3:CreateBucket → not enforceable via bucket policy, CreateBucket on new bucket succeeds', async () => {
  // A bucket policy is stored on an existing bucket. For a bucket that does not
  // exist yet there is no policy to load, so s3:CreateBucket cannot be denied
  // through a bucket policy. This test therefore only verifies that creating
  // (and then removing) a brand-new bucket works for the authenticated client.
  await authClient.send(new CreateBucketCommand({ Bucket: 'new-test-bucket' }));
  await authClient.send(new DeleteBucketCommand({ Bucket: 'new-test-bucket' }));
});
// Each test below installs a single Deny statement, checks that the matching
// operation is rejected, and removes the policy in `finally` so that a failed
// assertion cannot leave the deny policy behind for later tests.

tap.test('Deny s3:DeleteBucket → authenticated DeleteBucket fails', async () => {
  await putPolicy([denyStatement('s3:DeleteBucket')]);
  try {
    await expect(
      authClient.send(new DeleteBucketCommand({ Bucket: BUCKET }))
    ).rejects.toThrow();
  } finally {
    await clearPolicy();
  }
});

tap.test('Deny s3:GetObject → authenticated GetObject fails', async () => {
  await putPolicy([denyStatement('s3:GetObject')]);
  try {
    await expect(
      authClient.send(new GetObjectCommand({ Bucket: BUCKET, Key: 'obj.txt' }))
    ).rejects.toThrow();
  } finally {
    await clearPolicy();
  }
});

tap.test('Deny s3:PutObject → authenticated PutObject fails', async () => {
  await putPolicy([denyStatement('s3:PutObject')]);
  try {
    await expect(
      authClient.send(
        new PutObjectCommand({
          Bucket: BUCKET,
          Key: 'new-obj.txt',
          Body: 'should fail',
        })
      )
    ).rejects.toThrow();
  } finally {
    await clearPolicy();
  }
});

tap.test('Deny s3:DeleteObject → authenticated DeleteObject fails', async () => {
  await putPolicy([denyStatement('s3:DeleteObject')]);
  try {
    await expect(
      authClient.send(new DeleteObjectCommand({ Bucket: BUCKET, Key: 'obj.txt' }))
    ).rejects.toThrow();
  } finally {
    await clearPolicy();
  }
});

tap.test('Deny s3:PutObject → authenticated CopyObject fails (maps to s3:PutObject)', async () => {
  await putPolicy([denyStatement('s3:PutObject')]);
  try {
    await expect(
      authClient.send(
        new CopyObjectCommand({
          Bucket: BUCKET,
          Key: 'obj-copy.txt',
          CopySource: `${BUCKET}/obj.txt`,
        })
      )
    ).rejects.toThrow();
  } finally {
    await clearPolicy();
  }
});
tap.test('Deny s3:GetBucketPolicy → authenticated GetBucketPolicy fails', async () => {
  // Install a policy that denies reading the policy itself, then try to read it.
  await putPolicy([denyStatement('s3:GetBucketPolicy')]);
  try {
    await expect(
      authClient.send(new GetBucketPolicyCommand({ Bucket: BUCKET }))
    ).rejects.toThrow();
  } finally {
    // DeleteBucketPolicy is not denied by this policy, so cleanup still works;
    // running it in `finally` keeps a failed assertion from leaking the policy.
    await clearPolicy();
  }
});

tap.test('Deny s3:PutBucketPolicy → authenticated PutBucketPolicy fails (for second policy)', async () => {
  // The first put succeeds because no policy is in place yet.
  await putPolicy([denyStatement('s3:PutBucketPolicy')]);
  try {
    // A second put is now covered by the deny statement and should fail.
    await expect(
      authClient.send(
        new PutBucketPolicyCommand({
          Bucket: BUCKET,
          Policy: JSON.stringify({
            Version: '2012-10-17',
            Statement: [
              {
                Sid: 'SomeOtherPolicy',
                Effect: 'Allow',
                Principal: '*',
                Action: 's3:GetObject',
                Resource: `arn:aws:s3:::${BUCKET}/*`,
              },
            ],
          }),
        })
      )
    ).rejects.toThrow();
  } finally {
    await clearPolicy();
  }
});
tap.test('Deny s3:DeleteBucketPolicy → authenticated DeleteBucketPolicy fails', async () => {
await putPolicy([denyStatement('s3:DeleteBucketPolicy')]);
await expect(
authClient.send(new DeleteBucketPolicyCommand({ Bucket: BUCKET }))
).rejects.toThrow();
// No clearPolicy() here: deleting the policy is exactly what this policy
// denies, and the statement applies to Principal '*'.
// The deny policy is intentionally left in place; the following test
// replaces it through PutBucketPolicy, which this policy does not deny.
});
tap.test('Recovery: remove deny policy → authenticated operations resume working', async () => {
// The previous test left a policy in place that denies s3:DeleteBucketPolicy,
// so it cannot be removed with clearPolicy() directly.
// s3:PutBucketPolicy was not denied, so the policy is first overwritten with
// an allow-all policy and only deleted at the end of this test.
await authClient.send(
new PutBucketPolicyCommand({
Bucket: BUCKET,
Policy: JSON.stringify({
Version: '2012-10-17',
Statement: [
{
Sid: 'AllowAll',
Effect: 'Allow',
Principal: '*',
Action: 's3:*',
Resource: [`arn:aws:s3:::${BUCKET}`, `arn:aws:s3:::${BUCKET}/*`],
},
],
}),
})
);
// With the deny statement replaced, object reads and listings should succeed again
const getResp = await authClient.send(
new GetObjectCommand({ Bucket: BUCKET, Key: 'obj.txt' })
);
expect(getResp.$metadata.httpStatusCode).toEqual(200);
const listResp = await authClient.send(
new ListObjectsV2Command({ Bucket: BUCKET })
);
expect(listResp.$metadata.httpStatusCode).toEqual(200);
// Deleting the policy is no longer denied, so leave the bucket without one
await clearPolicy();
});
// ============================
// Special cases
// ============================
tap.test('ListAllMyBuckets always requires auth → anonymous fetch to / returns 403', async () => {
  // Unsigned request against the service root of the auth-enabled server.
  const { status } = await fetch(`${BASE_URL}/`);
  expect(status).toEqual(403);
});
tap.test('Auth disabled mode → anonymous full access works', async () => {
  const noAuthPort = 3348;
  const noAuthUrl = `http://localhost:${noAuthPort}`;

  // Start a second server with auth disabled
  const noAuthInstance = await smartstorage.SmartStorage.createAndStart({
    server: { port: noAuthPort, silent: true, region: 'us-east-1' },
    storage: { cleanSlate: true },
    auth: { enabled: false, credentials: [] },
  });

  try {
    // Anonymous operations should all work
    const listResp = await fetch(`${noAuthUrl}/`);
    expect(listResp.status).toEqual(200);

    // Create bucket via fetch
    const createResp = await fetch(`${noAuthUrl}/anon-bucket`, { method: 'PUT' });
    expect(createResp.status).toEqual(200);

    // Put object
    const putResp = await fetch(`${noAuthUrl}/anon-bucket/file.txt`, {
      method: 'PUT',
      body: 'hello anon',
    });
    expect(putResp.status).toEqual(200);

    // Get object
    const getResp = await fetch(`${noAuthUrl}/anon-bucket/file.txt`);
    expect(getResp.status).toEqual(200);
    const text = await getResp.text();
    expect(text).toEqual('hello anon');

    // Delete object
    const delObjResp = await fetch(`${noAuthUrl}/anon-bucket/file.txt`, { method: 'DELETE' });
    expect(delObjResp.status).toEqual(204);

    // Delete bucket
    const delBucketResp = await fetch(`${noAuthUrl}/anon-bucket`, { method: 'DELETE' });
    expect(delBucketResp.status).toEqual(204);
  } finally {
    // Stop the secondary server even when an assertion above throws, so it is
    // never left running after this test.
    await noAuthInstance.stop();
  }
});
// ============================
// Teardown
// ============================
tap.test('teardown: clean up and stop server', async () => {
  // Best-effort cleanup: the object first, then the bucket. Either may already
  // be gone, so a failing step is ignored and the next one still runs.
  const cleanupSteps = [
    () => authClient.send(new DeleteObjectCommand({ Bucket: BUCKET, Key: 'obj.txt' })),
    () => authClient.send(new DeleteBucketCommand({ Bucket: BUCKET })),
  ];
  for (const step of cleanupSteps) {
    try {
      await step();
    } catch {
      // May already be deleted
    }
  }

  await testSmartStorageInstance.stop();
});
export default tap.start();

View File

@@ -0,0 +1,252 @@
import { expect, tap } from '@git.zone/tstest/tapbundle';
import {
S3Client,
CreateBucketCommand,
DeleteBucketCommand,
PutBucketPolicyCommand,
GetBucketPolicyCommand,
DeleteBucketPolicyCommand,
} from '@aws-sdk/client-s3';
import * as smartstorage from '../ts/index.js';
// Server under test; assigned in the setup test and stopped in teardown.
let testSmartStorageInstance: smartstorage.SmartStorage;
// Client signing with the credential pair registered on the server.
let authClient: S3Client;
// Port the test server is started on.
const TEST_PORT = 3345;
// Credential pair passed to the server's auth configuration.
const ACCESS_KEY = 'TESTAKID';
const SECRET_KEY = 'TESTSECRETKEY123';
// Bucket the policy CRUD tests operate on.
const BUCKET = 'policy-crud-bucket';
/** Serialises `statements` into a Version 2012-10-17 policy document (JSON string). */
function makePolicy(statements: any[]) {
  const policyDocument = { Version: '2012-10-17', Statement: statements };
  return JSON.stringify(policyDocument);
}
// Statement reused by the tests that need an acceptable policy: it allows
// s3:GetObject on every object in BUCKET for any principal.
const validStatement = {
Sid: 'Test1',
Effect: 'Allow',
Principal: '*',
Action: ['s3:GetObject'],
Resource: [`arn:aws:s3:::${BUCKET}/*`],
};
// ============================
// Server setup
// ============================
tap.test('setup: start storage server with auth enabled', async () => {
  const region = 'us-east-1';
  // Fresh object per use so the server config and the client never share a reference.
  const credentialPair = () => ({ accessKeyId: ACCESS_KEY, secretAccessKey: SECRET_KEY });

  testSmartStorageInstance = await smartstorage.SmartStorage.createAndStart({
    server: { port: TEST_PORT, silent: true, region },
    storage: { cleanSlate: true },
    auth: { enabled: true, credentials: [credentialPair()] },
  });

  authClient = new S3Client({
    endpoint: `http://localhost:${TEST_PORT}`,
    region,
    credentials: credentialPair(),
    forcePathStyle: true,
  });
});

tap.test('setup: create bucket', async () => {
  const createBucket = new CreateBucketCommand({ Bucket: BUCKET });
  await authClient.send(createBucket);
});
// ============================
// CRUD tests
// ============================
tap.test('GET policy on bucket with no policy → throws (NoSuchBucketPolicy)', async () => {
  const attempt = authClient.send(new GetBucketPolicyCommand({ Bucket: BUCKET }));
  await expect(attempt).rejects.toThrow();
});

tap.test('PUT valid policy → 204', async () => {
  const putValid = new PutBucketPolicyCommand({
    Bucket: BUCKET,
    Policy: makePolicy([validStatement]),
  });
  const stored = await authClient.send(putValid);
  expect(stored.$metadata.httpStatusCode).toEqual(204);
});

tap.test('GET policy back → returns matching JSON', async () => {
  const fetched = await authClient.send(new GetBucketPolicyCommand({ Bucket: BUCKET }));
  expect(fetched.$metadata.httpStatusCode).toEqual(200);

  // The stored document must be the one written by the previous test.
  const parsed = JSON.parse(fetched.Policy!);
  expect(parsed.Version).toEqual('2012-10-17');
  const firstStatement = parsed.Statement[0];
  expect(firstStatement.Sid).toEqual('Test1');
  expect(firstStatement.Effect).toEqual('Allow');
});

tap.test('PUT updated policy (overwrite) → 204, GET returns new version', async () => {
  const replacement = {
    Sid: 'Updated',
    Effect: 'Deny',
    Principal: '*',
    Action: ['s3:DeleteObject'],
    Resource: [`arn:aws:s3:::${BUCKET}/*`],
  };

  const overwrite = new PutBucketPolicyCommand({
    Bucket: BUCKET,
    Policy: makePolicy([replacement]),
  });
  const overwritten = await authClient.send(overwrite);
  expect(overwritten.$metadata.httpStatusCode).toEqual(204);

  // Reading back must show the replacement, not the original statement.
  const reread = await authClient.send(new GetBucketPolicyCommand({ Bucket: BUCKET }));
  const firstStatement = JSON.parse(reread.Policy!).Statement[0];
  expect(firstStatement.Sid).toEqual('Updated');
  expect(firstStatement.Effect).toEqual('Deny');
});

tap.test('DELETE policy → 204', async () => {
  const removed = await authClient.send(new DeleteBucketPolicyCommand({ Bucket: BUCKET }));
  expect(removed.$metadata.httpStatusCode).toEqual(204);
});

tap.test('DELETE policy again (idempotent) → 204', async () => {
  const removedAgain = await authClient.send(new DeleteBucketPolicyCommand({ Bucket: BUCKET }));
  expect(removedAgain.$metadata.httpStatusCode).toEqual(204);
});

tap.test('GET policy after delete → throws', async () => {
  const attempt = authClient.send(new GetBucketPolicyCommand({ Bucket: BUCKET }));
  await expect(attempt).rejects.toThrow();
});
/** Asserts that putting `policy` on `bucket` with the authenticated client is rejected. */
async function expectPutPolicyRejected(bucket: string, policy: string): Promise<void> {
  const command = new PutBucketPolicyCommand({ Bucket: bucket, Policy: policy });
  await expect(authClient.send(command)).rejects.toThrow();
}

tap.test('PUT policy on non-existent bucket → throws (NoSuchBucket)', async () => {
  await expectPutPolicyRejected('nonexistent-bucket-xyz', makePolicy([validStatement]));
});

tap.test('PUT invalid JSON → throws (MalformedPolicy)', async () => {
  await expectPutPolicyRejected(BUCKET, '{not valid json!!!');
});

tap.test('PUT policy with wrong version → throws (MalformedPolicy)', async () => {
  const wrongVersion = JSON.stringify({
    Version: '2023-01-01',
    Statement: [validStatement],
  });
  await expectPutPolicyRejected(BUCKET, wrongVersion);
});

tap.test('PUT policy with empty statements array → throws (MalformedPolicy)', async () => {
  const noStatements = JSON.stringify({
    Version: '2012-10-17',
    Statement: [],
  });
  await expectPutPolicyRejected(BUCKET, noStatements);
});

tap.test('PUT policy with action missing s3: prefix → throws (MalformedPolicy)', async () => {
  const badAction = {
    Sid: 'BadAction',
    Effect: 'Allow',
    Principal: '*',
    Action: ['GetObject'],
    Resource: [`arn:aws:s3:::${BUCKET}/*`],
  };
  await expectPutPolicyRejected(BUCKET, makePolicy([badAction]));
});

tap.test('PUT policy with resource missing arn:aws:s3::: prefix → throws (MalformedPolicy)', async () => {
  const badResource = {
    Sid: 'BadResource',
    Effect: 'Allow',
    Principal: '*',
    Action: ['s3:GetObject'],
    Resource: ['policy-crud-bucket/*'],
  };
  await expectPutPolicyRejected(BUCKET, makePolicy([badResource]));
});
tap.test('Bucket deletion cleans up associated policy', async () => {
  // Attach a policy, then drop and recreate the bucket under the same name.
  const attachPolicy = new PutBucketPolicyCommand({
    Bucket: BUCKET,
    Policy: makePolicy([validStatement]),
  });
  await authClient.send(attachPolicy);
  await authClient.send(new DeleteBucketCommand({ Bucket: BUCKET }));
  await authClient.send(new CreateBucketCommand({ Bucket: BUCKET }));

  // The recreated bucket must not inherit the old policy.
  const attempt = authClient.send(new GetBucketPolicyCommand({ Bucket: BUCKET }));
  await expect(attempt).rejects.toThrow();
});
// ============================
// Teardown
// ============================
tap.test('teardown: delete bucket and stop server', async () => {
  try {
    await authClient.send(new DeleteBucketCommand({ Bucket: BUCKET }));
  } finally {
    // Stop the server even if bucket deletion fails, so it is never left
    // running; a deletion error still fails the test.
    await testSmartStorageInstance.stop();
  }
});
export default tap.start();

View File

@@ -0,0 +1,517 @@
import { expect, tap } from '@git.zone/tstest/tapbundle';
import {
S3Client,
CreateBucketCommand,
DeleteBucketCommand,
PutObjectCommand,
GetObjectCommand,
DeleteObjectCommand,
PutBucketPolicyCommand,
DeleteBucketPolicyCommand,
} from '@aws-sdk/client-s3';
import { Readable } from 'stream';
import * as smartstorage from '../ts/index.js';
// Server under test; assigned in the setup test.
let testSmartStorageInstance: smartstorage.SmartStorage;
// Client signing with the credential pair registered on the server.
let authClient: S3Client;
// Port the test server is started on.
const TEST_PORT = 3346;
// Credential pair passed to the server's auth configuration.
const ACCESS_KEY = 'TESTAKID';
const SECRET_KEY = 'TESTSECRETKEY123';
// Bucket every policy in this file is attached to.
const BUCKET = 'eval-bucket';
// Base URL used by the SDK client and by anonymous fetch requests.
const BASE_URL = `http://localhost:${TEST_PORT}`;
/**
 * Collects every chunk emitted by `stream` and resolves with the UTF-8 text;
 * rejects with the stream's error if one is emitted.
 */
async function streamToString(stream: Readable): Promise<string> {
  return new Promise<string>((resolve, reject) => {
    const collected: Buffer[] = [];
    const finish = (): void => {
      const joined = Buffer.concat(collected);
      resolve(joined.toString('utf8'));
    };
    stream.on('data', (piece) => {
      collected.push(Buffer.from(piece));
    });
    stream.on('error', reject);
    stream.on('end', finish);
  });
}
/** Puts a Version 2012-10-17 policy made of `statements` on BUCKET via authClient. */
async function putPolicy(statements: any[]) {
  const serialized = JSON.stringify({ Version: '2012-10-17', Statement: statements });
  const command = new PutBucketPolicyCommand({ Bucket: BUCKET, Policy: serialized });
  await authClient.send(command);
}
/** Sends DeleteBucketPolicy for BUCKET via authClient. */
async function clearPolicy() {
  const command = new DeleteBucketPolicyCommand({ Bucket: BUCKET });
  await authClient.send(command);
}
// ============================
// Server setup
// ============================
tap.test('setup: start server, create bucket, upload object', async () => {
  const region = 'us-east-1';
  // Fresh object per use so the server config and the client never share a reference.
  const credentialPair = () => ({ accessKeyId: ACCESS_KEY, secretAccessKey: SECRET_KEY });

  testSmartStorageInstance = await smartstorage.SmartStorage.createAndStart({
    server: { port: TEST_PORT, silent: true, region },
    storage: { cleanSlate: true },
    auth: { enabled: true, credentials: [credentialPair()] },
  });

  authClient = new S3Client({
    endpoint: BASE_URL,
    region,
    credentials: credentialPair(),
    forcePathStyle: true,
  });

  // Seed the bucket and the object that the evaluation tests read.
  const createBucket = new CreateBucketCommand({ Bucket: BUCKET });
  await authClient.send(createBucket);

  const seedObject = new PutObjectCommand({
    Bucket: BUCKET,
    Key: 'test-obj.txt',
    Body: 'hello policy eval',
    ContentType: 'text/plain',
  });
  await authClient.send(seedObject);
});
// ============================
// Principal matching
// ============================
// Principal "*" with s3:GetObject: an unsigned GET must return the object.
tap.test('Principal: "*" → anonymous fetch GET succeeds', async () => {
  await putPolicy([
    {
      Sid: 'PrincipalWildcard',
      Effect: 'Allow',
      Principal: '*',
      Action: 's3:GetObject',
      Resource: `arn:aws:s3:::${BUCKET}/*`,
    },
  ]);
  try {
    const resp = await fetch(`${BASE_URL}/${BUCKET}/test-obj.txt`);
    expect(resp.status).toEqual(200);
    const text = await resp.text();
    expect(text).toEqual('hello policy eval');
  } finally {
    // Remove the policy even when an expectation above throws.
    await clearPolicy();
  }
});
// Principal {"AWS": "*"}: expected to apply to signed requests only.
tap.test('Principal: {"AWS": "*"} → anonymous GET fails, authenticated GET succeeds', async () => {
  await putPolicy([
    {
      Sid: 'AwsWildcard',
      Effect: 'Allow',
      Principal: { AWS: '*' },
      Action: 's3:GetObject',
      Resource: `arn:aws:s3:::${BUCKET}/*`,
    },
  ]);
  try {
    // Anonymous → no identity → Principal AWS:* doesn't match anonymous → NoOpinion → denied
    const anonResp = await fetch(`${BASE_URL}/${BUCKET}/test-obj.txt`);
    expect(anonResp.status).toEqual(403);

    // Authenticated → has identity → Principal AWS:* matches → Allow
    const authResp = await authClient.send(
      new GetObjectCommand({ Bucket: BUCKET, Key: 'test-obj.txt' })
    );
    expect(authResp.$metadata.httpStatusCode).toEqual(200);
    // Read the streamed body to the end instead of abandoning it, and check it
    // is the payload uploaded by the setup test.
    expect(await streamToString(authResp.Body as Readable)).toEqual('hello policy eval');
  } finally {
    // Remove the policy even when an expectation above throws.
    await clearPolicy();
  }
});
// Principal names the ARN built from the client's own access key.
tap.test('Principal: {"AWS": "arn:aws:iam::TESTAKID"} → authenticated GET succeeds', async () => {
  await putPolicy([
    {
      Sid: 'SpecificPrincipal',
      Effect: 'Allow',
      Principal: { AWS: `arn:aws:iam::${ACCESS_KEY}` },
      Action: 's3:GetObject',
      Resource: `arn:aws:s3:::${BUCKET}/*`,
    },
  ]);
  try {
    const resp = await authClient.send(
      new GetObjectCommand({ Bucket: BUCKET, Key: 'test-obj.txt' })
    );
    expect(resp.$metadata.httpStatusCode).toEqual(200);
    // Read the streamed body to the end instead of abandoning it, and check it
    // is the payload uploaded by the setup test.
    expect(await streamToString(resp.Body as Readable)).toEqual('hello policy eval');
  } finally {
    // Remove the policy even when an expectation above throws.
    await clearPolicy();
  }
});
// Principal names a different key; the signed request is still expected to pass.
tap.test('Principal: {"AWS": "arn:aws:iam::WRONGKEY"} → authenticated GET still succeeds (default allow)', async () => {
  await putPolicy([
    {
      Sid: 'WrongPrincipal',
      Effect: 'Allow',
      Principal: { AWS: 'arn:aws:iam::WRONGKEY' },
      Action: 's3:GetObject',
      Resource: `arn:aws:s3:::${BUCKET}/*`,
    },
  ]);
  try {
    // Principal doesn't match our key → NoOpinion → default allow for authenticated
    const resp = await authClient.send(
      new GetObjectCommand({ Bucket: BUCKET, Key: 'test-obj.txt' })
    );
    expect(resp.$metadata.httpStatusCode).toEqual(200);
    // Read the streamed body to the end instead of abandoning it, and check it
    // is the payload uploaded by the setup test.
    expect(await streamToString(resp.Body as Readable)).toEqual('hello policy eval');
  } finally {
    // Remove the policy even when an expectation above throws.
    await clearPolicy();
  }
});
// ============================
// Action matching
// ============================
// Action "s3:*": unsigned GET and unsigned PUT are both expected to succeed.
tap.test('Action: "s3:*" → anonymous can GET and PUT (wildcard matches all)', async () => {
  await putPolicy([
    {
      Sid: 'S3Wildcard',
      Effect: 'Allow',
      Principal: '*',
      Action: 's3:*',
      Resource: `arn:aws:s3:::${BUCKET}/*`,
    },
  ]);
  try {
    const getResp = await fetch(`${BASE_URL}/${BUCKET}/test-obj.txt`);
    expect(getResp.status).toEqual(200);

    const putResp = await fetch(`${BASE_URL}/${BUCKET}/anon-wildcard.txt`, {
      method: 'PUT',
      body: 'wildcard put',
    });
    try {
      expect(putResp.status).toEqual(200);
    } finally {
      // Clean up the object we created, also when the status check fails on
      // some other 2xx code. Skipped when the PUT itself was rejected.
      if (putResp.ok) {
        await authClient.send(
          new DeleteObjectCommand({ Bucket: BUCKET, Key: 'anon-wildcard.txt' })
        );
      }
    }
  } finally {
    // Remove the policy even when an expectation above throws.
    await clearPolicy();
  }
});
// Action "*": unsigned GET and unsigned PUT are both expected to succeed.
tap.test('Action: "*" → global wildcard matches all actions', async () => {
  await putPolicy([
    {
      Sid: 'GlobalWildcard',
      Effect: 'Allow',
      Principal: '*',
      Action: '*',
      Resource: `arn:aws:s3:::${BUCKET}/*`,
    },
  ]);
  try {
    const getResp = await fetch(`${BASE_URL}/${BUCKET}/test-obj.txt`);
    expect(getResp.status).toEqual(200);

    const putResp = await fetch(`${BASE_URL}/${BUCKET}/anon-global.txt`, {
      method: 'PUT',
      body: 'global wildcard',
    });
    try {
      expect(putResp.status).toEqual(200);
    } finally {
      // Remove the object the PUT created, also when the status check fails on
      // some other 2xx code. Skipped when the PUT itself was rejected.
      if (putResp.ok) {
        await authClient.send(
          new DeleteObjectCommand({ Bucket: BUCKET, Key: 'anon-global.txt' })
        );
      }
    }
  } finally {
    // Remove the policy even when an expectation above throws.
    await clearPolicy();
  }
});
// Action "s3:Get*": unsigned GET is expected to pass, unsigned PUT to be rejected.
tap.test('Action: "s3:Get*" → anonymous can GET but not PUT (prefix wildcard)', async () => {
  await putPolicy([
    {
      Sid: 'PrefixWildcard',
      Effect: 'Allow',
      Principal: '*',
      Action: 's3:Get*',
      Resource: `arn:aws:s3:::${BUCKET}/*`,
    },
  ]);
  try {
    const getResp = await fetch(`${BASE_URL}/${BUCKET}/test-obj.txt`);
    expect(getResp.status).toEqual(200);

    const putResp = await fetch(`${BASE_URL}/${BUCKET}/anon-prefix.txt`, {
      method: 'PUT',
      body: 'should fail',
    });
    if (putResp.ok) {
      // The write was supposed to be rejected. Delete what it created with a
      // signed request so the failing expectation below does not leave a
      // stray object in the bucket that teardown later tries to delete.
      await authClient.send(
        new DeleteObjectCommand({ Bucket: BUCKET, Key: 'anon-prefix.txt' })
      );
    }
    expect(putResp.status).toEqual(403);
  } finally {
    // Remove the policy even when an expectation above throws.
    await clearPolicy();
  }
});
// Action array with GetObject and PutObject: unsigned DELETE is expected to be rejected.
tap.test('Action: ["s3:GetObject", "s3:PutObject"] → anonymous can GET and PUT but not DELETE', async () => {
  await putPolicy([
    {
      Sid: 'MultiAction',
      Effect: 'Allow',
      Principal: '*',
      Action: ['s3:GetObject', 's3:PutObject'],
      Resource: `arn:aws:s3:::${BUCKET}/*`,
    },
  ]);
  try {
    const getResp = await fetch(`${BASE_URL}/${BUCKET}/test-obj.txt`);
    expect(getResp.status).toEqual(200);

    const putResp = await fetch(`${BASE_URL}/${BUCKET}/anon-multi.txt`, {
      method: 'PUT',
      body: 'multi action',
    });
    try {
      expect(putResp.status).toEqual(200);

      const delResp = await fetch(`${BASE_URL}/${BUCKET}/anon-multi.txt`, {
        method: 'DELETE',
      });
      expect(delResp.status).toEqual(403);
    } finally {
      // Clean up with a signed request, also when an expectation above throws.
      // Skipped when the PUT itself was rejected.
      if (putResp.ok) {
        await authClient.send(
          new DeleteObjectCommand({ Bucket: BUCKET, Key: 'anon-multi.txt' })
        );
      }
    }
  } finally {
    // Remove the policy even when an expectation above throws.
    await clearPolicy();
  }
});
// ============================
// Resource ARN matching
// ============================
// Resource "<bucket>/*": unsigned GET of an object in the bucket is expected to pass.
tap.test('Resource: "arn:aws:s3:::eval-bucket/*" → anonymous GET of object succeeds', async () => {
  await putPolicy([
    {
      Sid: 'ResourceWildcard',
      Effect: 'Allow',
      Principal: '*',
      Action: 's3:GetObject',
      Resource: `arn:aws:s3:::${BUCKET}/*`,
    },
  ]);
  try {
    const resp = await fetch(`${BASE_URL}/${BUCKET}/test-obj.txt`);
    expect(resp.status).toEqual(200);
  } finally {
    // Remove the policy even when the expectation above throws.
    await clearPolicy();
  }
});
// Resource names one exact key: only that key is expected to be readable unsigned.
tap.test('Resource: exact key → anonymous GET of that key succeeds, other key fails', async () => {
  await putPolicy([
    {
      Sid: 'ExactResource',
      Effect: 'Allow',
      Principal: '*',
      Action: 's3:GetObject',
      Resource: `arn:aws:s3:::${BUCKET}/test-obj.txt`,
    },
  ]);
  try {
    const goodResp = await fetch(`${BASE_URL}/${BUCKET}/test-obj.txt`);
    expect(goodResp.status).toEqual(200);

    // Other key → resource doesn't match → NoOpinion → denied for anonymous
    const badResp = await fetch(`${BASE_URL}/${BUCKET}/nonexistent.txt`);
    expect(badResp.status).toEqual(403);
  } finally {
    // Remove the policy even when an expectation above throws.
    await clearPolicy();
  }
});
// Resource points at a different bucket: the statement should not apply here.
tap.test('Resource: wrong bucket ARN → NoOpinion → anonymous GET denied', async () => {
  await putPolicy([
    {
      Sid: 'WrongBucket',
      Effect: 'Allow',
      Principal: '*',
      Action: 's3:GetObject',
      Resource: 'arn:aws:s3:::other-bucket/*',
    },
  ]);
  try {
    const resp = await fetch(`${BASE_URL}/${BUCKET}/test-obj.txt`);
    expect(resp.status).toEqual(403);
  } finally {
    // Remove the policy even when the expectation above throws.
    await clearPolicy();
  }
});
// Resource "*": unsigned GET is expected to pass.
tap.test('Resource: "*" → matches everything, anonymous GET succeeds', async () => {
  await putPolicy([
    {
      Sid: 'StarResource',
      Effect: 'Allow',
      Principal: '*',
      Action: 's3:GetObject',
      Resource: '*',
    },
  ]);
  try {
    const resp = await fetch(`${BASE_URL}/${BUCKET}/test-obj.txt`);
    expect(resp.status).toEqual(200);
  } finally {
    // Remove the policy even when the expectation above throws.
    await clearPolicy();
  }
});
// ============================
// Deny-over-Allow priority
// ============================
// An Allow and a Deny for the same action and resource: the Deny is expected to win.
tap.test('Allow + Deny same action → anonymous GET denied', async () => {
  await putPolicy([
    {
      Sid: 'AllowGet',
      Effect: 'Allow',
      Principal: '*',
      Action: 's3:GetObject',
      Resource: `arn:aws:s3:::${BUCKET}/*`,
    },
    {
      Sid: 'DenyGet',
      Effect: 'Deny',
      Principal: '*',
      Action: 's3:GetObject',
      Resource: `arn:aws:s3:::${BUCKET}/*`,
    },
  ]);
  try {
    const resp = await fetch(`${BASE_URL}/${BUCKET}/test-obj.txt`);
    expect(resp.status).toEqual(403);
  } finally {
    // Remove the policy even when the expectation above throws.
    await clearPolicy();
  }
});
// Broad Allow plus a Deny on s3:DeleteObject: only the delete is expected to be rejected.
tap.test('Allow s3:* + Deny s3:DeleteObject → anonymous GET succeeds, DELETE denied', async () => {
  await putPolicy([
    {
      Sid: 'AllowAll',
      Effect: 'Allow',
      Principal: '*',
      Action: 's3:*',
      Resource: `arn:aws:s3:::${BUCKET}/*`,
    },
    {
      Sid: 'DenyDelete',
      Effect: 'Deny',
      Principal: '*',
      Action: 's3:DeleteObject',
      Resource: `arn:aws:s3:::${BUCKET}/*`,
    },
  ]);
  try {
    const getResp = await fetch(`${BASE_URL}/${BUCKET}/test-obj.txt`);
    expect(getResp.status).toEqual(200);

    const delResp = await fetch(`${BASE_URL}/${BUCKET}/test-obj.txt`, { method: 'DELETE' });
    expect(delResp.status).toEqual(403);
  } finally {
    // Remove the policy (including its Deny on s3:DeleteObject) even when an
    // expectation above throws.
    await clearPolicy();
  }
});
// Same Allow/Deny pair with the Deny listed first: the result is expected to be unchanged.
tap.test('Statement order does not matter: Deny first, Allow second → still denied', async () => {
  await putPolicy([
    {
      Sid: 'DenyFirst',
      Effect: 'Deny',
      Principal: '*',
      Action: 's3:GetObject',
      Resource: `arn:aws:s3:::${BUCKET}/*`,
    },
    {
      Sid: 'AllowSecond',
      Effect: 'Allow',
      Principal: '*',
      Action: 's3:GetObject',
      Resource: `arn:aws:s3:::${BUCKET}/*`,
    },
  ]);
  try {
    const resp = await fetch(`${BASE_URL}/${BUCKET}/test-obj.txt`);
    expect(resp.status).toEqual(403);
  } finally {
    // Remove the policy even when the expectation above throws.
    await clearPolicy();
  }
});
// ============================
// NoOpinion fallback
// ============================
// Policy only mentions PutObject; a signed GET is expected to pass anyway.
tap.test('NoOpinion: policy allows PutObject only → authenticated GET falls through (default allow)', async () => {
  await putPolicy([
    {
      Sid: 'AllowPutOnly',
      Effect: 'Allow',
      Principal: '*',
      Action: 's3:PutObject',
      Resource: `arn:aws:s3:::${BUCKET}/*`,
    },
  ]);
  try {
    // Authenticated → NoOpinion → default allow
    const resp = await authClient.send(
      new GetObjectCommand({ Bucket: BUCKET, Key: 'test-obj.txt' })
    );
    expect(resp.$metadata.httpStatusCode).toEqual(200);
    // Read the streamed body to the end instead of abandoning it, and check it
    // is the payload uploaded by the setup test.
    expect(await streamToString(resp.Body as Readable)).toEqual('hello policy eval');
  } finally {
    // Remove the policy even when an expectation above throws.
    await clearPolicy();
  }
});
// Same PutObject-only policy; an unsigned GET is expected to be rejected.
tap.test('NoOpinion: same policy → anonymous GET falls through → default deny (403)', async () => {
  await putPolicy([
    {
      Sid: 'AllowPutOnly',
      Effect: 'Allow',
      Principal: '*',
      Action: 's3:PutObject',
      Resource: `arn:aws:s3:::${BUCKET}/*`,
    },
  ]);
  try {
    // Anonymous → NoOpinion for GetObject → default deny
    const resp = await fetch(`${BASE_URL}/${BUCKET}/test-obj.txt`);
    expect(resp.status).toEqual(403);
  } finally {
    // Remove the policy even when the expectation above throws.
    await clearPolicy();
  }
});
// ============================
// IAM action mapping
// ============================
// s3:GetObject is expected to cover an unsigned HEAD on the object as well.
tap.test('Policy allows s3:GetObject → anonymous HEAD object succeeds (HeadObject maps to s3:GetObject)', async () => {
  await putPolicy([
    {
      Sid: 'AllowGet',
      Effect: 'Allow',
      Principal: '*',
      Action: 's3:GetObject',
      Resource: `arn:aws:s3:::${BUCKET}/*`,
    },
  ]);
  try {
    const resp = await fetch(`${BASE_URL}/${BUCKET}/test-obj.txt`, { method: 'HEAD' });
    expect(resp.status).toEqual(200);
  } finally {
    // Remove the policy even when the expectation above throws.
    await clearPolicy();
  }
});
// s3:ListBucket on the bucket ARN is expected to cover an unsigned HEAD on the bucket.
tap.test('Policy allows s3:ListBucket → anonymous HEAD bucket succeeds', async () => {
  await putPolicy([
    {
      Sid: 'AllowList',
      Effect: 'Allow',
      Principal: '*',
      Action: 's3:ListBucket',
      Resource: `arn:aws:s3:::${BUCKET}`,
    },
  ]);
  try {
    const resp = await fetch(`${BASE_URL}/${BUCKET}`, { method: 'HEAD' });
    expect(resp.status).toEqual(200);
  } finally {
    // Remove the policy even when the expectation above throws.
    await clearPolicy();
  }
});
// s3:ListBucket on the bucket ARN is expected to cover an unsigned object listing.
tap.test('Policy allows s3:ListBucket → anonymous GET bucket (list objects) succeeds', async () => {
  await putPolicy([
    {
      Sid: 'AllowList',
      Effect: 'Allow',
      Principal: '*',
      Action: 's3:ListBucket',
      Resource: `arn:aws:s3:::${BUCKET}`,
    },
  ]);
  try {
    const resp = await fetch(`${BASE_URL}/${BUCKET}`);
    expect(resp.status).toEqual(200);
    const text = await resp.text();
    expect(text).toInclude('ListBucketResult');
  } finally {
    // Remove the policy even when an expectation above throws.
    await clearPolicy();
  }
});
// ============================
// Teardown
// ============================
// Deletes the fixture object and the bucket, then stops the server.
tap.test('teardown: clean up and stop server', async () => {
  try {
    await authClient.send(new DeleteObjectCommand({ Bucket: BUCKET, Key: 'test-obj.txt' }));
    await authClient.send(new DeleteBucketCommand({ Bucket: BUCKET }));
  } finally {
    // Stop the server even when one of the cleanup requests fails; otherwise
    // a failed delete would leave the started instance running.
    await testSmartStorageInstance.stop();
  }
});
// Start the tests registered above; whatever tap.start() returns becomes this
// module's default export.
export default tap.start();

View File

@@ -1,12 +1,12 @@
import { expect, tap } from '@git.zone/tstest/tapbundle'; import { expect, tap } from '@git.zone/tstest/tapbundle';
import * as plugins from './plugins.js'; import * as plugins from './plugins.js';
import * as smarts3 from '../ts/index.js'; import * as smartstorage from '../ts/index.js';
let testSmarts3Instance: smarts3.Smarts3; let testSmartStorageInstance: smartstorage.SmartStorage;
tap.test('should create a smarts3 instance and run it', async (toolsArg) => { tap.test('should create a smartstorage instance and run it', async (toolsArg) => {
testSmarts3Instance = await smarts3.Smarts3.createAndStart({ testSmartStorageInstance = await smartstorage.SmartStorage.createAndStart({
server: { server: {
port: 3333, port: 3333,
}, },
@@ -20,7 +20,7 @@ tap.test('should create a smarts3 instance and run it', async (toolsArg) => {
tap.test('should be able to access buckets', async () => { tap.test('should be able to access buckets', async () => {
const smartbucketInstance = new plugins.smartbucket.SmartBucket( const smartbucketInstance = new plugins.smartbucket.SmartBucket(
await testSmarts3Instance.getS3Descriptor(), await testSmartStorageInstance.getStorageDescriptor(),
); );
const bucket = await smartbucketInstance.createBucket('testbucket'); const bucket = await smartbucketInstance.createBucket('testbucket');
const baseDirectory = await bucket.getBaseDirectory(); const baseDirectory = await bucket.getBaseDirectory();
@@ -31,7 +31,7 @@ tap.test('should be able to access buckets', async () => {
}); });
tap.test('should stop the instance', async () => { tap.test('should stop the instance', async () => {
await testSmarts3Instance.stop(); await testSmartStorageInstance.stop();
}); });
tap.start(); tap.start();

View File

@@ -2,7 +2,7 @@
* autocreated commitinfo by @push.rocks/commitinfo * autocreated commitinfo by @push.rocks/commitinfo
*/ */
export const commitinfo = { export const commitinfo = {
name: '@push.rocks/smarts3', name: '@push.rocks/smartstorage',
version: '5.1.1', version: '6.3.0',
description: 'A Node.js TypeScript package to create a local S3 endpoint for simulating AWS S3 operations using mapped local directories for development and testing purposes.' description: 'A Node.js TypeScript package to create a local S3-compatible storage server using mapped local directories for development and testing purposes.'
} }

View File

@@ -58,6 +58,7 @@ export interface IServerConfig {
port?: number; port?: number;
address?: string; address?: string;
silent?: boolean; silent?: boolean;
region?: string;
} }
/** /**
@@ -69,9 +70,39 @@ export interface IStorageConfig {
} }
/** /**
* Complete smarts3 configuration * Erasure coding configuration
*/ */
export interface ISmarts3Config { export interface IErasureConfig {
dataShards?: number;
parityShards?: number;
chunkSizeBytes?: number;
}
/**
* Drive configuration for multi-drive support
*/
export interface IDriveConfig {
paths: string[];
}
/**
* Cluster configuration for distributed mode
*/
export interface IClusterConfig {
enabled: boolean;
nodeId?: string;
quicPort?: number;
seedNodes?: string[];
erasure?: IErasureConfig;
drives?: IDriveConfig;
heartbeatIntervalMs?: number;
heartbeatTimeoutMs?: number;
}
/**
* Complete smartstorage configuration
*/
export interface ISmartStorageConfig {
server?: IServerConfig; server?: IServerConfig;
storage?: IStorageConfig; storage?: IStorageConfig;
auth?: IAuthConfig; auth?: IAuthConfig;
@@ -79,16 +110,18 @@ export interface ISmarts3Config {
logging?: ILoggingConfig; logging?: ILoggingConfig;
limits?: ILimitsConfig; limits?: ILimitsConfig;
multipart?: IMultipartConfig; multipart?: IMultipartConfig;
cluster?: IClusterConfig;
} }
/** /**
* Default configuration values * Default configuration values
*/ */
const DEFAULT_CONFIG: ISmarts3Config = { const DEFAULT_CONFIG: ISmartStorageConfig = {
server: { server: {
port: 3000, port: 3000,
address: '0.0.0.0', address: '0.0.0.0',
silent: false, silent: false,
region: 'us-east-1',
}, },
storage: { storage: {
directory: paths.bucketsDir, directory: paths.bucketsDir,
@@ -98,8 +131,8 @@ const DEFAULT_CONFIG: ISmarts3Config = {
enabled: false, enabled: false,
credentials: [ credentials: [
{ {
accessKeyId: 'S3RVER', accessKeyId: 'STORAGE',
secretAccessKey: 'S3RVER', secretAccessKey: 'STORAGE',
}, },
], ],
}, },
@@ -131,7 +164,7 @@ const DEFAULT_CONFIG: ISmarts3Config = {
/** /**
* Merge user config with defaults (deep merge) * Merge user config with defaults (deep merge)
*/ */
function mergeConfig(userConfig: ISmarts3Config): Required<ISmarts3Config> { function mergeConfig(userConfig: ISmartStorageConfig): Required<ISmartStorageConfig> {
return { return {
server: { server: {
...DEFAULT_CONFIG.server!, ...DEFAULT_CONFIG.server!,
@@ -161,41 +194,40 @@ function mergeConfig(userConfig: ISmarts3Config): Required<ISmarts3Config> {
...DEFAULT_CONFIG.multipart!, ...DEFAULT_CONFIG.multipart!,
...(userConfig.multipart || {}), ...(userConfig.multipart || {}),
}, },
}; ...(userConfig.cluster ? { cluster: userConfig.cluster } : {}),
} as Required<ISmartStorageConfig>;
} }
/** /**
* IPC command type map for RustBridge * IPC command type map for RustBridge
*/ */
type TRustS3Commands = { type TRustStorageCommands = {
start: { params: { config: Required<ISmarts3Config> }; result: {} }; start: { params: { config: Required<ISmartStorageConfig> }; result: {} };
stop: { params: {}; result: {} }; stop: { params: {}; result: {} };
createBucket: { params: { name: string }; result: {} }; createBucket: { params: { name: string }; result: {} };
}; };
/** /**
* Main Smarts3 class - production-ready S3-compatible server * Main SmartStorage class - production-ready S3-compatible storage server
*/ */
export class Smarts3 { export class SmartStorage {
// STATIC // STATIC
public static async createAndStart(configArg: ISmarts3Config = {}) { public static async createAndStart(configArg: ISmartStorageConfig = {}) {
const smartS3Instance = new Smarts3(configArg); const smartStorageInstance = new SmartStorage(configArg);
await smartS3Instance.start(); await smartStorageInstance.start();
return smartS3Instance; return smartStorageInstance;
} }
// INSTANCE // INSTANCE
public config: Required<ISmarts3Config>; public config: Required<ISmartStorageConfig>;
private bridge: InstanceType<typeof plugins.RustBridge<TRustS3Commands>>; private bridge: InstanceType<typeof plugins.RustBridge<TRustStorageCommands>>;
constructor(configArg: ISmarts3Config = {}) { constructor(configArg: ISmartStorageConfig = {}) {
this.config = mergeConfig(configArg); this.config = mergeConfig(configArg);
this.bridge = new plugins.RustBridge<TRustS3Commands>({ this.bridge = new plugins.RustBridge<TRustStorageCommands>({
binaryName: 'rusts3', binaryName: 'ruststorage',
localPaths: [ localPaths: [
plugins.path.join(paths.packageDir, 'dist_rust', 'rusts3'), plugins.path.join(paths.packageDir, 'dist_rust', 'ruststorage'),
plugins.path.join(paths.packageDir, 'rust', 'target', 'release', 'rusts3'),
plugins.path.join(paths.packageDir, 'rust', 'target', 'debug', 'rusts3'),
], ],
readyTimeoutMs: 30000, readyTimeoutMs: 30000,
requestTimeoutMs: 300000, requestTimeoutMs: 300000,
@@ -205,21 +237,21 @@ export class Smarts3 {
public async start() { public async start() {
const spawned = await this.bridge.spawn(); const spawned = await this.bridge.spawn();
if (!spawned) { if (!spawned) {
throw new Error('Failed to spawn rusts3 binary. Make sure it is compiled (pnpm build).'); throw new Error('Failed to spawn ruststorage binary. Make sure it is compiled (pnpm build).');
} }
await this.bridge.sendCommand('start', { config: this.config }); await this.bridge.sendCommand('start', { config: this.config });
if (!this.config.server.silent) { if (!this.config.server.silent) {
console.log('s3 server is running'); console.log('storage server is running');
} }
} }
public async getS3Descriptor( public async getStorageDescriptor(
optionsArg?: Partial<plugins.tsclass.storage.IS3Descriptor>, optionsArg?: Partial<plugins.tsclass.storage.IS3Descriptor>,
): Promise<plugins.tsclass.storage.IS3Descriptor> { ): Promise<plugins.tsclass.storage.IS3Descriptor> {
const cred = this.config.auth.credentials[0] || { const cred = this.config.auth.credentials[0] || {
accessKeyId: 'S3RVER', accessKeyId: 'STORAGE',
secretAccessKey: 'S3RVER', secretAccessKey: 'STORAGE',
}; };
const descriptor: plugins.tsclass.storage.IS3Descriptor = { const descriptor: plugins.tsclass.storage.IS3Descriptor = {