35 Commits

Author SHA1 Message Date
jkunz 19f18ef480 v2.6.2 2026-04-05 12:42:54 +00:00
jkunz 6148b28cba fix(readme): align architecture diagram formatting in the documentation 2026-04-05 12:42:54 +00:00
jkunz 012632111e v2.6.1 2026-04-05 12:41:39 +00:00
jkunz b9a59a8649 fix(readme): correct ASCII diagram spacing in architecture overview 2026-04-05 12:41:39 +00:00
jkunz f8a8c9fdff v2.6.0 2026-04-05 12:38:46 +00:00
jkunz d37b444dd5 feat(readme): document index enforcement, storage reliability, and data integrity validation features 2026-04-05 12:38:46 +00:00
jkunz 02ad9a29a7 v2.5.9 2026-04-05 09:54:56 +00:00
jkunz 24c504518d fix(rustdb-storage): run collection compaction during file storage initialization after crashes 2026-04-05 09:54:56 +00:00
jkunz 92f07ef3d7 v2.5.8 2026-04-05 09:48:10 +00:00
jkunz 22e010c554 fix(rustdb-storage): detect stale hint files using data file size metadata and add restart persistence regression tests 2026-04-05 09:48:10 +00:00
jkunz 8ebc1bb9e1 v2.5.7 2026-04-05 03:54:13 +00:00
jkunz 3fc21dcd99 fix(repo): no changes to commit 2026-04-05 03:54:13 +00:00
jkunz ad5e0e8a72 chore: gitignore generated bundled.ts to fix release cycle 2026-04-05 03:54:05 +00:00
jkunz c384df20ce v2.5.6 2026-04-05 03:53:29 +00:00
jkunz 4e944f3d05 fix(repo): no changes to commit 2026-04-05 03:53:29 +00:00
jkunz e0455daa2e chore: rebuild bundled debug server with current version 2026-04-05 03:53:22 +00:00
jkunz f3f1afe9af v2.5.5 2026-04-05 03:52:29 +00:00
jkunz 94dc9cfc3f fix(repo): no changes to commit 2026-04-05 03:52:29 +00:00
jkunz a9c0ced1ca v2.5.4 2026-04-05 03:52:23 +00:00
jkunz c8626a9afd fix(package): bump package version to 2.5.3 2026-04-05 03:52:23 +00:00
jkunz 55a1f66e57 chore: update bundled debug server output 2026-04-05 03:52:21 +00:00
jkunz 5b5f35821f v2.5.3 2026-04-05 03:51:58 +00:00
jkunz e8161e6417 fix(rustdb-commands): restore persisted index initialization before writes to enforce unique constraints after restart 2026-04-05 03:51:58 +00:00
jkunz 1a10c32b12 v2.5.2 2026-04-05 03:26:52 +00:00
jkunz cb8cb87d9f fix(rustdb-indexes): persist created indexes and restore them on server startup 2026-04-05 03:26:52 +00:00
jkunz 96117d54b9 v2.5.1 2026-04-05 02:48:00 +00:00
jkunz 53f58e45c3 fix(docs): update project documentation 2026-04-05 02:48:00 +00:00
jkunz 34d708be7e v2.5.0 2026-04-05 02:46:05 +00:00
jkunz 418e8dc052 feat(storage): add offline data validation and strengthen storage/index integrity checks 2026-04-05 02:46:05 +00:00
jkunz b8567ebe08 v2.4.1 2026-04-05 01:31:44 +00:00
jkunz 827bfa6370 fix(package): update package metadata 2026-04-05 01:31:44 +00:00
jkunz ceba64e34a v2.4.0 2026-04-05 01:30:28 +00:00
jkunz 8646d58f06 feat(rustdb): add restore and periodic persistence support for in-memory storage 2026-04-05 01:30:28 +00:00
jkunz 8ce6ff11c3 v2.3.1 2026-04-04 20:15:58 +00:00
jkunz 5c7aaebaba fix(package): update package metadata 2026-04-04 20:15:58 +00:00
25 changed files with 1763 additions and 117 deletions
+3
View File
@@ -13,5 +13,8 @@ rust/target/
package-lock.json
yarn.lock
# generated bundle (rebuilt on every build, embeds version)
ts_debugserver/bundled.ts
# playwright
.playwright-mcp/
+95
View File
@@ -1,5 +1,100 @@
# Changelog
## 2026-04-05 - 2.6.2 - fix(readme)
align architecture diagram formatting in the documentation
- Adjusts spacing and box alignment in the README architecture diagram for clearer presentation.
## 2026-04-05 - 2.6.1 - fix(readme)
correct ASCII diagram spacing in architecture overview
- Adjusts alignment in the README architecture diagram for clearer visual formatting.
## 2026-04-05 - 2.6.0 - feat(readme)
document index enforcement, storage reliability, and data integrity validation features
- Add documentation for engine-level unique index enforcement and duplicate key behavior
- Describe storage engine reliability features including WAL, CRC32 checks, compaction, hint file staleness detection, and stale socket cleanup
- Add usage documentation for the offline data integrity validation CLI
## 2026-04-05 - 2.5.9 - fix(rustdb-storage)
run collection compaction during file storage initialization after crashes
- Triggers compaction for all loaded collections before starting the periodic background compaction task.
- Helps clean up, during startup, dead records left behind by a crash.
## 2026-04-05 - 2.5.8 - fix(rustdb-storage)
detect stale hint files using data file size metadata and add restart persistence regression tests
- Store the current data.rdb size in hint file headers and validate it on load to rebuild KeyDir when hints are stale or written in the old format.
- Persist updated hint metadata after compaction and shutdown to avoid missing appended tombstones after restart.
- Add validation reporting for stale hint files based on recorded versus actual data file size.
- Add regression tests covering delete persistence across restarts, missing hint recovery, stale socket cleanup, and unique index enforcement persistence.
## 2026-04-05 - 2.5.7 - fix(repo)
no changes to commit
## 2026-04-05 - 2.5.6 - fix(repo)
no changes to commit
## 2026-04-05 - 2.5.5 - fix(repo)
no changes to commit
## 2026-04-05 - 2.5.4 - fix(package)
bump package version to 2.5.3
- Updates the package metadata version by one patch release.
## 2026-04-05 - 2.5.3 - fix(rustdb-commands)
restore persisted index initialization before writes to enforce unique constraints after restart
- load stored index specifications from storage when creating command context index engines
- rebuild index data from existing documents so custom indexes are active before insert, update, and upsert operations
- add @push.rocks/smartdata as a runtime dependency
## 2026-04-05 - 2.5.2 - fix(rustdb-indexes)
persist created indexes and restore them on server startup
- Save index specifications to storage when indexes are created.
- Remove persisted index metadata when indexes are dropped by name, key spec, or wildcard.
- Rebuild in-memory index engines from stored definitions and existing documents during startup.
## 2026-04-05 - 2.5.1 - fix(docs)
update project documentation
- Modifies a single documentation-related file with a minimal text change.
- No source code, API, or package metadata changes are indicated in the diff summary.
## 2026-04-05 - 2.5.0 - feat(storage)
add offline data validation and strengthen storage/index integrity checks
- adds a `--validate-data <PATH>` CLI mode to run offline integrity checks on storage directories
- introduces storage validation reporting for headers, checksums, duplicate ids, tombstones, and stale or orphaned hint entries
- pre-checks unique index constraints before insert, update, upsert, and findAndModify writes to prevent duplicate-key violations before storage changes
- validates hint files against data files during collection load and rebuilds indexes from data when hints are stale
- ensures new data files always receive a SMARTDB header and persists fresh hint files after successful compaction
- cleans up stale local Unix socket files before starting the TypeScript local server
## 2026-04-05 - 2.4.1 - fix(package)
update package metadata
- Adjusts package manifest content with a minimal one-line change.
## 2026-04-05 - 2.4.0 - feat(rustdb)
add restore and periodic persistence support for in-memory storage
- Restore previously persisted state during startup when a persist path is configured.
- Spawn a background task to periodically persist in-memory data using the configured interval.
- Warn when running purely in-memory without durable persistence configured.
## 2026-04-04 - 2.3.1 - fix(package)
update package metadata
- Adjusts a single package-level metadata entry in the project configuration.
## 2026-04-04 - 2.3.0 - feat(test)
add integration coverage for file storage, compaction, migration, and LocalSmartDb workflows
+2 -1
View File
@@ -1,6 +1,6 @@
{
"name": "@push.rocks/smartdb",
"version": "2.3.0",
"version": "2.6.2",
"private": false,
"description": "A MongoDB-compatible embedded database server with wire protocol support, backed by a high-performance Rust engine.",
"exports": {
@@ -29,6 +29,7 @@
"dependencies": {
"@api.global/typedserver": "^8.0.0",
"@design.estate/dees-element": "^2.0.0",
"@push.rocks/smartdata": "7.1.5",
"@push.rocks/smartrust": "^1.3.2",
"bson": "^7.2.0"
},
+7 -4
View File
@@ -14,6 +14,9 @@ importers:
'@design.estate/dees-element':
specifier: ^2.0.0
version: 2.2.3
'@push.rocks/smartdata':
specifier: 7.1.5
version: 7.1.5(socks@2.8.7)
'@push.rocks/smartrust':
specifier: ^1.3.2
version: 1.3.2
@@ -1026,8 +1029,8 @@ packages:
'@push.rocks/smartcrypto@2.0.4':
resolution: {integrity: sha512-1+/5bsjyataf5uUkUNnnVXGRAt+gHVk1KDzozjTqgqJxHvQk1d9fVDohL6CxUhUucTPtu5VR5xNBiV8YCDuGyw==}
'@push.rocks/smartdata@7.1.3':
resolution: {integrity: sha512-7vQJ9pdRk450yn2m9tmGPdSRlQVmxFPZjHD4sGYsfqCQPg+GLFusu+H16zpf+jKzAq4F2ZBMPaYymJHXvXiVcw==}
'@push.rocks/smartdata@7.1.5':
resolution: {integrity: sha512-7x7VedEg6RocWndqUPuTbY2Bh85Q/x0LOVHL4o/NVXyh3IGNtiVQ8ple4WR0qYqlHRAojX4eDSBPMiYzIasqAg==}
'@push.rocks/smartdelay@3.0.5':
resolution: {integrity: sha512-mUuI7kj2f7ztjpic96FvRIlf2RsKBa5arw81AHNsndbxO6asRcxuWL8dTVxouEIK8YsBUlj0AsrCkHhMbLQdHw==}
@@ -5665,7 +5668,7 @@ snapshots:
'@types/node-forge': 1.3.14
node-forge: 1.4.0
'@push.rocks/smartdata@7.1.3(socks@2.8.7)':
'@push.rocks/smartdata@7.1.5(socks@2.8.7)':
dependencies:
'@push.rocks/lik': 6.4.0
'@push.rocks/smartdelay': 3.0.5
@@ -5899,7 +5902,7 @@ snapshots:
'@push.rocks/smartmongo@5.1.1(socks@2.8.7)':
dependencies:
'@push.rocks/mongodump': 1.1.0(socks@2.8.7)
'@push.rocks/smartdata': 7.1.3(socks@2.8.7)
'@push.rocks/smartdata': 7.1.5(socks@2.8.7)
'@push.rocks/smartfs': 1.5.0
'@push.rocks/smartpath': 6.0.0
'@push.rocks/smartpromise': 4.2.3
+60 -25
View File
@@ -44,38 +44,38 @@ SmartDB uses a **sidecar binary** pattern — TypeScript handles lifecycle, Rust
```
┌──────────────────────────────────────────────────────────────┐
│ Your Application
│ (TypeScript / Node.js)
│ ┌───────────────── ┌───────────────────────────┐
│ │ SmartdbServer │────▶│ RustDbBridge (IPC) │
│ │ or LocalSmartDb │ │ @push.rocks/smartrust │
│ └───────────────── └───────────┬───────────────┘
└─────────────────────────────────────────────────────────────┘
│ spawn + JSON IPC
│ Your Application │
│ (TypeScript / Node.js) │
│ ┌──────────────────┐ ┌───────────────────────────┐ │
│ │ SmartdbServer │────▶│ RustDbBridge (IPC) │ │
│ │ or LocalSmartDb │ │ @push.rocks/smartrust │ │
│ └──────────────────┘ └───────────┬───────────────┘ │
└─────────────────────────────────────────────────────────────┘
│ spawn + JSON IPC
┌──────────────────────────────────────────────────────────────┐
│ rustdb binary 🦀
│ rustdb binary
│ │
│ ┌──────────────┐ ┌──────────────┐ ┌───────────────┐ │
│ │ Wire Protocol│→ │Command Router│→ │ Handlers │ │
│ │ (OP_MSG) │ │ (40+ cmds) │ │ Find,Insert.. │ │
│ └──────────────┘ └──────────────┘ └───────┬───────┘ │
│ │
│ ┌─────────┐ ┌────────┐ ┌───────────┐ ┌──────┴──────┐ │
│ │ Query │ │ Update │ │Aggregation│ │ Index │ │
│ │ Matcher │ │ Engine │ │ Engine │ │ Engine │ │
│ └─────────┘ └────────┘ └───────────┘ └─────────────┘ │
│ ┌──────────────┐ ┌──────────────┐ ┌───────────────┐
│ │ Wire Protocol│→ │Command Router│→ │ Handlers │
│ │ (OP_MSG) │ │ (40+ cmds) │ │ Find,Insert.. │
│ └──────────────┘ └──────────────┘ └───────┬───────┘
│ │
│ ┌─────────┐ ┌────────┐ ┌───────────┐ ┌──────┴──────┐
│ │ Query │ │ Update │ │Aggregation│ │ Index │
│ │ Matcher │ │ Engine │ │ Engine │ │ Engine │
│ └─────────┘ └────────┘ └───────────┘ └─────────────┘
│ │
│ ┌──────────────────┐ ┌──────────────────┐ ┌──────────┐ │
│ │ MemoryStorage │ │ FileStorage │ │ OpLog │ │
│ └──────────────────┘ └──────────────────┘ └──────────┘ │
│ ┌──────────────────┐ ┌──────────────────┐ ┌──────────┐
│ │ MemoryStorage │ │ FileStorage │ │ OpLog │
│ └──────────────────┘ └──────────────────┘ └──────────┘
└──────────────────────────────────────────────────────────────┘
│ TCP / Unix Socket (wire protocol)
┌─────────────┴────────────────────────────────────────────────┐
│ MongoClient (mongodb npm driver)
│ Connects directly to Rust binary
│ MongoClient (mongodb npm driver) │
│ Connects directly to Rust binary │
└──────────────────────────────────────────────────────────────┘
```
@@ -429,6 +429,8 @@ await collection.dropIndex('email_1');
await collection.dropIndexes(); // drop all except _id
```
> 🛡️ **Unique indexes are enforced at the engine level.** Duplicate values are rejected with a `DuplicateKey` error (code 11000) *before* the document is written to disk — on `insertOne`, `updateOne`, `findAndModify`, and upserts. Index definitions are persisted to `indexes.json` and automatically restored on restart.
### Database & Admin
```typescript
@@ -497,6 +499,39 @@ The Rust engine is organized as a Cargo workspace with 8 focused crates:
Cross-compiled for `linux_amd64` and `linux_arm64` via [@git.zone/tsrust](https://www.npmjs.com/package/@git.zone/tsrust).
### Storage Engine Reliability 🔒
The Bitcask-style file storage engine includes several reliability features:
- **Write-ahead log (WAL)** — every write is logged before being applied, with crash recovery on restart
- **CRC32 checksums** — every record is integrity-checked on read
- **Automatic compaction** — dead records are reclaimed when they exceed 50% of file size; compaction runs on startup and after every write
- **Hint file staleness detection** — the hint file records the data file size at write time; if `data.rdb` has changed since then (e.g. a crash after a delete), the engine falls back to a full scan to ensure tombstones are not lost
- **Stale socket cleanup** — orphaned `/tmp/smartdb-*.sock` files from crashed instances are automatically cleaned up on startup
### Data Integrity CLI 🔍
The Rust binary includes an offline integrity checker:
```bash
# Check all collections in a data directory
./dist_rust/rustdb_linux_amd64 --validate-data /path/to/data
# Output:
# === SmartDB Data Integrity Report ===
#
# Database: mydb
# Collection: users
# Header: OK
# Records: 1,234 (1,200 live, 34 tombstones)
# Data size: 2.1 MB
# Duplicates: 0
# CRC errors: 0
# Hint file: OK
```
Checks file headers, record CRC32 checksums, duplicate `_id` entries, and hint file consistency. Exit code 1 if any errors are found.
---
## Testing Example
@@ -541,7 +576,7 @@ export default tap.start();
## License and Legal Information
This repository contains open-source code licensed under the MIT License. A copy of the license can be found in the [LICENSE](./LICENSE) file.
This repository contains open-source code licensed under the MIT License. A copy of the license can be found in the [license](./license) file.
**Please note:** The MIT License does not grant permission to use the trade names, trademarks, service marks, or product names of the project, except as required for reasonable and customary use in describing the origin of the work and reproducing the content of the NOTICE file.
+63 -2
View File
@@ -1,8 +1,8 @@
use std::sync::Arc;
use bson::Document;
use bson::{Bson, Document};
use dashmap::DashMap;
use rustdb_index::IndexEngine;
use rustdb_index::{IndexEngine, IndexOptions};
use rustdb_storage::{OpLog, StorageAdapter};
use rustdb_txn::{SessionEngine, TransactionEngine};
@@ -24,6 +24,67 @@ pub struct CommandContext {
pub oplog: Arc<OpLog>,
}
impl CommandContext {
/// Get or lazily initialize an IndexEngine for a namespace.
///
/// If no IndexEngine exists yet for this namespace, loads persisted index
/// specs from `indexes.json` via the storage adapter, creates the engine
/// with those specs, and rebuilds index data from existing documents.
/// This ensures unique indexes are enforced even on the very first write
/// after a restart.
pub async fn get_or_init_index_engine(&self, db: &str, coll: &str) -> dashmap::mapref::one::RefMut<'_, String, IndexEngine> {
let ns_key = format!("{}.{}", db, coll);
// Fast path: engine already exists.
if self.indexes.contains_key(&ns_key) {
return self.indexes.entry(ns_key).or_insert_with(IndexEngine::new);
}
// Slow path: load from persisted specs.
let mut engine = IndexEngine::new();
let mut has_custom = false;
if let Ok(specs) = self.storage.get_indexes(db, coll).await {
for spec in &specs {
let name = spec.get_str("name").unwrap_or("").to_string();
if name == "_id_" || name.is_empty() {
continue;
}
let key = match spec.get("key") {
Some(Bson::Document(k)) => k.clone(),
_ => continue,
};
let unique = matches!(spec.get("unique"), Some(Bson::Boolean(true)));
let sparse = matches!(spec.get("sparse"), Some(Bson::Boolean(true)));
let expire_after_seconds = match spec.get("expireAfterSeconds") {
Some(Bson::Int32(n)) => Some(*n as u64),
Some(Bson::Int64(n)) => Some(*n as u64),
_ => None,
};
let options = IndexOptions {
name: Some(name),
unique,
sparse,
expire_after_seconds,
};
let _ = engine.create_index(key, options);
has_custom = true;
}
}
if has_custom {
// Rebuild index data from existing documents.
if let Ok(docs) = self.storage.find_all(db, coll).await {
if !docs.is_empty() {
engine.rebuild_from_documents(&docs);
}
}
}
self.indexes.entry(ns_key).or_insert(engine)
}
}
/// State of an open cursor from a find or aggregate command.
pub struct CursorState {
/// Documents remaining to be returned.
@@ -101,7 +101,15 @@ async fn handle_create_indexes(
expire_after_seconds,
};
// Create the index.
let options_for_persist = IndexOptions {
name: options.name.clone(),
unique: options.unique,
sparse: options.sparse,
expire_after_seconds: options.expire_after_seconds,
};
let key_for_persist = key.clone();
// Create the index in-memory.
let mut engine = ctx
.indexes
.entry(ns_key.clone())
@@ -110,6 +118,22 @@ async fn handle_create_indexes(
match engine.create_index(key, options) {
Ok(index_name) => {
debug!(index_name = %index_name, "Created index");
// Persist index spec to disk.
let mut spec = doc! { "key": key_for_persist };
if options_for_persist.unique {
spec.insert("unique", true);
}
if options_for_persist.sparse {
spec.insert("sparse", true);
}
if let Some(ttl) = options_for_persist.expire_after_seconds {
spec.insert("expireAfterSeconds", ttl as i64);
}
if let Err(e) = ctx.storage.save_index(db, coll, &index_name, spec).await {
tracing::warn!(index = %index_name, error = %e, "failed to persist index spec");
}
created_count += 1;
}
Err(e) => {
@@ -180,9 +204,21 @@ async fn handle_drop_indexes(
match index_spec {
Some(Bson::String(name)) if name == "*" => {
// Drop all indexes except _id_.
// Collect names to drop from storage first.
let names_to_drop: Vec<String> = if let Some(engine) = ctx.indexes.get(&ns_key) {
engine.list_indexes().iter()
.filter(|info| info.name != "_id_")
.map(|info| info.name.clone())
.collect()
} else {
Vec::new()
};
if let Some(mut engine) = ctx.indexes.get_mut(&ns_key) {
engine.drop_all_indexes();
}
for idx_name in &names_to_drop {
let _ = ctx.storage.drop_index(db, coll, idx_name).await;
}
}
Some(Bson::String(name)) => {
// Drop by name.
@@ -196,6 +232,7 @@ async fn handle_drop_indexes(
name
)));
}
let _ = ctx.storage.drop_index(db, coll, name).await;
}
Some(Bson::Document(key_spec)) => {
// Drop by key spec: find the index with matching key.
@@ -210,6 +247,7 @@ async fn handle_drop_indexes(
engine.drop_index(&name).map_err(|e| {
CommandError::IndexError(e.to_string())
})?;
let _ = ctx.storage.drop_index(db, coll, &name).await;
} else {
return Err(CommandError::IndexError(
"index not found with specified key".into(),
@@ -1,9 +1,8 @@
use std::collections::HashMap;
use bson::{doc, oid::ObjectId, Bson, Document};
use rustdb_index::IndexEngine;
use rustdb_storage::OpType;
use tracing::{debug, warn};
use tracing::debug;
use crate::context::CommandContext;
use crate::error::{CommandError, CommandResult};
@@ -56,12 +55,35 @@ pub async fn handle(
let mut inserted_count: i32 = 0;
let mut write_errors: Vec<Document> = Vec::new();
// Ensure the IndexEngine is loaded (with persisted specs from indexes.json).
// This must happen BEFORE any writes, so unique constraints are enforced
// even on the first write after a restart.
drop(ctx.get_or_init_index_engine(db, coll).await);
for (idx, mut doc) in docs.into_iter().enumerate() {
// Auto-generate _id if not present.
if !doc.contains_key("_id") {
doc.insert("_id", ObjectId::new());
}
// Pre-check unique index constraints BEFORE storage write.
// The engine is guaranteed to exist from the get_or_init call above.
if let Some(engine) = ctx.indexes.get(&ns_key) {
if let Err(e) = engine.check_unique_constraints(&doc) {
let err_msg = e.to_string();
write_errors.push(doc! {
"index": idx as i32,
"code": 11000_i32,
"codeName": "DuplicateKey",
"errmsg": &err_msg,
});
if ordered {
break;
}
continue;
}
}
// Attempt storage insert.
match ctx.storage.insert_one(db, coll, doc.clone()).await {
Ok(id_str) => {
@@ -75,17 +97,15 @@ pub async fn handle(
None,
);
// Update index engine.
let mut engine = ctx
.indexes
.entry(ns_key.clone())
.or_insert_with(IndexEngine::new);
if let Err(e) = engine.on_insert(&doc) {
warn!(
namespace = %ns_key,
error = %e,
"index update failed after successful insert"
);
// Update index engine (already initialized above).
if let Some(mut engine) = ctx.indexes.get_mut(&ns_key) {
if let Err(e) = engine.on_insert(&doc) {
tracing::error!(
namespace = %ns_key,
error = %e,
"index update failed after successful insert"
);
}
}
inserted_count += 1;
}
@@ -1,7 +1,6 @@
use std::collections::HashSet;
use bson::{doc, oid::ObjectId, Bson, Document};
use rustdb_index::IndexEngine;
use rustdb_query::{QueryMatcher, UpdateEngine, sort_documents, apply_projection};
use rustdb_storage::OpType;
use tracing::debug;
@@ -47,6 +46,10 @@ async fn handle_update(
ensure_collection_exists(db, coll, ctx).await?;
let ns_key = format!("{}.{}", db, coll);
// Ensure the IndexEngine is loaded with persisted specs from indexes.json.
drop(ctx.get_or_init_index_engine(db, coll).await);
let mut total_n: i32 = 0;
let mut total_n_modified: i32 = 0;
let mut upserted_list: Vec<Document> = Vec::new();
@@ -150,6 +153,22 @@ async fn handle_update(
updated.get("_id").unwrap().clone()
};
// Pre-check unique index constraints before upsert insert.
if let Some(engine) = ctx.indexes.get(&ns_key) {
if let Err(e) = engine.check_unique_constraints(&updated) {
write_errors.push(doc! {
"index": idx as i32,
"code": 11000_i32,
"codeName": "DuplicateKey",
"errmsg": e.to_string(),
});
if ordered {
break;
}
continue;
}
}
// Insert the new document.
match ctx.storage.insert_one(db, coll, updated.clone()).await {
Ok(id_str) => {
@@ -163,12 +182,12 @@ async fn handle_update(
None,
);
// Update index.
let mut engine = ctx
.indexes
.entry(ns_key.clone())
.or_insert_with(IndexEngine::new);
let _ = engine.on_insert(&updated);
// Update index (engine already initialized above).
if let Some(mut engine) = ctx.indexes.get_mut(&ns_key) {
if let Err(e) = engine.on_insert(&updated) {
tracing::error!(namespace = %ns_key, error = %e, "index update failed after upsert insert");
}
}
total_n += 1;
upserted_list.push(doc! {
@@ -216,6 +235,22 @@ async fn handle_update(
array_filters.as_deref(),
) {
Ok(updated_doc) => {
// Pre-check unique index constraints before storage write.
if let Some(engine) = ctx.indexes.get(&ns_key) {
if let Err(e) = engine.check_unique_constraints_for_update(matched_doc, &updated_doc) {
write_errors.push(doc! {
"index": idx as i32,
"code": 11000_i32,
"codeName": "DuplicateKey",
"errmsg": e.to_string(),
});
if ordered {
break;
}
continue;
}
}
let id_str = extract_id_string(matched_doc);
match ctx
.storage
@@ -235,7 +270,9 @@ async fn handle_update(
// Update index.
if let Some(mut engine) = ctx.indexes.get_mut(&ns_key) {
let _ = engine.on_update(matched_doc, &updated_doc);
if let Err(e) = engine.on_update(matched_doc, &updated_doc) {
tracing::error!(namespace = %ns_key, error = %e, "index update failed after update");
}
}
total_n += 1;
// Check if the document actually changed.
@@ -366,6 +403,9 @@ async fn handle_find_and_modify(
let ns_key = format!("{}.{}", db, coll);
// Ensure the IndexEngine is loaded with persisted specs.
drop(ctx.get_or_init_index_engine(db, coll).await);
// Load and filter documents.
let mut matched = load_filtered_docs(db, coll, &query, &ns_key, ctx).await?;
@@ -444,6 +484,13 @@ async fn handle_find_and_modify(
)
.map_err(|e| CommandError::InternalError(e.to_string()))?;
// Pre-check unique index constraints before storage write.
if let Some(engine) = ctx.indexes.get(&ns_key) {
if let Err(e) = engine.check_unique_constraints_for_update(&original_doc, &updated_doc) {
return Err(CommandError::StorageError(e.to_string()));
}
}
let id_str = extract_id_string(&original_doc);
ctx.storage
.update_by_id(db, coll, &id_str, updated_doc.clone())
@@ -461,7 +508,9 @@ async fn handle_find_and_modify(
// Update index.
if let Some(mut engine) = ctx.indexes.get_mut(&ns_key) {
let _ = engine.on_update(&original_doc, &updated_doc);
if let Err(e) = engine.on_update(&original_doc, &updated_doc) {
tracing::error!(namespace = %ns_key, error = %e, "index update failed after findAndModify update");
}
}
let return_doc = if return_new {
@@ -505,6 +554,13 @@ async fn handle_find_and_modify(
updated_doc.get("_id").unwrap().clone()
};
// Pre-check unique index constraints before upsert insert.
if let Some(engine) = ctx.indexes.get(&ns_key) {
if let Err(e) = engine.check_unique_constraints(&updated_doc) {
return Err(CommandError::StorageError(e.to_string()));
}
}
let inserted_id_str = ctx.storage
.insert_one(db, coll, updated_doc.clone())
.await?;
@@ -521,11 +577,11 @@ async fn handle_find_and_modify(
// Update index.
{
let mut engine = ctx
.indexes
.entry(ns_key.clone())
.or_insert_with(IndexEngine::new);
let _ = engine.on_insert(&updated_doc);
if let Some(mut engine) = ctx.indexes.get_mut(&ns_key) {
if let Err(e) = engine.on_insert(&updated_doc) {
tracing::error!(namespace = %ns_key, error = %e, "index update failed after findAndModify upsert");
}
}
}
let value = if return_new {
+49
View File
@@ -153,6 +153,55 @@ impl IndexEngine {
self.indexes.contains_key(name)
}
/// Read-only pre-flight check for inserting `doc`.
///
/// Walks every unique index, derives the document's key for that index and
/// reports `IndexError::DuplicateKey` as soon as the key is already
/// associated with at least one document id. Indexes for which no key can
/// be extracted from the document are skipped. The engine is not modified
/// (`&self`).
pub fn check_unique_constraints(&self, doc: &Document) -> Result<(), IndexError> {
    for index in self.indexes.values().filter(|candidate| candidate.unique) {
        let key = match Self::extract_key_bytes(doc, &index.key, index.sparse) {
            Some(bytes) => bytes,
            None => continue,
        };
        // An entry with an empty id set does not count as a conflict.
        let already_taken = match index.hash.get(&key) {
            Some(ids) => !ids.is_empty(),
            None => false,
        };
        if already_taken {
            return Err(IndexError::DuplicateKey {
                index: index.name.clone(),
                key: format!("{:?}", key),
            });
        }
    }
    Ok(())
}
/// Read-only pre-flight check for replacing `old_doc` with `new_doc`.
///
/// For every unique index the key of `new_doc` is looked up; the update
/// conflicts when that key is already held by any id other than the id
/// extracted from `old_doc`. Returns `IndexError::DuplicateKey` for the
/// first conflicting index, `Ok(())` otherwise. The engine is not modified
/// (`&self`).
pub fn check_unique_constraints_for_update(
    &self,
    old_doc: &Document,
    new_doc: &Document,
) -> Result<(), IndexError> {
    let doc_id = Self::extract_id(old_doc);
    for index in self.indexes.values().filter(|candidate| candidate.unique) {
        let key = match Self::extract_key_bytes(new_doc, &index.key, index.sparse) {
            Some(bytes) => bytes,
            None => continue,
        };
        // The document being updated may legitimately keep its own key.
        let held_by_other = match index.hash.get(&key) {
            Some(ids) => ids.iter().any(|id| *id != doc_id),
            None => false,
        };
        if held_by_other {
            return Err(IndexError::DuplicateKey {
                index: index.name.clone(),
                key: format!("{:?}", key),
            });
        }
    }
    Ok(())
}
/// Notify the engine that a document has been inserted.
/// Checks unique constraints and updates all index structures.
pub fn on_insert(&mut self, doc: &Document) -> Result<(), IndexError> {
+1 -1
View File
@@ -253,7 +253,7 @@ mod tests {
assert!(b_entry.offset > a_entry.offset);
// Verify the compacted file can be used to rebuild KeyDir
let (rebuilt, dead) = KeyDir::build_from_data_file(&data_path).unwrap();
let (rebuilt, dead, _stats) = KeyDir::build_from_data_file(&data_path).unwrap();
assert_eq!(rebuilt.len(), 2);
assert_eq!(dead, 0); // no dead records in compacted file
}
+104 -25
View File
@@ -21,7 +21,7 @@ use std::sync::Arc;
use async_trait::async_trait;
use bson::{doc, oid::ObjectId, Document};
use dashmap::DashMap;
use tracing::debug;
use tracing::{debug, info};
use crate::adapter::StorageAdapter;
use crate::binary_wal::{BinaryWal, WalOpType};
@@ -83,6 +83,20 @@ impl CollectionState {
.map_err(|e| StorageError::SerializationError(format!("BSON decode: {e}")))
}
/// Make sure a data file starts with the SMARTDB file header.
///
/// Seeks to the end of `file`; when the file is empty (end position 0) a
/// freshly built `FileType::Data` header is written and `data_file_size` is
/// advanced by `FILE_HEADER_SIZE`. Non-empty files are left untouched.
/// Must be called under write_lock.
///
/// NOTE(review): if `data_file_size` is already seeded with
/// `FILE_HEADER_SIZE` for a not-yet-existing data file, this increment would
/// count the header twice — confirm against the collection initialization.
fn ensure_data_header(&self, file: &mut std::fs::File) -> StorageResult<()> {
    let end_offset = file.seek(SeekFrom::End(0))?;
    if end_offset != 0 {
        // File already has content; nothing to do.
        return Ok(());
    }

    let header_bytes = FileHeader::new(FileType::Data).encode();
    file.write_all(&header_bytes)?;
    self.data_file_size
        .fetch_add(FILE_HEADER_SIZE as u64, Ordering::Relaxed);
    Ok(())
}
/// Append a data record and update the KeyDir. Must be called under write_lock.
fn append_record(
&self,
@@ -104,6 +118,7 @@ impl CollectionState {
.append(true)
.open(&data_path)?;
self.ensure_data_header(&mut file)?;
let offset = file.seek(SeekFrom::End(0))?;
file.write_all(&encoded)?;
file.sync_all()?;
@@ -137,6 +152,7 @@ impl CollectionState {
.append(true)
.open(&data_path)?;
self.ensure_data_header(&mut file)?;
file.write_all(&encoded)?;
file.sync_all()?;
@@ -160,6 +176,12 @@ impl CollectionState {
&self.data_file_size,
) {
tracing::warn!("compaction failed for {:?}: {e}", self.coll_dir);
} else {
// Persist hint file after successful compaction to prevent stale hints
let current_size = self.data_file_size.load(Ordering::Relaxed);
if let Err(e) = self.keydir.persist_to_hint_file(&self.hint_path(), current_size) {
tracing::warn!("failed to persist hint after compaction for {:?}: {e}", self.coll_dir);
}
}
}
}
@@ -234,33 +256,63 @@ impl FileStorageAdapter {
let hint_path = coll_dir.join("keydir.hint");
// Try loading from hint file first, fall back to data file scan
let (keydir, dead_bytes) = if hint_path.exists() && data_path.exists() {
let (keydir, dead_bytes, loaded_from_hint) = if hint_path.exists() && data_path.exists() {
match KeyDir::load_from_hint_file(&hint_path) {
Ok(Some(kd)) => {
debug!("loaded KeyDir from hint file: {:?}", hint_path);
// We don't know dead_bytes from the hint file; estimate from file size
let file_size = std::fs::metadata(&data_path)
Ok(Some((kd, stored_size))) => {
let actual_size = std::fs::metadata(&data_path)
.map(|m| m.len())
.unwrap_or(FILE_HEADER_SIZE as u64);
let live_bytes: u64 = {
let mut total = 0u64;
kd.for_each(|_, e| total += e.record_len as u64);
total
};
let dead = file_size.saturating_sub(FILE_HEADER_SIZE as u64).saturating_sub(live_bytes);
(kd, dead)
.unwrap_or(0);
// Check if data.rdb changed since the hint was written.
// If stored_size is 0, this is an old-format hint without size tracking.
let size_matches = stored_size > 0 && stored_size == actual_size;
if !size_matches {
// data.rdb size differs from hint snapshot — records were appended
// (inserts, tombstones) after the hint was written. Full scan required
// to pick up tombstones that would otherwise be invisible.
if stored_size == 0 {
debug!("hint file {:?} has no size tracking, rebuilding from data file", hint_path);
} else {
tracing::warn!(
"hint file {:?} is stale: data size changed ({} -> {}), rebuilding",
hint_path, stored_size, actual_size
);
}
let (kd, dead, _stats) = KeyDir::build_from_data_file(&data_path)?;
(kd, dead, false)
} else {
// Size matches — validate entry integrity with spot-checks
let hint_valid = kd.validate_against_data_file(&data_path, 16)
.unwrap_or(false);
if hint_valid {
debug!("loaded KeyDir from hint file: {:?}", hint_path);
let live_bytes: u64 = {
let mut total = 0u64;
kd.for_each(|_, e| total += e.record_len as u64);
total
};
let dead = actual_size.saturating_sub(FILE_HEADER_SIZE as u64).saturating_sub(live_bytes);
(kd, dead, true)
} else {
tracing::warn!("hint file {:?} failed validation, rebuilding from data file", hint_path);
let (kd, dead, _stats) = KeyDir::build_from_data_file(&data_path)?;
(kd, dead, false)
}
}
}
_ => {
debug!("hint file invalid, rebuilding KeyDir from data file");
KeyDir::build_from_data_file(&data_path)?
let (kd, dead, _stats) = KeyDir::build_from_data_file(&data_path)?;
(kd, dead, false)
}
}
} else if data_path.exists() {
KeyDir::build_from_data_file(&data_path)?
let (kd, dead, _stats) = KeyDir::build_from_data_file(&data_path)?;
(kd, dead, false)
} else {
(KeyDir::new(), 0)
(KeyDir::new(), 0, false)
};
let doc_count = keydir.len();
let data_file_size = if data_path.exists() {
std::fs::metadata(&data_path)?.len()
@@ -268,6 +320,15 @@ impl FileStorageAdapter {
FILE_HEADER_SIZE as u64
};
info!(
collection = %coll_dir.display(),
documents = doc_count,
data_bytes = data_file_size,
dead_bytes = dead_bytes,
source = if loaded_from_hint { "hint" } else { "scan" },
"loaded collection"
);
// Initialize WAL and recover
let wal = BinaryWal::new(wal_path);
wal.initialize()?;
@@ -275,10 +336,10 @@ impl FileStorageAdapter {
// Recover uncommitted WAL entries
let uncommitted = wal.recover()?;
if !uncommitted.is_empty() {
debug!(
"recovering {} uncommitted WAL entries for {:?}",
uncommitted.len(),
coll_dir
info!(
collection = %coll_dir.display(),
entries = uncommitted.len(),
"recovering uncommitted WAL entries"
);
}
@@ -415,15 +476,18 @@ impl FileStorageAdapter {
impl StorageAdapter for FileStorageAdapter {
async fn initialize(&self) -> StorageResult<()> {
std::fs::create_dir_all(&self.base_path)?;
debug!("FileStorageAdapter initialized at {:?}", self.base_path);
// Pre-load all existing collections
let mut db_count: usize = 0;
if let Ok(entries) = std::fs::read_dir(&self.base_path) {
for entry in entries.flatten() {
if entry.file_type().map(|ft| ft.is_dir()).unwrap_or(false) {
if let Some(db_name) = entry.file_name().to_str() {
let db_name = db_name.to_string();
if let Ok(colls) = self.list_collection_dirs(&db_name) {
if !colls.is_empty() {
db_count += 1;
}
for coll_name in colls {
let _ = self.get_or_init_collection(&db_name, &coll_name);
}
@@ -433,6 +497,20 @@ impl StorageAdapter for FileStorageAdapter {
}
}
info!(
databases = db_count,
collections = self.collections.len(),
path = %self.base_path.display(),
"FileStorageAdapter initialization complete"
);
// Run compaction on all collections that need it (dead weight from before crash)
for entry in self.collections.iter() {
let state = entry.value();
let _guard = state.write_lock.lock().unwrap();
state.try_compact();
}
// Start periodic compaction task (runs every 24 hours)
{
let collections = self.collections.clone();
@@ -461,10 +539,11 @@ impl StorageAdapter for FileStorageAdapter {
handle.abort();
}
// Persist all KeyDir hint files
// Persist all KeyDir hint files with current data file sizes
for entry in self.collections.iter() {
let state = entry.value();
let _ = state.keydir.persist_to_hint_file(&state.hint_path());
let current_size = state.data_file_size.load(Ordering::Relaxed);
let _ = state.keydir.persist_to_hint_file(&state.hint_path(), current_size);
}
debug!("FileStorageAdapter closed");
Ok(())
+129 -13
View File
@@ -6,7 +6,7 @@
//! The KeyDir can be rebuilt from a data file scan, or loaded quickly from a
//! persisted hint file for fast restart.
use std::io::{self, BufReader, BufWriter, Read, Write};
use std::io::{self, BufReader, BufWriter, Read, Seek, SeekFrom, Write};
use std::path::Path;
use std::sync::atomic::{AtomicU64, Ordering};
@@ -14,7 +14,7 @@ use dashmap::DashMap;
use crate::error::{StorageError, StorageResult};
use crate::record::{
FileHeader, FileType, RecordScanner, FILE_HEADER_SIZE, FORMAT_VERSION,
DataRecord, FileHeader, FileType, RecordScanner, FILE_HEADER_SIZE, FORMAT_VERSION,
};
// ---------------------------------------------------------------------------
@@ -34,6 +34,23 @@ pub struct KeyDirEntry {
pub timestamp: u64,
}
// ---------------------------------------------------------------------------
// BuildStats — statistics from building KeyDir from a data file scan
// ---------------------------------------------------------------------------
/// Statistics collected while building a KeyDir from a data file scan.
///
/// The counters are filled in by `KeyDir::build_from_data_file` and returned
/// alongside the KeyDir and the dead-byte total.
#[derive(Debug, Clone, Default)]
pub struct BuildStats {
/// Total number of records read during the scan. Every decoded record is
/// counted, so this can exceed `live_documents + tombstones +
/// superseded_records`: a record that is later removed by a tombstone is
/// counted here but in none of the other three fields.
pub total_records_scanned: u64,
/// Number of live documents in the final KeyDir.
pub live_documents: u64,
/// Number of tombstone records encountered.
pub tombstones: u64,
/// Number of records superseded by a later write for the same key.
/// Records removed by a tombstone are not counted as superseded.
pub superseded_records: u64,
}
// ---------------------------------------------------------------------------
// KeyDir
// ---------------------------------------------------------------------------
@@ -116,9 +133,9 @@ impl KeyDir {
/// Rebuild the KeyDir by scanning an entire data file.
/// The file must start with a valid `FileHeader`.
/// Returns `(keydir, dead_bytes)` where `dead_bytes` is the total size of
/// Returns `(keydir, dead_bytes, stats)` where `dead_bytes` is the total size of
/// stale records (superseded by later writes or tombstoned).
pub fn build_from_data_file(path: &Path) -> StorageResult<(Self, u64)> {
pub fn build_from_data_file(path: &Path) -> StorageResult<(Self, u64, BuildStats)> {
let file = std::fs::File::open(path)?;
let mut reader = BufReader::new(file);
@@ -135,6 +152,7 @@ impl KeyDir {
let keydir = KeyDir::new();
let mut dead_bytes: u64 = 0;
let mut stats = BuildStats::default();
let scanner = RecordScanner::new(reader, FILE_HEADER_SIZE as u64);
for result in scanner {
@@ -146,7 +164,10 @@ impl KeyDir {
let key = String::from_utf8(record.key)
.map_err(|e| StorageError::CorruptRecord(format!("invalid UTF-8 key: {e}")))?;
stats.total_records_scanned += 1;
if is_tombstone {
stats.tombstones += 1;
// Remove from index; the tombstone itself is dead weight
if let Some(prev) = keydir.remove(&key) {
dead_bytes += prev.record_len as u64;
@@ -162,11 +183,13 @@ impl KeyDir {
if let Some(prev) = keydir.insert(key, entry) {
// Previous version of same key is now dead
dead_bytes += prev.record_len as u64;
stats.superseded_records += 1;
}
}
}
Ok((keydir, dead_bytes))
stats.live_documents = keydir.len();
Ok((keydir, dead_bytes, stats))
}
// -----------------------------------------------------------------------
@@ -175,14 +198,17 @@ impl KeyDir {
/// Persist the KeyDir to a hint file for fast restart.
///
/// `data_file_size` is the current size of data.rdb — stored in the hint header
/// so that on next load we can detect if data.rdb changed (stale hint).
///
/// Hint file format (after the 64-byte file header):
/// For each entry: [key_len:u32 LE][key bytes][offset:u64 LE][record_len:u32 LE][value_len:u32 LE][timestamp:u64 LE]
pub fn persist_to_hint_file(&self, path: &Path) -> StorageResult<()> {
pub fn persist_to_hint_file(&self, path: &Path, data_file_size: u64) -> StorageResult<()> {
let file = std::fs::File::create(path)?;
let mut writer = BufWriter::new(file);
// Write file header
let hdr = FileHeader::new(FileType::Hint);
// Write file header with data_file_size for staleness detection
let hdr = FileHeader::new_hint(data_file_size);
writer.write_all(&hdr.encode())?;
// Write entries
@@ -202,7 +228,9 @@ impl KeyDir {
}
/// Load a KeyDir from a hint file. Returns None if the file doesn't exist.
pub fn load_from_hint_file(path: &Path) -> StorageResult<Option<Self>> {
/// Returns `(keydir, stored_data_file_size)` where `stored_data_file_size` is the
/// data.rdb size recorded when the hint was written (0 = old format, unknown).
pub fn load_from_hint_file(path: &Path) -> StorageResult<Option<(Self, u64)>> {
if !path.exists() {
return Ok(None);
}
@@ -231,6 +259,7 @@ impl KeyDir {
)));
}
let stored_data_file_size = hdr.data_file_size;
let keydir = KeyDir::new();
loop {
@@ -269,7 +298,87 @@ impl KeyDir {
);
}
Ok(Some(keydir))
Ok(Some((keydir, stored_data_file_size)))
}
// -----------------------------------------------------------------------
// Hint file validation
// -----------------------------------------------------------------------
/// Validate this KeyDir (loaded from a hint file) against the actual data file.
///
/// Returns `Ok(true)` if the hint appears consistent, `Ok(false)` if a rebuild
/// from the data file is recommended. I/O errors while opening or seeking the
/// data file are propagated as `Err`.
///
/// Checks:
/// 1. All entry offsets are >= FILE_HEADER_SIZE.
/// 2. All entry offsets + record_len fit within the data file size. An
///    `offset + record_len` that overflows `u64` is treated as out of bounds.
/// 3. Up to `sample_size` entries, taken at an even stride in offset order,
///    are spot-checked by decoding the record at the entry's offset and
///    verifying that its key matches.
///
/// A `sample_size` of 0 skips the spot-check; only the bounds checks run.
pub fn validate_against_data_file(&self, data_path: &Path, sample_size: usize) -> StorageResult<bool> {
    // A missing/unreadable data file is treated like an empty one.
    let file_size = std::fs::metadata(data_path)
        .map(|m| m.len())
        .unwrap_or(0);
    if file_size < FILE_HEADER_SIZE as u64 {
        // Data file is too small to even contain a header: only an empty
        // KeyDir can be consistent with it.
        return Ok(self.is_empty());
    }

    // Pass 1: bounds check all entries, collecting them for the sampling pass.
    let mut all_keys: Vec<(String, KeyDirEntry)> = Vec::with_capacity(self.len() as usize);
    let mut bounds_ok = true;
    self.for_each(|key, entry| {
        // checked_add: a corrupt hint may carry offsets close to u64::MAX;
        // a wrapped sum must not slip through the bounds check.
        let end_in_file = entry
            .offset
            .checked_add(entry.record_len as u64)
            .map_or(false, |end| end <= file_size);
        if entry.offset < FILE_HEADER_SIZE as u64 || !end_in_file {
            bounds_ok = false;
        }
        all_keys.push((key.to_string(), *entry));
    });
    if !bounds_ok {
        return Ok(false);
    }

    // Pass 2: spot-check a sample of entries by reading records from data.rdb.
    // Nothing to sample (or sampling disabled): the bounds checks are the verdict.
    if all_keys.is_empty() || sample_size == 0 {
        return Ok(true);
    }

    // Sort by offset for sequential I/O, then visit every `step`-th entry.
    all_keys.sort_by_key(|(_, e)| e.offset);
    let step = if all_keys.len() <= sample_size {
        1
    } else {
        // len > sample_size >= 1 here, so the quotient is at least 1.
        all_keys.len() / sample_size
    };

    let mut file = std::fs::File::open(data_path)?;
    for (expected_key, entry) in all_keys.iter().step_by(step).take(sample_size) {
        // Seek to the entry's offset and try to decode the record.
        file.seek(SeekFrom::Start(entry.offset))?;
        match DataRecord::decode_from(&mut file) {
            Ok(Some((record, _disk_size))) => {
                let record_key = String::from_utf8_lossy(&record.key);
                if record_key != *expected_key {
                    return Ok(false);
                }
            }
            // No record at this offset, or it failed to decode: hint is unusable.
            Ok(None) | Err(_) => {
                return Ok(false);
            }
        }
    }
    Ok(true)
}
}
@@ -372,7 +481,7 @@ mod tests {
f.write_all(&r3.encode()).unwrap();
}
let (kd, dead_bytes) = KeyDir::build_from_data_file(&data_path).unwrap();
let (kd, dead_bytes, stats) = KeyDir::build_from_data_file(&data_path).unwrap();
// Only B should be live
assert_eq!(kd.len(), 1);
@@ -381,6 +490,12 @@ mod tests {
// Dead bytes: r1 (aaa live, then superseded by tombstone) + r3 (tombstone itself)
assert!(dead_bytes > 0);
// Stats
assert_eq!(stats.total_records_scanned, 3);
assert_eq!(stats.live_documents, 1);
assert_eq!(stats.tombstones, 1);
assert_eq!(stats.superseded_records, 0); // aaa was removed by tombstone, not superseded
}
#[test]
@@ -408,9 +523,10 @@ mod tests {
},
);
kd.persist_to_hint_file(&hint_path).unwrap();
let loaded = KeyDir::load_from_hint_file(&hint_path).unwrap().unwrap();
kd.persist_to_hint_file(&hint_path, 12345).unwrap();
let (loaded, stored_size) = KeyDir::load_from_hint_file(&hint_path).unwrap().unwrap();
assert_eq!(stored_size, 12345);
assert_eq!(loaded.len(), 2);
let e1 = loaded.get("doc1").unwrap();
assert_eq!(e1.offset, 64);
+2 -1
View File
@@ -16,13 +16,14 @@ pub mod keydir;
pub mod memory;
pub mod oplog;
pub mod record;
pub mod validate;
pub use adapter::StorageAdapter;
pub use binary_wal::{BinaryWal, WalEntry, WalOpType};
pub use compaction::{compact_data_file, should_compact, CompactionResult};
pub use error::{StorageError, StorageResult};
pub use file::FileStorageAdapter;
pub use keydir::{KeyDir, KeyDirEntry};
pub use keydir::{BuildStats, KeyDir, KeyDirEntry};
pub use memory::MemoryStorageAdapter;
pub use oplog::{OpLog, OpLogEntry, OpLogStats, OpType};
pub use record::{
+21 -1
View File
@@ -79,6 +79,9 @@ pub struct FileHeader {
pub file_type: FileType,
pub flags: u32,
pub created_ms: u64,
/// For hint files: the data.rdb file size at the time the hint was written.
/// Used to detect stale hints after ungraceful shutdown. 0 = unknown (old format).
pub data_file_size: u64,
}
impl FileHeader {
@@ -89,6 +92,18 @@ impl FileHeader {
file_type,
flags: 0,
created_ms: now_ms(),
data_file_size: 0,
}
}
/// Build the header for a hint file, stamped with the current time and the
/// size of the data file the hint was derived from.
pub fn new_hint(data_file_size: u64) -> Self {
    let created_ms = now_ms();
    Self {
        file_type: FileType::Hint,
        version: FORMAT_VERSION,
        created_ms,
        data_file_size,
        flags: 0,
    }
}
@@ -100,7 +115,8 @@ impl FileHeader {
buf[10] = self.file_type as u8;
buf[11..15].copy_from_slice(&self.flags.to_le_bytes());
buf[15..23].copy_from_slice(&self.created_ms.to_le_bytes());
// bytes 23..64 are reserved (zeros)
buf[23..31].copy_from_slice(&self.data_file_size.to_le_bytes());
// bytes 31..64 are reserved (zeros)
buf
}
@@ -127,11 +143,15 @@ impl FileHeader {
let created_ms = u64::from_le_bytes([
buf[15], buf[16], buf[17], buf[18], buf[19], buf[20], buf[21], buf[22],
]);
let data_file_size = u64::from_le_bytes([
buf[23], buf[24], buf[25], buf[26], buf[27], buf[28], buf[29], buf[30],
]);
Ok(Self {
version,
file_type,
flags,
created_ms,
data_file_size,
})
}
}
+330
View File
@@ -0,0 +1,330 @@
//! Data integrity validation for RustDb storage directories.
//!
//! Provides offline validation of data files without starting the server.
//! Checks header magic, record CRC32 checksums, duplicate IDs, and
//! keydir.hint consistency.
use std::collections::HashMap;
use std::io::{BufReader, Read};
use std::path::Path;
use crate::error::{StorageError, StorageResult};
use crate::keydir::KeyDir;
use crate::record::{FileHeader, FileType, RecordScanner, FILE_HEADER_SIZE};
/// Result of validating an entire data directory.
#[derive(Debug, Clone)]
pub struct ValidationReport {
    /// One report per validated collection.
    pub collections: Vec<CollectionReport>,
}

/// Result of validating a single collection.
#[derive(Debug, Clone)]
pub struct CollectionReport {
    /// Name of the database directory the collection lives in.
    pub db: String,
    /// Name of the collection directory.
    pub collection: String,
    /// Whether the data file header decoded and had the expected file type.
    pub header_valid: bool,
    /// Number of records successfully decoded from the data file.
    pub total_records: u64,
    /// Number of keys still live after applying all tombstones.
    pub live_documents: u64,
    /// Number of tombstone records encountered.
    pub tombstones: u64,
    /// Keys that were flagged as duplicated in the data file.
    pub duplicate_ids: Vec<String>,
    /// Number of records that failed their checksum.
    pub checksum_errors: u64,
    /// Number of records that failed to decode for any other reason.
    pub decode_errors: u64,
    /// Size of the data file in bytes.
    pub data_file_size: u64,
    /// Whether a hint file was present next to the data file.
    pub hint_file_exists: bool,
    /// Number of hint entries flagged as not live in the data file or as
    /// pointing past the end of the data file.
    pub orphaned_hint_entries: u64,
    /// Free-form error messages collected during validation.
    pub errors: Vec<String>,
}

impl CollectionReport {
    /// Total number of problems recorded for this collection.
    ///
    /// An invalid header counts as one problem; every duplicate id, checksum
    /// error, decode error, orphaned hint entry and error message counts as
    /// one problem each.
    pub fn error_count(&self) -> u64 {
        let header_errors: u64 = if self.header_valid { 0 } else { 1 };
        header_errors
            + self.duplicate_ids.len() as u64
            + self.checksum_errors
            + self.decode_errors
            + self.orphaned_hint_entries
            + self.errors.len() as u64
    }
}

impl ValidationReport {
    /// Whether any errors were found across all collections.
    pub fn has_errors(&self) -> bool {
        self.collections.iter().any(|c| c.error_count() > 0)
    }

    /// Print a human-readable summary to stdout.
    pub fn print_summary(&self) {
        println!("=== SmartDB Data Integrity Report ===");
        println!();

        let mut total_errors = 0u64;
        for report in &self.collections {
            println!("Database: {}", report.db);
            println!(" Collection: {}", report.collection);
            println!(
                " Header: {}",
                if report.header_valid { "OK" } else { "INVALID" }
            );
            println!(
                " Records: {} ({} live, {} tombstones)",
                report.total_records, report.live_documents, report.tombstones
            );
            println!(" Data size: {} bytes", report.data_file_size);

            if report.duplicate_ids.is_empty() {
                println!(" Duplicates: 0");
            } else {
                // Show at most five ids so a badly damaged file stays readable.
                let ids_preview: Vec<&str> = report
                    .duplicate_ids
                    .iter()
                    .take(5)
                    .map(|s| s.as_str())
                    .collect();
                let suffix = if report.duplicate_ids.len() > 5 {
                    format!(", ... and {} more", report.duplicate_ids.len() - 5)
                } else {
                    String::new()
                };
                println!(
                    " Duplicates: {} (ids: {}{})",
                    report.duplicate_ids.len(),
                    ids_preview.join(", "),
                    suffix
                );
            }

            // Always printed, including when the count is zero.
            println!(" CRC errors: {}", report.checksum_errors);
            // Only printed when there is something to report.
            if report.decode_errors > 0 {
                println!(" Decode errors: {}", report.decode_errors);
            }

            if !report.hint_file_exists {
                println!(" Hint file: absent");
            } else if report.orphaned_hint_entries > 0 {
                println!(
                    " Hint file: STALE ({} orphaned entries)",
                    report.orphaned_hint_entries
                );
            } else {
                println!(" Hint file: OK");
            }

            for err in &report.errors {
                println!(" ERROR: {}", err);
            }
            println!();

            total_errors += report.error_count();
        }

        println!(
            "Summary: {} collection(s) checked, {} error(s) found.",
            self.collections.len(),
            total_errors
        );
    }
}
/// Walk a data directory and validate every collection found in it.
///
/// Collections are discovered by directory layout:
/// ```text
/// {base_path}/{db}/{collection}/data.rdb
/// ```
/// Non-directory entries, entries whose names are not valid UTF-8, and
/// collection directories without a `data.rdb` are skipped. Reports are
/// returned sorted by database name, then collection name.
///
/// Returns an `IoError` with kind `NotFound` if `base_path` does not exist;
/// errors while listing directories are propagated.
pub fn validate_data_directory(base_path: &str) -> StorageResult<ValidationReport> {
    let root = Path::new(base_path);
    if !root.exists() {
        let not_found = std::io::Error::new(
            std::io::ErrorKind::NotFound,
            format!("data directory not found: {base_path}"),
        );
        return Err(StorageError::IoError(not_found));
    }

    let mut reports = Vec::new();

    // First level: one directory per database.
    for db_dir in std::fs::read_dir(root)? {
        let db_dir = db_dir?;
        if !db_dir.file_type()?.is_dir() {
            continue;
        }
        let db_file_name = db_dir.file_name();
        let db_name = match db_file_name.to_str() {
            Some(name) => name,
            None => continue,
        };

        // Second level: one directory per collection.
        for coll_dir in std::fs::read_dir(db_dir.path())? {
            let coll_dir = coll_dir?;
            if !coll_dir.file_type()?.is_dir() {
                continue;
            }
            let coll_file_name = coll_dir.file_name();
            let coll_name = match coll_file_name.to_str() {
                Some(name) => name,
                None => continue,
            };

            let coll_path = coll_dir.path();
            if coll_path.join("data.rdb").exists() {
                reports.push(validate_collection(db_name, coll_name, &coll_path));
            }
        }
    }

    // Directory iteration order is platform-dependent; sort for stable output.
    reports.sort_by(|lhs, rhs| {
        lhs.db
            .cmp(&rhs.db)
            .then_with(|| lhs.collection.cmp(&rhs.collection))
    });

    Ok(ValidationReport { collections: reports })
}
/// Validate a single collection directory.
///
/// Reads `data.rdb` in `coll_dir`, checks its header, scans every record and,
/// if a `keydir.hint` file is present, cross-checks the hint against what the
/// scan found. This function never fails: every problem is recorded in the
/// returned report. Failures to stat, open or decode the header of `data.rdb`
/// end validation early with a partially filled report.
fn validate_collection(db: &str, coll: &str, coll_dir: &Path) -> CollectionReport {
    let data_path = coll_dir.join("data.rdb");
    let hint_path = coll_dir.join("keydir.hint");

    let mut report = CollectionReport {
        db: db.to_string(),
        collection: coll.to_string(),
        header_valid: false,
        total_records: 0,
        live_documents: 0,
        tombstones: 0,
        duplicate_ids: Vec::new(),
        checksum_errors: 0,
        decode_errors: 0,
        data_file_size: 0,
        hint_file_exists: hint_path.exists(),
        orphaned_hint_entries: 0,
        errors: Vec::new(),
    };

    // Get file size
    match std::fs::metadata(&data_path) {
        Ok(m) => report.data_file_size = m.len(),
        Err(e) => {
            report.errors.push(format!("cannot stat data.rdb: {e}"));
            return report;
        }
    }

    // Open and validate header
    let file = match std::fs::File::open(&data_path) {
        Ok(f) => f,
        Err(e) => {
            report.errors.push(format!("cannot open data.rdb: {e}"));
            return report;
        }
    };
    let mut reader = BufReader::new(file);
    let mut hdr_buf = [0u8; FILE_HEADER_SIZE];
    if let Err(e) = reader.read_exact(&mut hdr_buf) {
        report.errors.push(format!("cannot read header: {e}"));
        return report;
    }
    match FileHeader::decode(&hdr_buf) {
        Ok(hdr) => {
            if hdr.file_type != FileType::Data {
                // The header decoded but is of the wrong kind; the record scan
                // below is still attempted.
                report.errors.push(format!(
                    "wrong file type: expected Data, got {:?}",
                    hdr.file_type
                ));
            } else {
                report.header_valid = true;
            }
        }
        Err(e) => {
            report.errors.push(format!("invalid header: {e}"));
            return report;
        }
    }

    // Scan all records
    let mut id_counts: HashMap<String, u64> = HashMap::new();
    let mut live_ids: std::collections::HashSet<String> = std::collections::HashSet::new();
    let scanner = RecordScanner::new(reader, FILE_HEADER_SIZE as u64);
    for result in scanner {
        match result {
            Ok((_offset, record)) => {
                report.total_records += 1;
                let key = String::from_utf8_lossy(&record.key).to_string();
                if record.is_tombstone() {
                    report.tombstones += 1;
                    live_ids.remove(&key);
                } else {
                    *id_counts.entry(key.clone()).or_insert(0) += 1;
                    live_ids.insert(key);
                }
            }
            Err(e) => {
                // The error is classified by its message text.
                // NOTE(review): matching on a StorageError variant would be
                // sturdier — confirm which variant carries checksum failures.
                let err_str = e.to_string();
                if err_str.contains("checksum") || err_str.contains("Checksum") {
                    report.checksum_errors += 1;
                } else {
                    report.decode_errors += 1;
                }
                // Cannot continue scanning after a decode error — the stream position is lost
                report.errors.push(format!("record decode error: {e}"));
                break;
            }
        }
    }
    report.live_documents = live_ids.len() as u64;

    // Find duplicates (keys that appeared more than once as non-tombstone records).
    // NOTE(review): the count is not reset by a tombstone, so a key that was
    // deleted and re-inserted, or rewritten by a later record, is reported
    // here too — confirm that this is the intended meaning of "duplicate".
    for (id, count) in &id_counts {
        if *count > 1 {
            report.duplicate_ids.push(id.clone());
        }
    }
    report.duplicate_ids.sort();

    // Validate hint file if present
    if report.hint_file_exists {
        match KeyDir::load_from_hint_file(&hint_path) {
            Ok(Some((hint_kd, stored_size))) => {
                // A stored size of 0 means the hint carries no size information.
                if stored_size > 0 && stored_size != report.data_file_size {
                    report.errors.push(format!(
                        "hint file is stale: recorded data size {} but actual is {}",
                        stored_size, report.data_file_size
                    ));
                }

                // An entry is orphaned when its key is not live in the data
                // file or when its record would extend past the end of the
                // file. Each entry is counted at most once, and an end offset
                // that overflows u64 is treated as out of bounds.
                let data_file_size = report.data_file_size;
                let mut orphaned = 0u64;
                hint_kd.for_each(|key, entry| {
                    let end_in_file = entry
                        .offset
                        .checked_add(entry.record_len as u64)
                        .map_or(false, |end| end <= data_file_size);
                    if !live_ids.contains(key) || !end_in_file {
                        orphaned += 1;
                    }
                });
                report.orphaned_hint_entries += orphaned;
            }
            Ok(None) => {
                // The loader yielded no KeyDir even though the hint file was
                // seen when validation of this collection started.
                report.errors.push("hint file exists but is empty".into());
            }
            Err(e) => {
                report.errors.push(format!("hint file decode error: {e}"));
            }
        }
    }

    report
}
+104 -3
View File
@@ -1,6 +1,8 @@
pub mod management;
use std::path::PathBuf;
use std::sync::Arc;
use std::time::Duration;
use anyhow::Result;
use dashmap::DashMap;
@@ -14,7 +16,7 @@ use rustdb_config::{RustDbOptions, StorageType};
use rustdb_wire::{WireCodec, OP_QUERY};
use rustdb_wire::{encode_op_msg_response, encode_op_reply_response};
use rustdb_storage::{StorageAdapter, MemoryStorageAdapter, FileStorageAdapter, OpLog};
// IndexEngine is used indirectly via CommandContext
use rustdb_index::{IndexEngine, IndexOptions};
use rustdb_txn::{TransactionEngine, SessionEngine};
use rustdb_commands::{CommandRouter, CommandContext};
@@ -33,7 +35,16 @@ impl RustDb {
// Create storage adapter
let storage: Arc<dyn StorageAdapter> = match options.storage {
StorageType::Memory => {
let adapter = MemoryStorageAdapter::new();
let adapter = if let Some(ref pp) = options.persist_path {
tracing::info!("MemoryStorageAdapter with periodic persistence to {}", pp);
MemoryStorageAdapter::with_persist_path(PathBuf::from(pp))
} else {
tracing::warn!(
"SmartDB is using in-memory storage — data will NOT survive a restart. \
Set storage to 'file' for durable persistence."
);
MemoryStorageAdapter::new()
};
Arc::new(adapter)
}
StorageType::File => {
@@ -49,9 +60,99 @@ impl RustDb {
// Initialize storage
storage.initialize().await?;
// Restore any previously persisted state (no-op for file storage and
// memory storage without a persist_path).
storage.restore().await?;
// Spawn periodic persistence task for memory storage with persist_path.
if options.storage == StorageType::Memory && options.persist_path.is_some() {
let persist_storage = storage.clone();
let interval_ms = options.persist_interval_ms;
tokio::spawn(async move {
let mut interval = tokio::time::interval(Duration::from_millis(interval_ms));
interval.tick().await; // skip the immediate first tick
loop {
interval.tick().await;
if let Err(e) = persist_storage.persist().await {
tracing::error!("Periodic persist failed: {}", e);
}
}
});
}
let indexes: Arc<DashMap<String, IndexEngine>> = Arc::new(DashMap::new());
// Restore persisted indexes from storage.
if let Ok(databases) = storage.list_databases().await {
for db_name in &databases {
if let Ok(collections) = storage.list_collections(db_name).await {
for coll_name in &collections {
if let Ok(specs) = storage.get_indexes(db_name, coll_name).await {
let has_custom = specs.iter().any(|s| {
s.get_str("name").unwrap_or("_id_") != "_id_"
});
if !has_custom {
continue;
}
let ns_key = format!("{}.{}", db_name, coll_name);
let mut engine = IndexEngine::new();
for spec in &specs {
let name = spec.get_str("name").unwrap_or("").to_string();
if name == "_id_" {
continue; // already created by IndexEngine::new()
}
let key = match spec.get("key") {
Some(bson::Bson::Document(k)) => k.clone(),
_ => continue,
};
let unique = matches!(spec.get("unique"), Some(bson::Bson::Boolean(true)));
let sparse = matches!(spec.get("sparse"), Some(bson::Bson::Boolean(true)));
let expire_after_seconds = match spec.get("expireAfterSeconds") {
Some(bson::Bson::Int32(n)) => Some(*n as u64),
Some(bson::Bson::Int64(n)) => Some(*n as u64),
_ => None,
};
let options = IndexOptions {
name: Some(name.clone()),
unique,
sparse,
expire_after_seconds,
};
if let Err(e) = engine.create_index(key, options) {
tracing::warn!(
namespace = %ns_key,
index = %name,
error = %e,
"failed to restore index"
);
}
}
// Rebuild index data from existing documents.
if let Ok(docs) = storage.find_all(db_name, coll_name).await {
if !docs.is_empty() {
engine.rebuild_from_documents(&docs);
}
}
tracing::info!(
namespace = %ns_key,
indexes = engine.list_indexes().len(),
"restored indexes"
);
indexes.insert(ns_key, engine);
}
}
}
}
}
let ctx = Arc::new(CommandContext {
storage,
indexes: Arc::new(DashMap::new()),
indexes,
transactions: Arc::new(TransactionEngine::new()),
sessions: Arc::new(SessionEngine::new(30 * 60 * 1000, 60 * 1000)),
cursors: Arc::new(DashMap::new()),
+17 -1
View File
@@ -25,6 +25,10 @@ struct Cli {
#[arg(long)]
validate: bool,
/// Validate data integrity of a storage directory (offline check)
#[arg(long, value_name = "PATH")]
validate_data: Option<String>,
/// Run in management mode (JSON-over-stdin IPC for TypeScript wrapper)
#[arg(long)]
management: bool,
@@ -55,7 +59,7 @@ async fn main() -> Result<()> {
let options = RustDbOptions::from_file(&cli.config)
.map_err(|e| anyhow::anyhow!("Failed to load config '{}': {}", cli.config, e))?;
// Validate-only mode
// Validate-only mode (config)
if cli.validate {
match options.validate() {
Ok(()) => {
@@ -69,6 +73,18 @@ async fn main() -> Result<()> {
}
}
// Validate data integrity mode
if let Some(ref data_path) = cli.validate_data {
tracing::info!("Validating data integrity at {}", data_path);
let report = rustdb_storage::validate::validate_data_directory(data_path)
.map_err(|e| anyhow::anyhow!("Validation failed: {}", e))?;
report.print_summary();
if report.has_errors() {
std::process::exit(1);
}
return Ok(());
}
// Create and start server
let mut db = RustDb::new(options).await?;
db.start().await?;
+191
View File
@@ -0,0 +1,191 @@
import { expect, tap } from '@git.zone/tstest/tapbundle';
import * as smartdb from '../ts/index.js';
import { MongoClient, Db } from 'mongodb';
import * as fs from 'fs';
import * as path from 'path';
import * as os from 'os';
// ---------------------------------------------------------------------------
// Test: Deletes persist across restart (tombstone + hint staleness detection)
// Covers: append_tombstone to data.rdb, hint file data_file_size tracking,
// stale hint detection on restart
// ---------------------------------------------------------------------------
let tmpDir: string;
let localDb: smartdb.LocalSmartDb;
let client: MongoClient;
let db: Db;
/** Creates a uniquely named scratch directory under the OS temp dir and returns its path. */
function makeTmpDir(): string {
  const prefix = path.join(os.tmpdir(), 'smartdb-delete-test-');
  return fs.mkdtempSync(prefix);
}
/**
 * Recursively removes `dir` and everything below it.
 *
 * A missing directory is not an error: `force: true` makes `rmSync` ignore
 * paths that do not exist, so no separate existence check is needed.
 */
function cleanTmpDir(dir: string): void {
  fs.rmSync(dir, { recursive: true, force: true });
}
// ============================================================================
// Setup
// ============================================================================
tap.test('setup: start local db and insert documents', async () => {
tmpDir = makeTmpDir();
localDb = new smartdb.LocalSmartDb({ folderPath: tmpDir });
const info = await localDb.start();
client = new MongoClient(info.connectionUri, {
directConnection: true,
serverSelectionTimeoutMS: 5000,
});
await client.connect();
db = client.db('deletetest');
const coll = db.collection('items');
await coll.insertMany([
{ name: 'keep-1', value: 100 },
{ name: 'keep-2', value: 200 },
{ name: 'delete-me', value: 999 },
{ name: 'keep-3', value: 300 },
]);
const count = await coll.countDocuments();
expect(count).toEqual(4);
});
// ============================================================================
// Delete and verify
// ============================================================================
tap.test('delete-persistence: delete a document', async () => {
const coll = db.collection('items');
const result = await coll.deleteOne({ name: 'delete-me' });
expect(result.deletedCount).toEqual(1);
const remaining = await coll.countDocuments();
expect(remaining).toEqual(3);
const deleted = await coll.findOne({ name: 'delete-me' });
expect(deleted).toBeNull();
});
// ============================================================================
// Graceful restart: delete survives
// ============================================================================
tap.test('delete-persistence: graceful stop and restart', async () => {
await client.close();
await localDb.stop(); // graceful — writes hint file
localDb = new smartdb.LocalSmartDb({ folderPath: tmpDir });
const info = await localDb.start();
client = new MongoClient(info.connectionUri, {
directConnection: true,
serverSelectionTimeoutMS: 5000,
});
await client.connect();
db = client.db('deletetest');
});
tap.test('delete-persistence: deleted doc stays deleted after graceful restart', async () => {
const coll = db.collection('items');
const count = await coll.countDocuments();
expect(count).toEqual(3);
const deleted = await coll.findOne({ name: 'delete-me' });
expect(deleted).toBeNull();
// The remaining docs are intact
const keep1 = await coll.findOne({ name: 'keep-1' });
expect(keep1).toBeTruthy();
expect(keep1!.value).toEqual(100);
});
// ============================================================================
// Simulate ungraceful restart: delete after hint write, then restart
// The hint file data_file_size check should detect the stale hint
// ============================================================================
tap.test('delete-persistence: insert and delete more docs, then restart', async () => {
const coll = db.collection('items');
// Insert a new doc
await coll.insertOne({ name: 'temporary', value: 777 });
expect(await coll.countDocuments()).toEqual(4);
// Delete it
await coll.deleteOne({ name: 'temporary' });
expect(await coll.countDocuments()).toEqual(3);
const gone = await coll.findOne({ name: 'temporary' });
expect(gone).toBeNull();
});
tap.test('delete-persistence: stop and restart again', async () => {
await client.close();
await localDb.stop();
localDb = new smartdb.LocalSmartDb({ folderPath: tmpDir });
const info = await localDb.start();
client = new MongoClient(info.connectionUri, {
directConnection: true,
serverSelectionTimeoutMS: 5000,
});
await client.connect();
db = client.db('deletetest');
});
tap.test('delete-persistence: all deletes survived second restart', async () => {
const coll = db.collection('items');
const count = await coll.countDocuments();
expect(count).toEqual(3);
// Both deletes are permanent
expect(await coll.findOne({ name: 'delete-me' })).toBeNull();
expect(await coll.findOne({ name: 'temporary' })).toBeNull();
// Survivors intact
const names = (await coll.find({}).toArray()).map(d => d.name).sort();
expect(names).toEqual(['keep-1', 'keep-2', 'keep-3']);
});
// ============================================================================
// Delete all docs and verify empty after restart
// ============================================================================
tap.test('delete-persistence: delete all remaining docs', async () => {
const coll = db.collection('items');
await coll.deleteMany({});
expect(await coll.countDocuments()).toEqual(0);
});
tap.test('delete-persistence: restart with empty collection', async () => {
await client.close();
await localDb.stop();
localDb = new smartdb.LocalSmartDb({ folderPath: tmpDir });
const info = await localDb.start();
client = new MongoClient(info.connectionUri, {
directConnection: true,
serverSelectionTimeoutMS: 5000,
});
await client.connect();
db = client.db('deletetest');
});
tap.test('delete-persistence: collection is empty after restart', async () => {
const coll = db.collection('items');
const count = await coll.countDocuments();
expect(count).toEqual(0);
});
// ============================================================================
// Cleanup
// ============================================================================
tap.test('delete-persistence: cleanup', async () => {
  // Each teardown step runs even if the previous one fails, so a rejected
  // client.close() cannot leave the server running or the temp dir on disk.
  try {
    await client.close();
  } finally {
    try {
      await localDb.stop();
    } finally {
      cleanTmpDir(tmpDir);
    }
  }
});
export default tap.start();
+126
View File
@@ -0,0 +1,126 @@
import { expect, tap } from '@git.zone/tstest/tapbundle';
import * as smartdb from '../ts/index.js';
import { MongoClient, Db } from 'mongodb';
import * as fs from 'fs';
import * as path from 'path';
import * as os from 'os';
// ---------------------------------------------------------------------------
// Test: Missing data.rdb header recovery + startup logging
// Covers: ensure_data_header, BuildStats, info-level startup logging
// ---------------------------------------------------------------------------
let tmpDir: string;
let localDb: smartdb.LocalSmartDb;
let client: MongoClient;
let db: Db;
/** Create a uniquely named scratch directory under the OS temp dir and return its path. */
function makeTmpDir(): string {
  const prefix = path.join(os.tmpdir(), 'smartdb-header-test-');
  return fs.mkdtempSync(prefix);
}
/** Recursively delete `dir`; does nothing when the path does not exist. */
function cleanTmpDir(dir: string): void {
  if (!fs.existsSync(dir)) {
    return;
  }
  fs.rmSync(dir, { recursive: true, force: true });
}
// ============================================================================
// Setup: create data and shut down cleanly (on-disk files are tampered with afterwards)
// ============================================================================
tap.test('setup: start, insert data, stop', async () => {
  // Start a server on a fresh scratch directory and connect to it.
  tmpDir = makeTmpDir();
  localDb = new smartdb.LocalSmartDb({ folderPath: tmpDir });
  const { connectionUri } = await localDb.start();
  const clientOptions = { directConnection: true, serverSelectionTimeoutMS: 5000 };
  client = new MongoClient(connectionUri, clientOptions);
  await client.connect();
  db = client.db('headertest');

  // Seed three documents that later tests expect to find again.
  const seedDocs = [
    { key: 'a', val: 1 },
    { key: 'b', val: 2 },
    { key: 'c', val: 3 },
  ];
  await db.collection('docs').insertMany(seedDocs);

  // Shut everything down so the next test works on files at rest.
  await client.close();
  await localDb.stop();
});
// ============================================================================
// Delete hint file and restart: should rebuild from data.rdb scan
// ============================================================================
tap.test('header-recovery: delete hint file and restart', async () => {
  // Remove the hint file of the 'docs' collection, if one was written.
  // NOTE(review): when nothing exists at this path the test continues
  // without deleting anything — confirm the path matches the storage layout.
  const hintPath = path.join(tmpDir, 'headertest', 'docs', 'keydir.hint');
  if (fs.existsSync(hintPath)) {
    fs.unlinkSync(hintPath);
  }

  // Start a server on the same folder and reconnect to it.
  localDb = new smartdb.LocalSmartDb({ folderPath: tmpDir });
  const { connectionUri } = await localDb.start();
  const clientOptions = { directConnection: true, serverSelectionTimeoutMS: 5000 };
  client = new MongoClient(connectionUri, clientOptions);
  await client.connect();
  db = client.db('headertest');
});
tap.test('header-recovery: data intact after hint deletion', async () => {
  const docs = db.collection('docs');
  // All three seeded documents are still there.
  expect(await docs.countDocuments()).toEqual(3);
  // Spot-check one document's payload.
  const docA = await docs.findOne({ key: 'a' });
  expect(docA!.val).toEqual(1);
});
// ============================================================================
// Write new data after restart, stop, restart again
// ============================================================================
tap.test('header-recovery: write after hint-less restart', async () => {
  const docs = db.collection('docs');
  // A fourth document on top of the three seeded ones.
  await docs.insertOne({ key: 'd', val: 4 });
  const total = await docs.countDocuments();
  expect(total).toEqual(4);
});
tap.test('header-recovery: restart and verify all data', async () => {
  // Tear down the current client/server pair.
  await client.close();
  await localDb.stop();

  // Boot a fresh server on the same folder and reconnect to it.
  localDb = new smartdb.LocalSmartDb({ folderPath: tmpDir });
  const { connectionUri } = await localDb.start();
  const clientOptions = { directConnection: true, serverSelectionTimeoutMS: 5000 };
  client = new MongoClient(connectionUri, clientOptions);
  await client.connect();
  db = client.db('headertest');

  // Both the seeded documents and the later insert must be present.
  const docs = db.collection('docs');
  expect(await docs.countDocuments()).toEqual(4);
  const allDocs = await docs.find({}).toArray();
  const keys = allDocs.map((doc) => doc.key).sort();
  expect(keys).toEqual(['a', 'b', 'c', 'd']);
});
// ============================================================================
// Cleanup
// ============================================================================
tap.test('header-recovery: cleanup', async () => {
  // Each teardown step runs even if the previous one fails, so a rejected
  // client.close() cannot leave the server running or the temp dir on disk.
  try {
    await client.close();
  } finally {
    try {
      await localDb.stop();
    } finally {
      cleanTmpDir(tmpDir);
    }
  }
});
export default tap.start();
+82
View File
@@ -0,0 +1,82 @@
import { expect, tap } from '@git.zone/tstest/tapbundle';
import * as smartdb from '../ts/index.js';
import * as fs from 'fs';
import * as net from 'net';
import * as path from 'path';
import * as os from 'os';
// ---------------------------------------------------------------------------
// Test: Stale socket cleanup on startup
// Covers: LocalSmartDb.cleanStaleSockets(), isSocketAlive()
// ---------------------------------------------------------------------------
/** Create a uniquely named scratch directory under the OS temp dir and return its path. */
function makeTmpDir(): string {
  const prefix = path.join(os.tmpdir(), 'smartdb-socket-test-');
  return fs.mkdtempSync(prefix);
}
/** Recursively delete `dir`; does nothing when the path does not exist. */
function cleanTmpDir(dir: string): void {
  if (!fs.existsSync(dir)) {
    return;
  }
  fs.rmSync(dir, { recursive: true, force: true });
}
// ============================================================================
// Stale socket cleanup: active sockets are preserved
// ============================================================================
tap.test('stale-sockets: does not remove active sockets', async () => {
  const tmpDir = makeTmpDir();
  // Lives in os.tmpdir() and matches the smartdb-*.sock naming, so the
  // startup cleanup will probe it.
  const activeSocketPath = path.join(os.tmpdir(), `smartdb-active-${Date.now()}.sock`);
  const activeServer = net.createServer();
  let localDb: smartdb.LocalSmartDb | undefined;

  try {
    // Create an active socket (server still listening). A listen failure
    // rejects instead of leaving the test hanging.
    await new Promise<void>((resolve, reject) => {
      activeServer.once('error', reject);
      activeServer.listen(activeSocketPath, () => {
        activeServer.off('error', reject);
        resolve();
      });
    });
    expect(fs.existsSync(activeSocketPath)).toBeTrue();

    // Start LocalSmartDb — should NOT remove the active socket
    localDb = new smartdb.LocalSmartDb({ folderPath: tmpDir });
    await localDb.start();
    expect(fs.existsSync(activeSocketPath)).toBeTrue();
  } finally {
    // Teardown runs even when an expectation above fails, so neither the
    // database nor the helper server keeps the process alive.
    try {
      if (localDb?.running) {
        await localDb.stop();
      }
    } finally {
      if (activeServer.listening) {
        await new Promise<void>((resolve) => activeServer.close(() => resolve()));
      }
      try { fs.unlinkSync(activeSocketPath); } catch {}
      cleanTmpDir(tmpDir);
    }
  }
});
// ============================================================================
// Stale socket cleanup: startup works with no stale sockets
// ============================================================================
tap.test('stale-sockets: startup works cleanly with no stale sockets', async () => {
  const tmpDir = makeTmpDir();
  const localDb = new smartdb.LocalSmartDb({ folderPath: tmpDir });
  try {
    const info = await localDb.start();
    expect(localDb.running).toBeTrue();
    expect(info.socketPath).toBeTruthy();
  } finally {
    // Teardown runs even when an expectation fails, so the server and the
    // temp dir are not leaked.
    try {
      if (localDb.running) {
        await localDb.stop();
      }
    } finally {
      cleanTmpDir(tmpDir);
    }
  }
});
// ============================================================================
// Stale socket cleanup: the socket file for the current instance is cleaned on stop
// ============================================================================
tap.test('stale-sockets: own socket file is removed on stop', async () => {
  const tmpDir = makeTmpDir();
  const localDb = new smartdb.LocalSmartDb({ folderPath: tmpDir });
  try {
    const info = await localDb.start();
    expect(fs.existsSync(info.socketPath)).toBeTrue();
    await localDb.stop();
    // Socket file should be gone after graceful stop
    expect(fs.existsSync(info.socketPath)).toBeFalse();
  } finally {
    // If an expectation failed before stop() ran, stop the server here so
    // it is not leaked; the temp dir is removed in every case.
    try {
      if (localDb.running) {
        await localDb.stop();
      }
    } finally {
      cleanTmpDir(tmpDir);
    }
  }
});
export default tap.start();
+180
View File
@@ -0,0 +1,180 @@
import { expect, tap } from '@git.zone/tstest/tapbundle';
import * as smartdb from '../ts/index.js';
import { MongoClient, Db } from 'mongodb';
import * as fs from 'fs';
import * as path from 'path';
import * as os from 'os';
// ---------------------------------------------------------------------------
// Test: Unique index enforcement via wire protocol
// Covers: unique index pre-check, createIndexes persistence, index restoration
// ---------------------------------------------------------------------------
let tmpDir: string;
let localDb: smartdb.LocalSmartDb;
let client: MongoClient;
let db: Db;
/** Create a uniquely named scratch directory under the OS temp dir and return its path. */
function makeTmpDir(): string {
  const prefix = path.join(os.tmpdir(), 'smartdb-unique-test-');
  return fs.mkdtempSync(prefix);
}
/** Recursively delete `dir`; does nothing when the path does not exist. */
function cleanTmpDir(dir: string): void {
  if (!fs.existsSync(dir)) {
    return;
  }
  fs.rmSync(dir, { recursive: true, force: true });
}
// ============================================================================
// Setup
// ============================================================================
tap.test('setup: start local db', async () => {
  // Start a server on a fresh scratch directory and connect to it.
  tmpDir = makeTmpDir();
  localDb = new smartdb.LocalSmartDb({ folderPath: tmpDir });
  const { connectionUri } = await localDb.start();
  const clientOptions = { directConnection: true, serverSelectionTimeoutMS: 5000 };
  client = new MongoClient(connectionUri, clientOptions);
  await client.connect();
  db = client.db('uniquetest');
});
// ============================================================================
// Unique index enforcement on insert
// ============================================================================
tap.test('unique-index: createIndex with unique: true', async () => {
  const users = db.collection('users');
  // Insert one document first, so the index is created on a non-empty collection.
  await users.insertOne({ email: 'alice@example.com', name: 'Alice' });
  const createdName = await users.createIndex({ email: 1 }, { unique: true });
  expect(createdName).toBeTruthy();
});
tap.test('unique-index: reject duplicate on insertOne', async () => {
  const users = db.collection('users');

  // Try to insert a second document with an email that already exists.
  let rejected = false;
  let errorCode: unknown;
  try {
    await users.insertOne({ email: 'alice@example.com', name: 'Alice2' });
  } catch (err: any) {
    rejected = true;
    errorCode = err.code;
  }
  expect(rejected).toBeTrue();
  expect(errorCode).toEqual(11000);

  // Verify only 1 document exists
  expect(await users.countDocuments()).toEqual(1);
});
tap.test('unique-index: allow insert with different unique value', async () => {
  const users = db.collection('users');
  // A new email value does not collide with the existing document.
  await users.insertOne({ email: 'bob@example.com', name: 'Bob' });
  expect(await users.countDocuments()).toEqual(2);
});
// ============================================================================
// Unique index enforcement on update
// ============================================================================
tap.test('unique-index: reject duplicate on updateOne that changes unique field', async () => {
  const users = db.collection('users');
  const bobFilter = { email: 'bob@example.com' };
  const collidingUpdate = { $set: { email: 'alice@example.com' } };

  // Changing Bob's email to Alice's must be refused.
  let rejected = false;
  let errorCode: unknown;
  try {
    await users.updateOne(bobFilter, collidingUpdate);
  } catch (err: any) {
    rejected = true;
    errorCode = err.code;
  }
  expect(rejected).toBeTrue();
  expect(errorCode).toEqual(11000);

  // Bob's email should be unchanged
  const bob = await users.findOne({ name: 'Bob' });
  expect(bob!.email).toEqual('bob@example.com');
});
tap.test('unique-index: allow update that keeps same unique value', async () => {
  const users = db.collection('users');
  const bobFilter = { email: 'bob@example.com' };

  // Only the name changes; the indexed email field is left alone.
  await users.updateOne(bobFilter, { $set: { name: 'Robert' } });

  const bob = await users.findOne(bobFilter);
  expect(bob!.name).toEqual('Robert');
});
// ============================================================================
// Unique index enforcement on upsert
// ============================================================================
tap.test('unique-index: reject duplicate on upsert insert', async () => {
  const users = db.collection('users');
  const filter = { email: 'new@example.com' };
  const collidingUpdate = { $set: { email: 'alice@example.com', name: 'Imposter' } };

  // The upsert would produce a document carrying Alice's email, so it must fail.
  // This test only checks that an error is raised; it does not inspect the error code.
  let rejected = false;
  try {
    await users.updateOne(filter, collidingUpdate, { upsert: true });
  } catch {
    rejected = true;
  }
  expect(rejected).toBeTrue();
});
// ============================================================================
// Unique index survives restart (persistence + restoration)
// ============================================================================
tap.test('unique-index: stop and restart', async () => {
  // Tear down the current client/server pair.
  await client.close();
  await localDb.stop();

  // Boot a fresh server on the same folder and reconnect to it.
  localDb = new smartdb.LocalSmartDb({ folderPath: tmpDir });
  const { connectionUri } = await localDb.start();
  const clientOptions = { directConnection: true, serverSelectionTimeoutMS: 5000 };
  client = new MongoClient(connectionUri, clientOptions);
  await client.connect();
  db = client.db('uniquetest');
});
tap.test('unique-index: enforcement persists after restart', async () => {
  const users = db.collection('users');

  // Data should still be there
  expect(await users.countDocuments()).toEqual(2);

  // Unique constraint should still be enforced without calling createIndex again
  let rejected = false;
  let errorCode: unknown;
  try {
    await users.insertOne({ email: 'alice@example.com', name: 'Alice3' });
  } catch (err: any) {
    rejected = true;
    errorCode = err.code;
  }
  expect(rejected).toBeTrue();
  expect(errorCode).toEqual(11000);

  // Count unchanged
  expect(await users.countDocuments()).toEqual(2);
});
// ============================================================================
// Cleanup
// ============================================================================
tap.test('unique-index: cleanup', async () => {
  // Each teardown step runs even if the previous one fails, so a rejected
  // client.close() cannot leave the server running or the temp dir on disk.
  try {
    await client.close();
  } finally {
    try {
      await localDb.stop();
    } finally {
      cleanTmpDir(tmpDir);
    }
  }
});
export default tap.start();
+1 -1
View File
@@ -3,6 +3,6 @@
*/
// Package identity: name, current version and description.
export const commitinfo = {
  name: '@push.rocks/smartdb',
  version: '2.6.2',
  description: 'A MongoDB-compatible embedded database server with wire protocol support, backed by a high-performance Rust engine.'
}
+54
View File
@@ -1,4 +1,6 @@
import * as crypto from 'crypto';
import * as fs from 'fs/promises';
import * as net from 'net';
import * as path from 'path';
import * as os from 'os';
import { SmartdbServer } from '../ts_smartdb/index.js';
@@ -66,6 +68,55 @@ export class LocalSmartDb {
return path.join(os.tmpdir(), `smartdb-${randomId}.sock`);
}
/**
 * Check if a Unix socket is alive by attempting to connect.
 *
 * Resolves `true` when the connection is accepted. Resolves `false` only when
 * the failure shows that nobody is listening: `ECONNREFUSED` (socket file
 * without a listener) or `ENOENT` (socket file already gone). Any other
 * connect error (for example a busy listener or a permission problem) and
 * the 500 ms timeout are inconclusive, so the socket is reported as alive;
 * callers that delete dead sockets then never remove one that may still be
 * in use.
 *
 * @param socketPath - Filesystem path of the Unix domain socket to probe.
 * @returns Whether the socket should be treated as having a live listener.
 */
private static isSocketAlive(socketPath: string): Promise<boolean> {
  return new Promise((resolve) => {
    const client = net.createConnection({ path: socketPath }, () => {
      // Connected: somebody is listening.
      client.destroy();
      resolve(true);
    });
    client.on('error', (err: Error & { code?: string }) => {
      const provablyDead = err.code === 'ECONNREFUSED' || err.code === 'ENOENT';
      resolve(!provablyDead);
    });
    client.setTimeout(500, () => {
      // No verdict within the time limit: err on the side of "alive".
      client.destroy();
      resolve(true);
    });
  });
}
/**
 * Remove leftover smartdb-*.sock files from the OS temp directory
 * (`os.tmpdir()`).
 *
 * Every matching entry that is a socket is probed with `isSocketAlive`; it
 * is unlinked when the probe reports it as not alive. Failures to list the
 * directory or to stat/unlink a single entry are ignored.
 */
private static async cleanStaleSockets(): Promise<void> {
  const tmpDir = os.tmpdir();

  let entries: string[];
  try {
    entries = await fs.readdir(tmpDir);
  } catch {
    // Temp directory cannot be listed: nothing to clean.
    return;
  }

  for (const entry of entries) {
    const looksLikeOurSocket = entry.startsWith('smartdb-') && entry.endsWith('.sock');
    if (!looksLikeOurSocket) {
      continue;
    }

    const candidate = path.join(tmpDir, entry);
    try {
      const isSocket = (await fs.stat(candidate)).isSocket();
      if (isSocket && !(await LocalSmartDb.isSocketAlive(candidate))) {
        await fs.unlink(candidate);
      }
    } catch {
      // File may have been removed already; ignore
    }
  }
}
/**
* Start the local SmartDB server and return connection info
*/
@@ -74,6 +125,9 @@ export class LocalSmartDb {
throw new Error('LocalSmartDb is already running');
}
// Clean up stale sockets from previous crashed instances
await LocalSmartDb.cleanStaleSockets();
// Run storage migration before starting the Rust engine
const migrator = new StorageMigrator(this.options.folderPath);
await migrator.run();
File diff suppressed because one or more lines are too long