Compare commits
31 Commits
| Author | SHA1 | Date | |
|---|---|---|---|
| | 8ebc1bb9e1 | | |
| | 3fc21dcd99 | | |
| | ad5e0e8a72 | | |
| | c384df20ce | | |
| | 4e944f3d05 | | |
| | e0455daa2e | | |
| | f3f1afe9af | | |
| | 94dc9cfc3f | | |
| | a9c0ced1ca | | |
| | c8626a9afd | | |
| | 55a1f66e57 | | |
| | 5b5f35821f | | |
| | e8161e6417 | | |
| | 1a10c32b12 | | |
| | cb8cb87d9f | | |
| | 96117d54b9 | | |
| | 53f58e45c3 | | |
| | 34d708be7e | | |
| | 418e8dc052 | | |
| | b8567ebe08 | | |
| | 827bfa6370 | | |
| | ceba64e34a | | |
| | 8646d58f06 | | |
| | 8ce6ff11c3 | | |
| | 5c7aaebaba | | |
| | be7d086c0b | | |
| | 91a7b69f1d | | |
| | 4e078b35d4 | | |
| | d8a8259c73 | | |
| | 9e7ce25b45 | | |
| | b634ee50d1 | | |
@@ -13,5 +13,8 @@ rust/target/
|
|||||||
package-lock.json
|
package-lock.json
|
||||||
yarn.lock
|
yarn.lock
|
||||||
|
|
||||||
|
# generated bundle (rebuilt on every build, embeds version)
|
||||||
|
ts_debugserver/bundled.ts
|
||||||
|
|
||||||
# playwright
|
# playwright
|
||||||
.playwright-mcp/
|
.playwright-mcp/
|
||||||
|
|||||||
@@ -1,5 +1,89 @@
|
|||||||
# Changelog
|
# Changelog
|
||||||
|
|
||||||
|
## 2026-04-05 - 2.5.7 - fix(repo)
|
||||||
|
no changes to commit
|
||||||
|
|
||||||
|
|
||||||
|
## 2026-04-05 - 2.5.6 - fix(repo)
|
||||||
|
no changes to commit
|
||||||
|
|
||||||
|
|
||||||
|
## 2026-04-05 - 2.5.5 - fix(repo)
|
||||||
|
no changes to commit
|
||||||
|
|
||||||
|
|
||||||
|
## 2026-04-05 - 2.5.4 - fix(package)
|
||||||
|
bump package version to 2.5.3
|
||||||
|
|
||||||
|
- Updates the package metadata version by one patch release.
|
||||||
|
|
||||||
|
## 2026-04-05 - 2.5.3 - fix(rustdb-commands)
|
||||||
|
restore persisted index initialization before writes to enforce unique constraints after restart
|
||||||
|
|
||||||
|
- load stored index specifications from storage when creating command context index engines
|
||||||
|
- rebuild index data from existing documents so custom indexes are active before insert, update, and upsert operations
|
||||||
|
- add @push.rocks/smartdata as a runtime dependency
|
||||||
|
|
||||||
|
## 2026-04-05 - 2.5.2 - fix(rustdb-indexes)
|
||||||
|
persist created indexes and restore them on server startup
|
||||||
|
|
||||||
|
- Save index specifications to storage when indexes are created.
|
||||||
|
- Remove persisted index metadata when indexes are dropped by name, key spec, or wildcard.
|
||||||
|
- Rebuild in-memory index engines from stored definitions and existing documents during startup.
|
||||||
|
|
||||||
|
## 2026-04-05 - 2.5.1 - fix(docs)
|
||||||
|
update project documentation
|
||||||
|
|
||||||
|
- Modifies a single documentation-related file with a minimal text change.
|
||||||
|
- No source code, API, or package metadata changes are indicated in the diff summary.
|
||||||
|
|
||||||
|
## 2026-04-05 - 2.5.0 - feat(storage)
|
||||||
|
add offline data validation and strengthen storage/index integrity checks
|
||||||
|
|
||||||
|
- adds a `--validate-data <PATH>` CLI mode to run offline integrity checks on storage directories
|
||||||
|
- introduces storage validation reporting for headers, checksums, duplicate ids, tombstones, and stale or orphaned hint entries
|
||||||
|
- pre-checks unique index constraints before insert, update, upsert, and findAndModify writes to prevent duplicate-key violations before storage changes
|
||||||
|
- validates hint files against data files during collection load and rebuilds indexes from data when hints are stale
|
||||||
|
- ensures new data files always receive a SMARTDB header and persists fresh hint files after successful compaction
|
||||||
|
- cleans up stale local Unix socket files before starting the TypeScript local server
|
||||||
|
|
||||||
|
## 2026-04-05 - 2.4.1 - fix(package)
|
||||||
|
update package metadata
|
||||||
|
|
||||||
|
- Adjusts package manifest content with a minimal one-line change.
|
||||||
|
|
||||||
|
## 2026-04-05 - 2.4.0 - feat(rustdb)
|
||||||
|
add restore and periodic persistence support for in-memory storage
|
||||||
|
|
||||||
|
- Restore previously persisted state during startup when a persist path is configured.
|
||||||
|
- Spawn a background task to periodically persist in-memory data using the configured interval.
|
||||||
|
- Warn when running purely in-memory without durable persistence configured.
|
||||||
|
|
||||||
|
## 2026-04-04 - 2.3.1 - fix(package)
|
||||||
|
update package metadata
|
||||||
|
|
||||||
|
- Adjusts a single package-level metadata entry in the project configuration.
|
||||||
|
|
||||||
|
## 2026-04-04 - 2.3.0 - feat(test)
|
||||||
|
add integration coverage for file storage, compaction, migration, and LocalSmartDb workflows
|
||||||
|
|
||||||
|
- adds end-to-end tests for file-backed storage creation, CRUD operations, bulk updates, persistence, and index file generation
|
||||||
|
- adds compaction stress tests covering repeated updates, tombstones, file shrinking behavior, and restart integrity
|
||||||
|
- adds migration tests for automatic v0 JSON layout detection, v1 conversion, restart persistence, and post-migration writes
|
||||||
|
- adds LocalSmartDb lifecycle and unix socket tests, including restart persistence, custom socket paths, and database isolation
|
||||||
|
|
||||||
|
## 2026-04-04 - 2.2.0 - feat(storage)
|
||||||
|
add Bitcask storage migration, binary WAL, and data compaction support
|
||||||
|
|
||||||
|
- add TypeScript storage migration from legacy JSON collections to the v1 Bitcask binary format before starting the Rust engine
|
||||||
|
- replace the legacy JSON WAL with a binary write-ahead log plus shared binary record and KeyDir infrastructure in rustdb-storage
|
||||||
|
- introduce data file compaction with dead-record reclamation and tests, and add the bson dependency for BSON serialization during migration
|
||||||
|
|
||||||
|
## 2026-04-02 - 2.1.1 - fix(package)
|
||||||
|
update package metadata
|
||||||
|
|
||||||
|
- Adjusts a single package metadata entry in package.json.
|
||||||
|
|
||||||
## 2026-04-02 - 2.1.0 - feat(smartdb)
|
## 2026-04-02 - 2.1.0 - feat(smartdb)
|
||||||
add operation log APIs, point-in-time revert support, and a web-based debug dashboard
|
add operation log APIs, point-in-time revert support, and a web-based debug dashboard
|
||||||
|
|
||||||
|
|||||||
+4
-2
@@ -1,6 +1,6 @@
|
|||||||
{
|
{
|
||||||
"name": "@push.rocks/smartdb",
|
"name": "@push.rocks/smartdb",
|
||||||
"version": "2.1.0",
|
"version": "2.5.7",
|
||||||
"private": false,
|
"private": false,
|
||||||
"description": "A MongoDB-compatible embedded database server with wire protocol support, backed by a high-performance Rust engine.",
|
"description": "A MongoDB-compatible embedded database server with wire protocol support, backed by a high-performance Rust engine.",
|
||||||
"exports": {
|
"exports": {
|
||||||
@@ -29,7 +29,9 @@
|
|||||||
"dependencies": {
|
"dependencies": {
|
||||||
"@api.global/typedserver": "^8.0.0",
|
"@api.global/typedserver": "^8.0.0",
|
||||||
"@design.estate/dees-element": "^2.0.0",
|
"@design.estate/dees-element": "^2.0.0",
|
||||||
"@push.rocks/smartrust": "^1.3.2"
|
"@push.rocks/smartdata": "7.1.5",
|
||||||
|
"@push.rocks/smartrust": "^1.3.2",
|
||||||
|
"bson": "^7.2.0"
|
||||||
},
|
},
|
||||||
"browserslist": [
|
"browserslist": [
|
||||||
"last 1 chrome versions"
|
"last 1 chrome versions"
|
||||||
|
|||||||
Generated
+10
-4
@@ -14,9 +14,15 @@ importers:
|
|||||||
'@design.estate/dees-element':
|
'@design.estate/dees-element':
|
||||||
specifier: ^2.0.0
|
specifier: ^2.0.0
|
||||||
version: 2.2.3
|
version: 2.2.3
|
||||||
|
'@push.rocks/smartdata':
|
||||||
|
specifier: 7.1.5
|
||||||
|
version: 7.1.5(socks@2.8.7)
|
||||||
'@push.rocks/smartrust':
|
'@push.rocks/smartrust':
|
||||||
specifier: ^1.3.2
|
specifier: ^1.3.2
|
||||||
version: 1.3.2
|
version: 1.3.2
|
||||||
|
bson:
|
||||||
|
specifier: ^7.2.0
|
||||||
|
version: 7.2.0
|
||||||
devDependencies:
|
devDependencies:
|
||||||
'@git.zone/tsbuild':
|
'@git.zone/tsbuild':
|
||||||
specifier: ^4.4.0
|
specifier: ^4.4.0
|
||||||
@@ -1023,8 +1029,8 @@ packages:
|
|||||||
'@push.rocks/smartcrypto@2.0.4':
|
'@push.rocks/smartcrypto@2.0.4':
|
||||||
resolution: {integrity: sha512-1+/5bsjyataf5uUkUNnnVXGRAt+gHVk1KDzozjTqgqJxHvQk1d9fVDohL6CxUhUucTPtu5VR5xNBiV8YCDuGyw==}
|
resolution: {integrity: sha512-1+/5bsjyataf5uUkUNnnVXGRAt+gHVk1KDzozjTqgqJxHvQk1d9fVDohL6CxUhUucTPtu5VR5xNBiV8YCDuGyw==}
|
||||||
|
|
||||||
'@push.rocks/smartdata@7.1.3':
|
'@push.rocks/smartdata@7.1.5':
|
||||||
resolution: {integrity: sha512-7vQJ9pdRk450yn2m9tmGPdSRlQVmxFPZjHD4sGYsfqCQPg+GLFusu+H16zpf+jKzAq4F2ZBMPaYymJHXvXiVcw==}
|
resolution: {integrity: sha512-7x7VedEg6RocWndqUPuTbY2Bh85Q/x0LOVHL4o/NVXyh3IGNtiVQ8ple4WR0qYqlHRAojX4eDSBPMiYzIasqAg==}
|
||||||
|
|
||||||
'@push.rocks/smartdelay@3.0.5':
|
'@push.rocks/smartdelay@3.0.5':
|
||||||
resolution: {integrity: sha512-mUuI7kj2f7ztjpic96FvRIlf2RsKBa5arw81AHNsndbxO6asRcxuWL8dTVxouEIK8YsBUlj0AsrCkHhMbLQdHw==}
|
resolution: {integrity: sha512-mUuI7kj2f7ztjpic96FvRIlf2RsKBa5arw81AHNsndbxO6asRcxuWL8dTVxouEIK8YsBUlj0AsrCkHhMbLQdHw==}
|
||||||
@@ -5662,7 +5668,7 @@ snapshots:
|
|||||||
'@types/node-forge': 1.3.14
|
'@types/node-forge': 1.3.14
|
||||||
node-forge: 1.4.0
|
node-forge: 1.4.0
|
||||||
|
|
||||||
'@push.rocks/smartdata@7.1.3(socks@2.8.7)':
|
'@push.rocks/smartdata@7.1.5(socks@2.8.7)':
|
||||||
dependencies:
|
dependencies:
|
||||||
'@push.rocks/lik': 6.4.0
|
'@push.rocks/lik': 6.4.0
|
||||||
'@push.rocks/smartdelay': 3.0.5
|
'@push.rocks/smartdelay': 3.0.5
|
||||||
@@ -5896,7 +5902,7 @@ snapshots:
|
|||||||
'@push.rocks/smartmongo@5.1.1(socks@2.8.7)':
|
'@push.rocks/smartmongo@5.1.1(socks@2.8.7)':
|
||||||
dependencies:
|
dependencies:
|
||||||
'@push.rocks/mongodump': 1.1.0(socks@2.8.7)
|
'@push.rocks/mongodump': 1.1.0(socks@2.8.7)
|
||||||
'@push.rocks/smartdata': 7.1.3(socks@2.8.7)
|
'@push.rocks/smartdata': 7.1.5(socks@2.8.7)
|
||||||
'@push.rocks/smartfs': 1.5.0
|
'@push.rocks/smartfs': 1.5.0
|
||||||
'@push.rocks/smartpath': 6.0.0
|
'@push.rocks/smartpath': 6.0.0
|
||||||
'@push.rocks/smartpromise': 4.2.3
|
'@push.rocks/smartpromise': 4.2.3
|
||||||
|
|||||||
Generated
+39
@@ -275,6 +275,12 @@ dependencies = [
|
|||||||
"windows-sys",
|
"windows-sys",
|
||||||
]
|
]
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "fastrand"
|
||||||
|
version = "2.4.0"
|
||||||
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
|
checksum = "a043dc74da1e37d6afe657061213aa6f425f855399a11d3463c6ecccc4dfda1f"
|
||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
name = "find-msvc-tools"
|
name = "find-msvc-tools"
|
||||||
version = "0.1.9"
|
version = "0.1.9"
|
||||||
@@ -477,6 +483,12 @@ dependencies = [
|
|||||||
"libc",
|
"libc",
|
||||||
]
|
]
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "linux-raw-sys"
|
||||||
|
version = "0.12.1"
|
||||||
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
|
checksum = "32a66949e030da00e8c7d4434b251670a91556f4144941d37452769c25d58a53"
|
||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
name = "lock_api"
|
name = "lock_api"
|
||||||
version = "0.4.14"
|
version = "0.4.14"
|
||||||
@@ -802,6 +814,7 @@ dependencies = [
|
|||||||
"dashmap",
|
"dashmap",
|
||||||
"serde",
|
"serde",
|
||||||
"serde_json",
|
"serde_json",
|
||||||
|
"tempfile",
|
||||||
"thiserror",
|
"thiserror",
|
||||||
"tokio",
|
"tokio",
|
||||||
"tracing",
|
"tracing",
|
||||||
@@ -835,6 +848,19 @@ dependencies = [
|
|||||||
"tracing",
|
"tracing",
|
||||||
]
|
]
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "rustix"
|
||||||
|
version = "1.1.4"
|
||||||
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
|
checksum = "b6fe4565b9518b83ef4f91bb47ce29620ca828bd32cb7e408f0062e9930ba190"
|
||||||
|
dependencies = [
|
||||||
|
"bitflags",
|
||||||
|
"errno",
|
||||||
|
"libc",
|
||||||
|
"linux-raw-sys",
|
||||||
|
"windows-sys",
|
||||||
|
]
|
||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
name = "rustversion"
|
name = "rustversion"
|
||||||
version = "1.0.22"
|
version = "1.0.22"
|
||||||
@@ -977,6 +1003,19 @@ version = "1.0.1"
|
|||||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
checksum = "55937e1799185b12863d447f42597ed69d9928686b8d88a1df17376a097d8369"
|
checksum = "55937e1799185b12863d447f42597ed69d9928686b8d88a1df17376a097d8369"
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "tempfile"
|
||||||
|
version = "3.27.0"
|
||||||
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
|
checksum = "32497e9a4c7b38532efcdebeef879707aa9f794296a4f0244f6f69e9bc8574bd"
|
||||||
|
dependencies = [
|
||||||
|
"fastrand",
|
||||||
|
"getrandom 0.4.2",
|
||||||
|
"once_cell",
|
||||||
|
"rustix",
|
||||||
|
"windows-sys",
|
||||||
|
]
|
||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
name = "thiserror"
|
name = "thiserror"
|
||||||
version = "2.0.18"
|
version = "2.0.18"
|
||||||
|
|||||||
@@ -66,6 +66,9 @@ uuid = { version = "1", features = ["v4", "serde"] }
|
|||||||
# Async traits
|
# Async traits
|
||||||
async-trait = "0.1"
|
async-trait = "0.1"
|
||||||
|
|
||||||
|
# Test utilities
|
||||||
|
tempfile = "3"
|
||||||
|
|
||||||
# Internal crates
|
# Internal crates
|
||||||
rustdb-config = { path = "crates/rustdb-config" }
|
rustdb-config = { path = "crates/rustdb-config" }
|
||||||
rustdb-wire = { path = "crates/rustdb-wire" }
|
rustdb-wire = { path = "crates/rustdb-wire" }
|
||||||
|
|||||||
@@ -1,8 +1,8 @@
|
|||||||
use std::sync::Arc;
|
use std::sync::Arc;
|
||||||
|
|
||||||
use bson::Document;
|
use bson::{Bson, Document};
|
||||||
use dashmap::DashMap;
|
use dashmap::DashMap;
|
||||||
use rustdb_index::IndexEngine;
|
use rustdb_index::{IndexEngine, IndexOptions};
|
||||||
use rustdb_storage::{OpLog, StorageAdapter};
|
use rustdb_storage::{OpLog, StorageAdapter};
|
||||||
use rustdb_txn::{SessionEngine, TransactionEngine};
|
use rustdb_txn::{SessionEngine, TransactionEngine};
|
||||||
|
|
||||||
@@ -24,6 +24,67 @@ pub struct CommandContext {
|
|||||||
pub oplog: Arc<OpLog>,
|
pub oplog: Arc<OpLog>,
|
||||||
}
|
}
|
||||||
|
|
||||||
|
impl CommandContext {
|
||||||
|
/// Get or lazily initialize an IndexEngine for a namespace.
|
||||||
|
///
|
||||||
|
/// If no IndexEngine exists yet for this namespace, loads persisted index
|
||||||
|
/// specs from `indexes.json` via the storage adapter, creates the engine
|
||||||
|
/// with those specs, and rebuilds index data from existing documents.
|
||||||
|
/// This ensures unique indexes are enforced even on the very first write
|
||||||
|
/// after a restart.
|
||||||
|
pub async fn get_or_init_index_engine(&self, db: &str, coll: &str) -> dashmap::mapref::one::RefMut<'_, String, IndexEngine> {
|
||||||
|
let ns_key = format!("{}.{}", db, coll);
|
||||||
|
|
||||||
|
// Fast path: engine already exists.
|
||||||
|
if self.indexes.contains_key(&ns_key) {
|
||||||
|
return self.indexes.entry(ns_key).or_insert_with(IndexEngine::new);
|
||||||
|
}
|
||||||
|
|
||||||
|
// Slow path: load from persisted specs.
|
||||||
|
let mut engine = IndexEngine::new();
|
||||||
|
let mut has_custom = false;
|
||||||
|
|
||||||
|
if let Ok(specs) = self.storage.get_indexes(db, coll).await {
|
||||||
|
for spec in &specs {
|
||||||
|
let name = spec.get_str("name").unwrap_or("").to_string();
|
||||||
|
if name == "_id_" || name.is_empty() {
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
let key = match spec.get("key") {
|
||||||
|
Some(Bson::Document(k)) => k.clone(),
|
||||||
|
_ => continue,
|
||||||
|
};
|
||||||
|
let unique = matches!(spec.get("unique"), Some(Bson::Boolean(true)));
|
||||||
|
let sparse = matches!(spec.get("sparse"), Some(Bson::Boolean(true)));
|
||||||
|
let expire_after_seconds = match spec.get("expireAfterSeconds") {
|
||||||
|
Some(Bson::Int32(n)) => Some(*n as u64),
|
||||||
|
Some(Bson::Int64(n)) => Some(*n as u64),
|
||||||
|
_ => None,
|
||||||
|
};
|
||||||
|
let options = IndexOptions {
|
||||||
|
name: Some(name),
|
||||||
|
unique,
|
||||||
|
sparse,
|
||||||
|
expire_after_seconds,
|
||||||
|
};
|
||||||
|
let _ = engine.create_index(key, options);
|
||||||
|
has_custom = true;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
if has_custom {
|
||||||
|
// Rebuild index data from existing documents.
|
||||||
|
if let Ok(docs) = self.storage.find_all(db, coll).await {
|
||||||
|
if !docs.is_empty() {
|
||||||
|
engine.rebuild_from_documents(&docs);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
self.indexes.entry(ns_key).or_insert(engine)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
/// State of an open cursor from a find or aggregate command.
|
/// State of an open cursor from a find or aggregate command.
|
||||||
pub struct CursorState {
|
pub struct CursorState {
|
||||||
/// Documents remaining to be returned.
|
/// Documents remaining to be returned.
|
||||||
|
|||||||
@@ -101,7 +101,15 @@ async fn handle_create_indexes(
|
|||||||
expire_after_seconds,
|
expire_after_seconds,
|
||||||
};
|
};
|
||||||
|
|
||||||
// Create the index.
|
let options_for_persist = IndexOptions {
|
||||||
|
name: options.name.clone(),
|
||||||
|
unique: options.unique,
|
||||||
|
sparse: options.sparse,
|
||||||
|
expire_after_seconds: options.expire_after_seconds,
|
||||||
|
};
|
||||||
|
let key_for_persist = key.clone();
|
||||||
|
|
||||||
|
// Create the index in-memory.
|
||||||
let mut engine = ctx
|
let mut engine = ctx
|
||||||
.indexes
|
.indexes
|
||||||
.entry(ns_key.clone())
|
.entry(ns_key.clone())
|
||||||
@@ -110,6 +118,22 @@ async fn handle_create_indexes(
|
|||||||
match engine.create_index(key, options) {
|
match engine.create_index(key, options) {
|
||||||
Ok(index_name) => {
|
Ok(index_name) => {
|
||||||
debug!(index_name = %index_name, "Created index");
|
debug!(index_name = %index_name, "Created index");
|
||||||
|
|
||||||
|
// Persist index spec to disk.
|
||||||
|
let mut spec = doc! { "key": key_for_persist };
|
||||||
|
if options_for_persist.unique {
|
||||||
|
spec.insert("unique", true);
|
||||||
|
}
|
||||||
|
if options_for_persist.sparse {
|
||||||
|
spec.insert("sparse", true);
|
||||||
|
}
|
||||||
|
if let Some(ttl) = options_for_persist.expire_after_seconds {
|
||||||
|
spec.insert("expireAfterSeconds", ttl as i64);
|
||||||
|
}
|
||||||
|
if let Err(e) = ctx.storage.save_index(db, coll, &index_name, spec).await {
|
||||||
|
tracing::warn!(index = %index_name, error = %e, "failed to persist index spec");
|
||||||
|
}
|
||||||
|
|
||||||
created_count += 1;
|
created_count += 1;
|
||||||
}
|
}
|
||||||
Err(e) => {
|
Err(e) => {
|
||||||
@@ -180,9 +204,21 @@ async fn handle_drop_indexes(
|
|||||||
match index_spec {
|
match index_spec {
|
||||||
Some(Bson::String(name)) if name == "*" => {
|
Some(Bson::String(name)) if name == "*" => {
|
||||||
// Drop all indexes except _id_.
|
// Drop all indexes except _id_.
|
||||||
|
// Collect names to drop from storage first.
|
||||||
|
let names_to_drop: Vec<String> = if let Some(engine) = ctx.indexes.get(&ns_key) {
|
||||||
|
engine.list_indexes().iter()
|
||||||
|
.filter(|info| info.name != "_id_")
|
||||||
|
.map(|info| info.name.clone())
|
||||||
|
.collect()
|
||||||
|
} else {
|
||||||
|
Vec::new()
|
||||||
|
};
|
||||||
if let Some(mut engine) = ctx.indexes.get_mut(&ns_key) {
|
if let Some(mut engine) = ctx.indexes.get_mut(&ns_key) {
|
||||||
engine.drop_all_indexes();
|
engine.drop_all_indexes();
|
||||||
}
|
}
|
||||||
|
for idx_name in &names_to_drop {
|
||||||
|
let _ = ctx.storage.drop_index(db, coll, idx_name).await;
|
||||||
|
}
|
||||||
}
|
}
|
||||||
Some(Bson::String(name)) => {
|
Some(Bson::String(name)) => {
|
||||||
// Drop by name.
|
// Drop by name.
|
||||||
@@ -196,6 +232,7 @@ async fn handle_drop_indexes(
|
|||||||
name
|
name
|
||||||
)));
|
)));
|
||||||
}
|
}
|
||||||
|
let _ = ctx.storage.drop_index(db, coll, name).await;
|
||||||
}
|
}
|
||||||
Some(Bson::Document(key_spec)) => {
|
Some(Bson::Document(key_spec)) => {
|
||||||
// Drop by key spec: find the index with matching key.
|
// Drop by key spec: find the index with matching key.
|
||||||
@@ -210,6 +247,7 @@ async fn handle_drop_indexes(
|
|||||||
engine.drop_index(&name).map_err(|e| {
|
engine.drop_index(&name).map_err(|e| {
|
||||||
CommandError::IndexError(e.to_string())
|
CommandError::IndexError(e.to_string())
|
||||||
})?;
|
})?;
|
||||||
|
let _ = ctx.storage.drop_index(db, coll, &name).await;
|
||||||
} else {
|
} else {
|
||||||
return Err(CommandError::IndexError(
|
return Err(CommandError::IndexError(
|
||||||
"index not found with specified key".into(),
|
"index not found with specified key".into(),
|
||||||
|
|||||||
@@ -1,9 +1,8 @@
|
|||||||
use std::collections::HashMap;
|
use std::collections::HashMap;
|
||||||
|
|
||||||
use bson::{doc, oid::ObjectId, Bson, Document};
|
use bson::{doc, oid::ObjectId, Bson, Document};
|
||||||
use rustdb_index::IndexEngine;
|
|
||||||
use rustdb_storage::OpType;
|
use rustdb_storage::OpType;
|
||||||
use tracing::{debug, warn};
|
use tracing::debug;
|
||||||
|
|
||||||
use crate::context::CommandContext;
|
use crate::context::CommandContext;
|
||||||
use crate::error::{CommandError, CommandResult};
|
use crate::error::{CommandError, CommandResult};
|
||||||
@@ -56,12 +55,35 @@ pub async fn handle(
|
|||||||
let mut inserted_count: i32 = 0;
|
let mut inserted_count: i32 = 0;
|
||||||
let mut write_errors: Vec<Document> = Vec::new();
|
let mut write_errors: Vec<Document> = Vec::new();
|
||||||
|
|
||||||
|
// Ensure the IndexEngine is loaded (with persisted specs from indexes.json).
|
||||||
|
// This must happen BEFORE any writes, so unique constraints are enforced
|
||||||
|
// even on the first write after a restart.
|
||||||
|
drop(ctx.get_or_init_index_engine(db, coll).await);
|
||||||
|
|
||||||
for (idx, mut doc) in docs.into_iter().enumerate() {
|
for (idx, mut doc) in docs.into_iter().enumerate() {
|
||||||
// Auto-generate _id if not present.
|
// Auto-generate _id if not present.
|
||||||
if !doc.contains_key("_id") {
|
if !doc.contains_key("_id") {
|
||||||
doc.insert("_id", ObjectId::new());
|
doc.insert("_id", ObjectId::new());
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Pre-check unique index constraints BEFORE storage write.
|
||||||
|
// The engine is guaranteed to exist from the get_or_init call above.
|
||||||
|
if let Some(engine) = ctx.indexes.get(&ns_key) {
|
||||||
|
if let Err(e) = engine.check_unique_constraints(&doc) {
|
||||||
|
let err_msg = e.to_string();
|
||||||
|
write_errors.push(doc! {
|
||||||
|
"index": idx as i32,
|
||||||
|
"code": 11000_i32,
|
||||||
|
"codeName": "DuplicateKey",
|
||||||
|
"errmsg": &err_msg,
|
||||||
|
});
|
||||||
|
if ordered {
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
// Attempt storage insert.
|
// Attempt storage insert.
|
||||||
match ctx.storage.insert_one(db, coll, doc.clone()).await {
|
match ctx.storage.insert_one(db, coll, doc.clone()).await {
|
||||||
Ok(id_str) => {
|
Ok(id_str) => {
|
||||||
@@ -75,18 +97,16 @@ pub async fn handle(
|
|||||||
None,
|
None,
|
||||||
);
|
);
|
||||||
|
|
||||||
// Update index engine.
|
// Update index engine (already initialized above).
|
||||||
let mut engine = ctx
|
if let Some(mut engine) = ctx.indexes.get_mut(&ns_key) {
|
||||||
.indexes
|
|
||||||
.entry(ns_key.clone())
|
|
||||||
.or_insert_with(IndexEngine::new);
|
|
||||||
if let Err(e) = engine.on_insert(&doc) {
|
if let Err(e) = engine.on_insert(&doc) {
|
||||||
warn!(
|
tracing::error!(
|
||||||
namespace = %ns_key,
|
namespace = %ns_key,
|
||||||
error = %e,
|
error = %e,
|
||||||
"index update failed after successful insert"
|
"index update failed after successful insert"
|
||||||
);
|
);
|
||||||
}
|
}
|
||||||
|
}
|
||||||
inserted_count += 1;
|
inserted_count += 1;
|
||||||
}
|
}
|
||||||
Err(e) => {
|
Err(e) => {
|
||||||
|
|||||||
@@ -1,7 +1,6 @@
|
|||||||
use std::collections::HashSet;
|
use std::collections::HashSet;
|
||||||
|
|
||||||
use bson::{doc, oid::ObjectId, Bson, Document};
|
use bson::{doc, oid::ObjectId, Bson, Document};
|
||||||
use rustdb_index::IndexEngine;
|
|
||||||
use rustdb_query::{QueryMatcher, UpdateEngine, sort_documents, apply_projection};
|
use rustdb_query::{QueryMatcher, UpdateEngine, sort_documents, apply_projection};
|
||||||
use rustdb_storage::OpType;
|
use rustdb_storage::OpType;
|
||||||
use tracing::debug;
|
use tracing::debug;
|
||||||
@@ -47,6 +46,10 @@ async fn handle_update(
|
|||||||
ensure_collection_exists(db, coll, ctx).await?;
|
ensure_collection_exists(db, coll, ctx).await?;
|
||||||
|
|
||||||
let ns_key = format!("{}.{}", db, coll);
|
let ns_key = format!("{}.{}", db, coll);
|
||||||
|
|
||||||
|
// Ensure the IndexEngine is loaded with persisted specs from indexes.json.
|
||||||
|
drop(ctx.get_or_init_index_engine(db, coll).await);
|
||||||
|
|
||||||
let mut total_n: i32 = 0;
|
let mut total_n: i32 = 0;
|
||||||
let mut total_n_modified: i32 = 0;
|
let mut total_n_modified: i32 = 0;
|
||||||
let mut upserted_list: Vec<Document> = Vec::new();
|
let mut upserted_list: Vec<Document> = Vec::new();
|
||||||
@@ -150,6 +153,22 @@ async fn handle_update(
|
|||||||
updated.get("_id").unwrap().clone()
|
updated.get("_id").unwrap().clone()
|
||||||
};
|
};
|
||||||
|
|
||||||
|
// Pre-check unique index constraints before upsert insert.
|
||||||
|
if let Some(engine) = ctx.indexes.get(&ns_key) {
|
||||||
|
if let Err(e) = engine.check_unique_constraints(&updated) {
|
||||||
|
write_errors.push(doc! {
|
||||||
|
"index": idx as i32,
|
||||||
|
"code": 11000_i32,
|
||||||
|
"codeName": "DuplicateKey",
|
||||||
|
"errmsg": e.to_string(),
|
||||||
|
});
|
||||||
|
if ordered {
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
// Insert the new document.
|
// Insert the new document.
|
||||||
match ctx.storage.insert_one(db, coll, updated.clone()).await {
|
match ctx.storage.insert_one(db, coll, updated.clone()).await {
|
||||||
Ok(id_str) => {
|
Ok(id_str) => {
|
||||||
@@ -163,12 +182,12 @@ async fn handle_update(
|
|||||||
None,
|
None,
|
||||||
);
|
);
|
||||||
|
|
||||||
// Update index.
|
// Update index (engine already initialized above).
|
||||||
let mut engine = ctx
|
if let Some(mut engine) = ctx.indexes.get_mut(&ns_key) {
|
||||||
.indexes
|
if let Err(e) = engine.on_insert(&updated) {
|
||||||
.entry(ns_key.clone())
|
tracing::error!(namespace = %ns_key, error = %e, "index update failed after upsert insert");
|
||||||
.or_insert_with(IndexEngine::new);
|
}
|
||||||
let _ = engine.on_insert(&updated);
|
}
|
||||||
|
|
||||||
total_n += 1;
|
total_n += 1;
|
||||||
upserted_list.push(doc! {
|
upserted_list.push(doc! {
|
||||||
@@ -216,6 +235,22 @@ async fn handle_update(
|
|||||||
array_filters.as_deref(),
|
array_filters.as_deref(),
|
||||||
) {
|
) {
|
||||||
Ok(updated_doc) => {
|
Ok(updated_doc) => {
|
||||||
|
// Pre-check unique index constraints before storage write.
|
||||||
|
if let Some(engine) = ctx.indexes.get(&ns_key) {
|
||||||
|
if let Err(e) = engine.check_unique_constraints_for_update(matched_doc, &updated_doc) {
|
||||||
|
write_errors.push(doc! {
|
||||||
|
"index": idx as i32,
|
||||||
|
"code": 11000_i32,
|
||||||
|
"codeName": "DuplicateKey",
|
||||||
|
"errmsg": e.to_string(),
|
||||||
|
});
|
||||||
|
if ordered {
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
let id_str = extract_id_string(matched_doc);
|
let id_str = extract_id_string(matched_doc);
|
||||||
match ctx
|
match ctx
|
||||||
.storage
|
.storage
|
||||||
@@ -235,7 +270,9 @@ async fn handle_update(
|
|||||||
|
|
||||||
// Update index.
|
// Update index.
|
||||||
if let Some(mut engine) = ctx.indexes.get_mut(&ns_key) {
|
if let Some(mut engine) = ctx.indexes.get_mut(&ns_key) {
|
||||||
let _ = engine.on_update(matched_doc, &updated_doc);
|
if let Err(e) = engine.on_update(matched_doc, &updated_doc) {
|
||||||
|
tracing::error!(namespace = %ns_key, error = %e, "index update failed after update");
|
||||||
|
}
|
||||||
}
|
}
|
||||||
total_n += 1;
|
total_n += 1;
|
||||||
// Check if the document actually changed.
|
// Check if the document actually changed.
|
||||||
@@ -366,6 +403,9 @@ async fn handle_find_and_modify(
|
|||||||
|
|
||||||
let ns_key = format!("{}.{}", db, coll);
|
let ns_key = format!("{}.{}", db, coll);
|
||||||
|
|
||||||
|
// Ensure the IndexEngine is loaded with persisted specs.
|
||||||
|
drop(ctx.get_or_init_index_engine(db, coll).await);
|
||||||
|
|
||||||
// Load and filter documents.
|
// Load and filter documents.
|
||||||
let mut matched = load_filtered_docs(db, coll, &query, &ns_key, ctx).await?;
|
let mut matched = load_filtered_docs(db, coll, &query, &ns_key, ctx).await?;
|
||||||
|
|
||||||
@@ -444,6 +484,13 @@ async fn handle_find_and_modify(
|
|||||||
)
|
)
|
||||||
.map_err(|e| CommandError::InternalError(e.to_string()))?;
|
.map_err(|e| CommandError::InternalError(e.to_string()))?;
|
||||||
|
|
||||||
|
// Pre-check unique index constraints before storage write.
|
||||||
|
if let Some(engine) = ctx.indexes.get(&ns_key) {
|
||||||
|
if let Err(e) = engine.check_unique_constraints_for_update(&original_doc, &updated_doc) {
|
||||||
|
return Err(CommandError::StorageError(e.to_string()));
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
let id_str = extract_id_string(&original_doc);
|
let id_str = extract_id_string(&original_doc);
|
||||||
ctx.storage
|
ctx.storage
|
||||||
.update_by_id(db, coll, &id_str, updated_doc.clone())
|
.update_by_id(db, coll, &id_str, updated_doc.clone())
|
||||||
@@ -461,7 +508,9 @@ async fn handle_find_and_modify(
|
|||||||
|
|
||||||
// Update index.
|
// Update index.
|
||||||
if let Some(mut engine) = ctx.indexes.get_mut(&ns_key) {
|
if let Some(mut engine) = ctx.indexes.get_mut(&ns_key) {
|
||||||
let _ = engine.on_update(&original_doc, &updated_doc);
|
if let Err(e) = engine.on_update(&original_doc, &updated_doc) {
|
||||||
|
tracing::error!(namespace = %ns_key, error = %e, "index update failed after findAndModify update");
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
let return_doc = if return_new {
|
let return_doc = if return_new {
|
||||||
@@ -505,6 +554,13 @@ async fn handle_find_and_modify(
|
|||||||
updated_doc.get("_id").unwrap().clone()
|
updated_doc.get("_id").unwrap().clone()
|
||||||
};
|
};
|
||||||
|
|
||||||
|
// Pre-check unique index constraints before upsert insert.
|
||||||
|
if let Some(engine) = ctx.indexes.get(&ns_key) {
|
||||||
|
if let Err(e) = engine.check_unique_constraints(&updated_doc) {
|
||||||
|
return Err(CommandError::StorageError(e.to_string()));
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
let inserted_id_str = ctx.storage
|
let inserted_id_str = ctx.storage
|
||||||
.insert_one(db, coll, updated_doc.clone())
|
.insert_one(db, coll, updated_doc.clone())
|
||||||
.await?;
|
.await?;
|
||||||
@@ -521,11 +577,11 @@ async fn handle_find_and_modify(
|
|||||||
|
|
||||||
// Update index.
|
// Update index.
|
||||||
{
|
{
|
||||||
let mut engine = ctx
|
if let Some(mut engine) = ctx.indexes.get_mut(&ns_key) {
|
||||||
.indexes
|
if let Err(e) = engine.on_insert(&updated_doc) {
|
||||||
.entry(ns_key.clone())
|
tracing::error!(namespace = %ns_key, error = %e, "index update failed after findAndModify upsert");
|
||||||
.or_insert_with(IndexEngine::new);
|
}
|
||||||
let _ = engine.on_insert(&updated_doc);
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
let value = if return_new {
|
let value = if return_new {
|
||||||
|
|||||||
@@ -153,6 +153,55 @@ impl IndexEngine {
|
|||||||
self.indexes.contains_key(name)
|
self.indexes.contains_key(name)
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/// Check unique constraints for a document without modifying the index.
|
||||||
|
/// Returns Ok(()) if no conflict, Err(DuplicateKey) if a unique constraint
|
||||||
|
/// would be violated. This is a read-only check (immutable &self).
|
||||||
|
pub fn check_unique_constraints(&self, doc: &Document) -> Result<(), IndexError> {
|
||||||
|
for idx in self.indexes.values() {
|
||||||
|
if idx.unique {
|
||||||
|
let key_bytes = Self::extract_key_bytes(doc, &idx.key, idx.sparse);
|
||||||
|
if let Some(ref kb) = key_bytes {
|
||||||
|
if let Some(existing_ids) = idx.hash.get(kb) {
|
||||||
|
if !existing_ids.is_empty() {
|
||||||
|
return Err(IndexError::DuplicateKey {
|
||||||
|
index: idx.name.clone(),
|
||||||
|
key: format!("{:?}", kb),
|
||||||
|
});
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
Ok(())
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Check unique constraints for an update, excluding the document being updated.
|
||||||
|
/// Returns Ok(()) if no conflict. This is a read-only check (immutable &self).
|
||||||
|
pub fn check_unique_constraints_for_update(
|
||||||
|
&self,
|
||||||
|
old_doc: &Document,
|
||||||
|
new_doc: &Document,
|
||||||
|
) -> Result<(), IndexError> {
|
||||||
|
let doc_id = Self::extract_id(old_doc);
|
||||||
|
for idx in self.indexes.values() {
|
||||||
|
if idx.unique {
|
||||||
|
let new_key_bytes = Self::extract_key_bytes(new_doc, &idx.key, idx.sparse);
|
||||||
|
if let Some(ref kb) = new_key_bytes {
|
||||||
|
if let Some(existing_ids) = idx.hash.get(kb) {
|
||||||
|
let has_conflict = existing_ids.iter().any(|id| *id != doc_id);
|
||||||
|
if has_conflict {
|
||||||
|
return Err(IndexError::DuplicateKey {
|
||||||
|
index: idx.name.clone(),
|
||||||
|
key: format!("{:?}", kb),
|
||||||
|
});
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
Ok(())
|
||||||
|
}
|
||||||
|
|
||||||
/// Notify the engine that a document has been inserted.
|
/// Notify the engine that a document has been inserted.
|
||||||
/// Checks unique constraints and updates all index structures.
|
/// Checks unique constraints and updates all index structures.
|
||||||
pub fn on_insert(&mut self, doc: &Document) -> Result<(), IndexError> {
|
pub fn on_insert(&mut self, doc: &Document) -> Result<(), IndexError> {
|
||||||
|
|||||||
@@ -17,3 +17,6 @@ tracing = { workspace = true }
|
|||||||
crc32fast = { workspace = true }
|
crc32fast = { workspace = true }
|
||||||
uuid = { workspace = true }
|
uuid = { workspace = true }
|
||||||
async-trait = { workspace = true }
|
async-trait = { workspace = true }
|
||||||
|
|
||||||
|
[dev-dependencies]
|
||||||
|
tempfile = { workspace = true }
|
||||||
|
|||||||
@@ -0,0 +1,499 @@
|
|||||||
|
//! Binary Write-Ahead Log for crash recovery.
|
||||||
|
//!
|
||||||
|
//! # Protocol
|
||||||
|
//!
|
||||||
|
//! Every mutation follows this sequence:
|
||||||
|
//! 1. Append WAL record → fsync
|
||||||
|
//! 2. Perform the actual data write
|
||||||
|
//! 3. Append WAL commit marker → fsync
|
||||||
|
//!
|
||||||
|
//! On recovery, uncommitted entries (those without a matching commit marker)
|
||||||
|
//! are replayed or verified.
|
||||||
|
//!
|
||||||
|
//! # Record format
|
||||||
|
//!
|
||||||
|
//! ```text
|
||||||
|
//! ┌──────────┬──────────┬──────────┬──────────┬──────────┬──────────┬────────────┐
|
||||||
|
//! │ magic │ seq │ op │ key_len │ val_len │ crc32 │ payload │
|
||||||
|
//! │ u16 LE │ u64 LE │ u8 │ u32 LE │ u32 LE │ u32 LE │ [key][val] │
|
||||||
|
//! │ 0xAA01 │ │ │ │ │ │ │
|
||||||
|
//! └──────────┴──────────┴──────────┴──────────┴──────────┴──────────┴────────────┘
|
||||||
|
//! ```
|
||||||
|
//!
|
||||||
|
//! # Commit marker
|
||||||
|
//!
|
||||||
|
//! ```text
|
||||||
|
//! ┌──────────┬──────────┬──────────┐
|
||||||
|
//! │ magic │ seq │ crc32 │
|
||||||
|
//! │ u16 LE │ u64 LE │ u32 LE │
|
||||||
|
//! │ 0xCC01 │ │ │
|
||||||
|
//! └──────────┴──────────┴──────────┘
|
||||||
|
//! ```
|
||||||
|
|
||||||
|
use std::io::{self, BufReader, Read, Write};
|
||||||
|
use std::path::{Path, PathBuf};
|
||||||
|
use std::sync::atomic::{AtomicU64, Ordering};
|
||||||
|
|
||||||
|
use crate::error::{StorageError, StorageResult};
|
||||||
|
use crate::record::{FileHeader, FileType, FILE_HEADER_SIZE};
|
||||||
|
|
||||||
|
// ---------------------------------------------------------------------------
|
||||||
|
// Constants
|
||||||
|
// ---------------------------------------------------------------------------
|
||||||
|
|
||||||
|
/// Magic number that opens every WAL record.
const WAL_RECORD_MAGIC: u16 = 0xAA01;

/// Magic number that opens every WAL commit marker.
const WAL_COMMIT_MAGIC: u16 = 0xCC01;

/// WAL record header size in bytes:
/// magic(2) + seq(8) + op(1) + key_len(4) + val_len(4) + crc(4) = 23
const WAL_RECORD_HEADER: usize = 2 + 8 + 1 + 4 + 4 + 4;

/// Commit marker size in bytes: magic(2) + seq(8) + crc(4) = 14
const WAL_COMMIT_SIZE: usize = 2 + 8 + 4;
|
||||||
|
|
||||||
|
// ---------------------------------------------------------------------------
|
||||||
|
// WAL operation type
|
||||||
|
// ---------------------------------------------------------------------------
|
||||||
|
|
||||||
|
/// Kind of mutation a WAL record describes.
///
/// The discriminant is the single `op` byte stored in the record header.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
#[repr(u8)]
pub enum WalOpType {
    /// Stored as op byte `1`.
    Insert = 1,
    /// Stored as op byte `2`.
    Update = 2,
    /// Stored as op byte `3`.
    Delete = 3,
}
|
||||||
|
|
||||||
|
impl WalOpType {
|
||||||
|
fn from_u8(v: u8) -> StorageResult<Self> {
|
||||||
|
match v {
|
||||||
|
1 => Ok(WalOpType::Insert),
|
||||||
|
2 => Ok(WalOpType::Update),
|
||||||
|
3 => Ok(WalOpType::Delete),
|
||||||
|
_ => Err(StorageError::WalError(format!("unknown WAL op: {v}"))),
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// ---------------------------------------------------------------------------
|
||||||
|
// WAL entry (parsed from file)
|
||||||
|
// ---------------------------------------------------------------------------
|
||||||
|
|
||||||
|
#[derive(Debug, Clone)]
|
||||||
|
pub struct WalEntry {
|
||||||
|
pub seq: u64,
|
||||||
|
pub op: WalOpType,
|
||||||
|
pub key: Vec<u8>,
|
||||||
|
pub value: Vec<u8>,
|
||||||
|
}
|
||||||
|
|
||||||
|
// ---------------------------------------------------------------------------
|
||||||
|
// Internal: what we read from the WAL file
|
||||||
|
// ---------------------------------------------------------------------------
|
||||||
|
|
||||||
|
#[derive(Debug)]
|
||||||
|
enum WalItem {
|
||||||
|
Record(WalEntry),
|
||||||
|
Commit(u64), // seq that was committed
|
||||||
|
}
|
||||||
|
|
||||||
|
// ---------------------------------------------------------------------------
|
||||||
|
// BinaryWal
|
||||||
|
// ---------------------------------------------------------------------------
|
||||||
|
|
||||||
|
/// Binary write-ahead log stored in one file.
pub struct BinaryWal {
    /// Location of the WAL file.
    path: PathBuf,
    /// Sequence number that the next appended record will receive.
    next_seq: AtomicU64,
}
|
||||||
|
|
||||||
|
impl BinaryWal {
|
||||||
|
/// Create a new WAL. Does not touch the filesystem until `initialize()`.
|
||||||
|
pub fn new(path: PathBuf) -> Self {
|
||||||
|
Self {
|
||||||
|
path,
|
||||||
|
next_seq: AtomicU64::new(1),
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Initialize: create parent dirs, recover sequence counter from existing file.
|
||||||
|
pub fn initialize(&self) -> StorageResult<()> {
|
||||||
|
if let Some(parent) = self.path.parent() {
|
||||||
|
std::fs::create_dir_all(parent)?;
|
||||||
|
}
|
||||||
|
|
||||||
|
if self.path.exists() {
|
||||||
|
// Scan to find highest seq
|
||||||
|
let items = self.read_all_items()?;
|
||||||
|
let max_seq = items
|
||||||
|
.iter()
|
||||||
|
.map(|item| match item {
|
||||||
|
WalItem::Record(e) => e.seq,
|
||||||
|
WalItem::Commit(s) => *s,
|
||||||
|
})
|
||||||
|
.max()
|
||||||
|
.unwrap_or(0);
|
||||||
|
self.next_seq.store(max_seq + 1, Ordering::SeqCst);
|
||||||
|
} else {
|
||||||
|
// Create the file with a header
|
||||||
|
let mut f = std::fs::File::create(&self.path)?;
|
||||||
|
let hdr = FileHeader::new(FileType::Wal);
|
||||||
|
f.write_all(&hdr.encode())?;
|
||||||
|
f.flush()?;
|
||||||
|
f.sync_all()?;
|
||||||
|
}
|
||||||
|
|
||||||
|
Ok(())
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Append a WAL record. Returns the sequence number. Fsyncs.
|
||||||
|
pub fn append(
|
||||||
|
&self,
|
||||||
|
op: WalOpType,
|
||||||
|
key: &[u8],
|
||||||
|
value: &[u8],
|
||||||
|
) -> StorageResult<u64> {
|
||||||
|
let seq = self.next_seq.fetch_add(1, Ordering::SeqCst);
|
||||||
|
let key_len = key.len() as u32;
|
||||||
|
let val_len = value.len() as u32;
|
||||||
|
|
||||||
|
// Build header bytes (without CRC)
|
||||||
|
let mut hdr = Vec::with_capacity(WAL_RECORD_HEADER);
|
||||||
|
hdr.extend_from_slice(&WAL_RECORD_MAGIC.to_le_bytes());
|
||||||
|
hdr.extend_from_slice(&seq.to_le_bytes());
|
||||||
|
hdr.push(op as u8);
|
||||||
|
hdr.extend_from_slice(&key_len.to_le_bytes());
|
||||||
|
hdr.extend_from_slice(&val_len.to_le_bytes());
|
||||||
|
// CRC placeholder
|
||||||
|
hdr.extend_from_slice(&0u32.to_le_bytes());
|
||||||
|
|
||||||
|
// Compute CRC over header (without crc field) + payload
|
||||||
|
let mut hasher = crc32fast::Hasher::new();
|
||||||
|
hasher.update(&hdr[0..19]); // magic + seq + op + key_len + val_len
|
||||||
|
hasher.update(key);
|
||||||
|
hasher.update(value);
|
||||||
|
let crc = hasher.finalize();
|
||||||
|
hdr[19..23].copy_from_slice(&crc.to_le_bytes());
|
||||||
|
|
||||||
|
// Append to file
|
||||||
|
let mut f = std::fs::OpenOptions::new()
|
||||||
|
.create(true)
|
||||||
|
.append(true)
|
||||||
|
.open(&self.path)?;
|
||||||
|
f.write_all(&hdr)?;
|
||||||
|
f.write_all(key)?;
|
||||||
|
f.write_all(value)?;
|
||||||
|
f.sync_all()?;
|
||||||
|
|
||||||
|
Ok(seq)
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Append a commit marker for the given sequence. Fsyncs.
|
||||||
|
pub fn append_commit(&self, seq: u64) -> StorageResult<()> {
|
||||||
|
let mut buf = Vec::with_capacity(WAL_COMMIT_SIZE);
|
||||||
|
buf.extend_from_slice(&WAL_COMMIT_MAGIC.to_le_bytes());
|
||||||
|
buf.extend_from_slice(&seq.to_le_bytes());
|
||||||
|
|
||||||
|
// CRC over magic + seq
|
||||||
|
let mut hasher = crc32fast::Hasher::new();
|
||||||
|
hasher.update(&buf[0..10]);
|
||||||
|
let crc = hasher.finalize();
|
||||||
|
buf.extend_from_slice(&crc.to_le_bytes());
|
||||||
|
|
||||||
|
let mut f = std::fs::OpenOptions::new()
|
||||||
|
.create(true)
|
||||||
|
.append(true)
|
||||||
|
.open(&self.path)?;
|
||||||
|
f.write_all(&buf)?;
|
||||||
|
f.sync_all()?;
|
||||||
|
|
||||||
|
Ok(())
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Recover: return all WAL entries that were NOT committed.
|
||||||
|
pub fn recover(&self) -> StorageResult<Vec<WalEntry>> {
|
||||||
|
let items = self.read_all_items()?;
|
||||||
|
|
||||||
|
// Collect committed seq numbers
|
||||||
|
let committed: std::collections::HashSet<u64> = items
|
||||||
|
.iter()
|
||||||
|
.filter_map(|item| {
|
||||||
|
if let WalItem::Commit(s) = item {
|
||||||
|
Some(*s)
|
||||||
|
} else {
|
||||||
|
None
|
||||||
|
}
|
||||||
|
})
|
||||||
|
.collect();
|
||||||
|
|
||||||
|
// Return records without a commit marker
|
||||||
|
let uncommitted: Vec<WalEntry> = items
|
||||||
|
.into_iter()
|
||||||
|
.filter_map(|item| {
|
||||||
|
if let WalItem::Record(entry) = item {
|
||||||
|
if !committed.contains(&entry.seq) {
|
||||||
|
return Some(entry);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
None
|
||||||
|
})
|
||||||
|
.collect();
|
||||||
|
|
||||||
|
Ok(uncommitted)
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Truncate the WAL: rewrite with just the file header (clears all entries).
|
||||||
|
pub fn truncate(&self) -> StorageResult<()> {
|
||||||
|
let mut f = std::fs::File::create(&self.path)?;
|
||||||
|
let hdr = FileHeader::new(FileType::Wal);
|
||||||
|
f.write_all(&hdr.encode())?;
|
||||||
|
f.flush()?;
|
||||||
|
f.sync_all()?;
|
||||||
|
// Don't reset next_seq — it should keep incrementing
|
||||||
|
Ok(())
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Path to the WAL file.
|
||||||
|
pub fn path(&self) -> &Path {
|
||||||
|
&self.path
|
||||||
|
}
|
||||||
|
|
||||||
|
// -----------------------------------------------------------------------
|
||||||
|
// Internal: read all items from the WAL file
|
||||||
|
// -----------------------------------------------------------------------
|
||||||
|
|
||||||
|
fn read_all_items(&self) -> StorageResult<Vec<WalItem>> {
|
||||||
|
if !self.path.exists() {
|
||||||
|
return Ok(vec![]);
|
||||||
|
}
|
||||||
|
|
||||||
|
let file = std::fs::File::open(&self.path)?;
|
||||||
|
let mut reader = BufReader::new(file);
|
||||||
|
|
||||||
|
// Skip file header (if present)
|
||||||
|
let file_len = std::fs::metadata(&self.path)?.len();
|
||||||
|
if file_len >= FILE_HEADER_SIZE as u64 {
|
||||||
|
let mut hdr_buf = [0u8; FILE_HEADER_SIZE];
|
||||||
|
reader.read_exact(&mut hdr_buf)?;
|
||||||
|
// Validate but don't fail hard — allow reading even slightly off headers
|
||||||
|
let _ = FileHeader::decode(&hdr_buf);
|
||||||
|
}
|
||||||
|
|
||||||
|
let mut items = Vec::new();
|
||||||
|
|
||||||
|
loop {
|
||||||
|
// Peek at the magic to determine if this is a record or commit marker
|
||||||
|
let mut magic_buf = [0u8; 2];
|
||||||
|
match reader.read_exact(&mut magic_buf) {
|
||||||
|
Ok(()) => {}
|
||||||
|
Err(e) if e.kind() == io::ErrorKind::UnexpectedEof => break,
|
||||||
|
Err(e) => return Err(e.into()),
|
||||||
|
}
|
||||||
|
let magic = u16::from_le_bytes(magic_buf);
|
||||||
|
|
||||||
|
match magic {
|
||||||
|
WAL_RECORD_MAGIC => {
|
||||||
|
// Read rest of header: seq(8) + op(1) + key_len(4) + val_len(4) + crc(4) = 21
|
||||||
|
let mut rest = [0u8; 21];
|
||||||
|
match reader.read_exact(&mut rest) {
|
||||||
|
Ok(()) => {}
|
||||||
|
Err(e) if e.kind() == io::ErrorKind::UnexpectedEof => break,
|
||||||
|
Err(e) => return Err(e.into()),
|
||||||
|
}
|
||||||
|
|
||||||
|
let seq = u64::from_le_bytes(rest[0..8].try_into().unwrap());
|
||||||
|
let op = WalOpType::from_u8(rest[8])?;
|
||||||
|
let key_len = u32::from_le_bytes(rest[9..13].try_into().unwrap()) as usize;
|
||||||
|
let val_len = u32::from_le_bytes(rest[13..17].try_into().unwrap()) as usize;
|
||||||
|
let stored_crc = u32::from_le_bytes(rest[17..21].try_into().unwrap());
|
||||||
|
|
||||||
|
let mut payload = vec![0u8; key_len + val_len];
|
||||||
|
match reader.read_exact(&mut payload) {
|
||||||
|
Ok(()) => {}
|
||||||
|
Err(e) if e.kind() == io::ErrorKind::UnexpectedEof => break,
|
||||||
|
Err(e) => return Err(e.into()),
|
||||||
|
}
|
||||||
|
|
||||||
|
// Verify CRC
|
||||||
|
let mut hasher = crc32fast::Hasher::new();
|
||||||
|
hasher.update(&magic_buf);
|
||||||
|
hasher.update(&rest[0..17]); // seq + op + key_len + val_len
|
||||||
|
hasher.update(&payload);
|
||||||
|
let computed = hasher.finalize();
|
||||||
|
|
||||||
|
if computed != stored_crc {
|
||||||
|
// Corrupt WAL entry — skip it (best-effort recovery)
|
||||||
|
tracing::warn!(
|
||||||
|
seq,
|
||||||
|
"skipping corrupt WAL record: CRC mismatch (expected 0x{stored_crc:08X}, got 0x{computed:08X})"
|
||||||
|
);
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
|
||||||
|
let key = payload[..key_len].to_vec();
|
||||||
|
let value = payload[key_len..].to_vec();
|
||||||
|
items.push(WalItem::Record(WalEntry {
|
||||||
|
seq,
|
||||||
|
op,
|
||||||
|
key,
|
||||||
|
value,
|
||||||
|
}));
|
||||||
|
}
|
||||||
|
WAL_COMMIT_MAGIC => {
|
||||||
|
// Read rest: seq(8) + crc(4) = 12
|
||||||
|
let mut rest = [0u8; 12];
|
||||||
|
match reader.read_exact(&mut rest) {
|
||||||
|
Ok(()) => {}
|
||||||
|
Err(e) if e.kind() == io::ErrorKind::UnexpectedEof => break,
|
||||||
|
Err(e) => return Err(e.into()),
|
||||||
|
}
|
||||||
|
|
||||||
|
let seq = u64::from_le_bytes(rest[0..8].try_into().unwrap());
|
||||||
|
let stored_crc = u32::from_le_bytes(rest[8..12].try_into().unwrap());
|
||||||
|
|
||||||
|
let mut hasher = crc32fast::Hasher::new();
|
||||||
|
hasher.update(&magic_buf);
|
||||||
|
hasher.update(&rest[0..8]);
|
||||||
|
let computed = hasher.finalize();
|
||||||
|
|
||||||
|
if computed != stored_crc {
|
||||||
|
tracing::warn!(
|
||||||
|
seq,
|
||||||
|
"skipping corrupt WAL commit marker: CRC mismatch"
|
||||||
|
);
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
|
||||||
|
items.push(WalItem::Commit(seq));
|
||||||
|
}
|
||||||
|
_ => {
|
||||||
|
// Unknown magic — file is corrupt past this point
|
||||||
|
tracing::warn!("unknown WAL magic 0x{magic:04X}, stopping scan");
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
Ok(items)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// ---------------------------------------------------------------------------
|
||||||
|
// Tests
|
||||||
|
// ---------------------------------------------------------------------------
|
||||||
|
|
||||||
|
#[cfg(test)]
mod tests {
    use super::*;

    /// Open an initialized WAL stored as `test.wal` inside `dir`.
    fn make_wal(dir: &tempfile::TempDir) -> BinaryWal {
        let wal = BinaryWal::new(dir.path().join("test.wal"));
        wal.initialize().unwrap();
        wal
    }

    #[test]
    fn append_and_commit() {
        let tmp = tempfile::tempdir().unwrap();
        let wal = make_wal(&tmp);

        let first = wal.append(WalOpType::Insert, b"key1", b"value1").unwrap();
        assert_eq!(first, 1);
        wal.append_commit(first).unwrap();

        // The only record has a commit marker, so recovery finds nothing.
        let pending = wal.recover().unwrap();
        assert!(pending.is_empty());
    }

    #[test]
    fn uncommitted_entries_recovered() {
        let tmp = tempfile::tempdir().unwrap();
        let wal = make_wal(&tmp);

        let committed = wal.append(WalOpType::Insert, b"k1", b"v1").unwrap();
        wal.append_commit(committed).unwrap();

        // The second record never receives a commit marker.
        let dangling = wal.append(WalOpType::Update, b"k2", b"v2").unwrap();

        let pending = wal.recover().unwrap();
        assert_eq!(pending.len(), 1);

        let entry = &pending[0];
        assert_eq!(entry.seq, dangling);
        assert_eq!(entry.op, WalOpType::Update);
        assert_eq!(entry.key, b"k2");
        assert_eq!(entry.value, b"v2");
    }

    #[test]
    fn truncate_clears_wal() {
        let tmp = tempfile::tempdir().unwrap();
        let wal = make_wal(&tmp);

        wal.append(WalOpType::Insert, b"k", b"v").unwrap();
        wal.truncate().unwrap();

        assert!(wal.recover().unwrap().is_empty());
    }

    #[test]
    fn multiple_operations() {
        let tmp = tempfile::tempdir().unwrap();
        let wal = make_wal(&tmp);

        let inserted = wal.append(WalOpType::Insert, b"a", b"1").unwrap();
        let updated = wal.append(WalOpType::Update, b"b", b"2").unwrap();
        let deleted = wal.append(WalOpType::Delete, b"c", b"").unwrap();

        // Only the insert and the delete are committed.
        for seq in [inserted, deleted] {
            wal.append_commit(seq).unwrap();
        }

        let pending = wal.recover().unwrap();
        assert_eq!(pending.len(), 1);
        assert_eq!(pending[0].seq, updated);
    }

    #[test]
    fn sequence_numbers_persist_across_reinit() {
        let tmp = tempfile::tempdir().unwrap();
        let path = tmp.path().join("persist.wal");

        let first_run = BinaryWal::new(path.clone());
        first_run.initialize().unwrap();
        let s1 = first_run.append(WalOpType::Insert, b"k", b"v").unwrap();
        assert_eq!(s1, 1);
        first_run.append_commit(s1).unwrap();
        drop(first_run);

        // Re-open — seq should continue from 2+ (since max committed was 1)
        let second_run = BinaryWal::new(path);
        second_run.initialize().unwrap();
        let s2 = second_run.append(WalOpType::Insert, b"k2", b"v2").unwrap();
        assert!(s2 >= 2, "seq should continue: got {s2}");
    }

    #[test]
    fn delete_has_empty_value() {
        let tmp = tempfile::tempdir().unwrap();
        let wal = make_wal(&tmp);

        let seq = wal.append(WalOpType::Delete, b"key", b"").unwrap();

        let pending = wal.recover().unwrap();
        assert_eq!(pending.len(), 1);

        let entry = &pending[0];
        assert_eq!(entry.seq, seq);
        assert_eq!(entry.op, WalOpType::Delete);
        assert!(entry.value.is_empty());
    }
}
|
||||||
@@ -0,0 +1,270 @@
|
|||||||
|
//! Compaction for the Bitcask-style storage engine.
|
||||||
|
//!
|
||||||
|
//! Over time, the data file accumulates dead records (superseded by updates,
|
||||||
|
//! tombstones from deletes). Compaction rewrites the data file with only live
|
||||||
|
//! records, reclaiming disk space.
|
||||||
|
//!
|
||||||
|
//! The process is:
|
||||||
|
//! 1. Create a new `data.rdb.compact` file with a fresh file header.
|
||||||
|
//! 2. Iterate all live entries from the KeyDir.
|
||||||
|
//! 3. Read each live document from the old data file, write to the new file.
|
||||||
|
//! 4. Atomically rename `data.rdb.compact` → `data.rdb`.
|
||||||
|
//! 5. Update KeyDir entries with new offsets.
|
||||||
|
//! 6. Reset dead_bytes counter.
|
||||||
|
|
||||||
|
use std::io::{Seek, SeekFrom, Write};
|
||||||
|
use std::path::Path;
|
||||||
|
use std::sync::atomic::Ordering;
|
||||||
|
|
||||||
|
use tracing::info;
|
||||||
|
|
||||||
|
use crate::error::StorageResult;
|
||||||
|
use crate::keydir::{KeyDir, KeyDirEntry};
|
||||||
|
use crate::record::{DataRecord, FileHeader, FileType, FILE_HEADER_SIZE};
|
||||||
|
|
||||||
|
/// Summary returned by a finished compaction run.
#[derive(Debug)]
pub struct CompactionResult {
    /// How many live records were copied into the new file.
    pub records_written: u64,
    /// Old file size minus new file size, in bytes.
    pub bytes_reclaimed: u64,
    /// Size of the data file after compaction, in bytes.
    pub new_file_size: u64,
}
|
||||||
|
|
||||||
|
/// Compact a collection's data file.
|
||||||
|
///
|
||||||
|
/// This function:
|
||||||
|
/// - Reads all live documents (entries present in the KeyDir) from the old data file
|
||||||
|
/// - Writes them sequentially to a new file
|
||||||
|
/// - Atomically renames the new file over the old one
|
||||||
|
/// - Updates all KeyDir entries with their new offsets
|
||||||
|
///
|
||||||
|
/// The caller must hold the collection's write lock during this operation.
|
||||||
|
pub fn compact_data_file(
|
||||||
|
data_path: &Path,
|
||||||
|
keydir: &KeyDir,
|
||||||
|
dead_bytes: &std::sync::atomic::AtomicU64,
|
||||||
|
data_file_size: &std::sync::atomic::AtomicU64,
|
||||||
|
) -> StorageResult<CompactionResult> {
|
||||||
|
let compact_path = data_path.with_extension("rdb.compact");
|
||||||
|
|
||||||
|
let old_file_size = std::fs::metadata(data_path)
|
||||||
|
.map(|m| m.len())
|
||||||
|
.unwrap_or(0);
|
||||||
|
|
||||||
|
// Collect all live entries with their keys
|
||||||
|
let mut live_entries: Vec<(String, KeyDirEntry)> = Vec::with_capacity(keydir.len() as usize);
|
||||||
|
keydir.for_each(|key, entry| {
|
||||||
|
live_entries.push((key.to_string(), *entry));
|
||||||
|
});
|
||||||
|
|
||||||
|
// Sort by offset for sequential reads (cache-friendly)
|
||||||
|
live_entries.sort_by_key(|(_, e)| e.offset);
|
||||||
|
|
||||||
|
// Create compact file with header
|
||||||
|
let mut compact_file = std::fs::File::create(&compact_path)?;
|
||||||
|
let hdr = FileHeader::new(FileType::Data);
|
||||||
|
compact_file.write_all(&hdr.encode())?;
|
||||||
|
|
||||||
|
let mut current_offset = FILE_HEADER_SIZE as u64;
|
||||||
|
let mut new_entries: Vec<(String, KeyDirEntry)> = Vec::with_capacity(live_entries.len());
|
||||||
|
let mut old_data_file = std::fs::File::open(data_path)?;
|
||||||
|
|
||||||
|
for (key, entry) in &live_entries {
|
||||||
|
// Read the record from the old file
|
||||||
|
old_data_file.seek(SeekFrom::Start(entry.offset))?;
|
||||||
|
let (record, _disk_size) = DataRecord::decode_from(&mut old_data_file)?
|
||||||
|
.ok_or_else(|| {
|
||||||
|
crate::error::StorageError::CorruptRecord(format!(
|
||||||
|
"compaction: unexpected EOF reading doc '{key}' at offset {}",
|
||||||
|
entry.offset
|
||||||
|
))
|
||||||
|
})?;
|
||||||
|
|
||||||
|
// Write to compact file
|
||||||
|
let encoded = record.encode();
|
||||||
|
let new_disk_size = encoded.len() as u32;
|
||||||
|
compact_file.write_all(&encoded)?;
|
||||||
|
|
||||||
|
new_entries.push((
|
||||||
|
key.clone(),
|
||||||
|
KeyDirEntry {
|
||||||
|
offset: current_offset,
|
||||||
|
record_len: new_disk_size,
|
||||||
|
value_len: entry.value_len,
|
||||||
|
timestamp: entry.timestamp,
|
||||||
|
},
|
||||||
|
));
|
||||||
|
|
||||||
|
current_offset += new_disk_size as u64;
|
||||||
|
}
|
||||||
|
|
||||||
|
compact_file.sync_all()?;
|
||||||
|
drop(compact_file);
|
||||||
|
drop(old_data_file);
|
||||||
|
|
||||||
|
// Atomic rename
|
||||||
|
std::fs::rename(&compact_path, data_path)?;
|
||||||
|
|
||||||
|
// Update KeyDir with new offsets
|
||||||
|
for (key, new_entry) in new_entries {
|
||||||
|
keydir.insert(key, new_entry);
|
||||||
|
}
|
||||||
|
|
||||||
|
// Reset counters
|
||||||
|
dead_bytes.store(0, Ordering::Relaxed);
|
||||||
|
data_file_size.store(current_offset, Ordering::Relaxed);
|
||||||
|
|
||||||
|
let bytes_reclaimed = old_file_size.saturating_sub(current_offset);
|
||||||
|
|
||||||
|
info!(
|
||||||
|
records = live_entries.len(),
|
||||||
|
old_size = old_file_size,
|
||||||
|
new_size = current_offset,
|
||||||
|
reclaimed = bytes_reclaimed,
|
||||||
|
"compaction complete"
|
||||||
|
);
|
||||||
|
|
||||||
|
Ok(CompactionResult {
|
||||||
|
records_written: live_entries.len() as u64,
|
||||||
|
bytes_reclaimed,
|
||||||
|
new_file_size: current_offset,
|
||||||
|
})
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Check if compaction is warranted for a collection.
|
||||||
|
/// Returns true if dead bytes exceed 50% of live data.
|
||||||
|
pub fn should_compact(dead_bytes: u64, data_file_size: u64) -> bool {
|
||||||
|
if data_file_size <= FILE_HEADER_SIZE as u64 {
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
let useful_bytes = data_file_size - FILE_HEADER_SIZE as u64;
|
||||||
|
// Trigger when dead > 50% of total useful data
|
||||||
|
dead_bytes > useful_bytes / 2
|
||||||
|
}
|
||||||
|
|
||||||
|
// ---------------------------------------------------------------------------
|
||||||
|
// Tests
|
||||||
|
// ---------------------------------------------------------------------------
|
||||||
|
|
||||||
|
#[cfg(test)]
mod tests {
    use super::*;
    use crate::keydir::KeyDir;
    use crate::record::{now_ms, DataRecord, FileHeader, FileType};
    use std::io::Write;
    use std::sync::atomic::AtomicU64;

    #[test]
    fn compact_removes_dead_records() {
        let dir = tempfile::tempdir().unwrap();
        let data_path = dir.path().join("data.rdb");
        let ts = now_ms();

        // File layout: A v1 (will be superseded), A v2 (current), B (live).
        let records = [
            DataRecord {
                timestamp: ts,
                key: b"aaa".to_vec(),
                value: b"old_value".to_vec(),
            },
            DataRecord {
                timestamp: ts + 1,
                key: b"aaa".to_vec(),
                value: b"new_value".to_vec(),
            },
            DataRecord {
                timestamp: ts + 2,
                key: b"bbb".to_vec(),
                value: b"bbb_value".to_vec(),
            },
        ];

        let mut file = std::fs::File::create(&data_path).unwrap();
        file.write_all(&FileHeader::new(FileType::Data).encode())
            .unwrap();

        // (offset, encoded length) of every record, in write order.
        let mut placements: Vec<(u64, usize)> = Vec::with_capacity(records.len());
        let mut cursor = FILE_HEADER_SIZE as u64;
        for record in &records {
            let encoded = record.encode();
            file.write_all(&encoded).unwrap();
            placements.push((cursor, encoded.len()));
            cursor += encoded.len() as u64;
        }
        file.sync_all().unwrap();
        drop(file);

        let total_size = std::fs::metadata(&data_path).unwrap().len();

        // Build KeyDir — only points to latest versions
        let keydir = KeyDir::new();
        for (name, idx) in [("aaa", 1usize), ("bbb", 2usize)] {
            let (offset, encoded_len) = placements[idx];
            keydir.insert(
                name.into(),
                KeyDirEntry {
                    offset,
                    record_len: encoded_len as u32,
                    value_len: records[idx].value.len() as u32,
                    timestamp: records[idx].timestamp,
                },
            );
        }

        // Only the superseded first record counts as dead.
        let dead_bytes_counter = AtomicU64::new(placements[0].1 as u64);
        let data_file_size_counter = AtomicU64::new(total_size);

        let result = compact_data_file(
            &data_path,
            &keydir,
            &dead_bytes_counter,
            &data_file_size_counter,
        )
        .unwrap();

        assert_eq!(result.records_written, 2);
        assert!(result.bytes_reclaimed > 0);
        assert!(result.new_file_size < total_size);

        // Verify dead_bytes was reset
        assert_eq!(dead_bytes_counter.load(Ordering::Relaxed), 0);

        // Verify KeyDir was updated with new offsets
        let a_entry = keydir.get("aaa").unwrap();
        assert_eq!(a_entry.offset, FILE_HEADER_SIZE as u64); // first record after header
        assert_eq!(a_entry.value_len, b"new_value".len() as u32);

        let b_entry = keydir.get("bbb").unwrap();
        assert!(b_entry.offset > a_entry.offset);

        // Verify the compacted file can be used to rebuild KeyDir
        let (rebuilt, dead, _stats) = KeyDir::build_from_data_file(&data_path).unwrap();
        assert_eq!(rebuilt.len(), 2);
        assert_eq!(dead, 0); // no dead records in compacted file
    }

    #[test]
    fn should_compact_thresholds() {
        let header = FILE_HEADER_SIZE as u64;

        // Under threshold
        assert!(!should_compact(10, 100 + header));
        // Over threshold (dead > 50% of useful)
        assert!(should_compact(60, 100 + header));
        // Empty file
        assert!(!should_compact(0, header));
    }
}
|
||||||
@@ -17,6 +17,15 @@ pub enum StorageError {
|
|||||||
|
|
||||||
#[error("conflict detected: {0}")]
|
#[error("conflict detected: {0}")]
|
||||||
ConflictError(String),
|
ConflictError(String),
|
||||||
|
|
||||||
|
#[error("corrupt record: {0}")]
|
||||||
|
CorruptRecord(String),
|
||||||
|
|
||||||
|
#[error("checksum mismatch: expected 0x{expected:08X}, got 0x{actual:08X}")]
|
||||||
|
ChecksumMismatch { expected: u32, actual: u32 },
|
||||||
|
|
||||||
|
#[error("WAL error: {0}")]
|
||||||
|
WalError(String),
|
||||||
}
|
}
|
||||||
|
|
||||||
impl From<serde_json::Error> for StorageError {
|
impl From<serde_json::Error> for StorageError {
|
||||||
|
|||||||
File diff suppressed because it is too large
Load Diff
@@ -0,0 +1,562 @@
|
|||||||
|
//! KeyDir — in-memory document location index for the Bitcask storage engine.
|
||||||
|
//!
|
||||||
|
//! Maps document `_id` (hex string) to its location in the append-only data file.
|
||||||
|
//! Backed by `DashMap` for lock-free concurrent reads and fine-grained write locking.
|
||||||
|
//!
|
||||||
|
//! The KeyDir can be rebuilt from a data file scan, or loaded quickly from a
|
||||||
|
//! persisted hint file for fast restart.
|
||||||
|
|
||||||
|
use std::io::{self, BufReader, BufWriter, Read, Seek, SeekFrom, Write};
|
||||||
|
use std::path::Path;
|
||||||
|
use std::sync::atomic::{AtomicU64, Ordering};
|
||||||
|
|
||||||
|
use dashmap::DashMap;
|
||||||
|
|
||||||
|
use crate::error::{StorageError, StorageResult};
|
||||||
|
use crate::record::{
|
||||||
|
DataRecord, FileHeader, FileType, RecordScanner, FILE_HEADER_SIZE, FORMAT_VERSION,
|
||||||
|
};
|
||||||
|
|
||||||
|
// ---------------------------------------------------------------------------
|
||||||
|
// KeyDirEntry
|
||||||
|
// ---------------------------------------------------------------------------
|
||||||
|
|
||||||
|
/// Where one document lives inside the data file.
///
/// The entry is small and `Copy`, so lookups hand out values rather than
/// references into the index.
#[derive(Debug, Clone, Copy)]
pub struct KeyDirEntry {
    /// Position of the record inside `data.rdb`, in bytes from the file start.
    pub offset: u64,
    /// Size of the whole record on disk: record header plus payload.
    pub record_len: u32,
    /// Length of the BSON value. A value of 0 marks a tombstone (used during
    /// compaction accounting).
    pub value_len: u32,
    /// Record timestamp in epoch milliseconds. Used for conflict detection.
    pub timestamp: u64,
}
|
||||||
|
|
||||||
|
// ---------------------------------------------------------------------------
|
||||||
|
// BuildStats — statistics from building KeyDir from a data file scan
|
||||||
|
// ---------------------------------------------------------------------------
|
||||||
|
|
||||||
|
/// Counters gathered while a KeyDir is rebuilt from a data file scan.
///
/// All counters start at zero (`Default`).
#[derive(Debug, Clone, Default)]
pub struct BuildStats {
    /// Every record seen by the scan: live, tombstone, or superseded.
    pub total_records_scanned: u64,
    /// How many documents remain live in the finished KeyDir.
    pub live_documents: u64,
    /// How many tombstone records the scan encountered.
    pub tombstones: u64,
    /// How many records were replaced by a later write to the same key.
    pub superseded_records: u64,
}
|
||||||
|
|
||||||
|
// ---------------------------------------------------------------------------
|
||||||
|
// KeyDir
|
||||||
|
// ---------------------------------------------------------------------------
|
||||||
|
|
||||||
|
/// In-memory index mapping document ID → data file location.
|
||||||
|
pub struct KeyDir {
|
||||||
|
map: DashMap<String, KeyDirEntry>,
|
||||||
|
/// Running count of live documents.
|
||||||
|
doc_count: AtomicU64,
|
||||||
|
}
|
||||||
|
|
||||||
|
impl KeyDir {
|
||||||
|
/// Create an empty KeyDir.
|
||||||
|
pub fn new() -> Self {
|
||||||
|
Self {
|
||||||
|
map: DashMap::new(),
|
||||||
|
doc_count: AtomicU64::new(0),
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Insert or update an entry. Returns the previous entry if one existed.
|
||||||
|
pub fn insert(&self, key: String, entry: KeyDirEntry) -> Option<KeyDirEntry> {
|
||||||
|
let prev = self.map.insert(key, entry);
|
||||||
|
if prev.is_none() {
|
||||||
|
self.doc_count.fetch_add(1, Ordering::Relaxed);
|
||||||
|
}
|
||||||
|
prev
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Look up an entry by key.
|
||||||
|
pub fn get(&self, key: &str) -> Option<KeyDirEntry> {
|
||||||
|
self.map.get(key).map(|r| *r.value())
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Remove an entry. Returns the removed entry if it existed.
|
||||||
|
pub fn remove(&self, key: &str) -> Option<KeyDirEntry> {
|
||||||
|
let removed = self.map.remove(key).map(|(_, v)| v);
|
||||||
|
if removed.is_some() {
|
||||||
|
self.doc_count.fetch_sub(1, Ordering::Relaxed);
|
||||||
|
}
|
||||||
|
removed
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Number of live documents.
|
||||||
|
pub fn len(&self) -> u64 {
|
||||||
|
self.doc_count.load(Ordering::Relaxed)
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Whether the index is empty.
|
||||||
|
pub fn is_empty(&self) -> bool {
|
||||||
|
self.len() == 0
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Check if a key exists.
|
||||||
|
pub fn contains(&self, key: &str) -> bool {
|
||||||
|
self.map.contains_key(key)
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Iterate over all entries. The closure receives (key, entry).
|
||||||
|
pub fn for_each(&self, mut f: impl FnMut(&str, &KeyDirEntry)) {
|
||||||
|
for entry in self.map.iter() {
|
||||||
|
f(entry.key(), entry.value());
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Collect all keys.
|
||||||
|
pub fn keys(&self) -> Vec<String> {
|
||||||
|
self.map.iter().map(|e| e.key().clone()).collect()
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Clear all entries.
|
||||||
|
pub fn clear(&self) {
|
||||||
|
self.map.clear();
|
||||||
|
self.doc_count.store(0, Ordering::Relaxed);
|
||||||
|
}
|
||||||
|
|
||||||
|
// -----------------------------------------------------------------------
|
||||||
|
// Build from data file
|
||||||
|
// -----------------------------------------------------------------------
|
||||||
|
|
||||||
|
/// Rebuild the KeyDir by scanning an entire data file.
|
||||||
|
/// The file must start with a valid `FileHeader`.
|
||||||
|
/// Returns `(keydir, dead_bytes, stats)` where `dead_bytes` is the total size of
|
||||||
|
/// stale records (superseded by later writes or tombstoned).
|
||||||
|
pub fn build_from_data_file(path: &Path) -> StorageResult<(Self, u64, BuildStats)> {
|
||||||
|
let file = std::fs::File::open(path)?;
|
||||||
|
let mut reader = BufReader::new(file);
|
||||||
|
|
||||||
|
// Read and validate file header
|
||||||
|
let mut hdr_buf = [0u8; FILE_HEADER_SIZE];
|
||||||
|
reader.read_exact(&mut hdr_buf)?;
|
||||||
|
let hdr = FileHeader::decode(&hdr_buf)?;
|
||||||
|
if hdr.file_type != FileType::Data {
|
||||||
|
return Err(StorageError::CorruptRecord(format!(
|
||||||
|
"expected data file (type 1), got type {:?}",
|
||||||
|
hdr.file_type
|
||||||
|
)));
|
||||||
|
}
|
||||||
|
|
||||||
|
let keydir = KeyDir::new();
|
||||||
|
let mut dead_bytes: u64 = 0;
|
||||||
|
let mut stats = BuildStats::default();
|
||||||
|
|
||||||
|
let scanner = RecordScanner::new(reader, FILE_HEADER_SIZE as u64);
|
||||||
|
for result in scanner {
|
||||||
|
let (offset, record) = result?;
|
||||||
|
let is_tombstone = record.is_tombstone();
|
||||||
|
let disk_size = record.disk_size() as u32;
|
||||||
|
let value_len = record.value.len() as u32;
|
||||||
|
let timestamp = record.timestamp;
|
||||||
|
let key = String::from_utf8(record.key)
|
||||||
|
.map_err(|e| StorageError::CorruptRecord(format!("invalid UTF-8 key: {e}")))?;
|
||||||
|
|
||||||
|
stats.total_records_scanned += 1;
|
||||||
|
|
||||||
|
if is_tombstone {
|
||||||
|
stats.tombstones += 1;
|
||||||
|
// Remove from index; the tombstone itself is dead weight
|
||||||
|
if let Some(prev) = keydir.remove(&key) {
|
||||||
|
dead_bytes += prev.record_len as u64;
|
||||||
|
}
|
||||||
|
dead_bytes += disk_size as u64;
|
||||||
|
} else {
|
||||||
|
let entry = KeyDirEntry {
|
||||||
|
offset,
|
||||||
|
record_len: disk_size,
|
||||||
|
value_len,
|
||||||
|
timestamp,
|
||||||
|
};
|
||||||
|
if let Some(prev) = keydir.insert(key, entry) {
|
||||||
|
// Previous version of same key is now dead
|
||||||
|
dead_bytes += prev.record_len as u64;
|
||||||
|
stats.superseded_records += 1;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
stats.live_documents = keydir.len();
|
||||||
|
Ok((keydir, dead_bytes, stats))
|
||||||
|
}
|
||||||
|
|
||||||
|
// -----------------------------------------------------------------------
|
||||||
|
// Hint file persistence (for fast startup)
|
||||||
|
// -----------------------------------------------------------------------
|
||||||
|
|
||||||
|
/// Persist the KeyDir to a hint file for fast restart.
|
||||||
|
///
|
||||||
|
/// Hint file format (after the 64-byte file header):
|
||||||
|
/// For each entry: [key_len:u32 LE][key bytes][offset:u64 LE][record_len:u32 LE][value_len:u32 LE][timestamp:u64 LE]
|
||||||
|
pub fn persist_to_hint_file(&self, path: &Path) -> StorageResult<()> {
|
||||||
|
let file = std::fs::File::create(path)?;
|
||||||
|
let mut writer = BufWriter::new(file);
|
||||||
|
|
||||||
|
// Write file header
|
||||||
|
let hdr = FileHeader::new(FileType::Hint);
|
||||||
|
writer.write_all(&hdr.encode())?;
|
||||||
|
|
||||||
|
// Write entries
|
||||||
|
for entry in self.map.iter() {
|
||||||
|
let key_bytes = entry.key().as_bytes();
|
||||||
|
let key_len = key_bytes.len() as u32;
|
||||||
|
writer.write_all(&key_len.to_le_bytes())?;
|
||||||
|
writer.write_all(key_bytes)?;
|
||||||
|
writer.write_all(&entry.value().offset.to_le_bytes())?;
|
||||||
|
writer.write_all(&entry.value().record_len.to_le_bytes())?;
|
||||||
|
writer.write_all(&entry.value().value_len.to_le_bytes())?;
|
||||||
|
writer.write_all(&entry.value().timestamp.to_le_bytes())?;
|
||||||
|
}
|
||||||
|
|
||||||
|
writer.flush()?;
|
||||||
|
Ok(())
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Load a KeyDir from a hint file. Returns None if the file doesn't exist.
|
||||||
|
pub fn load_from_hint_file(path: &Path) -> StorageResult<Option<Self>> {
|
||||||
|
if !path.exists() {
|
||||||
|
return Ok(None);
|
||||||
|
}
|
||||||
|
|
||||||
|
let file = std::fs::File::open(path)?;
|
||||||
|
let mut reader = BufReader::new(file);
|
||||||
|
|
||||||
|
// Read and validate header
|
||||||
|
let mut hdr_buf = [0u8; FILE_HEADER_SIZE];
|
||||||
|
match reader.read_exact(&mut hdr_buf) {
|
||||||
|
Ok(()) => {}
|
||||||
|
Err(e) if e.kind() == io::ErrorKind::UnexpectedEof => return Ok(None),
|
||||||
|
Err(e) => return Err(e.into()),
|
||||||
|
}
|
||||||
|
let hdr = FileHeader::decode(&hdr_buf)?;
|
||||||
|
if hdr.file_type != FileType::Hint {
|
||||||
|
return Err(StorageError::CorruptRecord(format!(
|
||||||
|
"expected hint file (type 3), got type {:?}",
|
||||||
|
hdr.file_type
|
||||||
|
)));
|
||||||
|
}
|
||||||
|
if hdr.version > FORMAT_VERSION {
|
||||||
|
return Err(StorageError::CorruptRecord(format!(
|
||||||
|
"hint file version {} is newer than supported {}",
|
||||||
|
hdr.version, FORMAT_VERSION
|
||||||
|
)));
|
||||||
|
}
|
||||||
|
|
||||||
|
let keydir = KeyDir::new();
|
||||||
|
|
||||||
|
loop {
|
||||||
|
// Read key_len
|
||||||
|
let mut key_len_buf = [0u8; 4];
|
||||||
|
match reader.read_exact(&mut key_len_buf) {
|
||||||
|
Ok(()) => {}
|
||||||
|
Err(e) if e.kind() == io::ErrorKind::UnexpectedEof => break,
|
||||||
|
Err(e) => return Err(e.into()),
|
||||||
|
}
|
||||||
|
let key_len = u32::from_le_bytes(key_len_buf) as usize;
|
||||||
|
|
||||||
|
// Read key
|
||||||
|
let mut key_buf = vec![0u8; key_len];
|
||||||
|
reader.read_exact(&mut key_buf)?;
|
||||||
|
let key = String::from_utf8(key_buf)
|
||||||
|
.map_err(|e| StorageError::CorruptRecord(format!("invalid UTF-8 key: {e}")))?;
|
||||||
|
|
||||||
|
// Read entry fields
|
||||||
|
let mut fields = [0u8; 8 + 4 + 4 + 8]; // offset + record_len + value_len + timestamp = 24
|
||||||
|
reader.read_exact(&mut fields)?;
|
||||||
|
|
||||||
|
let offset = u64::from_le_bytes(fields[0..8].try_into().unwrap());
|
||||||
|
let record_len = u32::from_le_bytes(fields[8..12].try_into().unwrap());
|
||||||
|
let value_len = u32::from_le_bytes(fields[12..16].try_into().unwrap());
|
||||||
|
let timestamp = u64::from_le_bytes(fields[16..24].try_into().unwrap());
|
||||||
|
|
||||||
|
keydir.insert(
|
||||||
|
key,
|
||||||
|
KeyDirEntry {
|
||||||
|
offset,
|
||||||
|
record_len,
|
||||||
|
value_len,
|
||||||
|
timestamp,
|
||||||
|
},
|
||||||
|
);
|
||||||
|
}
|
||||||
|
|
||||||
|
Ok(Some(keydir))
|
||||||
|
}
|
||||||
|
|
||||||
|
// -----------------------------------------------------------------------
|
||||||
|
// Hint file validation
|
||||||
|
// -----------------------------------------------------------------------
|
||||||
|
|
||||||
|
/// Validate this KeyDir (loaded from a hint file) against the actual data file.
|
||||||
|
/// Returns `Ok(true)` if the hint appears consistent, `Ok(false)` if a rebuild
|
||||||
|
/// from the data file is recommended.
|
||||||
|
///
|
||||||
|
/// Checks:
|
||||||
|
/// 1. All entry offsets + record_len fit within the data file size.
|
||||||
|
/// 2. All entry offsets are >= FILE_HEADER_SIZE.
|
||||||
|
/// 3. A random sample of entries is spot-checked by reading the record at
|
||||||
|
/// the offset and verifying the key matches.
|
||||||
|
pub fn validate_against_data_file(&self, data_path: &Path, sample_size: usize) -> StorageResult<bool> {
|
||||||
|
let file_size = std::fs::metadata(data_path)
|
||||||
|
.map(|m| m.len())
|
||||||
|
.unwrap_or(0);
|
||||||
|
|
||||||
|
if file_size < FILE_HEADER_SIZE as u64 {
|
||||||
|
// Data file is too small to even contain a header
|
||||||
|
return Ok(self.is_empty());
|
||||||
|
}
|
||||||
|
|
||||||
|
// Pass 1: bounds check all entries
|
||||||
|
let mut all_keys: Vec<(String, KeyDirEntry)> = Vec::with_capacity(self.len() as usize);
|
||||||
|
let mut bounds_ok = true;
|
||||||
|
self.for_each(|key, entry| {
|
||||||
|
if entry.offset < FILE_HEADER_SIZE as u64
|
||||||
|
|| entry.offset + entry.record_len as u64 > file_size
|
||||||
|
{
|
||||||
|
bounds_ok = false;
|
||||||
|
}
|
||||||
|
all_keys.push((key.to_string(), *entry));
|
||||||
|
});
|
||||||
|
|
||||||
|
if !bounds_ok {
|
||||||
|
return Ok(false);
|
||||||
|
}
|
||||||
|
|
||||||
|
// Pass 2: spot-check a sample of entries by reading records from data.rdb
|
||||||
|
if all_keys.is_empty() {
|
||||||
|
return Ok(true);
|
||||||
|
}
|
||||||
|
|
||||||
|
// Sort by offset for sequential I/O, take first `sample_size` entries
|
||||||
|
all_keys.sort_by_key(|(_, e)| e.offset);
|
||||||
|
let step = if all_keys.len() <= sample_size {
|
||||||
|
1
|
||||||
|
} else {
|
||||||
|
all_keys.len() / sample_size
|
||||||
|
};
|
||||||
|
|
||||||
|
let mut file = std::fs::File::open(data_path)?;
|
||||||
|
let mut checked = 0usize;
|
||||||
|
for (i, (expected_key, entry)) in all_keys.iter().enumerate() {
|
||||||
|
if checked >= sample_size {
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
if i % step != 0 {
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
// Seek to the entry's offset and try to decode the record
|
||||||
|
file.seek(SeekFrom::Start(entry.offset))?;
|
||||||
|
match DataRecord::decode_from(&mut file) {
|
||||||
|
Ok(Some((record, _disk_size))) => {
|
||||||
|
let record_key = String::from_utf8_lossy(&record.key);
|
||||||
|
if record_key != *expected_key {
|
||||||
|
return Ok(false);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
Ok(None) | Err(_) => {
|
||||||
|
return Ok(false);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
checked += 1;
|
||||||
|
}
|
||||||
|
|
||||||
|
Ok(true)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
impl Default for KeyDir {
|
||||||
|
fn default() -> Self {
|
||||||
|
Self::new()
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// ---------------------------------------------------------------------------
|
||||||
|
// Tests
|
||||||
|
// ---------------------------------------------------------------------------
|
||||||
|
|
||||||
|
#[cfg(test)]
mod tests {
    use super::*;
    use crate::record::DataRecord;
    use std::io::Write;

    /// Insert, look up, and remove a single key; the live count follows along.
    #[test]
    fn basic_insert_get_remove() {
        let kd = KeyDir::new();
        assert!(kd.is_empty());

        let entry = KeyDirEntry {
            offset: 100,
            record_len: 50,
            value_len: 30,
            timestamp: 1700000000000,
        };

        assert!(kd.insert("abc".into(), entry).is_none());
        assert_eq!(kd.len(), 1);
        assert!(kd.contains("abc"));

        let got = kd.get("abc").unwrap();
        assert_eq!(got.offset, 100);
        assert_eq!(got.value_len, 30);

        let removed = kd.remove("abc").unwrap();
        assert_eq!(removed.offset, 100);
        assert_eq!(kd.len(), 0);
        assert!(!kd.contains("abc"));
    }

    /// Overwriting a key returns the old entry and does not change the count.
    #[test]
    fn insert_overwrites_returns_previous() {
        let kd = KeyDir::new();
        let e1 = KeyDirEntry {
            offset: 100,
            record_len: 50,
            value_len: 30,
            timestamp: 1,
        };
        let e2 = KeyDirEntry {
            offset: 200,
            record_len: 60,
            value_len: 40,
            timestamp: 2,
        };

        kd.insert("k".into(), e1);
        assert_eq!(kd.len(), 1);

        let prev = kd.insert("k".into(), e2).unwrap();
        assert_eq!(prev.offset, 100);
        // Count stays at 1 (overwrite, not new)
        assert_eq!(kd.len(), 1);
        assert_eq!(kd.get("k").unwrap().offset, 200);
    }

    /// Scanning a data file replays inserts and tombstones in order.
    #[test]
    fn build_from_data_file() {
        let dir = tempfile::tempdir().unwrap();
        let data_path = dir.path().join("data.rdb");

        // 3 records: insert A, insert B, delete A
        let r1 = DataRecord {
            timestamp: 1,
            key: b"aaa".to_vec(),
            value: b"val_a".to_vec(),
        };
        let r2 = DataRecord {
            timestamp: 2,
            key: b"bbb".to_vec(),
            value: b"val_b".to_vec(),
        };
        let r3 = DataRecord {
            timestamp: 3,
            key: b"aaa".to_vec(),
            value: vec![], // tombstone
        };

        // Write the data file; the scope closes the handle before reading.
        {
            let mut f = std::fs::File::create(&data_path).unwrap();
            let hdr = FileHeader::new(FileType::Data);
            f.write_all(&hdr.encode()).unwrap();
            f.write_all(&r1.encode()).unwrap();
            f.write_all(&r2.encode()).unwrap();
            f.write_all(&r3.encode()).unwrap();
        }

        let (kd, dead_bytes, stats) = KeyDir::build_from_data_file(&data_path).unwrap();

        // Only B should be live
        assert_eq!(kd.len(), 1);
        assert!(kd.contains("bbb"));
        assert!(!kd.contains("aaa"));

        // Dead bytes: exactly r1 (deleted by the tombstone) + r3 (the tombstone itself)
        assert_eq!(dead_bytes, (r1.disk_size() + r3.disk_size()) as u64);

        // Stats
        assert_eq!(stats.total_records_scanned, 3);
        assert_eq!(stats.live_documents, 1);
        assert_eq!(stats.tombstones, 1);
        assert_eq!(stats.superseded_records, 0); // aaa was removed by tombstone, not superseded
    }

    /// Entries written to a hint file come back unchanged.
    #[test]
    fn hint_file_roundtrip() {
        let dir = tempfile::tempdir().unwrap();
        let hint_path = dir.path().join("keydir.hint");

        let kd = KeyDir::new();
        kd.insert(
            "doc1".into(),
            KeyDirEntry {
                offset: 64,
                record_len: 100,
                value_len: 80,
                timestamp: 1000,
            },
        );
        kd.insert(
            "doc2".into(),
            KeyDirEntry {
                offset: 164,
                record_len: 200,
                value_len: 150,
                timestamp: 2000,
            },
        );

        kd.persist_to_hint_file(&hint_path).unwrap();
        let loaded = KeyDir::load_from_hint_file(&hint_path).unwrap().unwrap();

        assert_eq!(loaded.len(), 2);
        let e1 = loaded.get("doc1").unwrap();
        assert_eq!(e1.offset, 64);
        assert_eq!(e1.record_len, 100);
        assert_eq!(e1.value_len, 80);
        assert_eq!(e1.timestamp, 1000);

        let e2 = loaded.get("doc2").unwrap();
        assert_eq!(e2.offset, 164);
        assert_eq!(e2.timestamp, 2000);
    }

    /// A missing hint file is reported as `None`, not as an error.
    #[test]
    fn hint_file_nonexistent_returns_none() {
        // Use a path inside a fresh temp dir so the test cannot be affected
        // by whatever happens to exist under a shared, hard-coded location.
        let dir = tempfile::tempdir().unwrap();
        let missing = dir.path().join("nonexistent_hint_file.hint");
        let result = KeyDir::load_from_hint_file(&missing);
        assert!(result.unwrap().is_none());
    }

    /// `for_each` and `keys` both visit every key (order is unspecified, so sort).
    #[test]
    fn for_each_and_keys() {
        let kd = KeyDir::new();
        let e = KeyDirEntry {
            offset: 0,
            record_len: 10,
            value_len: 5,
            timestamp: 1,
        };
        kd.insert("x".into(), e);
        kd.insert("y".into(), e);

        let mut collected = Vec::new();
        kd.for_each(|k, _| collected.push(k.to_string()));
        collected.sort();
        assert_eq!(collected, vec!["x", "y"]);

        let mut keys = kd.keys();
        keys.sort();
        assert_eq!(keys, vec!["x", "y"]);
    }
}
|
||||||
@@ -2,21 +2,31 @@
|
|||||||
//!
|
//!
|
||||||
//! Provides the [`StorageAdapter`] trait and two concrete implementations:
|
//! Provides the [`StorageAdapter`] trait and two concrete implementations:
|
||||||
//! - [`MemoryStorageAdapter`] -- fast in-memory store backed by `DashMap`
|
//! - [`MemoryStorageAdapter`] -- fast in-memory store backed by `DashMap`
|
||||||
//! - [`FileStorageAdapter`] -- JSON-file-per-collection persistent store
|
//! - [`FileStorageAdapter`] -- Bitcask-style append-only log with crash recovery
|
||||||
//!
|
//!
|
||||||
//! Also includes an [`OpLog`] for operation logging and a [`WriteAheadLog`]
|
//! Also includes an [`OpLog`] for operation logging, a [`BinaryWal`] for
|
||||||
//! for crash recovery.
|
//! write-ahead logging, and [`compaction`] for dead record reclamation.
|
||||||
|
|
||||||
pub mod adapter;
|
pub mod adapter;
|
||||||
|
pub mod binary_wal;
|
||||||
|
pub mod compaction;
|
||||||
pub mod error;
|
pub mod error;
|
||||||
pub mod file;
|
pub mod file;
|
||||||
|
pub mod keydir;
|
||||||
pub mod memory;
|
pub mod memory;
|
||||||
pub mod oplog;
|
pub mod oplog;
|
||||||
pub mod wal;
|
pub mod record;
|
||||||
|
pub mod validate;
|
||||||
|
|
||||||
pub use adapter::StorageAdapter;
|
pub use adapter::StorageAdapter;
|
||||||
|
pub use binary_wal::{BinaryWal, WalEntry, WalOpType};
|
||||||
|
pub use compaction::{compact_data_file, should_compact, CompactionResult};
|
||||||
pub use error::{StorageError, StorageResult};
|
pub use error::{StorageError, StorageResult};
|
||||||
pub use file::FileStorageAdapter;
|
pub use file::FileStorageAdapter;
|
||||||
|
pub use keydir::{BuildStats, KeyDir, KeyDirEntry};
|
||||||
pub use memory::MemoryStorageAdapter;
|
pub use memory::MemoryStorageAdapter;
|
||||||
pub use oplog::{OpLog, OpLogEntry, OpLogStats, OpType};
|
pub use oplog::{OpLog, OpLogEntry, OpLogStats, OpType};
|
||||||
pub use wal::{WalOp, WalRecord, WriteAheadLog};
|
pub use record::{
|
||||||
|
DataRecord, FileHeader, FileType, RecordScanner, FILE_HEADER_SIZE, FILE_MAGIC, FORMAT_VERSION,
|
||||||
|
RECORD_HEADER_SIZE, RECORD_MAGIC,
|
||||||
|
};
|
||||||
|
|||||||
@@ -0,0 +1,452 @@
|
|||||||
|
//! Binary data record format for the Bitcask-style storage engine.
|
||||||
|
//!
|
||||||
|
//! # File Version Header (64 bytes, at offset 0 of every .rdb / .hint file)
|
||||||
|
//!
|
||||||
|
//! ```text
|
||||||
|
//! ┌──────────────┬──────────┬──────────┬──────────┬──────────┬───────────────┐
|
||||||
|
//! │ magic │ version │ file_type│ flags │ created │ reserved │
|
||||||
|
//! │ 8 bytes │ u16 LE │ u8 │ u32 LE │ u64 LE │ 41 bytes │
|
||||||
|
//! │ "SMARTDB\0" │ │ │ │ epoch_ms │ (zeros) │
|
||||||
|
//! └──────────────┴──────────┴──────────┴──────────┴──────────┴───────────────┘
|
||||||
|
//! ```
|
||||||
|
//!
|
||||||
|
//! # Data Record (appended after the header)
|
||||||
|
//!
|
||||||
|
//! ```text
|
||||||
|
//! ┌──────────┬──────────┬──────────┬──────────┬──────────┬──────────────────┐
|
||||||
|
//! │ magic │ timestamp│ key_len │ val_len │ crc32 │ payload │
|
||||||
|
//! │ u16 LE │ u64 LE │ u32 LE │ u32 LE │ u32 LE │ [key][value] │
|
||||||
|
//! │ 0xDB01 │ epoch_ms │ │ 0=delete │ │ │
|
||||||
|
//! └──────────┴──────────┴──────────┴──────────┴──────────┴──────────────────┘
|
||||||
|
//! ```
|
||||||
|
|
||||||
|
use std::io::{self, Read};
|
||||||
|
use std::time::{SystemTime, UNIX_EPOCH};
|
||||||
|
|
||||||
|
use crate::error::{StorageError, StorageResult};
|
||||||
|
|
||||||
|
// ---------------------------------------------------------------------------
|
||||||
|
// Constants
|
||||||
|
// ---------------------------------------------------------------------------
|
||||||
|
|
||||||
|
/// Magic bytes that open every storage file: `b"SMARTDB\0"`.
pub const FILE_MAGIC: &[u8; 8] = b"SMARTDB\0";

/// Storage format version written into newly created file headers.
pub const FORMAT_VERSION: u16 = 1;

/// Size of the file version header, in bytes.
pub const FILE_HEADER_SIZE: usize = 64;

/// Magic value that opens every record.
pub const RECORD_MAGIC: u16 = 0xDB01;

/// Size of the per-record header (everything before the payload), in bytes:
/// magic (u16) + timestamp (u64) + key_len (u32) + val_len (u32) + crc32 (u32) = 22.
pub const RECORD_HEADER_SIZE: usize = std::mem::size_of::<u16>()
    + std::mem::size_of::<u64>()
    + 3 * std::mem::size_of::<u32>();
|
||||||
|
|
||||||
|
// ---------------------------------------------------------------------------
|
||||||
|
// File type tag stored in the version header
|
||||||
|
// ---------------------------------------------------------------------------
|
||||||
|
|
||||||
|
/// One-byte tag in the file version header identifying the kind of file.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
#[repr(u8)]
pub enum FileType {
    /// Data file (tag 1).
    Data = 1,
    /// WAL file (tag 2).
    Wal = 2,
    /// Hint file (tag 3).
    Hint = 3,
}
|
||||||
|
|
||||||
|
impl FileType {
|
||||||
|
pub fn from_u8(v: u8) -> StorageResult<Self> {
|
||||||
|
match v {
|
||||||
|
1 => Ok(FileType::Data),
|
||||||
|
2 => Ok(FileType::Wal),
|
||||||
|
3 => Ok(FileType::Hint),
|
||||||
|
_ => Err(StorageError::CorruptRecord(format!(
|
||||||
|
"unknown file type tag: {v}"
|
||||||
|
))),
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// ---------------------------------------------------------------------------
|
||||||
|
// File Version Header
|
||||||
|
// ---------------------------------------------------------------------------
|
||||||
|
|
||||||
|
#[derive(Debug, Clone)]
|
||||||
|
pub struct FileHeader {
|
||||||
|
pub version: u16,
|
||||||
|
pub file_type: FileType,
|
||||||
|
pub flags: u32,
|
||||||
|
pub created_ms: u64,
|
||||||
|
}
|
||||||
|
|
||||||
|
impl FileHeader {
|
||||||
|
/// Create a new header for the current format version.
|
||||||
|
pub fn new(file_type: FileType) -> Self {
|
||||||
|
Self {
|
||||||
|
version: FORMAT_VERSION,
|
||||||
|
file_type,
|
||||||
|
flags: 0,
|
||||||
|
created_ms: now_ms(),
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Encode the header to a 64-byte buffer.
|
||||||
|
pub fn encode(&self) -> [u8; FILE_HEADER_SIZE] {
|
||||||
|
let mut buf = [0u8; FILE_HEADER_SIZE];
|
||||||
|
buf[0..8].copy_from_slice(FILE_MAGIC);
|
||||||
|
buf[8..10].copy_from_slice(&self.version.to_le_bytes());
|
||||||
|
buf[10] = self.file_type as u8;
|
||||||
|
buf[11..15].copy_from_slice(&self.flags.to_le_bytes());
|
||||||
|
buf[15..23].copy_from_slice(&self.created_ms.to_le_bytes());
|
||||||
|
// bytes 23..64 are reserved (zeros)
|
||||||
|
buf
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Decode a 64-byte header. Validates magic and version.
|
||||||
|
pub fn decode(buf: &[u8; FILE_HEADER_SIZE]) -> StorageResult<Self> {
|
||||||
|
if &buf[0..8] != FILE_MAGIC {
|
||||||
|
return Err(StorageError::CorruptRecord(
|
||||||
|
"invalid file magic — not a SmartDB file".into(),
|
||||||
|
));
|
||||||
|
}
|
||||||
|
let version = u16::from_le_bytes([buf[8], buf[9]]);
|
||||||
|
if version > FORMAT_VERSION {
|
||||||
|
return Err(StorageError::CorruptRecord(format!(
|
||||||
|
"file format version {version} is newer than supported version {FORMAT_VERSION} — please upgrade"
|
||||||
|
)));
|
||||||
|
}
|
||||||
|
if version == 0 {
|
||||||
|
return Err(StorageError::CorruptRecord(
|
||||||
|
"file format version 0 is invalid".into(),
|
||||||
|
));
|
||||||
|
}
|
||||||
|
let file_type = FileType::from_u8(buf[10])?;
|
||||||
|
let flags = u32::from_le_bytes([buf[11], buf[12], buf[13], buf[14]]);
|
||||||
|
let created_ms = u64::from_le_bytes([
|
||||||
|
buf[15], buf[16], buf[17], buf[18], buf[19], buf[20], buf[21], buf[22],
|
||||||
|
]);
|
||||||
|
Ok(Self {
|
||||||
|
version,
|
||||||
|
file_type,
|
||||||
|
flags,
|
||||||
|
created_ms,
|
||||||
|
})
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// ---------------------------------------------------------------------------
|
||||||
|
// Data Record
|
||||||
|
// ---------------------------------------------------------------------------
|
||||||
|
|
||||||
|
/// One record of the data file: either a live document or a tombstone.
#[derive(Debug, Clone)]
pub struct DataRecord {
    /// Record timestamp.
    pub timestamp: u64,
    /// Raw key bytes.
    pub key: Vec<u8>,
    /// BSON value bytes; an empty value marks a tombstone.
    pub value: Vec<u8>,
}
|
||||||
|
|
||||||
|
impl DataRecord {
|
||||||
|
/// Whether this record is a tombstone (delete marker).
|
||||||
|
pub fn is_tombstone(&self) -> bool {
|
||||||
|
self.value.is_empty()
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Total size on disk (header + payload).
|
||||||
|
pub fn disk_size(&self) -> usize {
|
||||||
|
RECORD_HEADER_SIZE + self.key.len() + self.value.len()
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Encode to bytes. CRC32 covers magic + timestamp + key_len + val_len + payload.
|
||||||
|
pub fn encode(&self) -> Vec<u8> {
|
||||||
|
let key_len = self.key.len() as u32;
|
||||||
|
let val_len = self.value.len() as u32;
|
||||||
|
let total = RECORD_HEADER_SIZE + self.key.len() + self.value.len();
|
||||||
|
let mut buf = Vec::with_capacity(total);
|
||||||
|
|
||||||
|
// Write fields WITHOUT crc first to compute checksum.
|
||||||
|
buf.extend_from_slice(&RECORD_MAGIC.to_le_bytes()); // 2
|
||||||
|
buf.extend_from_slice(&self.timestamp.to_le_bytes()); // 8
|
||||||
|
buf.extend_from_slice(&key_len.to_le_bytes()); // 4
|
||||||
|
buf.extend_from_slice(&val_len.to_le_bytes()); // 4
|
||||||
|
// placeholder for crc32 — we'll fill it after computing
|
||||||
|
buf.extend_from_slice(&0u32.to_le_bytes()); // 4
|
||||||
|
buf.extend_from_slice(&self.key); // key_len
|
||||||
|
buf.extend_from_slice(&self.value); // val_len
|
||||||
|
|
||||||
|
// CRC covers everything except the crc32 field itself:
|
||||||
|
// bytes [0..18] (magic+ts+key_len+val_len) + bytes [22..] (payload)
|
||||||
|
let mut hasher = crc32fast::Hasher::new();
|
||||||
|
hasher.update(&buf[0..18]);
|
||||||
|
hasher.update(&buf[22..]);
|
||||||
|
let crc = hasher.finalize();
|
||||||
|
buf[18..22].copy_from_slice(&crc.to_le_bytes());
|
||||||
|
|
||||||
|
buf
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Decode a record from a reader. Returns the record and its total disk size.
|
||||||
|
/// On EOF at the very start (no bytes to read), returns Ok(None).
|
||||||
|
pub fn decode_from<R: Read>(reader: &mut R) -> StorageResult<Option<(Self, usize)>> {
|
||||||
|
// Read header
|
||||||
|
let mut hdr = [0u8; RECORD_HEADER_SIZE];
|
||||||
|
match reader.read_exact(&mut hdr) {
|
||||||
|
Ok(()) => {}
|
||||||
|
Err(e) if e.kind() == io::ErrorKind::UnexpectedEof => return Ok(None),
|
||||||
|
Err(e) => return Err(e.into()),
|
||||||
|
}
|
||||||
|
|
||||||
|
let magic = u16::from_le_bytes([hdr[0], hdr[1]]);
|
||||||
|
if magic != RECORD_MAGIC {
|
||||||
|
return Err(StorageError::CorruptRecord(format!(
|
||||||
|
"invalid record magic: 0x{magic:04X}, expected 0x{RECORD_MAGIC:04X}"
|
||||||
|
)));
|
||||||
|
}
|
||||||
|
|
||||||
|
let timestamp = u64::from_le_bytes(hdr[2..10].try_into().unwrap());
|
||||||
|
let key_len = u32::from_le_bytes(hdr[10..14].try_into().unwrap()) as usize;
|
||||||
|
let val_len = u32::from_le_bytes(hdr[14..18].try_into().unwrap()) as usize;
|
||||||
|
let stored_crc = u32::from_le_bytes(hdr[18..22].try_into().unwrap());
|
||||||
|
|
||||||
|
// Read payload
|
||||||
|
let payload_len = key_len + val_len;
|
||||||
|
let mut payload = vec![0u8; payload_len];
|
||||||
|
reader.read_exact(&mut payload)?;
|
||||||
|
|
||||||
|
// Verify CRC: covers header bytes [0..18] + payload
|
||||||
|
let mut hasher = crc32fast::Hasher::new();
|
||||||
|
hasher.update(&hdr[0..18]);
|
||||||
|
hasher.update(&payload);
|
||||||
|
let computed_crc = hasher.finalize();
|
||||||
|
if computed_crc != stored_crc {
|
||||||
|
return Err(StorageError::ChecksumMismatch {
|
||||||
|
expected: stored_crc,
|
||||||
|
actual: computed_crc,
|
||||||
|
});
|
||||||
|
}
|
||||||
|
|
||||||
|
let key = payload[..key_len].to_vec();
|
||||||
|
let value = payload[key_len..].to_vec();
|
||||||
|
let disk_size = RECORD_HEADER_SIZE + payload_len;
|
||||||
|
|
||||||
|
Ok(Some((
|
||||||
|
DataRecord {
|
||||||
|
timestamp,
|
||||||
|
key,
|
||||||
|
value,
|
||||||
|
},
|
||||||
|
disk_size,
|
||||||
|
)))
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// ---------------------------------------------------------------------------
|
||||||
|
// Record Scanner — iterate records from a byte slice or reader
|
||||||
|
// ---------------------------------------------------------------------------
|
||||||
|
|
||||||
|
/// Scans records sequentially from a reader, yielding (offset, record) pairs.
|
||||||
|
/// Starts reading from the current reader position. The `base_offset` parameter
|
||||||
|
/// indicates the byte offset in the file where reading begins (typically
|
||||||
|
/// `FILE_HEADER_SIZE` for a data file).
|
||||||
|
pub struct RecordScanner<R> {
|
||||||
|
reader: R,
|
||||||
|
offset: u64,
|
||||||
|
}
|
||||||
|
|
||||||
|
impl<R: Read> RecordScanner<R> {
|
||||||
|
pub fn new(reader: R, base_offset: u64) -> Self {
|
||||||
|
Self {
|
||||||
|
reader,
|
||||||
|
offset: base_offset,
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
impl<R: Read> Iterator for RecordScanner<R> {
|
||||||
|
/// (file_offset, record) or an error. Iteration stops on EOF or error.
|
||||||
|
type Item = StorageResult<(u64, DataRecord)>;
|
||||||
|
|
||||||
|
fn next(&mut self) -> Option<Self::Item> {
|
||||||
|
match DataRecord::decode_from(&mut self.reader) {
|
||||||
|
Ok(Some((record, disk_size))) => {
|
||||||
|
let offset = self.offset;
|
||||||
|
self.offset += disk_size as u64;
|
||||||
|
Some(Ok((offset, record)))
|
||||||
|
}
|
||||||
|
Ok(None) => None, // clean EOF
|
||||||
|
Err(e) => Some(Err(e)),
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// ---------------------------------------------------------------------------
|
||||||
|
// Helpers
|
||||||
|
// ---------------------------------------------------------------------------
|
||||||
|
|
||||||
|
/// Wall-clock time as whole milliseconds elapsed since the UNIX epoch.
///
/// Panics if the system clock reports a time earlier than the epoch.
pub fn now_ms() -> u64 {
    let since_epoch = SystemTime::now().duration_since(UNIX_EPOCH).unwrap();
    since_epoch.as_millis() as u64
}
|
||||||
|
|
||||||
|
// ---------------------------------------------------------------------------
|
||||||
|
// Tests
|
||||||
|
// ---------------------------------------------------------------------------
|
||||||
|
|
||||||
|
#[cfg(test)]
mod tests {
    use super::*;

    /// Encoding a fresh header and decoding it back preserves every field.
    #[test]
    fn file_header_roundtrip() {
        let original = FileHeader::new(FileType::Data);
        let bytes = original.encode();
        assert_eq!(bytes.len(), FILE_HEADER_SIZE);

        let parsed = FileHeader::decode(&bytes).unwrap();
        assert_eq!(parsed.version, FORMAT_VERSION);
        assert_eq!(parsed.file_type, FileType::Data);
        assert_eq!(parsed.flags, 0);
        assert_eq!(parsed.created_ms, original.created_ms);
    }

    /// A header that does not start with the expected magic is rejected.
    #[test]
    fn file_header_rejects_bad_magic() {
        let mut bytes = [0u8; FILE_HEADER_SIZE];
        bytes[0..8].copy_from_slice(b"BADMAGIC");
        assert!(FileHeader::decode(&bytes).is_err());
    }

    /// A header claiming a newer format version than supported is rejected.
    #[test]
    fn file_header_rejects_future_version() {
        let mut header = FileHeader::new(FileType::Data);
        header.version = FORMAT_VERSION + 1;

        // Patch the version bytes directly as well, so the check does not
        // depend on what `encode` writes into that field.
        let mut patched = header.encode();
        patched[8..10].copy_from_slice(&(FORMAT_VERSION + 1).to_le_bytes());
        assert!(FileHeader::decode(&patched).is_err());
    }

    /// A record with a non-empty value survives an encode/decode round trip.
    #[test]
    fn record_roundtrip_live() {
        let original = DataRecord {
            timestamp: 1700000000000,
            key: b"abc123".to_vec(),
            value: b"\x10\x00\x00\x00\x02hi\x00\x03\x00\x00\x00ok\x00\x00".to_vec(),
        };
        let bytes = original.encode();
        assert_eq!(bytes.len(), original.disk_size());

        let mut input = std::io::Cursor::new(&bytes);
        let (parsed, consumed) = DataRecord::decode_from(&mut input).unwrap().unwrap();
        assert_eq!(consumed, bytes.len());
        assert_eq!(parsed.timestamp, original.timestamp);
        assert_eq!(parsed.key, original.key);
        assert_eq!(parsed.value, original.value);
        assert!(!parsed.is_tombstone());
    }

    /// A record with an empty value is a tombstone before and after encoding.
    #[test]
    fn record_roundtrip_tombstone() {
        let original = DataRecord {
            timestamp: 1700000000000,
            key: b"def456".to_vec(),
            value: vec![],
        };
        assert!(original.is_tombstone());

        let bytes = original.encode();
        let mut input = std::io::Cursor::new(&bytes);
        let (parsed, _) = DataRecord::decode_from(&mut input).unwrap().unwrap();
        assert!(parsed.is_tombstone());
        assert_eq!(parsed.key, b"def456");
    }

    /// Corrupting the payload is reported as a checksum mismatch.
    #[test]
    fn record_detects_corruption() {
        let original = DataRecord {
            timestamp: 42,
            key: b"key".to_vec(),
            value: b"value".to_vec(),
        };
        let mut bytes = original.encode();

        // Invert every bit of the final payload byte.
        let tail = bytes.len() - 1;
        bytes[tail] ^= 0xFF;

        let mut input = std::io::Cursor::new(&bytes);
        let outcome = DataRecord::decode_from(&mut input);
        assert!(matches!(outcome, Err(StorageError::ChecksumMismatch { .. })));
    }

    /// Overwriting the record magic is reported as a corrupt record.
    #[test]
    fn record_detects_bad_magic() {
        let original = DataRecord {
            timestamp: 42,
            key: b"key".to_vec(),
            value: b"value".to_vec(),
        };
        let mut bytes = original.encode();
        bytes[0] = 0xFF;
        bytes[1] = 0xFF;

        let mut input = std::io::Cursor::new(&bytes);
        let outcome = DataRecord::decode_from(&mut input);
        assert!(matches!(outcome, Err(StorageError::CorruptRecord(_))));
    }

    /// Decoding from an empty stream yields `None` rather than an error.
    #[test]
    fn eof_returns_none() {
        let nothing: &[u8] = &[];
        let mut input = std::io::Cursor::new(nothing);
        let outcome = DataRecord::decode_from(&mut input).unwrap();
        assert!(outcome.is_none());
    }

    /// The scanner yields every record in order, tagged with its byte offset.
    #[test]
    fn scanner_iterates_multiple_records() {
        let records = vec![
            DataRecord {
                timestamp: 1,
                key: b"a".to_vec(),
                value: b"v1".to_vec(),
            },
            DataRecord {
                timestamp: 2,
                key: b"b".to_vec(),
                value: b"v2".to_vec(),
            },
            DataRecord {
                timestamp: 3,
                key: b"c".to_vec(),
                value: vec![],
            },
        ];

        // Concatenate the encoded records into one contiguous stream.
        let mut stream = Vec::new();
        for record in &records {
            stream.extend_from_slice(&record.encode());
        }

        let scanner = RecordScanner::new(std::io::Cursor::new(&stream), 0);
        let scanned: Vec<_> = scanner.collect::<Result<Vec<_>, _>>().unwrap();
        assert_eq!(scanned.len(), 3);
        assert_eq!(scanned[0].1.key, b"a");
        assert_eq!(scanned[1].1.key, b"b");
        assert!(scanned[2].1.is_tombstone());

        // Each offset equals the summed disk size of the preceding records.
        let first_len = records[0].disk_size();
        let second_len = records[1].disk_size();
        assert_eq!(scanned[0].0, 0);
        assert_eq!(scanned[1].0, first_len as u64);
        assert_eq!(scanned[2].0, (first_len + second_len) as u64);
    }
}
|
||||||
@@ -0,0 +1,324 @@
|
|||||||
|
//! Data integrity validation for RustDb storage directories.
|
||||||
|
//!
|
||||||
|
//! Provides offline validation of data files without starting the server.
|
||||||
|
//! Checks header magic, record CRC32 checksums, duplicate IDs, and
|
||||||
|
//! keydir.hint consistency.
|
||||||
|
|
||||||
|
use std::collections::HashMap;
|
||||||
|
use std::io::{BufReader, Read};
|
||||||
|
use std::path::Path;
|
||||||
|
|
||||||
|
use crate::error::{StorageError, StorageResult};
|
||||||
|
use crate::keydir::KeyDir;
|
||||||
|
use crate::record::{FileHeader, FileType, RecordScanner, FILE_HEADER_SIZE};
|
||||||
|
|
||||||
|
/// Outcome of validating a whole data directory: one report per collection.
pub struct ValidationReport {
    pub collections: Vec<CollectionReport>,
}

/// Outcome of validating one collection's data file and optional hint file.
pub struct CollectionReport {
    /// Name of the database directory.
    pub db: String,
    /// Name of the collection directory.
    pub collection: String,
    /// Whether the file header decoded and declared the data file type.
    pub header_valid: bool,
    /// Number of records that decoded successfully.
    pub total_records: u64,
    /// Number of keys that are live once the scan ends.
    pub live_documents: u64,
    /// Number of tombstone records encountered.
    pub tombstones: u64,
    /// Keys that occur in more than one non-tombstone record, sorted.
    pub duplicate_ids: Vec<String>,
    /// Number of records that failed CRC verification.
    pub checksum_errors: u64,
    /// Number of records that failed to decode for any other reason.
    pub decode_errors: u64,
    /// Size of the data file in bytes.
    pub data_file_size: u64,
    /// Whether a hint file was present next to the data file.
    pub hint_file_exists: bool,
    /// Count of hint-file inconsistencies (key not live in the data file, or
    /// record range extending past the end of the data file).
    pub orphaned_hint_entries: u64,
    /// Human-readable descriptions of the problems found.
    pub errors: Vec<String>,
}

impl ValidationReport {
    /// Returns `true` if at least one collection reported any kind of problem.
    pub fn has_errors(&self) -> bool {
        self.collections.iter().any(|c| {
            let clean = c.header_valid
                && c.duplicate_ids.is_empty()
                && c.checksum_errors == 0
                && c.decode_errors == 0
                && c.orphaned_hint_entries == 0
                && c.errors.is_empty();
            !clean
        })
    }

    /// Write a human-readable report to stdout: one section per collection
    /// followed by a one-line summary with the total error count.
    pub fn print_summary(&self) {
        println!("=== SmartDB Data Integrity Report ===");
        println!();

        let mut total_errors = 0u64;
        for report in &self.collections {
            Self::print_collection(report);
            total_errors += Self::issue_count(report);
        }

        println!(
            "Summary: {} collection(s) checked, {} error(s) found.",
            self.collections.len(),
            total_errors
        );
    }

    /// Print the section for a single collection, ending with a blank line.
    fn print_collection(report: &CollectionReport) {
        println!("Database: {}", report.db);
        println!(" Collection: {}", report.collection);

        let header_status = if report.header_valid { "OK" } else { "INVALID" };
        println!(" Header: {}", header_status);
        println!(
            " Records: {} ({} live, {} tombstones)",
            report.total_records, report.live_documents, report.tombstones
        );
        println!(" Data size: {} bytes", report.data_file_size);

        let duplicate_count = report.duplicate_ids.len();
        if duplicate_count == 0 {
            println!(" Duplicates: 0");
        } else {
            // Show at most five ids and summarise the remainder.
            let shown = duplicate_count.min(5);
            let preview = report.duplicate_ids[..shown].join(", ");
            let hidden = duplicate_count - shown;
            let suffix = if hidden > 0 {
                format!(", ... and {} more", hidden)
            } else {
                String::new()
            };
            println!(" Duplicates: {} (ids: {}{})", duplicate_count, preview, suffix);
        }

        match report.checksum_errors {
            0 => println!(" CRC errors: 0"),
            n => println!(" CRC errors: {}", n),
        }

        // Decode errors are only mentioned when there are any.
        if report.decode_errors > 0 {
            println!(" Decode errors: {}", report.decode_errors);
        }

        match (report.hint_file_exists, report.orphaned_hint_entries) {
            (false, _) => println!(" Hint file: absent"),
            (true, 0) => println!(" Hint file: OK"),
            (true, n) => println!(" Hint file: STALE ({} orphaned entries)", n),
        }

        for err in &report.errors {
            println!(" ERROR: {}", err);
        }

        println!();
    }

    /// Number of individual problems a collection contributes to the summary.
    fn issue_count(report: &CollectionReport) -> u64 {
        let mut count = 0u64;
        if !report.header_valid {
            count += 1;
        }
        count += report.duplicate_ids.len() as u64;
        count += report.checksum_errors;
        count += report.decode_errors;
        count += report.orphaned_hint_entries;
        count += report.errors.len() as u64;
        count
    }
}
|
||||||
|
|
||||||
|
/// Validate all collections in a data directory.
|
||||||
|
///
|
||||||
|
/// The directory structure is expected to be:
|
||||||
|
/// ```text
|
||||||
|
/// {base_path}/{db}/{collection}/data.rdb
|
||||||
|
/// ```
|
||||||
|
pub fn validate_data_directory(base_path: &str) -> StorageResult<ValidationReport> {
|
||||||
|
let base = Path::new(base_path);
|
||||||
|
if !base.exists() {
|
||||||
|
return Err(StorageError::IoError(std::io::Error::new(
|
||||||
|
std::io::ErrorKind::NotFound,
|
||||||
|
format!("data directory not found: {base_path}"),
|
||||||
|
)));
|
||||||
|
}
|
||||||
|
|
||||||
|
let mut collections = Vec::new();
|
||||||
|
|
||||||
|
// Iterate database directories
|
||||||
|
let entries = std::fs::read_dir(base)?;
|
||||||
|
for entry in entries {
|
||||||
|
let entry = entry?;
|
||||||
|
if !entry.file_type()?.is_dir() {
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
let db_name = match entry.file_name().to_str() {
|
||||||
|
Some(s) => s.to_string(),
|
||||||
|
None => continue,
|
||||||
|
};
|
||||||
|
|
||||||
|
// Iterate collection directories
|
||||||
|
let db_entries = std::fs::read_dir(entry.path())?;
|
||||||
|
for coll_entry in db_entries {
|
||||||
|
let coll_entry = coll_entry?;
|
||||||
|
if !coll_entry.file_type()?.is_dir() {
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
let coll_name = match coll_entry.file_name().to_str() {
|
||||||
|
Some(s) => s.to_string(),
|
||||||
|
None => continue,
|
||||||
|
};
|
||||||
|
|
||||||
|
let data_path = coll_entry.path().join("data.rdb");
|
||||||
|
if !data_path.exists() {
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
|
||||||
|
let report = validate_collection(&db_name, &coll_name, &coll_entry.path());
|
||||||
|
collections.push(report);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// Sort for deterministic output
|
||||||
|
collections.sort_by(|a, b| (&a.db, &a.collection).cmp(&(&b.db, &b.collection)));
|
||||||
|
|
||||||
|
Ok(ValidationReport { collections })
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Validate a single collection directory.
|
||||||
|
fn validate_collection(db: &str, coll: &str, coll_dir: &Path) -> CollectionReport {
|
||||||
|
let data_path = coll_dir.join("data.rdb");
|
||||||
|
let hint_path = coll_dir.join("keydir.hint");
|
||||||
|
|
||||||
|
let mut report = CollectionReport {
|
||||||
|
db: db.to_string(),
|
||||||
|
collection: coll.to_string(),
|
||||||
|
header_valid: false,
|
||||||
|
total_records: 0,
|
||||||
|
live_documents: 0,
|
||||||
|
tombstones: 0,
|
||||||
|
duplicate_ids: Vec::new(),
|
||||||
|
checksum_errors: 0,
|
||||||
|
decode_errors: 0,
|
||||||
|
data_file_size: 0,
|
||||||
|
hint_file_exists: hint_path.exists(),
|
||||||
|
orphaned_hint_entries: 0,
|
||||||
|
errors: Vec::new(),
|
||||||
|
};
|
||||||
|
|
||||||
|
// Get file size
|
||||||
|
match std::fs::metadata(&data_path) {
|
||||||
|
Ok(m) => report.data_file_size = m.len(),
|
||||||
|
Err(e) => {
|
||||||
|
report.errors.push(format!("cannot stat data.rdb: {e}"));
|
||||||
|
return report;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// Open and validate header
|
||||||
|
let file = match std::fs::File::open(&data_path) {
|
||||||
|
Ok(f) => f,
|
||||||
|
Err(e) => {
|
||||||
|
report.errors.push(format!("cannot open data.rdb: {e}"));
|
||||||
|
return report;
|
||||||
|
}
|
||||||
|
};
|
||||||
|
let mut reader = BufReader::new(file);
|
||||||
|
|
||||||
|
let mut hdr_buf = [0u8; FILE_HEADER_SIZE];
|
||||||
|
if let Err(e) = reader.read_exact(&mut hdr_buf) {
|
||||||
|
report.errors.push(format!("cannot read header: {e}"));
|
||||||
|
return report;
|
||||||
|
}
|
||||||
|
|
||||||
|
match FileHeader::decode(&hdr_buf) {
|
||||||
|
Ok(hdr) => {
|
||||||
|
if hdr.file_type != FileType::Data {
|
||||||
|
report.errors.push(format!(
|
||||||
|
"wrong file type: expected Data, got {:?}",
|
||||||
|
hdr.file_type
|
||||||
|
));
|
||||||
|
} else {
|
||||||
|
report.header_valid = true;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
Err(e) => {
|
||||||
|
report.errors.push(format!("invalid header: {e}"));
|
||||||
|
return report;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// Scan all records
|
||||||
|
let mut id_counts: HashMap<String, u64> = HashMap::new();
|
||||||
|
let mut live_ids: std::collections::HashSet<String> = std::collections::HashSet::new();
|
||||||
|
let scanner = RecordScanner::new(reader, FILE_HEADER_SIZE as u64);
|
||||||
|
|
||||||
|
for result in scanner {
|
||||||
|
match result {
|
||||||
|
Ok((_offset, record)) => {
|
||||||
|
report.total_records += 1;
|
||||||
|
let key = String::from_utf8_lossy(&record.key).to_string();
|
||||||
|
|
||||||
|
if record.is_tombstone() {
|
||||||
|
report.tombstones += 1;
|
||||||
|
live_ids.remove(&key);
|
||||||
|
} else {
|
||||||
|
*id_counts.entry(key.clone()).or_insert(0) += 1;
|
||||||
|
live_ids.insert(key);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
Err(e) => {
|
||||||
|
let err_str = e.to_string();
|
||||||
|
if err_str.contains("checksum") || err_str.contains("Checksum") {
|
||||||
|
report.checksum_errors += 1;
|
||||||
|
} else {
|
||||||
|
report.decode_errors += 1;
|
||||||
|
}
|
||||||
|
// Cannot continue scanning after a decode error — the stream position is lost
|
||||||
|
report.errors.push(format!("record decode error: {e}"));
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
report.live_documents = live_ids.len() as u64;
|
||||||
|
|
||||||
|
// Find duplicates (keys that appeared more than once as live inserts)
|
||||||
|
for (id, count) in &id_counts {
|
||||||
|
if *count > 1 {
|
||||||
|
report.duplicate_ids.push(id.clone());
|
||||||
|
}
|
||||||
|
}
|
||||||
|
report.duplicate_ids.sort();
|
||||||
|
|
||||||
|
// Validate hint file if present
|
||||||
|
if hint_path.exists() {
|
||||||
|
match KeyDir::load_from_hint_file(&hint_path) {
|
||||||
|
Ok(Some(hint_kd)) => {
|
||||||
|
// Check for orphaned entries: keys in hint but not live in data
|
||||||
|
hint_kd.for_each(|key, _entry| {
|
||||||
|
if !live_ids.contains(key) {
|
||||||
|
report.orphaned_hint_entries += 1;
|
||||||
|
}
|
||||||
|
});
|
||||||
|
|
||||||
|
// Also check if hint references offsets beyond file size
|
||||||
|
hint_kd.for_each(|_key, entry| {
|
||||||
|
if entry.offset + entry.record_len as u64 > report.data_file_size {
|
||||||
|
report.orphaned_hint_entries += 1;
|
||||||
|
}
|
||||||
|
});
|
||||||
|
}
|
||||||
|
Ok(None) => {
|
||||||
|
// File existed but was empty or unreadable
|
||||||
|
report.errors.push("hint file exists but is empty".into());
|
||||||
|
}
|
||||||
|
Err(e) => {
|
||||||
|
report.errors.push(format!("hint file decode error: {e}"));
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
report
|
||||||
|
}
|
||||||
@@ -1,186 +0,0 @@
|
|||||||
//! Write-Ahead Log (WAL) for crash recovery.
|
|
||||||
//!
|
|
||||||
//! Before any mutation is applied to storage, it is first written to the WAL.
|
|
||||||
//! On recovery, uncommitted WAL entries can be replayed or discarded.
|
|
||||||
|
|
||||||
use std::path::PathBuf;
|
|
||||||
use std::sync::atomic::{AtomicU64, Ordering};
|
|
||||||
|
|
||||||
use bson::Document;
|
|
||||||
use serde::{Deserialize, Serialize};
|
|
||||||
use tokio::io::AsyncWriteExt;
|
|
||||||
use tracing::{debug, warn};
|
|
||||||
|
|
||||||
use crate::error::StorageResult;
|
|
||||||
|
|
||||||
/// WAL operation kind.
|
|
||||||
#[derive(Debug, Clone, Serialize, Deserialize, PartialEq, Eq)]
|
|
||||||
pub enum WalOp {
|
|
||||||
Insert,
|
|
||||||
Update,
|
|
||||||
Delete,
|
|
||||||
}
|
|
||||||
|
|
||||||
/// A single WAL record.
|
|
||||||
#[derive(Debug, Clone, Serialize, Deserialize)]
|
|
||||||
pub struct WalRecord {
|
|
||||||
/// Sequence number.
|
|
||||||
pub seq: u64,
|
|
||||||
/// Operation kind.
|
|
||||||
pub op: WalOp,
|
|
||||||
/// Database name.
|
|
||||||
pub db: String,
|
|
||||||
/// Collection name.
|
|
||||||
pub collection: String,
|
|
||||||
/// Document id (hex string).
|
|
||||||
pub document_id: String,
|
|
||||||
/// Document data (for insert/update).
|
|
||||||
pub document: Option<Document>,
|
|
||||||
/// Whether this record has been committed (applied to storage).
|
|
||||||
pub committed: bool,
|
|
||||||
/// CRC32 checksum of the serialized payload for integrity verification.
|
|
||||||
pub checksum: u32,
|
|
||||||
}
|
|
||||||
|
|
||||||
/// Write-ahead log that persists records to a file.
|
|
||||||
pub struct WriteAheadLog {
|
|
||||||
path: PathBuf,
|
|
||||||
next_seq: AtomicU64,
|
|
||||||
}
|
|
||||||
|
|
||||||
impl WriteAheadLog {
|
|
||||||
/// Create a new WAL at the given file path.
|
|
||||||
pub fn new(path: PathBuf) -> Self {
|
|
||||||
Self {
|
|
||||||
path,
|
|
||||||
next_seq: AtomicU64::new(1),
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
/// Initialize the WAL (create file if needed, load sequence counter).
|
|
||||||
pub async fn initialize(&self) -> StorageResult<()> {
|
|
||||||
if let Some(parent) = self.path.parent() {
|
|
||||||
tokio::fs::create_dir_all(parent).await?;
|
|
||||||
}
|
|
||||||
if self.path.exists() {
|
|
||||||
// Load existing records to find the max sequence number.
|
|
||||||
let records = self.read_all().await?;
|
|
||||||
if let Some(max_seq) = records.iter().map(|r| r.seq).max() {
|
|
||||||
self.next_seq.store(max_seq + 1, Ordering::SeqCst);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
debug!("WAL initialized at {:?}", self.path);
|
|
||||||
Ok(())
|
|
||||||
}
|
|
||||||
|
|
||||||
/// Append a record to the WAL. Returns the sequence number.
|
|
||||||
pub async fn append(
|
|
||||||
&self,
|
|
||||||
op: WalOp,
|
|
||||||
db: &str,
|
|
||||||
collection: &str,
|
|
||||||
document_id: &str,
|
|
||||||
document: Option<Document>,
|
|
||||||
) -> StorageResult<u64> {
|
|
||||||
let seq = self.next_seq.fetch_add(1, Ordering::SeqCst);
|
|
||||||
|
|
||||||
// Compute checksum over the payload.
|
|
||||||
let payload = serde_json::json!({
|
|
||||||
"op": op,
|
|
||||||
"db": db,
|
|
||||||
"collection": collection,
|
|
||||||
"document_id": document_id,
|
|
||||||
});
|
|
||||||
let payload_bytes = serde_json::to_vec(&payload)?;
|
|
||||||
let checksum = crc32fast::hash(&payload_bytes);
|
|
||||||
|
|
||||||
let record = WalRecord {
|
|
||||||
seq,
|
|
||||||
op,
|
|
||||||
db: db.to_string(),
|
|
||||||
collection: collection.to_string(),
|
|
||||||
document_id: document_id.to_string(),
|
|
||||||
document,
|
|
||||||
committed: false,
|
|
||||||
checksum,
|
|
||||||
};
|
|
||||||
|
|
||||||
let line = serde_json::to_string(&record)?;
|
|
||||||
let mut file = tokio::fs::OpenOptions::new()
|
|
||||||
.create(true)
|
|
||||||
.append(true)
|
|
||||||
.open(&self.path)
|
|
||||||
.await?;
|
|
||||||
file.write_all(line.as_bytes()).await?;
|
|
||||||
file.write_all(b"\n").await?;
|
|
||||||
file.flush().await?;
|
|
||||||
|
|
||||||
Ok(seq)
|
|
||||||
}
|
|
||||||
|
|
||||||
/// Mark a WAL record as committed by rewriting the file.
|
|
||||||
pub async fn mark_committed(&self, seq: u64) -> StorageResult<()> {
|
|
||||||
let mut records = self.read_all().await?;
|
|
||||||
for record in &mut records {
|
|
||||||
if record.seq == seq {
|
|
||||||
record.committed = true;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
self.write_all(&records).await
|
|
||||||
}
|
|
||||||
|
|
||||||
/// Read all WAL records.
|
|
||||||
pub async fn read_all(&self) -> StorageResult<Vec<WalRecord>> {
|
|
||||||
if !self.path.exists() {
|
|
||||||
return Ok(vec![]);
|
|
||||||
}
|
|
||||||
let data = tokio::fs::read_to_string(&self.path).await?;
|
|
||||||
let mut records = Vec::new();
|
|
||||||
for line in data.lines() {
|
|
||||||
if line.trim().is_empty() {
|
|
||||||
continue;
|
|
||||||
}
|
|
||||||
match serde_json::from_str::<WalRecord>(line) {
|
|
||||||
Ok(record) => records.push(record),
|
|
||||||
Err(e) => {
|
|
||||||
warn!("skipping corrupt WAL record: {e}");
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
Ok(records)
|
|
||||||
}
|
|
||||||
|
|
||||||
/// Get all uncommitted records (for replay during recovery).
|
|
||||||
pub async fn uncommitted(&self) -> StorageResult<Vec<WalRecord>> {
|
|
||||||
let records = self.read_all().await?;
|
|
||||||
Ok(records.into_iter().filter(|r| !r.committed).collect())
|
|
||||||
}
|
|
||||||
|
|
||||||
/// Truncate the WAL, removing all committed records.
|
|
||||||
pub async fn truncate_committed(&self) -> StorageResult<()> {
|
|
||||||
let records = self.read_all().await?;
|
|
||||||
let uncommitted: Vec<_> = records.into_iter().filter(|r| !r.committed).collect();
|
|
||||||
self.write_all(&uncommitted).await
|
|
||||||
}
|
|
||||||
|
|
||||||
/// Clear the entire WAL.
|
|
||||||
pub async fn clear(&self) -> StorageResult<()> {
|
|
||||||
if self.path.exists() {
|
|
||||||
tokio::fs::write(&self.path, "").await?;
|
|
||||||
}
|
|
||||||
self.next_seq.store(1, Ordering::SeqCst);
|
|
||||||
Ok(())
|
|
||||||
}
|
|
||||||
|
|
||||||
/// Write all records to the WAL file (overwrites).
|
|
||||||
async fn write_all(&self, records: &[WalRecord]) -> StorageResult<()> {
|
|
||||||
let mut content = String::new();
|
|
||||||
for record in records {
|
|
||||||
let line = serde_json::to_string(record)?;
|
|
||||||
content.push_str(&line);
|
|
||||||
content.push('\n');
|
|
||||||
}
|
|
||||||
tokio::fs::write(&self.path, content).await?;
|
|
||||||
Ok(())
|
|
||||||
}
|
|
||||||
}
|
|
||||||
@@ -1,6 +1,8 @@
|
|||||||
pub mod management;
|
pub mod management;
|
||||||
|
|
||||||
|
use std::path::PathBuf;
|
||||||
use std::sync::Arc;
|
use std::sync::Arc;
|
||||||
|
use std::time::Duration;
|
||||||
|
|
||||||
use anyhow::Result;
|
use anyhow::Result;
|
||||||
use dashmap::DashMap;
|
use dashmap::DashMap;
|
||||||
@@ -14,7 +16,7 @@ use rustdb_config::{RustDbOptions, StorageType};
|
|||||||
use rustdb_wire::{WireCodec, OP_QUERY};
|
use rustdb_wire::{WireCodec, OP_QUERY};
|
||||||
use rustdb_wire::{encode_op_msg_response, encode_op_reply_response};
|
use rustdb_wire::{encode_op_msg_response, encode_op_reply_response};
|
||||||
use rustdb_storage::{StorageAdapter, MemoryStorageAdapter, FileStorageAdapter, OpLog};
|
use rustdb_storage::{StorageAdapter, MemoryStorageAdapter, FileStorageAdapter, OpLog};
|
||||||
// IndexEngine is used indirectly via CommandContext
|
use rustdb_index::{IndexEngine, IndexOptions};
|
||||||
use rustdb_txn::{TransactionEngine, SessionEngine};
|
use rustdb_txn::{TransactionEngine, SessionEngine};
|
||||||
use rustdb_commands::{CommandRouter, CommandContext};
|
use rustdb_commands::{CommandRouter, CommandContext};
|
||||||
|
|
||||||
@@ -33,7 +35,16 @@ impl RustDb {
|
|||||||
// Create storage adapter
|
// Create storage adapter
|
||||||
let storage: Arc<dyn StorageAdapter> = match options.storage {
|
let storage: Arc<dyn StorageAdapter> = match options.storage {
|
||||||
StorageType::Memory => {
|
StorageType::Memory => {
|
||||||
let adapter = MemoryStorageAdapter::new();
|
let adapter = if let Some(ref pp) = options.persist_path {
|
||||||
|
tracing::info!("MemoryStorageAdapter with periodic persistence to {}", pp);
|
||||||
|
MemoryStorageAdapter::with_persist_path(PathBuf::from(pp))
|
||||||
|
} else {
|
||||||
|
tracing::warn!(
|
||||||
|
"SmartDB is using in-memory storage — data will NOT survive a restart. \
|
||||||
|
Set storage to 'file' for durable persistence."
|
||||||
|
);
|
||||||
|
MemoryStorageAdapter::new()
|
||||||
|
};
|
||||||
Arc::new(adapter)
|
Arc::new(adapter)
|
||||||
}
|
}
|
||||||
StorageType::File => {
|
StorageType::File => {
|
||||||
@@ -49,9 +60,99 @@ impl RustDb {
|
|||||||
// Initialize storage
|
// Initialize storage
|
||||||
storage.initialize().await?;
|
storage.initialize().await?;
|
||||||
|
|
||||||
|
// Restore any previously persisted state (no-op for file storage and
|
||||||
|
// memory storage without a persist_path).
|
||||||
|
storage.restore().await?;
|
||||||
|
|
||||||
|
// Spawn periodic persistence task for memory storage with persist_path.
|
||||||
|
if options.storage == StorageType::Memory && options.persist_path.is_some() {
|
||||||
|
let persist_storage = storage.clone();
|
||||||
|
let interval_ms = options.persist_interval_ms;
|
||||||
|
tokio::spawn(async move {
|
||||||
|
let mut interval = tokio::time::interval(Duration::from_millis(interval_ms));
|
||||||
|
interval.tick().await; // skip the immediate first tick
|
||||||
|
loop {
|
||||||
|
interval.tick().await;
|
||||||
|
if let Err(e) = persist_storage.persist().await {
|
||||||
|
tracing::error!("Periodic persist failed: {}", e);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
});
|
||||||
|
}
|
||||||
|
|
||||||
|
let indexes: Arc<DashMap<String, IndexEngine>> = Arc::new(DashMap::new());
|
||||||
|
|
||||||
|
// Restore persisted indexes from storage.
|
||||||
|
if let Ok(databases) = storage.list_databases().await {
|
||||||
|
for db_name in &databases {
|
||||||
|
if let Ok(collections) = storage.list_collections(db_name).await {
|
||||||
|
for coll_name in &collections {
|
||||||
|
if let Ok(specs) = storage.get_indexes(db_name, coll_name).await {
|
||||||
|
let has_custom = specs.iter().any(|s| {
|
||||||
|
s.get_str("name").unwrap_or("_id_") != "_id_"
|
||||||
|
});
|
||||||
|
if !has_custom {
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
|
||||||
|
let ns_key = format!("{}.{}", db_name, coll_name);
|
||||||
|
let mut engine = IndexEngine::new();
|
||||||
|
|
||||||
|
for spec in &specs {
|
||||||
|
let name = spec.get_str("name").unwrap_or("").to_string();
|
||||||
|
if name == "_id_" {
|
||||||
|
continue; // already created by IndexEngine::new()
|
||||||
|
}
|
||||||
|
let key = match spec.get("key") {
|
||||||
|
Some(bson::Bson::Document(k)) => k.clone(),
|
||||||
|
_ => continue,
|
||||||
|
};
|
||||||
|
let unique = matches!(spec.get("unique"), Some(bson::Bson::Boolean(true)));
|
||||||
|
let sparse = matches!(spec.get("sparse"), Some(bson::Bson::Boolean(true)));
|
||||||
|
let expire_after_seconds = match spec.get("expireAfterSeconds") {
|
||||||
|
Some(bson::Bson::Int32(n)) => Some(*n as u64),
|
||||||
|
Some(bson::Bson::Int64(n)) => Some(*n as u64),
|
||||||
|
_ => None,
|
||||||
|
};
|
||||||
|
|
||||||
|
let options = IndexOptions {
|
||||||
|
name: Some(name.clone()),
|
||||||
|
unique,
|
||||||
|
sparse,
|
||||||
|
expire_after_seconds,
|
||||||
|
};
|
||||||
|
if let Err(e) = engine.create_index(key, options) {
|
||||||
|
tracing::warn!(
|
||||||
|
namespace = %ns_key,
|
||||||
|
index = %name,
|
||||||
|
error = %e,
|
||||||
|
"failed to restore index"
|
||||||
|
);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// Rebuild index data from existing documents.
|
||||||
|
if let Ok(docs) = storage.find_all(db_name, coll_name).await {
|
||||||
|
if !docs.is_empty() {
|
||||||
|
engine.rebuild_from_documents(&docs);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
tracing::info!(
|
||||||
|
namespace = %ns_key,
|
||||||
|
indexes = engine.list_indexes().len(),
|
||||||
|
"restored indexes"
|
||||||
|
);
|
||||||
|
indexes.insert(ns_key, engine);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
let ctx = Arc::new(CommandContext {
|
let ctx = Arc::new(CommandContext {
|
||||||
storage,
|
storage,
|
||||||
indexes: Arc::new(DashMap::new()),
|
indexes,
|
||||||
transactions: Arc::new(TransactionEngine::new()),
|
transactions: Arc::new(TransactionEngine::new()),
|
||||||
sessions: Arc::new(SessionEngine::new(30 * 60 * 1000, 60 * 1000)),
|
sessions: Arc::new(SessionEngine::new(30 * 60 * 1000, 60 * 1000)),
|
||||||
cursors: Arc::new(DashMap::new()),
|
cursors: Arc::new(DashMap::new()),
|
||||||
|
|||||||
@@ -25,6 +25,10 @@ struct Cli {
|
|||||||
#[arg(long)]
|
#[arg(long)]
|
||||||
validate: bool,
|
validate: bool,
|
||||||
|
|
||||||
|
/// Validate data integrity of a storage directory (offline check)
|
||||||
|
#[arg(long, value_name = "PATH")]
|
||||||
|
validate_data: Option<String>,
|
||||||
|
|
||||||
/// Run in management mode (JSON-over-stdin IPC for TypeScript wrapper)
|
/// Run in management mode (JSON-over-stdin IPC for TypeScript wrapper)
|
||||||
#[arg(long)]
|
#[arg(long)]
|
||||||
management: bool,
|
management: bool,
|
||||||
@@ -55,7 +59,7 @@ async fn main() -> Result<()> {
|
|||||||
let options = RustDbOptions::from_file(&cli.config)
|
let options = RustDbOptions::from_file(&cli.config)
|
||||||
.map_err(|e| anyhow::anyhow!("Failed to load config '{}': {}", cli.config, e))?;
|
.map_err(|e| anyhow::anyhow!("Failed to load config '{}': {}", cli.config, e))?;
|
||||||
|
|
||||||
// Validate-only mode
|
// Validate-only mode (config)
|
||||||
if cli.validate {
|
if cli.validate {
|
||||||
match options.validate() {
|
match options.validate() {
|
||||||
Ok(()) => {
|
Ok(()) => {
|
||||||
@@ -69,6 +73,18 @@ async fn main() -> Result<()> {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Validate data integrity mode
|
||||||
|
if let Some(ref data_path) = cli.validate_data {
|
||||||
|
tracing::info!("Validating data integrity at {}", data_path);
|
||||||
|
let report = rustdb_storage::validate::validate_data_directory(data_path)
|
||||||
|
.map_err(|e| anyhow::anyhow!("Validation failed: {}", e))?;
|
||||||
|
report.print_summary();
|
||||||
|
if report.has_errors() {
|
||||||
|
std::process::exit(1);
|
||||||
|
}
|
||||||
|
return Ok(());
|
||||||
|
}
|
||||||
|
|
||||||
// Create and start server
|
// Create and start server
|
||||||
let mut db = RustDb::new(options).await?;
|
let mut db = RustDb::new(options).await?;
|
||||||
db.start().await?;
|
db.start().await?;
|
||||||
|
|||||||
@@ -0,0 +1,256 @@
|
|||||||
|
import { expect, tap } from '@git.zone/tstest/tapbundle';
|
||||||
|
import * as smartdb from '../ts/index.js';
|
||||||
|
import { MongoClient, Db } from 'mongodb';
|
||||||
|
import * as fs from 'fs';
|
||||||
|
import * as path from 'path';
|
||||||
|
import * as os from 'os';
|
||||||
|
|
||||||
|
// ---------------------------------------------------------------------------
|
||||||
|
// Helpers
|
||||||
|
// ---------------------------------------------------------------------------
|
||||||
|
|
||||||
|
let tmpDir: string;
|
||||||
|
let server: smartdb.SmartdbServer;
|
||||||
|
let client: MongoClient;
|
||||||
|
let db: Db;
|
||||||
|
|
||||||
|
/**
 * Creates a uniquely named scratch directory under the OS temp dir.
 *
 * @returns Absolute path of the newly created directory.
 */
function makeTmpDir(): string {
  const prefix = path.join(os.tmpdir(), 'smartdb-compact-test-');
  return fs.mkdtempSync(prefix);
}
|
||||||
|
|
||||||
|
/**
 * Recursively deletes `dir` if it exists; does nothing otherwise.
 *
 * @param dir Directory to remove.
 */
function cleanTmpDir(dir: string): void {
  if (!fs.existsSync(dir)) {
    return;
  }
  fs.rmSync(dir, { recursive: true, force: true });
}
|
||||||
|
|
||||||
|
/**
 * Returns the size in bytes of a collection's `data.rdb` file.
 *
 * Uses a single `statSync` call with `throwIfNoEntry: false` instead of an
 * `existsSync` + `statSync` pair, so a file that disappears between the two
 * calls (e.g. replaced while the server rewrites it) cannot raise ENOENT.
 *
 * @param storagePath Root storage directory.
 * @param dbName Database directory name.
 * @param collName Collection directory name.
 * @returns File size in bytes, or 0 when the file does not exist.
 */
function getDataFileSize(storagePath: string, dbName: string, collName: string): number {
  const dataPath = path.join(storagePath, dbName, collName, 'data.rdb');
  const stats = fs.statSync(dataPath, { throwIfNoEntry: false });
  return stats ? stats.size : 0;
}
|
||||||
|
|
||||||
|
// ============================================================================
|
||||||
|
// Compaction: Setup
|
||||||
|
// ============================================================================
|
||||||
|
|
||||||
|
// Boots a file-backed server on a unique Unix socket and connects the shared client.
tap.test('compaction: start server with file storage', async () => {
  tmpDir = makeTmpDir();

  const socketPath = path.join(
    os.tmpdir(),
    `smartdb-compact-${Date.now()}-${Math.random().toString(36).slice(2)}.sock`
  );
  server = new smartdb.SmartdbServer({ socketPath, storage: 'file', storagePath: tmpDir });
  await server.start();

  const clientOptions = { directConnection: true, serverSelectionTimeoutMS: 5000 };
  client = new MongoClient(server.getConnectionUri(), clientOptions);
  await client.connect();

  db = client.db('compactdb');
});
|
||||||
|
|
||||||
|
// ============================================================================
|
||||||
|
// Compaction: Updates grow the data file
|
||||||
|
// ============================================================================
|
||||||
|
|
||||||
|
// Updates one document many times and checks the collection stays logically correct.
tap.test('compaction: repeated updates grow the data file', async () => {
  const coll = db.collection('growing');

  // Insert a document; the data file must exist and be non-empty afterwards.
  await coll.insertOne({ key: 'target', counter: 0, payload: 'x'.repeat(200) });
  const sizeAfterInsert = getDataFileSize(tmpDir, 'compactdb', 'growing');
  expect(sizeAfterInsert).toBeGreaterThan(0);

  // Update the same document 50 times — each update appends a new record
  for (let i = 1; i <= 50; i++) {
    await coll.updateOne(
      { key: 'target' },
      { $set: { counter: i, payload: 'y'.repeat(200) } }
    );
  }

  // Compaction may have run during updates, so the file size after the
  // updates is deliberately not measured or asserted. What matters is that
  // the data is correct.

  // The collection still has just 1 document
  const count = await coll.countDocuments();
  expect(count).toEqual(1);

  const doc = await coll.findOne({ key: 'target' });
  expect(doc!.counter).toEqual(50);
});
|
||||||
|
|
||||||
|
// ============================================================================
|
||||||
|
// Compaction: Deletes create tombstones
|
||||||
|
// ============================================================================
|
||||||
|
|
||||||
|
// Inserts 100 documents, deletes them all, and checks the collection is empty.
tap.test('compaction: insert-then-delete creates dead space', async () => {
  const coll = db.collection('tombstones');

  // Insert 100 documents
  const docs = [];
  for (let i = 0; i < 100; i++) {
    docs.push({ idx: i, data: 'delete-me-' + 'z'.repeat(100) });
  }
  await coll.insertMany(docs);

  // Delete all 100
  await coll.deleteMany({});

  // File may have been compacted during deletes (dead > 50% threshold), so
  // file sizes are deliberately not measured here; the operation itself
  // should succeed regardless of file size.

  // The count must be 0
  const count = await coll.countDocuments();
  expect(count).toEqual(0);
});
|
||||||
|
|
||||||
|
// ============================================================================
|
||||||
|
// Compaction: Data integrity after compaction trigger
|
||||||
|
// ============================================================================
|
||||||
|
|
||||||
|
// Rewrites 10 documents 20 times each and checks the data file stays bounded.
tap.test('compaction: data file shrinks after heavy updates trigger compaction', async () => {
  const DOC_COUNT = 10;
  const ROUNDS = 20;
  const coll = db.collection('shrinktest');

  // Seed documents with large payloads.
  const seed = Array.from({ length: DOC_COUNT }, (_, i) => ({ idx: i, data: 'a'.repeat(500) }));
  await coll.insertMany(seed);

  const sizeAfterInsert = getDataFileSize(tmpDir, 'compactdb', 'shrinktest');

  // 20 rounds x 10 documents = 200 superseded records vs 10 live ones,
  // which should trigger compaction (dead > 50% threshold).
  for (let round = 0; round < ROUNDS; round++) {
    for (let idx = 0; idx < DOC_COUNT; idx++) {
      const update = { $set: { data: `round-${round}-` + 'b'.repeat(500) } };
      await coll.updateOne({ idx }, update);
    }
  }

  // The pre-compaction peak cannot be measured exactly, so the final size is
  // only compared against a generous upper bound: without compaction the file
  // would hold 210 records instead of roughly 10 plus header overhead.
  const sizeAfterCompaction = getDataFileSize(tmpDir, 'compactdb', 'shrinktest');
  const maxExpectedSize = sizeAfterInsert * 5;
  expect(sizeAfterCompaction).toBeLessThanOrEqual(maxExpectedSize);

  // Every document must still be present and carry the last round's payload.
  const liveCount = await coll.countDocuments();
  expect(liveCount).toEqual(10);

  for (let idx = 0; idx < DOC_COUNT; idx++) {
    const found = await coll.findOne({ idx });
    expect(found).toBeTruthy();
    expect(found!.data.startsWith('round-19-')).toBeTrue();
  }
});
|
||||||
|
|
||||||
|
// ============================================================================
|
||||||
|
// Compaction: Persistence after compaction + restart
|
||||||
|
// ============================================================================
|
||||||
|
|
||||||
|
// Restarts the server on the same storage path and re-checks all three collections.
tap.test('compaction: data survives compaction + restart', async () => {
  await client.close();
  await server.stop();

  const socketPath = path.join(
    os.tmpdir(),
    `smartdb-compact-${Date.now()}-${Math.random().toString(36).slice(2)}.sock`
  );
  server = new smartdb.SmartdbServer({ socketPath, storage: 'file', storagePath: tmpDir });
  await server.start();

  const clientOptions = { directConnection: true, serverSelectionTimeoutMS: 5000 };
  client = new MongoClient(server.getConnectionUri(), clientOptions);
  await client.connect();
  db = client.db('compactdb');

  // shrinktest: all 10 documents with the final round's payload.
  const shrink = db.collection('shrinktest');
  const shrinkCount = await shrink.countDocuments();
  expect(shrinkCount).toEqual(10);

  for (let idx = 0; idx < 10; idx++) {
    const found = await shrink.findOne({ idx });
    expect(found).toBeTruthy();
    expect(found!.data.startsWith('round-19-')).toBeTrue();
  }

  // growing: the single document keeps its last counter value.
  const growDoc = await db.collection('growing').findOne({ key: 'target' });
  expect(growDoc).toBeTruthy();
  expect(growDoc!.counter).toEqual(50);

  // tombstones: still empty.
  const tombCount = await db.collection('tombstones').countDocuments();
  expect(tombCount).toEqual(0);
});
|
||||||
|
|
||||||
|
// ============================================================================
|
||||||
|
// Compaction: Mixed operations stress test
|
||||||
|
// ============================================================================
|
||||||
|
|
||||||
|
// Mixes inserts, updates and deletes on one collection and verifies the survivors.
tap.test('compaction: mixed insert-update-delete stress test', async () => {
  const coll = db.collection('stress');

  // Phase 1: insert 200 documents.
  const seed = Array.from({ length: 200 }, (_, i) => ({
    idx: i,
    value: `initial-${i}`,
    alive: true,
  }));
  await coll.insertMany(seed);

  // Phase 2: update every even-indexed document, one at a time.
  for (let even = 0; even < 200; even += 2) {
    await coll.updateOne({ idx: even }, { $set: { value: `updated-${even}` } });
  }

  // Phase 3: delete every document whose idx is a multiple of 3 (0, 3, ..., 198).
  const multiplesOfThree = Array.from({ length: 67 }, (_, k) => k * 3);
  await coll.deleteMany({ idx: { $in: multiplesOfThree } });

  // Survivors must not be multiples of 3; even ones carry the updated value.
  const survivors = await coll.find({}).toArray();
  for (const survivor of survivors) {
    expect(survivor.idx % 3).not.toEqual(0);
    const expectedValue =
      survivor.idx % 2 === 0 ? `updated-${survivor.idx}` : `initial-${survivor.idx}`;
    expect(survivor.value).toEqual(expectedValue);
  }

  // 200 inserted - 67 deleted = 133
  const total = await coll.countDocuments();
  expect(total).toEqual(133);
});
|
||||||
|
|
||||||
|
// ============================================================================
|
||||||
|
// Cleanup
|
||||||
|
// ============================================================================
|
||||||
|
|
||||||
|
// Tears down in order: client connection, server, then the scratch directory.
tap.test('compaction: cleanup', async () => {
  await client.close();
  await server.stop();

  cleanTmpDir(tmpDir);
});
|
||||||
|
|
||||||
|
export default tap.start();
|
||||||
@@ -0,0 +1,394 @@
|
|||||||
|
import { expect, tap } from '@git.zone/tstest/tapbundle';
|
||||||
|
import * as smartdb from '../ts/index.js';
|
||||||
|
import { MongoClient, Db } from 'mongodb';
|
||||||
|
import * as fs from 'fs';
|
||||||
|
import * as path from 'path';
|
||||||
|
import * as os from 'os';
|
||||||
|
|
||||||
|
// ---------------------------------------------------------------------------
|
||||||
|
// Helpers
|
||||||
|
// ---------------------------------------------------------------------------
|
||||||
|
|
||||||
|
let tmpDir: string;
|
||||||
|
let server: smartdb.SmartdbServer;
|
||||||
|
let client: MongoClient;
|
||||||
|
let db: Db;
|
||||||
|
|
||||||
|
/**
 * Creates a uniquely named scratch directory under the OS temp dir.
 *
 * @returns Absolute path of the newly created directory.
 */
function makeTmpDir(): string {
  const prefix = path.join(os.tmpdir(), 'smartdb-test-');
  return fs.mkdtempSync(prefix);
}
|
||||||
|
|
||||||
|
/**
 * Recursively deletes `dir` if it exists; does nothing otherwise.
 *
 * @param dir Directory to remove.
 */
function cleanTmpDir(dir: string): void {
  if (!fs.existsSync(dir)) {
    return;
  }
  fs.rmSync(dir, { recursive: true, force: true });
}
|
||||||
|
|
||||||
|
// ============================================================================
|
||||||
|
// File Storage: Startup
|
||||||
|
// ============================================================================
|
||||||
|
|
||||||
|
// Boots a file-backed server on TCP port 27118 using a fresh scratch directory.
tap.test('file-storage: should start server with file storage', async () => {
  tmpDir = makeTmpDir();

  const serverOptions = { port: 27118, storage: 'file' as const, storagePath: tmpDir };
  server = new smartdb.SmartdbServer(serverOptions);

  await server.start();
  expect(server.running).toBeTrue();
});
|
||||||
|
|
||||||
|
// Connects the shared MongoClient and selects the 'filetest' database.
tap.test('file-storage: should connect MongoClient', async () => {
  const clientOptions = { directConnection: true, serverSelectionTimeoutMS: 5000 };
  client = new MongoClient('mongodb://127.0.0.1:27118', clientOptions);
  await client.connect();

  db = client.db('filetest');
  expect(db).toBeTruthy();
});
|
||||||
|
|
||||||
|
// ============================================================================
|
||||||
|
// File Storage: Data files are created on disk
|
||||||
|
// ============================================================================
|
||||||
|
|
||||||
|
// Verifies that an insert materialises <storage>/<db>/<collection>/data.rdb
// and that the file starts with the 8-byte 'SMARTDB\0' magic header.
tap.test('file-storage: inserting creates data files on disk', async () => {
  const coll = db.collection('diskcheck');
  await coll.insertOne({ name: 'disk-test', value: 42 });

  // The storage directory should now contain a database directory
  const dbDir = path.join(tmpDir, 'filetest');
  expect(fs.existsSync(dbDir)).toBeTrue();

  // Collection directory with data.rdb should exist
  const collDir = path.join(dbDir, 'diskcheck');
  expect(fs.existsSync(collDir)).toBeTrue();

  const dataFile = path.join(collDir, 'data.rdb');
  expect(fs.existsSync(dataFile)).toBeTrue();

  // data.rdb should have the SMARTDB magic header
  const header = Buffer.alloc(8);
  const fd = fs.openSync(dataFile, 'r');
  try {
    fs.readSync(fd, header, 0, 8, 0);
  } finally {
    // Always release the descriptor, even if the read throws.
    fs.closeSync(fd);
  }
  expect(header.toString('ascii')).toEqual('SMARTDB\0');
});
|
||||||
|
|
||||||
|
// ============================================================================
|
||||||
|
// File Storage: Full CRUD cycle
|
||||||
|
// ============================================================================
|
||||||
|
|
||||||
|
// A single insert must be acknowledged and yield an inserted id.
tap.test('file-storage: insertOne returns valid id', async () => {
  const people = db.collection('crud');
  const insertResult = await people.insertOne({ name: 'Alice', age: 30 });

  expect(insertResult.acknowledged).toBeTrue();
  expect(insertResult.insertedId).toBeTruthy();
});
|
||||||
|
|
||||||
|
// A batch insert of four documents must report four inserted documents.
tap.test('file-storage: insertMany returns all ids', async () => {
  const newPeople = [
    { name: 'Bob', age: 25 },
    { name: 'Charlie', age: 35 },
    { name: 'Diana', age: 28 },
    { name: 'Eve', age: 32 },
  ];

  const insertResult = await db.collection('crud').insertMany(newPeople);
  expect(insertResult.insertedCount).toEqual(4);
});
|
||||||
|
|
||||||
|
// Looks Alice up by name and checks both stored fields.
tap.test('file-storage: findOne retrieves correct document', async () => {
  const alice = await db.collection('crud').findOne({ name: 'Alice' });

  expect(alice).toBeTruthy();
  expect(alice!.name).toEqual('Alice');
  expect(alice!.age).toEqual(30);
});
|
||||||
|
|
||||||
|
// A $gte filter must return exactly the three people aged 30 or more.
tap.test('file-storage: find with filter returns correct subset', async () => {
  const people = db.collection('crud');
  const matches = await people.find({ age: { $gte: 30 } }).toArray();

  // Alice(30), Charlie(35), Eve(32)
  expect(matches.length).toEqual(3);

  const allAtLeastThirty = matches.every((person) => person.age >= 30);
  expect(allAtLeastThirty).toBeTrue();
});
|
||||||
|
|
||||||
|
// Updates Alice and reads her back to confirm both changed fields.
tap.test('file-storage: updateOne modifies document', async () => {
  const people = db.collection('crud');
  const aliceFilter = { name: 'Alice' };

  const updateResult = await people.updateOne(aliceFilter, { $set: { age: 31, updated: true } });
  expect(updateResult.modifiedCount).toEqual(1);

  const alice = await people.findOne({ name: 'Alice' });
  expect(alice!.age).toEqual(31);
  expect(alice!.updated).toBeTrue();
});
|
||||||
|
|
||||||
|
// Deletes Eve and confirms a follow-up lookup finds nothing.
tap.test('file-storage: deleteOne removes document', async () => {
  const people = db.collection('crud');

  const deleteResult = await people.deleteOne({ name: 'Eve' });
  expect(deleteResult.deletedCount).toEqual(1);

  const eve = await people.findOne({ name: 'Eve' });
  expect(eve).toBeNull();
});
|
||||||
|
|
||||||
|
// 5 inserted - 1 deleted = 4 documents remaining.
tap.test('file-storage: count reflects current state', async () => {
  const remaining = await db.collection('crud').countDocuments();
  expect(remaining).toEqual(4);
});
|
||||||
|
|
||||||
|
// ============================================================================
|
||||||
|
// File Storage: Persistence across server restart
|
||||||
|
// ============================================================================
|
||||||
|
|
||||||
|
// Closes the client first, then stops the server ahead of the restart checks.
tap.test('file-storage: stop server for restart test', async () => {
  await client.close();
  await server.stop();

  expect(server.running).toBeFalse();
});
|
||||||
|
|
||||||
|
// Starts a new server instance over the existing storage path and reconnects.
tap.test('file-storage: restart server with same data path', async () => {
  const serverOptions = { port: 27118, storage: 'file' as const, storagePath: tmpDir };
  server = new smartdb.SmartdbServer(serverOptions);
  await server.start();
  expect(server.running).toBeTrue();

  const clientOptions = { directConnection: true, serverSelectionTimeoutMS: 5000 };
  client = new MongoClient('mongodb://127.0.0.1:27118', clientOptions);
  await client.connect();

  db = client.db('filetest');
});
|
||||||
|
|
||||||
|
// After the restart, earlier inserts, the update and the delete must all still hold.
tap.test('file-storage: data persists after restart', async () => {
  const people = db.collection('crud');

  // Alice keeps her updated fields.
  const alice = await people.findOne({ name: 'Alice' });
  expect(alice).toBeTruthy();
  expect(alice!.age).toEqual(31);
  expect(alice!.updated).toBeTrue();

  // Bob, Charlie and Diana are still present.
  const bob = await people.findOne({ name: 'Bob' });
  expect(bob).toBeTruthy();
  expect(bob!.age).toEqual(25);

  const charlie = await people.findOne({ name: 'Charlie' });
  expect(charlie).toBeTruthy();

  const diana = await people.findOne({ name: 'Diana' });
  expect(diana).toBeTruthy();

  // Eve stays deleted.
  const eve = await people.findOne({ name: 'Eve' });
  expect(eve).toBeNull();
});
|
||||||
|
|
||||||
|
// The document count must be unchanged by the restart.
tap.test('file-storage: count is correct after restart', async () => {
  const total = await db.collection('crud').countDocuments();
  expect(total).toEqual(4);
});
|
||||||
|
|
||||||
|
// The restarted server must accept new writes on the existing collection.
tap.test('file-storage: can write new data after restart', async () => {
  const people = db.collection('crud');

  const insertResult = await people.insertOne({ name: 'Frank', age: 45 });
  expect(insertResult.acknowledged).toBeTrue();

  const frank = await people.findOne({ name: 'Frank' });
  expect(frank).toBeTruthy();
  expect(frank!.age).toEqual(45);

  const total = await people.countDocuments();
  expect(total).toEqual(5);
});
|
||||||
|
|
||||||
|
// ============================================================================
|
||||||
|
// File Storage: Multiple collections in same database
|
||||||
|
// ============================================================================
|
||||||
|
|
||||||
|
// Two collections in the same database must not affect each other's contents.
tap.test('file-storage: multiple collections are independent', async () => {
  const products = db.collection('products');
  const orders = db.collection('orders');

  const productDocs = [
    { sku: 'A001', name: 'Widget', price: 9.99 },
    { sku: 'A002', name: 'Gadget', price: 19.99 },
  ];
  await products.insertMany(productDocs);

  const orderDocs = [
    { orderId: 1, sku: 'A001', qty: 3 },
    { orderId: 2, sku: 'A002', qty: 1 },
    { orderId: 3, sku: 'A001', qty: 2 },
  ];
  await orders.insertMany(orderDocs);

  const productCount = await products.countDocuments();
  const orderCount = await orders.countDocuments();
  expect(productCount).toEqual(2);
  expect(orderCount).toEqual(3);

  // Deleting from products must leave orders untouched.
  await products.deleteOne({ sku: 'A001' });
  expect(await products.countDocuments()).toEqual(1);
  expect(await orders.countDocuments()).toEqual(3);
});
|
||||||
|
|
||||||
|
// ============================================================================
|
||||||
|
// File Storage: Multiple databases
|
||||||
|
// ============================================================================
|
||||||
|
|
||||||
|
// Writing to a second database must not change the first; the second is dropped afterwards.
tap.test('file-storage: multiple databases are independent', async () => {
  const otherDb = client.db('filetest2');
  const items = otherDb.collection('items');

  await items.insertOne({ name: 'cross-db-test', source: 'db2' });

  // The second database holds exactly one document.
  const itemCount = await items.countDocuments();
  expect(itemCount).toEqual(1);

  // The original database is unaffected.
  const crudCount = await db.collection('crud').countDocuments();
  expect(crudCount).toEqual(5);

  await otherDb.dropDatabase();
});
|
||||||
|
|
||||||
|
// ============================================================================
|
||||||
|
// File Storage: Large batch insert and retrieval
|
||||||
|
// ============================================================================
|
||||||
|
|
||||||
|
// Inserts 1000 documents in a single insertMany call.
tap.test('file-storage: bulk insert 1000 documents', async () => {
  const bulk = db.collection('bulk');

  const payload = Array.from({ length: 1000 }, (_, i) => ({
    index: i,
    data: `value-${i}`,
    timestamp: Date.now(),
  }));

  const insertResult = await bulk.insertMany(payload);
  expect(insertResult.insertedCount).toEqual(1000);
});
|
||||||
|
|
||||||
|
// An unfiltered find must return every bulk document.
tap.test('file-storage: find all 1000 documents', async () => {
  const everything = await db.collection('bulk').find({}).toArray();
  expect(everything.length).toEqual(1000);
});
|
||||||
|
|
||||||
|
// A half-open range [500, 600) must match exactly 100 documents.
tap.test('file-storage: range query on 1000 documents', async () => {
  const bulk = db.collection('bulk');
  const inRange = await bulk.find({ index: { $gte: 500, $lt: 600 } }).toArray();

  expect(inRange.length).toEqual(100);

  const allWithinBounds = inRange.every((entry) => entry.index >= 500 && entry.index < 600);
  expect(allWithinBounds).toBeTrue();
});
|
||||||
|
|
||||||
|
// Descending sort with limit 10 must yield indexes 999 down to 990.
tap.test('file-storage: sorted retrieval with limit', async () => {
  const cursor = db.collection('bulk').find({}).sort({ index: -1 }).limit(10);
  const topTen = await cursor.toArray();

  expect(topTen.length).toEqual(10);
  expect(topTen[0].index).toEqual(999);
  expect(topTen[9].index).toEqual(990);
});
|
||||||
|
|
||||||
|
// ============================================================================
|
||||||
|
// File Storage: Update many and verify persistence
|
||||||
|
// ============================================================================
|
||||||
|
|
||||||
|
// Tags the first hundred documents and verifies the tag is queryable.
tap.test('file-storage: updateMany on bulk collection', async () => {
  const bulk = db.collection('bulk');
  const firstHundredFilter = { index: { $lt: 100 } };

  const updateResult = await bulk.updateMany(firstHundredFilter, {
    $set: { batch: 'first-hundred' },
  });
  expect(updateResult.modifiedCount).toEqual(100);

  const tagged = await bulk.find({ batch: 'first-hundred' }).toArray();
  expect(tagged.length).toEqual(100);
});
|
||||||
|
|
||||||
|
// ============================================================================
|
||||||
|
// File Storage: Delete many and verify
|
||||||
|
// ============================================================================
|
||||||
|
|
||||||
|
// Deletes indexes 900..999 and checks that 900 documents remain.
tap.test('file-storage: deleteMany removes correct documents', async () => {
  const bulk = db.collection('bulk');

  const deleteResult = await bulk.deleteMany({ index: { $gte: 900 } });
  expect(deleteResult.deletedCount).toEqual(100);

  const left = await bulk.countDocuments();
  expect(left).toEqual(900);
});
|
||||||
|
|
||||||
|
// ============================================================================
|
||||||
|
// File Storage: Persistence of bulk data across restart
|
||||||
|
// ============================================================================
|
||||||
|
|
||||||
|
// Closes the client first, then stops the server ahead of the bulk persistence checks.
tap.test('file-storage: stop server for bulk restart test', async () => {
  await client.close();
  await server.stop();

  expect(server.running).toBeFalse();
});
|
||||||
|
|
||||||
|
// Restarts over the same storage path and re-checks the bulk collection's state.
tap.test('file-storage: restart and verify bulk data', async () => {
  const serverOptions = { port: 27118, storage: 'file' as const, storagePath: tmpDir };
  server = new smartdb.SmartdbServer(serverOptions);
  await server.start();

  const clientOptions = { directConnection: true, serverSelectionTimeoutMS: 5000 };
  client = new MongoClient('mongodb://127.0.0.1:27118', clientOptions);
  await client.connect();
  db = client.db('filetest');

  const bulk = db.collection('bulk');
  const total = await bulk.countDocuments();
  expect(total).toEqual(900);

  // The updateMany tag persisted.
  const tagged = await bulk.find({ batch: 'first-hundred' }).toArray();
  expect(tagged.length).toEqual(100);

  // The deleted documents are still gone.
  const shouldBeGone = await bulk.find({ index: { $gte: 900 } }).toArray();
  expect(shouldBeGone.length).toEqual(0);
});
|
||||||
|
|
||||||
|
// ============================================================================
|
||||||
|
// File Storage: Index persistence
|
||||||
|
// ============================================================================
|
||||||
|
|
||||||
|
// Reads the 'crud' collection's indexes.json and expects it to list the
// default _id_ index spec.
tap.test('file-storage: default indexes.json exists on disk', async () => {
  const indexFile = path.join(tmpDir, 'filetest', 'crud', 'indexes.json');
  expect(fs.existsSync(indexFile)).toBeTrue();

  const rawJson = fs.readFileSync(indexFile, 'utf-8');
  const specs = JSON.parse(rawJson);
  const indexNames = specs.map((spec: any) => spec.name);
  expect(indexNames).toContain('_id_');
});
|
||||||
|
|
||||||
|
// ============================================================================
|
||||||
|
// Cleanup
|
||||||
|
// ============================================================================
|
||||||
|
|
||||||
|
// Tears down in order: client connection, server, then the scratch directory.
tap.test('file-storage: cleanup', async () => {
  await client.close();
  await server.stop();
  expect(server.running).toBeFalse();

  cleanTmpDir(tmpDir);
});
|
||||||
|
|
||||||
|
export default tap.start();
|
||||||
@@ -0,0 +1,235 @@
|
|||||||
|
import { expect, tap } from '@git.zone/tstest/tapbundle';
|
||||||
|
import * as smartdb from '../ts/index.js';
|
||||||
|
import { MongoClient, Db } from 'mongodb';
|
||||||
|
import * as fs from 'fs';
|
||||||
|
import * as path from 'path';
|
||||||
|
import * as os from 'os';
|
||||||
|
|
||||||
|
// ---------------------------------------------------------------------------
|
||||||
|
// Helpers
|
||||||
|
// ---------------------------------------------------------------------------
|
||||||
|
|
||||||
|
let tmpDir: string;
|
||||||
|
let localDb: smartdb.LocalSmartDb;
|
||||||
|
let client: MongoClient;
|
||||||
|
let db: Db;
|
||||||
|
|
||||||
|
/**
 * Creates a fresh, uniquely named scratch directory for this test file.
 *
 * @returns Path of the directory created by `fs.mkdtempSync` under `os.tmpdir()`.
 */
function makeTmpDir(): string {
  const prefix = path.join(os.tmpdir(), 'smartdb-local-test-');
  const createdDir = fs.mkdtempSync(prefix);
  return createdDir;
}
|
||||||
|
|
||||||
|
/**
 * Deletes a scratch directory together with everything inside it.
 *
 * @param dir Directory to remove; nothing happens when the path does not exist.
 */
function cleanTmpDir(dir: string): void {
  if (!fs.existsSync(dir)) {
    return;
  }
  fs.rmSync(dir, { force: true, recursive: true });
}
|
||||||
|
|
||||||
|
// ============================================================================
|
||||||
|
// LocalSmartDb: Lifecycle
|
||||||
|
// ============================================================================
|
||||||
|
|
||||||
|
tap.test('localsmartdb: should start with just a folder path', async () => {
  // Only a data folder is supplied here; no socket path is passed in.
  tmpDir = makeTmpDir();
  localDb = new smartdb.LocalSmartDb({ folderPath: tmpDir });

  const startInfo = await localDb.start();
  expect(localDb.running).toBeTrue();

  const { socketPath, connectionUri } = startInfo;
  expect(socketPath).toBeTruthy();
  expect(connectionUri).toBeTruthy();

  const usesMongoScheme = connectionUri.startsWith('mongodb://');
  expect(usesMongoScheme).toBeTrue();
});
|
||||||
|
|
||||||
|
tap.test('localsmartdb: should connect via returned connectionUri', async () => {
  const { connectionUri } = localDb.getConnectionInfo();
  const clientOptions = {
    directConnection: true,
    serverSelectionTimeoutMS: 5000,
  };

  // The client and database handle are stored in the shared variables that
  // the following tests in this file read.
  client = new MongoClient(connectionUri, clientOptions);
  await client.connect();
  db = client.db('localtest');

  expect(db).toBeTruthy();
});
|
||||||
|
|
||||||
|
tap.test('localsmartdb: should reject double start', async () => {
  // The instance is already running here, so start() is expected to reject.
  // Map the outcome to a boolean: fulfilled -> false, rejected -> true.
  const rejected = await localDb.start().then(
    () => false,
    () => true,
  );
  expect(rejected).toBeTrue();
});
|
||||||
|
|
||||||
|
// ============================================================================
|
||||||
|
// LocalSmartDb: CRUD via Unix socket
|
||||||
|
// ============================================================================
|
||||||
|
|
||||||
|
tap.test('localsmartdb: insert and find documents', async () => {
  const notes = db.collection('notes');
  const seedNotes = [
    { title: 'Note 1', body: 'First note', priority: 1 },
    { title: 'Note 2', body: 'Second note', priority: 2 },
    { title: 'Note 3', body: 'Third note', priority: 3 },
  ];
  await notes.insertMany(seedNotes);

  // An unfiltered find must return every seeded note.
  const stored = await notes.find({}).toArray();
  expect(stored.length).toEqual(3);

  // A field filter must select the single matching note.
  const highestPriority = await notes.findOne({ priority: 3 });
  expect(highestPriority).toBeTruthy();
  expect(highestPriority!.title).toEqual('Note 3');
});
|
||||||
|
|
||||||
|
tap.test('localsmartdb: update and verify', async () => {
  const notes = db.collection('notes');
  const selector = { title: 'Note 2' };
  const change = { $set: { body: 'Updated second note', edited: true } };

  await notes.updateOne(selector, change);

  // Read the document back: the changed field and the added field must both be there.
  const updated = await notes.findOne({ title: 'Note 2' });
  expect(updated!.body).toEqual('Updated second note');
  expect(updated!.edited).toBeTrue();
});
|
||||||
|
|
||||||
|
tap.test('localsmartdb: delete and verify', async () => {
  const notes = db.collection('notes');
  await notes.deleteOne({ title: 'Note 1' });

  // Two of the three notes inserted earlier in this file must remain.
  const remaining = await notes.countDocuments();
  expect(remaining).toEqual(2);

  // The deleted note must no longer be retrievable.
  const lookup = await notes.findOne({ title: 'Note 1' });
  expect(lookup).toBeNull();
});
|
||||||
|
|
||||||
|
// ============================================================================
|
||||||
|
// LocalSmartDb: Persistence across restart
|
||||||
|
// ============================================================================
|
||||||
|
|
||||||
|
tap.test('localsmartdb: stop for restart', async () => {
  // Close the client before stopping the instance it is connected to.
  await client.close();
  await localDb.stop();

  const stillRunning = localDb.running;
  expect(stillRunning).toBeFalse();
});
|
||||||
|
|
||||||
|
tap.test('localsmartdb: restart with same folder', async () => {
  // A brand-new instance pointed at the folder used by the earlier tests.
  localDb = new smartdb.LocalSmartDb({ folderPath: tmpDir });
  const { connectionUri } = await localDb.start();
  expect(localDb.running).toBeTrue();

  const clientOptions = {
    directConnection: true,
    serverSelectionTimeoutMS: 5000,
  };
  client = new MongoClient(connectionUri, clientOptions);
  await client.connect();
  db = client.db('localtest');
});
|
||||||
|
|
||||||
|
tap.test('localsmartdb: data persists after restart', async () => {
  const notes = db.collection('notes');

  // 3 inserted - 1 deleted
  const survivingCount = await notes.countDocuments();
  expect(survivingCount).toEqual(2);

  // The edit made before the restart must still be visible.
  const secondNote = await notes.findOne({ title: 'Note 2' });
  expect(secondNote!.body).toEqual('Updated second note');
  expect(secondNote!.edited).toBeTrue();

  // An untouched document must keep its original field value.
  const thirdNote = await notes.findOne({ title: 'Note 3' });
  expect(thirdNote!.priority).toEqual(3);
});
|
||||||
|
|
||||||
|
// ============================================================================
|
||||||
|
// LocalSmartDb: Custom socket path
|
||||||
|
// ============================================================================
|
||||||
|
|
||||||
|
tap.test('localsmartdb: works with custom socket path', async () => {
  // Stop the shared instance before starting a second, independent one.
  await client.close();
  await localDb.stop();

  const customSocket = path.join(os.tmpdir(), `smartdb-custom-${Date.now()}.sock`);
  const tmpDir2 = makeTmpDir();
  const localDb2 = new smartdb.LocalSmartDb({
    folderPath: tmpDir2,
    socketPath: customSocket,
  });
  let client2: MongoClient | undefined;

  try {
    const info = await localDb2.start();
    // The explicitly requested socket path must be reported back unchanged.
    expect(info.socketPath).toEqual(customSocket);

    client2 = new MongoClient(info.connectionUri, {
      directConnection: true,
      serverSelectionTimeoutMS: 5000,
    });
    await client2.connect();
    const testDb = client2.db('customsock');
    await testDb.collection('test').insertOne({ x: 1 });
    const doc = await testDb.collection('test').findOne({ x: 1 });
    expect(doc).toBeTruthy();
  } finally {
    // Release the secondary instance even when an assertion above threw, so a
    // failing run does not leave a server process or temp folder behind.
    await client2?.close();
    if (localDb2.running) {
      await localDb2.stop();
    }
    cleanTmpDir(tmpDir2);
  }

  // Reconnect original for remaining tests
  localDb = new smartdb.LocalSmartDb({ folderPath: tmpDir });
  const origInfo = await localDb.start();
  client = new MongoClient(origInfo.connectionUri, {
    directConnection: true,
    serverSelectionTimeoutMS: 5000,
  });
  await client.connect();
  db = client.db('localtest');
});
|
||||||
|
|
||||||
|
// ============================================================================
|
||||||
|
// LocalSmartDb: getConnectionUri and getServer helpers
|
||||||
|
// ============================================================================
|
||||||
|
|
||||||
|
tap.test('localsmartdb: getConnectionUri returns valid uri', async () => {
  // Only the URI scheme is checked here.
  const connectionUri = localDb.getConnectionUri();
  const usesMongoScheme = connectionUri.startsWith('mongodb://');
  expect(usesMongoScheme).toBeTrue();
});
|
||||||
|
|
||||||
|
tap.test('localsmartdb: getServer returns the SmartdbServer', async () => {
  // The wrapped server must be exposed and must report itself as running.
  const wrappedServer = localDb.getServer();
  expect(wrappedServer).toBeTruthy();

  const serverIsUp = wrappedServer.running;
  expect(serverIsUp).toBeTrue();
});
|
||||||
|
|
||||||
|
// ============================================================================
|
||||||
|
// LocalSmartDb: Data isolation between databases
|
||||||
|
// ============================================================================
|
||||||
|
|
||||||
|
tap.test('localsmartdb: databases are isolated', async () => {
  const firstDb = client.db('isoA');
  const secondDb = client.db('isoB');
  const firstShared = firstDb.collection('shared');
  const secondShared = secondDb.collection('shared');

  // Write one document into a same-named collection of each database.
  await firstShared.insertOne({ source: 'A', val: 1 });
  await secondShared.insertOne({ source: 'B', val: 2 });

  const fromFirst = await firstShared.find({}).toArray();
  const fromSecond = await secondShared.find({}).toArray();

  // Each database must only see the document written through it.
  expect(fromFirst.length).toEqual(1);
  expect(fromFirst[0].source).toEqual('A');
  expect(fromSecond.length).toEqual(1);
  expect(fromSecond[0].source).toEqual('B');

  await firstDb.dropDatabase();
  await secondDb.dropDatabase();
});
|
||||||
|
|
||||||
|
// ============================================================================
|
||||||
|
// Cleanup
|
||||||
|
// ============================================================================
|
||||||
|
|
||||||
|
tap.test('localsmartdb: cleanup', async () => {
  // Disconnect, stop the instance, then remove its data folder.
  await client.close();
  await localDb.stop();

  const stillRunning = localDb.running;
  expect(stillRunning).toBeFalse();

  cleanTmpDir(tmpDir);
});
|
||||||
|
|
||||||
|
export default tap.start();
|
||||||
@@ -0,0 +1,269 @@
|
|||||||
|
import { expect, tap } from '@git.zone/tstest/tapbundle';
|
||||||
|
import * as smartdb from '../ts/index.js';
|
||||||
|
import { MongoClient, Db } from 'mongodb';
|
||||||
|
import * as fs from 'fs';
|
||||||
|
import * as path from 'path';
|
||||||
|
import * as os from 'os';
|
||||||
|
|
||||||
|
// ---------------------------------------------------------------------------
|
||||||
|
// Helpers
|
||||||
|
// ---------------------------------------------------------------------------
|
||||||
|
|
||||||
|
let tmpDir: string;
|
||||||
|
|
||||||
|
/**
 * Allocates a new, uniquely named temporary directory for a migration test.
 *
 * @returns Path of the directory created under `os.tmpdir()`.
 */
function makeTmpDir(): string {
  const systemTmp = os.tmpdir();
  const template = path.join(systemTmp, 'smartdb-migration-test-');
  return fs.mkdtempSync(template);
}
|
||||||
|
|
||||||
|
/**
 * Recursively removes a temporary directory if it is present.
 *
 * @param dir Path to delete; a missing path is ignored.
 */
function cleanTmpDir(dir: string): void {
  const present = fs.existsSync(dir);
  if (present) {
    const removal = { recursive: true, force: true };
    fs.rmSync(dir, removal);
  }
}
|
||||||
|
|
||||||
|
/**
 * Writes one collection in the v0 (legacy JSON) on-disk layout:
 *   {base}/{db}/{coll}.json          – array of documents
 *   {base}/{db}/{coll}.indexes.json  – array holding only the `_id_` index spec
 *
 * Documents are shallow-copied before writing, so the caller's objects are not
 * modified. Any document with a falsy `_id` gets a generated one of the form
 * `{ "$oid": "<24 hex chars>" }`.
 *
 * @param basePath Storage root directory.
 * @param dbName Database folder name (created if missing).
 * @param collName Collection name used for both file names.
 * @param docs Documents to store.
 */
function createV0Layout(basePath: string, dbName: string, collName: string, docs: any[]): void {
  const dbFolder = path.join(basePath, dbName);
  fs.mkdirSync(dbFolder, { recursive: true });

  // 24 random hex digits: same length as an ObjectId's hex form, not a real ObjectId.
  const randomOidHex = (): string => {
    let hex = '';
    for (let digit = 0; digit < 24; digit++) {
      hex += Math.floor(Math.random() * 16).toString(16);
    }
    return hex;
  };

  // Mirror the extended-JSON shape in which ObjectIds appear as { "$oid": "hex" }.
  const legacyDocs = docs.map((doc) => {
    const copy = { ...doc };
    if (copy._id) {
      return copy;
    }
    copy._id = { '$oid': randomOidHex() };
    return copy;
  });

  const documentsFile = path.join(dbFolder, `${collName}.json`);
  fs.writeFileSync(documentsFile, JSON.stringify(legacyDocs, null, 2));

  const defaultIndexes = [
    { name: '_id_', key: { _id: 1 } },
  ];
  const indexesFile = path.join(dbFolder, `${collName}.indexes.json`);
  fs.writeFileSync(indexesFile, JSON.stringify(defaultIndexes, null, 2));
}
|
||||||
|
|
||||||
|
// ============================================================================
|
||||||
|
// Migration: v0 → v1 basic
|
||||||
|
// ============================================================================
|
||||||
|
|
||||||
|
tap.test('migration: detects v0 format and migrates on startup', async () => {
  tmpDir = makeTmpDir();

  // Create v0 layout with test data
  createV0Layout(tmpDir, 'mydb', 'users', [
    { name: 'Alice', age: 30, email: 'alice@test.com' },
    { name: 'Bob', age: 25, email: 'bob@test.com' },
    { name: 'Charlie', age: 35, email: 'charlie@test.com' },
  ]);

  createV0Layout(tmpDir, 'mydb', 'products', [
    { sku: 'W001', name: 'Widget', price: 9.99 },
    { sku: 'G001', name: 'Gadget', price: 19.99 },
  ]);

  // Verify v0 files exist
  expect(fs.existsSync(path.join(tmpDir, 'mydb', 'users.json'))).toBeTrue();
  expect(fs.existsSync(path.join(tmpDir, 'mydb', 'products.json'))).toBeTrue();

  // Start server — migration should run automatically
  const server = new smartdb.SmartdbServer({
    socketPath: path.join(os.tmpdir(), `smartdb-mig-${Date.now()}-${Math.random().toString(36).slice(2)}.sock`),
    storage: 'file',
    storagePath: tmpDir,
  });
  await server.start();

  // try/finally: a failing assertion must not leave the server or client open.
  try {
    // v1 directories should now exist
    expect(fs.existsSync(path.join(tmpDir, 'mydb', 'users', 'data.rdb'))).toBeTrue();
    expect(fs.existsSync(path.join(tmpDir, 'mydb', 'products', 'data.rdb'))).toBeTrue();

    // v0 files should still exist (not deleted)
    expect(fs.existsSync(path.join(tmpDir, 'mydb', 'users.json'))).toBeTrue();
    expect(fs.existsSync(path.join(tmpDir, 'mydb', 'products.json'))).toBeTrue();

    // Connect and verify data is accessible
    const client = new MongoClient(server.getConnectionUri(), {
      directConnection: true,
      serverSelectionTimeoutMS: 5000,
    });
    try {
      await client.connect();
      const db = client.db('mydb');

      // Users collection
      const users = await db.collection('users').find({}).toArray();
      expect(users.length).toEqual(3);
      const alice = users.find(u => u.name === 'Alice');
      expect(alice).toBeTruthy();
      expect(alice!.age).toEqual(30);
      expect(alice!.email).toEqual('alice@test.com');

      // Products collection
      const products = await db.collection('products').find({}).toArray();
      expect(products.length).toEqual(2);
      const widget = products.find(p => p.sku === 'W001');
      expect(widget).toBeTruthy();
      expect(widget!.price).toEqual(9.99);
    } finally {
      await client.close();
    }
  } finally {
    await server.stop();
  }
});
|
||||||
|
|
||||||
|
// ============================================================================
|
||||||
|
// Migration: migrated data survives another restart
|
||||||
|
// ============================================================================
|
||||||
|
|
||||||
|
tap.test('migration: migrated data persists across restart', async () => {
  // Reuses the storage folder left behind by the preceding migration test.
  const server = new smartdb.SmartdbServer({
    socketPath: path.join(os.tmpdir(), `smartdb-mig-${Date.now()}-${Math.random().toString(36).slice(2)}.sock`),
    storage: 'file',
    storagePath: tmpDir,
  });
  await server.start();

  // try/finally: a failing assertion must not leave the server or client open.
  try {
    const client = new MongoClient(server.getConnectionUri(), {
      directConnection: true,
      serverSelectionTimeoutMS: 5000,
    });
    try {
      await client.connect();
      const db = client.db('mydb');

      const users = await db.collection('users').find({}).toArray();
      expect(users.length).toEqual(3);

      const products = await db.collection('products').find({}).toArray();
      expect(products.length).toEqual(2);
    } finally {
      await client.close();
    }
  } finally {
    await server.stop();
  }
});
|
||||||
|
|
||||||
|
// ============================================================================
|
||||||
|
// Migration: can write new data after migration
|
||||||
|
// ============================================================================
|
||||||
|
|
||||||
|
tap.test('migration: new writes work after migration', async () => {
  // The temp folder is removed in the outer finally, so it is cleaned up even
  // when an assertion or the server start fails.
  try {
    const server = new smartdb.SmartdbServer({
      socketPath: path.join(os.tmpdir(), `smartdb-mig-${Date.now()}-${Math.random().toString(36).slice(2)}.sock`),
      storage: 'file',
      storagePath: tmpDir,
    });
    await server.start();

    try {
      const client = new MongoClient(server.getConnectionUri(), {
        directConnection: true,
        serverSelectionTimeoutMS: 5000,
      });
      try {
        await client.connect();
        const db = client.db('mydb');

        // Insert new documents
        await db.collection('users').insertOne({ name: 'Diana', age: 28 });
        const count = await db.collection('users').countDocuments();
        expect(count).toEqual(4);

        // Update existing migrated document
        await db.collection('users').updateOne(
          { name: 'Alice' },
          { $set: { age: 31 } }
        );
        const alice = await db.collection('users').findOne({ name: 'Alice' });
        expect(alice!.age).toEqual(31);

        // Delete a migrated document
        await db.collection('products').deleteOne({ sku: 'G001' });
        const prodCount = await db.collection('products').countDocuments();
        expect(prodCount).toEqual(1);
      } finally {
        await client.close();
      }
    } finally {
      await server.stop();
    }
  } finally {
    cleanTmpDir(tmpDir);
  }
});
|
||||||
|
|
||||||
|
// ============================================================================
|
||||||
|
// Migration: skips already-migrated data
|
||||||
|
// ============================================================================
|
||||||
|
|
||||||
|
tap.test('migration: no-op for v1 format', async () => {
  tmpDir = makeTmpDir();

  // Runs `work` against a file-backed server on tmpDir using a fresh socket.
  // The client and the server are always released, even when `work` throws.
  const withSession = async (work: (db: Db) => Promise<void>): Promise<void> => {
    const server = new smartdb.SmartdbServer({
      socketPath: path.join(os.tmpdir(), `smartdb-mig-${Date.now()}-${Math.random().toString(36).slice(2)}.sock`),
      storage: 'file',
      storagePath: tmpDir,
    });
    await server.start();
    try {
      const client = new MongoClient(server.getConnectionUri(), {
        directConnection: true,
        serverSelectionTimeoutMS: 5000,
      });
      try {
        await client.connect();
        await work(client.db('v1test'));
      } finally {
        await client.close();
      }
    } finally {
      await server.stop();
    }
  };

  try {
    // Start fresh to create v1 layout
    await withSession(async (db) => {
      await db.collection('items').insertOne({ x: 1 });
    });

    // Restart — migration should detect v1 and skip
    await withSession(async (db) => {
      const doc = await db.collection('items').findOne({ x: 1 });
      expect(doc).toBeTruthy();
    });
  } finally {
    cleanTmpDir(tmpDir);
  }
});
|
||||||
|
|
||||||
|
// ============================================================================
|
||||||
|
// Migration: empty storage is handled gracefully
|
||||||
|
// ============================================================================
|
||||||
|
|
||||||
|
tap.test('migration: empty storage directory works', async () => {
  tmpDir = makeTmpDir();

  // Nested try/finally blocks release the client, the server and the temp
  // folder in that order, even when an assertion fails part-way through.
  try {
    const server = new smartdb.SmartdbServer({
      socketPath: path.join(os.tmpdir(), `smartdb-mig-${Date.now()}-${Math.random().toString(36).slice(2)}.sock`),
      storage: 'file',
      storagePath: tmpDir,
    });
    await server.start();

    try {
      const client = new MongoClient(server.getConnectionUri(), {
        directConnection: true,
        serverSelectionTimeoutMS: 5000,
      });
      try {
        await client.connect();

        // Should work fine with empty storage
        const db = client.db('emptytest');
        await db.collection('first').insertOne({ hello: 'world' });
        const doc = await db.collection('first').findOne({ hello: 'world' });
        expect(doc).toBeTruthy();
      } finally {
        await client.close();
      }
    } finally {
      await server.stop();
    }
  } finally {
    cleanTmpDir(tmpDir);
  }
});
|
||||||
|
|
||||||
|
export default tap.start();
|
||||||
@@ -3,6 +3,6 @@
|
|||||||
*/
|
*/
|
||||||
export const commitinfo = {
|
export const commitinfo = {
|
||||||
name: '@push.rocks/smartdb',
|
name: '@push.rocks/smartdb',
|
||||||
version: '2.1.0',
|
version: '2.5.7',
|
||||||
description: 'A MongoDB-compatible embedded database server with wire protocol support, backed by a high-performance Rust engine.'
|
description: 'A MongoDB-compatible embedded database server with wire protocol support, backed by a high-performance Rust engine.'
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -7,6 +7,9 @@ export * from './ts_smartdb/index.js';
|
|||||||
export { LocalSmartDb } from './ts_local/index.js';
|
export { LocalSmartDb } from './ts_local/index.js';
|
||||||
export type { ILocalSmartDbOptions, ILocalSmartDbConnectionInfo } from './ts_local/index.js';
|
export type { ILocalSmartDbOptions, ILocalSmartDbConnectionInfo } from './ts_local/index.js';
|
||||||
|
|
||||||
|
// Export migration
|
||||||
|
export { StorageMigrator } from './ts_migration/index.js';
|
||||||
|
|
||||||
// Export commitinfo
|
// Export commitinfo
|
||||||
export { commitinfo };
|
export { commitinfo };
|
||||||
|
|
||||||
|
|||||||
@@ -1,7 +1,10 @@
|
|||||||
import * as crypto from 'crypto';
|
import * as crypto from 'crypto';
|
||||||
|
import * as fs from 'fs/promises';
|
||||||
|
import * as net from 'net';
|
||||||
import * as path from 'path';
|
import * as path from 'path';
|
||||||
import * as os from 'os';
|
import * as os from 'os';
|
||||||
import { SmartdbServer } from '../ts_smartdb/index.js';
|
import { SmartdbServer } from '../ts_smartdb/index.js';
|
||||||
|
import { StorageMigrator } from '../ts_migration/index.js';
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Connection information returned by LocalSmartDb.start()
|
* Connection information returned by LocalSmartDb.start()
|
||||||
@@ -65,6 +68,55 @@ export class LocalSmartDb {
|
|||||||
return path.join(os.tmpdir(), `smartdb-${randomId}.sock`);
|
return path.join(os.tmpdir(), `smartdb-${randomId}.sock`);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
 * Probes a Unix domain socket by trying to open a connection to it.
 *
 * @param socketPath Filesystem path of the socket to probe.
 * @returns A promise resolving to `true` once a connection is established,
 *   or to `false` when the attempt emits an error or the socket's 500 ms
 *   timeout fires first. The promise is never rejected.
 */
private static isSocketAlive(socketPath: string): Promise<boolean> {
  return new Promise<boolean>((resolve) => {
    const probe = net.createConnection({ path: socketPath });

    // Connected: something is listening. Drop the probe connection right away.
    probe.once('connect', () => {
      probe.destroy();
      resolve(true);
    });

    // Any connection error is treated as "not alive".
    probe.on('error', () => {
      resolve(false);
    });

    // No result within the timeout: give up and report "not alive".
    probe.setTimeout(500, () => {
      probe.destroy();
      resolve(false);
    });
  });
}
|
||||||
|
|
||||||
|
/**
 * Removes stale `smartdb-*.sock` files from the OS temp directory
 * (`os.tmpdir()`).
 *
 * A candidate is deleted only when it is a socket and
 * `LocalSmartDb.isSocketAlive` reports `false` for it. The method is
 * best-effort: an unreadable temp directory ends it silently, and a failure
 * on one file is ignored so the remaining files are still processed.
 */
private static async cleanStaleSockets(): Promise<void> {
  const tmpRoot = os.tmpdir();

  let listing: string[];
  try {
    listing = await fs.readdir(tmpRoot);
  } catch {
    return;
  }

  for (const fileName of listing) {
    const looksLikeOurs = fileName.startsWith('smartdb-') && fileName.endsWith('.sock');
    if (!looksLikeOurs) {
      continue;
    }

    const candidate = path.join(tmpRoot, fileName);
    try {
      const info = await fs.stat(candidate);
      // Regular files that merely match the name pattern are left alone.
      if (info.isSocket() && !(await LocalSmartDb.isSocketAlive(candidate))) {
        await fs.unlink(candidate);
      }
    } catch {
      // File may have been removed already; ignore
    }
  }
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Start the local SmartDB server and return connection info
|
* Start the local SmartDB server and return connection info
|
||||||
*/
|
*/
|
||||||
@@ -73,6 +125,13 @@ export class LocalSmartDb {
|
|||||||
throw new Error('LocalSmartDb is already running');
|
throw new Error('LocalSmartDb is already running');
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Clean up stale sockets from previous crashed instances
|
||||||
|
await LocalSmartDb.cleanStaleSockets();
|
||||||
|
|
||||||
|
// Run storage migration before starting the Rust engine
|
||||||
|
const migrator = new StorageMigrator(this.options.folderPath);
|
||||||
|
await migrator.run();
|
||||||
|
|
||||||
// Use provided socket path or generate one
|
// Use provided socket path or generate one
|
||||||
this.generatedSocketPath = this.options.socketPath ?? this.generateSocketPath();
|
this.generatedSocketPath = this.options.socketPath ?? this.generateSocketPath();
|
||||||
|
|
||||||
|
|||||||
@@ -0,0 +1,93 @@
|
|||||||
|
import * as fs from 'fs';
|
||||||
|
import * as path from 'path';
|
||||||
|
import { migrateV0ToV1 } from './migrators/v0_to_v1.js';
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Detected storage format version.
|
||||||
|
* - v0: Legacy JSON format ({db}/{coll}.json files)
|
||||||
|
* - v1: Bitcask binary format ({db}/{coll}/data.rdb directories)
|
||||||
|
*/
|
||||||
|
type TStorageVersion = 0 | 1;
|
||||||
|
|
||||||
|
/**
 * StorageMigrator — runs before the Rust engine starts.
 *
 * Detects the current storage format version and runs the appropriate
 * migration chain. The Rust engine only knows the current format (v1).
 *
 * Migration is safe: original files are never modified or deleted.
 * On success, a console hint is printed about which old files can be removed.
 */
export class StorageMigrator {
  private storagePath: string;

  /**
   * @param storagePath Root directory that contains one sub-directory per database.
   */
  constructor(storagePath: string) {
    this.storagePath = storagePath;
  }

  /**
   * Run any needed migrations. Safe to call even if storage is already current.
   *
   * Returns without doing anything when the storage path does not exist or
   * when the detected version is already v1. Errors thrown while reading the
   * directory tree or by the migrator itself propagate to the caller.
   */
  async run(): Promise<void> {
    if (!fs.existsSync(this.storagePath)) {
      return; // No data yet — nothing to migrate
    }

    const version = this.detectVersion();

    if (version === 1) {
      return; // Already current
    }

    if (version === 0) {
      console.log(`[smartdb] Detected v0 (JSON) storage format at ${this.storagePath}`);
      console.log(`[smartdb] Running migration v0 → v1 (Bitcask binary format)...`);

      const deletableFiles = await migrateV0ToV1(this.storagePath);

      if (deletableFiles.length > 0) {
        console.log(`[smartdb] Migration v0 → v1 complete.`);
        console.log(`[smartdb] The following old files can be safely deleted:`);
        for (const f of deletableFiles) {
          console.log(`[smartdb] ${f}`);
        }
      } else {
        console.log(`[smartdb] Migration v0 → v1 complete. No old files to clean up.`);
      }
    }
  }

  /**
   * Detect the storage format version by inspecting the directory structure.
   *
   * v0: a `{db}/{coll}.json` file exists with no matching `{db}/{coll}/data.rdb`
   * v1: everything else (already migrated, fresh, empty or unrecognized)
   *
   * The check is made per collection instead of returning on the first marker
   * found. Legacy `.json` files are deliberately kept after a migration, so a
   * migrated database holds BOTH markers; a first-match scan would then depend
   * on the order in which `readdirSync` lists entries and could re-trigger the
   * migration on every start.
   *
   * NOTE(review): a store where only some collections have a v1 directory is
   * reported as v0 so the rest gets migrated. This assumes `migrateV0ToV1`
   * tolerates collections that already have a v1 directory — confirm.
   */
  private detectVersion(): TStorageVersion {
    const entries = fs.readdirSync(this.storagePath, { withFileTypes: true });

    for (const entry of entries) {
      if (!entry.isDirectory()) continue;

      const dbDir = path.join(this.storagePath, entry.name);
      const dbEntries = fs.readdirSync(dbDir, { withFileTypes: true });

      for (const dbEntry of dbEntries) {
        // Only legacy collection files matter here: *.json but not *.indexes.json
        if (!dbEntry.isFile()) continue;
        const fileName = dbEntry.name;
        if (!fileName.endsWith('.json') || fileName.endsWith('.indexes.json')) continue;

        // A legacy file only counts as "needs migration" while its v1
        // counterpart ({db}/{coll}/data.rdb) has not been written yet.
        const collName = fileName.slice(0, -'.json'.length);
        const migratedData = path.join(dbDir, collName, 'data.rdb');
        if (!fs.existsSync(migratedData)) {
          return 0;
        }
      }
    }

    // Already migrated, empty or unrecognized — treat as v1
    return 1;
  }
}
|
||||||
@@ -0,0 +1 @@
|
|||||||
|
export { StorageMigrator } from './classes.storagemigrator.js';
|
||||||
@@ -0,0 +1,253 @@
|
|||||||
|
import * as fs from 'fs';
|
||||||
|
import * as path from 'path';
|
||||||
|
import * as crypto from 'crypto';
|
||||||
|
import { BSON } from 'bson';
|
||||||
|
|
||||||
|
// ---------------------------------------------------------------------------
|
||||||
|
// Binary format constants (must match Rust: record.rs)
|
||||||
|
// ---------------------------------------------------------------------------
|
||||||
|
|
||||||
|
/** File-level magic: "SMARTDB\0" */
|
||||||
|
const FILE_MAGIC = Buffer.from('SMARTDB\0', 'ascii');
|
||||||
|
/** Current format version */
|
||||||
|
const FORMAT_VERSION = 1;
|
||||||
|
/** File type tags */
|
||||||
|
const FILE_TYPE_DATA = 1;
|
||||||
|
const FILE_TYPE_HINT = 3;
|
||||||
|
/** File header total size */
|
||||||
|
const FILE_HEADER_SIZE = 64;
|
||||||
|
/** Per-record magic */
|
||||||
|
const RECORD_MAGIC = 0xDB01;
|
||||||
|
/** Per-record header size */
|
||||||
|
const RECORD_HEADER_SIZE = 22; // 2 + 8 + 4 + 4 + 4
|
||||||
|
|
||||||
|
// ---------------------------------------------------------------------------
|
||||||
|
// Binary encoding helpers
|
||||||
|
// ---------------------------------------------------------------------------
|
||||||
|
|
||||||
|
/**
 * Builds the fixed-size header placed at the start of a v1 storage file.
 *
 * Fields written (all little-endian): magic at offset 0, format version (u16)
 * at 8, file type (u8) at 10, flags (u32, always 0) at 11 and the current
 * `Date.now()` value (u64) at 15. All remaining bytes stay zero.
 *
 * @param fileType File type tag stored in the single byte at offset 10.
 * @returns A new buffer of FILE_HEADER_SIZE bytes.
 */
function writeFileHeader(fileType: number): Buffer {
  const VERSION_AT = 8;
  const FILE_TYPE_AT = 10;
  const FLAGS_AT = 11;
  const CREATED_AT = 15;

  const header = Buffer.alloc(FILE_HEADER_SIZE, 0);
  const createdMs = BigInt(Date.now());

  FILE_MAGIC.copy(header, 0);
  header.writeUInt16LE(FORMAT_VERSION, VERSION_AT);
  header.writeUInt8(fileType, FILE_TYPE_AT);
  header.writeUInt32LE(0, FLAGS_AT); // flags
  header.writeBigUInt64LE(createdMs, CREATED_AT);
  // bytes 23..64 are reserved (zeros)

  return header;
}
|
||||||
|
|
||||||
|
/**
 * Serializes one data record: a 22-byte header followed by key and value.
 *
 * Header layout (little-endian): record magic (u16) at 0, timestamp (u64) at
 * 2, key length (u32) at 10, value length (u32) at 14, CRC32 (u32) at 18.
 * The checksum covers every byte of the record except the CRC field itself.
 *
 * @param timestamp Value stored in the header's 64-bit timestamp field.
 * @param key Raw key bytes.
 * @param value Raw value bytes.
 * @returns A new buffer holding the complete record.
 */
function encodeDataRecord(timestamp: bigint, key: Buffer, value: Buffer): Buffer {
  const keyStart = RECORD_HEADER_SIZE;
  const valueStart = keyStart + key.length;
  const record = Buffer.alloc(valueStart + value.length);

  // Each write returns the offset just past the field, which chains the header.
  let cursor = record.writeUInt16LE(RECORD_MAGIC, 0);
  cursor = record.writeBigUInt64LE(timestamp, cursor);
  cursor = record.writeUInt32LE(key.length, cursor);
  cursor = record.writeUInt32LE(value.length, cursor);
  const crcAt = cursor; // 18 — the 4-byte CRC slot is filled in last

  key.copy(record, keyStart);
  value.copy(record, valueStart);

  // Checksum input: bytes [0..18] followed by bytes [22..]
  const covered = Buffer.concat([
    record.subarray(0, crcAt),
    record.subarray(crcAt + 4),
  ]);
  record.writeUInt32LE(crc32(covered), crcAt);

  return record;
}
|
||||||
|
|
||||||
|
/**
 * Serializes one hint entry.
 *
 * Layout (little-endian): key byte length (u32), UTF-8 key bytes, offset
 * (u64), record length (u32), value length (u32), timestamp (u64).
 *
 * @param key Key text; stored as UTF-8.
 * @param offset Value for the 64-bit offset field.
 * @param recordLen Value for the 32-bit record-length field.
 * @param valueLen Value for the 32-bit value-length field.
 * @param timestamp Value for the 64-bit timestamp field.
 * @returns A new buffer of `28 + <key byte length>` bytes.
 */
function encodeHintEntry(key: string, offset: bigint, recordLen: number, valueLen: number, timestamp: bigint): Buffer {
  const keyBytes = Buffer.from(key, 'utf-8');

  const lengthPrefix = Buffer.alloc(4);
  lengthPrefix.writeUInt32LE(keyBytes.length, 0);

  // Fixed-size tail that follows the variable-length key.
  const tail = Buffer.alloc(8 + 4 + 4 + 8);
  let cursor = tail.writeBigUInt64LE(offset, 0);
  cursor = tail.writeUInt32LE(recordLen, cursor);
  cursor = tail.writeUInt32LE(valueLen, cursor);
  tail.writeBigUInt64LE(timestamp, cursor);

  return Buffer.concat([lengthPrefix, keyBytes, tail]);
}
|
||||||
|
|
||||||
|
// ---------------------------------------------------------------------------
|
||||||
|
// CRC32 (matching crc32fast in Rust)
|
||||||
|
// ---------------------------------------------------------------------------
|
||||||
|
|
||||||
|
/**
 * Byte-indexed lookup table for the reflected CRC-32 polynomial 0xEDB88320.
 * Entry `b` is the result of pushing the single byte `b` through eight
 * shift/xor rounds.
 */
const CRC32_TABLE = Uint32Array.from({ length: 256 }, (_, byte) => {
  let remainder = byte;
  for (let bit = 0; bit < 8; bit++) {
    const shifted = remainder >>> 1;
    remainder = (remainder & 1) ? (0xEDB88320 ^ shifted) : shifted;
  }
  return remainder >>> 0;
});

/**
 * Compute the CRC-32 checksum of `data` using CRC32_TABLE.
 *
 * The running value starts at 0xFFFFFFFF and is inverted at the end.
 *
 * @param data - bytes to checksum
 * @returns the checksum as an unsigned 32-bit integer (0 for empty input)
 */
function crc32(data: Buffer): number {
  let state = 0xFFFFFFFF;
  for (const byte of data) {
    const index = (state ^ byte) & 0xFF;
    state = CRC32_TABLE[index] ^ (state >>> 8);
  }
  // `>>> 0` converts the signed 32-bit intermediate back to unsigned.
  return (state ^ 0xFFFFFFFF) >>> 0;
}
|
||||||
|
|
||||||
|
// ---------------------------------------------------------------------------
|
||||||
|
// Migration: v0 (JSON) → v1 (Bitcask binary)
|
||||||
|
// ---------------------------------------------------------------------------
|
||||||
|
|
||||||
|
/**
 * In-memory keydir entry collected while writing data.rdb; one entry per
 * document key, later serialized into keydir.hint.
 */
interface IKeyDirEntry {
  /** Byte offset of the record inside data.rdb (the first record starts right after the file header). */
  offset: bigint;
  /** Total length of the encoded record in bytes (header + key + value). */
  recordLen: number;
  /** Length of the value (serialized document) in bytes. */
  valueLen: number;
  /** Timestamp written into the record, in ms since the Unix epoch. */
  timestamp: bigint;
}
|
||||||
|
|
||||||
|
/**
 * Create (or truncate) `filePath`, let `writeBody` write into it, then fsync
 * and close it.
 *
 * The descriptor is closed exactly once. On the success path a close error
 * propagates; on the failure path the close is best-effort so the original
 * write error is the one the caller sees.
 */
function writeSyncedFile(filePath: string, writeBody: (fd: number) => void): void {
  const fd = fs.openSync(filePath, 'w');
  try {
    writeBody(fd);
    fs.fsyncSync(fd);
  } catch (writeErr) {
    try {
      fs.closeSync(fd);
    } catch {
      // Best-effort: the write error is more useful than a secondary close error.
    }
    throw writeErr;
  }
  fs.closeSync(fd);
}

/**
 * Determine the keydir key for a v0 document.
 *
 * Uses `_id.$oid` when present, a string `_id` as-is, and `String(_id)` for
 * any other non-nullish value. Only a missing (`undefined`/`null`) `_id` gets
 * a freshly generated 12-byte hex id, which is also written back onto the
 * document as `{ $oid }`.
 */
function resolveV0DocumentKey(doc: any): string {
  const id = doc._id;
  if (id && id.$oid) {
    return id.$oid;
  }
  if (typeof id === 'string') {
    return id;
  }
  if (id !== undefined && id !== null) {
    // Falsy-but-present ids such as 0 or false are real ids and must be kept.
    // NOTE(review): object ids without `$oid` all stringify to
    // "[object Object]" and would collide in the keydir — confirm that v0 data
    // cannot contain such ids.
    return String(id);
  }

  const generated = crypto.randomBytes(12).toString('hex');
  doc._id = { $oid: generated };
  return generated;
}

/**
 * Migrate a storage directory from v0 (JSON-per-collection) to v1 (Bitcask binary).
 *
 * For every `{db}/{coll}.json` (files ending in `.indexes.json` are not
 * collections) that does not yet have a `{db}/{coll}/` directory:
 *   - `data.rdb` is written with a file header plus one record per document,
 *   - `keydir.hint` is written with a file header plus one entry per key,
 *   - `indexes.json` is copied from `{coll}.indexes.json`, or a default `_id_`
 *     index definition is written when that file does not exist.
 *
 * - Original .json files are NOT modified or deleted.
 * - Collections whose target directory already exists are treated as migrated
 *   and skipped.
 * - On failure, every collection directory created during this call is removed
 *   (best-effort) and the error is rethrown.
 *
 * @param storagePath - root storage directory containing one subdirectory per database
 * @returns paths of the old v0 files that can be safely deleted
 * @throws any filesystem, JSON or serialization error, and an Error when a
 *         collection file does not contain a JSON array
 */
export async function migrateV0ToV1(storagePath: string): Promise<string[]> {
  const deletableFiles: string[] = [];
  const createdDirs: string[] = [];

  try {
    const dbEntries = fs.readdirSync(storagePath, { withFileTypes: true });

    for (const dbEntry of dbEntries) {
      if (!dbEntry.isDirectory()) continue;

      const dbDir = path.join(storagePath, dbEntry.name);
      const collFiles = fs.readdirSync(dbDir, { withFileTypes: true });

      for (const collFile of collFiles) {
        if (!collFile.isFile()) continue;
        if (!collFile.name.endsWith('.json')) continue;
        if (collFile.name.endsWith('.indexes.json')) continue;

        const collName = collFile.name.replace(/\.json$/, '');
        const jsonPath = path.join(dbDir, collFile.name);
        const indexJsonPath = path.join(dbDir, `${collName}.indexes.json`);

        // Target directory; its existence marks the collection as already migrated.
        const collDir = path.join(dbDir, collName);
        if (fs.existsSync(collDir)) {
          continue;
        }

        console.log(`[smartdb] Migrating ${dbEntry.name}.${collName}...`);

        // Read and validate the JSON collection before touching the filesystem.
        const docs: any[] = JSON.parse(fs.readFileSync(jsonPath, 'utf-8'));
        if (!Array.isArray(docs)) {
          throw new Error(`[smartdb] Cannot migrate ${jsonPath}: expected a JSON array of documents`);
        }

        // Create collection directory and remember it for cleanup on failure.
        fs.mkdirSync(collDir, { recursive: true });
        createdDirs.push(collDir);

        const keydir: Map<string, IKeyDirEntry> = new Map();

        // Write data.rdb
        writeSyncedFile(path.join(collDir, 'data.rdb'), (fd) => {
          fs.writeSync(fd, writeFileHeader(FILE_TYPE_DATA));

          let currentOffset = BigInt(FILE_HEADER_SIZE);
          const ts = BigInt(Date.now());

          for (const doc of docs) {
            const idHex = resolveV0DocumentKey(doc);

            // Serialize to BSON
            const valueBuf = Buffer.from(BSON.serialize(doc));
            const record = encodeDataRecord(ts, Buffer.from(idHex, 'utf-8'), valueBuf);
            fs.writeSync(fd, record);

            keydir.set(idHex, {
              offset: currentOffset,
              recordLen: record.length,
              valueLen: valueBuf.length,
              timestamp: ts,
            });

            currentOffset += BigInt(record.length);
          }
        });

        // Write keydir.hint
        writeSyncedFile(path.join(collDir, 'keydir.hint'), (hintFd) => {
          fs.writeSync(hintFd, writeFileHeader(FILE_TYPE_HINT));
          for (const [key, entry] of keydir) {
            fs.writeSync(hintFd, encodeHintEntry(key, entry.offset, entry.recordLen, entry.valueLen, entry.timestamp));
          }
        });

        // Copy indexes.json if it exists, otherwise write the default _id index.
        const destIndexPath = path.join(collDir, 'indexes.json');
        if (fs.existsSync(indexJsonPath)) {
          fs.copyFileSync(indexJsonPath, destIndexPath);
          deletableFiles.push(indexJsonPath);
        } else {
          fs.writeFileSync(destIndexPath, JSON.stringify([{ name: '_id_', key: { _id: 1 } }], null, 2));
        }

        deletableFiles.push(jsonPath);

        console.log(`[smartdb] Migrated ${dbEntry.name}.${collName}: ${docs.length} documents`);
      }
    }
  } catch (err) {
    // Clean up any directories created by this run so a retry starts fresh.
    for (const dir of createdDirs) {
      try {
        fs.rmSync(dir, { recursive: true, force: true });
      } catch {
        // Best-effort cleanup; the original error is rethrown below.
      }
    }
    throw err;
  }

  return deletableFiles;
}
|
||||||
@@ -1,4 +1,5 @@
|
|||||||
import { RustDbBridge } from '../rust-db-bridge.js';
|
import { RustDbBridge } from '../rust-db-bridge.js';
|
||||||
|
import { StorageMigrator } from '../../ts_migration/index.js';
|
||||||
import type {
|
import type {
|
||||||
IOpLogEntry,
|
IOpLogEntry,
|
||||||
IOpLogResult,
|
IOpLogResult,
|
||||||
@@ -75,6 +76,12 @@ export class SmartdbServer {
|
|||||||
throw new Error('Server is already running');
|
throw new Error('Server is already running');
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Run storage migration for file-based storage before starting Rust engine
|
||||||
|
if (this.options.storage === 'file' && this.options.storagePath) {
|
||||||
|
const migrator = new StorageMigrator(this.options.storagePath);
|
||||||
|
await migrator.run();
|
||||||
|
}
|
||||||
|
|
||||||
const spawned = await this.bridge.spawn();
|
const spawned = await this.bridge.spawn();
|
||||||
if (!spawned) {
|
if (!spawned) {
|
||||||
throw new Error(
|
throw new Error(
|
||||||
|
|||||||
File diff suppressed because one or more lines are too long
Reference in New Issue
Block a user