v2.5.2

fix(rustdb-indexes): persist created indexes and restore them on server startup
v2.5.1
2026-04-05 03:26:52 +00:00 · 2026-04-05 03:26:52 +00:00 · 2026-04-05 02:48:00 +00:00 · 2026-04-05 02:48:00 +00:00 · 2026-04-05 02:46:05 +00:00 · 2026-04-05 02:46:05 +00:00
33 changed files with 4841 additions and 427 deletions
@@ -1,5 +1,65 @@
 # Changelog

+## 2026-04-05 - 2.5.2 - fix(rustdb-indexes)
+persist created indexes and restore them on server startup
+
+- Save index specifications to storage when indexes are created.
+- Remove persisted index metadata when indexes are dropped by name, key spec, or wildcard.
+- Rebuild in-memory index engines from stored definitions and existing documents during startup.
+
+## 2026-04-05 - 2.5.1 - fix(docs)
+update project documentation
+
+- Modifies a single documentation-related file with a minimal text change.
+- No source code, API, or package metadata changes are indicated in the diff summary.
+
+## 2026-04-05 - 2.5.0 - feat(storage)
+add offline data validation and strengthen storage/index integrity checks
+
+- adds a `--validate-data <PATH>` CLI mode to run offline integrity checks on storage directories
+- introduces storage validation reporting for headers, checksums, duplicate ids, tombstones, and stale or orphaned hint entries
+- pre-checks unique index constraints before insert, update, upsert, and findAndModify writes to prevent duplicate-key violations before storage changes
+- validates hint files against data files during collection load and rebuilds indexes from data when hints are stale
+- ensures new data files always receive a SMARTDB header and persists fresh hint files after successful compaction
+- cleans up stale local Unix socket files before starting the TypeScript local server
+
+## 2026-04-05 - 2.4.1 - fix(package)
+update package metadata
+
+- Adjusts package manifest content with a minimal one-line change.
+
+## 2026-04-05 - 2.4.0 - feat(rustdb)
+add restore and periodic persistence support for in-memory storage
+
+- Restore previously persisted state during startup when a persist path is configured.
+- Spawn a background task to periodically persist in-memory data using the configured interval.
+- Warn when running purely in-memory without durable persistence configured.
+
+## 2026-04-04 - 2.3.1 - fix(package)
+update package metadata
+
+- Adjusts a single package-level metadata entry in the project configuration.
+
+## 2026-04-04 - 2.3.0 - feat(test)
+add integration coverage for file storage, compaction, migration, and LocalSmartDb workflows
+
+- adds end-to-end tests for file-backed storage creation, CRUD operations, bulk updates, persistence, and index file generation
+- adds compaction stress tests covering repeated updates, tombstones, file shrinking behavior, and restart integrity
+- adds migration tests for automatic v0 JSON layout detection, v1 conversion, restart persistence, and post-migration writes
+- adds LocalSmartDb lifecycle and unix socket tests, including restart persistence, custom socket paths, and database isolation
+
+## 2026-04-04 - 2.2.0 - feat(storage)
+add Bitcask storage migration, binary WAL, and data compaction support
+
+- add TypeScript storage migration from legacy JSON collections to the v1 Bitcask binary format before starting the Rust engine
+- replace the legacy JSON WAL with a binary write-ahead log plus shared binary record and KeyDir infrastructure in rustdb-storage
+- introduce data file compaction with dead-record reclamation and tests, and add the bson dependency for BSON serialization during migration
+
+## 2026-04-02 - 2.1.1 - fix(package)
+update package metadata
+
+- Adjusts a single package metadata entry in package.json.
+
 ## 2026-04-02 - 2.1.0 - feat(smartdb)
 add operation log APIs, point-in-time revert support, and a web-based debug dashboard

@@ -1,6 +1,6 @@
 {
  "name": "@push.rocks/smartdb",
-  "version": "2.1.0",
+  "version": "2.5.2",
  "private": false,
  "description": "A MongoDB-compatible embedded database server with wire protocol support, backed by a high-performance Rust engine.",
  "exports": {
@@ -29,7 +29,8 @@
  "dependencies": {
    "@api.global/typedserver": "^8.0.0",
    "@design.estate/dees-element": "^2.0.0",
-    "@push.rocks/smartrust": "^1.3.2"
+    "@push.rocks/smartrust": "^1.3.2",
+    "bson": "^7.2.0"
  },
  "browserslist": [
    "last 1 chrome versions"
@@ -17,6 +17,9 @@ importers:
      '@push.rocks/smartrust':
        specifier: ^1.3.2
        version: 1.3.2
+      bson:
+        specifier: ^7.2.0
+        version: 7.2.0
    devDependencies:
      '@git.zone/tsbuild':
        specifier: ^4.4.0
@@ -275,6 +275,12 @@ dependencies = [
 "windows-sys",
 ]

+[[package]]
+name = "fastrand"
+version = "2.4.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "a043dc74da1e37d6afe657061213aa6f425f855399a11d3463c6ecccc4dfda1f"
+
 [[package]]
 name = "find-msvc-tools"
 version = "0.1.9"
@@ -477,6 +483,12 @@ dependencies = [
 "libc",
 ]

+[[package]]
+name = "linux-raw-sys"
+version = "0.12.1"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "32a66949e030da00e8c7d4434b251670a91556f4144941d37452769c25d58a53"
+
 [[package]]
 name = "lock_api"
 version = "0.4.14"
@@ -802,6 +814,7 @@ dependencies = [
 "dashmap",
 "serde",
 "serde_json",
+ "tempfile",
 "thiserror",
 "tokio",
 "tracing",
@@ -835,6 +848,19 @@ dependencies = [
 "tracing",
 ]

+[[package]]
+name = "rustix"
+version = "1.1.4"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "b6fe4565b9518b83ef4f91bb47ce29620ca828bd32cb7e408f0062e9930ba190"
+dependencies = [
+ "bitflags",
+ "errno",
+ "libc",
+ "linux-raw-sys",
+ "windows-sys",
+]
+
 [[package]]
 name = "rustversion"
 version = "1.0.22"
@@ -977,6 +1003,19 @@ version = "1.0.1"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "55937e1799185b12863d447f42597ed69d9928686b8d88a1df17376a097d8369"

+[[package]]
+name = "tempfile"
+version = "3.27.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "32497e9a4c7b38532efcdebeef879707aa9f794296a4f0244f6f69e9bc8574bd"
+dependencies = [
+ "fastrand",
+ "getrandom 0.4.2",
+ "once_cell",
+ "rustix",
+ "windows-sys",
+]
+
 [[package]]
 name = "thiserror"
 version = "2.0.18"
@@ -66,6 +66,9 @@ uuid = { version = "1", features = ["v4", "serde"] }
 # Async traits
 async-trait = "0.1"

+# Test utilities
+tempfile = "3"
+
 # Internal crates
 rustdb-config = { path = "crates/rustdb-config" }
 rustdb-wire = { path = "crates/rustdb-wire" }
@@ -101,7 +101,15 @@ async fn handle_create_indexes(
            expire_after_seconds,
        };

-        // Create the index.
+        let options_for_persist = IndexOptions {
+            name: options.name.clone(),
+            unique: options.unique,
+            sparse: options.sparse,
+            expire_after_seconds: options.expire_after_seconds,
+        };
+        let key_for_persist = key.clone();
+
+        // Create the index in-memory.
        let mut engine = ctx
            .indexes
            .entry(ns_key.clone())
@@ -110,6 +118,22 @@ async fn handle_create_indexes(
        match engine.create_index(key, options) {
            Ok(index_name) => {
                debug!(index_name = %index_name, "Created index");
+
+                // Persist index spec to disk.
+                let mut spec = doc! { "key": key_for_persist };
+                if options_for_persist.unique {
+                    spec.insert("unique", true);
+                }
+                if options_for_persist.sparse {
+                    spec.insert("sparse", true);
+                }
+                if let Some(ttl) = options_for_persist.expire_after_seconds {
+                    spec.insert("expireAfterSeconds", ttl as i64);
+                }
+                if let Err(e) = ctx.storage.save_index(db, coll, &index_name, spec).await {
+                    tracing::warn!(index = %index_name, error = %e, "failed to persist index spec");
+                }
+
                created_count += 1;
            }
            Err(e) => {
@@ -180,9 +204,21 @@ async fn handle_drop_indexes(
    match index_spec {
        Some(Bson::String(name)) if name == "*" => {
            // Drop all indexes except _id_.
+            // Collect names to drop from storage first.
+            let names_to_drop: Vec<String> = if let Some(engine) = ctx.indexes.get(&ns_key) {
+                engine.list_indexes().iter()
+                    .filter(|info| info.name != "_id_")
+                    .map(|info| info.name.clone())
+                    .collect()
+            } else {
+                Vec::new()
+            };
            if let Some(mut engine) = ctx.indexes.get_mut(&ns_key) {
                engine.drop_all_indexes();
            }
+            for idx_name in &names_to_drop {
+                let _ = ctx.storage.drop_index(db, coll, idx_name).await;
+            }
        }
        Some(Bson::String(name)) => {
            // Drop by name.
@@ -196,6 +232,7 @@ async fn handle_drop_indexes(
                    name
                )));
            }
+            let _ = ctx.storage.drop_index(db, coll, name).await;
        }
        Some(Bson::Document(key_spec)) => {
            // Drop by key spec: find the index with matching key.
@@ -210,6 +247,7 @@ async fn handle_drop_indexes(
                    engine.drop_index(&name).map_err(|e| {
                        CommandError::IndexError(e.to_string())
                    })?;
+                    let _ = ctx.storage.drop_index(db, coll, &name).await;
                } else {
                    return Err(CommandError::IndexError(
                        "index not found with specified key".into(),
@@ -3,7 +3,7 @@ use std::collections::HashMap;
 use bson::{doc, oid::ObjectId, Bson, Document};
 use rustdb_index::IndexEngine;
 use rustdb_storage::OpType;
-use tracing::{debug, warn};
+use tracing::debug;

 use crate::context::CommandContext;
 use crate::error::{CommandError, CommandResult};
@@ -62,6 +62,23 @@ pub async fn handle(
            doc.insert("_id", ObjectId::new());
        }

+        // Pre-check unique index constraints BEFORE storage write.
+        if let Some(engine) = ctx.indexes.get(&ns_key) {
+            if let Err(e) = engine.check_unique_constraints(&doc) {
+                let err_msg = e.to_string();
+                write_errors.push(doc! {
+                    "index": idx as i32,
+                    "code": 11000_i32,
+                    "codeName": "DuplicateKey",
+                    "errmsg": &err_msg,
+                });
+                if ordered {
+                    break;
+                }
+                continue;
+            }
+        }
+
        // Attempt storage insert.
        match ctx.storage.insert_one(db, coll, doc.clone()).await {
            Ok(id_str) => {
@@ -81,10 +98,10 @@ pub async fn handle(
                    .entry(ns_key.clone())
                    .or_insert_with(IndexEngine::new);
                if let Err(e) = engine.on_insert(&doc) {
-                    warn!(
+                    tracing::error!(
                        namespace = %ns_key,
                        error = %e,
-                        "index update failed after successful insert"
+                        "index update failed after successful insert (pre-check passed but insert failed)"
                    );
                }
                inserted_count += 1;
@@ -150,6 +150,22 @@ async fn handle_update(
                        updated.get("_id").unwrap().clone()
                    };

+                    // Pre-check unique index constraints before upsert insert.
+                    if let Some(engine) = ctx.indexes.get(&ns_key) {
+                        if let Err(e) = engine.check_unique_constraints(&updated) {
+                            write_errors.push(doc! {
+                                "index": idx as i32,
+                                "code": 11000_i32,
+                                "codeName": "DuplicateKey",
+                                "errmsg": e.to_string(),
+                            });
+                            if ordered {
+                                break;
+                            }
+                            continue;
+                        }
+                    }
+
                    // Insert the new document.
                    match ctx.storage.insert_one(db, coll, updated.clone()).await {
                        Ok(id_str) => {
@@ -168,7 +184,9 @@ async fn handle_update(
                                .indexes
                                .entry(ns_key.clone())
                                .or_insert_with(IndexEngine::new);
-                            let _ = engine.on_insert(&updated);
+                            if let Err(e) = engine.on_insert(&updated) {
+                                tracing::error!(namespace = %ns_key, error = %e, "index update failed after upsert insert");
+                            }

                            total_n += 1;
                            upserted_list.push(doc! {
@@ -216,6 +234,22 @@ async fn handle_update(
                    array_filters.as_deref(),
                ) {
                    Ok(updated_doc) => {
+                        // Pre-check unique index constraints before storage write.
+                        if let Some(engine) = ctx.indexes.get(&ns_key) {
+                            if let Err(e) = engine.check_unique_constraints_for_update(matched_doc, &updated_doc) {
+                                write_errors.push(doc! {
+                                    "index": idx as i32,
+                                    "code": 11000_i32,
+                                    "codeName": "DuplicateKey",
+                                    "errmsg": e.to_string(),
+                                });
+                                if ordered {
+                                    break;
+                                }
+                                continue;
+                            }
+                        }
+
                        let id_str = extract_id_string(matched_doc);
                        match ctx
                            .storage
@@ -235,7 +269,9 @@ async fn handle_update(

                                // Update index.
                                if let Some(mut engine) = ctx.indexes.get_mut(&ns_key) {
-                                    let _ = engine.on_update(matched_doc, &updated_doc);
+                                    if let Err(e) = engine.on_update(matched_doc, &updated_doc) {
+                                        tracing::error!(namespace = %ns_key, error = %e, "index update failed after update");
+                                    }
                                }
                                total_n += 1;
                                // Check if the document actually changed.
@@ -444,6 +480,13 @@ async fn handle_find_and_modify(
        )
        .map_err(|e| CommandError::InternalError(e.to_string()))?;

+        // Pre-check unique index constraints before storage write.
+        if let Some(engine) = ctx.indexes.get(&ns_key) {
+            if let Err(e) = engine.check_unique_constraints_for_update(&original_doc, &updated_doc) {
+                return Err(CommandError::StorageError(e.to_string()));
+            }
+        }
+
        let id_str = extract_id_string(&original_doc);
        ctx.storage
            .update_by_id(db, coll, &id_str, updated_doc.clone())
@@ -461,7 +504,9 @@ async fn handle_find_and_modify(

        // Update index.
        if let Some(mut engine) = ctx.indexes.get_mut(&ns_key) {
-            let _ = engine.on_update(&original_doc, &updated_doc);
+            if let Err(e) = engine.on_update(&original_doc, &updated_doc) {
+                tracing::error!(namespace = %ns_key, error = %e, "index update failed after findAndModify update");
+            }
        }

        let return_doc = if return_new {
@@ -505,6 +550,13 @@ async fn handle_find_and_modify(
            updated_doc.get("_id").unwrap().clone()
        };

+        // Pre-check unique index constraints before upsert insert.
+        if let Some(engine) = ctx.indexes.get(&ns_key) {
+            if let Err(e) = engine.check_unique_constraints(&updated_doc) {
+                return Err(CommandError::StorageError(e.to_string()));
+            }
+        }
+
        let inserted_id_str = ctx.storage
            .insert_one(db, coll, updated_doc.clone())
            .await?;
@@ -525,7 +577,9 @@ async fn handle_find_and_modify(
                .indexes
                .entry(ns_key.clone())
                .or_insert_with(IndexEngine::new);
-            let _ = engine.on_insert(&updated_doc);
+            if let Err(e) = engine.on_insert(&updated_doc) {
+                tracing::error!(namespace = %ns_key, error = %e, "index update failed after findAndModify upsert");
+            }
        }

        let value = if return_new {
@@ -153,6 +153,55 @@ impl IndexEngine {
        self.indexes.contains_key(name)
    }

+    /// Read-only pre-flight check for inserting `doc`.
+    ///
+    /// For every unique index, the key bytes extracted from `doc` are looked up
+    /// in that index's hash map. Returns `Err(IndexError::DuplicateKey)` naming
+    /// an index whose entry for that key is non-empty, `Ok(())` otherwise.
+    /// Takes `&self`, so no index structure is modified.
+    pub fn check_unique_constraints(&self, doc: &Document) -> Result<(), IndexError> {
+        for index in self.indexes.values() {
+            if !index.unique {
+                continue;
+            }
+            // No key bytes extracted for this index: nothing to compare against.
+            let key = match Self::extract_key_bytes(doc, &index.key, index.sparse) {
+                Some(bytes) => bytes,
+                None => continue,
+            };
+            let taken = match index.hash.get(&key) {
+                Some(owners) => !owners.is_empty(),
+                None => false,
+            };
+            if taken {
+                return Err(IndexError::DuplicateKey {
+                    index: index.name.clone(),
+                    key: format!("{:?}", key),
+                });
+            }
+        }
+        Ok(())
+    }
+
+    /// Read-only pre-flight check for replacing `old_doc` with `new_doc`.
+    ///
+    /// For every unique index, the key bytes extracted from `new_doc` are looked
+    /// up in that index's hash map. An entry counts as a conflict only if it
+    /// holds an id different from the id extracted from `old_doc`, so a document
+    /// never conflicts with itself. Returns `Err(IndexError::DuplicateKey)` on
+    /// conflict, `Ok(())` otherwise. Takes `&self`; nothing is modified.
+    pub fn check_unique_constraints_for_update(
+        &self,
+        old_doc: &Document,
+        new_doc: &Document,
+    ) -> Result<(), IndexError> {
+        let doc_id = Self::extract_id(old_doc);
+        for index in self.indexes.values() {
+            if !index.unique {
+                continue;
+            }
+            // No key bytes extracted for this index: nothing to compare against.
+            let key = match Self::extract_key_bytes(new_doc, &index.key, index.sparse) {
+                Some(bytes) => bytes,
+                None => continue,
+            };
+            let conflicting = match index.hash.get(&key) {
+                Some(owners) => owners.iter().any(|owner| *owner != doc_id),
+                None => false,
+            };
+            if conflicting {
+                return Err(IndexError::DuplicateKey {
+                    index: index.name.clone(),
+                    key: format!("{:?}", key),
+                });
+            }
+        }
+        Ok(())
+    }
+
    /// Notify the engine that a document has been inserted.
    /// Checks unique constraints and updates all index structures.
    pub fn on_insert(&mut self, doc: &Document) -> Result<(), IndexError> {
@@ -17,3 +17,6 @@ tracing = { workspace = true }
 crc32fast = { workspace = true }
 uuid = { workspace = true }
 async-trait = { workspace = true }
+
+[dev-dependencies]
+tempfile = { workspace = true }
@@ -0,0 +1,499 @@
+//! Binary Write-Ahead Log for crash recovery.
+//!
+//! # Protocol
+//!
+//! Every mutation follows this sequence:
+//! 1. Append WAL record → fsync
+//! 2. Perform the actual data write
+//! 3. Append WAL commit marker → fsync
+//!
+//! On recovery, uncommitted entries (those without a matching commit marker)
+//! are replayed or verified.
+//!
+//! # Record format
+//!
+//! ```text
+//! ┌──────────┬──────────┬──────────┬──────────┬──────────┬──────────┬────────────┐
+//! │ magic    │ seq      │ op       │ key_len  │ val_len  │ crc32    │ payload    │
+//! │ u16 LE   │ u64 LE   │ u8       │ u32 LE   │ u32 LE   │ u32 LE   │ [key][val] │
+//! │ 0xAA01   │          │          │          │          │          │            │
+//! └──────────┴──────────┴──────────┴──────────┴──────────┴──────────┴────────────┘
+//! ```
+//!
+//! # Commit marker
+//!
+//! ```text
+//! ┌──────────┬──────────┬──────────┐
+//! │ magic    │ seq      │ crc32    │
+//! │ u16 LE   │ u64 LE   │ u32 LE   │
+//! │ 0xCC01   │          │          │
+//! └──────────┴──────────┴──────────┘
+//! ```
+
+use std::io::{self, BufReader, Read, Write};
+use std::path::{Path, PathBuf};
+use std::sync::atomic::{AtomicU64, Ordering};
+
+use crate::error::{StorageError, StorageResult};
+use crate::record::{FileHeader, FileType, FILE_HEADER_SIZE};
+
+// ---------------------------------------------------------------------------
+// Constants
+// ---------------------------------------------------------------------------
+
+const WAL_RECORD_MAGIC: u16 = 0xAA01;
+const WAL_COMMIT_MAGIC: u16 = 0xCC01;
+
+/// WAL record header: magic(2) + seq(8) + op(1) + key_len(4) + val_len(4) + crc(4) = 23
+const WAL_RECORD_HEADER: usize = 23;
+
+/// Commit marker size: magic(2) + seq(8) + crc(4) = 14
+const WAL_COMMIT_SIZE: usize = 14;
+
+// ---------------------------------------------------------------------------
+// WAL operation type
+// ---------------------------------------------------------------------------
+
+/// Kind of mutation a WAL record describes. Written to the record header as a
+/// single byte using the discriminant values below.
+#[derive(Debug, Clone, Copy, PartialEq, Eq)]
+#[repr(u8)]
+pub enum WalOpType {
+    Insert = 1,
+    Update = 2,
+    Delete = 3,
+}
+
+impl WalOpType {
+    /// Decode the on-disk op byte. Any value other than 1, 2 or 3 yields
+    /// `StorageError::WalError`.
+    fn from_u8(v: u8) -> StorageResult<Self> {
+        let op = match v {
+            1 => Self::Insert,
+            2 => Self::Update,
+            3 => Self::Delete,
+            _ => return Err(StorageError::WalError(format!("unknown WAL op: {v}"))),
+        };
+        Ok(op)
+    }
+}
+
+// ---------------------------------------------------------------------------
+// WAL entry (parsed from file)
+// ---------------------------------------------------------------------------
+
+#[derive(Debug, Clone)]
+pub struct WalEntry { // one data record decoded from the WAL file
+    pub seq: u64, // sequence number stored in the record header
+    pub op: WalOpType, // mutation kind decoded from the header's op byte
+    pub key: Vec<u8>, // first `key_len` bytes of the record payload
+    pub value: Vec<u8>, // remaining `val_len` bytes of the record payload
+}
+
+// ---------------------------------------------------------------------------
+// Internal: what we read from the WAL file
+// ---------------------------------------------------------------------------
+
+#[derive(Debug)]
+enum WalItem { // anything the WAL scanner can yield, in file order
+    Record(WalEntry), // a data record that passed its CRC check
+    Commit(u64), // seq that was committed
+}
+
+// ---------------------------------------------------------------------------
+// BinaryWal
+// ---------------------------------------------------------------------------
+
+/// Binary write-ahead log backed by a single file.
+pub struct BinaryWal {
+    path: PathBuf, // location of the WAL file on disk
+    next_seq: AtomicU64, // sequence number the next `append()` hands out; set to 1 by `new()`
+}
+
+impl BinaryWal {
+    /// Build a WAL handle for `path` with the sequence counter starting at 1.
+    /// Nothing is read from or written to disk here; call `initialize()` next.
+    pub fn new(path: PathBuf) -> Self {
+        let next_seq = AtomicU64::new(1);
+        Self { path, next_seq }
+    }
+
+    /// Prepare the WAL for use.
+    ///
+    /// Creates the parent directory if needed. When the WAL file does not exist
+    /// yet, it is created containing only a file header and fsynced. When it
+    /// does exist, it is scanned and the sequence counter is set to one past
+    /// the highest sequence number found (1 if the file holds no items).
+    pub fn initialize(&self) -> StorageResult<()> {
+        if let Some(dir) = self.path.parent() {
+            std::fs::create_dir_all(dir)?;
+        }
+
+        if !self.path.exists() {
+            // Fresh WAL: write just the file header.
+            let mut file = std::fs::File::create(&self.path)?;
+            let header = FileHeader::new(FileType::Wal);
+            file.write_all(&header.encode())?;
+            file.flush()?;
+            file.sync_all()?;
+            return Ok(());
+        }
+
+        // Existing WAL: resume numbering after the highest seq on disk.
+        let mut highest: u64 = 0;
+        for item in self.read_all_items()? {
+            let seq = match item {
+                WalItem::Record(entry) => entry.seq,
+                WalItem::Commit(seq) => seq,
+            };
+            if seq > highest {
+                highest = seq;
+            }
+        }
+        self.next_seq.store(highest + 1, Ordering::SeqCst);
+
+        Ok(())
+    }
+
+    /// Append a WAL record and fsync it. Returns the sequence number assigned
+    /// to the record.
+    ///
+    /// The record header stores the key and value lengths as `u32`. Inputs
+    /// longer than `u32::MAX` bytes are rejected with `StorageError::WalError`
+    /// instead of being written with a truncated length that would no longer
+    /// match the payload. The check runs before a sequence number is consumed.
+    pub fn append(
+        &self,
+        op: WalOpType,
+        key: &[u8],
+        value: &[u8],
+    ) -> StorageResult<u64> {
+        let key_len = u32::try_from(key.len()).map_err(|_| {
+            StorageError::WalError(format!("WAL key too large: {} bytes", key.len()))
+        })?;
+        let val_len = u32::try_from(value.len()).map_err(|_| {
+            StorageError::WalError(format!("WAL value too large: {} bytes", value.len()))
+        })?;
+
+        let seq = self.next_seq.fetch_add(1, Ordering::SeqCst);
+
+        // Build header bytes (without CRC)
+        let mut hdr = Vec::with_capacity(WAL_RECORD_HEADER);
+        hdr.extend_from_slice(&WAL_RECORD_MAGIC.to_le_bytes());
+        hdr.extend_from_slice(&seq.to_le_bytes());
+        hdr.push(op as u8);
+        hdr.extend_from_slice(&key_len.to_le_bytes());
+        hdr.extend_from_slice(&val_len.to_le_bytes());
+        // CRC placeholder
+        hdr.extend_from_slice(&0u32.to_le_bytes());
+
+        // Compute CRC over header (without crc field) + payload
+        let mut hasher = crc32fast::Hasher::new();
+        hasher.update(&hdr[0..19]); // magic + seq + op + key_len + val_len
+        hasher.update(key);
+        hasher.update(value);
+        let crc = hasher.finalize();
+        hdr[19..23].copy_from_slice(&crc.to_le_bytes());
+
+        // Append to file
+        let mut f = std::fs::OpenOptions::new()
+            .create(true)
+            .append(true)
+            .open(&self.path)?;
+        f.write_all(&hdr)?;
+        f.write_all(key)?;
+        f.write_all(value)?;
+        f.sync_all()?;
+
+        Ok(seq)
+    }
+
+    /// Append a commit marker for `seq` to the WAL file and fsync it.
+    ///
+    /// Marker layout: magic (2 bytes) | seq (8 bytes) | crc32 (4 bytes), all
+    /// little-endian; the CRC covers the magic and seq bytes.
+    pub fn append_commit(&self, seq: u64) -> StorageResult<()> {
+        let mut marker = [0u8; WAL_COMMIT_SIZE];
+        marker[0..2].copy_from_slice(&WAL_COMMIT_MAGIC.to_le_bytes());
+        marker[2..10].copy_from_slice(&seq.to_le_bytes());
+
+        // Checksum the 10 bytes written so far, then store it in the tail.
+        let mut digest = crc32fast::Hasher::new();
+        digest.update(&marker[0..10]);
+        let checksum = digest.finalize();
+        marker[10..14].copy_from_slice(&checksum.to_le_bytes());
+
+        let mut file = std::fs::OpenOptions::new()
+            .create(true)
+            .append(true)
+            .open(&self.path)?;
+        file.write_all(&marker)?;
+        file.sync_all()?;
+
+        Ok(())
+    }
+
+    /// Return, in file order, every WAL record whose sequence number has no
+    /// commit marker anywhere in the file.
+    pub fn recover(&self) -> StorageResult<Vec<WalEntry>> {
+        let items = self.read_all_items()?;
+
+        // First pass: gather every committed sequence number.
+        let mut committed = std::collections::HashSet::new();
+        for item in &items {
+            if let WalItem::Commit(seq) = item {
+                committed.insert(*seq);
+            }
+        }
+
+        // Second pass: keep the records that were never committed.
+        let mut pending = Vec::new();
+        for item in items {
+            match item {
+                WalItem::Record(entry) if !committed.contains(&entry.seq) => pending.push(entry),
+                _ => {}
+            }
+        }
+
+        Ok(pending)
+    }
+
+    /// Discard every entry by recreating the WAL file with only a file header,
+    /// then fsync. The in-memory sequence counter is deliberately left alone so
+    /// sequence numbers keep increasing.
+    pub fn truncate(&self) -> StorageResult<()> {
+        let mut file = std::fs::File::create(&self.path)?;
+        file.write_all(&FileHeader::new(FileType::Wal).encode())?;
+        file.flush()?;
+        file.sync_all()?;
+        Ok(())
+    }
+
+    /// Borrow the filesystem path of the WAL file.
+    pub fn path(&self) -> &Path {
+        self.path.as_path()
+    }
+
+    // -----------------------------------------------------------------------
+    // Internal: read all items from the WAL file
+    // -----------------------------------------------------------------------
+
+    /// Scan the WAL file and return every well-formed record and commit marker
+    /// in file order.
+    ///
+    /// * A missing file yields an empty list.
+    /// * A truncated trailing item (EOF in the middle of an item) ends the scan.
+    /// * An item whose CRC does not match is logged and skipped.
+    /// * An unknown magic, or a record declaring more payload than the file
+    ///   holds, is logged and ends the scan.
+    ///
+    /// The op byte is decoded only after the record's CRC has been verified, so
+    /// a record with a damaged op byte is skipped like any other corrupt record
+    /// instead of aborting the whole scan. A record with a valid CRC but an
+    /// unknown op still returns `StorageError::WalError`.
+    fn read_all_items(&self) -> StorageResult<Vec<WalItem>> {
+        if !self.path.exists() {
+            return Ok(vec![]);
+        }
+
+        let file = std::fs::File::open(&self.path)?;
+        let mut reader = BufReader::new(file);
+
+        // Skip file header (if present)
+        let file_len = std::fs::metadata(&self.path)?.len();
+        if file_len >= FILE_HEADER_SIZE as u64 {
+            let mut hdr_buf = [0u8; FILE_HEADER_SIZE];
+            reader.read_exact(&mut hdr_buf)?;
+            // Validate but don't fail hard — allow reading even slightly off headers
+            let _ = FileHeader::decode(&hdr_buf);
+        }
+
+        let mut items = Vec::new();
+
+        loop {
+            // Peek at the magic to determine if this is a record or commit marker
+            let mut magic_buf = [0u8; 2];
+            match reader.read_exact(&mut magic_buf) {
+                Ok(()) => {}
+                Err(e) if e.kind() == io::ErrorKind::UnexpectedEof => break,
+                Err(e) => return Err(e.into()),
+            }
+            let magic = u16::from_le_bytes(magic_buf);
+
+            match magic {
+                WAL_RECORD_MAGIC => {
+                    // Read rest of header: seq(8) + op(1) + key_len(4) + val_len(4) + crc(4) = 21
+                    let mut rest = [0u8; 21];
+                    match reader.read_exact(&mut rest) {
+                        Ok(()) => {}
+                        Err(e) if e.kind() == io::ErrorKind::UnexpectedEof => break,
+                        Err(e) => return Err(e.into()),
+                    }
+
+                    let seq = u64::from_le_bytes(rest[0..8].try_into().unwrap());
+                    // Decoded only after the CRC check below.
+                    let op_byte = rest[8];
+                    let key_len = u32::from_le_bytes(rest[9..13].try_into().unwrap()) as usize;
+                    let val_len = u32::from_le_bytes(rest[13..17].try_into().unwrap()) as usize;
+                    let stored_crc = u32::from_le_bytes(rest[17..21].try_into().unwrap());
+
+                    // The length fields are not yet verified. Refuse to allocate a
+                    // buffer larger than the file itself; the file length is
+                    // re-read on the slow path in case the file grew after the
+                    // initial metadata call.
+                    let plausible = match key_len.checked_add(val_len) {
+                        Some(n) => {
+                            (n as u64) <= file_len
+                                || (n as u64) <= reader.get_ref().metadata()?.len()
+                        }
+                        None => false,
+                    };
+                    if !plausible {
+                        tracing::warn!(
+                            seq,
+                            key_len,
+                            val_len,
+                            "WAL record declares more payload than the file holds, stopping scan"
+                        );
+                        break;
+                    }
+
+                    let mut payload = vec![0u8; key_len + val_len];
+                    match reader.read_exact(&mut payload) {
+                        Ok(()) => {}
+                        Err(e) if e.kind() == io::ErrorKind::UnexpectedEof => break,
+                        Err(e) => return Err(e.into()),
+                    }
+
+                    // Verify CRC
+                    let mut hasher = crc32fast::Hasher::new();
+                    hasher.update(&magic_buf);
+                    hasher.update(&rest[0..17]); // seq + op + key_len + val_len
+                    hasher.update(&payload);
+                    let computed = hasher.finalize();
+
+                    if computed != stored_crc {
+                        // Corrupt WAL entry — skip it (best-effort recovery)
+                        tracing::warn!(
+                            seq,
+                            "skipping corrupt WAL record: CRC mismatch (expected 0x{stored_crc:08X}, got 0x{computed:08X})"
+                        );
+                        continue;
+                    }
+
+                    // The CRC is valid, so an unknown op here is a genuine format
+                    // error rather than bit rot.
+                    let op = WalOpType::from_u8(op_byte)?;
+
+                    let key = payload[..key_len].to_vec();
+                    let value = payload[key_len..].to_vec();
+                    items.push(WalItem::Record(WalEntry {
+                        seq,
+                        op,
+                        key,
+                        value,
+                    }));
+                }
+                WAL_COMMIT_MAGIC => {
+                    // Read rest: seq(8) + crc(4) = 12
+                    let mut rest = [0u8; 12];
+                    match reader.read_exact(&mut rest) {
+                        Ok(()) => {}
+                        Err(e) if e.kind() == io::ErrorKind::UnexpectedEof => break,
+                        Err(e) => return Err(e.into()),
+                    }
+
+                    let seq = u64::from_le_bytes(rest[0..8].try_into().unwrap());
+                    let stored_crc = u32::from_le_bytes(rest[8..12].try_into().unwrap());
+
+                    let mut hasher = crc32fast::Hasher::new();
+                    hasher.update(&magic_buf);
+                    hasher.update(&rest[0..8]);
+                    let computed = hasher.finalize();
+
+                    if computed != stored_crc {
+                        tracing::warn!(
+                            seq,
+                            "skipping corrupt WAL commit marker: CRC mismatch"
+                        );
+                        continue;
+                    }
+
+                    items.push(WalItem::Commit(seq));
+                }
+                _ => {
+                    // Unknown magic — file is corrupt past this point
+                    tracing::warn!("unknown WAL magic 0x{magic:04X}, stopping scan");
+                    break;
+                }
+            }
+        }
+
+        Ok(items)
+    }
+}
+
+// ---------------------------------------------------------------------------
+// Tests
+// ---------------------------------------------------------------------------
+
+#[cfg(test)]
+mod tests {
+    use super::*;
+
+    /// Create a `BinaryWal` at `test.wal` inside `dir` and initialize it.
+    fn make_wal(dir: &tempfile::TempDir) -> BinaryWal {
+        let path = dir.path().join("test.wal");
+        let wal = BinaryWal::new(path);
+        wal.initialize().unwrap();
+        wal
+    }
+
+    /// A record followed by its commit marker is not reported by `recover()`.
+    #[test]
+    fn append_and_commit() {
+        let dir = tempfile::tempdir().unwrap();
+        let wal = make_wal(&dir);
+
+        let seq = wal
+            .append(WalOpType::Insert, b"key1", b"value1")
+            .unwrap();
+        // The first append on a fresh WAL is expected to get sequence number 1.
+        assert_eq!(seq, 1);
+
+        wal.append_commit(seq).unwrap();
+
+        // All committed — recover should return empty
+        let uncommitted = wal.recover().unwrap();
+        assert!(uncommitted.is_empty());
+    }
+
+    /// `recover()` returns exactly the records that have no commit marker,
+    /// with their seq, op, key and value intact.
+    #[test]
+    fn uncommitted_entries_recovered() {
+        let dir = tempfile::tempdir().unwrap();
+        let wal = make_wal(&dir);
+
+        let s1 = wal
+            .append(WalOpType::Insert, b"k1", b"v1")
+            .unwrap();
+        wal.append_commit(s1).unwrap();
+
+        // s2 is NOT committed
+        let s2 = wal
+            .append(WalOpType::Update, b"k2", b"v2")
+            .unwrap();
+
+        let uncommitted = wal.recover().unwrap();
+        assert_eq!(uncommitted.len(), 1);
+        assert_eq!(uncommitted[0].seq, s2);
+        assert_eq!(uncommitted[0].op, WalOpType::Update);
+        assert_eq!(uncommitted[0].key, b"k2");
+        assert_eq!(uncommitted[0].value, b"v2");
+    }
+
+    /// After `truncate()`, even an uncommitted record is no longer recovered.
+    #[test]
+    fn truncate_clears_wal() {
+        let dir = tempfile::tempdir().unwrap();
+        let wal = make_wal(&dir);
+
+        wal.append(WalOpType::Insert, b"k", b"v").unwrap();
+        wal.truncate().unwrap();
+
+        let uncommitted = wal.recover().unwrap();
+        assert!(uncommitted.is_empty());
+    }
+
+    /// Commit markers are matched to records by sequence number, so committing
+    /// s1 and s3 leaves only s2 to be recovered.
+    #[test]
+    fn multiple_operations() {
+        let dir = tempfile::tempdir().unwrap();
+        let wal = make_wal(&dir);
+
+        let s1 = wal.append(WalOpType::Insert, b"a", b"1").unwrap();
+        let s2 = wal.append(WalOpType::Update, b"b", b"2").unwrap();
+        let s3 = wal.append(WalOpType::Delete, b"c", b"").unwrap();
+
+        // Commit only s1 and s3
+        wal.append_commit(s1).unwrap();
+        wal.append_commit(s3).unwrap();
+
+        let uncommitted = wal.recover().unwrap();
+        assert_eq!(uncommitted.len(), 1);
+        assert_eq!(uncommitted[0].seq, s2);
+    }
+
+    /// Re-initializing a WAL on an existing file must not restart sequence
+    /// numbering at 1.
+    #[test]
+    fn sequence_numbers_persist_across_reinit() {
+        let dir = tempfile::tempdir().unwrap();
+        let path = dir.path().join("persist.wal");
+
+        // First session: write and commit one record, then drop the handle.
+        {
+            let wal = BinaryWal::new(path.clone());
+            wal.initialize().unwrap();
+            let s1 = wal.append(WalOpType::Insert, b"k", b"v").unwrap();
+            assert_eq!(s1, 1);
+            wal.append_commit(s1).unwrap();
+        }
+
+        // Re-open — seq should continue from 2+ (since max committed was 1)
+        {
+            let wal = BinaryWal::new(path);
+            wal.initialize().unwrap();
+            let s2 = wal.append(WalOpType::Insert, b"k2", b"v2").unwrap();
+            assert!(s2 >= 2, "seq should continue: got {s2}");
+        }
+    }
+
+    /// A delete appended with an empty value round-trips through `recover()`
+    /// with an empty value.
+    #[test]
+    fn delete_has_empty_value() {
+        let dir = tempfile::tempdir().unwrap();
+        let wal = make_wal(&dir);
+
+        let seq = wal.append(WalOpType::Delete, b"key", b"").unwrap();
+
+        let uncommitted = wal.recover().unwrap();
+        assert_eq!(uncommitted.len(), 1);
+        assert_eq!(uncommitted[0].seq, seq);
+        assert_eq!(uncommitted[0].op, WalOpType::Delete);
+        assert!(uncommitted[0].value.is_empty());
+    }
+}
@@ -0,0 +1,270 @@
+//! Compaction for the Bitcask-style storage engine.
+//!
+//! Over time, the data file accumulates dead records (superseded by updates,
+//! tombstones from deletes). Compaction rewrites the data file with only live
+//! records, reclaiming disk space.
+//!
+//! The process is:
+//! 1. Create a new `data.rdb.compact` file with a fresh file header.
+//! 2. Iterate all live entries from the KeyDir.
+//! 3. Read each live document from the old data file, write to the new file.
+//! 4. Atomically rename `data.rdb.compact` → `data.rdb`.
+//! 5. Update KeyDir entries with new offsets.
+//! 6. Reset dead_bytes counter.
+
+use std::io::{Seek, SeekFrom, Write};
+use std::path::Path;
+use std::sync::atomic::Ordering;
+
+use tracing::info;
+
+use crate::error::StorageResult;
+use crate::keydir::{KeyDir, KeyDirEntry};
+use crate::record::{DataRecord, FileHeader, FileType, FILE_HEADER_SIZE};
+
+/// Result of a compaction operation, as returned by `compact_data_file`.
+#[derive(Debug)]
+pub struct CompactionResult {
+    /// Number of live records written (one per KeyDir entry at the time of
+    /// compaction).
+    pub records_written: u64,
+    /// Bytes reclaimed (old file size - new file size), saturating at zero.
+    pub bytes_reclaimed: u64,
+    /// New data file size in bytes, including the file header.
+    pub new_file_size: u64,
+}
+
+/// Compact a collection's data file.
+///
+/// This function:
+/// - Snapshots all live entries (entries present in the KeyDir) and sorts them
+///   by offset so the old data file is read front to back
+/// - Copies each referenced record into a sibling file with the `rdb.compact`
+///   extension, verifying that the record found at each offset really carries
+///   the key the KeyDir expects
+/// - Syncs the new file and atomically renames it over the old one
+/// - Updates all KeyDir entries with their new offsets and resets `dead_bytes`
+///   to 0 and `data_file_size` to the new file size
+///
+/// # Errors
+///
+/// Returns `StorageError::CorruptRecord` if a record cannot be decoded at its
+/// KeyDir offset or if its key differs from the KeyDir key; I/O errors are
+/// propagated. On any error the temporary compact file is removed
+/// (best-effort) and the original data file, the KeyDir and both counters are
+/// left untouched.
+///
+/// The caller must hold the collection's write lock during this operation.
+pub fn compact_data_file(
+    data_path: &Path,
+    keydir: &KeyDir,
+    dead_bytes: &std::sync::atomic::AtomicU64,
+    data_file_size: &std::sync::atomic::AtomicU64,
+) -> StorageResult<CompactionResult> {
+    let compact_path = data_path.with_extension("rdb.compact");
+
+    // Only used for reporting; a failed stat is reported as "0 bytes before".
+    let old_file_size = std::fs::metadata(data_path)
+        .map(|m| m.len())
+        .unwrap_or(0);
+
+    // Collect all live entries with their keys
+    let mut live_entries: Vec<(String, KeyDirEntry)> = Vec::with_capacity(keydir.len() as usize);
+    keydir.for_each(|key, entry| {
+        live_entries.push((key.to_string(), *entry));
+    });
+
+    // Sort by offset for sequential reads (cache-friendly)
+    live_entries.sort_by_key(|(_, e)| e.offset);
+
+    // Write the compacted copy and swap it in. Nothing in memory is modified
+    // until this has fully succeeded, so a failure only needs to discard the
+    // partially written temp file.
+    let (new_entries, current_offset) =
+        match copy_live_records(data_path, &compact_path, &live_entries) {
+            Ok(written) => written,
+            Err(e) => {
+                // Best-effort cleanup; the original error is what matters.
+                let _ = std::fs::remove_file(&compact_path);
+                return Err(e);
+            }
+        };
+
+    // Update KeyDir with new offsets
+    for (key, new_entry) in new_entries {
+        keydir.insert(key, new_entry);
+    }
+
+    // Reset counters
+    dead_bytes.store(0, Ordering::Relaxed);
+    data_file_size.store(current_offset, Ordering::Relaxed);
+
+    let bytes_reclaimed = old_file_size.saturating_sub(current_offset);
+
+    info!(
+        records = live_entries.len(),
+        old_size = old_file_size,
+        new_size = current_offset,
+        reclaimed = bytes_reclaimed,
+        "compaction complete"
+    );
+
+    Ok(CompactionResult {
+        records_written: live_entries.len() as u64,
+        bytes_reclaimed,
+        new_file_size: current_offset,
+    })
+}
+
+/// Copy every record referenced by `live_entries` from `data_path` into a
+/// fresh file at `compact_path`, sync it, and rename it over `data_path`.
+///
+/// Returns the relocated KeyDir entries together with the size of the new
+/// file. The caller is responsible for removing `compact_path` on error.
+fn copy_live_records(
+    data_path: &Path,
+    compact_path: &Path,
+    live_entries: &[(String, KeyDirEntry)],
+) -> StorageResult<(Vec<(String, KeyDirEntry)>, u64)> {
+    // Create compact file with header
+    let mut compact_file = std::fs::File::create(compact_path)?;
+    let hdr = FileHeader::new(FileType::Data);
+    compact_file.write_all(&hdr.encode())?;
+
+    let mut current_offset = FILE_HEADER_SIZE as u64;
+    let mut new_entries: Vec<(String, KeyDirEntry)> = Vec::with_capacity(live_entries.len());
+    let mut old_data_file = std::fs::File::open(data_path)?;
+
+    for (key, entry) in live_entries {
+        // Read the record from the old file
+        old_data_file.seek(SeekFrom::Start(entry.offset))?;
+        let (record, _disk_size) = DataRecord::decode_from(&mut old_data_file)?
+            .ok_or_else(|| {
+                crate::error::StorageError::CorruptRecord(format!(
+                    "compaction: unexpected EOF reading doc '{key}' at offset {}",
+                    entry.offset
+                ))
+            })?;
+
+        // A stale KeyDir offset can still land on a well-formed record that
+        // belongs to another document. Copying it would permanently replace
+        // the right data once the rename happens, so refuse instead.
+        if record.key.as_slice() != key.as_bytes() {
+            return Err(crate::error::StorageError::CorruptRecord(format!(
+                "compaction: record at offset {} does not belong to doc '{key}'",
+                entry.offset
+            )));
+        }
+
+        // Write to compact file
+        let encoded = record.encode();
+        let new_disk_size = encoded.len() as u32;
+        compact_file.write_all(&encoded)?;
+
+        new_entries.push((
+            key.clone(),
+            KeyDirEntry {
+                offset: current_offset,
+                record_len: new_disk_size,
+                value_len: entry.value_len,
+                timestamp: entry.timestamp,
+            },
+        ));
+
+        current_offset += new_disk_size as u64;
+    }
+
+    // Make the new file durable before it replaces the old one.
+    compact_file.sync_all()?;
+    drop(compact_file);
+    drop(old_data_file);
+
+    // Atomic rename
+    std::fs::rename(compact_path, data_path)?;
+
+    Ok((new_entries, current_offset))
+}
+
+/// Decide whether a collection's data file is worth compacting.
+///
+/// `data_file_size` is the full on-disk size, file header included. The header
+/// is subtracted first; compaction is recommended once `dead_bytes` is strictly
+/// greater than half of the remaining (live + dead) bytes. A file that is no
+/// larger than its header never qualifies.
+pub fn should_compact(dead_bytes: u64, data_file_size: u64) -> bool {
+    let header_len = FILE_HEADER_SIZE as u64;
+    match data_file_size.checked_sub(header_len) {
+        // Header only (or shorter): there is nothing to reclaim.
+        None | Some(0) => false,
+        Some(payload_bytes) => dead_bytes > payload_bytes / 2,
+    }
+}
+
+// ---------------------------------------------------------------------------
+// Tests
+// ---------------------------------------------------------------------------
+
+#[cfg(test)]
+mod tests {
+    use super::*;
+    use crate::keydir::KeyDir;
+    use crate::record::{now_ms, DataRecord, FileHeader, FileType};
+    use std::io::Write;
+    use std::sync::atomic::AtomicU64;
+
+    /// Builds a data file holding one superseded and two live records, compacts
+    /// it, and checks the result, the counters, the relocated KeyDir entries,
+    /// and that the compacted file rescans cleanly.
+    #[test]
+    fn compact_removes_dead_records() {
+        let dir = tempfile::tempdir().unwrap();
+        let data_path = dir.path().join("data.rdb");
+
+        // Write a data file: insert A, update A (new version), insert B
+        let mut f = std::fs::File::create(&data_path).unwrap();
+        let hdr = FileHeader::new(FileType::Data);
+        f.write_all(&hdr.encode()).unwrap();
+
+        let ts = now_ms();
+
+        // Record 1: A v1 (will be superseded)
+        let r1 = DataRecord {
+            timestamp: ts,
+            key: b"aaa".to_vec(),
+            value: b"old_value".to_vec(),
+        };
+        let r1_enc = r1.encode();
+        let r1_offset = FILE_HEADER_SIZE as u64;
+        let r1_size = r1_enc.len();
+        f.write_all(&r1_enc).unwrap();
+
+        // Record 2: A v2 (current)
+        let r2 = DataRecord {
+            timestamp: ts + 1,
+            key: b"aaa".to_vec(),
+            value: b"new_value".to_vec(),
+        };
+        let r2_enc = r2.encode();
+        let r2_offset = r1_offset + r1_size as u64;
+        let r2_size = r2_enc.len();
+        f.write_all(&r2_enc).unwrap();
+
+        // Record 3: B (live)
+        let r3 = DataRecord {
+            timestamp: ts + 2,
+            key: b"bbb".to_vec(),
+            value: b"bbb_value".to_vec(),
+        };
+        let r3_enc = r3.encode();
+        let r3_offset = r2_offset + r2_size as u64;
+        f.write_all(&r3_enc).unwrap();
+        f.sync_all().unwrap();
+        drop(f);
+
+        let total_size = std::fs::metadata(&data_path).unwrap().len();
+
+        // Build KeyDir — only points to latest versions
+        let keydir = KeyDir::new();
+        keydir.insert(
+            "aaa".into(),
+            KeyDirEntry {
+                offset: r2_offset,
+                record_len: r2_size as u32,
+                value_len: r2.value.len() as u32,
+                timestamp: ts + 1,
+            },
+        );
+        keydir.insert(
+            "bbb".into(),
+            KeyDirEntry {
+                offset: r3_offset,
+                record_len: r3.encode().len() as u32,
+                value_len: r3.value.len() as u32,
+                timestamp: ts + 2,
+            },
+        );
+
+        // Only r1 is dead: it is the one record the KeyDir does not reference.
+        let dead_bytes_counter = AtomicU64::new(r1_size as u64);
+        let data_file_size_counter = AtomicU64::new(total_size);
+
+        let result = compact_data_file(
+            &data_path,
+            &keydir,
+            &dead_bytes_counter,
+            &data_file_size_counter,
+        )
+        .unwrap();
+
+        assert_eq!(result.records_written, 2);
+        assert!(result.bytes_reclaimed > 0);
+        assert!(result.new_file_size < total_size);
+
+        // Verify dead_bytes was reset
+        assert_eq!(dead_bytes_counter.load(Ordering::Relaxed), 0);
+
+        // Verify KeyDir was updated with new offsets
+        let a_entry = keydir.get("aaa").unwrap();
+        assert_eq!(a_entry.offset, FILE_HEADER_SIZE as u64); // first record after header
+        assert_eq!(a_entry.value_len, b"new_value".len() as u32);
+
+        // Records are rewritten in old-offset order, so B must follow A.
+        let b_entry = keydir.get("bbb").unwrap();
+        assert!(b_entry.offset > a_entry.offset);
+
+        // Verify the compacted file can be used to rebuild KeyDir
+        let (rebuilt, dead, _stats) = KeyDir::build_from_data_file(&data_path).unwrap();
+        assert_eq!(rebuilt.len(), 2);
+        assert_eq!(dead, 0); // no dead records in compacted file
+    }
+
+    /// Exercises `should_compact` below the threshold, above it, and on a
+    /// header-only file. Sizes are expressed as 100 payload bytes plus header.
+    #[test]
+    fn should_compact_thresholds() {
+        // Under threshold
+        assert!(!should_compact(10, 100 + FILE_HEADER_SIZE as u64));
+        // Over threshold (dead > 50% of useful)
+        assert!(should_compact(60, 100 + FILE_HEADER_SIZE as u64));
+        // Empty file
+        assert!(!should_compact(0, FILE_HEADER_SIZE as u64));
+    }
+}
@@ -17,6 +17,15 @@ pub enum StorageError {

    #[error("conflict detected: {0}")]
    ConflictError(String),
+
+    #[error("corrupt record: {0}")]
+    CorruptRecord(String),
+
+    #[error("checksum mismatch: expected 0x{expected:08X}, got 0x{actual:08X}")]
+    ChecksumMismatch { expected: u32, actual: u32 },
+
+    #[error("WAL error: {0}")]
+    WalError(String),
 }

 impl From<serde_json::Error> for StorageError {
@@ -0,0 +1,562 @@
+//! KeyDir — in-memory document location index for the Bitcask storage engine.
+//!
+//! Maps document `_id` (hex string) to its location in the append-only data file.
+//! Backed by `DashMap` (a sharded concurrent hash map) for concurrent reads and fine-grained write locking.
+//!
+//! The KeyDir can be rebuilt from a data file scan, or loaded quickly from a
+//! persisted hint file for fast restart.
+
+use std::io::{self, BufReader, BufWriter, Read, Seek, SeekFrom, Write};
+use std::path::Path;
+use std::sync::atomic::{AtomicU64, Ordering};
+
+use dashmap::DashMap;
+
+use crate::error::{StorageError, StorageResult};
+use crate::record::{
+    DataRecord, FileHeader, FileType, RecordScanner, FILE_HEADER_SIZE, FORMAT_VERSION,
+};
+
+// ---------------------------------------------------------------------------
+// KeyDirEntry
+// ---------------------------------------------------------------------------
+
+/// Location of a single document in the data file.
+///
+/// The struct is `Copy`, so lookups hand out values rather than references
+/// into the map.
+#[derive(Debug, Clone, Copy)]
+pub struct KeyDirEntry {
+    /// Byte offset of the record in `data.rdb`.
+    pub offset: u64,
+    /// Total record size on disk (header + payload).
+    pub record_len: u32,
+    /// BSON value length. 0 means tombstone (used during compaction accounting).
+    pub value_len: u32,
+    /// Timestamp (epoch ms) from the record. Used for conflict detection.
+    pub timestamp: u64,
+}
+
+// ---------------------------------------------------------------------------
+// BuildStats — statistics from building KeyDir from a data file scan
+// ---------------------------------------------------------------------------
+
+/// Statistics collected while building a KeyDir from a data file scan.
+#[derive(Debug, Clone, Default)]
+pub struct BuildStats {
+    /// Total records scanned (live + tombstones + superseded).
+    pub total_records_scanned: u64,
+    /// Number of live documents in the final KeyDir.
+    pub live_documents: u64,
+    /// Number of tombstone records encountered.
+    pub tombstones: u64,
+    /// Number of records superseded by a later write for the same key.
+    /// Records removed by a later tombstone are not counted here.
+    pub superseded_records: u64,
+}
+
+// ---------------------------------------------------------------------------
+// KeyDir
+// ---------------------------------------------------------------------------
+
+/// In-memory index mapping document ID → data file location.
+pub struct KeyDir {
+    /// Document ID → location of its current record.
+    map: DashMap<String, KeyDirEntry>,
+    /// Running count of live documents. Incremented when `insert` adds a new
+    /// key, decremented when `remove` deletes one, and reported by `len`.
+    doc_count: AtomicU64,
+}
+
+impl KeyDir {
+    /// Construct a KeyDir with no entries and a document count of zero.
+    pub fn new() -> Self {
+        let map = DashMap::new();
+        let doc_count = AtomicU64::new(0);
+        Self { map, doc_count }
+    }
+
+    /// Store `entry` under `key`, replacing any existing entry.
+    ///
+    /// Returns the entry that was replaced, if any. The document count only
+    /// grows when the key was not present before.
+    pub fn insert(&self, key: String, entry: KeyDirEntry) -> Option<KeyDirEntry> {
+        match self.map.insert(key, entry) {
+            Some(replaced) => Some(replaced),
+            None => {
+                self.doc_count.fetch_add(1, Ordering::Relaxed);
+                None
+            }
+        }
+    }
+
+    /// Return a copy of the entry stored under `key`, if there is one.
+    pub fn get(&self, key: &str) -> Option<KeyDirEntry> {
+        let slot = self.map.get(key)?;
+        Some(*slot.value())
+    }
+
+    /// Delete the entry stored under `key`.
+    ///
+    /// Returns the deleted entry, or `None` (leaving the document count
+    /// unchanged) when the key was not present.
+    pub fn remove(&self, key: &str) -> Option<KeyDirEntry> {
+        let (_, gone) = self.map.remove(key)?;
+        self.doc_count.fetch_sub(1, Ordering::Relaxed);
+        Some(gone)
+    }
+
+    /// Current value of the live-document counter.
+    pub fn len(&self) -> u64 {
+        self.doc_count.load(Ordering::Relaxed)
+    }
+
+    /// `true` when the live-document counter is zero.
+    pub fn is_empty(&self) -> bool {
+        self.doc_count.load(Ordering::Relaxed) == 0
+    }
+
+    /// `true` when an entry is stored under `key`.
+    pub fn contains(&self, key: &str) -> bool {
+        self.map.contains_key(key)
+    }
+
+    /// Call `f(key, entry)` once for every entry currently in the map.
+    pub fn for_each(&self, mut f: impl FnMut(&str, &KeyDirEntry)) {
+        self.map
+            .iter()
+            .for_each(|slot| f(slot.key().as_str(), slot.value()));
+    }
+
+    /// Return an owned copy of every key currently in the map.
+    pub fn keys(&self) -> Vec<String> {
+        let mut out = Vec::new();
+        for slot in self.map.iter() {
+            out.push(slot.key().clone());
+        }
+        out
+    }
+
+    /// Drop every entry and reset the document count to zero.
+    pub fn clear(&self) {
+        self.map.clear();
+        self.doc_count.store(0, Ordering::Relaxed);
+    }
+
+    // -----------------------------------------------------------------------
+    // Build from data file
+    // -----------------------------------------------------------------------
+
+    /// Reconstruct a KeyDir by scanning every record of the data file at `path`.
+    ///
+    /// The file must begin with a `FileHeader` whose type is `FileType::Data`;
+    /// any other type yields `StorageError::CorruptRecord`, as does a record key
+    /// that is not valid UTF-8. Records are replayed in scan order: a regular
+    /// record inserts or replaces the entry for its key, a tombstone removes it.
+    ///
+    /// Returns `(keydir, dead_bytes, stats)`. `dead_bytes` is the summed on-disk
+    /// size of every replaced record, every record removed by a tombstone, and
+    /// every tombstone record itself.
+    pub fn build_from_data_file(path: &Path) -> StorageResult<(Self, u64, BuildStats)> {
+        let mut reader = BufReader::new(std::fs::File::open(path)?);
+
+        // The header must parse and must identify a data file.
+        let mut header_bytes = [0u8; FILE_HEADER_SIZE];
+        reader.read_exact(&mut header_bytes)?;
+        let header = FileHeader::decode(&header_bytes)?;
+        if header.file_type != FileType::Data {
+            return Err(StorageError::CorruptRecord(format!(
+                "expected data file (type 1), got type {:?}",
+                header.file_type
+            )));
+        }
+
+        let keydir = KeyDir::new();
+        let mut dead_bytes: u64 = 0;
+        let mut stats = BuildStats::default();
+
+        for scanned in RecordScanner::new(reader, FILE_HEADER_SIZE as u64) {
+            let (offset, record) = scanned?;
+
+            // Pull out everything needed before the key buffer is consumed.
+            let tombstone = record.is_tombstone();
+            let record_len = record.disk_size() as u32;
+            let value_len = record.value.len() as u32;
+            let timestamp = record.timestamp;
+            let key = match String::from_utf8(record.key) {
+                Ok(k) => k,
+                Err(e) => {
+                    return Err(StorageError::CorruptRecord(format!(
+                        "invalid UTF-8 key: {e}"
+                    )))
+                }
+            };
+
+            stats.total_records_scanned += 1;
+
+            if !tombstone {
+                let entry = KeyDirEntry {
+                    offset,
+                    record_len,
+                    value_len,
+                    timestamp,
+                };
+                // An earlier version of the same key becomes dead weight.
+                if let Some(older) = keydir.insert(key, entry) {
+                    dead_bytes += older.record_len as u64;
+                    stats.superseded_records += 1;
+                }
+                continue;
+            }
+
+            // Tombstone: the record it deletes (if any) and the tombstone
+            // itself are both dead.
+            stats.tombstones += 1;
+            if let Some(deleted) = keydir.remove(&key) {
+                dead_bytes += deleted.record_len as u64;
+            }
+            dead_bytes += record_len as u64;
+        }
+
+        stats.live_documents = keydir.len();
+        Ok((keydir, dead_bytes, stats))
+    }
+
+    // -----------------------------------------------------------------------
+    // Hint file persistence (for fast startup)
+    // -----------------------------------------------------------------------
+
+    /// Persist the KeyDir to a hint file for fast restart.
+    ///
+    /// Hint file format (after the 64-byte file header):
+    /// For each entry: [key_len:u32 LE][key bytes][offset:u64 LE][record_len:u32 LE][value_len:u32 LE][timestamp:u64 LE]
+    ///
+    /// The hint is first written to `<path>.tmp`, synced to disk, and then
+    /// renamed over `path`, so a crash while writing never leaves a truncated
+    /// hint file at `path`. On error the temporary file is removed
+    /// (best-effort) and any existing hint at `path` is left as it was.
+    pub fn persist_to_hint_file(&self, path: &Path) -> StorageResult<()> {
+        // Keep the temp file next to the target so the rename stays within
+        // one directory.
+        let mut tmp_name = path.as_os_str().to_os_string();
+        tmp_name.push(".tmp");
+        let tmp_path = std::path::PathBuf::from(tmp_name);
+
+        if let Err(e) = self.write_hint_entries(&tmp_path) {
+            let _ = std::fs::remove_file(&tmp_path);
+            return Err(e);
+        }
+        if let Err(e) = std::fs::rename(&tmp_path, path) {
+            let _ = std::fs::remove_file(&tmp_path);
+            return Err(e.into());
+        }
+        Ok(())
+    }
+
+    /// Write the hint header and every entry to a new file at `path`, then
+    /// flush and sync it.
+    fn write_hint_entries(&self, path: &Path) -> StorageResult<()> {
+        let file = std::fs::File::create(path)?;
+        let mut writer = BufWriter::new(file);
+
+        // Write file header
+        let hdr = FileHeader::new(FileType::Hint);
+        writer.write_all(&hdr.encode())?;
+
+        // Write entries
+        for entry in self.map.iter() {
+            let key_bytes = entry.key().as_bytes();
+            let key_len = key_bytes.len() as u32;
+            writer.write_all(&key_len.to_le_bytes())?;
+            writer.write_all(key_bytes)?;
+            writer.write_all(&entry.value().offset.to_le_bytes())?;
+            writer.write_all(&entry.value().record_len.to_le_bytes())?;
+            writer.write_all(&entry.value().value_len.to_le_bytes())?;
+            writer.write_all(&entry.value().timestamp.to_le_bytes())?;
+        }
+
+        // flush() only hands the bytes to the OS; sync_all() makes them
+        // durable before the caller renames the file into place.
+        writer.flush()?;
+        writer.get_ref().sync_all()?;
+        Ok(())
+    }
+
+    /// Load a KeyDir from the hint file at `path`.
+    ///
+    /// Returns `Ok(None)` when the file does not exist or is too short to hold
+    /// a file header. Returns `StorageError::CorruptRecord` when the header is
+    /// not a hint header, when its version is newer than `FORMAT_VERSION`, or
+    /// when a key is not valid UTF-8.
+    ///
+    /// Entries are read until end-of-file is reached while reading an entry's
+    /// key length. End-of-file anywhere later inside an entry is propagated
+    /// as an I/O error.
+    pub fn load_from_hint_file(path: &Path) -> StorageResult<Option<Self>> {
+        if !path.exists() {
+            return Ok(None);
+        }
+
+        let mut reader = BufReader::new(std::fs::File::open(path)?);
+
+        // Header: a file too short to contain one is treated as "no hint".
+        let mut header_bytes = [0u8; FILE_HEADER_SIZE];
+        if let Err(e) = reader.read_exact(&mut header_bytes) {
+            if e.kind() == io::ErrorKind::UnexpectedEof {
+                return Ok(None);
+            }
+            return Err(e.into());
+        }
+        let header = FileHeader::decode(&header_bytes)?;
+        if header.file_type != FileType::Hint {
+            return Err(StorageError::CorruptRecord(format!(
+                "expected hint file (type 3), got type {:?}",
+                header.file_type
+            )));
+        }
+        if header.version > FORMAT_VERSION {
+            return Err(StorageError::CorruptRecord(format!(
+                "hint file version {} is newer than supported {}",
+                header.version, FORMAT_VERSION
+            )));
+        }
+
+        let keydir = KeyDir::new();
+
+        loop {
+            // key_len: running out of bytes here is the normal end of the file.
+            let mut len_bytes = [0u8; 4];
+            if let Err(e) = reader.read_exact(&mut len_bytes) {
+                if e.kind() == io::ErrorKind::UnexpectedEof {
+                    break;
+                }
+                return Err(e.into());
+            }
+
+            // key
+            let mut key_bytes = vec![0u8; u32::from_le_bytes(len_bytes) as usize];
+            reader.read_exact(&mut key_bytes)?;
+            let key = String::from_utf8(key_bytes)
+                .map_err(|e| StorageError::CorruptRecord(format!("invalid UTF-8 key: {e}")))?;
+
+            // Fixed-size tail: offset(8) + record_len(4) + value_len(4) + timestamp(8)
+            let mut tail = [0u8; 8 + 4 + 4 + 8];
+            reader.read_exact(&mut tail)?;
+            let (offset_raw, remainder) = tail.split_at(8);
+            let (record_len_raw, remainder) = remainder.split_at(4);
+            let (value_len_raw, timestamp_raw) = remainder.split_at(4);
+
+            let entry = KeyDirEntry {
+                offset: u64::from_le_bytes(offset_raw.try_into().unwrap()),
+                record_len: u32::from_le_bytes(record_len_raw.try_into().unwrap()),
+                value_len: u32::from_le_bytes(value_len_raw.try_into().unwrap()),
+                timestamp: u64::from_le_bytes(timestamp_raw.try_into().unwrap()),
+            };
+            keydir.insert(key, entry);
+        }
+
+        Ok(Some(keydir))
+    }
+
+    // -----------------------------------------------------------------------
+    // Hint file validation
+    // -----------------------------------------------------------------------
+
+    /// Validate this KeyDir (loaded from a hint file) against the actual data file.
+    /// Returns `Ok(true)` if the hint appears consistent, `Ok(false)` if a rebuild
+    /// from the data file is recommended.
+    ///
+    /// Checks:
+    /// 1. All entry offsets are >= FILE_HEADER_SIZE.
+    /// 2. All entry offsets + record_len fit within the data file size. The sum
+    ///    is computed with overflow checking, so a garbage offset from a corrupt
+    ///    hint is rejected instead of wrapping around.
+    /// 3. Up to `sample_size` entries, evenly spaced in offset order, are
+    ///    spot-checked by reading the record at the offset and verifying the
+    ///    key matches.
+    ///
+    /// If the data file is missing or shorter than a file header, the hint is
+    /// considered consistent only when it is empty. A `sample_size` of 0 skips
+    /// the spot check and relies on the bounds checks alone.
+    ///
+    /// Only failures to open or seek the data file are returned as errors; a
+    /// record that cannot be decoded yields `Ok(false)`.
+    pub fn validate_against_data_file(&self, data_path: &Path, sample_size: usize) -> StorageResult<bool> {
+        let file_size = std::fs::metadata(data_path)
+            .map(|m| m.len())
+            .unwrap_or(0);
+
+        if file_size < FILE_HEADER_SIZE as u64 {
+            // Data file is too small to even contain a header
+            return Ok(self.is_empty());
+        }
+
+        // Pass 1: bounds check all entries
+        let mut all_keys: Vec<(String, KeyDirEntry)> = Vec::with_capacity(self.len() as usize);
+        let mut bounds_ok = true;
+        self.for_each(|key, entry| {
+            let in_bounds = entry.offset >= FILE_HEADER_SIZE as u64
+                && entry
+                    .offset
+                    .checked_add(entry.record_len as u64)
+                    .map_or(false, |end| end <= file_size);
+            if !in_bounds {
+                bounds_ok = false;
+            }
+            all_keys.push((key.to_string(), *entry));
+        });
+
+        if !bounds_ok {
+            return Ok(false);
+        }
+
+        // Pass 2: spot-check a sample of entries by reading records from data.rdb
+        // Nothing to sample, or the caller asked for no samples. The latter
+        // must return here: the stride computation below divides by sample_size.
+        if all_keys.is_empty() || sample_size == 0 {
+            return Ok(true);
+        }
+
+        // Sort by offset for sequential I/O, then take every `step`-th entry
+        // so the samples are spread across the whole file.
+        all_keys.sort_by_key(|(_, e)| e.offset);
+        let step = if all_keys.len() <= sample_size {
+            1
+        } else {
+            all_keys.len() / sample_size
+        };
+
+        let mut file = std::fs::File::open(data_path)?;
+        for (expected_key, entry) in all_keys.iter().step_by(step).take(sample_size) {
+            // Seek to the entry's offset and try to decode the record
+            file.seek(SeekFrom::Start(entry.offset))?;
+            match DataRecord::decode_from(&mut file) {
+                Ok(Some((record, _disk_size))) => {
+                    let record_key = String::from_utf8_lossy(&record.key);
+                    if record_key != *expected_key {
+                        return Ok(false);
+                    }
+                }
+                Ok(None) | Err(_) => {
+                    return Ok(false);
+                }
+            }
+        }
+
+        Ok(true)
+    }
+}
+
+impl Default for KeyDir {
+    /// Same as [`KeyDir::new`]: an empty index.
+    fn default() -> Self {
+        KeyDir::new()
+    }
+}
+
+// ---------------------------------------------------------------------------
+// Tests
+// ---------------------------------------------------------------------------
+
+#[cfg(test)]
+mod tests {
+    use super::*;
+    use crate::record::DataRecord;
+    use std::io::Write;
+
+    /// Insert, look up and remove a single key, checking `len`/`contains`
+    /// along the way.
+    #[test]
+    fn basic_insert_get_remove() {
+        let kd = KeyDir::new();
+        assert!(kd.is_empty());
+
+        let entry = KeyDirEntry {
+            offset: 100,
+            record_len: 50,
+            value_len: 30,
+            timestamp: 1700000000000,
+        };
+
+        assert!(kd.insert("abc".into(), entry).is_none());
+        assert_eq!(kd.len(), 1);
+        assert!(kd.contains("abc"));
+
+        let got = kd.get("abc").unwrap();
+        assert_eq!(got.offset, 100);
+        assert_eq!(got.value_len, 30);
+
+        let removed = kd.remove("abc").unwrap();
+        assert_eq!(removed.offset, 100);
+        assert_eq!(kd.len(), 0);
+        assert!(!kd.contains("abc"));
+    }
+
+    /// Re-inserting an existing key returns the old entry and leaves the
+    /// document count unchanged.
+    #[test]
+    fn insert_overwrites_returns_previous() {
+        let kd = KeyDir::new();
+        let e1 = KeyDirEntry {
+            offset: 100,
+            record_len: 50,
+            value_len: 30,
+            timestamp: 1,
+        };
+        let e2 = KeyDirEntry {
+            offset: 200,
+            record_len: 60,
+            value_len: 40,
+            timestamp: 2,
+        };
+
+        kd.insert("k".into(), e1);
+        assert_eq!(kd.len(), 1);
+
+        let prev = kd.insert("k".into(), e2).unwrap();
+        assert_eq!(prev.offset, 100);
+        // Count stays at 1 (overwrite, not new)
+        assert_eq!(kd.len(), 1);
+        assert_eq!(kd.get("k").unwrap().offset, 200);
+    }
+
+    /// Scanning a file with insert A, insert B, tombstone A leaves only B live
+    /// and reports the tombstone in the stats.
+    #[test]
+    fn build_from_data_file() {
+        let dir = tempfile::tempdir().unwrap();
+        let data_path = dir.path().join("data.rdb");
+
+        // Write a data file with 3 records: insert A, insert B, delete A
+        {
+            let mut f = std::fs::File::create(&data_path).unwrap();
+            let hdr = FileHeader::new(FileType::Data);
+            f.write_all(&hdr.encode()).unwrap();
+
+            let r1 = DataRecord {
+                timestamp: 1,
+                key: b"aaa".to_vec(),
+                value: b"val_a".to_vec(),
+            };
+            let r2 = DataRecord {
+                timestamp: 2,
+                key: b"bbb".to_vec(),
+                value: b"val_b".to_vec(),
+            };
+            let r3 = DataRecord {
+                timestamp: 3,
+                key: b"aaa".to_vec(),
+                value: vec![], // tombstone
+            };
+            f.write_all(&r1.encode()).unwrap();
+            f.write_all(&r2.encode()).unwrap();
+            f.write_all(&r3.encode()).unwrap();
+        }
+
+        let (kd, dead_bytes, stats) = KeyDir::build_from_data_file(&data_path).unwrap();
+
+        // Only B should be live
+        assert_eq!(kd.len(), 1);
+        assert!(kd.contains("bbb"));
+        assert!(!kd.contains("aaa"));
+
+        // Dead bytes: r1 (aaa live, then superseded by tombstone) + r3 (tombstone itself)
+        assert!(dead_bytes > 0);
+
+        // Stats
+        assert_eq!(stats.total_records_scanned, 3);
+        assert_eq!(stats.live_documents, 1);
+        assert_eq!(stats.tombstones, 1);
+        assert_eq!(stats.superseded_records, 0); // aaa was removed by tombstone, not superseded
+    }
+
+    /// Entries written by `persist_to_hint_file` come back unchanged from
+    /// `load_from_hint_file`.
+    #[test]
+    fn hint_file_roundtrip() {
+        let dir = tempfile::tempdir().unwrap();
+        let hint_path = dir.path().join("keydir.hint");
+
+        let kd = KeyDir::new();
+        kd.insert(
+            "doc1".into(),
+            KeyDirEntry {
+                offset: 64,
+                record_len: 100,
+                value_len: 80,
+                timestamp: 1000,
+            },
+        );
+        kd.insert(
+            "doc2".into(),
+            KeyDirEntry {
+                offset: 164,
+                record_len: 200,
+                value_len: 150,
+                timestamp: 2000,
+            },
+        );
+
+        kd.persist_to_hint_file(&hint_path).unwrap();
+        let loaded = KeyDir::load_from_hint_file(&hint_path).unwrap().unwrap();
+
+        assert_eq!(loaded.len(), 2);
+        let e1 = loaded.get("doc1").unwrap();
+        assert_eq!(e1.offset, 64);
+        assert_eq!(e1.record_len, 100);
+        assert_eq!(e1.value_len, 80);
+        assert_eq!(e1.timestamp, 1000);
+
+        let e2 = loaded.get("doc2").unwrap();
+        assert_eq!(e2.offset, 164);
+        assert_eq!(e2.timestamp, 2000);
+    }
+
+    /// Loading a hint file that does not exist yields `Ok(None)`.
+    // NOTE(review): relies on this fixed /tmp path not existing on the test
+    // machine; a path inside a tempdir would be more robust.
+    #[test]
+    fn hint_file_nonexistent_returns_none() {
+        let result = KeyDir::load_from_hint_file(Path::new("/tmp/nonexistent_hint_file.hint"));
+        assert!(result.unwrap().is_none());
+    }
+
+    /// `for_each` and `keys` both visit every key; results are sorted before
+    /// comparison because map iteration order is not relied upon.
+    #[test]
+    fn for_each_and_keys() {
+        let kd = KeyDir::new();
+        let e = KeyDirEntry {
+            offset: 0,
+            record_len: 10,
+            value_len: 5,
+            timestamp: 1,
+        };
+        kd.insert("x".into(), e);
+        kd.insert("y".into(), e);
+
+        let mut collected = Vec::new();
+        kd.for_each(|k, _| collected.push(k.to_string()));
+        collected.sort();
+        assert_eq!(collected, vec!["x", "y"]);
+
+        let mut keys = kd.keys();
+        keys.sort();
+        assert_eq!(keys, vec!["x", "y"]);
+    }
+}
@@ -2,21 +2,31 @@
 //!
 //! Provides the [`StorageAdapter`] trait and two concrete implementations:
 //! - [`MemoryStorageAdapter`] -- fast in-memory store backed by `DashMap`
-//! - [`FileStorageAdapter`] -- JSON-file-per-collection persistent store
+//! - [`FileStorageAdapter`] -- Bitcask-style append-only log with crash recovery
 //!
-//! Also includes an [`OpLog`] for operation logging and a [`WriteAheadLog`]
-//! for crash recovery.
+//! Also includes an [`OpLog`] for operation logging, a [`BinaryWal`] for
+//! write-ahead logging, and [`compaction`] for dead record reclamation.

 pub mod adapter;
+pub mod binary_wal;
+pub mod compaction;
 pub mod error;
 pub mod file;
+pub mod keydir;
 pub mod memory;
 pub mod oplog;
-pub mod wal;
+pub mod record;
+pub mod validate;

 pub use adapter::StorageAdapter;
+pub use binary_wal::{BinaryWal, WalEntry, WalOpType};
+pub use compaction::{compact_data_file, should_compact, CompactionResult};
 pub use error::{StorageError, StorageResult};
 pub use file::FileStorageAdapter;
+pub use keydir::{BuildStats, KeyDir, KeyDirEntry};
 pub use memory::MemoryStorageAdapter;
 pub use oplog::{OpLog, OpLogEntry, OpLogStats, OpType};
-pub use wal::{WalOp, WalRecord, WriteAheadLog};
+pub use record::{
+    DataRecord, FileHeader, FileType, RecordScanner, FILE_HEADER_SIZE, FILE_MAGIC, FORMAT_VERSION,
+    RECORD_HEADER_SIZE, RECORD_MAGIC,
+};
@@ -0,0 +1,452 @@
+//! Binary data record format for the Bitcask-style storage engine.
+//!
+//! # File Version Header (64 bytes, at offset 0 of every .rdb / .hint file)
+//!
+//! ```text
+//! ┌──────────────┬──────────┬──────────┬──────────┬──────────┬───────────────┐
+//! │ magic        │ version  │ file_type│ flags    │ created  │ reserved      │
+//! │ 8 bytes      │ u16 LE   │ u8       │ u32 LE   │ u64 LE   │ 41 bytes      │
+//! │ "SMARTDB\0"  │          │          │          │ epoch_ms │ (zeros)       │
+//! └──────────────┴──────────┴──────────┴──────────┴──────────┴───────────────┘
+//! ```
+//!
+//! # Data Record (appended after the header)
+//!
+//! ```text
+//! ┌──────────┬──────────┬──────────┬──────────┬──────────┬──────────────────┐
+//! │ magic    │ timestamp│ key_len  │ val_len  │ crc32    │ payload          │
+//! │ u16 LE   │ u64 LE   │ u32 LE   │ u32 LE   │ u32 LE   │ [key][value]     │
+//! │ 0xDB01   │ epoch_ms │          │ 0=delete │          │                  │
+//! └──────────┴──────────┴──────────┴──────────┴──────────┴──────────────────┘
+//! ```
+
+use std::io::{self, Read};
+use std::time::{SystemTime, UNIX_EPOCH};
+
+use crate::error::{StorageError, StorageResult};
+
+// ---------------------------------------------------------------------------
+// Constants
+// ---------------------------------------------------------------------------
+
+/// File-level magic: b"SMARTDB\0"
+pub const FILE_MAGIC: &[u8; 8] = b"SMARTDB\0";
+
+/// Current storage format version.
+pub const FORMAT_VERSION: u16 = 1;
+
+/// File version header size.
+pub const FILE_HEADER_SIZE: usize = 64;
+
+/// Per-record magic.
+pub const RECORD_MAGIC: u16 = 0xDB01;
+
+/// Per-record header size (before payload).
+pub const RECORD_HEADER_SIZE: usize = 2 + 8 + 4 + 4 + 4; // 22 bytes
+
+// ---------------------------------------------------------------------------
+// File type tag stored in the version header
+// ---------------------------------------------------------------------------
+
+/// Tag byte identifying what kind of file a version header belongs to.
+#[derive(Debug, Clone, Copy, PartialEq, Eq)]
+#[repr(u8)]
+pub enum FileType {
+    Data = 1,
+    Wal = 2,
+    Hint = 3,
+}
+
+impl FileType {
+    /// Map a raw tag byte back to a [`FileType`].
+    ///
+    /// Returns `StorageError::CorruptRecord` for any value other than 1, 2 or 3.
+    pub fn from_u8(v: u8) -> StorageResult<Self> {
+        let file_type = match v {
+            1 => Self::Data,
+            2 => Self::Wal,
+            3 => Self::Hint,
+            _ => {
+                let msg = format!("unknown file type tag: {v}");
+                return Err(StorageError::CorruptRecord(msg));
+            }
+        };
+        Ok(file_type)
+    }
+}
+
+// ---------------------------------------------------------------------------
+// File Version Header
+// ---------------------------------------------------------------------------
+
+/// Parsed form of the 64-byte version header stored at the start of a file.
+#[derive(Debug, Clone)]
+pub struct FileHeader {
+    /// Storage format version.
+    pub version: u16,
+    /// Kind of file the header belongs to.
+    pub file_type: FileType,
+    /// Flag bits; `new` initialises them to 0.
+    pub flags: u32,
+    /// Creation time in milliseconds since the UNIX epoch.
+    pub created_ms: u64,
+}
+
+impl FileHeader {
+    /// Build a header of the given type stamped with `FORMAT_VERSION`,
+    /// zero flags and the current time.
+    pub fn new(file_type: FileType) -> Self {
+        let created_ms = now_ms();
+        Self {
+            version: FORMAT_VERSION,
+            file_type,
+            flags: 0,
+            created_ms,
+        }
+    }
+
+    /// Serialise the header into its fixed 64-byte on-disk form.
+    ///
+    /// Layout: magic `[0..8]`, version `[8..10]`, file type `[10]`,
+    /// flags `[11..15]`, created_ms `[15..23]`; the remainder stays zero.
+    pub fn encode(&self) -> [u8; FILE_HEADER_SIZE] {
+        let mut out = [0u8; FILE_HEADER_SIZE];
+        out[..8].copy_from_slice(FILE_MAGIC);
+        out[8..10].copy_from_slice(&u16::to_le_bytes(self.version));
+        out[10] = self.file_type as u8;
+        out[11..15].copy_from_slice(&u32::to_le_bytes(self.flags));
+        out[15..23].copy_from_slice(&u64::to_le_bytes(self.created_ms));
+        out
+    }
+
+    /// Parse a 64-byte header.
+    ///
+    /// Fails with `StorageError::CorruptRecord` when the magic does not match,
+    /// when the version is newer than `FORMAT_VERSION`, when the version is 0,
+    /// or when the file type tag is unknown (checked in that order).
+    pub fn decode(buf: &[u8; FILE_HEADER_SIZE]) -> StorageResult<Self> {
+        if buf[..8] != FILE_MAGIC[..] {
+            return Err(StorageError::CorruptRecord(
+                "invalid file magic — not a SmartDB file".into(),
+            ));
+        }
+
+        let version = u16::from_le_bytes(buf[8..10].try_into().unwrap());
+        if version > FORMAT_VERSION {
+            return Err(StorageError::CorruptRecord(format!(
+                "file format version {version} is newer than supported version {FORMAT_VERSION} — please upgrade"
+            )));
+        }
+        if version == 0 {
+            return Err(StorageError::CorruptRecord(
+                "file format version 0 is invalid".into(),
+            ));
+        }
+
+        let file_type = FileType::from_u8(buf[10])?;
+        let flags = u32::from_le_bytes(buf[11..15].try_into().unwrap());
+        let created_ms = u64::from_le_bytes(buf[15..23].try_into().unwrap());
+
+        Ok(Self {
+            version,
+            file_type,
+            flags,
+            created_ms,
+        })
+    }
+}
+
+// ---------------------------------------------------------------------------
+// Data Record
+// ---------------------------------------------------------------------------
+
+/// A single data record (live document or tombstone).
+#[derive(Debug, Clone)]
+pub struct DataRecord {
+    /// Record timestamp, written as a little-endian `u64` in the header.
+    pub timestamp: u64,
+    /// Raw key bytes.
+    pub key: Vec<u8>,
+    /// BSON value bytes. Empty for tombstones.
+    pub value: Vec<u8>,
+}
+
+impl DataRecord {
+    /// Whether this record is a tombstone (delete marker).
+    ///
+    /// A record is a tombstone exactly when its value is empty.
+    pub fn is_tombstone(&self) -> bool {
+        self.value.is_empty()
+    }
+
+    /// Total size on disk (header + payload).
+    pub fn disk_size(&self) -> usize {
+        RECORD_HEADER_SIZE + self.key.len() + self.value.len()
+    }
+
+    /// Encode to bytes. CRC32 covers magic + timestamp + key_len + val_len + payload.
+    ///
+    /// The key and value lengths are stored as `u32`; lengths above
+    /// `u32::MAX` are truncated by the cast.
+    pub fn encode(&self) -> Vec<u8> {
+        let key_len = self.key.len() as u32;
+        let val_len = self.value.len() as u32;
+        let mut buf = Vec::with_capacity(self.disk_size());
+
+        // Write fields WITHOUT crc first to compute checksum.
+        buf.extend_from_slice(&RECORD_MAGIC.to_le_bytes()); // 2
+        buf.extend_from_slice(&self.timestamp.to_le_bytes()); // 8
+        buf.extend_from_slice(&key_len.to_le_bytes()); // 4
+        buf.extend_from_slice(&val_len.to_le_bytes()); // 4
+        // placeholder for crc32 — filled in after computing
+        buf.extend_from_slice(&0u32.to_le_bytes()); // 4
+        buf.extend_from_slice(&self.key); // key_len
+        buf.extend_from_slice(&self.value); // val_len
+
+        // CRC covers everything except the crc32 field itself:
+        // bytes [0..18] (magic+ts+key_len+val_len) + bytes [22..] (payload)
+        let mut hasher = crc32fast::Hasher::new();
+        hasher.update(&buf[0..18]);
+        hasher.update(&buf[22..]);
+        let crc = hasher.finalize();
+        buf[18..22].copy_from_slice(&crc.to_le_bytes());
+
+        buf
+    }
+
+    /// Decode a record from a reader. Returns the record and its total disk size.
+    ///
+    /// Returns `Ok(None)` when the reader hits EOF before a full record header
+    /// could be read. This covers both an empty stream and a header that is cut
+    /// short, because `read_exact` reports both as `UnexpectedEof`.
+    ///
+    /// # Errors
+    /// - `StorageError::CorruptRecord` if the record magic is wrong or the
+    ///   declared key/value lengths do not fit in `usize`.
+    /// - An I/O error (kind `UnexpectedEof`) if the payload is shorter than the
+    ///   header declares.
+    /// - `StorageError::ChecksumMismatch` if the CRC32 does not match.
+    pub fn decode_from<R: Read>(reader: &mut R) -> StorageResult<Option<(Self, usize)>> {
+        // Largest buffer reserved before any payload byte has actually been read.
+        const MAX_PREALLOC: usize = 64 * 1024;
+
+        // Read header
+        let mut hdr = [0u8; RECORD_HEADER_SIZE];
+        match reader.read_exact(&mut hdr) {
+            Ok(()) => {}
+            Err(e) if e.kind() == io::ErrorKind::UnexpectedEof => return Ok(None),
+            Err(e) => return Err(e.into()),
+        }
+
+        let magic = u16::from_le_bytes([hdr[0], hdr[1]]);
+        if magic != RECORD_MAGIC {
+            return Err(StorageError::CorruptRecord(format!(
+                "invalid record magic: 0x{magic:04X}, expected 0x{RECORD_MAGIC:04X}"
+            )));
+        }
+
+        let timestamp = u64::from_le_bytes(hdr[2..10].try_into().unwrap());
+        let key_len = u32::from_le_bytes(hdr[10..14].try_into().unwrap()) as usize;
+        let val_len = u32::from_le_bytes(hdr[14..18].try_into().unwrap()) as usize;
+        let stored_crc = u32::from_le_bytes(hdr[18..22].try_into().unwrap());
+
+        // The lengths come from an unverified header, so guard the sum
+        // (it can overflow `usize` on 32-bit targets).
+        let payload_len = key_len.checked_add(val_len).ok_or_else(|| {
+            StorageError::CorruptRecord(format!(
+                "record payload length overflow: key_len={key_len}, val_len={val_len}"
+            ))
+        })?;
+
+        // Read payload through `take` so a corrupt length field cannot force a
+        // multi-gigabyte allocation up front; the buffer only grows as bytes
+        // actually arrive.
+        let mut payload = Vec::with_capacity(payload_len.min(MAX_PREALLOC));
+        let got = reader
+            .by_ref()
+            .take(payload_len as u64)
+            .read_to_end(&mut payload)?;
+        if got != payload_len {
+            // Same error kind `read_exact` reports for a short read.
+            return Err(io::Error::new(
+                io::ErrorKind::UnexpectedEof,
+                "failed to fill whole buffer",
+            )
+            .into());
+        }
+
+        // Verify CRC: covers header bytes [0..18] + payload
+        let mut hasher = crc32fast::Hasher::new();
+        hasher.update(&hdr[0..18]);
+        hasher.update(&payload);
+        let computed_crc = hasher.finalize();
+        if computed_crc != stored_crc {
+            return Err(StorageError::ChecksumMismatch {
+                expected: stored_crc,
+                actual: computed_crc,
+            });
+        }
+
+        // Split the payload in place: the tail becomes the value, the head the key.
+        let value = payload.split_off(key_len);
+        let key = payload;
+        let disk_size = RECORD_HEADER_SIZE + payload_len;
+
+        Ok(Some((
+            DataRecord {
+                timestamp,
+                key,
+                value,
+            },
+            disk_size,
+        )))
+    }
+}
+
+// ---------------------------------------------------------------------------
+// Record Scanner — iterate records from a byte slice or reader
+// ---------------------------------------------------------------------------
+
+/// Scans records sequentially from a reader, yielding (offset, record) pairs.
+/// Starts reading from the current reader position. The `base_offset` parameter
+/// indicates the byte offset in the file where reading begins (typically
+/// `FILE_HEADER_SIZE` for a data file).
+///
+/// After the first decode error the scanner yields that error once and then
+/// returns `None` forever: the reader position is undefined after a failed
+/// decode, so any further records or offsets would be meaningless.
+pub struct RecordScanner<R> {
+    reader: R,
+    /// File offset of the next record to be decoded.
+    offset: u64,
+    /// Set once a decode error has been yielded; ends iteration.
+    failed: bool,
+}
+
+impl<R: Read> RecordScanner<R> {
+    /// Create a scanner over `reader`, reporting offsets starting at `base_offset`.
+    pub fn new(reader: R, base_offset: u64) -> Self {
+        Self {
+            reader,
+            offset: base_offset,
+            failed: false,
+        }
+    }
+}
+
+impl<R: Read> Iterator for RecordScanner<R> {
+    /// (file_offset, record) or an error. Iteration stops on EOF or error.
+    type Item = StorageResult<(u64, DataRecord)>;
+
+    fn next(&mut self) -> Option<Self::Item> {
+        if self.failed {
+            return None;
+        }
+        match DataRecord::decode_from(&mut self.reader) {
+            Ok(Some((record, disk_size))) => {
+                let offset = self.offset;
+                self.offset += disk_size as u64;
+                Some(Ok((offset, record)))
+            }
+            Ok(None) => None, // clean EOF
+            Err(e) => {
+                // Latch: do not keep decoding from an unknown stream position.
+                self.failed = true;
+                Some(Err(e))
+            }
+        }
+    }
+}
+
+// ---------------------------------------------------------------------------
+// Helpers
+// ---------------------------------------------------------------------------
+
+/// Current time in milliseconds since UNIX epoch.
+///
+/// Returns 0 instead of panicking if the system clock reports a time before
+/// the UNIX epoch.
+pub fn now_ms() -> u64 {
+    SystemTime::now()
+        .duration_since(UNIX_EPOCH)
+        .map(|elapsed| elapsed.as_millis() as u64)
+        .unwrap_or(0)
+}
+
+// ---------------------------------------------------------------------------
+// Tests
+// ---------------------------------------------------------------------------
+
+#[cfg(test)]
+mod tests {
+    use super::*;
+
+    /// Encoding then decoding a fresh header returns the same field values.
+    #[test]
+    fn file_header_roundtrip() {
+        let hdr = FileHeader::new(FileType::Data);
+        let buf = hdr.encode();
+        assert_eq!(buf.len(), FILE_HEADER_SIZE);
+
+        let decoded = FileHeader::decode(&buf).unwrap();
+        assert_eq!(decoded.version, FORMAT_VERSION);
+        assert_eq!(decoded.file_type, FileType::Data);
+        assert_eq!(decoded.flags, 0);
+        assert_eq!(decoded.created_ms, hdr.created_ms);
+    }
+
+    #[test]
+    fn file_header_rejects_bad_magic() {
+        let mut buf = [0u8; FILE_HEADER_SIZE];
+        buf[0..8].copy_from_slice(b"BADMAGIC");
+        assert!(FileHeader::decode(&buf).is_err());
+    }
+
+    #[test]
+    fn file_header_rejects_future_version() {
+        let mut hdr = FileHeader::new(FileType::Data);
+        hdr.version = FORMAT_VERSION + 1;
+        // `encode` writes `self.version` as-is, so the buffer needs no patching.
+        assert!(FileHeader::decode(&hdr.encode()).is_err());
+    }
+
+    #[test]
+    fn file_header_rejects_version_zero() {
+        let mut hdr = FileHeader::new(FileType::Data);
+        hdr.version = 0;
+        assert!(FileHeader::decode(&hdr.encode()).is_err());
+    }
+
+    #[test]
+    fn file_header_rejects_unknown_file_type() {
+        let mut buf = FileHeader::new(FileType::Data).encode();
+        // Byte 10 holds the file type tag; 0xFF is not a known tag.
+        buf[10] = 0xFF;
+        assert!(FileHeader::decode(&buf).is_err());
+    }
+
+    #[test]
+    fn record_roundtrip_live() {
+        let rec = DataRecord {
+            timestamp: 1700000000000,
+            key: b"abc123".to_vec(),
+            value: b"\x10\x00\x00\x00\x02hi\x00\x03\x00\x00\x00ok\x00\x00".to_vec(),
+        };
+        let encoded = rec.encode();
+        assert_eq!(encoded.len(), rec.disk_size());
+
+        let mut cursor = std::io::Cursor::new(&encoded);
+        let (decoded, size) = DataRecord::decode_from(&mut cursor).unwrap().unwrap();
+        assert_eq!(size, encoded.len());
+        assert_eq!(decoded.timestamp, rec.timestamp);
+        assert_eq!(decoded.key, rec.key);
+        assert_eq!(decoded.value, rec.value);
+        assert!(!decoded.is_tombstone());
+    }
+
+    #[test]
+    fn record_roundtrip_tombstone() {
+        let rec = DataRecord {
+            timestamp: 1700000000000,
+            key: b"def456".to_vec(),
+            value: vec![],
+        };
+        assert!(rec.is_tombstone());
+        let encoded = rec.encode();
+
+        let mut cursor = std::io::Cursor::new(&encoded);
+        let (decoded, _) = DataRecord::decode_from(&mut cursor).unwrap().unwrap();
+        assert!(decoded.is_tombstone());
+        assert_eq!(decoded.key, b"def456");
+    }
+
+    #[test]
+    fn record_detects_corruption() {
+        let rec = DataRecord {
+            timestamp: 42,
+            key: b"key".to_vec(),
+            value: b"value".to_vec(),
+        };
+        let mut encoded = rec.encode();
+        // Invert the last payload byte
+        let last = encoded.len() - 1;
+        encoded[last] ^= 0xFF;
+
+        let mut cursor = std::io::Cursor::new(&encoded);
+        let result = DataRecord::decode_from(&mut cursor);
+        assert!(matches!(result, Err(StorageError::ChecksumMismatch { .. })));
+    }
+
+    #[test]
+    fn record_detects_bad_magic() {
+        let rec = DataRecord {
+            timestamp: 42,
+            key: b"key".to_vec(),
+            value: b"value".to_vec(),
+        };
+        let mut encoded = rec.encode();
+        encoded[0] = 0xFF;
+        encoded[1] = 0xFF;
+
+        let mut cursor = std::io::Cursor::new(&encoded);
+        let result = DataRecord::decode_from(&mut cursor);
+        assert!(matches!(result, Err(StorageError::CorruptRecord(_))));
+    }
+
+    #[test]
+    fn eof_returns_none() {
+        let empty: &[u8] = &[];
+        let mut cursor = std::io::Cursor::new(empty);
+        let result = DataRecord::decode_from(&mut cursor).unwrap();
+        assert!(result.is_none());
+    }
+
+    /// A header cut short is reported the same way as a clean EOF.
+    #[test]
+    fn truncated_header_returns_none() {
+        let rec = DataRecord {
+            timestamp: 42,
+            key: b"key".to_vec(),
+            value: b"value".to_vec(),
+        };
+        let encoded = rec.encode();
+        let partial = &encoded[..RECORD_HEADER_SIZE - 1];
+
+        let mut cursor = std::io::Cursor::new(partial);
+        let result = DataRecord::decode_from(&mut cursor).unwrap();
+        assert!(result.is_none());
+    }
+
+    /// A complete header followed by a short payload is an error, not EOF.
+    #[test]
+    fn truncated_payload_is_an_error() {
+        let rec = DataRecord {
+            timestamp: 42,
+            key: b"key".to_vec(),
+            value: b"value".to_vec(),
+        };
+        let encoded = rec.encode();
+        let partial = &encoded[..encoded.len() - 1];
+
+        let mut cursor = std::io::Cursor::new(partial);
+        assert!(DataRecord::decode_from(&mut cursor).is_err());
+    }
+
+    #[test]
+    fn scanner_iterates_multiple_records() {
+        let records = vec![
+            DataRecord {
+                timestamp: 1,
+                key: b"a".to_vec(),
+                value: b"v1".to_vec(),
+            },
+            DataRecord {
+                timestamp: 2,
+                key: b"b".to_vec(),
+                value: b"v2".to_vec(),
+            },
+            DataRecord {
+                timestamp: 3,
+                key: b"c".to_vec(),
+                value: vec![],
+            },
+        ];
+
+        let mut buf = Vec::new();
+        for r in &records {
+            buf.extend_from_slice(&r.encode());
+        }
+
+        let scanner = RecordScanner::new(std::io::Cursor::new(&buf), 0);
+        let results: Vec<_> = scanner.collect::<Result<Vec<_>, _>>().unwrap();
+        assert_eq!(results.len(), 3);
+        assert_eq!(results[0].1.key, b"a");
+        assert_eq!(results[1].1.key, b"b");
+        assert!(results[2].1.is_tombstone());
+
+        // Verify offsets are correct
+        assert_eq!(results[0].0, 0);
+        assert_eq!(results[1].0, records[0].disk_size() as u64);
+        assert_eq!(
+            results[2].0,
+            (records[0].disk_size() + records[1].disk_size()) as u64
+        );
+    }
+}
@@ -0,0 +1,324 @@
+//! Data integrity validation for RustDb storage directories.
+//!
+//! Provides offline validation of data files without starting the server.
+//! Checks header magic, record CRC32 checksums, duplicate IDs, and
+//! keydir.hint consistency.
+
+use std::collections::HashMap;
+use std::io::{BufReader, Read};
+use std::path::Path;
+
+use crate::error::{StorageError, StorageResult};
+use crate::keydir::KeyDir;
+use crate::record::{FileHeader, FileType, RecordScanner, FILE_HEADER_SIZE};
+
+/// Result of validating an entire data directory.
+pub struct ValidationReport {
+    /// One report per collection directory that contained a `data.rdb` file.
+    pub collections: Vec<CollectionReport>,
+}
+
+/// Result of validating a single collection.
+pub struct CollectionReport {
+    /// Name of the database directory the collection was found in.
+    pub db: String,
+    /// Name of the collection directory.
+    pub collection: String,
+    /// True only when the file header decoded and its file type is `Data`.
+    pub header_valid: bool,
+    /// Number of records decoded successfully (live and tombstone).
+    pub total_records: u64,
+    /// Number of keys whose most recent record is not a tombstone.
+    pub live_documents: u64,
+    /// Number of tombstone records seen.
+    pub tombstones: u64,
+    /// Keys with more than one non-tombstone record in the data file, sorted.
+    pub duplicate_ids: Vec<String>,
+    /// Record decode failures classified as checksum failures.
+    pub checksum_errors: u64,
+    /// Record decode failures of any other kind.
+    pub decode_errors: u64,
+    /// Size of `data.rdb` in bytes, from file metadata.
+    pub data_file_size: u64,
+    /// Whether `keydir.hint` was present when validation started.
+    pub hint_file_exists: bool,
+    /// Hint entries flagged as stale: the key is not live in the data file,
+    /// or the entry's record range extends past the end of the data file.
+    pub orphaned_hint_entries: u64,
+    /// Free-form messages for every problem encountered.
+    pub errors: Vec<String>,
+}
+
+impl ValidationReport {
+    /// Whether any errors were found across all collections.
+    ///
+    /// A collection counts as erroneous if its header is invalid or any of its
+    /// error counters / lists is non-empty.
+    pub fn has_errors(&self) -> bool {
+        for c in &self.collections {
+            let clean = c.header_valid
+                && c.duplicate_ids.is_empty()
+                && c.checksum_errors == 0
+                && c.decode_errors == 0
+                && c.orphaned_hint_entries == 0
+                && c.errors.is_empty();
+            if !clean {
+                return true;
+            }
+        }
+        false
+    }
+
+    /// Print a human-readable summary to stdout.
+    ///
+    /// Emits one section per collection followed by a final line with the
+    /// number of collections checked and the total error count.
+    pub fn print_summary(&self) {
+        println!("=== SmartDB Data Integrity Report ===");
+        println!();
+
+        let mut grand_total = 0u64;
+
+        for report in &self.collections {
+            let header_status = if report.header_valid { "OK" } else { "INVALID" };
+            let dup_count = report.duplicate_ids.len();
+
+            println!("Database: {}", report.db);
+            println!("  Collection: {}", report.collection);
+            println!("    Header:       {}", header_status);
+            println!(
+                "    Records:      {} ({} live, {} tombstones)",
+                report.total_records, report.live_documents, report.tombstones
+            );
+            println!("    Data size:    {} bytes", report.data_file_size);
+
+            if dup_count == 0 {
+                println!("    Duplicates:   0");
+            } else {
+                // Show at most five ids, then summarise the rest.
+                let preview = report
+                    .duplicate_ids
+                    .iter()
+                    .take(5)
+                    .map(String::as_str)
+                    .collect::<Vec<&str>>()
+                    .join(", ");
+                let remainder = match dup_count {
+                    n if n > 5 => format!(", ... and {} more", n - 5),
+                    _ => String::new(),
+                };
+                println!(
+                    "    Duplicates:   {} (ids: {}{})",
+                    dup_count, preview, remainder
+                );
+            }
+
+            match report.checksum_errors {
+                0 => println!("    CRC errors:   0"),
+                n => println!("    CRC errors:   {}", n),
+            }
+
+            if report.decode_errors > 0 {
+                println!("    Decode errors: {}", report.decode_errors);
+            }
+
+            if !report.hint_file_exists {
+                println!("    Hint file:    absent");
+            } else if report.orphaned_hint_entries > 0 {
+                println!(
+                    "    Hint file:    STALE ({} orphaned entries)",
+                    report.orphaned_hint_entries
+                );
+            } else {
+                println!("    Hint file:    OK");
+            }
+
+            for message in &report.errors {
+                println!("    ERROR: {}", message);
+            }
+
+            println!();
+
+            // An invalid header counts as one error; everything else is summed.
+            let header_errors = if report.header_valid { 0 } else { 1 };
+            grand_total += header_errors
+                + dup_count as u64
+                + report.checksum_errors
+                + report.decode_errors
+                + report.orphaned_hint_entries
+                + report.errors.len() as u64;
+        }
+
+        println!(
+            "Summary: {} collection(s) checked, {} error(s) found.",
+            self.collections.len(),
+            grand_total
+        );
+    }
+}
+
+/// Validate all collections in a data directory.
+///
+/// The directory structure is expected to be:
+/// ```text
+/// {base_path}/{db}/{collection}/data.rdb
+/// ```
+///
+/// Entries that are not directories, directory names that are not valid
+/// Unicode, and collection directories without a `data.rdb` are skipped.
+/// Returns an I/O error if `base_path` does not exist or a directory listing
+/// fails. The resulting reports are sorted by database, then collection.
+pub fn validate_data_directory(base_path: &str) -> StorageResult<ValidationReport> {
+    let root = Path::new(base_path);
+    if !root.exists() {
+        let not_found = std::io::Error::new(
+            std::io::ErrorKind::NotFound,
+            format!("data directory not found: {base_path}"),
+        );
+        return Err(StorageError::IoError(not_found));
+    }
+
+    let mut reports = Vec::new();
+
+    // First level: database directories.
+    for db_dir in std::fs::read_dir(root)? {
+        let db_dir = db_dir?;
+        if !db_dir.file_type()?.is_dir() {
+            continue;
+        }
+        let db_name = match db_dir.file_name().into_string() {
+            Ok(name) => name,
+            Err(_) => continue,
+        };
+
+        // Second level: collection directories.
+        for coll_dir in std::fs::read_dir(db_dir.path())? {
+            let coll_dir = coll_dir?;
+            if !coll_dir.file_type()?.is_dir() {
+                continue;
+            }
+            let coll_name = match coll_dir.file_name().into_string() {
+                Ok(name) => name,
+                Err(_) => continue,
+            };
+
+            let coll_path = coll_dir.path();
+            if coll_path.join("data.rdb").exists() {
+                reports.push(validate_collection(&db_name, &coll_name, &coll_path));
+            }
+        }
+    }
+
+    // Sort for deterministic output
+    reports.sort_by(|a, b| {
+        a.db.cmp(&b.db)
+            .then_with(|| a.collection.cmp(&b.collection))
+    });
+
+    Ok(ValidationReport {
+        collections: reports,
+    })
+}
+
+/// Validate a single collection directory.
+///
+/// Steps, in order:
+/// 1. stat and open `data.rdb`, then decode its file header;
+/// 2. scan every record, tracking live keys, tombstones and repeated keys;
+/// 3. if `keydir.hint` is present, load it and flag stale entries.
+///
+/// Problems are recorded in the returned report rather than returned as
+/// errors. A failure to stat, open or decode the header ends validation early.
+/// Record scanning stops at the first decode failure because the stream
+/// position is unknown afterwards.
+fn validate_collection(db: &str, coll: &str, coll_dir: &Path) -> CollectionReport {
+    let data_path = coll_dir.join("data.rdb");
+    let hint_path = coll_dir.join("keydir.hint");
+
+    let mut report = CollectionReport {
+        db: db.to_string(),
+        collection: coll.to_string(),
+        header_valid: false,
+        total_records: 0,
+        live_documents: 0,
+        tombstones: 0,
+        duplicate_ids: Vec::new(),
+        checksum_errors: 0,
+        decode_errors: 0,
+        data_file_size: 0,
+        hint_file_exists: hint_path.exists(),
+        orphaned_hint_entries: 0,
+        errors: Vec::new(),
+    };
+
+    // Get file size
+    match std::fs::metadata(&data_path) {
+        Ok(m) => report.data_file_size = m.len(),
+        Err(e) => {
+            report.errors.push(format!("cannot stat data.rdb: {e}"));
+            return report;
+        }
+    }
+
+    // Open and validate header
+    let file = match std::fs::File::open(&data_path) {
+        Ok(f) => f,
+        Err(e) => {
+            report.errors.push(format!("cannot open data.rdb: {e}"));
+            return report;
+        }
+    };
+    let mut reader = BufReader::new(file);
+
+    let mut hdr_buf = [0u8; FILE_HEADER_SIZE];
+    if let Err(e) = reader.read_exact(&mut hdr_buf) {
+        report.errors.push(format!("cannot read header: {e}"));
+        return report;
+    }
+
+    match FileHeader::decode(&hdr_buf) {
+        Ok(hdr) => {
+            if hdr.file_type != FileType::Data {
+                // Wrong type is reported, but the records are still scanned.
+                report.errors.push(format!(
+                    "wrong file type: expected Data, got {:?}",
+                    hdr.file_type
+                ));
+            } else {
+                report.header_valid = true;
+            }
+        }
+        Err(e) => {
+            report.errors.push(format!("invalid header: {e}"));
+            return report;
+        }
+    }
+
+    // Scan all records
+    let mut id_counts: HashMap<String, u64> = HashMap::new();
+    let mut live_ids: std::collections::HashSet<String> = std::collections::HashSet::new();
+    let scanner = RecordScanner::new(reader, FILE_HEADER_SIZE as u64);
+
+    for result in scanner {
+        match result {
+            Ok((_offset, record)) => {
+                report.total_records += 1;
+                let key = String::from_utf8_lossy(&record.key).into_owned();
+
+                if record.is_tombstone() {
+                    report.tombstones += 1;
+                    live_ids.remove(&key);
+                } else {
+                    *id_counts.entry(key.clone()).or_insert(0) += 1;
+                    live_ids.insert(key);
+                }
+            }
+            Err(e) => {
+                // Classify by error variant, not by the wording of its message.
+                if matches!(e, StorageError::ChecksumMismatch { .. }) {
+                    report.checksum_errors += 1;
+                } else {
+                    report.decode_errors += 1;
+                }
+                // Cannot continue scanning after a decode error — the stream position is lost
+                report.errors.push(format!("record decode error: {e}"));
+                break;
+            }
+        }
+    }
+
+    report.live_documents = live_ids.len() as u64;
+
+    // Find duplicates (keys that appeared more than once as live inserts).
+    // NOTE(review): the count is never reset, so a key rewritten by a later
+    // record or re-inserted after a tombstone is also listed — confirm that
+    // this is the intended definition of "duplicate".
+    report.duplicate_ids = id_counts
+        .into_iter()
+        .filter(|(_, count)| *count > 1)
+        .map(|(id, _)| id)
+        .collect();
+    report.duplicate_ids.sort();
+
+    // Validate hint file if present
+    if report.hint_file_exists {
+        match KeyDir::load_from_hint_file(&hint_path) {
+            Ok(Some(hint_kd)) => {
+                let data_len = report.data_file_size;
+                let mut stale = 0u64;
+
+                // An entry is stale when its key is not live in the data file
+                // or its record range runs past the end of the data file.
+                // Each entry is counted once even if both conditions hold.
+                hint_kd.for_each(|key, entry| {
+                    let unknown_key = !live_ids.contains(key);
+                    // checked_add: offsets come from the hint file and may be
+                    // corrupt; an overflowing range is out of bounds.
+                    let out_of_bounds = entry
+                        .offset
+                        .checked_add(entry.record_len as u64)
+                        .map_or(true, |end| end > data_len);
+                    if unknown_key || out_of_bounds {
+                        stale += 1;
+                    }
+                });
+
+                report.orphaned_hint_entries += stale;
+            }
+            Ok(None) => {
+                // The loader returned no keydir although the file was present.
+                report.errors.push("hint file exists but is empty".into());
+            }
+            Err(e) => {
+                report.errors.push(format!("hint file decode error: {e}"));
+            }
+        }
+    }
+
+    report
+}
@@ -1,186 +0,0 @@
-//! Write-Ahead Log (WAL) for crash recovery.
-//!
-//! Before any mutation is applied to storage, it is first written to the WAL.
-//! On recovery, uncommitted WAL entries can be replayed or discarded.
-
-use std::path::PathBuf;
-use std::sync::atomic::{AtomicU64, Ordering};
-
-use bson::Document;
-use serde::{Deserialize, Serialize};
-use tokio::io::AsyncWriteExt;
-use tracing::{debug, warn};
-
-use crate::error::StorageResult;
-
-/// WAL operation kind.
-#[derive(Debug, Clone, Serialize, Deserialize, PartialEq, Eq)]
-pub enum WalOp {
-    Insert,
-    Update,
-    Delete,
-}
-
-/// A single WAL record.
-#[derive(Debug, Clone, Serialize, Deserialize)]
-pub struct WalRecord {
-    /// Sequence number.
-    pub seq: u64,
-    /// Operation kind.
-    pub op: WalOp,
-    /// Database name.
-    pub db: String,
-    /// Collection name.
-    pub collection: String,
-    /// Document id (hex string).
-    pub document_id: String,
-    /// Document data (for insert/update).
-    pub document: Option<Document>,
-    /// Whether this record has been committed (applied to storage).
-    pub committed: bool,
-    /// CRC32 checksum of the serialized payload for integrity verification.
-    pub checksum: u32,
-}
-
-/// Write-ahead log that persists records to a file.
-pub struct WriteAheadLog {
-    path: PathBuf,
-    next_seq: AtomicU64,
-}
-
-impl WriteAheadLog {
-    /// Create a new WAL at the given file path.
-    pub fn new(path: PathBuf) -> Self {
-        Self {
-            path,
-            next_seq: AtomicU64::new(1),
-        }
-    }
-
-    /// Initialize the WAL (create file if needed, load sequence counter).
-    pub async fn initialize(&self) -> StorageResult<()> {
-        if let Some(parent) = self.path.parent() {
-            tokio::fs::create_dir_all(parent).await?;
-        }
-        if self.path.exists() {
-            // Load existing records to find the max sequence number.
-            let records = self.read_all().await?;
-            if let Some(max_seq) = records.iter().map(|r| r.seq).max() {
-                self.next_seq.store(max_seq + 1, Ordering::SeqCst);
-            }
-        }
-        debug!("WAL initialized at {:?}", self.path);
-        Ok(())
-    }
-
-    /// Append a record to the WAL. Returns the sequence number.
-    pub async fn append(
-        &self,
-        op: WalOp,
-        db: &str,
-        collection: &str,
-        document_id: &str,
-        document: Option<Document>,
-    ) -> StorageResult<u64> {
-        let seq = self.next_seq.fetch_add(1, Ordering::SeqCst);
-
-        // Compute checksum over the payload.
-        let payload = serde_json::json!({
-            "op": op,
-            "db": db,
-            "collection": collection,
-            "document_id": document_id,
-        });
-        let payload_bytes = serde_json::to_vec(&payload)?;
-        let checksum = crc32fast::hash(&payload_bytes);
-
-        let record = WalRecord {
-            seq,
-            op,
-            db: db.to_string(),
-            collection: collection.to_string(),
-            document_id: document_id.to_string(),
-            document,
-            committed: false,
-            checksum,
-        };
-
-        let line = serde_json::to_string(&record)?;
-        let mut file = tokio::fs::OpenOptions::new()
-            .create(true)
-            .append(true)
-            .open(&self.path)
-            .await?;
-        file.write_all(line.as_bytes()).await?;
-        file.write_all(b"\n").await?;
-        file.flush().await?;
-
-        Ok(seq)
-    }
-
-    /// Mark a WAL record as committed by rewriting the file.
-    pub async fn mark_committed(&self, seq: u64) -> StorageResult<()> {
-        let mut records = self.read_all().await?;
-        for record in &mut records {
-            if record.seq == seq {
-                record.committed = true;
-            }
-        }
-        self.write_all(&records).await
-    }
-
-    /// Read all WAL records.
-    pub async fn read_all(&self) -> StorageResult<Vec<WalRecord>> {
-        if !self.path.exists() {
-            return Ok(vec![]);
-        }
-        let data = tokio::fs::read_to_string(&self.path).await?;
-        let mut records = Vec::new();
-        for line in data.lines() {
-            if line.trim().is_empty() {
-                continue;
-            }
-            match serde_json::from_str::<WalRecord>(line) {
-                Ok(record) => records.push(record),
-                Err(e) => {
-                    warn!("skipping corrupt WAL record: {e}");
-                }
-            }
-        }
-        Ok(records)
-    }
-
-    /// Get all uncommitted records (for replay during recovery).
-    pub async fn uncommitted(&self) -> StorageResult<Vec<WalRecord>> {
-        let records = self.read_all().await?;
-        Ok(records.into_iter().filter(|r| !r.committed).collect())
-    }
-
-    /// Truncate the WAL, removing all committed records.
-    pub async fn truncate_committed(&self) -> StorageResult<()> {
-        let records = self.read_all().await?;
-        let uncommitted: Vec<_> = records.into_iter().filter(|r| !r.committed).collect();
-        self.write_all(&uncommitted).await
-    }
-
-    /// Clear the entire WAL.
-    pub async fn clear(&self) -> StorageResult<()> {
-        if self.path.exists() {
-            tokio::fs::write(&self.path, "").await?;
-        }
-        self.next_seq.store(1, Ordering::SeqCst);
-        Ok(())
-    }
-
-    /// Write all records to the WAL file (overwrites).
-    async fn write_all(&self, records: &[WalRecord]) -> StorageResult<()> {
-        let mut content = String::new();
-        for record in records {
-            let line = serde_json::to_string(record)?;
-            content.push_str(&line);
-            content.push('\n');
-        }
-        tokio::fs::write(&self.path, content).await?;
-        Ok(())
-    }
-}
@@ -1,6 +1,8 @@
 pub mod management;

+use std::path::PathBuf;
 use std::sync::Arc;
+use std::time::Duration;

 use anyhow::Result;
 use dashmap::DashMap;
@@ -14,7 +16,7 @@ use rustdb_config::{RustDbOptions, StorageType};
 use rustdb_wire::{WireCodec, OP_QUERY};
 use rustdb_wire::{encode_op_msg_response, encode_op_reply_response};
 use rustdb_storage::{StorageAdapter, MemoryStorageAdapter, FileStorageAdapter, OpLog};
-// IndexEngine is used indirectly via CommandContext
+use rustdb_index::{IndexEngine, IndexOptions};
 use rustdb_txn::{TransactionEngine, SessionEngine};
 use rustdb_commands::{CommandRouter, CommandContext};

@@ -33,7 +35,16 @@ impl RustDb {
        // Create storage adapter
        let storage: Arc<dyn StorageAdapter> = match options.storage {
            StorageType::Memory => {
-                let adapter = MemoryStorageAdapter::new();
+                let adapter = if let Some(ref pp) = options.persist_path {
+                    tracing::info!("MemoryStorageAdapter with periodic persistence to {}", pp);
+                    MemoryStorageAdapter::with_persist_path(PathBuf::from(pp))
+                } else {
+                    tracing::warn!(
+                        "SmartDB is using in-memory storage — data will NOT survive a restart. \
+                         Set storage to 'file' for durable persistence."
+                    );
+                    MemoryStorageAdapter::new()
+                };
                Arc::new(adapter)
            }
            StorageType::File => {
@@ -49,9 +60,99 @@ impl RustDb {
        // Initialize storage
        storage.initialize().await?;

+        // Restore any previously persisted state (no-op for file storage and
+        // memory storage without a persist_path).
+        storage.restore().await?;
+
+        // Spawn periodic persistence task for memory storage with persist_path.
+        // The task runs for the lifetime of the runtime and calls `persist()` once per
+        // interval; failures are logged and the loop keeps going.
+        if options.storage == StorageType::Memory && options.persist_path.is_some() {
+            let persist_storage = storage.clone();
+            // `tokio::time::interval` panics when the period is zero, which would kill
+            // the spawned task and silently disable persistence. Clamp to at least 1 ms.
+            let interval_ms = options.persist_interval_ms.max(1);
+            tokio::spawn(async move {
+                let mut interval = tokio::time::interval(Duration::from_millis(interval_ms));
+                interval.tick().await; // skip the immediate first tick
+                loop {
+                    interval.tick().await;
+                    if let Err(e) = persist_storage.persist().await {
+                        tracing::error!("Periodic persist failed: {}", e);
+                    }
+                }
+            });
+        }
+
+        let indexes: Arc<DashMap<String, IndexEngine>> = Arc::new(DashMap::new());
+
+        // Restore persisted indexes from storage.
+        //
+        // Restoration is best-effort: a storage error for one database or collection
+        // is logged and that entry is skipped, so startup still proceeds.
+        match storage.list_databases().await {
+            Err(e) => {
+                tracing::warn!(error = %e, "failed to list databases; no indexes restored");
+            }
+            Ok(databases) => {
+                for db_name in &databases {
+                    let collections = match storage.list_collections(db_name).await {
+                        Ok(collections) => collections,
+                        Err(e) => {
+                            tracing::warn!(
+                                database = %db_name,
+                                error = %e,
+                                "failed to list collections; skipping index restore"
+                            );
+                            continue;
+                        }
+                    };
+
+                    for coll_name in &collections {
+                        let ns_key = format!("{}.{}", db_name, coll_name);
+
+                        let specs = match storage.get_indexes(db_name, coll_name).await {
+                            Ok(specs) => specs,
+                            Err(e) => {
+                                tracing::warn!(
+                                    namespace = %ns_key,
+                                    error = %e,
+                                    "failed to read index specs; skipping index restore"
+                                );
+                                continue;
+                            }
+                        };
+
+                        // Collections that only have the default `_id_` index are skipped.
+                        let has_custom = specs.iter().any(|s| {
+                            s.get_str("name").unwrap_or("_id_") != "_id_"
+                        });
+                        if !has_custom {
+                            continue;
+                        }
+
+                        let mut engine = IndexEngine::new();
+
+                        for spec in &specs {
+                            let name = spec.get_str("name").unwrap_or("").to_string();
+                            if name == "_id_" {
+                                continue; // already created by IndexEngine::new()
+                            }
+                            // A spec without a document-valued `key` cannot be restored.
+                            let key = match spec.get("key") {
+                                Some(bson::Bson::Document(k)) => k.clone(),
+                                _ => continue,
+                            };
+                            let unique = matches!(spec.get("unique"), Some(bson::Bson::Boolean(true)));
+                            let sparse = matches!(spec.get("sparse"), Some(bson::Bson::Boolean(true)));
+                            // Only non-negative values are accepted: `as u64` on a negative
+                            // integer would wrap around to an enormous TTL.
+                            let expire_after_seconds = match spec.get("expireAfterSeconds") {
+                                Some(bson::Bson::Int32(n)) if *n >= 0 => Some(*n as u64),
+                                Some(bson::Bson::Int64(n)) if *n >= 0 => Some(*n as u64),
+                                _ => None,
+                            };
+
+                            let options = IndexOptions {
+                                name: Some(name.clone()),
+                                unique,
+                                sparse,
+                                expire_after_seconds,
+                            };
+                            if let Err(e) = engine.create_index(key, options) {
+                                tracing::warn!(
+                                    namespace = %ns_key,
+                                    index = %name,
+                                    error = %e,
+                                    "failed to restore index"
+                                );
+                            }
+                        }
+
+                        // Rebuild index data from existing documents.
+                        match storage.find_all(db_name, coll_name).await {
+                            Ok(docs) => {
+                                if !docs.is_empty() {
+                                    engine.rebuild_from_documents(&docs);
+                                }
+                            }
+                            Err(e) => {
+                                // The engine is still registered below, but without
+                                // entries for the documents that could not be read.
+                                tracing::warn!(
+                                    namespace = %ns_key,
+                                    error = %e,
+                                    "failed to load documents; restored indexes were not rebuilt"
+                                );
+                            }
+                        }
+
+                        tracing::info!(
+                            namespace = %ns_key,
+                            indexes = engine.list_indexes().len(),
+                            "restored indexes"
+                        );
+                        indexes.insert(ns_key, engine);
+                    }
+                }
+            }
+        }
+
        let ctx = Arc::new(CommandContext {
            storage,
-            indexes: Arc::new(DashMap::new()),
+            indexes,
            transactions: Arc::new(TransactionEngine::new()),
            sessions: Arc::new(SessionEngine::new(30 * 60 * 1000, 60 * 1000)),
            cursors: Arc::new(DashMap::new()),
@@ -25,6 +25,10 @@ struct Cli {
    #[arg(long)]
    validate: bool,

+    /// Validate data integrity of a storage directory (offline check)
+    #[arg(long, value_name = "PATH")]
+    validate_data: Option<String>,
+
    /// Run in management mode (JSON-over-stdin IPC for TypeScript wrapper)
    #[arg(long)]
    management: bool,
@@ -55,7 +59,7 @@ async fn main() -> Result<()> {
    let options = RustDbOptions::from_file(&cli.config)
        .map_err(|e| anyhow::anyhow!("Failed to load config '{}': {}", cli.config, e))?;

-    // Validate-only mode
+    // Validate-only mode (config)
    if cli.validate {
        match options.validate() {
            Ok(()) => {
@@ -69,6 +73,18 @@ async fn main() -> Result<()> {
        }
    }

+    // Validate data integrity mode: when `--validate-data <PATH>` is given, check the
+    // storage directory at that path and return without creating or starting the server.
+    if let Some(ref data_path) = cli.validate_data {
+        tracing::info!("Validating data integrity at {}", data_path);
+        // A failure to run the validation itself is propagated as an error from `main`.
+        let report = rustdb_storage::validate::validate_data_directory(data_path)
+            .map_err(|e| anyhow::anyhow!("Validation failed: {}", e))?;
+        report.print_summary();
+        // Errors recorded in the report are signalled to the caller via exit status 1.
+        if report.has_errors() {
+            std::process::exit(1);
+        }
+        return Ok(());
+    }
+    }
+
    // Create and start server
    let mut db = RustDb::new(options).await?;
    db.start().await?;
@@ -0,0 +1,256 @@
+import { expect, tap } from '@git.zone/tstest/tapbundle';
+import * as smartdb from '../ts/index.js';
+import { MongoClient, Db } from 'mongodb';
+import * as fs from 'fs';
+import * as path from 'path';
+import * as os from 'os';
+
+// ---------------------------------------------------------------------------
+// Helpers
+// ---------------------------------------------------------------------------
+
+let tmpDir: string;
+let server: smartdb.SmartdbServer;
+let client: MongoClient;
+let db: Db;
+
+/**
+ * Creates a new, uniquely named directory under the OS temp directory
+ * (name prefix `smartdb-compact-test-`) and returns its path.
+ */
+function makeTmpDir(): string {
+  return fs.mkdtempSync(path.join(os.tmpdir(), 'smartdb-compact-test-'));
+}
+
+/**
+ * Recursively removes `dir` and everything in it; does nothing if `dir` does not exist.
+ */
+function cleanTmpDir(dir: string): void {
+  if (fs.existsSync(dir)) {
+    fs.rmSync(dir, { recursive: true, force: true });
+  }
+}
+
+/**
+ * Returns the size in bytes of a collection's `data.rdb` file.
+ *
+ * @param storagePath Root directory of the file storage.
+ * @param dbName Database directory name.
+ * @param collName Collection directory name.
+ * @returns The file size in bytes, or 0 when the file does not exist.
+ */
+function getDataFileSize(storagePath: string, dbName: string, collName: string): number {
+  const dataPath = path.join(storagePath, dbName, collName, 'data.rdb');
+  // Use one stat call instead of existsSync + statSync: the file could disappear
+  // between the two calls (for example if the server replaces it while compacting),
+  // which would make statSync throw ENOENT.
+  const stats = fs.statSync(dataPath, { throwIfNoEntry: false });
+  return stats ? stats.size : 0;
+}
+
+// ============================================================================
+// Compaction: Setup
+// ============================================================================
+
+tap.test('compaction: start server with file storage', async () => {
+  tmpDir = makeTmpDir();
+  server = new smartdb.SmartdbServer({
+    socketPath: path.join(os.tmpdir(), `smartdb-compact-${Date.now()}-${Math.random().toString(36).slice(2)}.sock`),
+    storage: 'file',
+    storagePath: tmpDir,
+  });
+  await server.start();
+
+  client = new MongoClient(server.getConnectionUri(), {
+    directConnection: true,
+    serverSelectionTimeoutMS: 5000,
+  });
+  await client.connect();
+  db = client.db('compactdb');
+});
+
+// ============================================================================
+// Compaction: Updates grow the data file
+// ============================================================================
+
+// Updates one document 50 times and checks that exactly one document with the
+// final counter value remains readable.
+tap.test('compaction: repeated updates grow the data file', async () => {
+  const coll = db.collection('growing');
+
+  // Insert a document
+  await coll.insertOne({ key: 'target', counter: 0, payload: 'x'.repeat(200) });
+
+  const sizeAfterInsert = getDataFileSize(tmpDir, 'compactdb', 'growing');
+  expect(sizeAfterInsert).toBeGreaterThan(0);
+
+  // Update the same document 50 times — each update appends a new record
+  for (let i = 1; i <= 50; i++) {
+    await coll.updateOne(
+      { key: 'target' },
+      { $set: { counter: i, payload: 'y'.repeat(200) } }
+    );
+  }
+
+  // The file size after the updates is deliberately not measured: compaction may
+  // have run during the updates, so we can't assert the file is much larger.
+  // What matters is the data is correct.
+
+  // The collection still has just 1 document
+  const count = await coll.countDocuments();
+  expect(count).toEqual(1);
+
+  const doc = await coll.findOne({ key: 'target' });
+  expect(doc!.counter).toEqual(50);
+});
+
+// ============================================================================
+// Compaction: Deletes create tombstones
+// ============================================================================
+
+// Inserts 100 documents, deletes them all, and checks the collection ends up empty.
+tap.test('compaction: insert-then-delete creates dead space', async () => {
+  const coll = db.collection('tombstones');
+
+  // Insert 100 documents
+  const docs = [];
+  for (let i = 0; i < 100; i++) {
+    docs.push({ idx: i, data: 'delete-me-' + 'z'.repeat(100) });
+  }
+  await coll.insertMany(docs);
+
+  // The inserts must have reached the on-disk data file
+  const sizeAfterInsert = getDataFileSize(tmpDir, 'compactdb', 'tombstones');
+  expect(sizeAfterInsert).toBeGreaterThan(0);
+
+  // Delete all 100
+  const result = await coll.deleteMany({});
+  expect(result.deletedCount).toEqual(100);
+
+  // The file size after the deletes is deliberately not asserted: the file may have
+  // been compacted during deletes (dead > 50% threshold), so it might be very small
+  // (just header + compacted). The operation itself should succeed regardless.
+
+  // But count is 0
+  const count = await coll.countDocuments();
+  expect(count).toEqual(0);
+});
+
+// ============================================================================
+// Compaction: Data integrity after compaction trigger
+// ============================================================================
+
+tap.test('compaction: data file shrinks after heavy updates trigger compaction', async () => {
+  const coll = db.collection('shrinktest');
+
+  // Insert 10 documents with large payloads
+  const docs = [];
+  for (let i = 0; i < 10; i++) {
+    docs.push({ idx: i, data: 'a'.repeat(500) });
+  }
+  await coll.insertMany(docs);
+
+  const sizeAfterInsert = getDataFileSize(tmpDir, 'compactdb', 'shrinktest');
+
+  // Update each document 20 times (creates 200 dead records vs 10 live)
+  // This should trigger compaction (dead > 50% threshold)
+  for (let round = 0; round < 20; round++) {
+    for (let i = 0; i < 10; i++) {
+      await coll.updateOne(
+        { idx: i },
+        { $set: { data: `round-${round}-` + 'b'.repeat(500) } }
+      );
+    }
+  }
+
+  // After compaction, file should be smaller than the pre-compaction peak
+  // (We can't measure the peak exactly, but the final size should be reasonable)
+  const sizeAfterCompaction = getDataFileSize(tmpDir, 'compactdb', 'shrinktest');
+
+  // The file should not be 20x the insert size since compaction should have run
+  // With 10 live records of ~530 bytes each, the file should be roughly that
+  // plus header overhead. Without compaction it would be 210 * ~530 bytes.
+  const maxExpectedSize = sizeAfterInsert * 5; // generous upper bound
+  expect(sizeAfterCompaction).toBeLessThanOrEqual(maxExpectedSize);
+
+  // All documents should still be readable and correct
+  const count = await coll.countDocuments();
+  expect(count).toEqual(10);
+
+  for (let i = 0; i < 10; i++) {
+    const doc = await coll.findOne({ idx: i });
+    expect(doc).toBeTruthy();
+    expect(doc!.data.startsWith('round-19-')).toBeTrue();
+  }
+});
+
+// ============================================================================
+// Compaction: Persistence after compaction + restart
+// ============================================================================
+
+tap.test('compaction: data survives compaction + restart', async () => {
+  await client.close();
+  await server.stop();
+
+  server = new smartdb.SmartdbServer({
+    socketPath: path.join(os.tmpdir(), `smartdb-compact-${Date.now()}-${Math.random().toString(36).slice(2)}.sock`),
+    storage: 'file',
+    storagePath: tmpDir,
+  });
+  await server.start();
+
+  client = new MongoClient(server.getConnectionUri(), {
+    directConnection: true,
+    serverSelectionTimeoutMS: 5000,
+  });
+  await client.connect();
+  db = client.db('compactdb');
+
+  // Verify shrinktest data
+  const coll = db.collection('shrinktest');
+  const count = await coll.countDocuments();
+  expect(count).toEqual(10);
+
+  for (let i = 0; i < 10; i++) {
+    const doc = await coll.findOne({ idx: i });
+    expect(doc).toBeTruthy();
+    expect(doc!.data.startsWith('round-19-')).toBeTrue();
+  }
+
+  // Verify growing collection
+  const growing = db.collection('growing');
+  const growDoc = await growing.findOne({ key: 'target' });
+  expect(growDoc).toBeTruthy();
+  expect(growDoc!.counter).toEqual(50);
+
+  // Verify tombstones collection is empty
+  const tombCount = await db.collection('tombstones').countDocuments();
+  expect(tombCount).toEqual(0);
+});
+
+// ============================================================================
+// Compaction: Mixed operations stress test
+// ============================================================================
+
+tap.test('compaction: mixed insert-update-delete stress test', async () => {
+  const coll = db.collection('stress');
+
+  // Phase 1: Insert 200 documents
+  const batch = [];
+  for (let i = 0; i < 200; i++) {
+    batch.push({ idx: i, value: `initial-${i}`, alive: true });
+  }
+  await coll.insertMany(batch);
+
+  // Phase 2: Update every even-indexed document
+  for (let i = 0; i < 200; i += 2) {
+    await coll.updateOne({ idx: i }, { $set: { value: `updated-${i}` } });
+  }
+
+  // Phase 3: Delete every document where idx % 3 === 0
+  await coll.deleteMany({ idx: { $in: Array.from({ length: 67 }, (_, k) => k * 3) } });
+
+  // Verify: documents where idx % 3 !== 0 should remain
+  const remaining = await coll.find({}).toArray();
+  for (const doc of remaining) {
+    expect(doc.idx % 3).not.toEqual(0);
+    if (doc.idx % 2 === 0) {
+      expect(doc.value).toEqual(`updated-${doc.idx}`);
+    } else {
+      expect(doc.value).toEqual(`initial-${doc.idx}`);
+    }
+  }
+
+  // Count should be 200 - 67 = 133
+  const count = await coll.countDocuments();
+  expect(count).toEqual(133);
+});
+
+// ============================================================================
+// Cleanup
+// ============================================================================
+
+tap.test('compaction: cleanup', async () => {
+  await client.close();
+  await server.stop();
+  cleanTmpDir(tmpDir);
+});
+
+export default tap.start();
@@ -0,0 +1,394 @@
+import { expect, tap } from '@git.zone/tstest/tapbundle';
+import * as smartdb from '../ts/index.js';
+import { MongoClient, Db } from 'mongodb';
+import * as fs from 'fs';
+import * as path from 'path';
+import * as os from 'os';
+
+// ---------------------------------------------------------------------------
+// Helpers
+// ---------------------------------------------------------------------------
+
+let tmpDir: string;
+let server: smartdb.SmartdbServer;
+let client: MongoClient;
+let db: Db;
+
+/**
+ * Creates a new, uniquely named directory under the OS temp directory
+ * (name prefix `smartdb-test-`) and returns its path.
+ */
+function makeTmpDir(): string {
+  return fs.mkdtempSync(path.join(os.tmpdir(), 'smartdb-test-'));
+}
+
+/**
+ * Recursively removes `dir` and everything in it; does nothing if `dir` does not exist.
+ */
+function cleanTmpDir(dir: string): void {
+  if (fs.existsSync(dir)) {
+    fs.rmSync(dir, { recursive: true, force: true });
+  }
+}
+
+// ============================================================================
+// File Storage: Startup
+// ============================================================================
+
+tap.test('file-storage: should start server with file storage', async () => {
+  tmpDir = makeTmpDir();
+  server = new smartdb.SmartdbServer({
+    port: 27118,
+    storage: 'file',
+    storagePath: tmpDir,
+  });
+  await server.start();
+  expect(server.running).toBeTrue();
+});
+
+tap.test('file-storage: should connect MongoClient', async () => {
+  client = new MongoClient('mongodb://127.0.0.1:27118', {
+    directConnection: true,
+    serverSelectionTimeoutMS: 5000,
+  });
+  await client.connect();
+  db = client.db('filetest');
+  expect(db).toBeTruthy();
+});
+
+// ============================================================================
+// File Storage: Data files are created on disk
+// ============================================================================
+
+// Inserts one document and checks the on-disk layout it produces:
+// <storagePath>/<db>/<collection>/data.rdb, starting with the 8-byte SMARTDB magic.
+tap.test('file-storage: inserting creates data files on disk', async () => {
+  const coll = db.collection('diskcheck');
+  await coll.insertOne({ name: 'disk-test', value: 42 });
+
+  // The storage directory should now contain a database directory
+  const dbDir = path.join(tmpDir, 'filetest');
+  expect(fs.existsSync(dbDir)).toBeTrue();
+
+  // Collection directory with data.rdb should exist
+  const collDir = path.join(dbDir, 'diskcheck');
+  expect(fs.existsSync(collDir)).toBeTrue();
+
+  const dataFile = path.join(collDir, 'data.rdb');
+  expect(fs.existsSync(dataFile)).toBeTrue();
+
+  // data.rdb should have the SMARTDB magic header
+  const header = Buffer.alloc(8);
+  const fd = fs.openSync(dataFile, 'r');
+  let bytesRead = 0;
+  try {
+    bytesRead = fs.readSync(fd, header, 0, 8, 0);
+  } finally {
+    // Always release the descriptor, even if the read throws.
+    fs.closeSync(fd);
+  }
+  // The buffer is zero-filled, so a file holding only the 7 bytes "SMARTDB" would
+  // otherwise compare equal to 'SMARTDB\0'. Require a full 8-byte read.
+  expect(bytesRead).toEqual(8);
+  expect(header.toString('ascii')).toEqual('SMARTDB\0');
+});
+
+// ============================================================================
+// File Storage: Full CRUD cycle
+// ============================================================================
+
+tap.test('file-storage: insertOne returns valid id', async () => {
+  const coll = db.collection('crud');
+  const result = await coll.insertOne({ name: 'Alice', age: 30 });
+  expect(result.acknowledged).toBeTrue();
+  expect(result.insertedId).toBeTruthy();
+});
+
+tap.test('file-storage: insertMany returns all ids', async () => {
+  const coll = db.collection('crud');
+  const result = await coll.insertMany([
+    { name: 'Bob', age: 25 },
+    { name: 'Charlie', age: 35 },
+    { name: 'Diana', age: 28 },
+    { name: 'Eve', age: 32 },
+  ]);
+  expect(result.insertedCount).toEqual(4);
+});
+
+tap.test('file-storage: findOne retrieves correct document', async () => {
+  const coll = db.collection('crud');
+  const doc = await coll.findOne({ name: 'Alice' });
+  expect(doc).toBeTruthy();
+  expect(doc!.name).toEqual('Alice');
+  expect(doc!.age).toEqual(30);
+});
+
+tap.test('file-storage: find with filter returns correct subset', async () => {
+  const coll = db.collection('crud');
+  const docs = await coll.find({ age: { $gte: 30 } }).toArray();
+  expect(docs.length).toEqual(3); // Alice(30), Charlie(35), Eve(32)
+  expect(docs.every(d => d.age >= 30)).toBeTrue();
+});
+
+tap.test('file-storage: updateOne modifies document', async () => {
+  const coll = db.collection('crud');
+  const result = await coll.updateOne(
+    { name: 'Alice' },
+    { $set: { age: 31, updated: true } }
+  );
+  expect(result.modifiedCount).toEqual(1);
+
+  const doc = await coll.findOne({ name: 'Alice' });
+  expect(doc!.age).toEqual(31);
+  expect(doc!.updated).toBeTrue();
+});
+
+tap.test('file-storage: deleteOne removes document', async () => {
+  const coll = db.collection('crud');
+  const result = await coll.deleteOne({ name: 'Eve' });
+  expect(result.deletedCount).toEqual(1);
+
+  const doc = await coll.findOne({ name: 'Eve' });
+  expect(doc).toBeNull();
+});
+
+tap.test('file-storage: count reflects current state', async () => {
+  const coll = db.collection('crud');
+  const count = await coll.countDocuments();
+  expect(count).toEqual(4); // 5 inserted - 1 deleted = 4
+});
+
+// ============================================================================
+// File Storage: Persistence across server restart
+// ============================================================================
+
+tap.test('file-storage: stop server for restart test', async () => {
+  await client.close();
+  await server.stop();
+  expect(server.running).toBeFalse();
+});
+
+tap.test('file-storage: restart server with same data path', async () => {
+  server = new smartdb.SmartdbServer({
+    port: 27118,
+    storage: 'file',
+    storagePath: tmpDir,
+  });
+  await server.start();
+  expect(server.running).toBeTrue();
+
+  client = new MongoClient('mongodb://127.0.0.1:27118', {
+    directConnection: true,
+    serverSelectionTimeoutMS: 5000,
+  });
+  await client.connect();
+  db = client.db('filetest');
+});
+
+tap.test('file-storage: data persists after restart', async () => {
+  const coll = db.collection('crud');
+
+  // Alice should still be there with updated age
+  const alice = await coll.findOne({ name: 'Alice' });
+  expect(alice).toBeTruthy();
+  expect(alice!.age).toEqual(31);
+  expect(alice!.updated).toBeTrue();
+
+  // Bob, Charlie, Diana should be there
+  const bob = await coll.findOne({ name: 'Bob' });
+  expect(bob).toBeTruthy();
+  expect(bob!.age).toEqual(25);
+
+  const charlie = await coll.findOne({ name: 'Charlie' });
+  expect(charlie).toBeTruthy();
+
+  const diana = await coll.findOne({ name: 'Diana' });
+  expect(diana).toBeTruthy();
+
+  // Eve should still be deleted
+  const eve = await coll.findOne({ name: 'Eve' });
+  expect(eve).toBeNull();
+});
+
+tap.test('file-storage: count is correct after restart', async () => {
+  const coll = db.collection('crud');
+  const count = await coll.countDocuments();
+  expect(count).toEqual(4);
+});
+
+tap.test('file-storage: can write new data after restart', async () => {
+  const coll = db.collection('crud');
+  const result = await coll.insertOne({ name: 'Frank', age: 45 });
+  expect(result.acknowledged).toBeTrue();
+
+  const doc = await coll.findOne({ name: 'Frank' });
+  expect(doc).toBeTruthy();
+  expect(doc!.age).toEqual(45);
+
+  const count = await coll.countDocuments();
+  expect(count).toEqual(5);
+});
+
+// ============================================================================
+// File Storage: Multiple collections in same database
+// ============================================================================
+
+tap.test('file-storage: multiple collections are independent', async () => {
+  const products = db.collection('products');
+  const orders = db.collection('orders');
+
+  await products.insertMany([
+    { sku: 'A001', name: 'Widget', price: 9.99 },
+    { sku: 'A002', name: 'Gadget', price: 19.99 },
+  ]);
+
+  await orders.insertMany([
+    { orderId: 1, sku: 'A001', qty: 3 },
+    { orderId: 2, sku: 'A002', qty: 1 },
+    { orderId: 3, sku: 'A001', qty: 2 },
+  ]);
+
+  const productCount = await products.countDocuments();
+  const orderCount = await orders.countDocuments();
+  expect(productCount).toEqual(2);
+  expect(orderCount).toEqual(3);
+
+  // Deleting from one collection doesn't affect the other
+  await products.deleteOne({ sku: 'A001' });
+  expect(await products.countDocuments()).toEqual(1);
+  expect(await orders.countDocuments()).toEqual(3);
+});
+
+// ============================================================================
+// File Storage: Multiple databases
+// ============================================================================
+
+tap.test('file-storage: multiple databases are independent', async () => {
+  const db2 = client.db('filetest2');
+  const coll2 = db2.collection('items');
+
+  await coll2.insertOne({ name: 'cross-db-test', source: 'db2' });
+
+  // db2 has 1 doc
+  const count2 = await coll2.countDocuments();
+  expect(count2).toEqual(1);
+
+  // original db is unaffected
+  const crudCount = await db.collection('crud').countDocuments();
+  expect(crudCount).toEqual(5);
+
+  await db2.dropDatabase();
+});
+
+// ============================================================================
+// File Storage: Large batch insert and retrieval
+// ============================================================================
+
+tap.test('file-storage: bulk insert 1000 documents', async () => {
+  const coll = db.collection('bulk');
+  const docs = [];
+  for (let i = 0; i < 1000; i++) {
+    docs.push({ index: i, data: `value-${i}`, timestamp: Date.now() });
+  }
+  const result = await coll.insertMany(docs);
+  expect(result.insertedCount).toEqual(1000);
+});
+
+tap.test('file-storage: find all 1000 documents', async () => {
+  const coll = db.collection('bulk');
+  const docs = await coll.find({}).toArray();
+  expect(docs.length).toEqual(1000);
+});
+
+tap.test('file-storage: range query on 1000 documents', async () => {
+  const coll = db.collection('bulk');
+  const docs = await coll.find({ index: { $gte: 500, $lt: 600 } }).toArray();
+  expect(docs.length).toEqual(100);
+  expect(docs.every(d => d.index >= 500 && d.index < 600)).toBeTrue();
+});
+
+tap.test('file-storage: sorted retrieval with limit', async () => {
+  const coll = db.collection('bulk');
+  const docs = await coll.find({}).sort({ index: -1 }).limit(10).toArray();
+  expect(docs.length).toEqual(10);
+  expect(docs[0].index).toEqual(999);
+  expect(docs[9].index).toEqual(990);
+});
+
+// ============================================================================
+// File Storage: Update many and verify persistence
+// ============================================================================
+
+tap.test('file-storage: updateMany on bulk collection', async () => {
+  const coll = db.collection('bulk');
+  const result = await coll.updateMany(
+    { index: { $lt: 100 } },
+    { $set: { batch: 'first-hundred' } }
+  );
+  expect(result.modifiedCount).toEqual(100);
+
+  const updated = await coll.find({ batch: 'first-hundred' }).toArray();
+  expect(updated.length).toEqual(100);
+});
+
+// ============================================================================
+// File Storage: Delete many and verify
+// ============================================================================
+
+tap.test('file-storage: deleteMany removes correct documents', async () => {
+  const coll = db.collection('bulk');
+  const result = await coll.deleteMany({ index: { $gte: 900 } });
+  expect(result.deletedCount).toEqual(100);
+
+  const remaining = await coll.countDocuments();
+  expect(remaining).toEqual(900);
+});
+
+// ============================================================================
+// File Storage: Persistence of bulk data across restart
+// ============================================================================
+
+tap.test('file-storage: stop server for bulk restart test', async () => {
+  await client.close();
+  await server.stop();
+  expect(server.running).toBeFalse();
+});
+
+tap.test('file-storage: restart and verify bulk data', async () => {
+  server = new smartdb.SmartdbServer({
+    port: 27118,
+    storage: 'file',
+    storagePath: tmpDir,
+  });
+  await server.start();
+
+  client = new MongoClient('mongodb://127.0.0.1:27118', {
+    directConnection: true,
+    serverSelectionTimeoutMS: 5000,
+  });
+  await client.connect();
+  db = client.db('filetest');
+
+  const coll = db.collection('bulk');
+  const count = await coll.countDocuments();
+  expect(count).toEqual(900);
+
+  // Verify the updateMany persisted
+  const firstHundred = await coll.find({ batch: 'first-hundred' }).toArray();
+  expect(firstHundred.length).toEqual(100);
+
+  // Verify deleted docs are gone
+  const over900 = await coll.find({ index: { $gte: 900 } }).toArray();
+  expect(over900.length).toEqual(0);
+});
+
+// ============================================================================
+// File Storage: Index persistence
+// ============================================================================
+
+tap.test('file-storage: default indexes.json exists on disk', async () => {
+  // The indexes.json is created when the collection is first created,
+  // containing the default _id_ index spec.
+  const indexFile = path.join(tmpDir, 'filetest', 'crud', 'indexes.json');
+  expect(fs.existsSync(indexFile)).toBeTrue();
+
+  const indexData = JSON.parse(fs.readFileSync(indexFile, 'utf-8'));
+  const names = indexData.map((i: any) => i.name);
+  expect(names).toContain('_id_');
+});
+
+// ============================================================================
+// Cleanup
+// ============================================================================
+
+tap.test('file-storage: cleanup', async () => {
+  await client.close();
+  await server.stop();
+  expect(server.running).toBeFalse();
+  cleanTmpDir(tmpDir);
+});
+
+export default tap.start();
@@ -0,0 +1,235 @@
+import { expect, tap } from '@git.zone/tstest/tapbundle';
+import * as smartdb from '../ts/index.js';
+import { MongoClient, Db } from 'mongodb';
+import * as fs from 'fs';
+import * as path from 'path';
+import * as os from 'os';
+
+// ---------------------------------------------------------------------------
+// Helpers
+// ---------------------------------------------------------------------------
+
+let tmpDir: string;
+let localDb: smartdb.LocalSmartDb;
+let client: MongoClient;
+let db: Db;
+
+function makeTmpDir(): string {
+  // mkdtempSync appends a random suffix to the prefix and creates the directory.
+  const prefix = path.join(os.tmpdir(), 'smartdb-local-test-');
+  return fs.mkdtempSync(prefix);
+}
+
+function cleanTmpDir(dir: string): void {
+  // Nothing to do when the directory is already gone.
+  if (!fs.existsSync(dir)) {
+    return;
+  }
+  fs.rmSync(dir, { recursive: true, force: true });
+}
+
+// ============================================================================
+// LocalSmartDb: Lifecycle
+// ============================================================================
+
+tap.test('localsmartdb: should start with just a folder path', async () => {
+  tmpDir = makeTmpDir();
+  localDb = new smartdb.LocalSmartDb({ folderPath: tmpDir });
+  const info = await localDb.start();
+
+  expect(localDb.running).toBeTrue();
+  expect(info.socketPath).toBeTruthy();
+  expect(info.connectionUri).toBeTruthy();
+  expect(info.connectionUri.startsWith('mongodb://')).toBeTrue();
+});
+
+tap.test('localsmartdb: should connect via returned connectionUri', async () => {
+  const info = localDb.getConnectionInfo();
+  client = new MongoClient(info.connectionUri, {
+    directConnection: true,
+    serverSelectionTimeoutMS: 5000,
+  });
+  await client.connect();
+  db = client.db('localtest');
+  expect(db).toBeTruthy();
+});
+
+tap.test('localsmartdb: should reject double start', async () => {
+  // The instance was started by an earlier test, so a second start() must fail.
+  const rejected = await (async () => {
+    try {
+      await localDb.start();
+      return false;
+    } catch {
+      return true;
+    }
+  })();
+  expect(rejected).toBeTrue();
+});
+
+// ============================================================================
+// LocalSmartDb: CRUD via Unix socket
+// ============================================================================
+
+tap.test('localsmartdb: insert and find documents', async () => {
+  const coll = db.collection('notes');
+  await coll.insertMany([
+    { title: 'Note 1', body: 'First note', priority: 1 },
+    { title: 'Note 2', body: 'Second note', priority: 2 },
+    { title: 'Note 3', body: 'Third note', priority: 3 },
+  ]);
+
+  const all = await coll.find({}).toArray();
+  expect(all.length).toEqual(3);
+
+  const high = await coll.findOne({ priority: 3 });
+  expect(high).toBeTruthy();
+  expect(high!.title).toEqual('Note 3');
+});
+
+tap.test('localsmartdb: update and verify', async () => {
+  const coll = db.collection('notes');
+  await coll.updateOne(
+    { title: 'Note 2' },
+    { $set: { body: 'Updated second note', edited: true } }
+  );
+
+  const doc = await coll.findOne({ title: 'Note 2' });
+  expect(doc!.body).toEqual('Updated second note');
+  expect(doc!.edited).toBeTrue();
+});
+
+tap.test('localsmartdb: delete and verify', async () => {
+  const coll = db.collection('notes');
+  await coll.deleteOne({ title: 'Note 1' });
+
+  const count = await coll.countDocuments();
+  expect(count).toEqual(2);
+
+  const deleted = await coll.findOne({ title: 'Note 1' });
+  expect(deleted).toBeNull();
+});
+
+// ============================================================================
+// LocalSmartDb: Persistence across restart
+// ============================================================================
+
+tap.test('localsmartdb: stop for restart', async () => {
+  await client.close();
+  await localDb.stop();
+  expect(localDb.running).toBeFalse();
+});
+
+tap.test('localsmartdb: restart with same folder', async () => {
+  localDb = new smartdb.LocalSmartDb({ folderPath: tmpDir });
+  const info = await localDb.start();
+  expect(localDb.running).toBeTrue();
+
+  client = new MongoClient(info.connectionUri, {
+    directConnection: true,
+    serverSelectionTimeoutMS: 5000,
+  });
+  await client.connect();
+  db = client.db('localtest');
+});
+
+tap.test('localsmartdb: data persists after restart', async () => {
+  const coll = db.collection('notes');
+
+  const count = await coll.countDocuments();
+  expect(count).toEqual(2); // 3 inserted - 1 deleted
+
+  const note2 = await coll.findOne({ title: 'Note 2' });
+  expect(note2!.body).toEqual('Updated second note');
+  expect(note2!.edited).toBeTrue();
+
+  const note3 = await coll.findOne({ title: 'Note 3' });
+  expect(note3!.priority).toEqual(3);
+});
+
+// ============================================================================
+// LocalSmartDb: Custom socket path
+// ============================================================================
+
+tap.test('localsmartdb: works with custom socket path', async () => {
+  // Shut down the shared instance while the secondary one is exercised.
+  await client.close();
+  await localDb.stop();
+
+  try {
+    const customSocket = path.join(os.tmpdir(), `smartdb-custom-${Date.now()}.sock`);
+    const tmpDir2 = makeTmpDir();
+    const localDb2 = new smartdb.LocalSmartDb({
+      folderPath: tmpDir2,
+      socketPath: customSocket,
+    });
+
+    try {
+      const info = await localDb2.start();
+      expect(info.socketPath).toEqual(customSocket);
+
+      const client2 = new MongoClient(info.connectionUri, {
+        directConnection: true,
+        serverSelectionTimeoutMS: 5000,
+      });
+      try {
+        await client2.connect();
+        const testDb = client2.db('customsock');
+        await testDb.collection('test').insertOne({ x: 1 });
+        const doc = await testDb.collection('test').findOne({ x: 1 });
+        expect(doc).toBeTruthy();
+      } finally {
+        await client2.close();
+      }
+    } finally {
+      // Always release the secondary server and its temp folder, even when an
+      // assertion above failed; start() may not have succeeded, hence the guard.
+      try {
+        if (localDb2.running) {
+          await localDb2.stop();
+        }
+      } finally {
+        cleanTmpDir(tmpDir2);
+      }
+    }
+  } finally {
+    // Reconnect original for remaining tests — done unconditionally so that a
+    // failure in this test does not cascade into every later test.
+    localDb = new smartdb.LocalSmartDb({ folderPath: tmpDir });
+    const origInfo = await localDb.start();
+    client = new MongoClient(origInfo.connectionUri, {
+      directConnection: true,
+      serverSelectionTimeoutMS: 5000,
+    });
+    await client.connect();
+    db = client.db('localtest');
+  }
+});
+
+// ============================================================================
+// LocalSmartDb: getConnectionUri and getServer helpers
+// ============================================================================
+
+tap.test('localsmartdb: getConnectionUri returns valid uri', async () => {
+  const uri = localDb.getConnectionUri();
+  expect(uri.startsWith('mongodb://')).toBeTrue();
+});
+
+tap.test('localsmartdb: getServer returns the SmartdbServer', async () => {
+  const srv = localDb.getServer();
+  expect(srv).toBeTruthy();
+  expect(srv.running).toBeTrue();
+});
+
+// ============================================================================
+// LocalSmartDb: Data isolation between databases
+// ============================================================================
+
+tap.test('localsmartdb: databases are isolated', async () => {
+  const dbA = client.db('isoA');
+  const dbB = client.db('isoB');
+
+  await dbA.collection('shared').insertOne({ source: 'A', val: 1 });
+  await dbB.collection('shared').insertOne({ source: 'B', val: 2 });
+
+  const docsA = await dbA.collection('shared').find({}).toArray();
+  const docsB = await dbB.collection('shared').find({}).toArray();
+
+  expect(docsA.length).toEqual(1);
+  expect(docsA[0].source).toEqual('A');
+  expect(docsB.length).toEqual(1);
+  expect(docsB[0].source).toEqual('B');
+
+  await dbA.dropDatabase();
+  await dbB.dropDatabase();
+});
+
+// ============================================================================
+// Cleanup
+// ============================================================================
+
+tap.test('localsmartdb: cleanup', async () => {
+  await client.close();
+  await localDb.stop();
+  expect(localDb.running).toBeFalse();
+  cleanTmpDir(tmpDir);
+});
+
+export default tap.start();
@@ -0,0 +1,269 @@
+import { expect, tap } from '@git.zone/tstest/tapbundle';
+import * as smartdb from '../ts/index.js';
+import { MongoClient, Db } from 'mongodb';
+import * as fs from 'fs';
+import * as path from 'path';
+import * as os from 'os';
+
+// ---------------------------------------------------------------------------
+// Helpers
+// ---------------------------------------------------------------------------
+
+let tmpDir: string;
+
+function makeTmpDir(): string {
+  // mkdtempSync appends a random suffix to the prefix and creates the directory.
+  const prefix = path.join(os.tmpdir(), 'smartdb-migration-test-');
+  return fs.mkdtempSync(prefix);
+}
+
+function cleanTmpDir(dir: string): void {
+  // Nothing to do when the directory is already gone.
+  if (!fs.existsSync(dir)) {
+    return;
+  }
+  fs.rmSync(dir, { recursive: true, force: true });
+}
+
+/**
+ * Write a v0 (legacy JSON) storage layout for a single collection:
+ *   {base}/{db}/{coll}.json          — JSON array of the documents
+ *   {base}/{db}/{coll}.indexes.json  — index specs (only the default _id_ index)
+ *
+ * Documents without a truthy `_id` receive a random 24-hex-digit `{ $oid }` id.
+ * The input documents themselves are not mutated (shallow copies are written).
+ */
+function createV0Layout(basePath: string, dbName: string, collName: string, docs: any[]): void {
+  const dbDir = path.join(basePath, dbName);
+  fs.mkdirSync(dbDir, { recursive: true });
+
+  const randomHexDigit = (): string => Math.floor(Math.random() * 16).toString(16);
+
+  // Mimic the extended JSON layout of the legacy files:
+  // an ObjectId is stored as { "$oid": "hex" }
+  const jsonDocs: any[] = [];
+  for (const doc of docs) {
+    const copy = { ...doc };
+    if (!copy._id) {
+      // Fake ObjectId-like hex string
+      copy._id = { '$oid': Array.from({ length: 24 }, randomHexDigit).join('') };
+    }
+    jsonDocs.push(copy);
+  }
+
+  fs.writeFileSync(path.join(dbDir, `${collName}.json`), JSON.stringify(jsonDocs, null, 2));
+
+  const defaultIndexes = [
+    { name: '_id_', key: { _id: 1 } },
+  ];
+  fs.writeFileSync(path.join(dbDir, `${collName}.indexes.json`), JSON.stringify(defaultIndexes, null, 2));
+}
+
+// ============================================================================
+// Migration: v0 → v1 basic
+// ============================================================================
+
+tap.test('migration: detects v0 format and migrates on startup', async () => {
+  tmpDir = makeTmpDir();
+
+  // Create v0 layout with test data
+  createV0Layout(tmpDir, 'mydb', 'users', [
+    { name: 'Alice', age: 30, email: 'alice@test.com' },
+    { name: 'Bob', age: 25, email: 'bob@test.com' },
+    { name: 'Charlie', age: 35, email: 'charlie@test.com' },
+  ]);
+
+  createV0Layout(tmpDir, 'mydb', 'products', [
+    { sku: 'W001', name: 'Widget', price: 9.99 },
+    { sku: 'G001', name: 'Gadget', price: 19.99 },
+  ]);
+
+  // Verify v0 files exist
+  expect(fs.existsSync(path.join(tmpDir, 'mydb', 'users.json'))).toBeTrue();
+  expect(fs.existsSync(path.join(tmpDir, 'mydb', 'products.json'))).toBeTrue();
+
+  // Start server — migration should run automatically
+  const server = new smartdb.SmartdbServer({
+    socketPath: path.join(os.tmpdir(), `smartdb-mig-${Date.now()}-${Math.random().toString(36).slice(2)}.sock`),
+    storage: 'file',
+    storagePath: tmpDir,
+  });
+  await server.start();
+
+  // v1 directories should now exist
+  expect(fs.existsSync(path.join(tmpDir, 'mydb', 'users', 'data.rdb'))).toBeTrue();
+  expect(fs.existsSync(path.join(tmpDir, 'mydb', 'products', 'data.rdb'))).toBeTrue();
+
+  // v0 files should still exist (not deleted)
+  expect(fs.existsSync(path.join(tmpDir, 'mydb', 'users.json'))).toBeTrue();
+  expect(fs.existsSync(path.join(tmpDir, 'mydb', 'products.json'))).toBeTrue();
+
+  // Connect and verify data is accessible
+  const client = new MongoClient(server.getConnectionUri(), {
+    directConnection: true,
+    serverSelectionTimeoutMS: 5000,
+  });
+  await client.connect();
+  const db = client.db('mydb');
+
+  // Users collection
+  const users = await db.collection('users').find({}).toArray();
+  expect(users.length).toEqual(3);
+  const alice = users.find(u => u.name === 'Alice');
+  expect(alice).toBeTruthy();
+  expect(alice!.age).toEqual(30);
+  expect(alice!.email).toEqual('alice@test.com');
+
+  // Products collection
+  const products = await db.collection('products').find({}).toArray();
+  expect(products.length).toEqual(2);
+  const widget = products.find(p => p.sku === 'W001');
+  expect(widget).toBeTruthy();
+  expect(widget!.price).toEqual(9.99);
+
+  await client.close();
+  await server.stop();
+});
+
+// ============================================================================
+// Migration: migrated data survives another restart
+// ============================================================================
+
+tap.test('migration: migrated data persists across restart', async () => {
+  // Reuses the storage directory populated by the preceding migration test.
+  const server = new smartdb.SmartdbServer({
+    socketPath: path.join(os.tmpdir(), `smartdb-mig-${Date.now()}-${Math.random().toString(36).slice(2)}.sock`),
+    storage: 'file',
+    storagePath: tmpDir,
+  });
+  await server.start();
+
+  const client = new MongoClient(server.getConnectionUri(), {
+    directConnection: true,
+    serverSelectionTimeoutMS: 5000,
+  });
+
+  try {
+    await client.connect();
+    const db = client.db('mydb');
+
+    const users = await db.collection('users').find({}).toArray();
+    expect(users.length).toEqual(3);
+
+    const products = await db.collection('products').find({}).toArray();
+    expect(products.length).toEqual(2);
+  } finally {
+    // Shut down even when an assertion failed, so the server process and
+    // socket do not outlive the test.
+    try {
+      await client.close();
+    } finally {
+      await server.stop();
+    }
+  }
+});
+
+// ============================================================================
+// Migration: can write new data after migration
+// ============================================================================
+
+tap.test('migration: new writes work after migration', async () => {
+  const server = new smartdb.SmartdbServer({
+    socketPath: path.join(os.tmpdir(), `smartdb-mig-${Date.now()}-${Math.random().toString(36).slice(2)}.sock`),
+    storage: 'file',
+    storagePath: tmpDir,
+  });
+  await server.start();
+
+  const client = new MongoClient(server.getConnectionUri(), {
+    directConnection: true,
+    serverSelectionTimeoutMS: 5000,
+  });
+  await client.connect();
+  const db = client.db('mydb');
+
+  // Insert new documents
+  await db.collection('users').insertOne({ name: 'Diana', age: 28 });
+  const count = await db.collection('users').countDocuments();
+  expect(count).toEqual(4);
+
+  // Update existing migrated document
+  await db.collection('users').updateOne(
+    { name: 'Alice' },
+    { $set: { age: 31 } }
+  );
+  const alice = await db.collection('users').findOne({ name: 'Alice' });
+  expect(alice!.age).toEqual(31);
+
+  // Delete a migrated document
+  await db.collection('products').deleteOne({ sku: 'G001' });
+  const prodCount = await db.collection('products').countDocuments();
+  expect(prodCount).toEqual(1);
+
+  await client.close();
+  await server.stop();
+  cleanTmpDir(tmpDir);
+});
+
+// ============================================================================
+// Migration: skips already-migrated data
+// ============================================================================
+
+tap.test('migration: no-op for v1 format', async () => {
+  tmpDir = makeTmpDir();
+
+  try {
+    // Start fresh to create v1 layout
+    const server = new smartdb.SmartdbServer({
+      socketPath: path.join(os.tmpdir(), `smartdb-mig-${Date.now()}-${Math.random().toString(36).slice(2)}.sock`),
+      storage: 'file',
+      storagePath: tmpDir,
+    });
+    await server.start();
+
+    const client = new MongoClient(server.getConnectionUri(), {
+      directConnection: true,
+      serverSelectionTimeoutMS: 5000,
+    });
+    try {
+      await client.connect();
+      const db = client.db('v1test');
+      await db.collection('items').insertOne({ x: 1 });
+    } finally {
+      // Always release the first server before the restart below.
+      try {
+        await client.close();
+      } finally {
+        await server.stop();
+      }
+    }
+
+    // Restart — migration should detect v1 and skip
+    const server2 = new smartdb.SmartdbServer({
+      socketPath: path.join(os.tmpdir(), `smartdb-mig-${Date.now()}-${Math.random().toString(36).slice(2)}.sock`),
+      storage: 'file',
+      storagePath: tmpDir,
+    });
+    await server2.start();
+
+    const client2 = new MongoClient(server2.getConnectionUri(), {
+      directConnection: true,
+      serverSelectionTimeoutMS: 5000,
+    });
+    try {
+      await client2.connect();
+      const db2 = client2.db('v1test');
+      const doc = await db2.collection('items').findOne({ x: 1 });
+      expect(doc).toBeTruthy();
+    } finally {
+      try {
+        await client2.close();
+      } finally {
+        await server2.stop();
+      }
+    }
+  } finally {
+    // Remove the temp storage whether or not the assertions passed.
+    cleanTmpDir(tmpDir);
+  }
+});
+
+// ============================================================================
+// Migration: empty storage is handled gracefully
+// ============================================================================
+
+tap.test('migration: empty storage directory works', async () => {
+  tmpDir = makeTmpDir();
+
+  try {
+    const server = new smartdb.SmartdbServer({
+      socketPath: path.join(os.tmpdir(), `smartdb-mig-${Date.now()}-${Math.random().toString(36).slice(2)}.sock`),
+      storage: 'file',
+      storagePath: tmpDir,
+    });
+    await server.start();
+
+    const client = new MongoClient(server.getConnectionUri(), {
+      directConnection: true,
+      serverSelectionTimeoutMS: 5000,
+    });
+    try {
+      await client.connect();
+
+      // Should work fine with empty storage
+      const db = client.db('emptytest');
+      await db.collection('first').insertOne({ hello: 'world' });
+      const doc = await db.collection('first').findOne({ hello: 'world' });
+      expect(doc).toBeTruthy();
+    } finally {
+      // Shut down even when an assertion failed.
+      try {
+        await client.close();
+      } finally {
+        await server.stop();
+      }
+    }
+  } finally {
+    // Remove the temp storage whether or not the assertions passed.
+    cleanTmpDir(tmpDir);
+  }
+});
+
+export default tap.start();
@@ -3,6 +3,6 @@
 */
 export const commitinfo = {
  name: '@push.rocks/smartdb',
-  version: '2.1.0',
+  version: '2.5.2',
  description: 'A MongoDB-compatible embedded database server with wire protocol support, backed by a high-performance Rust engine.'
 }
@@ -7,6 +7,9 @@ export * from './ts_smartdb/index.js';
 export { LocalSmartDb } from './ts_local/index.js';
 export type { ILocalSmartDbOptions, ILocalSmartDbConnectionInfo } from './ts_local/index.js';

+// Export migration
+export { StorageMigrator } from './ts_migration/index.js';
+
 // Export commitinfo
 export { commitinfo };

@@ -1,7 +1,10 @@
 import * as crypto from 'crypto';
+import * as fs from 'fs/promises';
+import * as net from 'net';
 import * as path from 'path';
 import * as os from 'os';
 import { SmartdbServer } from '../ts_smartdb/index.js';
+import { StorageMigrator } from '../ts_migration/index.js';

 /**
 * Connection information returned by LocalSmartDb.start()
@@ -65,6 +68,55 @@ export class LocalSmartDb {
    return path.join(os.tmpdir(), `smartdb-${randomId}.sock`);
  }

+  /**
+   * Probe a Unix socket by attempting to connect to it.
+   *
+   * Resolves `true` once a connection is established and `false` when the
+   * attempt emits an error or the 500 ms socket timeout fires first.
+   * The returned promise never rejects.
+   *
+   * NOTE(review): a timeout is reported as "not alive"; a listener that is
+   * merely slow to accept would then be treated as stale by callers — confirm
+   * this is the intended trade-off.
+   */
+  private static isSocketAlive(socketPath: string): Promise<boolean> {
+    return new Promise<boolean>((resolve) => {
+      const probe = net.createConnection({ path: socketPath });
+
+      // Tear the probe connection down and report the outcome.
+      const finish = (alive: boolean): void => {
+        probe.destroy();
+        resolve(alive);
+      };
+
+      probe.once('connect', () => finish(true));
+      probe.on('error', () => {
+        resolve(false);
+      });
+      probe.setTimeout(500, () => finish(false));
+    });
+  }
+
+  /**
+   * Remove stale smartdb-*.sock files from the OS temp directory (os.tmpdir()).
+   * A socket is considered stale if the connection probe does not succeed.
+   *
+   * Best-effort: if the directory cannot be listed nothing is done, and any
+   * error while inspecting or removing a single entry is ignored.
+   */
+  private static async cleanStaleSockets(): Promise<void> {
+    const tmpDir = os.tmpdir();
+
+    let names: string[];
+    try {
+      names = await fs.readdir(tmpDir);
+    } catch {
+      return;
+    }
+
+    for (const name of names) {
+      const looksLikeOurSocket = name.startsWith('smartdb-') && name.endsWith('.sock');
+      if (!looksLikeOurSocket) continue;
+
+      const candidate = path.join(tmpDir, name);
+      try {
+        const info = await fs.stat(candidate);
+        // Only real socket files are probed; anything else is left untouched.
+        if (info.isSocket() && !(await LocalSmartDb.isSocketAlive(candidate))) {
+          await fs.unlink(candidate);
+        }
+      } catch {
+        // File may have been removed already; ignore
+      }
+    }
+  }
+
  /**
   * Start the local SmartDB server and return connection info
   */
@@ -73,6 +125,13 @@ export class LocalSmartDb {
      throw new Error('LocalSmartDb is already running');
    }

+    // Clean up stale sockets from previous crashed instances
+    await LocalSmartDb.cleanStaleSockets();
+
+    // Run storage migration before starting the Rust engine
+    const migrator = new StorageMigrator(this.options.folderPath);
+    await migrator.run();
+
    // Use provided socket path or generate one
    this.generatedSocketPath = this.options.socketPath ?? this.generateSocketPath();

@@ -0,0 +1,93 @@
+import * as fs from 'fs';
+import * as path from 'path';
+import { migrateV0ToV1 } from './migrators/v0_to_v1.js';
+
+/**
+ * Detected storage format version.
+ * - v0: Legacy JSON format ({db}/{coll}.json files)
+ * - v1: Bitcask binary format ({db}/{coll}/data.rdb directories)
+ */
+type TStorageVersion = 0 | 1;
+
+/**
+ * StorageMigrator — runs before the Rust engine starts.
+ *
+ * Detects the current storage format version and runs the appropriate
+ * migration chain. The Rust engine only knows the current format (v1).
+ *
+ * Migration is safe: original files are never modified or deleted.
+ * On success, a console hint is printed about which old files can be removed.
+ */
+export class StorageMigrator {
+  private storagePath: string;
+
+  /**
+   * @param storagePath Root storage directory ({storagePath}/{db}/...).
+   */
+  constructor(storagePath: string) {
+    this.storagePath = storagePath;
+  }
+
+  /**
+   * Run any needed migrations. Safe to call even if storage is already current
+   * or the storage directory does not exist yet.
+   *
+   * Errors thrown by the underlying migrator propagate to the caller.
+   */
+  async run(): Promise<void> {
+    if (!fs.existsSync(this.storagePath)) {
+      return; // No data yet — nothing to migrate
+    }
+
+    const version = this.detectVersion();
+
+    if (version === 1) {
+      return; // Already current
+    }
+
+    if (version === 0) {
+      console.log(`[smartdb] Detected v0 (JSON) storage format at ${this.storagePath}`);
+      console.log(`[smartdb] Running migration v0 → v1 (Bitcask binary format)...`);
+
+      const deletableFiles = await migrateV0ToV1(this.storagePath);
+
+      if (deletableFiles.length > 0) {
+        console.log(`[smartdb] Migration v0 → v1 complete.`);
+        console.log(`[smartdb] The following old files can be safely deleted:`);
+        for (const f of deletableFiles) {
+          console.log(`[smartdb]   ${f}`);
+        }
+      } else {
+        console.log(`[smartdb] Migration v0 → v1 complete. No old files to clean up.`);
+      }
+    }
+  }
+
+  /**
+   * Detect whether any v0 data still needs migrating.
+   *
+   * v0: a {db}/{coll}.json file exists (excluding {coll}.indexes.json)
+   * v1: collections live in {db}/{coll}/ directories
+   *
+   * Returns 0 only when at least one v0 collection file has no matching
+   * {db}/{coll}/ directory yet. The whole tree is scanned, so the answer does
+   * not depend on the order in which the file system lists entries: a partly
+   * migrated store is still picked up, and a fully migrated store whose old
+   * .json files were kept is reported as current. The "already migrated" test
+   * (target directory exists) is the same one migrateV0ToV1 uses to skip a
+   * collection.
+   */
+  private detectVersion(): TStorageVersion {
+    const entries = fs.readdirSync(this.storagePath, { withFileTypes: true });
+
+    for (const entry of entries) {
+      if (!entry.isDirectory()) continue;
+
+      const dbDir = path.join(this.storagePath, entry.name);
+      const dbEntries = fs.readdirSync(dbDir, { withFileTypes: true });
+
+      for (const dbEntry of dbEntries) {
+        // Only v0 collection files are relevant: *.json but not *.indexes.json
+        if (!dbEntry.isFile()) continue;
+        if (!dbEntry.name.endsWith('.json')) continue;
+        if (dbEntry.name.endsWith('.indexes.json')) continue;
+
+        const collName = dbEntry.name.replace(/\.json$/, '');
+        if (!fs.existsSync(path.join(dbDir, collName))) {
+          return 0; // Found a v0 collection that has not been migrated
+        }
+      }
+    }
+
+    // Empty, already migrated, or unrecognized — treat as v1
+    return 1;
+  }
+}
@@ -0,0 +1 @@
+export { StorageMigrator } from './classes.storagemigrator.js';
@@ -0,0 +1,253 @@
+import * as fs from 'fs';
+import * as path from 'path';
+import * as crypto from 'crypto';
+import { BSON } from 'bson';
+
+// ---------------------------------------------------------------------------
+// Binary format constants (must match Rust: record.rs)
+// ---------------------------------------------------------------------------
+
+/** File-level magic: "SMARTDB\0" */
+const FILE_MAGIC = Buffer.from('SMARTDB\0', 'ascii');
+/** Current format version */
+const FORMAT_VERSION = 1;
+/** File type tags */
+const FILE_TYPE_DATA = 1;
+const FILE_TYPE_HINT = 3;
+/** File header total size */
+const FILE_HEADER_SIZE = 64;
+/** Per-record magic */
+const RECORD_MAGIC = 0xDB01;
+/** Per-record header size */
+const RECORD_HEADER_SIZE = 22; // 2 + 8 + 4 + 4 + 4
+
+// ---------------------------------------------------------------------------
+// Binary encoding helpers
+// ---------------------------------------------------------------------------
+
+/**
+ * Build the fixed-size file header.
+ *
+ * Layout (little-endian): magic at [0..8), format version u16 at 8,
+ * file type u8 at 10, flags u32 at 11 (always 0), current time in ms u64 at 15,
+ * remaining bytes up to FILE_HEADER_SIZE left as zeros.
+ */
+function writeFileHeader(fileType: number): Buffer {
+  const header = Buffer.alloc(FILE_HEADER_SIZE); // zero-filled
+
+  // Each write returns the offset just past what it wrote (copy returns the
+  // byte count, which equals the end offset because the magic starts at 0).
+  let pos = FILE_MAGIC.copy(header, 0);
+  pos = header.writeUInt16LE(FORMAT_VERSION, pos);
+  pos = header.writeUInt8(fileType, pos);
+  pos = header.writeUInt32LE(0, pos); // flags
+  header.writeBigUInt64LE(BigInt(Date.now()), pos);
+  // bytes 23..64 are reserved (zeros)
+
+  return header;
+}
+
+/**
+ * Encode one data record: a 22-byte header followed by key and value bytes.
+ *
+ * Header (little-endian): magic u16 at 0, timestamp u64 at 2, key length u32
+ * at 10, value length u32 at 14, CRC32 u32 at 18. The CRC covers every byte of
+ * the record except the CRC field itself.
+ */
+function encodeDataRecord(timestamp: bigint, key: Buffer, value: Buffer): Buffer {
+  const crcOffset = 18;
+  const record = Buffer.alloc(RECORD_HEADER_SIZE + key.length + value.length);
+
+  // Header fields; the CRC slot stays zero until the payload is in place.
+  record.writeUInt16LE(RECORD_MAGIC, 0);
+  record.writeBigUInt64LE(timestamp, 2);
+  record.writeUInt32LE(key.length, 10);
+  record.writeUInt32LE(value.length, 14);
+
+  // Payload: key immediately after the header, value right after the key.
+  key.copy(record, RECORD_HEADER_SIZE);
+  value.copy(record, RECORD_HEADER_SIZE + key.length);
+
+  // Checksum input is bytes [0..18) followed by bytes [22..end).
+  const beforeCrc = record.subarray(0, crcOffset);
+  const afterCrc = record.subarray(22);
+  const checksum = crc32(Buffer.concat([beforeCrc, afterCrc]));
+  record.writeUInt32LE(checksum, crcOffset);
+
+  return record;
+}
+
+/**
+ * Encode one hint entry (little-endian):
+ * key length u32, UTF-8 key bytes, offset u64, record length u32,
+ * value length u32, timestamp u64.
+ */
+function encodeHintEntry(key: string, offset: bigint, recordLen: number, valueLen: number, timestamp: bigint): Buffer {
+  const keyBytes = Buffer.from(key, 'utf-8');
+  const entry = Buffer.alloc(4 + keyBytes.length + 8 + 4 + 4 + 8);
+
+  // Each write returns the offset just past the bytes it wrote.
+  let cursor = entry.writeUInt32LE(keyBytes.length, 0);
+  cursor += keyBytes.copy(entry, cursor);
+  cursor = entry.writeBigUInt64LE(offset, cursor);
+  cursor = entry.writeUInt32LE(recordLen, cursor);
+  cursor = entry.writeUInt32LE(valueLen, cursor);
+  entry.writeBigUInt64LE(timestamp, cursor);
+
+  return entry;
+}
+
+// ---------------------------------------------------------------------------
+// CRC32 (matching crc32fast in Rust)
+// ---------------------------------------------------------------------------
+
+/** 256-entry lookup table for the byte-wise CRC32 below, built once at module load. */
+const CRC32_TABLE = (() => {
+  const polynomial = 0xEDB88320;
+  return Uint32Array.from({ length: 256 }, (_unused, byteValue) => {
+    let remainder = byteValue;
+    for (let bit = 0; bit < 8; bit++) {
+      const lowBitSet = (remainder & 1) !== 0;
+      remainder = remainder >>> 1;
+      if (lowBitSet) {
+        remainder = polynomial ^ remainder;
+      }
+    }
+    return remainder;
+  });
+})();
+
+/**
+ * Table-driven CRC32 of `data`, returned as an unsigned 32-bit number.
+ */
+function crc32(data: Buffer): number {
+  let acc = 0xFFFFFFFF;
+  for (const byte of data) {
+    acc = (acc >>> 8) ^ CRC32_TABLE[(acc ^ byte) & 0xFF];
+  }
+  // Final inversion; `>>> 0` converts the signed result back to unsigned.
+  return ~acc >>> 0;
+}
+
+// ---------------------------------------------------------------------------
+// Migration: v0 (JSON) → v1 (Bitcask binary)
+// ---------------------------------------------------------------------------
+
+interface IKeyDirEntry {
+  offset: bigint;
+  recordLen: number;
+  valueLen: number;
+  timestamp: bigint;
+}
+
+/**
+ * Determine the keydir key for a v0 document.
+ *
+ * Uses `_id.$oid` when present, a string `_id` as-is, any other truthy `_id`
+ * via String(), and otherwise assigns a freshly generated 12-byte hex id to
+ * the document (mutating it) and returns that.
+ */
+function resolveDocKey(doc: any): string {
+  if (doc._id && doc._id.$oid) {
+    return doc._id.$oid;
+  }
+  if (typeof doc._id === 'string') {
+    return doc._id;
+  }
+  if (doc._id) {
+    // NOTE(review): object ids other than { $oid } all stringify to the same
+    // "[object Object]" key — confirm such ids cannot occur in v0 data.
+    return String(doc._id);
+  }
+  // Generate a new ObjectId
+  const generated = crypto.randomBytes(12).toString('hex');
+  doc._id = { $oid: generated };
+  return generated;
+}
+
+/**
+ * Write data.rdb for one collection and return the keydir describing where
+ * each record landed. The file descriptor is closed on every path.
+ */
+function writeDataFile(dataPath: string, docs: any[]): Map<string, IKeyDirEntry> {
+  const keydir: Map<string, IKeyDirEntry> = new Map();
+  const fd = fs.openSync(dataPath, 'w');
+
+  try {
+    // File header
+    fs.writeSync(fd, writeFileHeader(FILE_TYPE_DATA));
+
+    let currentOffset = BigInt(FILE_HEADER_SIZE);
+    const ts = BigInt(Date.now());
+
+    for (const doc of docs) {
+      const idHex = resolveDocKey(doc);
+
+      // Serialize to BSON
+      // NOTE(review): BSON.serialize does not interpret extended-JSON wrappers,
+      // so an `_id` of { $oid: "..." } is presumably stored as a nested document
+      // rather than a BSON ObjectId — confirm this is what the v1 reader expects.
+      const valueBuf = Buffer.from(BSON.serialize(doc));
+      const keyBuf = Buffer.from(idHex, 'utf-8');
+
+      const record = encodeDataRecord(ts, keyBuf, valueBuf);
+      fs.writeSync(fd, record);
+
+      keydir.set(idHex, {
+        offset: currentOffset,
+        recordLen: record.length,
+        valueLen: valueBuf.length,
+        timestamp: ts,
+      });
+
+      currentOffset += BigInt(record.length);
+    }
+
+    fs.fsyncSync(fd);
+  } finally {
+    fs.closeSync(fd);
+  }
+
+  return keydir;
+}
+
+/**
+ * Write keydir.hint for one collection. The file descriptor is closed on
+ * every path.
+ */
+function writeHintFile(hintPath: string, keydir: Map<string, IKeyDirEntry>): void {
+  const fd = fs.openSync(hintPath, 'w');
+
+  try {
+    fs.writeSync(fd, writeFileHeader(FILE_TYPE_HINT));
+    for (const [key, entry] of keydir) {
+      fs.writeSync(fd, encodeHintEntry(key, entry.offset, entry.recordLen, entry.valueLen, entry.timestamp));
+    }
+    fs.fsyncSync(fd);
+  } finally {
+    fs.closeSync(fd);
+  }
+}
+
+/**
+ * Migrate a storage directory from v0 (JSON-per-collection) to v1 (Bitcask binary).
+ *
+ * - Original .json files are NOT modified or deleted.
+ * - New v1 files are written into {db}/{coll}/ subdirectories.
+ * - Collections whose {db}/{coll}/ directory already exists are skipped.
+ * - Returns a list of old files that can be safely deleted.
+ * - On failure, removes every collection directory created by this call
+ *   and rethrows the error.
+ *
+ * @param storagePath Root storage directory ({storagePath}/{db}/{coll}.json).
+ * @returns Paths of the migrated v0 files (collection and index JSON files).
+ * @throws Any file system, JSON parse or serialization error; also when a
+ *         collection file does not contain a JSON array.
+ */
+export async function migrateV0ToV1(storagePath: string): Promise<string[]> {
+  const deletableFiles: string[] = [];
+  const createdDirs: string[] = [];
+
+  try {
+    const dbEntries = fs.readdirSync(storagePath, { withFileTypes: true });
+
+    for (const dbEntry of dbEntries) {
+      if (!dbEntry.isDirectory()) continue;
+
+      const dbDir = path.join(storagePath, dbEntry.name);
+      const collFiles = fs.readdirSync(dbDir, { withFileTypes: true });
+
+      for (const collFile of collFiles) {
+        if (!collFile.isFile()) continue;
+        if (!collFile.name.endsWith('.json')) continue;
+        if (collFile.name.endsWith('.indexes.json')) continue;
+
+        const collName = collFile.name.replace(/\.json$/, '');
+        const jsonPath = path.join(dbDir, collFile.name);
+        const indexJsonPath = path.join(dbDir, `${collName}.indexes.json`);
+
+        // Target directory
+        const collDir = path.join(dbDir, collName);
+        if (fs.existsSync(collDir)) {
+          // Already migrated
+          continue;
+        }
+
+        console.log(`[smartdb]   Migrating ${dbEntry.name}.${collName}...`);
+
+        // Read and validate the JSON collection before touching the disk, so a
+        // malformed file fails with a clear message and leaves nothing behind.
+        const jsonData = fs.readFileSync(jsonPath, 'utf-8');
+        const docs: any[] = JSON.parse(jsonData);
+        if (!Array.isArray(docs)) {
+          throw new Error(`[smartdb] Cannot migrate ${jsonPath}: expected a JSON array of documents`);
+        }
+
+        // Create collection directory
+        fs.mkdirSync(collDir, { recursive: true });
+        createdDirs.push(collDir);
+
+        // Write data.rdb, then keydir.hint from the resulting keydir
+        const keydir = writeDataFile(path.join(collDir, 'data.rdb'), docs);
+        writeHintFile(path.join(collDir, 'keydir.hint'), keydir);
+
+        // Copy indexes.json if it exists, otherwise write the default _id index
+        const destIndexPath = path.join(collDir, 'indexes.json');
+        if (fs.existsSync(indexJsonPath)) {
+          fs.copyFileSync(indexJsonPath, destIndexPath);
+          deletableFiles.push(indexJsonPath);
+        } else {
+          fs.writeFileSync(destIndexPath, JSON.stringify([{ name: '_id_', key: { _id: 1 } }], null, 2));
+        }
+
+        deletableFiles.push(jsonPath);
+
+        console.log(`[smartdb]   Migrated ${dbEntry.name}.${collName}: ${docs.length} documents`);
+      }
+    }
+  } catch (err) {
+    // Clean up any partially created directories (best-effort)
+    for (const dir of createdDirs) {
+      try {
+        fs.rmSync(dir, { recursive: true, force: true });
+      } catch {}
+    }
+    throw err;
+  }
+
+  return deletableFiles;
+}
@@ -1,4 +1,5 @@
 import { RustDbBridge } from '../rust-db-bridge.js';
+import { StorageMigrator } from '../../ts_migration/index.js';
 import type {
  IOpLogEntry,
  IOpLogResult,
@@ -75,6 +76,12 @@ export class SmartdbServer {
      throw new Error('Server is already running');
    }

+    // Run storage migration for file-based storage before starting Rust engine
+    if (this.options.storage === 'file' && this.options.storagePath) {
+      const migrator = new StorageMigrator(this.options.storagePath);
+      await migrator.run();
+    }
+
    const spawned = await this.bridge.spawn();
    if (!spawned) {
      throw new Error(
Author	SHA1	Message	Date
jkunz	1a10c32b12	v2.5.2	2026-04-05 03:26:52 +00:00
jkunz	cb8cb87d9f	fix(rustdb-indexes): persist created indexes and restore them on server startup	2026-04-05 03:26:52 +00:00
jkunz	96117d54b9	v2.5.1	2026-04-05 02:48:00 +00:00
jkunz	53f58e45c3	fix(docs): update project documentation	2026-04-05 02:48:00 +00:00
jkunz	34d708be7e	v2.5.0	2026-04-05 02:46:05 +00:00
jkunz	418e8dc052	feat(storage): add offline data validation and strengthen storage/index integrity checks	2026-04-05 02:46:05 +00:00
jkunz	b8567ebe08	v2.4.1	2026-04-05 01:31:44 +00:00
jkunz	827bfa6370	fix(package): update package metadata	2026-04-05 01:31:44 +00:00
jkunz	ceba64e34a	v2.4.0	2026-04-05 01:30:28 +00:00
jkunz	8646d58f06	feat(rustdb): add restore and periodic persistence support for in-memory storage	2026-04-05 01:30:28 +00:00
jkunz	8ce6ff11c3	v2.3.1	2026-04-04 20:15:58 +00:00
jkunz	5c7aaebaba	fix(package): update package metadata	2026-04-04 20:15:58 +00:00
jkunz	be7d086c0b	v2.3.0	2026-04-04 20:14:51 +00:00
jkunz	91a7b69f1d	feat(test): add integration coverage for file storage, compaction, migration, and LocalSmartDb workflows	2026-04-04 20:14:51 +00:00
jkunz	4e078b35d4	v2.2.0	2026-04-04 19:49:47 +00:00
jkunz	d8a8259c73	feat(storage): add Bitcask storage migration, binary WAL, and data compaction support	2026-04-04 19:49:47 +00:00
jkunz	9e7ce25b45	v2.1.1	2026-04-02 17:05:07 +00:00
jkunz	b634ee50d1	fix(package): update package metadata	2026-04-02 17:05:07 +00:00
				`@@ -0,0 +1 @@`
				`export { StorageMigrator } from './classes.storagemigrator.js';`