BREAKING CHANGE(core): replace the TypeScript database engine with a Rust-backed embedded server and bridge
This commit is contained in:
185
rust/crates/rustdb-storage/src/adapter.rs
Normal file
185
rust/crates/rustdb-storage/src/adapter.rs
Normal file
@@ -0,0 +1,185 @@
|
||||
use std::collections::HashSet;
|
||||
|
||||
use async_trait::async_trait;
|
||||
use bson::Document;
|
||||
|
||||
use crate::error::StorageResult;
|
||||
|
||||
/// Core storage adapter trait that all backends must implement.
|
||||
#[async_trait]
|
||||
pub trait StorageAdapter: Send + Sync {
|
||||
// ---- lifecycle ----
|
||||
|
||||
/// Initialize the storage backend (create directories, open files, etc.).
|
||||
async fn initialize(&self) -> StorageResult<()>;
|
||||
|
||||
/// Gracefully shut down the storage backend.
|
||||
async fn close(&self) -> StorageResult<()>;
|
||||
|
||||
// ---- database operations ----
|
||||
|
||||
/// List all database names.
|
||||
async fn list_databases(&self) -> StorageResult<Vec<String>>;
|
||||
|
||||
/// Create a new database.
|
||||
async fn create_database(&self, db: &str) -> StorageResult<()>;
|
||||
|
||||
/// Drop a database and all its collections.
|
||||
async fn drop_database(&self, db: &str) -> StorageResult<()>;
|
||||
|
||||
/// Check whether a database exists.
|
||||
async fn database_exists(&self, db: &str) -> StorageResult<bool>;
|
||||
|
||||
// ---- collection operations ----
|
||||
|
||||
/// List all collection names in a database.
|
||||
async fn list_collections(&self, db: &str) -> StorageResult<Vec<String>>;
|
||||
|
||||
/// Create a new collection inside a database.
|
||||
async fn create_collection(&self, db: &str, coll: &str) -> StorageResult<()>;
|
||||
|
||||
/// Drop a collection.
|
||||
async fn drop_collection(&self, db: &str, coll: &str) -> StorageResult<()>;
|
||||
|
||||
/// Check whether a collection exists.
|
||||
async fn collection_exists(&self, db: &str, coll: &str) -> StorageResult<bool>;
|
||||
|
||||
/// Rename a collection within the same database.
|
||||
async fn rename_collection(
|
||||
&self,
|
||||
db: &str,
|
||||
old_name: &str,
|
||||
new_name: &str,
|
||||
) -> StorageResult<()>;
|
||||
|
||||
// ---- document write operations ----
|
||||
|
||||
/// Insert a single document. Returns the `_id` as hex string.
|
||||
async fn insert_one(
|
||||
&self,
|
||||
db: &str,
|
||||
coll: &str,
|
||||
doc: Document,
|
||||
) -> StorageResult<String>;
|
||||
|
||||
/// Insert many documents. Returns the `_id` hex strings.
|
||||
async fn insert_many(
|
||||
&self,
|
||||
db: &str,
|
||||
coll: &str,
|
||||
docs: Vec<Document>,
|
||||
) -> StorageResult<Vec<String>>;
|
||||
|
||||
/// Replace a document by its `_id` hex string.
|
||||
async fn update_by_id(
|
||||
&self,
|
||||
db: &str,
|
||||
coll: &str,
|
||||
id: &str,
|
||||
doc: Document,
|
||||
) -> StorageResult<()>;
|
||||
|
||||
/// Delete a single document by `_id` hex string.
|
||||
async fn delete_by_id(
|
||||
&self,
|
||||
db: &str,
|
||||
coll: &str,
|
||||
id: &str,
|
||||
) -> StorageResult<()>;
|
||||
|
||||
/// Delete multiple documents by `_id` hex strings.
|
||||
async fn delete_by_ids(
|
||||
&self,
|
||||
db: &str,
|
||||
coll: &str,
|
||||
ids: &[String],
|
||||
) -> StorageResult<()>;
|
||||
|
||||
// ---- document read operations ----
|
||||
|
||||
/// Return all documents in a collection.
|
||||
async fn find_all(
|
||||
&self,
|
||||
db: &str,
|
||||
coll: &str,
|
||||
) -> StorageResult<Vec<Document>>;
|
||||
|
||||
/// Return documents whose `_id` hex is in the given set.
|
||||
async fn find_by_ids(
|
||||
&self,
|
||||
db: &str,
|
||||
coll: &str,
|
||||
ids: HashSet<String>,
|
||||
) -> StorageResult<Vec<Document>>;
|
||||
|
||||
/// Return a single document by `_id` hex.
|
||||
async fn find_by_id(
|
||||
&self,
|
||||
db: &str,
|
||||
coll: &str,
|
||||
id: &str,
|
||||
) -> StorageResult<Option<Document>>;
|
||||
|
||||
/// Count documents in a collection.
|
||||
async fn count(
|
||||
&self,
|
||||
db: &str,
|
||||
coll: &str,
|
||||
) -> StorageResult<u64>;
|
||||
|
||||
// ---- index operations ----
|
||||
|
||||
/// Persist an index specification for a collection.
|
||||
async fn save_index(
|
||||
&self,
|
||||
db: &str,
|
||||
coll: &str,
|
||||
name: &str,
|
||||
spec: Document,
|
||||
) -> StorageResult<()>;
|
||||
|
||||
/// Return all saved index specs for a collection.
|
||||
async fn get_indexes(
|
||||
&self,
|
||||
db: &str,
|
||||
coll: &str,
|
||||
) -> StorageResult<Vec<Document>>;
|
||||
|
||||
/// Drop a named index.
|
||||
async fn drop_index(
|
||||
&self,
|
||||
db: &str,
|
||||
coll: &str,
|
||||
name: &str,
|
||||
) -> StorageResult<()>;
|
||||
|
||||
// ---- snapshot / conflict detection ----
|
||||
|
||||
/// Create a logical snapshot timestamp for a collection. Returns a timestamp (ms).
|
||||
async fn create_snapshot(
|
||||
&self,
|
||||
db: &str,
|
||||
coll: &str,
|
||||
) -> StorageResult<i64>;
|
||||
|
||||
/// Check if any of the given document ids have been modified after `snapshot_time`.
|
||||
async fn has_conflicts(
|
||||
&self,
|
||||
db: &str,
|
||||
coll: &str,
|
||||
ids: &HashSet<String>,
|
||||
snapshot_time: i64,
|
||||
) -> StorageResult<bool>;
|
||||
|
||||
// ---- optional persistence (for in-memory backends) ----
|
||||
|
||||
/// Persist current state to durable storage. Default: no-op.
|
||||
async fn persist(&self) -> StorageResult<()> {
|
||||
Ok(())
|
||||
}
|
||||
|
||||
/// Restore state from durable storage. Default: no-op.
|
||||
async fn restore(&self) -> StorageResult<()> {
|
||||
Ok(())
|
||||
}
|
||||
}
|
||||
40
rust/crates/rustdb-storage/src/error.rs
Normal file
40
rust/crates/rustdb-storage/src/error.rs
Normal file
@@ -0,0 +1,40 @@
|
||||
use thiserror::Error;
|
||||
|
||||
/// Errors that can occur in storage operations.
|
||||
#[derive(Debug, Error)]
|
||||
pub enum StorageError {
|
||||
#[error("not found: {0}")]
|
||||
NotFound(String),
|
||||
|
||||
#[error("already exists: {0}")]
|
||||
AlreadyExists(String),
|
||||
|
||||
#[error("I/O error: {0}")]
|
||||
IoError(#[from] std::io::Error),
|
||||
|
||||
#[error("serialization error: {0}")]
|
||||
SerializationError(String),
|
||||
|
||||
#[error("conflict detected: {0}")]
|
||||
ConflictError(String),
|
||||
}
|
||||
|
||||
impl From<serde_json::Error> for StorageError {
|
||||
fn from(e: serde_json::Error) -> Self {
|
||||
StorageError::SerializationError(e.to_string())
|
||||
}
|
||||
}
|
||||
|
||||
impl From<bson::de::Error> for StorageError {
|
||||
fn from(e: bson::de::Error) -> Self {
|
||||
StorageError::SerializationError(e.to_string())
|
||||
}
|
||||
}
|
||||
|
||||
impl From<bson::ser::Error> for StorageError {
|
||||
fn from(e: bson::ser::Error) -> Self {
|
||||
StorageError::SerializationError(e.to_string())
|
||||
}
|
||||
}
|
||||
|
||||
pub type StorageResult<T> = Result<T, StorageError>;
|
||||
476
rust/crates/rustdb-storage/src/file.rs
Normal file
476
rust/crates/rustdb-storage/src/file.rs
Normal file
@@ -0,0 +1,476 @@
|
||||
use std::collections::HashSet;
|
||||
use std::path::PathBuf;
|
||||
|
||||
use async_trait::async_trait;
|
||||
use bson::{doc, oid::ObjectId, Document};
|
||||
use tracing::debug;
|
||||
|
||||
use crate::adapter::StorageAdapter;
|
||||
use crate::error::{StorageError, StorageResult};
|
||||
|
||||
/// File-based storage adapter. Each collection is stored as a JSON file:
|
||||
/// `{base_path}/{db}/{coll}.json`
|
||||
/// Index metadata lives alongside:
|
||||
/// `{base_path}/{db}/{coll}.indexes.json`
|
||||
pub struct FileStorageAdapter {
|
||||
base_path: PathBuf,
|
||||
}
|
||||
|
||||
impl FileStorageAdapter {
|
||||
pub fn new(base_path: impl Into<PathBuf>) -> Self {
|
||||
Self {
|
||||
base_path: base_path.into(),
|
||||
}
|
||||
}
|
||||
|
||||
fn db_dir(&self, db: &str) -> PathBuf {
|
||||
self.base_path.join(db)
|
||||
}
|
||||
|
||||
fn coll_path(&self, db: &str, coll: &str) -> PathBuf {
|
||||
self.db_dir(db).join(format!("{coll}.json"))
|
||||
}
|
||||
|
||||
fn index_path(&self, db: &str, coll: &str) -> PathBuf {
|
||||
self.db_dir(db).join(format!("{coll}.indexes.json"))
|
||||
}
|
||||
|
||||
/// Read all documents from a collection file. Returns empty vec if file doesn't exist.
|
||||
async fn read_docs(&self, db: &str, coll: &str) -> StorageResult<Vec<Document>> {
|
||||
let path = self.coll_path(db, coll);
|
||||
if !path.exists() {
|
||||
return Err(StorageError::NotFound(format!(
|
||||
"collection '{db}.{coll}'"
|
||||
)));
|
||||
}
|
||||
let data = tokio::fs::read_to_string(&path).await?;
|
||||
let json_docs: Vec<serde_json::Value> = serde_json::from_str(&data)?;
|
||||
let mut docs = Vec::with_capacity(json_docs.len());
|
||||
for jv in json_docs {
|
||||
let bson_val: bson::Bson = serde_json::from_value(jv)
|
||||
.map_err(|e| StorageError::SerializationError(e.to_string()))?;
|
||||
let doc = bson_val
|
||||
.as_document()
|
||||
.ok_or_else(|| StorageError::SerializationError("expected document".into()))?
|
||||
.clone();
|
||||
docs.push(doc);
|
||||
}
|
||||
Ok(docs)
|
||||
}
|
||||
|
||||
/// Write all documents to a collection file.
|
||||
async fn write_docs(&self, db: &str, coll: &str, docs: &[Document]) -> StorageResult<()> {
|
||||
let path = self.coll_path(db, coll);
|
||||
if let Some(parent) = path.parent() {
|
||||
tokio::fs::create_dir_all(parent).await?;
|
||||
}
|
||||
let json_vals: Vec<serde_json::Value> = docs
|
||||
.iter()
|
||||
.map(|d| {
|
||||
let b = bson::to_bson(d)
|
||||
.map_err(|e| StorageError::SerializationError(e.to_string()))?;
|
||||
serde_json::to_value(&b)
|
||||
.map_err(|e| StorageError::SerializationError(e.to_string()))
|
||||
})
|
||||
.collect::<StorageResult<Vec<_>>>()?;
|
||||
let json = serde_json::to_string_pretty(&json_vals)?;
|
||||
tokio::fs::write(&path, json).await?;
|
||||
Ok(())
|
||||
}
|
||||
|
||||
/// Read index specs from the indexes file.
|
||||
async fn read_indexes(&self, db: &str, coll: &str) -> StorageResult<Vec<Document>> {
|
||||
let path = self.index_path(db, coll);
|
||||
if !path.exists() {
|
||||
return Ok(vec![]);
|
||||
}
|
||||
let data = tokio::fs::read_to_string(&path).await?;
|
||||
let json_vals: Vec<serde_json::Value> = serde_json::from_str(&data)?;
|
||||
let mut docs = Vec::new();
|
||||
for jv in json_vals {
|
||||
let bson_val: bson::Bson = serde_json::from_value(jv)
|
||||
.map_err(|e| StorageError::SerializationError(e.to_string()))?;
|
||||
let doc = bson_val
|
||||
.as_document()
|
||||
.ok_or_else(|| StorageError::SerializationError("expected document".into()))?
|
||||
.clone();
|
||||
docs.push(doc);
|
||||
}
|
||||
Ok(docs)
|
||||
}
|
||||
|
||||
/// Write index specs to the indexes file.
|
||||
async fn write_indexes(&self, db: &str, coll: &str, specs: &[Document]) -> StorageResult<()> {
|
||||
let path = self.index_path(db, coll);
|
||||
if let Some(parent) = path.parent() {
|
||||
tokio::fs::create_dir_all(parent).await?;
|
||||
}
|
||||
let json_vals: Vec<serde_json::Value> = specs
|
||||
.iter()
|
||||
.map(|d| {
|
||||
let b = bson::to_bson(d)
|
||||
.map_err(|e| StorageError::SerializationError(e.to_string()))?;
|
||||
serde_json::to_value(&b)
|
||||
.map_err(|e| StorageError::SerializationError(e.to_string()))
|
||||
})
|
||||
.collect::<StorageResult<Vec<_>>>()?;
|
||||
let json = serde_json::to_string_pretty(&json_vals)?;
|
||||
tokio::fs::write(&path, json).await?;
|
||||
Ok(())
|
||||
}
|
||||
|
||||
fn extract_id_hex(doc: &Document) -> StorageResult<String> {
|
||||
match doc.get("_id") {
|
||||
Some(bson::Bson::ObjectId(oid)) => Ok(oid.to_hex()),
|
||||
_ => Err(StorageError::NotFound("document missing _id".into())),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
#[async_trait]
|
||||
impl StorageAdapter for FileStorageAdapter {
|
||||
async fn initialize(&self) -> StorageResult<()> {
|
||||
tokio::fs::create_dir_all(&self.base_path).await?;
|
||||
debug!("FileStorageAdapter initialized at {:?}", self.base_path);
|
||||
Ok(())
|
||||
}
|
||||
|
||||
async fn close(&self) -> StorageResult<()> {
|
||||
debug!("FileStorageAdapter closed");
|
||||
Ok(())
|
||||
}
|
||||
|
||||
// ---- database ----
|
||||
|
||||
async fn list_databases(&self) -> StorageResult<Vec<String>> {
|
||||
let mut dbs = Vec::new();
|
||||
let mut entries = tokio::fs::read_dir(&self.base_path).await?;
|
||||
while let Some(entry) = entries.next_entry().await? {
|
||||
if entry.file_type().await?.is_dir() {
|
||||
if let Some(name) = entry.file_name().to_str() {
|
||||
dbs.push(name.to_string());
|
||||
}
|
||||
}
|
||||
}
|
||||
Ok(dbs)
|
||||
}
|
||||
|
||||
async fn create_database(&self, db: &str) -> StorageResult<()> {
|
||||
let dir = self.db_dir(db);
|
||||
if dir.exists() {
|
||||
return Err(StorageError::AlreadyExists(format!("database '{db}'")));
|
||||
}
|
||||
tokio::fs::create_dir_all(&dir).await?;
|
||||
Ok(())
|
||||
}
|
||||
|
||||
async fn drop_database(&self, db: &str) -> StorageResult<()> {
|
||||
let dir = self.db_dir(db);
|
||||
if dir.exists() {
|
||||
tokio::fs::remove_dir_all(&dir).await?;
|
||||
}
|
||||
Ok(())
|
||||
}
|
||||
|
||||
async fn database_exists(&self, db: &str) -> StorageResult<bool> {
|
||||
Ok(self.db_dir(db).exists())
|
||||
}
|
||||
|
||||
// ---- collection ----
|
||||
|
||||
async fn list_collections(&self, db: &str) -> StorageResult<Vec<String>> {
|
||||
let dir = self.db_dir(db);
|
||||
if !dir.exists() {
|
||||
return Err(StorageError::NotFound(format!("database '{db}'")));
|
||||
}
|
||||
let mut colls = Vec::new();
|
||||
let mut entries = tokio::fs::read_dir(&dir).await?;
|
||||
while let Some(entry) = entries.next_entry().await? {
|
||||
if let Some(name) = entry.file_name().to_str() {
|
||||
if name.ends_with(".json") && !name.ends_with(".indexes.json") {
|
||||
colls.push(name.trim_end_matches(".json").to_string());
|
||||
}
|
||||
}
|
||||
}
|
||||
Ok(colls)
|
||||
}
|
||||
|
||||
async fn create_collection(&self, db: &str, coll: &str) -> StorageResult<()> {
|
||||
let path = self.coll_path(db, coll);
|
||||
if path.exists() {
|
||||
return Err(StorageError::AlreadyExists(format!(
|
||||
"collection '{db}.{coll}'"
|
||||
)));
|
||||
}
|
||||
// Ensure db dir exists.
|
||||
tokio::fs::create_dir_all(self.db_dir(db)).await?;
|
||||
// Write empty array.
|
||||
self.write_docs(db, coll, &[]).await?;
|
||||
// Write default _id index.
|
||||
let idx_spec = doc! { "name": "_id_", "key": { "_id": 1 } };
|
||||
self.write_indexes(db, coll, &[idx_spec]).await?;
|
||||
Ok(())
|
||||
}
|
||||
|
||||
async fn drop_collection(&self, db: &str, coll: &str) -> StorageResult<()> {
|
||||
let path = self.coll_path(db, coll);
|
||||
if path.exists() {
|
||||
tokio::fs::remove_file(&path).await?;
|
||||
}
|
||||
let idx_path = self.index_path(db, coll);
|
||||
if idx_path.exists() {
|
||||
tokio::fs::remove_file(&idx_path).await?;
|
||||
}
|
||||
Ok(())
|
||||
}
|
||||
|
||||
async fn collection_exists(&self, db: &str, coll: &str) -> StorageResult<bool> {
|
||||
Ok(self.coll_path(db, coll).exists())
|
||||
}
|
||||
|
||||
async fn rename_collection(
|
||||
&self,
|
||||
db: &str,
|
||||
old_name: &str,
|
||||
new_name: &str,
|
||||
) -> StorageResult<()> {
|
||||
let old_path = self.coll_path(db, old_name);
|
||||
let new_path = self.coll_path(db, new_name);
|
||||
if !old_path.exists() {
|
||||
return Err(StorageError::NotFound(format!(
|
||||
"collection '{db}.{old_name}'"
|
||||
)));
|
||||
}
|
||||
if new_path.exists() {
|
||||
return Err(StorageError::AlreadyExists(format!(
|
||||
"collection '{db}.{new_name}'"
|
||||
)));
|
||||
}
|
||||
tokio::fs::rename(&old_path, &new_path).await?;
|
||||
|
||||
// Rename index file too.
|
||||
let old_idx = self.index_path(db, old_name);
|
||||
let new_idx = self.index_path(db, new_name);
|
||||
if old_idx.exists() {
|
||||
tokio::fs::rename(&old_idx, &new_idx).await?;
|
||||
}
|
||||
Ok(())
|
||||
}
|
||||
|
||||
// ---- document writes ----
|
||||
|
||||
async fn insert_one(
|
||||
&self,
|
||||
db: &str,
|
||||
coll: &str,
|
||||
mut doc: Document,
|
||||
) -> StorageResult<String> {
|
||||
if !doc.contains_key("_id") {
|
||||
doc.insert("_id", ObjectId::new());
|
||||
}
|
||||
let id = Self::extract_id_hex(&doc)?;
|
||||
|
||||
let mut docs = self.read_docs(db, coll).await?;
|
||||
// Check for duplicate.
|
||||
for existing in &docs {
|
||||
if Self::extract_id_hex(existing)? == id {
|
||||
return Err(StorageError::AlreadyExists(format!("document '{id}'")));
|
||||
}
|
||||
}
|
||||
docs.push(doc);
|
||||
self.write_docs(db, coll, &docs).await?;
|
||||
Ok(id)
|
||||
}
|
||||
|
||||
async fn insert_many(
|
||||
&self,
|
||||
db: &str,
|
||||
coll: &str,
|
||||
mut new_docs: Vec<Document>,
|
||||
) -> StorageResult<Vec<String>> {
|
||||
let mut docs = self.read_docs(db, coll).await?;
|
||||
let mut ids = Vec::with_capacity(new_docs.len());
|
||||
|
||||
for doc in &mut new_docs {
|
||||
if !doc.contains_key("_id") {
|
||||
doc.insert("_id", ObjectId::new());
|
||||
}
|
||||
ids.push(Self::extract_id_hex(doc)?);
|
||||
}
|
||||
|
||||
docs.extend(new_docs);
|
||||
self.write_docs(db, coll, &docs).await?;
|
||||
Ok(ids)
|
||||
}
|
||||
|
||||
async fn update_by_id(
|
||||
&self,
|
||||
db: &str,
|
||||
coll: &str,
|
||||
id: &str,
|
||||
doc: Document,
|
||||
) -> StorageResult<()> {
|
||||
let mut docs = self.read_docs(db, coll).await?;
|
||||
let mut found = false;
|
||||
for existing in &mut docs {
|
||||
if Self::extract_id_hex(existing)? == id {
|
||||
*existing = doc.clone();
|
||||
found = true;
|
||||
break;
|
||||
}
|
||||
}
|
||||
if !found {
|
||||
return Err(StorageError::NotFound(format!("document '{id}'")));
|
||||
}
|
||||
self.write_docs(db, coll, &docs).await?;
|
||||
Ok(())
|
||||
}
|
||||
|
||||
async fn delete_by_id(
|
||||
&self,
|
||||
db: &str,
|
||||
coll: &str,
|
||||
id: &str,
|
||||
) -> StorageResult<()> {
|
||||
let mut docs = self.read_docs(db, coll).await?;
|
||||
let len_before = docs.len();
|
||||
docs.retain(|d| Self::extract_id_hex(d).map(|i| i != id).unwrap_or(true));
|
||||
if docs.len() == len_before {
|
||||
return Err(StorageError::NotFound(format!("document '{id}'")));
|
||||
}
|
||||
self.write_docs(db, coll, &docs).await?;
|
||||
Ok(())
|
||||
}
|
||||
|
||||
async fn delete_by_ids(
|
||||
&self,
|
||||
db: &str,
|
||||
coll: &str,
|
||||
ids: &[String],
|
||||
) -> StorageResult<()> {
|
||||
let id_set: HashSet<&str> = ids.iter().map(|s| s.as_str()).collect();
|
||||
let mut docs = self.read_docs(db, coll).await?;
|
||||
docs.retain(|d| {
|
||||
Self::extract_id_hex(d)
|
||||
.map(|i| !id_set.contains(i.as_str()))
|
||||
.unwrap_or(true)
|
||||
});
|
||||
self.write_docs(db, coll, &docs).await?;
|
||||
Ok(())
|
||||
}
|
||||
|
||||
// ---- document reads ----
|
||||
|
||||
async fn find_all(
|
||||
&self,
|
||||
db: &str,
|
||||
coll: &str,
|
||||
) -> StorageResult<Vec<Document>> {
|
||||
self.read_docs(db, coll).await
|
||||
}
|
||||
|
||||
async fn find_by_ids(
|
||||
&self,
|
||||
db: &str,
|
||||
coll: &str,
|
||||
ids: HashSet<String>,
|
||||
) -> StorageResult<Vec<Document>> {
|
||||
let docs = self.read_docs(db, coll).await?;
|
||||
Ok(docs
|
||||
.into_iter()
|
||||
.filter(|d| {
|
||||
Self::extract_id_hex(d)
|
||||
.map(|i| ids.contains(&i))
|
||||
.unwrap_or(false)
|
||||
})
|
||||
.collect())
|
||||
}
|
||||
|
||||
async fn find_by_id(
|
||||
&self,
|
||||
db: &str,
|
||||
coll: &str,
|
||||
id: &str,
|
||||
) -> StorageResult<Option<Document>> {
|
||||
let docs = self.read_docs(db, coll).await?;
|
||||
Ok(docs
|
||||
.into_iter()
|
||||
.find(|d| Self::extract_id_hex(d).map(|i| i == id).unwrap_or(false)))
|
||||
}
|
||||
|
||||
async fn count(
|
||||
&self,
|
||||
db: &str,
|
||||
coll: &str,
|
||||
) -> StorageResult<u64> {
|
||||
let docs = self.read_docs(db, coll).await?;
|
||||
Ok(docs.len() as u64)
|
||||
}
|
||||
|
||||
// ---- indexes ----
|
||||
|
||||
async fn save_index(
|
||||
&self,
|
||||
db: &str,
|
||||
coll: &str,
|
||||
name: &str,
|
||||
spec: Document,
|
||||
) -> StorageResult<()> {
|
||||
let mut indexes = self.read_indexes(db, coll).await?;
|
||||
indexes.retain(|s| s.get_str("name").unwrap_or("") != name);
|
||||
let mut full_spec = spec;
|
||||
full_spec.insert("name", name);
|
||||
indexes.push(full_spec);
|
||||
self.write_indexes(db, coll, &indexes).await
|
||||
}
|
||||
|
||||
async fn get_indexes(
|
||||
&self,
|
||||
db: &str,
|
||||
coll: &str,
|
||||
) -> StorageResult<Vec<Document>> {
|
||||
self.read_indexes(db, coll).await
|
||||
}
|
||||
|
||||
async fn drop_index(
|
||||
&self,
|
||||
db: &str,
|
||||
coll: &str,
|
||||
name: &str,
|
||||
) -> StorageResult<()> {
|
||||
let mut indexes = self.read_indexes(db, coll).await?;
|
||||
let before = indexes.len();
|
||||
indexes.retain(|s| s.get_str("name").unwrap_or("") != name);
|
||||
if indexes.len() == before {
|
||||
return Err(StorageError::NotFound(format!("index '{name}'")));
|
||||
}
|
||||
self.write_indexes(db, coll, &indexes).await
|
||||
}
|
||||
|
||||
// ---- snapshot / conflict detection ----
|
||||
// File adapter doesn't track per-document timestamps, so conflict detection
|
||||
// is a no-op (always returns false).
|
||||
|
||||
async fn create_snapshot(
|
||||
&self,
|
||||
_db: &str,
|
||||
_coll: &str,
|
||||
) -> StorageResult<i64> {
|
||||
use std::time::{SystemTime, UNIX_EPOCH};
|
||||
Ok(SystemTime::now()
|
||||
.duration_since(UNIX_EPOCH)
|
||||
.unwrap()
|
||||
.as_millis() as i64)
|
||||
}
|
||||
|
||||
async fn has_conflicts(
|
||||
&self,
|
||||
_db: &str,
|
||||
_coll: &str,
|
||||
_ids: &HashSet<String>,
|
||||
_snapshot_time: i64,
|
||||
) -> StorageResult<bool> {
|
||||
// File adapter does not track modification timestamps per document.
|
||||
Ok(false)
|
||||
}
|
||||
}
|
||||
22
rust/crates/rustdb-storage/src/lib.rs
Normal file
22
rust/crates/rustdb-storage/src/lib.rs
Normal file
@@ -0,0 +1,22 @@
|
||||
//! `rustdb-storage` -- Storage adapters for RustDb.
|
||||
//!
|
||||
//! Provides the [`StorageAdapter`] trait and two concrete implementations:
|
||||
//! - [`MemoryStorageAdapter`] -- fast in-memory store backed by `DashMap`
|
||||
//! - [`FileStorageAdapter`] -- JSON-file-per-collection persistent store
|
||||
//!
|
||||
//! Also includes an [`OpLog`] for operation logging and a [`WriteAheadLog`]
|
||||
//! for crash recovery.
|
||||
|
||||
pub mod adapter;
|
||||
pub mod error;
|
||||
pub mod file;
|
||||
pub mod memory;
|
||||
pub mod oplog;
|
||||
pub mod wal;
|
||||
|
||||
pub use adapter::StorageAdapter;
|
||||
pub use error::{StorageError, StorageResult};
|
||||
pub use file::FileStorageAdapter;
|
||||
pub use memory::MemoryStorageAdapter;
|
||||
pub use oplog::{OpLog, OpLogEntry, OpType};
|
||||
pub use wal::{WalOp, WalRecord, WriteAheadLog};
|
||||
613
rust/crates/rustdb-storage/src/memory.rs
Normal file
613
rust/crates/rustdb-storage/src/memory.rs
Normal file
@@ -0,0 +1,613 @@
|
||||
use std::collections::HashSet;
|
||||
use std::path::PathBuf;
|
||||
use std::time::{SystemTime, UNIX_EPOCH};
|
||||
|
||||
use async_trait::async_trait;
|
||||
use bson::{doc, oid::ObjectId, Document};
|
||||
use dashmap::DashMap;
|
||||
use tracing::{debug, warn};
|
||||
|
||||
use crate::adapter::StorageAdapter;
|
||||
use crate::error::{StorageError, StorageResult};
|
||||
|
||||
/// Per-document timestamp tracking for conflict detection:
/// `_id` hex string -> last-modified time in ms since the Unix epoch.
type TimestampMap = DashMap<String, i64>;

/// Primary document store: db -> coll -> id_hex -> Document.
type DataStore = DashMap<String, DashMap<String, DashMap<String, Document>>>;

/// Saved index specifications: db -> coll -> Vec<index spec Document>.
type IndexStore = DashMap<String, DashMap<String, Vec<Document>>>;

/// Modification tracker: db -> coll -> id_hex -> last_modified_ms.
/// Consulted by conflict detection against a snapshot timestamp.
type ModificationStore = DashMap<String, DashMap<String, TimestampMap>>;
|
||||
|
||||
/// Current wall-clock time as milliseconds since the Unix epoch.
fn now_ms() -> i64 {
    let since_epoch = SystemTime::now()
        .duration_since(UNIX_EPOCH)
        .unwrap();
    since_epoch.as_millis() as i64
}
|
||||
|
||||
/// In-memory storage adapter backed by `DashMap`.
///
/// Optionally persists to a JSON file at a configured path.
pub struct MemoryStorageAdapter {
    /// Primary document store: db -> coll -> id_hex -> Document.
    data: DataStore,
    /// Saved index specs: db -> coll -> Vec<spec>.
    indexes: IndexStore,
    /// Last-modified timestamps consumed by conflict detection.
    modifications: ModificationStore,
    /// Target file for persistence; `None` means purely in-memory.
    persist_path: Option<PathBuf>,
}
|
||||
|
||||
impl MemoryStorageAdapter {
|
||||
/// Create a new purely in-memory adapter.
|
||||
pub fn new() -> Self {
|
||||
Self {
|
||||
data: DashMap::new(),
|
||||
indexes: DashMap::new(),
|
||||
modifications: DashMap::new(),
|
||||
persist_path: None,
|
||||
}
|
||||
}
|
||||
|
||||
/// Create a new adapter that will persist state to the given JSON file.
|
||||
pub fn with_persist_path(path: PathBuf) -> Self {
|
||||
Self {
|
||||
data: DashMap::new(),
|
||||
indexes: DashMap::new(),
|
||||
modifications: DashMap::new(),
|
||||
persist_path: Some(path),
|
||||
}
|
||||
}
|
||||
|
||||
/// Get or create the database entry in the data store.
|
||||
fn ensure_db(&self, db: &str) {
|
||||
self.data.entry(db.to_string()).or_insert_with(DashMap::new);
|
||||
self.indexes
|
||||
.entry(db.to_string())
|
||||
.or_insert_with(DashMap::new);
|
||||
self.modifications
|
||||
.entry(db.to_string())
|
||||
.or_insert_with(DashMap::new);
|
||||
}
|
||||
|
||||
fn extract_id(doc: &Document) -> StorageResult<String> {
|
||||
match doc.get("_id") {
|
||||
Some(bson::Bson::ObjectId(oid)) => Ok(oid.to_hex()),
|
||||
_ => Err(StorageError::NotFound("document missing _id".into())),
|
||||
}
|
||||
}
|
||||
|
||||
fn record_modification(&self, db: &str, coll: &str, id: &str) {
|
||||
if let Some(db_mods) = self.modifications.get(db) {
|
||||
if let Some(coll_mods) = db_mods.get(coll) {
|
||||
coll_mods.insert(id.to_string(), now_ms());
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
#[async_trait]
|
||||
impl StorageAdapter for MemoryStorageAdapter {
|
||||
async fn initialize(&self) -> StorageResult<()> {
|
||||
debug!("MemoryStorageAdapter initialized");
|
||||
Ok(())
|
||||
}
|
||||
|
||||
async fn close(&self) -> StorageResult<()> {
|
||||
// Persist if configured.
|
||||
self.persist().await?;
|
||||
debug!("MemoryStorageAdapter closed");
|
||||
Ok(())
|
||||
}
|
||||
|
||||
// ---- database ----
|
||||
|
||||
async fn list_databases(&self) -> StorageResult<Vec<String>> {
|
||||
Ok(self.data.iter().map(|e| e.key().clone()).collect())
|
||||
}
|
||||
|
||||
async fn create_database(&self, db: &str) -> StorageResult<()> {
|
||||
if self.data.contains_key(db) {
|
||||
return Err(StorageError::AlreadyExists(format!("database '{db}'")));
|
||||
}
|
||||
self.ensure_db(db);
|
||||
Ok(())
|
||||
}
|
||||
|
||||
async fn drop_database(&self, db: &str) -> StorageResult<()> {
|
||||
self.data.remove(db);
|
||||
self.indexes.remove(db);
|
||||
self.modifications.remove(db);
|
||||
Ok(())
|
||||
}
|
||||
|
||||
async fn database_exists(&self, db: &str) -> StorageResult<bool> {
|
||||
Ok(self.data.contains_key(db))
|
||||
}
|
||||
|
||||
// ---- collection ----
|
||||
|
||||
async fn list_collections(&self, db: &str) -> StorageResult<Vec<String>> {
|
||||
let db_ref = self
|
||||
.data
|
||||
.get(db)
|
||||
.ok_or_else(|| StorageError::NotFound(format!("database '{db}'")))?;
|
||||
Ok(db_ref.iter().map(|e| e.key().clone()).collect())
|
||||
}
|
||||
|
||||
async fn create_collection(&self, db: &str, coll: &str) -> StorageResult<()> {
|
||||
self.ensure_db(db);
|
||||
let db_ref = self.data.get(db).unwrap();
|
||||
if db_ref.contains_key(coll) {
|
||||
return Err(StorageError::AlreadyExists(format!(
|
||||
"collection '{db}.{coll}'"
|
||||
)));
|
||||
}
|
||||
db_ref.insert(coll.to_string(), DashMap::new());
|
||||
drop(db_ref);
|
||||
|
||||
// Create modification tracker for this collection.
|
||||
if let Some(db_mods) = self.modifications.get(db) {
|
||||
db_mods.insert(coll.to_string(), DashMap::new());
|
||||
}
|
||||
|
||||
// Auto-create _id index spec.
|
||||
let idx_spec = doc! { "name": "_id_", "key": { "_id": 1 } };
|
||||
if let Some(db_idx) = self.indexes.get(db) {
|
||||
db_idx.insert(coll.to_string(), vec![idx_spec]);
|
||||
}
|
||||
|
||||
Ok(())
|
||||
}
|
||||
|
||||
async fn drop_collection(&self, db: &str, coll: &str) -> StorageResult<()> {
|
||||
if let Some(db_ref) = self.data.get(db) {
|
||||
db_ref.remove(coll);
|
||||
}
|
||||
if let Some(db_idx) = self.indexes.get(db) {
|
||||
db_idx.remove(coll);
|
||||
}
|
||||
if let Some(db_mods) = self.modifications.get(db) {
|
||||
db_mods.remove(coll);
|
||||
}
|
||||
Ok(())
|
||||
}
|
||||
|
||||
async fn collection_exists(&self, db: &str, coll: &str) -> StorageResult<bool> {
|
||||
Ok(self
|
||||
.data
|
||||
.get(db)
|
||||
.map(|db_ref| db_ref.contains_key(coll))
|
||||
.unwrap_or(false))
|
||||
}
|
||||
|
||||
async fn rename_collection(
|
||||
&self,
|
||||
db: &str,
|
||||
old_name: &str,
|
||||
new_name: &str,
|
||||
) -> StorageResult<()> {
|
||||
let db_ref = self
|
||||
.data
|
||||
.get(db)
|
||||
.ok_or_else(|| StorageError::NotFound(format!("database '{db}'")))?;
|
||||
if db_ref.contains_key(new_name) {
|
||||
return Err(StorageError::AlreadyExists(format!(
|
||||
"collection '{db}.{new_name}'"
|
||||
)));
|
||||
}
|
||||
let (_, coll_data) = db_ref
|
||||
.remove(old_name)
|
||||
.ok_or_else(|| StorageError::NotFound(format!("collection '{db}.{old_name}'")))?;
|
||||
db_ref.insert(new_name.to_string(), coll_data);
|
||||
drop(db_ref);
|
||||
|
||||
// Rename in indexes.
|
||||
if let Some(db_idx) = self.indexes.get(db) {
|
||||
if let Some((_, idx_data)) = db_idx.remove(old_name) {
|
||||
db_idx.insert(new_name.to_string(), idx_data);
|
||||
}
|
||||
}
|
||||
// Rename in modifications.
|
||||
if let Some(db_mods) = self.modifications.get(db) {
|
||||
if let Some((_, mod_data)) = db_mods.remove(old_name) {
|
||||
db_mods.insert(new_name.to_string(), mod_data);
|
||||
}
|
||||
}
|
||||
|
||||
Ok(())
|
||||
}
|
||||
|
||||
// ---- document writes ----
|
||||
|
||||
async fn insert_one(
|
||||
&self,
|
||||
db: &str,
|
||||
coll: &str,
|
||||
mut doc: Document,
|
||||
) -> StorageResult<String> {
|
||||
// Ensure _id exists.
|
||||
if !doc.contains_key("_id") {
|
||||
doc.insert("_id", ObjectId::new());
|
||||
}
|
||||
let id = Self::extract_id(&doc)?;
|
||||
|
||||
let db_ref = self
|
||||
.data
|
||||
.get(db)
|
||||
.ok_or_else(|| StorageError::NotFound(format!("database '{db}'")))?;
|
||||
let coll_ref = db_ref
|
||||
.get(coll)
|
||||
.ok_or_else(|| StorageError::NotFound(format!("collection '{db}.{coll}'")))?;
|
||||
|
||||
if coll_ref.contains_key(&id) {
|
||||
return Err(StorageError::AlreadyExists(format!("document '{id}'")));
|
||||
}
|
||||
coll_ref.insert(id.clone(), doc);
|
||||
drop(coll_ref);
|
||||
drop(db_ref);
|
||||
|
||||
self.record_modification(db, coll, &id);
|
||||
Ok(id)
|
||||
}
|
||||
|
||||
async fn insert_many(
|
||||
&self,
|
||||
db: &str,
|
||||
coll: &str,
|
||||
docs: Vec<Document>,
|
||||
) -> StorageResult<Vec<String>> {
|
||||
let mut ids = Vec::with_capacity(docs.len());
|
||||
for doc in docs {
|
||||
let id = self.insert_one(db, coll, doc).await?;
|
||||
ids.push(id);
|
||||
}
|
||||
Ok(ids)
|
||||
}
|
||||
|
||||
async fn update_by_id(
|
||||
&self,
|
||||
db: &str,
|
||||
coll: &str,
|
||||
id: &str,
|
||||
doc: Document,
|
||||
) -> StorageResult<()> {
|
||||
let db_ref = self
|
||||
.data
|
||||
.get(db)
|
||||
.ok_or_else(|| StorageError::NotFound(format!("database '{db}'")))?;
|
||||
let coll_ref = db_ref
|
||||
.get(coll)
|
||||
.ok_or_else(|| StorageError::NotFound(format!("collection '{db}.{coll}'")))?;
|
||||
|
||||
if !coll_ref.contains_key(id) {
|
||||
return Err(StorageError::NotFound(format!("document '{id}'")));
|
||||
}
|
||||
coll_ref.insert(id.to_string(), doc);
|
||||
drop(coll_ref);
|
||||
drop(db_ref);
|
||||
|
||||
self.record_modification(db, coll, id);
|
||||
Ok(())
|
||||
}
|
||||
|
||||
async fn delete_by_id(
|
||||
&self,
|
||||
db: &str,
|
||||
coll: &str,
|
||||
id: &str,
|
||||
) -> StorageResult<()> {
|
||||
let db_ref = self
|
||||
.data
|
||||
.get(db)
|
||||
.ok_or_else(|| StorageError::NotFound(format!("database '{db}'")))?;
|
||||
let coll_ref = db_ref
|
||||
.get(coll)
|
||||
.ok_or_else(|| StorageError::NotFound(format!("collection '{db}.{coll}'")))?;
|
||||
|
||||
coll_ref
|
||||
.remove(id)
|
||||
.ok_or_else(|| StorageError::NotFound(format!("document '{id}'")))?;
|
||||
drop(coll_ref);
|
||||
drop(db_ref);
|
||||
|
||||
self.record_modification(db, coll, id);
|
||||
Ok(())
|
||||
}
|
||||
|
||||
async fn delete_by_ids(
|
||||
&self,
|
||||
db: &str,
|
||||
coll: &str,
|
||||
ids: &[String],
|
||||
) -> StorageResult<()> {
|
||||
for id in ids {
|
||||
self.delete_by_id(db, coll, id).await?;
|
||||
}
|
||||
Ok(())
|
||||
}
|
||||
|
||||
// ---- document reads ----
|
||||
|
||||
async fn find_all(
|
||||
&self,
|
||||
db: &str,
|
||||
coll: &str,
|
||||
) -> StorageResult<Vec<Document>> {
|
||||
let db_ref = self
|
||||
.data
|
||||
.get(db)
|
||||
.ok_or_else(|| StorageError::NotFound(format!("database '{db}'")))?;
|
||||
let coll_ref = db_ref
|
||||
.get(coll)
|
||||
.ok_or_else(|| StorageError::NotFound(format!("collection '{db}.{coll}'")))?;
|
||||
|
||||
Ok(coll_ref.iter().map(|e| e.value().clone()).collect())
|
||||
}
|
||||
|
||||
async fn find_by_ids(
|
||||
&self,
|
||||
db: &str,
|
||||
coll: &str,
|
||||
ids: HashSet<String>,
|
||||
) -> StorageResult<Vec<Document>> {
|
||||
let db_ref = self
|
||||
.data
|
||||
.get(db)
|
||||
.ok_or_else(|| StorageError::NotFound(format!("database '{db}'")))?;
|
||||
let coll_ref = db_ref
|
||||
.get(coll)
|
||||
.ok_or_else(|| StorageError::NotFound(format!("collection '{db}.{coll}'")))?;
|
||||
|
||||
let mut results = Vec::with_capacity(ids.len());
|
||||
for id in &ids {
|
||||
if let Some(doc) = coll_ref.get(id) {
|
||||
results.push(doc.value().clone());
|
||||
}
|
||||
}
|
||||
Ok(results)
|
||||
}
|
||||
|
||||
async fn find_by_id(
|
||||
&self,
|
||||
db: &str,
|
||||
coll: &str,
|
||||
id: &str,
|
||||
) -> StorageResult<Option<Document>> {
|
||||
let db_ref = self
|
||||
.data
|
||||
.get(db)
|
||||
.ok_or_else(|| StorageError::NotFound(format!("database '{db}'")))?;
|
||||
let coll_ref = db_ref
|
||||
.get(coll)
|
||||
.ok_or_else(|| StorageError::NotFound(format!("collection '{db}.{coll}'")))?;
|
||||
|
||||
Ok(coll_ref.get(id).map(|e| e.value().clone()))
|
||||
}
|
||||
|
||||
async fn count(
|
||||
&self,
|
||||
db: &str,
|
||||
coll: &str,
|
||||
) -> StorageResult<u64> {
|
||||
let db_ref = self
|
||||
.data
|
||||
.get(db)
|
||||
.ok_or_else(|| StorageError::NotFound(format!("database '{db}'")))?;
|
||||
let coll_ref = db_ref
|
||||
.get(coll)
|
||||
.ok_or_else(|| StorageError::NotFound(format!("collection '{db}.{coll}'")))?;
|
||||
|
||||
Ok(coll_ref.len() as u64)
|
||||
}
|
||||
|
||||
// ---- indexes ----
|
||||
|
||||
async fn save_index(
|
||||
&self,
|
||||
db: &str,
|
||||
coll: &str,
|
||||
name: &str,
|
||||
spec: Document,
|
||||
) -> StorageResult<()> {
|
||||
let db_idx = self
|
||||
.indexes
|
||||
.get(db)
|
||||
.ok_or_else(|| StorageError::NotFound(format!("database '{db}'")))?;
|
||||
|
||||
let mut specs = db_idx
|
||||
.get_mut(coll)
|
||||
.ok_or_else(|| StorageError::NotFound(format!("collection '{db}.{coll}'")))?;
|
||||
|
||||
// Remove existing index with same name, then add.
|
||||
specs.retain(|s| s.get_str("name").unwrap_or("") != name);
|
||||
let mut full_spec = spec;
|
||||
full_spec.insert("name", name);
|
||||
specs.push(full_spec);
|
||||
Ok(())
|
||||
}
|
||||
|
||||
async fn get_indexes(
|
||||
&self,
|
||||
db: &str,
|
||||
coll: &str,
|
||||
) -> StorageResult<Vec<Document>> {
|
||||
let db_idx = self
|
||||
.indexes
|
||||
.get(db)
|
||||
.ok_or_else(|| StorageError::NotFound(format!("database '{db}'")))?;
|
||||
|
||||
let specs = db_idx
|
||||
.get(coll)
|
||||
.ok_or_else(|| StorageError::NotFound(format!("collection '{db}.{coll}'")))?;
|
||||
|
||||
Ok(specs.clone())
|
||||
}
|
||||
|
||||
async fn drop_index(
|
||||
&self,
|
||||
db: &str,
|
||||
coll: &str,
|
||||
name: &str,
|
||||
) -> StorageResult<()> {
|
||||
let db_idx = self
|
||||
.indexes
|
||||
.get(db)
|
||||
.ok_or_else(|| StorageError::NotFound(format!("database '{db}'")))?;
|
||||
|
||||
let mut specs = db_idx
|
||||
.get_mut(coll)
|
||||
.ok_or_else(|| StorageError::NotFound(format!("collection '{db}.{coll}'")))?;
|
||||
|
||||
let before = specs.len();
|
||||
specs.retain(|s| s.get_str("name").unwrap_or("") != name);
|
||||
if specs.len() == before {
|
||||
return Err(StorageError::NotFound(format!("index '{name}'")));
|
||||
}
|
||||
Ok(())
|
||||
}
|
||||
|
||||
// ---- snapshot / conflict detection ----
|
||||
|
||||
/// Create a snapshot marker for optimistic conflict detection.
///
/// The in-memory adapter has no copy-on-write snapshots: a "snapshot" is
/// simply the current wall-clock time in milliseconds. Presumably callers
/// pass this value back as `snapshot_time` to `has_conflicts`, which
/// compares it against per-document modification timestamps — verify
/// against the caller.
async fn create_snapshot(
    &self,
    // db/coll are unused: the timestamp marker is global, not per-collection.
    _db: &str,
    _coll: &str,
) -> StorageResult<i64> {
    Ok(now_ms())
}
|
||||
|
||||
async fn has_conflicts(
|
||||
&self,
|
||||
db: &str,
|
||||
coll: &str,
|
||||
ids: &HashSet<String>,
|
||||
snapshot_time: i64,
|
||||
) -> StorageResult<bool> {
|
||||
if let Some(db_mods) = self.modifications.get(db) {
|
||||
if let Some(coll_mods) = db_mods.get(coll) {
|
||||
for id in ids {
|
||||
if let Some(ts) = coll_mods.get(id) {
|
||||
if *ts.value() > snapshot_time {
|
||||
return Ok(true);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
Ok(false)
|
||||
}
|
||||
|
||||
// ---- persistence ----
|
||||
|
||||
async fn persist(&self) -> StorageResult<()> {
|
||||
let path = match &self.persist_path {
|
||||
Some(p) => p,
|
||||
None => return Ok(()),
|
||||
};
|
||||
|
||||
// Serialize the entire data store to JSON.
|
||||
let mut db_map = serde_json::Map::new();
|
||||
for db_entry in self.data.iter() {
|
||||
let db_name = db_entry.key().clone();
|
||||
let mut coll_map = serde_json::Map::new();
|
||||
for coll_entry in db_entry.value().iter() {
|
||||
let coll_name = coll_entry.key().clone();
|
||||
let mut docs_map = serde_json::Map::new();
|
||||
for doc_entry in coll_entry.value().iter() {
|
||||
let id = doc_entry.key().clone();
|
||||
// Convert bson::Document -> serde_json::Value via bson's
|
||||
// built-in extended-JSON serialization.
|
||||
let json_val: serde_json::Value =
|
||||
bson::to_bson(doc_entry.value())
|
||||
.map_err(|e| StorageError::SerializationError(e.to_string()))
|
||||
.and_then(|b| {
|
||||
serde_json::to_value(&b)
|
||||
.map_err(|e| StorageError::SerializationError(e.to_string()))
|
||||
})?;
|
||||
docs_map.insert(id, json_val);
|
||||
}
|
||||
coll_map.insert(coll_name, serde_json::Value::Object(docs_map));
|
||||
}
|
||||
db_map.insert(db_name, serde_json::Value::Object(coll_map));
|
||||
}
|
||||
|
||||
let json = serde_json::to_string_pretty(&serde_json::Value::Object(db_map))?;
|
||||
if let Some(parent) = path.parent() {
|
||||
tokio::fs::create_dir_all(parent).await?;
|
||||
}
|
||||
tokio::fs::write(path, json).await?;
|
||||
debug!("MemoryStorageAdapter persisted to {:?}", path);
|
||||
Ok(())
|
||||
}
|
||||
|
||||
/// Load the persisted JSON file back into memory, replacing all current
/// state. A no-op when persistence is disabled or the file is absent.
///
/// Expected layout (mirror of `persist`):
/// `{ db -> { coll -> { id -> extended-JSON document } } }`.
///
/// Note: modification trackers are reset to empty and every restored
/// collection receives only the default `_id_` index — `persist` only
/// serializes document data, so secondary index definitions cannot be
/// recovered here.
async fn restore(&self) -> StorageResult<()> {
    // Persistence disabled -> nothing to restore.
    let path = match &self.persist_path {
        Some(p) => p,
        None => return Ok(()),
    };

    // Missing file is not an error: first run, or persist never happened.
    if !path.exists() {
        warn!("persist file not found at {:?}, skipping restore", path);
        return Ok(());
    }

    let json = tokio::fs::read_to_string(path).await?;
    let root: serde_json::Value = serde_json::from_str(&json)?;

    let root_obj = root
        .as_object()
        .ok_or_else(|| StorageError::SerializationError("expected object".into()))?;

    // Wipe all in-memory state before repopulating; a parse error below
    // therefore leaves the adapter empty rather than half-merged.
    self.data.clear();
    self.indexes.clear();
    self.modifications.clear();

    for (db_name, colls_val) in root_obj {
        // ensure_db creates the data/index/modification entries for this
        // database, so the `unwrap` on the following lookup cannot fail.
        self.ensure_db(db_name);
        let db_ref = self.data.get(db_name).unwrap();
        let colls = colls_val
            .as_object()
            .ok_or_else(|| StorageError::SerializationError("expected object".into()))?;

        for (coll_name, docs_val) in colls {
            // Build the collection map fully before publishing it.
            let coll_map: DashMap<String, Document> = DashMap::new();
            let docs = docs_val
                .as_object()
                .ok_or_else(|| StorageError::SerializationError("expected object".into()))?;

            for (id, doc_val) in docs {
                // Reverse of `persist`: JSON value -> Bson -> Document.
                let bson_val: bson::Bson = serde_json::from_value(doc_val.clone())
                    .map_err(|e| StorageError::SerializationError(e.to_string()))?;
                let doc = bson_val
                    .as_document()
                    .ok_or_else(|| {
                        StorageError::SerializationError("expected document".into())
                    })?
                    .clone();
                coll_map.insert(id.clone(), doc);
            }
            db_ref.insert(coll_name.clone(), coll_map);

            // Restore modification tracker (empty) and default _id index.
            if let Some(db_mods) = self.modifications.get(db_name) {
                db_mods.insert(coll_name.clone(), DashMap::new());
            }
            if let Some(db_idx) = self.indexes.get(db_name) {
                let idx_spec = doc! { "name": "_id_", "key": { "_id": 1 } };
                db_idx.insert(coll_name.clone(), vec![idx_spec]);
            }
        }
    }

    debug!("MemoryStorageAdapter restored from {:?}", path);
    Ok(())
}
|
||||
}
|
||||
|
||||
/// `Default` delegates to `new()` so the adapter can be constructed via
/// `MemoryStorageAdapter::default()` or `..Default::default()` syntax.
impl Default for MemoryStorageAdapter {
    fn default() -> Self {
        Self::new()
    }
}
|
||||
120
rust/crates/rustdb-storage/src/oplog.rs
Normal file
120
rust/crates/rustdb-storage/src/oplog.rs
Normal file
@@ -0,0 +1,120 @@
|
||||
//! Operation log (OpLog) for tracking mutations.
|
||||
//!
|
||||
//! The OpLog records every write operation so that changes can be replayed,
|
||||
//! replicated, or used for change-stream style notifications.
|
||||
|
||||
use std::sync::atomic::{AtomicU64, Ordering};
|
||||
use std::time::{SystemTime, UNIX_EPOCH};
|
||||
|
||||
use bson::Document;
|
||||
use dashmap::DashMap;
|
||||
use serde::{Deserialize, Serialize};
|
||||
|
||||
/// The type of operation recorded in the oplog.
#[derive(Debug, Clone, Serialize, Deserialize, PartialEq, Eq)]
pub enum OpType {
    /// A document was inserted.
    Insert,
    /// An existing document was updated.
    Update,
    /// A document was deleted.
    Delete,
}
|
||||
|
||||
/// A single oplog entry: one recorded mutation.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct OpLogEntry {
    /// Monotonically increasing sequence number (assigned by `OpLog::append`,
    /// starting at 1).
    pub seq: u64,
    /// Timestamp in milliseconds since UNIX epoch, captured at append time.
    pub timestamp_ms: i64,
    /// Operation type.
    pub op: OpType,
    /// Database name.
    pub db: String,
    /// Collection name.
    pub collection: String,
    /// Document id (hex string).
    pub document_id: String,
    /// The document snapshot (for insert/update; None for delete).
    pub document: Option<Document>,
}
|
||||
|
||||
/// In-memory operation log.
///
/// Concurrent appends are safe: the entry map is a `DashMap` and the
/// sequence counter an atomic. Entries are kept unordered internally and
/// sorted on read (see `entries_since`).
pub struct OpLog {
    /// All entries keyed by sequence number.
    entries: DashMap<u64, OpLogEntry>,
    /// Next sequence number to hand out (starts at 1).
    next_seq: AtomicU64,
}
|
||||
|
||||
impl OpLog {
|
||||
pub fn new() -> Self {
|
||||
Self {
|
||||
entries: DashMap::new(),
|
||||
next_seq: AtomicU64::new(1),
|
||||
}
|
||||
}
|
||||
|
||||
/// Append an operation to the log and return its sequence number.
|
||||
pub fn append(
|
||||
&self,
|
||||
op: OpType,
|
||||
db: &str,
|
||||
collection: &str,
|
||||
document_id: &str,
|
||||
document: Option<Document>,
|
||||
) -> u64 {
|
||||
let seq = self.next_seq.fetch_add(1, Ordering::SeqCst);
|
||||
let entry = OpLogEntry {
|
||||
seq,
|
||||
timestamp_ms: SystemTime::now()
|
||||
.duration_since(UNIX_EPOCH)
|
||||
.unwrap()
|
||||
.as_millis() as i64,
|
||||
op,
|
||||
db: db.to_string(),
|
||||
collection: collection.to_string(),
|
||||
document_id: document_id.to_string(),
|
||||
document,
|
||||
};
|
||||
self.entries.insert(seq, entry);
|
||||
seq
|
||||
}
|
||||
|
||||
/// Get all entries with sequence number >= `since`.
|
||||
pub fn entries_since(&self, since: u64) -> Vec<OpLogEntry> {
|
||||
let mut result: Vec<_> = self
|
||||
.entries
|
||||
.iter()
|
||||
.filter(|e| *e.key() >= since)
|
||||
.map(|e| e.value().clone())
|
||||
.collect();
|
||||
result.sort_by_key(|e| e.seq);
|
||||
result
|
||||
}
|
||||
|
||||
/// Get the current (latest) sequence number. Returns 0 if empty.
|
||||
pub fn current_seq(&self) -> u64 {
|
||||
self.next_seq.load(Ordering::SeqCst).saturating_sub(1)
|
||||
}
|
||||
|
||||
/// Clear all entries.
|
||||
pub fn clear(&self) {
|
||||
self.entries.clear();
|
||||
self.next_seq.store(1, Ordering::SeqCst);
|
||||
}
|
||||
|
||||
/// Number of entries in the log.
|
||||
pub fn len(&self) -> usize {
|
||||
self.entries.len()
|
||||
}
|
||||
|
||||
/// Whether the log is empty.
|
||||
pub fn is_empty(&self) -> bool {
|
||||
self.entries.is_empty()
|
||||
}
|
||||
}
|
||||
|
||||
/// `Default` delegates to `new()`, producing an empty log.
impl Default for OpLog {
    fn default() -> Self {
        Self::new()
    }
}
|
||||
186
rust/crates/rustdb-storage/src/wal.rs
Normal file
186
rust/crates/rustdb-storage/src/wal.rs
Normal file
@@ -0,0 +1,186 @@
|
||||
//! Write-Ahead Log (WAL) for crash recovery.
|
||||
//!
|
||||
//! Before any mutation is applied to storage, it is first written to the WAL.
|
||||
//! On recovery, uncommitted WAL entries can be replayed or discarded.
|
||||
|
||||
use std::path::PathBuf;
|
||||
use std::sync::atomic::{AtomicU64, Ordering};
|
||||
|
||||
use bson::Document;
|
||||
use serde::{Deserialize, Serialize};
|
||||
use tokio::io::AsyncWriteExt;
|
||||
use tracing::{debug, warn};
|
||||
|
||||
use crate::error::StorageResult;
|
||||
|
||||
/// WAL operation kind.
#[derive(Debug, Clone, Serialize, Deserialize, PartialEq, Eq)]
pub enum WalOp {
    /// A document insert.
    Insert,
    /// A document update.
    Update,
    /// A document delete.
    Delete,
}
|
||||
|
||||
/// A single WAL record, serialized as one JSON line in the log file.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct WalRecord {
    /// Sequence number.
    pub seq: u64,
    /// Operation kind.
    pub op: WalOp,
    /// Database name.
    pub db: String,
    /// Collection name.
    pub collection: String,
    /// Document id (hex string).
    pub document_id: String,
    /// Document data (for insert/update).
    pub document: Option<Document>,
    /// Whether this record has been committed (applied to storage).
    pub committed: bool,
    /// CRC32 checksum over the record's metadata (op, db, collection,
    /// document_id) as computed in `WriteAheadLog::append`.
    /// NOTE(review): the `document` payload is not included in the
    /// checksum, so document-body corruption is not detectable from it.
    pub checksum: u32,
}
|
||||
|
||||
/// Write-ahead log that persists records to a file (one JSON line each).
pub struct WriteAheadLog {
    /// Path of the log file on disk.
    path: PathBuf,
    /// Next sequence number to assign (starts at 1; resumed from the file
    /// by `initialize`).
    next_seq: AtomicU64,
}
|
||||
|
||||
impl WriteAheadLog {
|
||||
/// Create a new WAL at the given file path.
|
||||
pub fn new(path: PathBuf) -> Self {
|
||||
Self {
|
||||
path,
|
||||
next_seq: AtomicU64::new(1),
|
||||
}
|
||||
}
|
||||
|
||||
/// Initialize the WAL (create file if needed, load sequence counter).
|
||||
pub async fn initialize(&self) -> StorageResult<()> {
|
||||
if let Some(parent) = self.path.parent() {
|
||||
tokio::fs::create_dir_all(parent).await?;
|
||||
}
|
||||
if self.path.exists() {
|
||||
// Load existing records to find the max sequence number.
|
||||
let records = self.read_all().await?;
|
||||
if let Some(max_seq) = records.iter().map(|r| r.seq).max() {
|
||||
self.next_seq.store(max_seq + 1, Ordering::SeqCst);
|
||||
}
|
||||
}
|
||||
debug!("WAL initialized at {:?}", self.path);
|
||||
Ok(())
|
||||
}
|
||||
|
||||
/// Append a record to the WAL. Returns the sequence number.
|
||||
pub async fn append(
|
||||
&self,
|
||||
op: WalOp,
|
||||
db: &str,
|
||||
collection: &str,
|
||||
document_id: &str,
|
||||
document: Option<Document>,
|
||||
) -> StorageResult<u64> {
|
||||
let seq = self.next_seq.fetch_add(1, Ordering::SeqCst);
|
||||
|
||||
// Compute checksum over the payload.
|
||||
let payload = serde_json::json!({
|
||||
"op": op,
|
||||
"db": db,
|
||||
"collection": collection,
|
||||
"document_id": document_id,
|
||||
});
|
||||
let payload_bytes = serde_json::to_vec(&payload)?;
|
||||
let checksum = crc32fast::hash(&payload_bytes);
|
||||
|
||||
let record = WalRecord {
|
||||
seq,
|
||||
op,
|
||||
db: db.to_string(),
|
||||
collection: collection.to_string(),
|
||||
document_id: document_id.to_string(),
|
||||
document,
|
||||
committed: false,
|
||||
checksum,
|
||||
};
|
||||
|
||||
let line = serde_json::to_string(&record)?;
|
||||
let mut file = tokio::fs::OpenOptions::new()
|
||||
.create(true)
|
||||
.append(true)
|
||||
.open(&self.path)
|
||||
.await?;
|
||||
file.write_all(line.as_bytes()).await?;
|
||||
file.write_all(b"\n").await?;
|
||||
file.flush().await?;
|
||||
|
||||
Ok(seq)
|
||||
}
|
||||
|
||||
/// Mark a WAL record as committed by rewriting the file.
|
||||
pub async fn mark_committed(&self, seq: u64) -> StorageResult<()> {
|
||||
let mut records = self.read_all().await?;
|
||||
for record in &mut records {
|
||||
if record.seq == seq {
|
||||
record.committed = true;
|
||||
}
|
||||
}
|
||||
self.write_all(&records).await
|
||||
}
|
||||
|
||||
/// Read all WAL records.
|
||||
pub async fn read_all(&self) -> StorageResult<Vec<WalRecord>> {
|
||||
if !self.path.exists() {
|
||||
return Ok(vec![]);
|
||||
}
|
||||
let data = tokio::fs::read_to_string(&self.path).await?;
|
||||
let mut records = Vec::new();
|
||||
for line in data.lines() {
|
||||
if line.trim().is_empty() {
|
||||
continue;
|
||||
}
|
||||
match serde_json::from_str::<WalRecord>(line) {
|
||||
Ok(record) => records.push(record),
|
||||
Err(e) => {
|
||||
warn!("skipping corrupt WAL record: {e}");
|
||||
}
|
||||
}
|
||||
}
|
||||
Ok(records)
|
||||
}
|
||||
|
||||
/// Get all uncommitted records (for replay during recovery).
|
||||
pub async fn uncommitted(&self) -> StorageResult<Vec<WalRecord>> {
|
||||
let records = self.read_all().await?;
|
||||
Ok(records.into_iter().filter(|r| !r.committed).collect())
|
||||
}
|
||||
|
||||
/// Truncate the WAL, removing all committed records.
|
||||
pub async fn truncate_committed(&self) -> StorageResult<()> {
|
||||
let records = self.read_all().await?;
|
||||
let uncommitted: Vec<_> = records.into_iter().filter(|r| !r.committed).collect();
|
||||
self.write_all(&uncommitted).await
|
||||
}
|
||||
|
||||
/// Clear the entire WAL.
|
||||
pub async fn clear(&self) -> StorageResult<()> {
|
||||
if self.path.exists() {
|
||||
tokio::fs::write(&self.path, "").await?;
|
||||
}
|
||||
self.next_seq.store(1, Ordering::SeqCst);
|
||||
Ok(())
|
||||
}
|
||||
|
||||
/// Write all records to the WAL file (overwrites).
|
||||
async fn write_all(&self, records: &[WalRecord]) -> StorageResult<()> {
|
||||
let mut content = String::new();
|
||||
for record in records {
|
||||
let line = serde_json::to_string(record)?;
|
||||
content.push_str(&line);
|
||||
content.push('\n');
|
||||
}
|
||||
tokio::fs::write(&self.path, content).await?;
|
||||
Ok(())
|
||||
}
|
||||
}
|
||||
Reference in New Issue
Block a user