use std::collections::HashSet; use std::path::PathBuf; use async_trait::async_trait; use bson::{doc, oid::ObjectId, Document}; use tracing::debug; use crate::adapter::StorageAdapter; use crate::error::{StorageError, StorageResult}; /// File-based storage adapter. Each collection is stored as a JSON file: /// `{base_path}/{db}/{coll}.json` /// Index metadata lives alongside: /// `{base_path}/{db}/{coll}.indexes.json` pub struct FileStorageAdapter { base_path: PathBuf, } impl FileStorageAdapter { pub fn new(base_path: impl Into) -> Self { Self { base_path: base_path.into(), } } fn db_dir(&self, db: &str) -> PathBuf { self.base_path.join(db) } fn coll_path(&self, db: &str, coll: &str) -> PathBuf { self.db_dir(db).join(format!("{coll}.json")) } fn index_path(&self, db: &str, coll: &str) -> PathBuf { self.db_dir(db).join(format!("{coll}.indexes.json")) } /// Read all documents from a collection file. Returns empty vec if file doesn't exist. async fn read_docs(&self, db: &str, coll: &str) -> StorageResult> { let path = self.coll_path(db, coll); if !path.exists() { return Err(StorageError::NotFound(format!( "collection '{db}.{coll}'" ))); } let data = tokio::fs::read_to_string(&path).await?; let json_docs: Vec = serde_json::from_str(&data)?; let mut docs = Vec::with_capacity(json_docs.len()); for jv in json_docs { let bson_val: bson::Bson = serde_json::from_value(jv) .map_err(|e| StorageError::SerializationError(e.to_string()))?; let doc = bson_val .as_document() .ok_or_else(|| StorageError::SerializationError("expected document".into()))? .clone(); docs.push(doc); } Ok(docs) } /// Write all documents to a collection file. 
async fn write_docs(&self, db: &str, coll: &str, docs: &[Document]) -> StorageResult<()> { let path = self.coll_path(db, coll); if let Some(parent) = path.parent() { tokio::fs::create_dir_all(parent).await?; } let json_vals: Vec = docs .iter() .map(|d| { let b = bson::to_bson(d) .map_err(|e| StorageError::SerializationError(e.to_string()))?; serde_json::to_value(&b) .map_err(|e| StorageError::SerializationError(e.to_string())) }) .collect::>>()?; let json = serde_json::to_string_pretty(&json_vals)?; tokio::fs::write(&path, json).await?; Ok(()) } /// Read index specs from the indexes file. async fn read_indexes(&self, db: &str, coll: &str) -> StorageResult> { let path = self.index_path(db, coll); if !path.exists() { return Ok(vec![]); } let data = tokio::fs::read_to_string(&path).await?; let json_vals: Vec = serde_json::from_str(&data)?; let mut docs = Vec::new(); for jv in json_vals { let bson_val: bson::Bson = serde_json::from_value(jv) .map_err(|e| StorageError::SerializationError(e.to_string()))?; let doc = bson_val .as_document() .ok_or_else(|| StorageError::SerializationError("expected document".into()))? .clone(); docs.push(doc); } Ok(docs) } /// Write index specs to the indexes file. 
/// Creates the parent directory if missing, then overwrites the whole
/// indexes file with a pretty-printed JSON array of the specs.
async fn write_indexes(&self, db: &str, coll: &str, specs: &[Document]) -> StorageResult<()> {
    let path = self.index_path(db, coll);
    if let Some(parent) = path.parent() {
        tokio::fs::create_dir_all(parent).await?;
    }
    // Same Document -> Bson -> JSON round-trip encoding as `write_docs`.
    let json_vals: Vec<serde_json::Value> = specs
        .iter()
        .map(|d| {
            let b = bson::to_bson(d)
                .map_err(|e| StorageError::SerializationError(e.to_string()))?;
            serde_json::to_value(&b)
                .map_err(|e| StorageError::SerializationError(e.to_string()))
        })
        .collect::<StorageResult<Vec<_>>>()?;
    let json = serde_json::to_string_pretty(&json_vals)?;
    tokio::fs::write(&path, json).await?;
    Ok(())
}

/// Extract the `_id` field as a hex string.
///
/// Only `ObjectId` ids are supported; any other `_id` type (or a missing
/// `_id`) yields `StorageError::NotFound`.
fn extract_id_hex(doc: &Document) -> StorageResult<String> {
    match doc.get("_id") {
        Some(bson::Bson::ObjectId(oid)) => Ok(oid.to_hex()),
        _ => Err(StorageError::NotFound("document missing _id".into())),
    }
}
}

#[async_trait]
impl StorageAdapter for FileStorageAdapter {
    /// Create the base directory; idempotent.
    async fn initialize(&self) -> StorageResult<()> {
        tokio::fs::create_dir_all(&self.base_path).await?;
        debug!("FileStorageAdapter initialized at {:?}", self.base_path);
        Ok(())
    }

    /// No resources to release for the file backend.
    async fn close(&self) -> StorageResult<()> {
        debug!("FileStorageAdapter closed");
        Ok(())
    }

    // ---- database ----

    /// A database is any subdirectory of the base path.
    async fn list_databases(&self) -> StorageResult<Vec<String>> {
        let mut dbs = Vec::new();
        let mut entries = tokio::fs::read_dir(&self.base_path).await?;
        while let Some(entry) = entries.next_entry().await? {
            if entry.file_type().await?.is_dir() {
                // Entries with non-UTF-8 names are silently skipped.
                if let Some(name) = entry.file_name().to_str() {
                    dbs.push(name.to_string());
                }
            }
        }
        Ok(dbs)
    }

    async fn create_database(&self, db: &str) -> StorageResult<()> {
        let dir = self.db_dir(db);
        if dir.exists() {
            return Err(StorageError::AlreadyExists(format!("database '{db}'")));
        }
        tokio::fs::create_dir_all(&dir).await?;
        Ok(())
    }

    /// Dropping a missing database is a no-op, not an error.
    async fn drop_database(&self, db: &str) -> StorageResult<()> {
        let dir = self.db_dir(db);
        if dir.exists() {
            tokio::fs::remove_dir_all(&dir).await?;
        }
        Ok(())
    }

    async fn database_exists(&self, db: &str) -> StorageResult<bool> {
        Ok(self.db_dir(db).exists())
    }

    // ---- collection ----

    async fn list_collections(&self, db: &str) -> StorageResult<Vec<String>> {
        let dir = self.db_dir(db);
        if !dir.exists() {
            return Err(StorageError::NotFound(format!("database '{db}'")));
        }
        let mut colls = Vec::new();
        let mut entries = tokio::fs::read_dir(&dir).await?;
        while let Some(entry) = entries.next_entry().await? {
            if let Some(name) = entry.file_name().to_str() {
                // `strip_suffix` removes exactly one ".json" (unlike
                // `trim_end_matches`, which would also eat a ".json" that is
                // part of the collection name, e.g. "foo.json.json" -> "foo").
                if let Some(stem) = name.strip_suffix(".json") {
                    // Index-metadata files live in the same dir; skip them.
                    if !name.ends_with(".indexes.json") {
                        colls.push(stem.to_string());
                    }
                }
            }
        }
        Ok(colls)
    }

    /// Creates an empty data file plus the default `_id_` index spec.
    async fn create_collection(&self, db: &str, coll: &str) -> StorageResult<()> {
        let path = self.coll_path(db, coll);
        if path.exists() {
            return Err(StorageError::AlreadyExists(format!(
                "collection '{db}.{coll}'"
            )));
        }
        // Ensure db dir exists.
        tokio::fs::create_dir_all(self.db_dir(db)).await?;
        // Write empty array.
        self.write_docs(db, coll, &[]).await?;
        // Write default _id index.
        let idx_spec = doc! { "name": "_id_", "key": { "_id": 1 } };
        self.write_indexes(db, coll, &[idx_spec]).await?;
        Ok(())
    }

    /// Removes both the data file and the indexes file; missing files are
    /// ignored, so dropping a nonexistent collection succeeds.
    async fn drop_collection(&self, db: &str, coll: &str) -> StorageResult<()> {
        let path = self.coll_path(db, coll);
        if path.exists() {
            tokio::fs::remove_file(&path).await?;
        }
        let idx_path = self.index_path(db, coll);
        if idx_path.exists() {
            tokio::fs::remove_file(&idx_path).await?;
        }
        Ok(())
    }

    async fn collection_exists(&self, db: &str, coll: &str) -> StorageResult<bool> {
        Ok(self.coll_path(db, coll).exists())
    }

    async fn rename_collection(
        &self,
        db: &str,
        old_name: &str,
        new_name: &str,
    ) -> StorageResult<()> {
        let old_path = self.coll_path(db, old_name);
        let new_path = self.coll_path(db, new_name);
        if !old_path.exists() {
            return Err(StorageError::NotFound(format!(
                "collection '{db}.{old_name}'"
            )));
        }
        if new_path.exists() {
            return Err(StorageError::AlreadyExists(format!(
                "collection '{db}.{new_name}'"
            )));
        }
        tokio::fs::rename(&old_path, &new_path).await?;
        // Rename index file too.
        let old_idx = self.index_path(db, old_name);
        let new_idx = self.index_path(db, new_name);
        if old_idx.exists() {
            tokio::fs::rename(&old_idx, &new_idx).await?;
        }
        Ok(())
    }

    // ---- document writes ----

    /// Inserts one document, generating an `ObjectId` `_id` if absent.
    /// Returns the hex string of the document's `_id`.
    ///
    /// # Errors
    /// `StorageError::AlreadyExists` if a document with the same `_id` is
    /// already present.
    async fn insert_one(
        &self,
        db: &str,
        coll: &str,
        mut doc: Document,
    ) -> StorageResult<String> {
        if !doc.contains_key("_id") {
            doc.insert("_id", ObjectId::new());
        }
        let id = Self::extract_id_hex(&doc)?;
        let mut docs = self.read_docs(db, coll).await?;
        // Check for duplicate.
        for existing in &docs {
            if Self::extract_id_hex(existing)? == id {
                return Err(StorageError::AlreadyExists(format!("document '{id}'")));
            }
        }
        docs.push(doc);
        self.write_docs(db, coll, &docs).await?;
        Ok(id)
    }

    /// Inserts a batch of documents, generating `_id`s where absent, and
    /// returns their hex ids in input order.
    ///
    /// NOTE(review): unlike `insert_one`, this does not check for duplicate
    /// `_id`s against existing documents — confirm whether that is intended.
    async fn insert_many(
        &self,
        db: &str,
        coll: &str,
        mut new_docs: Vec<Document>,
    ) -> StorageResult<Vec<String>> {
        let mut docs = self.read_docs(db, coll).await?;
        let mut ids = Vec::with_capacity(new_docs.len());
        for doc in &mut new_docs {
            if !doc.contains_key("_id") {
                doc.insert("_id", ObjectId::new());
            }
            ids.push(Self::extract_id_hex(doc)?);
        }
        docs.extend(new_docs);
        self.write_docs(db, coll, &docs).await?;
        Ok(ids)
    }

    /// Replaces the document whose `_id` hex equals `id` with `doc`.
    async fn update_by_id(
        &self,
        db: &str,
        coll: &str,
        id: &str,
        doc: Document,
    ) -> StorageResult<()> {
        let mut docs = self.read_docs(db, coll).await?;
        let mut found = false;
        for existing in &mut docs {
            if Self::extract_id_hex(existing)? == id {
                *existing = doc.clone();
                found = true;
                break;
            }
        }
        if !found {
            return Err(StorageError::NotFound(format!("document '{id}'")));
        }
        self.write_docs(db, coll, &docs).await?;
        Ok(())
    }

    /// Deletes the document with the given `_id` hex; errors if absent.
    async fn delete_by_id(
        &self,
        db: &str,
        coll: &str,
        id: &str,
    ) -> StorageResult<()> {
        let mut docs = self.read_docs(db, coll).await?;
        let len_before = docs.len();
        // Documents whose _id cannot be extracted are kept (unwrap_or(true)).
        docs.retain(|d| Self::extract_id_hex(d).map(|i| i != id).unwrap_or(true));
        if docs.len() == len_before {
            return Err(StorageError::NotFound(format!("document '{id}'")));
        }
        self.write_docs(db, coll, &docs).await?;
        Ok(())
    }

    /// Bulk delete by `_id` hex. Unlike `delete_by_id`, ids with no matching
    /// document are silently ignored.
    async fn delete_by_ids(
        &self,
        db: &str,
        coll: &str,
        ids: &[String],
    ) -> StorageResult<()> {
        let id_set: HashSet<&str> = ids.iter().map(|s| s.as_str()).collect();
        let mut docs = self.read_docs(db, coll).await?;
        docs.retain(|d| {
            Self::extract_id_hex(d)
                .map(|i| !id_set.contains(i.as_str()))
                .unwrap_or(true)
        });
        self.write_docs(db, coll, &docs).await?;
        Ok(())
    }

    // ---- document reads ----

    async fn find_all(
        &self,
        db: &str,
        coll: &str,
    ) -> StorageResult<Vec<Document>> {
        self.read_docs(db, coll).await
    }

    async fn find_by_ids(
        &self,
        db: &str,
        coll: &str,
        ids: HashSet<String>,
    ) -> StorageResult<Vec<Document>> {
        let docs = self.read_docs(db, coll).await?;
        Ok(docs
            .into_iter()
            .filter(|d| {
                Self::extract_id_hex(d)
                    .map(|i| ids.contains(&i))
                    .unwrap_or(false)
            })
            .collect())
    }

    async fn find_by_id(
        &self,
        db: &str,
        coll: &str,
        id: &str,
    ) -> StorageResult<Option<Document>> {
        let docs = self.read_docs(db, coll).await?;
        Ok(docs
            .into_iter()
            .find(|d| Self::extract_id_hex(d).map(|i| i == id).unwrap_or(false)))
    }

    async fn count(
        &self,
        db: &str,
        coll: &str,
    ) -> StorageResult<u64> {
        let docs = self.read_docs(db, coll).await?;
        Ok(docs.len() as u64)
    }

    // ---- indexes ----

    /// Upsert an index spec by name: any existing spec with the same name is
    /// replaced, and `name` is (re)written into the stored spec.
    async fn save_index(
        &self,
        db: &str,
        coll: &str,
        name: &str,
        spec: Document,
    ) -> StorageResult<()> {
        let mut indexes = self.read_indexes(db, coll).await?;
        indexes.retain(|s| s.get_str("name").unwrap_or("") != name);
        let mut full_spec = spec;
        full_spec.insert("name", name);
        indexes.push(full_spec);
        self.write_indexes(db, coll, &indexes).await
    }

    async fn get_indexes(
        &self,
        db: &str,
        coll: &str,
    ) -> StorageResult<Vec<Document>> {
        self.read_indexes(db, coll).await
    }

    /// Remove the index named `name`; errors if no such index exists.
    async fn drop_index(
        &self,
        db: &str,
        coll: &str,
        name: &str,
    ) -> StorageResult<()> {
        let mut indexes = self.read_indexes(db, coll).await?;
        let before = indexes.len();
        indexes.retain(|s| s.get_str("name").unwrap_or("") != name);
        if indexes.len() == before {
            return Err(StorageError::NotFound(format!("index '{name}'")));
        }
        self.write_indexes(db, coll, &indexes).await
    }

    // ---- snapshot / conflict detection ----
    // File adapter doesn't track per-document timestamps, so conflict detection
    // is a no-op (always returns false).

    /// Returns the current wall-clock time in milliseconds since the Unix
    /// epoch as an opaque snapshot token.
    async fn create_snapshot(
        &self,
        _db: &str,
        _coll: &str,
    ) -> StorageResult<i64> {
        use std::time::{SystemTime, UNIX_EPOCH};
        // expect: the system clock predating the Unix epoch is a broken host.
        Ok(SystemTime::now()
            .duration_since(UNIX_EPOCH)
            .unwrap()
            .as_millis() as i64)
    }

    async fn has_conflicts(
        &self,
        _db: &str,
        _coll: &str,
        _ids: &HashSet<String>,
        _snapshot_time: i64,
    ) -> StorageResult<bool> {
        // File adapter does not track modification timestamps per document.
        Ok(false)
    }
}