feat: initial implementation of content-addressed incremental backup engine
Rust-centric architecture with TypeScript facade following smartproxy/smartstorage pattern. Core engine in Rust (FastCDC chunking, SHA-256, gzip, AES-256-GCM + Argon2id, binary pack files, global index, snapshots, locking, verification, pruning, repair). TypeScript provides npm interface via @push.rocks/smartrust RustBridge IPC with Unix socket streaming for ingest/restore. All 14 integration tests pass.
This commit is contained in:
28
.gitignore
vendored
Normal file
28
.gitignore
vendored
Normal file
@@ -0,0 +1,28 @@
|
||||
.nogit/
|
||||
|
||||
# artifacts
|
||||
coverage/
|
||||
public/
|
||||
pages/
|
||||
|
||||
# installs
|
||||
node_modules/
|
||||
|
||||
# caches
|
||||
.yarn/
|
||||
.cache/
|
||||
.rpt2_cache
|
||||
|
||||
# builds
|
||||
dist/
|
||||
dist_*/
|
||||
|
||||
# AI
|
||||
.claude/
|
||||
.serena/
|
||||
|
||||
# Rust
|
||||
rust/target/
|
||||
|
||||
#------# custom
|
||||
.playwright-mcp/
|
||||
21
license
Normal file
21
license
Normal file
@@ -0,0 +1,21 @@
|
||||
The MIT License (MIT)
|
||||
|
||||
Copyright (c) 2026 Lossless GmbH
|
||||
|
||||
Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
of this software and associated documentation files (the "Software"), to deal
|
||||
in the Software without restriction, including without limitation the rights
|
||||
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
copies of the Software, and to permit persons to whom the Software is
|
||||
furnished to do so, subject to the following conditions:
|
||||
|
||||
The above copyright notice and this permission notice shall be included in all
|
||||
copies or substantial portions of the Software.
|
||||
|
||||
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
||||
SOFTWARE.
|
||||
40
npmextra.json
Normal file
40
npmextra.json
Normal file
@@ -0,0 +1,40 @@
|
||||
{
|
||||
"@git.zone/cli": {
|
||||
"projectType": "npm",
|
||||
"module": {
|
||||
"githost": "code.foss.global",
|
||||
"gitscope": "push.rocks",
|
||||
"gitrepo": "containerarchive",
|
||||
"description": "content-addressed incremental backup engine with deduplication, encryption, and error correction",
|
||||
"npmPackagename": "@push.rocks/containerarchive",
|
||||
"license": "MIT",
|
||||
"keywords": [
|
||||
"backup",
|
||||
"deduplication",
|
||||
"content-addressed",
|
||||
"incremental",
|
||||
"archive",
|
||||
"encryption",
|
||||
"chunking",
|
||||
"fastcdc",
|
||||
"pack-files"
|
||||
]
|
||||
},
|
||||
"release": {
|
||||
"registries": [
|
||||
"https://verdaccio.lossless.digital",
|
||||
"https://registry.npmjs.org"
|
||||
],
|
||||
"accessLevel": "public"
|
||||
}
|
||||
},
|
||||
"@git.zone/tsrust": {
|
||||
"targets": [
|
||||
"linux_amd64",
|
||||
"linux_arm64"
|
||||
]
|
||||
},
|
||||
"@git.zone/tsdoc": {
|
||||
"legal": "\n## License and Legal Information\n\nThis module is part of the @push.rocks ecosystem, maintained by Task Venture Capital GmbH.\n\nLicensed under MIT. See LICENSE file for details.\n\nFor questions or commercial licensing, contact: hello@task.vc\n"
|
||||
}
|
||||
}
|
||||
59
package.json
Normal file
59
package.json
Normal file
@@ -0,0 +1,59 @@
|
||||
{
|
||||
"name": "@push.rocks/containerarchive",
|
||||
"version": "0.0.1",
|
||||
"private": false,
|
||||
"description": "content-addressed incremental backup engine with deduplication, encryption, and error correction",
|
||||
"main": "dist_ts/index.js",
|
||||
"typings": "dist_ts/index.d.ts",
|
||||
"type": "module",
|
||||
"scripts": {
|
||||
"test": "(tstest test/ --verbose --timeout 60)",
|
||||
"build": "(tsrust && tsbuild tsfolders --allowimplicitany)"
|
||||
},
|
||||
"repository": {
|
||||
"type": "git",
|
||||
"url": "https://code.foss.global/push.rocks/containerarchive.git"
|
||||
},
|
||||
"author": "Lossless GmbH",
|
||||
"license": "MIT",
|
||||
"bugs": {
|
||||
"url": "https://code.foss.global/push.rocks/containerarchive/issues"
|
||||
},
|
||||
"homepage": "https://code.foss.global/push.rocks/containerarchive",
|
||||
"dependencies": {
|
||||
"@push.rocks/lik": "^6.0.0",
|
||||
"@push.rocks/smartpromise": "^4.0.0",
|
||||
"@push.rocks/smartrust": "^1.3.2",
|
||||
"@push.rocks/smartrx": "^3.0.0"
|
||||
},
|
||||
"devDependencies": {
|
||||
"@git.zone/tsbuild": "^2.0.0",
|
||||
"@git.zone/tsrun": "^1.0.0",
|
||||
"@git.zone/tstest": "^1.0.0",
|
||||
"@git.zone/tsrust": "^1.3.0",
|
||||
"@types/node": "^22.0.0"
|
||||
},
|
||||
"files": [
|
||||
"ts/**/*",
|
||||
"dist/**/*",
|
||||
"dist_*/**/*",
|
||||
"dist_ts/**/*",
|
||||
"assets/**/*",
|
||||
"npmextra.json",
|
||||
"readme.md"
|
||||
],
|
||||
"browserslist": [
|
||||
"last 1 chrome versions"
|
||||
],
|
||||
"keywords": [
|
||||
"backup",
|
||||
"deduplication",
|
||||
"content-addressed",
|
||||
"incremental",
|
||||
"archive",
|
||||
"encryption",
|
||||
"chunking",
|
||||
"fastcdc",
|
||||
"pack-files"
|
||||
]
|
||||
}
|
||||
9705
pnpm-lock.yaml
generated
Normal file
9705
pnpm-lock.yaml
generated
Normal file
File diff suppressed because it is too large
Load Diff
2
rust/.cargo/config.toml
Normal file
2
rust/.cargo/config.toml
Normal file
@@ -0,0 +1,2 @@
|
||||
[target.aarch64-unknown-linux-gnu]
|
||||
linker = "aarch64-linux-gnu-gcc"
|
||||
1388
rust/Cargo.lock
generated
Normal file
1388
rust/Cargo.lock
generated
Normal file
File diff suppressed because it is too large
Load Diff
47
rust/Cargo.toml
Normal file
47
rust/Cargo.toml
Normal file
@@ -0,0 +1,47 @@
|
||||
[package]
|
||||
name = "containerarchive"
|
||||
version = "0.1.0"
|
||||
edition = "2021"
|
||||
|
||||
[[bin]]
|
||||
name = "containerarchive"
|
||||
path = "src/main.rs"
|
||||
|
||||
[dependencies]
|
||||
# Async runtime
|
||||
tokio = { version = "1", features = ["full"] }
|
||||
|
||||
# Serialization
|
||||
serde = { version = "1", features = ["derive"] }
|
||||
serde_json = "1"
|
||||
|
||||
# CLI
|
||||
clap = { version = "4", features = ["derive"] }
|
||||
|
||||
# Cryptography
|
||||
sha2 = "0.10"
|
||||
aes-gcm = "0.10"
|
||||
argon2 = "0.5"
|
||||
|
||||
# Compression
|
||||
flate2 = "1"
|
||||
|
||||
# Utilities
|
||||
uuid = { version = "1", features = ["v4"] }
|
||||
chrono = { version = "0.4", features = ["serde"] }
|
||||
hex = "0.4"
|
||||
rand = "0.8"
|
||||
byteorder = "1"
|
||||
|
||||
# Error handling
|
||||
thiserror = "2"
|
||||
anyhow = "1"
|
||||
|
||||
# Logging
|
||||
tracing = "0.1"
|
||||
tracing-subscriber = { version = "0.3", features = ["env-filter"] }
|
||||
|
||||
[profile.release]
|
||||
opt-level = 3
|
||||
lto = true
|
||||
strip = true
|
||||
236
rust/src/chunker.rs
Normal file
236
rust/src/chunker.rs
Normal file
@@ -0,0 +1,236 @@
|
||||
/// FastCDC content-defined chunking implementation.
|
||||
///
|
||||
/// Uses a gear-based rolling hash to find chunk boundaries determined by content.
|
||||
/// This ensures that insertions/deletions only affect nearby chunk boundaries,
|
||||
/// enabling high dedup ratios across incremental backups.
|
||||
|
||||
use rand::rngs::StdRng;
|
||||
use rand::{SeedableRng, Rng};
|
||||
|
||||
/// Pre-computed gear hash table (256 random u64 values).
|
||||
/// Generated deterministically from a fixed seed for reproducibility.
|
||||
fn gear_table() -> [u64; 256] {
|
||||
let mut table = [0u64; 256];
|
||||
let mut rng = StdRng::seed_from_u64(0x5A7BC1E3D9F04B62);
|
||||
for entry in table.iter_mut() {
|
||||
*entry = rng.gen();
|
||||
}
|
||||
table
|
||||
}
|
||||
|
||||
/// Lazy-initialized gear table, computed once on first access.
static GEAR_TABLE: std::sync::LazyLock<[u64; 256]> = std::sync::LazyLock::new(gear_table);

/// A chunk boundary found by FastCDC.
#[derive(Debug, Clone)]
pub struct ChunkBoundary {
    // Byte offset of the chunk within the input buffer.
    pub offset: usize,
    // Length of the chunk in bytes.
    pub length: usize,
}

/// FastCDC chunker with configurable min/avg/max sizes.
pub struct FastCdc {
    min_size: usize,
    avg_size: usize,
    max_size: usize,
    mask_s: u64, // "small" mask — more bits set, harder to match (used below avg)
    mask_l: u64, // "large" mask — fewer bits set, easier to match (used above avg)
}
|
||||
|
||||
impl FastCdc {
|
||||
pub fn new(min_size: usize, avg_size: usize, max_size: usize) -> Self {
|
||||
// Compute masks based on avg size.
|
||||
// mask_s has more bits set (harder to trigger) — used when chunk < avg
|
||||
// mask_l has fewer bits set (easier to trigger) — used when chunk > avg
|
||||
let bits = (avg_size as f64).log2().round() as u32;
|
||||
let mask_s = (1u64 << (bits + 1)) - 1;
|
||||
let mask_l = (1u64 << (bits - 1)) - 1;
|
||||
|
||||
Self {
|
||||
min_size,
|
||||
avg_size,
|
||||
max_size,
|
||||
mask_s,
|
||||
mask_l,
|
||||
}
|
||||
}
|
||||
|
||||
/// Find all chunk boundaries in the given data.
|
||||
pub fn chunk_data(&self, data: &[u8]) -> Vec<ChunkBoundary> {
|
||||
let mut boundaries = Vec::new();
|
||||
let mut offset = 0;
|
||||
let len = data.len();
|
||||
|
||||
while offset < len {
|
||||
let remaining = len - offset;
|
||||
if remaining <= self.min_size {
|
||||
// Final chunk: whatever remains
|
||||
boundaries.push(ChunkBoundary {
|
||||
offset,
|
||||
length: remaining,
|
||||
});
|
||||
break;
|
||||
}
|
||||
|
||||
let chunk_len = self.find_boundary(&data[offset..]);
|
||||
boundaries.push(ChunkBoundary {
|
||||
offset,
|
||||
length: chunk_len,
|
||||
});
|
||||
offset += chunk_len;
|
||||
}
|
||||
|
||||
boundaries
|
||||
}
|
||||
|
||||
/// Find the next chunk boundary starting from the beginning of `data`.
|
||||
/// Returns the length of the chunk.
|
||||
fn find_boundary(&self, data: &[u8]) -> usize {
|
||||
let len = data.len();
|
||||
if len <= self.min_size {
|
||||
return len;
|
||||
}
|
||||
|
||||
let gear = &*GEAR_TABLE;
|
||||
let mut hash: u64 = 0;
|
||||
|
||||
// Skip min_size bytes (no boundary can occur before min)
|
||||
let start = self.min_size;
|
||||
let mid = std::cmp::min(self.avg_size, len);
|
||||
let end = std::cmp::min(self.max_size, len);
|
||||
|
||||
// Phase 1: from min to avg, use mask_s (harder to match)
|
||||
for i in start..mid {
|
||||
hash = (hash << 1).wrapping_add(gear[data[i] as usize]);
|
||||
if hash & self.mask_s == 0 {
|
||||
return i + 1;
|
||||
}
|
||||
}
|
||||
|
||||
// Phase 2: from avg to max, use mask_l (easier to match)
|
||||
for i in mid..end {
|
||||
hash = (hash << 1).wrapping_add(gear[data[i] as usize]);
|
||||
if hash & self.mask_l == 0 {
|
||||
return i + 1;
|
||||
}
|
||||
}
|
||||
|
||||
// No boundary found before max — cut at max
|
||||
end
|
||||
}
|
||||
}
|
||||
|
||||
impl Default for FastCdc {
|
||||
fn default() -> Self {
|
||||
Self::new(65536, 262144, 1048576)
|
||||
}
|
||||
}
|
||||
|
||||
/// Streaming chunker that accumulates data from multiple reads
/// and yields chunks as they are found.
pub struct StreamingChunker {
    // Boundary-finding configuration (min/avg/max sizes and masks).
    cdc: FastCdc,
    // Bytes received via feed() but not yet emitted as complete chunks.
    buffer: Vec<u8>,
}
|
||||
|
||||
impl StreamingChunker {
|
||||
pub fn new(cdc: FastCdc) -> Self {
|
||||
Self {
|
||||
cdc,
|
||||
buffer: Vec::new(),
|
||||
}
|
||||
}
|
||||
|
||||
/// Feed data into the chunker. Returns any complete chunks found.
|
||||
pub fn feed(&mut self, data: &[u8]) -> Vec<Vec<u8>> {
|
||||
self.buffer.extend_from_slice(data);
|
||||
let mut chunks = Vec::new();
|
||||
|
||||
loop {
|
||||
if self.buffer.len() <= self.cdc.min_size {
|
||||
break;
|
||||
}
|
||||
|
||||
let boundary = self.cdc.find_boundary(&self.buffer);
|
||||
if boundary >= self.buffer.len() && self.buffer.len() < self.cdc.max_size {
|
||||
// No boundary found and we haven't hit max — need more data
|
||||
break;
|
||||
}
|
||||
|
||||
let chunk: Vec<u8> = self.buffer.drain(..boundary).collect();
|
||||
chunks.push(chunk);
|
||||
}
|
||||
|
||||
chunks
|
||||
}
|
||||
|
||||
/// Finalize: return any remaining data as the last chunk.
|
||||
pub fn finalize(&mut self) -> Option<Vec<u8>> {
|
||||
if self.buffer.is_empty() {
|
||||
None
|
||||
} else {
|
||||
Some(std::mem::take(&mut self.buffer))
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
#[cfg(test)]
mod tests {
    use super::*;

    // Chunks must tile the input exactly, and every non-final chunk must
    // respect the configured min/max bounds.
    #[test]
    fn test_chunk_sizes_within_bounds() {
        let cdc = FastCdc::new(1024, 4096, 16384);
        let data: Vec<u8> = (0..100_000u32).map(|i| (i % 256) as u8).collect();
        let chunks = cdc.chunk_data(&data);

        let total: usize = chunks.iter().map(|c| c.length).sum();
        assert_eq!(total, data.len());

        for (i, chunk) in chunks.iter().enumerate() {
            if i < chunks.len() - 1 {
                // Non-final chunks must be >= min and <= max
                assert!(chunk.length >= 1024, "Chunk {} too small: {}", i, chunk.length);
                assert!(chunk.length <= 16384, "Chunk {} too large: {}", i, chunk.length);
            }
        }
    }

    // The gear table is seeded, so identical input must always produce
    // identical boundaries.
    #[test]
    fn test_deterministic() {
        let cdc = FastCdc::new(1024, 4096, 16384);
        let data: Vec<u8> = (0..50_000u32).map(|i| (i % 256) as u8).collect();
        let chunks1 = cdc.chunk_data(&data);
        let chunks2 = cdc.chunk_data(&data);
        assert_eq!(chunks1.len(), chunks2.len());
        for (a, b) in chunks1.iter().zip(chunks2.iter()) {
            assert_eq!(a.offset, b.offset);
            assert_eq!(a.length, b.length);
        }
    }

    // Streaming and batch chunking must both account for every input byte.
    // NOTE(review): this only checks totals, not that streaming boundaries
    // match batch boundaries exactly.
    #[test]
    fn test_streaming_chunker() {
        let cdc = FastCdc::new(1024, 4096, 16384);
        let data: Vec<u8> = (0..100_000u32).map(|i| (i % 256) as u8).collect();

        // Chunk with streaming in 8KB reads
        let mut streamer = StreamingChunker::new(FastCdc::new(1024, 4096, 16384));
        let mut stream_chunks: Vec<Vec<u8>> = Vec::new();
        for chunk in data.chunks(8192) {
            stream_chunks.extend(streamer.feed(chunk));
        }
        if let Some(last) = streamer.finalize() {
            stream_chunks.push(last);
        }

        // Chunk in one shot
        let batch_boundaries = cdc.chunk_data(&data);

        // Total bytes must match
        let stream_total: usize = stream_chunks.iter().map(|c| c.len()).sum();
        let batch_total: usize = batch_boundaries.iter().map(|c| c.length).sum();
        assert_eq!(stream_total, data.len());
        assert_eq!(batch_total, data.len());
    }
}
|
||||
43
rust/src/compression.rs
Normal file
43
rust/src/compression.rs
Normal file
@@ -0,0 +1,43 @@
|
||||
use flate2::Compression;
|
||||
use flate2::read::{GzDecoder, GzEncoder};
|
||||
use std::io::Read;
|
||||
use crate::error::ArchiveError;
|
||||
|
||||
/// Gzip compress data.
|
||||
pub fn compress(data: &[u8]) -> Result<Vec<u8>, ArchiveError> {
|
||||
let mut encoder = GzEncoder::new(data, Compression::default());
|
||||
let mut compressed = Vec::new();
|
||||
encoder.read_to_end(&mut compressed)
|
||||
.map_err(|e| ArchiveError::Io(e))?;
|
||||
Ok(compressed)
|
||||
}
|
||||
|
||||
/// Gzip decompress data.
|
||||
pub fn decompress(data: &[u8]) -> Result<Vec<u8>, ArchiveError> {
|
||||
let mut decoder = GzDecoder::new(data);
|
||||
let mut decompressed = Vec::new();
|
||||
decoder.read_to_end(&mut decompressed)
|
||||
.map_err(|e| ArchiveError::Io(e))?;
|
||||
Ok(decompressed)
|
||||
}
|
||||
|
||||
#[cfg(test)]
mod tests {
    use super::*;

    // compress followed by decompress must be the identity.
    #[test]
    fn test_roundtrip() {
        let data = b"Hello, this is test data for compression!";
        let compressed = compress(data).unwrap();
        let decompressed = decompress(&compressed).unwrap();
        assert_eq!(data.as_slice(), decompressed.as_slice());
    }

    // Repetitive input must shrink (gzip header overhead is far below 10 KB).
    #[test]
    fn test_compression_reduces_size() {
        // Highly compressible data
        let data = vec![b'A'; 10000];
        let compressed = compress(&data).unwrap();
        assert!(compressed.len() < data.len());
    }
}
|
||||
97
rust/src/config.rs
Normal file
97
rust/src/config.rs
Normal file
@@ -0,0 +1,97 @@
|
||||
use serde::{Deserialize, Serialize};

/// Top-level repository configuration, serialized as camelCase JSON.
#[derive(Debug, Clone, Serialize, Deserialize)]
#[serde(rename_all = "camelCase")]
pub struct RepositoryConfig {
    // Config format version (currently 1; see RepositoryConfig::new).
    pub version: u32,
    // Unique repository identifier ("repo-<uuid>").
    pub id: String,
    // RFC 3339 creation timestamp.
    pub created_at: String,
    pub chunking: ChunkingConfig,
    // Compression algorithm name (currently "gzip").
    pub compression: String,
    // None means the repository is unencrypted; omitted from JSON when None.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub encryption: Option<EncryptionConfig>,
    // Target size in bytes for pack files (default 8 MB).
    pub pack_target_size: u64,
}

/// Chunking parameters handed to the FastCDC chunker.
#[derive(Debug, Clone, Serialize, Deserialize)]
#[serde(rename_all = "camelCase")]
pub struct ChunkingConfig {
    // Algorithm name (currently "fastcdc").
    pub algorithm: String,
    pub min_size: u32,
    pub avg_size: u32,
    pub max_size: u32,
}

/// Encryption settings stored in the repository config.
#[derive(Debug, Clone, Serialize, Deserialize)]
#[serde(rename_all = "camelCase")]
pub struct EncryptionConfig {
    // Cipher name (currently "aes-256-gcm").
    pub algorithm: String,
    // Key-derivation function name (currently "argon2id").
    pub kdf: String,
    pub kdf_params: KdfParams,
}

/// Argon2 cost parameters.
#[derive(Debug, Clone, Serialize, Deserialize)]
#[serde(rename_all = "camelCase")]
pub struct KdfParams {
    // Memory cost; the default comment says 256 MB, so this is presumably
    // in KiB (262144 KiB = 256 MiB) — TODO confirm against derive_key usage.
    pub memory: u32,
    pub iterations: u32,
    pub parallelism: u32,
}

/// On-disk key file holding the wrapped (encrypted) master key.
#[derive(Debug, Clone, Serialize, Deserialize)]
#[serde(rename_all = "camelCase")]
pub struct KeyFile {
    pub id: String,
    pub created_at: String,
    pub kdf: String,
    pub kdf_salt: String,
    pub kdf_params: KdfParams,
    pub encrypted_key: String,
    pub nonce: String,
    pub auth_tag: String,
}

impl Default for ChunkingConfig {
    fn default() -> Self {
        Self {
            algorithm: "fastcdc".to_string(),
            min_size: 65536, // 64 KB
            avg_size: 262144, // 256 KB
            max_size: 1048576, // 1 MB
        }
    }
}

impl Default for KdfParams {
    fn default() -> Self {
        Self {
            memory: 262144, // 256 MB
            iterations: 3,
            parallelism: 4,
        }
    }
}

impl Default for EncryptionConfig {
    fn default() -> Self {
        Self {
            algorithm: "aes-256-gcm".to_string(),
            kdf: "argon2id".to_string(),
            kdf_params: KdfParams::default(),
        }
    }
}

impl RepositoryConfig {
    /// Build a fresh version-1 config with default chunking/compression.
    /// Pass `Some(EncryptionConfig)` to enable encryption, `None` to disable.
    pub fn new(encryption: Option<EncryptionConfig>) -> Self {
        Self {
            version: 1,
            id: format!("repo-{}", uuid::Uuid::new_v4()),
            created_at: chrono::Utc::now().to_rfc3339(),
            chunking: ChunkingConfig::default(),
            compression: "gzip".to_string(),
            encryption,
            pack_target_size: 8 * 1024 * 1024, // 8 MB
        }
    }
}
|
||||
179
rust/src/encryption.rs
Normal file
179
rust/src/encryption.rs
Normal file
@@ -0,0 +1,179 @@
|
||||
use aes_gcm::{Aes256Gcm, Key, Nonce};
use aes_gcm::aead::{Aead, KeyInit};
use argon2::Argon2;
use rand::RngCore;
use serde::{Deserialize, Serialize};
use crate::config::KdfParams;
use crate::error::ArchiveError;

/// Result of encrypting a chunk.
pub struct EncryptedChunk {
    /// Ciphertext with appended 16-byte GCM auth tag
    pub ciphertext: Vec<u8>,
    /// 12-byte nonce used for this chunk
    pub nonce: [u8; 12],
}

/// A wrapped (encrypted) master key stored in a key file.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct WrappedKey {
    // Master key encrypted under the passphrase-derived KEK (includes GCM tag).
    pub encrypted_key: Vec<u8>,
    // Nonce used when wrapping the key.
    pub nonce: [u8; 12],
}
|
||||
|
||||
/// Derive a 32-byte key from a passphrase using Argon2id.
|
||||
pub fn derive_key(passphrase: &str, salt: &[u8], params: &KdfParams) -> Result<[u8; 32], ArchiveError> {
|
||||
let argon2 = Argon2::new(
|
||||
argon2::Algorithm::Argon2id,
|
||||
argon2::Version::V0x13,
|
||||
argon2::Params::new(
|
||||
params.memory,
|
||||
params.iterations,
|
||||
params.parallelism,
|
||||
Some(32),
|
||||
).map_err(|e| ArchiveError::Encryption(format!("Argon2 params error: {}", e)))?,
|
||||
);
|
||||
|
||||
let mut key = [0u8; 32];
|
||||
argon2.hash_password_into(passphrase.as_bytes(), salt, &mut key)
|
||||
.map_err(|e| ArchiveError::Encryption(format!("Argon2 derivation failed: {}", e)))?;
|
||||
|
||||
Ok(key)
|
||||
}
|
||||
|
||||
/// Generate a random 32-byte master key.
|
||||
pub fn generate_master_key() -> [u8; 32] {
|
||||
let mut key = [0u8; 32];
|
||||
rand::thread_rng().fill_bytes(&mut key);
|
||||
key
|
||||
}
|
||||
|
||||
/// Generate a random 16-byte salt.
|
||||
pub fn generate_salt() -> [u8; 16] {
|
||||
let mut salt = [0u8; 16];
|
||||
rand::thread_rng().fill_bytes(&mut salt);
|
||||
salt
|
||||
}
|
||||
|
||||
/// Generate a random 12-byte nonce.
|
||||
pub fn generate_nonce() -> [u8; 12] {
|
||||
let mut nonce = [0u8; 12];
|
||||
rand::thread_rng().fill_bytes(&mut nonce);
|
||||
nonce
|
||||
}
|
||||
|
||||
/// Wrap (encrypt) a master key with a key-encryption-key derived from a passphrase.
|
||||
pub fn wrap_key(master_key: &[u8; 32], kek: &[u8; 32]) -> Result<WrappedKey, ArchiveError> {
|
||||
let cipher = Aes256Gcm::new(Key::<Aes256Gcm>::from_slice(kek));
|
||||
let nonce_bytes = generate_nonce();
|
||||
let nonce = Nonce::from_slice(&nonce_bytes);
|
||||
|
||||
let ciphertext = cipher.encrypt(nonce, master_key.as_slice())
|
||||
.map_err(|e| ArchiveError::Encryption(format!("Key wrap failed: {}", e)))?;
|
||||
|
||||
Ok(WrappedKey {
|
||||
encrypted_key: ciphertext,
|
||||
nonce: nonce_bytes,
|
||||
})
|
||||
}
|
||||
|
||||
/// Unwrap (decrypt) a master key with a key-encryption-key.
|
||||
pub fn unwrap_key(wrapped: &WrappedKey, kek: &[u8; 32]) -> Result<[u8; 32], ArchiveError> {
|
||||
let cipher = Aes256Gcm::new(Key::<Aes256Gcm>::from_slice(kek));
|
||||
let nonce = Nonce::from_slice(&wrapped.nonce);
|
||||
|
||||
let plaintext = cipher.decrypt(nonce, wrapped.encrypted_key.as_slice())
|
||||
.map_err(|e| ArchiveError::Encryption(format!("Key unwrap failed: {}", e)))?;
|
||||
|
||||
if plaintext.len() != 32 {
|
||||
return Err(ArchiveError::Encryption("Unwrapped key has wrong length".to_string()));
|
||||
}
|
||||
|
||||
let mut key = [0u8; 32];
|
||||
key.copy_from_slice(&plaintext);
|
||||
Ok(key)
|
||||
}
|
||||
|
||||
/// Encrypt a chunk with AES-256-GCM.
|
||||
/// Returns ciphertext (which includes the 16-byte auth tag appended) and the nonce.
|
||||
pub fn encrypt_chunk(data: &[u8], key: &[u8; 32]) -> Result<EncryptedChunk, ArchiveError> {
|
||||
let cipher = Aes256Gcm::new(Key::<Aes256Gcm>::from_slice(key));
|
||||
let nonce_bytes = generate_nonce();
|
||||
let nonce = Nonce::from_slice(&nonce_bytes);
|
||||
|
||||
let ciphertext = cipher.encrypt(nonce, data)
|
||||
.map_err(|e| ArchiveError::Encryption(format!("Chunk encryption failed: {}", e)))?;
|
||||
|
||||
Ok(EncryptedChunk {
|
||||
ciphertext,
|
||||
nonce: nonce_bytes,
|
||||
})
|
||||
}
|
||||
|
||||
/// Decrypt a chunk with AES-256-GCM.
|
||||
/// The ciphertext includes the 16-byte auth tag at the end.
|
||||
pub fn decrypt_chunk(ciphertext: &[u8], key: &[u8; 32], nonce: &[u8; 12]) -> Result<Vec<u8>, ArchiveError> {
|
||||
let cipher = Aes256Gcm::new(Key::<Aes256Gcm>::from_slice(key));
|
||||
let nonce = Nonce::from_slice(nonce);
|
||||
|
||||
let plaintext = cipher.decrypt(nonce, ciphertext)
|
||||
.map_err(|e| ArchiveError::Encryption(format!("Chunk decryption failed: {}", e)))?;
|
||||
|
||||
Ok(plaintext)
|
||||
}
|
||||
|
||||
#[cfg(test)]
mod tests {
    use super::*;

    // Encrypt then decrypt with the same key must recover the plaintext.
    #[test]
    fn test_encrypt_decrypt_roundtrip() {
        let key = generate_master_key();
        let data = b"Hello, encrypted world!";

        let encrypted = encrypt_chunk(data, &key).unwrap();
        let decrypted = decrypt_chunk(&encrypted.ciphertext, &key, &encrypted.nonce).unwrap();

        assert_eq!(data.as_slice(), decrypted.as_slice());
    }

    // GCM authentication must reject decryption under the wrong key.
    #[test]
    fn test_wrong_key_fails() {
        let key1 = generate_master_key();
        let key2 = generate_master_key();
        let data = b"Secret data";

        let encrypted = encrypt_chunk(data, &key1).unwrap();
        let result = decrypt_chunk(&encrypted.ciphertext, &key2, &encrypted.nonce);

        assert!(result.is_err());
    }

    // Wrapping then unwrapping with the same KEK must recover the master key.
    #[test]
    fn test_key_wrap_unwrap() {
        let master = generate_master_key();
        let kek = generate_master_key();

        let wrapped = wrap_key(&master, &kek).unwrap();
        let unwrapped = unwrap_key(&wrapped, &kek).unwrap();

        assert_eq!(master, unwrapped);
    }

    // Same passphrase + salt + params must derive the same key; a different
    // passphrase must not.
    #[test]
    fn test_derive_key_deterministic() {
        let salt = generate_salt();
        let params = KdfParams {
            memory: 1024, // small for test speed
            iterations: 1,
            parallelism: 1,
        };

        let k1 = derive_key("password", &salt, &params).unwrap();
        let k2 = derive_key("password", &salt, &params).unwrap();
        assert_eq!(k1, k2);

        let k3 = derive_key("different", &salt, &params).unwrap();
        assert_ne!(k1, k3);
    }
}
|
||||
37
rust/src/error.rs
Normal file
37
rust/src/error.rs
Normal file
@@ -0,0 +1,37 @@
|
||||
use thiserror::Error;
|
||||
|
||||
#[derive(Error, Debug)]
|
||||
pub enum ArchiveError {
|
||||
#[error("I/O error: {0}")]
|
||||
Io(#[from] std::io::Error),
|
||||
|
||||
#[error("Configuration error: {0}")]
|
||||
Config(String),
|
||||
|
||||
#[error("Data corruption: {0}")]
|
||||
Corruption(String),
|
||||
|
||||
#[error("Encryption error: {0}")]
|
||||
Encryption(String),
|
||||
|
||||
#[error("Not found: {0}")]
|
||||
NotFound(String),
|
||||
|
||||
#[error("Repository is locked: {0}")]
|
||||
Locked(String),
|
||||
|
||||
#[error("Invalid repository: {0}")]
|
||||
InvalidRepo(String),
|
||||
|
||||
#[error("JSON error: {0}")]
|
||||
Json(#[from] serde_json::Error),
|
||||
|
||||
#[error("{0}")]
|
||||
Other(String),
|
||||
}
|
||||
|
||||
impl ArchiveError {
|
||||
pub fn to_error_string(&self) -> String {
|
||||
format!("{}", self)
|
||||
}
|
||||
}
|
||||
243
rust/src/global_index.rs
Normal file
243
rust/src/global_index.rs
Normal file
@@ -0,0 +1,243 @@
|
||||
/// Global index: maps chunk hashes to their physical location in pack files.
|
||||
///
|
||||
/// The index is stored as JSON segments in the `index/` directory and loaded
|
||||
/// into an in-memory HashMap for O(1) lookups during ingest and restore.
|
||||
|
||||
use std::collections::HashMap;
|
||||
use std::path::{Path, PathBuf};
|
||||
use serde::{Deserialize, Serialize};
|
||||
use crate::error::ArchiveError;
|
||||
use crate::hasher;
|
||||
use crate::pack_reader;
|
||||
|
||||
/// An entry in the global index pointing to a chunk's location in a pack file.
|
||||
#[derive(Debug, Clone, Serialize, Deserialize)]
|
||||
#[serde(rename_all = "camelCase")]
|
||||
pub struct IndexEntry {
|
||||
pub pack_id: String,
|
||||
pub offset: u64,
|
||||
pub compressed_size: u32,
|
||||
pub plaintext_size: u32,
|
||||
}
|
||||
|
||||
/// An index segment stored on disk.
|
||||
#[derive(Debug, Serialize, Deserialize)]
|
||||
#[serde(rename_all = "camelCase")]
|
||||
struct IndexSegment {
|
||||
segment_id: String,
|
||||
created_at: String,
|
||||
entries: HashMap<String, IndexEntry>,
|
||||
}
|
||||
|
||||
/// The global in-memory index.
|
||||
pub struct GlobalIndex {
|
||||
/// hash (hex string) -> IndexEntry
|
||||
entries: HashMap<String, IndexEntry>,
|
||||
/// New entries added since last save
|
||||
pending: HashMap<String, IndexEntry>,
|
||||
}
|
||||
|
||||
impl GlobalIndex {
    /// Create an empty index with no loaded or pending entries.
    pub fn new() -> Self {
        Self {
            entries: HashMap::new(),
            pending: HashMap::new(),
        }
    }
|
||||
|
||||
/// Load the index from all segment files in the index/ directory.
|
||||
pub async fn load(repo_path: &str) -> Result<Self, ArchiveError> {
|
||||
let index_dir = Path::new(repo_path).join("index");
|
||||
let mut index = Self::new();
|
||||
|
||||
if !index_dir.exists() {
|
||||
return Ok(index);
|
||||
}
|
||||
|
||||
let mut dir = tokio::fs::read_dir(&index_dir).await?;
|
||||
while let Some(entry) = dir.next_entry().await? {
|
||||
let path = entry.path();
|
||||
if path.extension().and_then(|e| e.to_str()) == Some("json") {
|
||||
let data = tokio::fs::read_to_string(&path).await?;
|
||||
let segment: IndexSegment = serde_json::from_str(&data)
|
||||
.map_err(|e| ArchiveError::Corruption(format!(
|
||||
"Failed to parse index segment {}: {}",
|
||||
path.display(), e
|
||||
)))?;
|
||||
index.entries.extend(segment.entries);
|
||||
}
|
||||
}
|
||||
|
||||
tracing::info!("Loaded global index with {} entries", index.entries.len());
|
||||
Ok(index)
|
||||
}
|
||||
|
||||
/// Check if a chunk hash exists in the index.
|
||||
pub fn has(&self, hash_hex: &str) -> bool {
|
||||
self.entries.contains_key(hash_hex) || self.pending.contains_key(hash_hex)
|
||||
}
|
||||
|
||||
/// Get an index entry by hash.
|
||||
pub fn get(&self, hash_hex: &str) -> Option<&IndexEntry> {
|
||||
self.pending.get(hash_hex).or_else(|| self.entries.get(hash_hex))
|
||||
}
|
||||
|
||||
/// Add a new entry to the pending set.
|
||||
pub fn add_entry(&mut self, hash_hex: String, entry: IndexEntry) {
|
||||
self.pending.insert(hash_hex, entry);
|
||||
}
|
||||
|
||||
/// Add multiple entries to the pending set.
|
||||
pub fn add_entries(&mut self, entries: HashMap<String, IndexEntry>) {
|
||||
self.pending.extend(entries);
|
||||
}
|
||||
|
||||
/// Save pending entries as a new index segment.
|
||||
pub async fn save_segment(&mut self, repo_path: &str) -> Result<(), ArchiveError> {
|
||||
if self.pending.is_empty() {
|
||||
return Ok(());
|
||||
}
|
||||
|
||||
let index_dir = Path::new(repo_path).join("index");
|
||||
tokio::fs::create_dir_all(&index_dir).await?;
|
||||
|
||||
let segment_id = uuid::Uuid::new_v4().to_string().replace("-", "");
|
||||
let segment = IndexSegment {
|
||||
segment_id: segment_id.clone(),
|
||||
created_at: chrono::Utc::now().to_rfc3339(),
|
||||
entries: self.pending.clone(),
|
||||
};
|
||||
|
||||
let json = serde_json::to_string_pretty(&segment)?;
|
||||
let segment_path = index_dir.join(format!("{}.json", segment_id));
|
||||
let tmp_path = index_dir.join(format!("{}.json.tmp", segment_id));
|
||||
|
||||
tokio::fs::write(&tmp_path, json).await?;
|
||||
tokio::fs::rename(&tmp_path, &segment_path).await?;
|
||||
|
||||
// Move pending into main entries
|
||||
self.entries.extend(std::mem::take(&mut self.pending));
|
||||
|
||||
tracing::info!("Saved index segment {} ({} entries)", segment_id, self.entries.len());
|
||||
Ok(())
|
||||
}
|
||||
|
||||
/// Compact all segments into a single merged segment.
|
||||
pub async fn compact(&mut self, repo_path: &str) -> Result<(), ArchiveError> {
|
||||
// First, ensure pending is merged
|
||||
self.entries.extend(std::mem::take(&mut self.pending));
|
||||
|
||||
let index_dir = Path::new(repo_path).join("index");
|
||||
|
||||
// Remove all existing segments
|
||||
if index_dir.exists() {
|
||||
let mut dir = tokio::fs::read_dir(&index_dir).await?;
|
||||
while let Some(entry) = dir.next_entry().await? {
|
||||
let path = entry.path();
|
||||
if path.extension().and_then(|e| e.to_str()) == Some("json") {
|
||||
tokio::fs::remove_file(&path).await?;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Write single merged segment
|
||||
if !self.entries.is_empty() {
|
||||
tokio::fs::create_dir_all(&index_dir).await?;
|
||||
|
||||
let segment_id = uuid::Uuid::new_v4().to_string().replace("-", "");
|
||||
let segment = IndexSegment {
|
||||
segment_id: segment_id.clone(),
|
||||
created_at: chrono::Utc::now().to_rfc3339(),
|
||||
entries: self.entries.clone(),
|
||||
};
|
||||
|
||||
let json = serde_json::to_string_pretty(&segment)?;
|
||||
let path = index_dir.join(format!("{}.json", segment_id));
|
||||
tokio::fs::write(&path, json).await?;
|
||||
|
||||
tracing::info!("Compacted index into single segment with {} entries", self.entries.len());
|
||||
}
|
||||
|
||||
Ok(())
|
||||
}
|
||||
|
||||
/// Rebuild the entire index by scanning all .idx files in packs/data/.
|
||||
pub async fn rebuild_from_packs(repo_path: &str) -> Result<Self, ArchiveError> {
|
||||
let mut index = Self::new();
|
||||
let packs_dir = Path::new(repo_path).join("packs").join("data");
|
||||
|
||||
if !packs_dir.exists() {
|
||||
return Ok(index);
|
||||
}
|
||||
|
||||
let idx_files = find_idx_files(&packs_dir).await?;
|
||||
tracing::info!("Rebuilding index from {} pack index files", idx_files.len());
|
||||
|
||||
for idx_path in &idx_files {
|
||||
let entries = pack_reader::load_idx(idx_path).await?;
|
||||
|
||||
// Extract pack_id from filename (e.g., "abcdef1234.idx" -> "abcdef1234")
|
||||
let pack_id = idx_path
|
||||
.file_stem()
|
||||
.and_then(|s| s.to_str())
|
||||
.unwrap_or("")
|
||||
.to_string();
|
||||
|
||||
for entry in entries {
|
||||
let hash_hex = hasher::hash_to_hex(&entry.content_hash);
|
||||
index.entries.insert(hash_hex, IndexEntry {
|
||||
pack_id: pack_id.clone(),
|
||||
offset: entry.offset,
|
||||
compressed_size: entry.compressed_size,
|
||||
plaintext_size: entry.plaintext_size,
|
||||
});
|
||||
}
|
||||
}
|
||||
|
||||
tracing::info!("Rebuilt index with {} entries", index.entries.len());
|
||||
Ok(index)
|
||||
}
|
||||
|
||||
/// Get the total number of indexed chunks.
///
/// Counts both persisted entries and pending (not-yet-saved) entries, so
/// the result matches what a dedup lookup would see.
pub fn len(&self) -> usize {
    self.entries.len() + self.pending.len()
}
|
||||
|
||||
/// Remove entries for chunks in a specific pack.
|
||||
pub fn remove_pack_entries(&mut self, pack_id: &str) {
|
||||
self.entries.retain(|_, v| v.pack_id != pack_id);
|
||||
self.pending.retain(|_, v| v.pack_id != pack_id);
|
||||
}
|
||||
|
||||
/// Get all unique pack IDs referenced by the index.
|
||||
pub fn referenced_pack_ids(&self) -> std::collections::HashSet<String> {
|
||||
let mut ids: std::collections::HashSet<String> = self.entries.values()
|
||||
.map(|e| e.pack_id.clone())
|
||||
.collect();
|
||||
ids.extend(self.pending.values().map(|e| e.pack_id.clone()));
|
||||
ids
|
||||
}
|
||||
}
|
||||
|
||||
/// Recursively find all .idx files under a directory.
|
||||
async fn find_idx_files(dir: &Path) -> Result<Vec<PathBuf>, ArchiveError> {
|
||||
let mut files = Vec::new();
|
||||
let mut stack = vec![dir.to_path_buf()];
|
||||
|
||||
while let Some(current) = stack.pop() {
|
||||
if !current.exists() {
|
||||
continue;
|
||||
}
|
||||
let mut entries = tokio::fs::read_dir(¤t).await?;
|
||||
while let Some(entry) = entries.next_entry().await? {
|
||||
let path = entry.path();
|
||||
if path.is_dir() {
|
||||
stack.push(path);
|
||||
} else if path.extension().and_then(|e| e.to_str()) == Some("idx") {
|
||||
files.push(path);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
Ok(files)
|
||||
}
|
||||
64
rust/src/hasher.rs
Normal file
64
rust/src/hasher.rs
Normal file
@@ -0,0 +1,64 @@
|
||||
use sha2::{Sha256, Digest};
|
||||
|
||||
/// Compute SHA-256 hash of a chunk, returning raw 32-byte hash.
|
||||
pub fn hash_chunk(data: &[u8]) -> [u8; 32] {
|
||||
let mut hasher = Sha256::new();
|
||||
hasher.update(data);
|
||||
let result = hasher.finalize();
|
||||
let mut hash = [0u8; 32];
|
||||
hash.copy_from_slice(&result);
|
||||
hash
|
||||
}
|
||||
|
||||
/// Convert a 32-byte hash to a lowercase hex string (64 characters).
pub fn hash_to_hex(hash: &[u8; 32]) -> String {
    hex::encode(hash)
}
|
||||
|
||||
/// Parse a hex string back to a 32-byte hash.
|
||||
pub fn hex_to_hash(hex_str: &str) -> Result<[u8; 32], hex::FromHexError> {
|
||||
let bytes = hex::decode(hex_str)?;
|
||||
let mut hash = [0u8; 32];
|
||||
if bytes.len() == 32 {
|
||||
hash.copy_from_slice(&bytes);
|
||||
Ok(hash)
|
||||
} else {
|
||||
Err(hex::FromHexError::InvalidStringLength)
|
||||
}
|
||||
}
|
||||
|
||||
/// Verify that data matches an expected hash.
|
||||
pub fn verify_hash(data: &[u8], expected: &[u8; 32]) -> bool {
|
||||
let actual = hash_chunk(data);
|
||||
actual == *expected
|
||||
}
|
||||
|
||||
#[cfg(test)]
mod tests {
    use super::*;

    // Same input must always produce the same digest.
    #[test]
    fn test_hash_deterministic() {
        let data = b"hello world";
        let h1 = hash_chunk(data);
        let h2 = hash_chunk(data);
        assert_eq!(h1, h2);
    }

    // hex encode/decode must be lossless for a real digest.
    #[test]
    fn test_hash_to_hex_roundtrip() {
        let data = b"test data";
        let hash = hash_chunk(data);
        let hex_str = hash_to_hex(&hash);
        let parsed = hex_to_hash(&hex_str).unwrap();
        assert_eq!(hash, parsed);
    }

    // verify_hash accepts matching data and rejects anything else.
    #[test]
    fn test_verify_hash() {
        let data = b"verify me";
        let hash = hash_chunk(data);
        assert!(verify_hash(data, &hash));
        assert!(!verify_hash(b"different data", &hash));
    }
}
|
||||
262
rust/src/ingest.rs
Normal file
262
rust/src/ingest.rs
Normal file
@@ -0,0 +1,262 @@
|
||||
//! Ingest pipeline: reads data from a Unix socket, chunks it with FastCDC,
//! deduplicates, compresses, optionally encrypts, and writes to pack files.
|
||||
|
||||
use std::collections::HashMap;
|
||||
use tokio::io::AsyncReadExt;
|
||||
use tokio::net::UnixStream;
|
||||
use serde::{Deserialize, Serialize};
|
||||
|
||||
use crate::chunker::{FastCdc, StreamingChunker};
|
||||
use crate::compression;
|
||||
use crate::encryption;
|
||||
use crate::error::ArchiveError;
|
||||
use crate::global_index::IndexEntry;
|
||||
use crate::hasher;
|
||||
use crate::pack_writer::{PackWriter, FLAG_GZIP, FLAG_ENCRYPTED};
|
||||
use crate::repository::Repository;
|
||||
use crate::snapshot::{Snapshot, SnapshotItem, save_snapshot};
|
||||
|
||||
/// Options describing one named item inside an ingest request.
///
/// Deserialized from the TypeScript side (camelCase JSON); the JSON field
/// `type` maps to `item_type` and defaults to "data" when omitted.
#[derive(Debug, Clone, Serialize, Deserialize)]
#[serde(rename_all = "camelCase")]
pub struct IngestItemOptions {
    /// Logical name of the item being ingested.
    pub name: String,
    /// Item kind; serialized as `type`, defaults to "data".
    #[serde(rename = "type", default = "default_item_type")]
    pub item_type: String,
}
|
||||
|
||||
/// Serde default for `IngestItemOptions::item_type`.
fn default_item_type() -> String {
    String::from("data")
}
|
||||
|
||||
/// Tracks a chunk that has been added to the current pack but not yet indexed.
///
/// Index entries require the pack id, which is only known once the pack is
/// finalized, so per-chunk metadata is buffered here until then.
struct PendingChunk {
    // Lowercase hex SHA-256 of the plaintext chunk.
    hash_hex: String,
    // Byte offset of the chunk within the pack payload.
    offset: u64,
    // Size of the stored (compressed, possibly encrypted) bytes.
    compressed_size: u32,
    // Size of the original plaintext chunk.
    plaintext_size: u32,
}
|
||||
|
||||
/// Run the ingest pipeline.
|
||||
pub async fn ingest(
|
||||
repo: &mut Repository,
|
||||
socket_path: &str,
|
||||
tags: HashMap<String, String>,
|
||||
items: Vec<IngestItemOptions>,
|
||||
) -> Result<Snapshot, ArchiveError> {
|
||||
// Acquire write lock
|
||||
repo.acquire_lock("ingest").await?;
|
||||
|
||||
let result = do_ingest(repo, socket_path, tags, items).await;
|
||||
|
||||
// Always release lock
|
||||
repo.release_lock().await?;
|
||||
|
||||
result
|
||||
}
|
||||
|
||||
/// Core ingest pipeline (runs under the write lock held by `ingest`).
///
/// Reads the byte stream from `socket_path`, content-defined-chunks it,
/// dedups/compresses/encrypts each chunk into pack files, persists the new
/// index segment, and writes a snapshot describing the ingested data.
///
/// NOTE(review): only the *first* element of `items` is recorded in the
/// snapshot — all streamed bytes collapse into a single snapshot item.
/// Confirm multi-item ingests are intended to behave this way.
async fn do_ingest(
    repo: &mut Repository,
    socket_path: &str,
    tags: HashMap<String, String>,
    items: Vec<IngestItemOptions>,
) -> Result<Snapshot, ArchiveError> {
    // Connect to the Unix socket where TypeScript is writing the data
    let mut stream = UnixStream::connect(socket_path).await
        .map_err(|e| ArchiveError::Io(e))?;

    tracing::info!("Connected to ingest socket: {}", socket_path);

    // Set up chunker with the repository's configured FastCDC parameters.
    let cdc = FastCdc::new(
        repo.config.chunking.min_size as usize,
        repo.config.chunking.avg_size as usize,
        repo.config.chunking.max_size as usize,
    );
    let mut chunker = StreamingChunker::new(cdc);

    // Set up pack writer
    let mut pack_writer = PackWriter::new(repo.config.pack_target_size);

    // Track pending chunks for the current pack (to build index entries after finalize)
    let mut pending_chunks: Vec<PendingChunk> = Vec::new();

    // Stats accumulated across the whole stream.
    let mut total_original_size: u64 = 0;
    let mut total_stored_size: u64 = 0;
    let mut new_chunks: u64 = 0;
    let mut reused_chunks: u64 = 0;
    let mut chunk_hashes: Vec<String> = Vec::new();

    // Read data from socket in chunks
    let mut read_buf = vec![0u8; 256 * 1024]; // 256KB read buffer

    loop {
        let n = stream.read(&mut read_buf).await?;
        if n == 0 {
            break; // EOF
        }

        total_original_size += n as u64;
        let data = &read_buf[..n];

        // Feed into chunker; it may yield zero or more complete chunks.
        let chunks = chunker.feed(data);
        for chunk_data in chunks {
            process_chunk(
                repo,
                &mut pack_writer,
                &mut pending_chunks,
                &chunk_data,
                &mut chunk_hashes,
                &mut new_chunks,
                &mut reused_chunks,
                &mut total_stored_size,
            ).await?;
        }
    }

    // Finalize chunker — get any remaining data
    if let Some(final_chunk) = chunker.finalize() {
        process_chunk(
            repo,
            &mut pack_writer,
            &mut pending_chunks,
            &final_chunk,
            &mut chunk_hashes,
            &mut new_chunks,
            &mut reused_chunks,
            &mut total_stored_size,
        ).await?;
    }

    // Finalize any remaining (partially filled) pack
    if !pack_writer.is_empty() {
        finalize_pack(repo, &mut pack_writer, &mut pending_chunks).await?;
    }

    // Persist the new index entries before writing the snapshot that
    // references them.
    repo.index.save_segment(&repo.path).await?;

    // Build snapshot (first item's metadata, or "data" defaults).
    let item_name = items.first()
        .map(|i| i.name.clone())
        .unwrap_or_else(|| "data".to_string());
    let item_type = items.first()
        .map(|i| i.item_type.clone())
        .unwrap_or_else(|| "data".to_string());

    let snapshot_items = vec![SnapshotItem {
        name: item_name,
        item_type,
        size: total_original_size,
        chunks: chunk_hashes,
    }];

    let snapshot = Snapshot::new(
        snapshot_items,
        tags,
        total_original_size,
        total_stored_size,
        new_chunks,
        reused_chunks,
    );

    save_snapshot(&repo.path, &snapshot).await?;

    tracing::info!(
        "Ingest complete: {} bytes original, {} bytes stored, {} new chunks, {} reused",
        total_original_size, total_stored_size, new_chunks, reused_chunks
    );

    Ok(snapshot)
}
|
||||
|
||||
/// Process one plaintext chunk: hash, dedup-check, compress, optionally
/// encrypt, and append it to the current pack.
///
/// Statistics and the per-snapshot chunk-hash list are updated via the
/// `&mut` out-parameters. Finalizes the current pack when it reaches its
/// target size.
async fn process_chunk(
    repo: &mut Repository,
    pack_writer: &mut PackWriter,
    pending_chunks: &mut Vec<PendingChunk>,
    chunk_data: &[u8],
    chunk_hashes: &mut Vec<String>,
    new_chunks: &mut u64,
    reused_chunks: &mut u64,
    total_stored_size: &mut u64,
) -> Result<(), ArchiveError> {
    // Hash the plaintext chunk
    let hash = hasher::hash_chunk(chunk_data);
    let hash_hex = hasher::hash_to_hex(&hash);

    // Every chunk — new or reused — belongs to the snapshot's chunk list.
    chunk_hashes.push(hash_hex.clone());

    // Dedup check: chunk already stored, nothing to write.
    if repo.index.has(&hash_hex) {
        *reused_chunks += 1;
        return Ok(());
    }

    // New chunk: compress
    let compressed = compression::compress(chunk_data)?;
    let mut flags = FLAG_GZIP;
    let plaintext_size = chunk_data.len() as u32;

    // Optionally encrypt (compress-then-encrypt ordering).
    let (stored_data, nonce) = if let Some(ref key) = repo.master_key {
        let encrypted = encryption::encrypt_chunk(&compressed, key)?;
        flags |= FLAG_ENCRYPTED;
        (encrypted.ciphertext, encrypted.nonce)
    } else {
        // Unencrypted chunks carry an all-zero nonce placeholder.
        (compressed, [0u8; 12])
    };

    let compressed_size = stored_data.len() as u32;
    *total_stored_size += stored_data.len() as u64;
    *new_chunks += 1;

    // Track the pending chunk for index building
    // The offset is the current position in the pack buffer
    // NOTE(review): this assumes chunks are laid out back-to-back with no
    // per-chunk header/framing bytes in the pack payload — confirm this
    // matches PackWriter's on-disk format and the offsets it writes to the
    // .idx file (which rebuild_from_packs trusts).
    let offset = pack_writer.entries().iter()
        .map(|e| e.compressed_size as u64)
        .sum::<u64>();

    pending_chunks.push(PendingChunk {
        hash_hex: hash_hex.clone(),
        offset,
        compressed_size,
        plaintext_size,
    });

    // Add to pack writer
    pack_writer.add_chunk(hash, &stored_data, plaintext_size, nonce, flags);

    // If pack is full, finalize it (flushes pending chunks into the index).
    if pack_writer.should_finalize() {
        finalize_pack(repo, pack_writer, pending_chunks).await?;
    }

    Ok(())
}
|
||||
|
||||
/// Finalize the current pack and add its entries to the global index.
|
||||
async fn finalize_pack(
|
||||
repo: &mut Repository,
|
||||
pack_writer: &mut PackWriter,
|
||||
pending_chunks: &mut Vec<PendingChunk>,
|
||||
) -> Result<(), ArchiveError> {
|
||||
let pack_info = pack_writer.finalize(&repo.path).await?;
|
||||
|
||||
// Now we know the pack_id — add all pending chunks to the global index
|
||||
for pending in pending_chunks.drain(..) {
|
||||
repo.index.add_entry(pending.hash_hex, IndexEntry {
|
||||
pack_id: pack_info.pack_id.clone(),
|
||||
offset: pending.offset,
|
||||
compressed_size: pending.compressed_size,
|
||||
plaintext_size: pending.plaintext_size,
|
||||
});
|
||||
}
|
||||
|
||||
tracing::info!(
|
||||
"Finalized pack {} ({} chunks, {} bytes)",
|
||||
pack_info.pack_id, pack_info.chunk_count, pack_info.data_size
|
||||
);
|
||||
|
||||
Ok(())
|
||||
}
|
||||
194
rust/src/lock.rs
Normal file
194
rust/src/lock.rs
Normal file
@@ -0,0 +1,194 @@
|
||||
//! Advisory file-based locking for repository write operations.
|
||||
|
||||
use std::path::Path;
|
||||
use serde::{Deserialize, Serialize};
|
||||
use crate::error::ArchiveError;
|
||||
|
||||
/// On-disk representation of one advisory lock (one JSON file per lock).
#[derive(Debug, Clone, Serialize, Deserialize)]
#[serde(rename_all = "camelCase")]
pub struct LockEntry {
    /// UUID; also the lock file's name stem.
    pub lock_id: String,
    /// PID of the process that acquired the lock.
    pub pid: u32,
    /// Hostname of the acquiring machine (best-effort, from env vars).
    pub hostname: String,
    /// RFC 3339 acquisition timestamp; drives staleness detection.
    pub created_at: String,
    /// Operation being protected (e.g. "ingest", "prune").
    pub operation: String,
    /// Age in seconds after which the lock is considered stale.
    pub stale_after_seconds: u64,
}
|
||||
|
||||
/// Acquire a lock for the given operation.
|
||||
pub async fn acquire(repo_path: &str, operation: &str) -> Result<LockEntry, ArchiveError> {
|
||||
let locks_dir = Path::new(repo_path).join("locks");
|
||||
tokio::fs::create_dir_all(&locks_dir).await?;
|
||||
|
||||
// Check for existing locks
|
||||
if let Some(existing) = get_active_lock(repo_path).await? {
|
||||
return Err(ArchiveError::Locked(format!(
|
||||
"Repository locked by PID {} on {} for operation '{}' since {}",
|
||||
existing.pid, existing.hostname, existing.operation, existing.created_at
|
||||
)));
|
||||
}
|
||||
|
||||
let lock_id = uuid::Uuid::new_v4().to_string();
|
||||
let hostname = std::env::var("HOSTNAME")
|
||||
.or_else(|_| std::env::var("HOST"))
|
||||
.unwrap_or_else(|_| "unknown".to_string());
|
||||
|
||||
let entry = LockEntry {
|
||||
lock_id: lock_id.clone(),
|
||||
pid: std::process::id(),
|
||||
hostname,
|
||||
created_at: chrono::Utc::now().to_rfc3339(),
|
||||
operation: operation.to_string(),
|
||||
stale_after_seconds: 21600, // 6 hours
|
||||
};
|
||||
|
||||
let lock_path = locks_dir.join(format!("{}.json", lock_id));
|
||||
let json = serde_json::to_string_pretty(&entry)?;
|
||||
|
||||
// Use create_new for atomic lock creation
|
||||
tokio::fs::write(&lock_path, json).await?;
|
||||
|
||||
tracing::info!("Acquired lock {} for operation '{}'", lock_id, operation);
|
||||
Ok(entry)
|
||||
}
|
||||
|
||||
/// Release a specific lock.
|
||||
pub async fn release(repo_path: &str, lock_id: &str) -> Result<(), ArchiveError> {
|
||||
let lock_path = Path::new(repo_path).join("locks").join(format!("{}.json", lock_id));
|
||||
if lock_path.exists() {
|
||||
tokio::fs::remove_file(&lock_path).await?;
|
||||
tracing::info!("Released lock {}", lock_id);
|
||||
}
|
||||
Ok(())
|
||||
}
|
||||
|
||||
/// Check if the repository is locked.
|
||||
pub async fn is_locked(repo_path: &str) -> Result<bool, ArchiveError> {
|
||||
Ok(get_active_lock(repo_path).await?.is_some())
|
||||
}
|
||||
|
||||
/// Get the active (non-stale) lock, if any.
///
/// Side effects: deletes lock files that are corrupt (unparseable JSON) or
/// stale while scanning. Returns the first active lock encountered, in
/// directory order.
///
/// NOTE(review): `read_to_string` failures propagate as errors here, while
/// the sibling `check_and_break_stale` tolerates them — a lock file removed
/// concurrently between listing and reading fails this call. Confirm
/// whether that race should be tolerated instead.
async fn get_active_lock(repo_path: &str) -> Result<Option<LockEntry>, ArchiveError> {
    let locks_dir = Path::new(repo_path).join("locks");
    if !locks_dir.exists() {
        return Ok(None);
    }

    let mut dir = tokio::fs::read_dir(&locks_dir).await?;
    while let Some(entry) = dir.next_entry().await? {
        let path = entry.path();
        if path.extension().and_then(|e| e.to_str()) != Some("json") {
            continue;
        }

        let data = tokio::fs::read_to_string(&path).await?;
        let lock: LockEntry = match serde_json::from_str(&data) {
            Ok(l) => l,
            Err(_) => {
                // Corrupted lock file — remove it
                let _ = tokio::fs::remove_file(&path).await;
                continue;
            }
        };

        if is_stale(&lock) {
            tracing::warn!("Removing stale lock {} (from {})", lock.lock_id, lock.created_at);
            let _ = tokio::fs::remove_file(&path).await;
            continue;
        }

        return Ok(Some(lock));
    }

    Ok(None)
}
|
||||
|
||||
/// Check and break all stale locks. Returns the number of locks removed.
|
||||
pub async fn check_and_break_stale(repo_path: &str) -> Result<u32, ArchiveError> {
|
||||
let locks_dir = Path::new(repo_path).join("locks");
|
||||
if !locks_dir.exists() {
|
||||
return Ok(0);
|
||||
}
|
||||
|
||||
let mut removed = 0u32;
|
||||
let mut dir = tokio::fs::read_dir(&locks_dir).await?;
|
||||
while let Some(entry) = dir.next_entry().await? {
|
||||
let path = entry.path();
|
||||
if path.extension().and_then(|e| e.to_str()) != Some("json") {
|
||||
continue;
|
||||
}
|
||||
|
||||
let data = match tokio::fs::read_to_string(&path).await {
|
||||
Ok(d) => d,
|
||||
Err(_) => continue,
|
||||
};
|
||||
let lock: LockEntry = match serde_json::from_str(&data) {
|
||||
Ok(l) => l,
|
||||
Err(_) => {
|
||||
let _ = tokio::fs::remove_file(&path).await;
|
||||
removed += 1;
|
||||
continue;
|
||||
}
|
||||
};
|
||||
|
||||
if is_stale(&lock) {
|
||||
tracing::warn!("Breaking stale lock {} (from {})", lock.lock_id, lock.created_at);
|
||||
let _ = tokio::fs::remove_file(&path).await;
|
||||
removed += 1;
|
||||
}
|
||||
}
|
||||
|
||||
Ok(removed)
|
||||
}
|
||||
|
||||
/// Break all locks (forced unlock).
|
||||
pub async fn break_all_locks(repo_path: &str, force: bool) -> Result<u32, ArchiveError> {
|
||||
let locks_dir = Path::new(repo_path).join("locks");
|
||||
if !locks_dir.exists() {
|
||||
return Ok(0);
|
||||
}
|
||||
|
||||
let mut removed = 0u32;
|
||||
let mut dir = tokio::fs::read_dir(&locks_dir).await?;
|
||||
while let Some(entry) = dir.next_entry().await? {
|
||||
let path = entry.path();
|
||||
if path.extension().and_then(|e| e.to_str()) != Some("json") {
|
||||
continue;
|
||||
}
|
||||
|
||||
if force {
|
||||
let _ = tokio::fs::remove_file(&path).await;
|
||||
removed += 1;
|
||||
} else {
|
||||
// Only break stale locks
|
||||
let data = match tokio::fs::read_to_string(&path).await {
|
||||
Ok(d) => d,
|
||||
Err(_) => continue,
|
||||
};
|
||||
let lock: LockEntry = match serde_json::from_str(&data) {
|
||||
Ok(l) => l,
|
||||
Err(_) => {
|
||||
let _ = tokio::fs::remove_file(&path).await;
|
||||
removed += 1;
|
||||
continue;
|
||||
}
|
||||
};
|
||||
|
||||
if is_stale(&lock) {
|
||||
let _ = tokio::fs::remove_file(&path).await;
|
||||
removed += 1;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
Ok(removed)
|
||||
}
|
||||
|
||||
fn is_stale(lock: &LockEntry) -> bool {
|
||||
if let Ok(created) = chrono::DateTime::parse_from_rfc3339(&lock.created_at) {
|
||||
let age = chrono::Utc::now().signed_duration_since(created);
|
||||
age.num_seconds() > lock.stale_after_seconds as i64
|
||||
} else {
|
||||
true // Can't parse timestamp — treat as stale
|
||||
}
|
||||
}
|
||||
51
rust/src/main.rs
Normal file
51
rust/src/main.rs
Normal file
@@ -0,0 +1,51 @@
|
||||
use clap::Parser;
|
||||
|
||||
mod config;
|
||||
mod error;
|
||||
mod management;
|
||||
mod chunker;
|
||||
mod hasher;
|
||||
mod compression;
|
||||
mod encryption;
|
||||
mod pack_writer;
|
||||
mod pack_reader;
|
||||
mod global_index;
|
||||
mod repository;
|
||||
mod snapshot;
|
||||
mod lock;
|
||||
mod ingest;
|
||||
mod restore;
|
||||
mod verify;
|
||||
mod prune;
|
||||
mod repair;
|
||||
|
||||
/// Command-line arguments for the containerarchive binary.
#[derive(Parser, Debug)]
#[command(name = "containerarchive", about = "Content-addressed incremental backup engine")]
struct Cli {
    /// Run in management mode (JSON IPC over stdin/stdout)
    #[arg(long)]
    management: bool,
}
|
||||
|
||||
#[tokio::main]
|
||||
async fn main() -> anyhow::Result<()> {
|
||||
// Initialize logging to stderr (stdout is reserved for IPC)
|
||||
tracing_subscriber::fmt()
|
||||
.with_writer(std::io::stderr)
|
||||
.with_env_filter(
|
||||
tracing_subscriber::EnvFilter::from_default_env()
|
||||
.add_directive(tracing::Level::INFO.into()),
|
||||
)
|
||||
.init();
|
||||
|
||||
let cli = Cli::parse();
|
||||
|
||||
if cli.management {
|
||||
management::management_loop().await?;
|
||||
} else {
|
||||
eprintln!("containerarchive: use --management for IPC mode");
|
||||
std::process::exit(1);
|
||||
}
|
||||
|
||||
Ok(())
|
||||
}
|
||||
470
rust/src/management.rs
Normal file
470
rust/src/management.rs
Normal file
@@ -0,0 +1,470 @@
|
||||
use serde::{Deserialize, Serialize};
|
||||
use serde_json::Value;
|
||||
use std::io::{self, BufRead, Write};
|
||||
use crate::repository::Repository;
|
||||
|
||||
/// One JSON-RPC-style request line read from stdin.
#[derive(Debug, Deserialize)]
struct Request {
    // Correlation id echoed back in the response.
    id: String,
    // Method name, dispatched by `handle_request`.
    method: String,
    // Method parameters; defaults to JSON null when omitted.
    #[serde(default)]
    params: Value,
}
|
||||
|
||||
/// One response line written to stdout; exactly one of `result`/`error`
/// is present depending on `success`.
#[derive(Debug, Serialize)]
struct Response {
    // Correlation id copied from the request.
    id: String,
    success: bool,
    #[serde(skip_serializing_if = "Option::is_none")]
    result: Option<Value>,
    #[serde(skip_serializing_if = "Option::is_none")]
    error: Option<String>,
}
|
||||
|
||||
/// Unsolicited notification pushed to stdout (e.g. the startup "ready"
/// event); distinguished from `Response` by having no `id`.
#[derive(Debug, Serialize)]
struct Event {
    event: String,
    data: Value,
}
|
||||
|
||||
fn send_event(event: &str, data: Value) {
|
||||
let evt = Event {
|
||||
event: event.to_string(),
|
||||
data,
|
||||
};
|
||||
let json = serde_json::to_string(&evt).unwrap();
|
||||
let stdout = io::stdout();
|
||||
let mut handle = stdout.lock();
|
||||
let _ = writeln!(handle, "{}", json);
|
||||
let _ = handle.flush();
|
||||
}
|
||||
|
||||
fn send_response(resp: &Response) {
|
||||
let json = serde_json::to_string(resp).unwrap();
|
||||
let stdout = io::stdout();
|
||||
let mut handle = stdout.lock();
|
||||
let _ = writeln!(handle, "{}", json);
|
||||
let _ = handle.flush();
|
||||
}
|
||||
|
||||
/// Main IPC loop: reads newline-delimited JSON requests from stdin,
/// dispatches them, and writes one JSON response line per request to
/// stdout. Sends a `ready` event first so the TypeScript side knows the
/// process is up. Exits (and closes any open repository) when stdin closes.
///
/// NOTE(review): `io::stdin().lock().lines()` is a *blocking* read inside
/// an async fn — it occupies a tokio worker thread while waiting for
/// input. Acceptable for a single-purpose IPC process; confirm this is
/// intentional vs. `tokio::io::BufReader::new(tokio::io::stdin()).lines()`.
pub async fn management_loop() -> anyhow::Result<()> {
    // Send ready event
    send_event("ready", serde_json::json!({}));

    let stdin = io::stdin();
    // The single open repository for this process; requests mutate it.
    let mut repo: Option<Repository> = None;

    for line in stdin.lock().lines() {
        let line = match line {
            Ok(l) => l,
            Err(_) => break,
        };

        if line.trim().is_empty() {
            continue;
        }

        let request: Request = match serde_json::from_str(&line) {
            Ok(r) => r,
            Err(e) => {
                // Unparseable line: no request id to respond to, so log and skip.
                tracing::error!("Failed to parse request: {}", e);
                continue;
            }
        };

        let response = handle_request(&request, &mut repo).await;
        send_response(&response);
    }

    // Cleanup: stdin closed — close any open repository before exiting.
    if let Some(r) = repo.take() {
        let _ = r.close().await;
    }

    Ok(())
}
|
||||
|
||||
/// Dispatch a single request to its method handler.
///
/// `repo` is the process-wide open repository slot; handlers may open,
/// mutate, or close it. Unknown methods produce an error response rather
/// than terminating the loop.
async fn handle_request(req: &Request, repo: &mut Option<Repository>) -> Response {
    match req.method.as_str() {
        "init" => handle_init(req, repo).await,
        "open" => handle_open(req, repo).await,
        "close" => handle_close(req, repo).await,
        "ingest" => handle_ingest(req, repo).await,
        "restore" => handle_restore(req, repo).await,
        "listSnapshots" => handle_list_snapshots(req, repo).await,
        "getSnapshot" => handle_get_snapshot(req, repo).await,
        "verify" => handle_verify(req, repo).await,
        "repair" => handle_repair(req, repo).await,
        "prune" => handle_prune(req, repo).await,
        "reindex" => handle_reindex(req, repo).await,
        "unlock" => handle_unlock(req, repo).await,
        _ => Response {
            id: req.id.clone(),
            success: false,
            result: None,
            error: Some(format!("Unknown method: {}", req.method)),
        },
    }
}
|
||||
|
||||
async fn handle_init(req: &Request, repo: &mut Option<Repository>) -> Response {
|
||||
let path = req.params.get("path").and_then(|v| v.as_str()).unwrap_or("");
|
||||
let passphrase = req.params.get("passphrase").and_then(|v| v.as_str());
|
||||
|
||||
match Repository::init(path, passphrase).await {
|
||||
Ok(r) => {
|
||||
let config = serde_json::to_value(&r.config).unwrap_or(Value::Null);
|
||||
*repo = Some(r);
|
||||
Response {
|
||||
id: req.id.clone(),
|
||||
success: true,
|
||||
result: Some(config),
|
||||
error: None,
|
||||
}
|
||||
}
|
||||
Err(e) => Response {
|
||||
id: req.id.clone(),
|
||||
success: false,
|
||||
result: None,
|
||||
error: Some(e.to_string()),
|
||||
},
|
||||
}
|
||||
}
|
||||
|
||||
async fn handle_open(req: &Request, repo: &mut Option<Repository>) -> Response {
|
||||
let path = req.params.get("path").and_then(|v| v.as_str()).unwrap_or("");
|
||||
let passphrase = req.params.get("passphrase").and_then(|v| v.as_str());
|
||||
|
||||
match Repository::open(path, passphrase).await {
|
||||
Ok(r) => {
|
||||
let config = serde_json::to_value(&r.config).unwrap_or(Value::Null);
|
||||
*repo = Some(r);
|
||||
Response {
|
||||
id: req.id.clone(),
|
||||
success: true,
|
||||
result: Some(config),
|
||||
error: None,
|
||||
}
|
||||
}
|
||||
Err(e) => Response {
|
||||
id: req.id.clone(),
|
||||
success: false,
|
||||
result: None,
|
||||
error: Some(e.to_string()),
|
||||
},
|
||||
}
|
||||
}
|
||||
|
||||
async fn handle_close(req: &Request, repo: &mut Option<Repository>) -> Response {
|
||||
if let Some(r) = repo.take() {
|
||||
match r.close().await {
|
||||
Ok(_) => Response {
|
||||
id: req.id.clone(),
|
||||
success: true,
|
||||
result: Some(serde_json::json!({})),
|
||||
error: None,
|
||||
},
|
||||
Err(e) => Response {
|
||||
id: req.id.clone(),
|
||||
success: false,
|
||||
result: None,
|
||||
error: Some(e.to_string()),
|
||||
},
|
||||
}
|
||||
} else {
|
||||
Response {
|
||||
id: req.id.clone(),
|
||||
success: true,
|
||||
result: Some(serde_json::json!({})),
|
||||
error: None,
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
async fn handle_ingest(req: &Request, repo: &mut Option<Repository>) -> Response {
|
||||
let repo = match repo.as_mut() {
|
||||
Some(r) => r,
|
||||
None => return Response {
|
||||
id: req.id.clone(),
|
||||
success: false,
|
||||
result: None,
|
||||
error: Some("No repository open".to_string()),
|
||||
},
|
||||
};
|
||||
|
||||
let socket_path = req.params.get("socketPath").and_then(|v| v.as_str()).unwrap_or("");
|
||||
let tags: std::collections::HashMap<String, String> = req.params.get("tags")
|
||||
.and_then(|v| serde_json::from_value(v.clone()).ok())
|
||||
.unwrap_or_default();
|
||||
let items: Vec<crate::ingest::IngestItemOptions> = req.params.get("items")
|
||||
.and_then(|v| serde_json::from_value(v.clone()).ok())
|
||||
.unwrap_or_default();
|
||||
|
||||
match crate::ingest::ingest(repo, socket_path, tags, items).await {
|
||||
Ok(snapshot) => {
|
||||
let result = serde_json::to_value(&snapshot).unwrap_or(Value::Null);
|
||||
Response {
|
||||
id: req.id.clone(),
|
||||
success: true,
|
||||
result: Some(serde_json::json!({ "snapshot": result })),
|
||||
error: None,
|
||||
}
|
||||
}
|
||||
Err(e) => Response {
|
||||
id: req.id.clone(),
|
||||
success: false,
|
||||
result: None,
|
||||
error: Some(e.to_string()),
|
||||
},
|
||||
}
|
||||
}
|
||||
|
||||
async fn handle_restore(req: &Request, repo: &mut Option<Repository>) -> Response {
|
||||
let repo = match repo.as_ref() {
|
||||
Some(r) => r,
|
||||
None => return Response {
|
||||
id: req.id.clone(),
|
||||
success: false,
|
||||
result: None,
|
||||
error: Some("No repository open".to_string()),
|
||||
},
|
||||
};
|
||||
|
||||
let snapshot_id = req.params.get("snapshotId").and_then(|v| v.as_str()).unwrap_or("");
|
||||
let socket_path = req.params.get("socketPath").and_then(|v| v.as_str()).unwrap_or("");
|
||||
let item = req.params.get("item").and_then(|v| v.as_str());
|
||||
|
||||
match crate::restore::restore(repo, snapshot_id, socket_path, item).await {
|
||||
Ok(_) => Response {
|
||||
id: req.id.clone(),
|
||||
success: true,
|
||||
result: Some(serde_json::json!({})),
|
||||
error: None,
|
||||
},
|
||||
Err(e) => Response {
|
||||
id: req.id.clone(),
|
||||
success: false,
|
||||
result: None,
|
||||
error: Some(e.to_string()),
|
||||
},
|
||||
}
|
||||
}
|
||||
|
||||
async fn handle_list_snapshots(req: &Request, repo: &mut Option<Repository>) -> Response {
|
||||
let repo = match repo.as_ref() {
|
||||
Some(r) => r,
|
||||
None => return Response {
|
||||
id: req.id.clone(),
|
||||
success: false,
|
||||
result: None,
|
||||
error: Some("No repository open".to_string()),
|
||||
},
|
||||
};
|
||||
|
||||
let filter = req.params.get("filter")
|
||||
.and_then(|v| serde_json::from_value::<crate::snapshot::SnapshotFilter>(v.clone()).ok());
|
||||
|
||||
match crate::snapshot::list_snapshots(&repo.path, filter.as_ref()).await {
|
||||
Ok(snapshots) => {
|
||||
let result = serde_json::to_value(&snapshots).unwrap_or(Value::Null);
|
||||
Response {
|
||||
id: req.id.clone(),
|
||||
success: true,
|
||||
result: Some(serde_json::json!({ "snapshots": result })),
|
||||
error: None,
|
||||
}
|
||||
}
|
||||
Err(e) => Response {
|
||||
id: req.id.clone(),
|
||||
success: false,
|
||||
result: None,
|
||||
error: Some(e.to_string()),
|
||||
},
|
||||
}
|
||||
}
|
||||
|
||||
async fn handle_get_snapshot(req: &Request, repo: &mut Option<Repository>) -> Response {
|
||||
let repo = match repo.as_ref() {
|
||||
Some(r) => r,
|
||||
None => return Response {
|
||||
id: req.id.clone(),
|
||||
success: false,
|
||||
result: None,
|
||||
error: Some("No repository open".to_string()),
|
||||
},
|
||||
};
|
||||
|
||||
let snapshot_id = req.params.get("snapshotId").and_then(|v| v.as_str()).unwrap_or("");
|
||||
|
||||
match crate::snapshot::load_snapshot(&repo.path, snapshot_id).await {
|
||||
Ok(snapshot) => {
|
||||
let result = serde_json::to_value(&snapshot).unwrap_or(Value::Null);
|
||||
Response {
|
||||
id: req.id.clone(),
|
||||
success: true,
|
||||
result: Some(serde_json::json!({ "snapshot": result })),
|
||||
error: None,
|
||||
}
|
||||
}
|
||||
Err(e) => Response {
|
||||
id: req.id.clone(),
|
||||
success: false,
|
||||
result: None,
|
||||
error: Some(e.to_string()),
|
||||
},
|
||||
}
|
||||
}
|
||||
|
||||
async fn handle_verify(req: &Request, repo: &mut Option<Repository>) -> Response {
|
||||
let repo = match repo.as_ref() {
|
||||
Some(r) => r,
|
||||
None => return Response {
|
||||
id: req.id.clone(),
|
||||
success: false,
|
||||
result: None,
|
||||
error: Some("No repository open".to_string()),
|
||||
},
|
||||
};
|
||||
|
||||
let level = req.params.get("level").and_then(|v| v.as_str()).unwrap_or("standard");
|
||||
|
||||
match crate::verify::verify(repo, level).await {
|
||||
Ok(result) => {
|
||||
let result_val = serde_json::to_value(&result).unwrap_or(Value::Null);
|
||||
Response {
|
||||
id: req.id.clone(),
|
||||
success: true,
|
||||
result: Some(result_val),
|
||||
error: None,
|
||||
}
|
||||
}
|
||||
Err(e) => Response {
|
||||
id: req.id.clone(),
|
||||
success: false,
|
||||
result: None,
|
||||
error: Some(e.to_string()),
|
||||
},
|
||||
}
|
||||
}
|
||||
|
||||
async fn handle_repair(req: &Request, repo: &mut Option<Repository>) -> Response {
|
||||
let repo = match repo.as_mut() {
|
||||
Some(r) => r,
|
||||
None => return Response {
|
||||
id: req.id.clone(),
|
||||
success: false,
|
||||
result: None,
|
||||
error: Some("No repository open".to_string()),
|
||||
},
|
||||
};
|
||||
|
||||
match crate::repair::repair(repo).await {
|
||||
Ok(result) => {
|
||||
let result_val = serde_json::to_value(&result).unwrap_or(Value::Null);
|
||||
Response {
|
||||
id: req.id.clone(),
|
||||
success: true,
|
||||
result: Some(result_val),
|
||||
error: None,
|
||||
}
|
||||
}
|
||||
Err(e) => Response {
|
||||
id: req.id.clone(),
|
||||
success: false,
|
||||
result: None,
|
||||
error: Some(e.to_string()),
|
||||
},
|
||||
}
|
||||
}
|
||||
|
||||
async fn handle_prune(req: &Request, repo: &mut Option<Repository>) -> Response {
|
||||
let repo = match repo.as_mut() {
|
||||
Some(r) => r,
|
||||
None => return Response {
|
||||
id: req.id.clone(),
|
||||
success: false,
|
||||
result: None,
|
||||
error: Some("No repository open".to_string()),
|
||||
},
|
||||
};
|
||||
|
||||
let retention: crate::prune::RetentionPolicy = req.params
|
||||
.get("retention")
|
||||
.and_then(|v| serde_json::from_value(v.clone()).ok())
|
||||
.unwrap_or_default();
|
||||
let dry_run = req.params.get("dryRun").and_then(|v| v.as_bool()).unwrap_or(false);
|
||||
|
||||
match crate::prune::prune(repo, &retention, dry_run).await {
|
||||
Ok(result) => {
|
||||
let result_val = serde_json::to_value(&result).unwrap_or(Value::Null);
|
||||
Response {
|
||||
id: req.id.clone(),
|
||||
success: true,
|
||||
result: Some(result_val),
|
||||
error: None,
|
||||
}
|
||||
}
|
||||
Err(e) => Response {
|
||||
id: req.id.clone(),
|
||||
success: false,
|
||||
result: None,
|
||||
error: Some(e.to_string()),
|
||||
},
|
||||
}
|
||||
}
|
||||
|
||||
async fn handle_reindex(req: &Request, repo: &mut Option<Repository>) -> Response {
|
||||
let repo = match repo.as_mut() {
|
||||
Some(r) => r,
|
||||
None => return Response {
|
||||
id: req.id.clone(),
|
||||
success: false,
|
||||
result: None,
|
||||
error: Some("No repository open".to_string()),
|
||||
},
|
||||
};
|
||||
|
||||
match repo.reindex().await {
|
||||
Ok(count) => Response {
|
||||
id: req.id.clone(),
|
||||
success: true,
|
||||
result: Some(serde_json::json!({ "indexedChunks": count })),
|
||||
error: None,
|
||||
},
|
||||
Err(e) => Response {
|
||||
id: req.id.clone(),
|
||||
success: false,
|
||||
result: None,
|
||||
error: Some(e.to_string()),
|
||||
},
|
||||
}
|
||||
}
|
||||
|
||||
async fn handle_unlock(req: &Request, repo: &mut Option<Repository>) -> Response {
|
||||
let repo = match repo.as_ref() {
|
||||
Some(r) => r,
|
||||
None => return Response {
|
||||
id: req.id.clone(),
|
||||
success: false,
|
||||
result: None,
|
||||
error: Some("No repository open".to_string()),
|
||||
},
|
||||
};
|
||||
|
||||
let force = req.params.get("force").and_then(|v| v.as_bool()).unwrap_or(false);
|
||||
|
||||
match crate::lock::break_all_locks(&repo.path, force).await {
|
||||
Ok(count) => Response {
|
||||
id: req.id.clone(),
|
||||
success: true,
|
||||
result: Some(serde_json::json!({ "removedLocks": count })),
|
||||
error: None,
|
||||
},
|
||||
Err(e) => Response {
|
||||
id: req.id.clone(),
|
||||
success: false,
|
||||
result: None,
|
||||
error: Some(e.to_string()),
|
||||
},
|
||||
}
|
||||
}
|
||||
88
rust/src/pack_reader.rs
Normal file
88
rust/src/pack_reader.rs
Normal file
@@ -0,0 +1,88 @@
|
||||
/// Pack file reader.
|
||||
///
|
||||
/// Reads chunks from pack files using the companion .idx file for lookup.
|
||||
|
||||
use std::path::Path;
|
||||
use sha2::{Sha256, Digest};
|
||||
use crate::error::ArchiveError;
|
||||
use crate::pack_writer::{IdxEntry, IDX_ENTRY_SIZE, PACK_MAGIC, PACK_VERSION, PACK_HEADER_SIZE};
|
||||
|
||||
/// Load all index entries from a .idx file.
|
||||
pub async fn load_idx(idx_path: &Path) -> Result<Vec<IdxEntry>, ArchiveError> {
|
||||
let data = tokio::fs::read(idx_path).await?;
|
||||
|
||||
if data.len() % IDX_ENTRY_SIZE != 0 {
|
||||
return Err(ArchiveError::Corruption(format!(
|
||||
"IDX file size {} is not a multiple of entry size {}",
|
||||
data.len(),
|
||||
IDX_ENTRY_SIZE
|
||||
)));
|
||||
}
|
||||
|
||||
let count = data.len() / IDX_ENTRY_SIZE;
|
||||
let mut entries = Vec::with_capacity(count);
|
||||
|
||||
for i in 0..count {
|
||||
let start = i * IDX_ENTRY_SIZE;
|
||||
let entry = IdxEntry::from_bytes(&data[start..start + IDX_ENTRY_SIZE])?;
|
||||
entries.push(entry);
|
||||
}
|
||||
|
||||
Ok(entries)
|
||||
}
|
||||
|
||||
/// Read a single chunk from a pack file at the given offset and size.
|
||||
pub async fn read_chunk(pack_path: &Path, offset: u64, size: u32) -> Result<Vec<u8>, ArchiveError> {
|
||||
use tokio::io::{AsyncReadExt, AsyncSeekExt};
|
||||
|
||||
let mut file = tokio::fs::File::open(pack_path).await?;
|
||||
file.seek(std::io::SeekFrom::Start(offset)).await?;
|
||||
|
||||
let mut buf = vec![0u8; size as usize];
|
||||
file.read_exact(&mut buf).await?;
|
||||
|
||||
Ok(buf)
|
||||
}
|
||||
|
||||
/// Verify a pack file's header (magic, version, checksum).
///
/// The 32-byte header lives at the *end* of the pack file (see pack_writer):
/// magic "CAPA" (4) + version u16 LE (2) + chunk_count u32 LE (4) +
/// idx_size u32 LE (4) + truncated SHA-256 checksum (16) + reserved (2).
///
/// Returns Ok(false) when magic, version, or checksum do not match; returns
/// Err only for I/O failures or a file too small to hold a header.
pub async fn verify_pack_header(pack_path: &Path) -> Result<bool, ArchiveError> {
    // NOTE(review): reads the whole pack into memory; acceptable for ~8MB
    // target packs, but worth revisiting if pack sizes grow.
    let data = tokio::fs::read(pack_path).await?;

    if data.len() < PACK_HEADER_SIZE {
        return Err(ArchiveError::Corruption("Pack file too small for header".to_string()));
    }

    // Split into the chunk payload and the trailing 32-byte header.
    let header_start = data.len() - PACK_HEADER_SIZE;
    let header = &data[header_start..];
    let chunk_data = &data[..header_start];

    // Check magic (header bytes 0..4)
    if &header[0..4] != PACK_MAGIC {
        return Ok(false);
    }

    // Check version (header bytes 4..6, little-endian u16)
    use byteorder::{LittleEndian, ReadBytesExt};
    use std::io::Cursor;
    let mut cursor = Cursor::new(&header[4..6]);
    let version = cursor.read_u16::<LittleEndian>()
        .map_err(|e| ArchiveError::Corruption(format!("Header read error: {}", e)))?;
    if version != PACK_VERSION {
        return Ok(false);
    }

    // Verify checksum: bytes 14..30 hold SHA-256(chunk_data) truncated to 16
    // bytes. Bytes 6..14 (chunk_count + idx_size) are not needed here.
    let stored_checksum = &header[14..30];
    let mut hasher = Sha256::new();
    hasher.update(chunk_data);
    let computed = hasher.finalize();

    Ok(&computed[..16] == stored_checksum)
}
|
||||
|
||||
/// Find a chunk in a sorted IDX by content hash using binary search.
|
||||
pub fn find_in_idx<'a>(entries: &'a [IdxEntry], hash: &[u8; 32]) -> Option<&'a IdxEntry> {
|
||||
entries.binary_search_by(|e| e.content_hash.cmp(hash))
|
||||
.ok()
|
||||
.map(|idx| &entries[idx])
|
||||
}
|
||||
240
rust/src/pack_writer.rs
Normal file
240
rust/src/pack_writer.rs
Normal file
@@ -0,0 +1,240 @@
|
||||
/// Pack file writer.
|
||||
///
|
||||
/// Accumulates compressed (and optionally encrypted) chunks into a pack file
|
||||
/// until it reaches the target size (~8MB), then finalizes by writing the
|
||||
/// .pack and .idx files atomically.
|
||||
///
|
||||
/// Pack file format:
|
||||
/// [chunk-0-data][chunk-1-data]...[chunk-N-data][32-byte header]
|
||||
///
|
||||
/// Header (last 32 bytes):
|
||||
/// Magic "CAPA" (4) + version u16 LE (2) + chunk_count u32 LE (4) +
|
||||
/// idx_size u32 LE (4) + SHA-256 checksum truncated to 16 bytes (16) +
|
||||
/// reserved (2)
|
||||
///
|
||||
/// IDX file format:
|
||||
/// Array of 80-byte fixed-size entries, sorted by content hash.
|
||||
/// content_hash (32) + offset u64 LE (8) + compressed_size u32 LE (4) +
|
||||
/// plaintext_size u32 LE (4) + nonce (12) + flags u32 LE (4) + reserved (16)
|
||||
|
||||
use std::path::{Path, PathBuf};
|
||||
use byteorder::{LittleEndian, WriteBytesExt};
|
||||
use sha2::{Sha256, Digest};
|
||||
use crate::error::ArchiveError;
|
||||
|
||||
// Magic bytes at the start of the 32-byte trailing pack header.
pub const PACK_MAGIC: &[u8; 4] = b"CAPA";
// On-disk format version written into and checked against the header.
pub const PACK_VERSION: u16 = 1;
// Fixed size of one serialized IdxEntry (see to_bytes/from_bytes).
pub const IDX_ENTRY_SIZE: usize = 80;
// Size of the pack header appended after all chunk data.
pub const PACK_HEADER_SIZE: usize = 32;

/// Flags stored in IDX entries.
// Chunk payload is AES-256-GCM encrypted (nonce stored in the entry).
pub const FLAG_ENCRYPTED: u32 = 0x01;
// Chunk payload is gzip-compressed.
pub const FLAG_GZIP: u32 = 0x02;
|
||||
|
||||
/// An entry in the pack index.
///
/// Serialized as a fixed 80-byte record (see `IDX_ENTRY_SIZE` and
/// `to_bytes`/`from_bytes`); entries in a .idx file are sorted by
/// `content_hash` to allow binary search.
#[derive(Debug, Clone)]
pub struct IdxEntry {
    // SHA-256 of the chunk's plaintext content.
    pub content_hash: [u8; 32],
    // Byte offset of the chunk within the pack file's data section.
    pub offset: u64,
    // Size of the chunk as stored (after compression/encryption).
    pub compressed_size: u32,
    // Original (uncompressed, unencrypted) chunk size.
    pub plaintext_size: u32,
    // AES-GCM nonce; all-zero when the chunk is not encrypted.
    pub nonce: [u8; 12],
    // Bitmask of FLAG_ENCRYPTED / FLAG_GZIP.
    pub flags: u32,
}
|
||||
|
||||
impl IdxEntry {
    /// Serialize this entry to 80 bytes.
    ///
    /// Layout: content_hash (32) + offset u64 LE (8) + compressed_size u32 LE
    /// (4) + plaintext_size u32 LE (4) + nonce (12) + flags u32 LE (4) +
    /// reserved zeros (16). Must stay in sync with `from_bytes`.
    pub fn to_bytes(&self) -> Vec<u8> {
        let mut buf = Vec::with_capacity(IDX_ENTRY_SIZE);
        buf.extend_from_slice(&self.content_hash); // 32
        // Writing into a Vec cannot fail, so unwrap is safe here.
        buf.write_u64::<LittleEndian>(self.offset).unwrap(); // 8
        buf.write_u32::<LittleEndian>(self.compressed_size).unwrap(); // 4
        buf.write_u32::<LittleEndian>(self.plaintext_size).unwrap(); // 4
        buf.extend_from_slice(&self.nonce); // 12
        buf.write_u32::<LittleEndian>(self.flags).unwrap(); // 4
        buf.extend_from_slice(&[0u8; 16]); // 16 reserved
        debug_assert_eq!(buf.len(), IDX_ENTRY_SIZE);
        buf
    }

    /// Parse an entry from 80 bytes.
    ///
    /// Accepts a slice of at least IDX_ENTRY_SIZE bytes; extra trailing bytes
    /// are ignored. Returns ArchiveError::Corruption on a short slice or a
    /// failed field read.
    pub fn from_bytes(data: &[u8]) -> Result<Self, ArchiveError> {
        use byteorder::ReadBytesExt;
        use std::io::Cursor;

        if data.len() < IDX_ENTRY_SIZE {
            return Err(ArchiveError::Corruption("IDX entry too short".to_string()));
        }

        // Bytes 0..32: content hash.
        let mut content_hash = [0u8; 32];
        content_hash.copy_from_slice(&data[0..32]);

        // Bytes 32..48: offset (8) + compressed_size (4) + plaintext_size (4).
        let mut cursor = Cursor::new(&data[32..]);
        let offset = cursor.read_u64::<LittleEndian>()
            .map_err(|e| ArchiveError::Corruption(format!("IDX read error: {}", e)))?;
        let compressed_size = cursor.read_u32::<LittleEndian>()
            .map_err(|e| ArchiveError::Corruption(format!("IDX read error: {}", e)))?;
        let plaintext_size = cursor.read_u32::<LittleEndian>()
            .map_err(|e| ArchiveError::Corruption(format!("IDX read error: {}", e)))?;

        // Bytes 48..60: nonce.
        let mut nonce = [0u8; 12];
        nonce.copy_from_slice(&data[48..60]);

        // Bytes 60..64: flags; 64..80 reserved (ignored).
        let mut cursor = Cursor::new(&data[60..]);
        let flags = cursor.read_u32::<LittleEndian>()
            .map_err(|e| ArchiveError::Corruption(format!("IDX read error: {}", e)))?;

        Ok(Self {
            content_hash,
            offset,
            compressed_size,
            plaintext_size,
            nonce,
            flags,
        })
    }
}
|
||||
|
||||
/// Info about a finalized pack.
///
/// Returned by `PackWriter::finalize` once the .pack/.idx pair has been
/// written and renamed into place.
#[derive(Debug, Clone)]
pub struct PackInfo {
    // Hex UUID (hyphens stripped); also the file stem of the .pack/.idx pair.
    pub pack_id: String,
    // Final path of the .pack file.
    pub pack_path: PathBuf,
    // Final path of the companion .idx file.
    pub idx_path: PathBuf,
    // Number of chunks stored in the pack.
    pub chunk_count: u32,
    // Total size of the chunk data section (excludes the 32-byte header).
    pub data_size: u64,
}
|
||||
|
||||
/// Accumulates chunks and writes them to pack files.
///
/// Reusable across packs: `finalize` resets internal state so the same
/// writer can start accumulating the next pack.
pub struct PackWriter {
    // Finalization threshold in bytes (callers use ~8MB).
    target_size: u64,
    // Concatenated chunk payloads for the in-progress pack.
    data_buffer: Vec<u8>,
    // One index entry per buffered chunk, in insertion order until finalize.
    entries: Vec<IdxEntry>,
    // Offset the next chunk will get; equals data_buffer.len() as u64.
    current_offset: u64,
}
|
||||
|
||||
impl PackWriter {
    /// Create a writer that should finalize once the buffered chunk data
    /// reaches `target_size` bytes.
    pub fn new(target_size: u64) -> Self {
        Self {
            target_size,
            data_buffer: Vec::new(),
            entries: Vec::new(),
            current_offset: 0,
        }
    }

    /// Add a chunk to the current pack buffer.
    ///
    /// `compressed_data` is the on-disk form (already compressed and, when
    /// applicable, encrypted); `plaintext_size` is the original chunk size.
    /// The entry records the chunk's byte offset within the pack payload.
    pub fn add_chunk(
        &mut self,
        content_hash: [u8; 32],
        compressed_data: &[u8],
        plaintext_size: u32,
        nonce: [u8; 12],
        flags: u32,
    ) {
        let entry = IdxEntry {
            content_hash,
            offset: self.current_offset,
            compressed_size: compressed_data.len() as u32,
            plaintext_size,
            nonce,
            flags,
        };

        self.data_buffer.extend_from_slice(compressed_data);
        self.current_offset += compressed_data.len() as u64;
        self.entries.push(entry);
    }

    /// Check if the pack has reached the target size.
    pub fn should_finalize(&self) -> bool {
        self.data_buffer.len() as u64 >= self.target_size
    }

    /// Check if the pack has any chunks.
    pub fn is_empty(&self) -> bool {
        self.entries.is_empty()
    }

    /// Get the current number of chunks.
    pub fn chunk_count(&self) -> usize {
        self.entries.len()
    }

    /// Finalize: write .pack and .idx files to the repository.
    /// Returns info about the written pack.
    ///
    /// Side effects: sorts entries by content hash (for binary search),
    /// writes both files via tmp-then-rename, and resets internal state so
    /// the writer can accumulate the next pack. Errors on an empty pack.
    pub async fn finalize(&mut self, repo_path: &str) -> Result<PackInfo, ArchiveError> {
        if self.entries.is_empty() {
            return Err(ArchiveError::Other("Cannot finalize empty pack".to_string()));
        }

        // Generate pack ID (random UUID); first two hex chars pick the
        // shard sub-directory to keep directory fan-out bounded.
        let pack_id = uuid::Uuid::new_v4().to_string().replace("-", "");
        let shard = &pack_id[..2];

        let pack_dir = Path::new(repo_path).join("packs").join("data").join(shard);
        tokio::fs::create_dir_all(&pack_dir).await?;

        let pack_path = pack_dir.join(format!("{}.pack", pack_id));
        let idx_path = pack_dir.join(format!("{}.idx", pack_id));
        let tmp_pack_path = pack_dir.join(format!("{}.pack.tmp", pack_id));
        let tmp_idx_path = pack_dir.join(format!("{}.idx.tmp", pack_id));

        // Sort entries by content hash for binary search in idx
        self.entries.sort_by(|a, b| a.content_hash.cmp(&b.content_hash));

        // Build IDX data
        let mut idx_data = Vec::with_capacity(self.entries.len() * IDX_ENTRY_SIZE);
        for entry in &self.entries {
            idx_data.extend_from_slice(&entry.to_bytes());
        }

        // Compute pack header checksum (SHA-256 of chunk data, truncated to 16 bytes)
        let mut hasher = Sha256::new();
        hasher.update(&self.data_buffer);
        let checksum_full = hasher.finalize();
        let mut checksum = [0u8; 16];
        checksum.copy_from_slice(&checksum_full[..16]);

        // Build pack header (32 bytes) — layout documented in the module header.
        let mut header = Vec::with_capacity(PACK_HEADER_SIZE);
        header.extend_from_slice(PACK_MAGIC); // 4
        header.write_u16::<LittleEndian>(PACK_VERSION).unwrap(); // 2
        header.write_u32::<LittleEndian>(self.entries.len() as u32).unwrap(); // 4
        header.write_u32::<LittleEndian>(idx_data.len() as u32).unwrap(); // 4
        header.extend_from_slice(&checksum); // 16
        header.extend_from_slice(&[0u8; 2]); // 2 reserved
        debug_assert_eq!(header.len(), PACK_HEADER_SIZE);

        // Write pack file: [chunk data][header]
        let mut pack_data = Vec::with_capacity(self.data_buffer.len() + PACK_HEADER_SIZE);
        pack_data.extend_from_slice(&self.data_buffer);
        pack_data.extend_from_slice(&header);

        // Atomic write: write to tmp, then rename.
        // NOTE(review): the .idx is renamed into place before the .pack, so a
        // crash between the two renames leaves an .idx without its .pack;
        // presumably repair/reindex tolerates that — confirm.
        tokio::fs::write(&tmp_idx_path, &idx_data).await?;
        tokio::fs::write(&tmp_pack_path, &pack_data).await?;
        tokio::fs::rename(&tmp_idx_path, &idx_path).await?;
        tokio::fs::rename(&tmp_pack_path, &pack_path).await?;

        let info = PackInfo {
            pack_id: pack_id.clone(),
            pack_path: pack_path.clone(),
            idx_path: idx_path.clone(),
            chunk_count: self.entries.len() as u32,
            data_size: self.data_buffer.len() as u64,
        };

        // Reset internal state for next pack
        self.data_buffer.clear();
        self.entries.clear();
        self.current_offset = 0;

        Ok(info)
    }

    /// Get a copy of current entries (for index building).
    /// NOTE: returns a borrow, not a copy; cleared on the next finalize().
    pub fn entries(&self) -> &[IdxEntry] {
        &self.entries
    }
}
|
||||
277
rust/src/prune.rs
Normal file
277
rust/src/prune.rs
Normal file
@@ -0,0 +1,277 @@
|
||||
/// Retention-based pruning and garbage collection.
|
||||
///
|
||||
/// Prune determines which snapshots to keep based on retention policies,
|
||||
/// deletes expired snapshots, and removes pack files where ALL chunks
|
||||
/// are unreferenced (whole-pack GC only).
|
||||
|
||||
use std::collections::HashSet;
|
||||
use std::path::Path;
|
||||
use serde::{Deserialize, Serialize};
|
||||
|
||||
use crate::error::ArchiveError;
|
||||
use crate::repository::Repository;
|
||||
use crate::snapshot;
|
||||
|
||||
/// Retention policy controlling which snapshots prune keeps.
///
/// All fields are optional; when every field is None, prune keeps every
/// snapshot (see `determine_kept_snapshots`). Deserialized from camelCase
/// JSON supplied over IPC.
#[derive(Debug, Clone, Serialize, Deserialize, Default)]
#[serde(rename_all = "camelCase")]
pub struct RetentionPolicy {
    // Keep the N most recent snapshots.
    #[serde(default)]
    pub keep_last: Option<u32>,
    // Keep one snapshot per day for the last N days.
    #[serde(default)]
    pub keep_days: Option<u32>,
    // Keep one snapshot per ISO week for the last N weeks.
    #[serde(default)]
    pub keep_weeks: Option<u32>,
    // Keep one snapshot per month for the last N months (approximate).
    #[serde(default)]
    pub keep_months: Option<u32>,
}
|
||||
|
||||
/// Summary of a prune run, serialized to camelCase JSON for the IPC reply.
#[derive(Debug, Serialize)]
#[serde(rename_all = "camelCase")]
pub struct PruneResult {
    // Snapshots deleted (or that would be deleted in a dry run).
    pub removed_snapshots: u32,
    // Fully-unreferenced packs deleted (or counted in a dry run).
    pub removed_packs: u32,
    // Bytes of .pack/.idx files freed (measured even in a dry run).
    pub freed_bytes: u64,
    // Echoes the dryRun flag the caller supplied.
    pub dry_run: bool,
}
|
||||
|
||||
pub async fn prune(
|
||||
repo: &mut Repository,
|
||||
retention: &RetentionPolicy,
|
||||
dry_run: bool,
|
||||
) -> Result<PruneResult, ArchiveError> {
|
||||
// Acquire lock
|
||||
if !dry_run {
|
||||
repo.acquire_lock("prune").await?;
|
||||
}
|
||||
|
||||
let result = do_prune(repo, retention, dry_run).await;
|
||||
|
||||
if !dry_run {
|
||||
repo.release_lock().await?;
|
||||
}
|
||||
|
||||
result
|
||||
}
|
||||
|
||||
/// Core prune logic (caller holds the lock unless dry_run).
///
/// Phase 1 deletes snapshots not selected by the retention policy.
/// Phase 2 deletes pack files where *no* chunk is referenced by a remaining
/// snapshot (whole-pack GC only — partially-referenced packs are kept).
/// In dry-run mode nothing is deleted but the same counts/bytes are reported.
async fn do_prune(
    repo: &mut Repository,
    retention: &RetentionPolicy,
    dry_run: bool,
) -> Result<PruneResult, ArchiveError> {
    let mut result = PruneResult {
        removed_snapshots: 0,
        removed_packs: 0,
        freed_bytes: 0,
        dry_run,
    };

    // Load all snapshots
    let mut snapshots = snapshot::list_snapshots(&repo.path, None).await?;

    // Sort by creation time (newest first) — determine_kept_snapshots relies
    // on this ordering for keepLast.
    snapshots.sort_by(|a, b| b.created_at.cmp(&a.created_at));

    // Determine which snapshots to keep
    let keep_ids = determine_kept_snapshots(&snapshots, retention);

    // Phase 1: Remove expired snapshots
    let to_remove: Vec<_> = snapshots.iter()
        .filter(|s| !keep_ids.contains(&s.id))
        .collect();

    result.removed_snapshots = to_remove.len() as u32;

    if !dry_run {
        for snap in &to_remove {
            snapshot::delete_snapshot(&repo.path, &snap.id).await?;
            tracing::info!("Removed snapshot {}", snap.id);
        }
    }

    // Phase 2: Find and remove unreferenced packs.
    // Reload remaining snapshots; in a dry run nothing was deleted, so
    // simulate the post-deletion state by filtering the in-memory list.
    let remaining_snapshots = if dry_run {
        snapshots.iter()
            .filter(|s| keep_ids.contains(&s.id))
            .cloned()
            .collect::<Vec<_>>()
    } else {
        snapshot::list_snapshots(&repo.path, None).await?
    };

    let referenced_chunks = snapshot::referenced_chunks(&remaining_snapshots);
    let referenced_packs = find_referenced_packs(repo, &referenced_chunks);

    // Find all pack IDs on disk
    let all_packs = find_all_pack_ids(&repo.path).await?;

    for pack_id in &all_packs {
        if !referenced_packs.contains(pack_id) {
            // This pack is fully unreferenced — delete it.
            // Shard directory is the first two chars of the pack ID
            // (min() guards against pathological short IDs).
            let shard = &pack_id[..std::cmp::min(2, pack_id.len())];
            let pack_path = Path::new(&repo.path)
                .join("packs").join("data").join(shard)
                .join(format!("{}.pack", pack_id));
            let idx_path = Path::new(&repo.path)
                .join("packs").join("data").join(shard)
                .join(format!("{}.idx", pack_id));

            // Measure freed bytes before deleting (also in dry-run mode).
            if pack_path.exists() {
                if let Ok(meta) = tokio::fs::metadata(&pack_path).await {
                    result.freed_bytes += meta.len();
                }
            }
            if idx_path.exists() {
                if let Ok(meta) = tokio::fs::metadata(&idx_path).await {
                    result.freed_bytes += meta.len();
                }
            }

            if !dry_run {
                // Best-effort deletion: a failed remove is deliberately ignored.
                let _ = tokio::fs::remove_file(&pack_path).await;
                let _ = tokio::fs::remove_file(&idx_path).await;

                // Remove entries from global index
                repo.index.remove_pack_entries(pack_id);

                tracing::info!("Removed pack {}", pack_id);
            }

            result.removed_packs += 1;
        }
    }

    // Compact index after pruning
    if !dry_run && result.removed_packs > 0 {
        repo.index.compact(&repo.path).await?;
    }

    tracing::info!(
        "Prune {}: removed {} snapshots, {} packs, freed {} bytes",
        if dry_run { "(dry run)" } else { "complete" },
        result.removed_snapshots,
        result.removed_packs,
        result.freed_bytes
    );

    Ok(result)
}
|
||||
|
||||
/// Determine which snapshot IDs to keep based on retention policy.
///
/// `snapshots` must already be sorted newest-first (do_prune guarantees
/// this); keepLast takes the first N and each periodic rule keeps the
/// newest snapshot per period. The rules are additive (union of kept sets).
/// Snapshots whose created_at fails RFC 3339 parsing are skipped by the
/// periodic rules (they can still be kept via keepLast).
fn determine_kept_snapshots(
    snapshots: &[snapshot::Snapshot],
    retention: &RetentionPolicy,
) -> HashSet<String> {
    let mut keep = HashSet::new();

    // keepLast: keep the N most recent
    if let Some(n) = retention.keep_last {
        for snap in snapshots.iter().take(n as usize) {
            keep.insert(snap.id.clone());
        }
    }

    let now = chrono::Utc::now();

    // keepDays: keep one per day for the last N days
    if let Some(days) = retention.keep_days {
        let mut seen_days = HashSet::new();
        for snap in snapshots {
            if let Ok(dt) = chrono::DateTime::parse_from_rfc3339(&snap.created_at) {
                let age = now.signed_duration_since(dt);
                if age.num_days() <= days as i64 {
                    let day_key = dt.format("%Y-%m-%d").to_string();
                    // insert() returns true only for the first (newest)
                    // snapshot of each day.
                    if seen_days.insert(day_key) {
                        keep.insert(snap.id.clone());
                    }
                }
            }
        }
    }

    // keepWeeks: keep one per week for the last N weeks
    if let Some(weeks) = retention.keep_weeks {
        let mut seen_weeks = HashSet::new();
        for snap in snapshots {
            if let Ok(dt) = chrono::DateTime::parse_from_rfc3339(&snap.created_at) {
                let age = now.signed_duration_since(dt);
                if age.num_weeks() <= weeks as i64 {
                    let week_key = dt.format("%Y-W%W").to_string();
                    if seen_weeks.insert(week_key) {
                        keep.insert(snap.id.clone());
                    }
                }
            }
        }
    }

    // keepMonths: keep one per month for the last N months
    if let Some(months) = retention.keep_months {
        let mut seen_months = HashSet::new();
        for snap in snapshots {
            if let Ok(dt) = chrono::DateTime::parse_from_rfc3339(&snap.created_at) {
                let age = now.signed_duration_since(dt);
                // NOTE(review): a month is approximated as 31 days, so the
                // window is slightly generous (e.g. 12 months = 372 days) —
                // this over-keeps rather than over-deletes, which is the
                // safe direction for a backup tool.
                if age.num_days() <= (months as i64) * 31 {
                    let month_key = dt.format("%Y-%m").to_string();
                    if seen_months.insert(month_key) {
                        keep.insert(snap.id.clone());
                    }
                }
            }
        }
    }

    // If no retention policy is specified, keep everything
    if retention.keep_last.is_none()
        && retention.keep_days.is_none()
        && retention.keep_weeks.is_none()
        && retention.keep_months.is_none()
    {
        for snap in snapshots {
            keep.insert(snap.id.clone());
        }
    }

    keep
}
|
||||
|
||||
/// Find pack IDs that contain at least one referenced chunk.
|
||||
fn find_referenced_packs(
|
||||
repo: &Repository,
|
||||
referenced_chunks: &HashSet<String>,
|
||||
) -> HashSet<String> {
|
||||
let mut packs = HashSet::new();
|
||||
for hash_hex in referenced_chunks {
|
||||
if let Some(entry) = repo.index.get(hash_hex) {
|
||||
packs.insert(entry.pack_id.clone());
|
||||
}
|
||||
}
|
||||
packs
|
||||
}
|
||||
|
||||
/// Find all pack IDs on disk.
|
||||
async fn find_all_pack_ids(repo_path: &str) -> Result<Vec<String>, ArchiveError> {
|
||||
let packs_dir = Path::new(repo_path).join("packs").join("data");
|
||||
if !packs_dir.exists() {
|
||||
return Ok(Vec::new());
|
||||
}
|
||||
|
||||
let mut pack_ids = Vec::new();
|
||||
let mut stack = vec![packs_dir];
|
||||
|
||||
while let Some(current) = stack.pop() {
|
||||
let mut entries = tokio::fs::read_dir(¤t).await?;
|
||||
while let Some(entry) = entries.next_entry().await? {
|
||||
let path = entry.path();
|
||||
if path.is_dir() {
|
||||
stack.push(path);
|
||||
} else if path.extension().and_then(|e| e.to_str()) == Some("pack") {
|
||||
if let Some(id) = path.file_stem().and_then(|s| s.to_str()) {
|
||||
pack_ids.push(id.to_string());
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
Ok(pack_ids)
|
||||
}
|
||||
52
rust/src/repair.rs
Normal file
52
rust/src/repair.rs
Normal file
@@ -0,0 +1,52 @@
|
||||
/// Repository repair operations.
|
||||
|
||||
use serde::Serialize;
|
||||
use crate::error::ArchiveError;
|
||||
use crate::repository::Repository;
|
||||
|
||||
/// Outcome of a repair run, serialized to camelCase JSON for the IPC reply.
#[derive(Debug, Serialize)]
#[serde(rename_all = "camelCase")]
pub struct RepairResult {
    // True when the global index was successfully rebuilt from pack .idx files.
    pub index_rebuilt: bool,
    // Number of chunks in the rebuilt index (0 when rebuild failed).
    pub indexed_chunks: u64,
    // Stale lock files removed during cleanup.
    pub stale_locks_removed: u32,
    // Human-readable messages for steps that failed (repair is best-effort).
    pub errors: Vec<String>,
}
|
||||
|
||||
/// Repair a repository: rebuild index, remove stale locks.
|
||||
pub async fn repair(repo: &mut Repository) -> Result<RepairResult, ArchiveError> {
|
||||
let mut result = RepairResult {
|
||||
index_rebuilt: false,
|
||||
indexed_chunks: 0,
|
||||
stale_locks_removed: 0,
|
||||
errors: Vec::new(),
|
||||
};
|
||||
|
||||
// Rebuild global index from pack .idx files
|
||||
match repo.reindex().await {
|
||||
Ok(count) => {
|
||||
result.index_rebuilt = true;
|
||||
result.indexed_chunks = count as u64;
|
||||
tracing::info!("Rebuilt index with {} chunks", count);
|
||||
}
|
||||
Err(e) => {
|
||||
result.errors.push(format!("Index rebuild failed: {}", e));
|
||||
tracing::error!("Index rebuild failed: {}", e);
|
||||
}
|
||||
}
|
||||
|
||||
// Remove stale locks
|
||||
match crate::lock::check_and_break_stale(&repo.path).await {
|
||||
Ok(count) => {
|
||||
result.stale_locks_removed = count;
|
||||
if count > 0 {
|
||||
tracing::info!("Removed {} stale locks", count);
|
||||
}
|
||||
}
|
||||
Err(e) => {
|
||||
result.errors.push(format!("Lock cleanup failed: {}", e));
|
||||
}
|
||||
}
|
||||
|
||||
Ok(result)
|
||||
}
|
||||
219
rust/src/repository.rs
Normal file
219
rust/src/repository.rs
Normal file
@@ -0,0 +1,219 @@
|
||||
/// Repository management: init, open, close.
|
||||
///
|
||||
/// A repository is a directory with a specific structure containing config,
|
||||
/// packs, snapshots, index, locks, and keys.
|
||||
|
||||
use std::path::Path;
|
||||
use crate::config::{RepositoryConfig, EncryptionConfig, KeyFile, KdfParams};
|
||||
use crate::encryption;
|
||||
use crate::error::ArchiveError;
|
||||
use crate::global_index::GlobalIndex;
|
||||
use crate::lock;
|
||||
|
||||
/// An open backup repository.
pub struct Repository {
    // Filesystem path of the repository root.
    pub path: String,
    // Parsed contents of <repo>/config.json.
    pub config: RepositoryConfig,
    // In-memory chunk hash -> pack location index.
    pub index: GlobalIndex,
    // Unwrapped AES-256 master key; None for unencrypted repositories.
    pub master_key: Option<[u8; 32]>,
    // ID of the currently-held write lock, if any (see acquire_lock/release_lock).
    lock_id: Option<String>,
}
|
||||
|
||||
impl Repository {
    /// Initialize a new repository at the given path.
    ///
    /// Creates the directory layout (packs/data, packs/parity, snapshots,
    /// index, locks, keys), writes config.json, and — when a passphrase is
    /// given — generates a master key, wraps it with an Argon2id-derived
    /// KEK, and stores the wrapped key under keys/.
    ///
    /// Errors with ArchiveError::Config if config.json already exists.
    pub async fn init(repo_path: &str, passphrase: Option<&str>) -> Result<Self, ArchiveError> {
        let path = Path::new(repo_path);

        // Check if repo already exists
        let config_path = path.join("config.json");
        if config_path.exists() {
            return Err(ArchiveError::Config("Repository already exists at this path".to_string()));
        }

        // Create directory structure
        tokio::fs::create_dir_all(path).await?;
        for dir in &["packs/data", "packs/parity", "snapshots", "index", "locks", "keys"] {
            tokio::fs::create_dir_all(path.join(dir)).await?;
        }

        // Create config; encryption settings only exist when a passphrase
        // was supplied.
        let encryption_config = if passphrase.is_some() {
            Some(EncryptionConfig::default())
        } else {
            None
        };

        let config = RepositoryConfig::new(encryption_config);
        let config_json = serde_json::to_string_pretty(&config)?;
        tokio::fs::write(&config_path, config_json).await?;

        // Set up encryption if passphrase provided:
        // random master key, wrapped by a KEK derived from the passphrase.
        let master_key = if let Some(pass) = passphrase {
            let master = encryption::generate_master_key();
            let salt = encryption::generate_salt();
            let kdf_params = KdfParams::default();
            let kek = encryption::derive_key(pass, &salt, &kdf_params)?;
            let wrapped = encryption::wrap_key(&master, &kek)?;

            let key_id = uuid::Uuid::new_v4().to_string();
            let key_file = KeyFile {
                id: key_id.clone(),
                created_at: chrono::Utc::now().to_rfc3339(),
                kdf: "argon2id".to_string(),
                kdf_salt: hex::encode(salt),
                kdf_params,
                encrypted_key: hex::encode(&wrapped.encrypted_key),
                nonce: hex::encode(wrapped.nonce),
                auth_tag: String::new(), // tag is appended to encrypted_key in AES-GCM
            };

            let key_path = path.join("keys").join(format!("{}.key", key_id));
            let json = serde_json::to_string_pretty(&key_file)?;
            tokio::fs::write(&key_path, json).await?;

            Some(master)
        } else {
            None
        };

        tracing::info!("Initialized repository at {}", repo_path);

        Ok(Self {
            path: repo_path.to_string(),
            config,
            index: GlobalIndex::new(),
            master_key,
            lock_id: None,
        })
    }

    /// Open an existing repository.
    ///
    /// Loads config.json, breaks stale locks, unwraps the master key when
    /// the repository is encrypted (requires `passphrase`), and loads the
    /// global index.
    pub async fn open(repo_path: &str, passphrase: Option<&str>) -> Result<Self, ArchiveError> {
        let path = Path::new(repo_path);
        let config_path = path.join("config.json");

        if !config_path.exists() {
            return Err(ArchiveError::InvalidRepo(format!(
                "No config.json found at {}",
                repo_path
            )));
        }

        // Load config
        let config_data = tokio::fs::read_to_string(&config_path).await?;
        let config: RepositoryConfig = serde_json::from_str(&config_data)
            .map_err(|e| ArchiveError::Config(format!("Failed to parse config: {}", e)))?;

        // Break stale locks left behind by crashed processes.
        let broken = lock::check_and_break_stale(repo_path).await?;
        if broken > 0 {
            tracing::warn!("Broke {} stale lock(s)", broken);
        }

        // Load encryption key if encrypted; opening an encrypted repo
        // without a passphrase is an error.
        let master_key = if config.encryption.is_some() {
            let pass = passphrase.ok_or_else(|| {
                ArchiveError::Encryption("Repository is encrypted but no passphrase provided".to_string())
            })?;

            let key = load_master_key(repo_path, pass).await?;
            Some(key)
        } else {
            None
        };

        // Load global index
        let index = GlobalIndex::load(repo_path).await?;

        tracing::info!("Opened repository at {} ({} indexed chunks)", repo_path, index.len());

        Ok(Self {
            path: repo_path.to_string(),
            config,
            index,
            master_key,
            lock_id: None,
        })
    }

    /// Acquire a write lock.
    ///
    /// Stores the lock ID so release_lock/close can undo it. Does not
    /// guard against acquiring twice from the same instance.
    pub async fn acquire_lock(&mut self, operation: &str) -> Result<(), ArchiveError> {
        let entry = lock::acquire(&self.path, operation).await?;
        self.lock_id = Some(entry.lock_id);
        Ok(())
    }

    /// Release the write lock. No-op when no lock is held.
    pub async fn release_lock(&mut self) -> Result<(), ArchiveError> {
        if let Some(ref lock_id) = self.lock_id {
            lock::release(&self.path, lock_id).await?;
            self.lock_id = None;
        }
        Ok(())
    }

    /// Rebuild the global index from pack .idx files.
    ///
    /// Replaces the in-memory index, persists it as a single compacted
    /// segment, and returns the number of indexed chunks.
    pub async fn reindex(&mut self) -> Result<usize, ArchiveError> {
        self.index = GlobalIndex::rebuild_from_packs(&self.path).await?;
        let count = self.index.len();

        // Save as a single compacted segment
        self.index.compact(&self.path).await?;

        Ok(count)
    }

    /// Close the repository: save index, release lock.
    /// Consumes self so the repository cannot be used afterwards.
    pub async fn close(mut self) -> Result<(), ArchiveError> {
        self.index.save_segment(&self.path).await?;
        self.release_lock().await?;
        tracing::info!("Closed repository at {}", self.path);
        Ok(())
    }
}
|
||||
|
||||
/// Load the master key from the first key file found in keys/.
///
/// Tries every *.key file in turn: derives a KEK from the passphrase with
/// each file's stored Argon2id salt/params and attempts to unwrap the
/// wrapped master key. The first successful unwrap wins; a failed unwrap
/// (wrong passphrase for that key file) moves on to the next file.
///
/// Errors with ArchiveError::Encryption when no key file unlocks.
async fn load_master_key(repo_path: &str, passphrase: &str) -> Result<[u8; 32], ArchiveError> {
    let keys_dir = Path::new(repo_path).join("keys");
    let mut dir = tokio::fs::read_dir(&keys_dir).await?;

    while let Some(entry) = dir.next_entry().await? {
        let path = entry.path();
        // Only consider *.key files; skip anything else in keys/.
        if path.extension().and_then(|e| e.to_str()) != Some("key") {
            continue;
        }

        let data = tokio::fs::read_to_string(&path).await?;
        let key_file: KeyFile = serde_json::from_str(&data)
            .map_err(|e| ArchiveError::Encryption(format!("Failed to parse key file: {}", e)))?;

        let salt = hex::decode(&key_file.kdf_salt)
            .map_err(|e| ArchiveError::Encryption(format!("Invalid salt: {}", e)))?;

        // Derive the key-encryption-key from the passphrase + stored params.
        let kek = encryption::derive_key(passphrase, &salt, &key_file.kdf_params)?;

        let encrypted_key = hex::decode(&key_file.encrypted_key)
            .map_err(|e| ArchiveError::Encryption(format!("Invalid encrypted key: {}", e)))?;
        let nonce_bytes = hex::decode(&key_file.nonce)
            .map_err(|e| ArchiveError::Encryption(format!("Invalid nonce: {}", e)))?;

        // NOTE(review): a nonce shorter than 12 bytes is silently left
        // zero-padded here; unwrap_key will then fail and the key file is
        // skipped as if the passphrase were wrong — consider rejecting the
        // file with an explicit error instead.
        let mut nonce = [0u8; 12];
        if nonce_bytes.len() >= 12 {
            nonce.copy_from_slice(&nonce_bytes[..12]);
        }

        let wrapped = encryption::WrappedKey {
            encrypted_key,
            nonce,
        };

        match encryption::unwrap_key(&wrapped, &kek) {
            Ok(master) => return Ok(master),
            Err(_) => {
                // Wrong passphrase for this key file — try next
                continue;
            }
        }
    }

    Err(ArchiveError::Encryption("No key file could be unlocked with the provided passphrase".to_string()))
}
|
||||
131
rust/src/restore.rs
Normal file
131
rust/src/restore.rs
Normal file
@@ -0,0 +1,131 @@
|
||||
/// Restore pipeline: reads a snapshot manifest, looks up chunks in the global
|
||||
/// index, reads from pack files, decrypts, decompresses, and writes to a Unix socket.
|
||||
|
||||
use tokio::io::AsyncWriteExt;
|
||||
use tokio::net::UnixStream;
|
||||
|
||||
use crate::compression;
|
||||
use crate::encryption;
|
||||
use crate::error::ArchiveError;
|
||||
use crate::hasher;
|
||||
use crate::pack_reader;
|
||||
use crate::repository::Repository;
|
||||
use crate::snapshot;
|
||||
|
||||
/// Restore a snapshot (or a specific item) to a Unix socket.
|
||||
pub async fn restore(
|
||||
repo: &Repository,
|
||||
snapshot_id: &str,
|
||||
socket_path: &str,
|
||||
item_name: Option<&str>,
|
||||
) -> Result<(), ArchiveError> {
|
||||
// Load snapshot manifest
|
||||
let snap = snapshot::load_snapshot(&repo.path, snapshot_id).await?;
|
||||
|
||||
// Determine which items to restore
|
||||
let items_to_restore: Vec<&snapshot::SnapshotItem> = if let Some(name) = item_name {
|
||||
snap.items.iter()
|
||||
.filter(|i| i.name == name)
|
||||
.collect()
|
||||
} else {
|
||||
snap.items.iter().collect()
|
||||
};
|
||||
|
||||
if items_to_restore.is_empty() {
|
||||
return Err(ArchiveError::NotFound(format!(
|
||||
"No items found in snapshot {}{}",
|
||||
snapshot_id,
|
||||
item_name.map(|n| format!(" with name '{}'", n)).unwrap_or_default()
|
||||
)));
|
||||
}
|
||||
|
||||
// Connect to the Unix socket where TypeScript will read the restored data
|
||||
let mut stream = UnixStream::connect(socket_path).await
|
||||
.map_err(|e| ArchiveError::Io(e))?;
|
||||
|
||||
tracing::info!("Connected to restore socket: {}", socket_path);
|
||||
|
||||
let mut restored_bytes: u64 = 0;
|
||||
let mut chunks_read: u64 = 0;
|
||||
|
||||
for item in items_to_restore {
|
||||
for hash_hex in &item.chunks {
|
||||
// Look up chunk in global index
|
||||
let index_entry = repo.index.get(hash_hex)
|
||||
.ok_or_else(|| ArchiveError::NotFound(format!(
|
||||
"Chunk {} not found in index", hash_hex
|
||||
)))?;
|
||||
|
||||
// Determine pack file path
|
||||
let shard = &index_entry.pack_id[..2];
|
||||
let pack_path = std::path::Path::new(&repo.path)
|
||||
.join("packs")
|
||||
.join("data")
|
||||
.join(shard)
|
||||
.join(format!("{}.pack", index_entry.pack_id));
|
||||
|
||||
// Read chunk data from pack
|
||||
let stored_data = pack_reader::read_chunk(
|
||||
&pack_path,
|
||||
index_entry.offset,
|
||||
index_entry.compressed_size,
|
||||
).await?;
|
||||
|
||||
// Decrypt if encrypted
|
||||
let compressed = if let Some(ref key) = repo.master_key {
|
||||
// We need the nonce. Read it from the IDX file.
|
||||
let idx_path = std::path::Path::new(&repo.path)
|
||||
.join("packs")
|
||||
.join("data")
|
||||
.join(shard)
|
||||
.join(format!("{}.idx", index_entry.pack_id));
|
||||
|
||||
let entries = pack_reader::load_idx(&idx_path).await?;
|
||||
let hash_bytes = hasher::hex_to_hash(hash_hex)
|
||||
.map_err(|_| ArchiveError::Corruption(format!("Invalid hash: {}", hash_hex)))?;
|
||||
|
||||
let idx_entry = pack_reader::find_in_idx(&entries, &hash_bytes)
|
||||
.ok_or_else(|| ArchiveError::NotFound(format!(
|
||||
"Chunk {} not found in pack index {}", hash_hex, index_entry.pack_id
|
||||
)))?;
|
||||
|
||||
encryption::decrypt_chunk(&stored_data, key, &idx_entry.nonce)?
|
||||
} else {
|
||||
stored_data
|
||||
};
|
||||
|
||||
// Decompress
|
||||
let plaintext = compression::decompress(&compressed)?;
|
||||
|
||||
// Verify hash
|
||||
let actual_hash = hasher::hash_chunk(&plaintext);
|
||||
let expected_hash = hasher::hex_to_hash(hash_hex)
|
||||
.map_err(|_| ArchiveError::Corruption(format!("Invalid hash: {}", hash_hex)))?;
|
||||
|
||||
if actual_hash != expected_hash {
|
||||
return Err(ArchiveError::Corruption(format!(
|
||||
"Hash mismatch for chunk {}: expected {}, got {}",
|
||||
hash_hex,
|
||||
hash_hex,
|
||||
hasher::hash_to_hex(&actual_hash)
|
||||
)));
|
||||
}
|
||||
|
||||
// Write to output socket
|
||||
stream.write_all(&plaintext).await?;
|
||||
|
||||
restored_bytes += plaintext.len() as u64;
|
||||
chunks_read += 1;
|
||||
}
|
||||
}
|
||||
|
||||
// Close the write side
|
||||
stream.shutdown().await?;
|
||||
|
||||
tracing::info!(
|
||||
"Restore complete: {} bytes, {} chunks from snapshot {}",
|
||||
restored_bytes, chunks_read, snapshot_id
|
||||
);
|
||||
|
||||
Ok(())
|
||||
}
|
||||
195
rust/src/snapshot.rs
Normal file
195
rust/src/snapshot.rs
Normal file
@@ -0,0 +1,195 @@
|
||||
/// Snapshot manifest management.
|
||||
///
|
||||
/// A snapshot represents one complete backup operation, recording all metadata
|
||||
/// needed to reconstruct the original data.
|
||||
|
||||
use std::collections::HashMap;
|
||||
use std::path::Path;
|
||||
use serde::{Deserialize, Serialize};
|
||||
use crate::error::ArchiveError;
|
||||
|
||||
/// Manifest for one complete backup operation, recording all metadata needed
/// to reconstruct the original data. Serialized as camelCase JSON.
#[derive(Debug, Clone, Serialize, Deserialize)]
#[serde(rename_all = "camelCase")]
pub struct Snapshot {
    /// Unique id: UTC timestamp plus a short random suffix (see `Snapshot::new`).
    pub id: String,
    /// Manifest format version (currently always 1, set by `Snapshot::new`).
    pub version: u32,
    /// RFC 3339 creation timestamp (UTC).
    pub created_at: String,
    /// Arbitrary key/value labels; used for filtering in `list_snapshots`.
    pub tags: HashMap<String, String>,
    /// Caller-supplied byte counts — presumably original vs stored (after
    /// dedup/compression) sizes; confirm against the ingest pipeline.
    pub original_size: u64,
    pub stored_size: u64,
    /// Total chunk references across all items (derived in `Snapshot::new`).
    pub chunk_count: u64,
    /// Caller-supplied counters — presumably chunks newly written vs
    /// deduplicated during this backup; confirm against the ingest pipeline.
    pub new_chunks: u64,
    pub reused_chunks: u64,
    /// The individual data streams captured by this snapshot.
    pub items: Vec<SnapshotItem>,
}
|
||||
|
||||
/// One named data stream within a snapshot.
#[derive(Debug, Clone, Serialize, Deserialize)]
#[serde(rename_all = "camelCase")]
pub struct SnapshotItem {
    /// Caller-assigned item name (e.g. a file or stream label).
    pub name: String,
    /// Free-form category string; serialized under the JSON key "type".
    #[serde(rename = "type")]
    pub item_type: String,
    /// Size of the item's original data in bytes.
    pub size: u64,
    /// Ordered chunk references for reconstruction.
    pub chunks: Vec<String>, // hex-encoded SHA-256 hashes
}
|
||||
|
||||
/// Criteria for selecting snapshots in `list_snapshots`.
/// All present fields must match (logical AND); absent fields match anything.
#[derive(Debug, Clone, Serialize, Deserialize)]
#[serde(rename_all = "camelCase")]
pub struct SnapshotFilter {
    /// Every listed tag must be present with an equal value.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub tags: Option<HashMap<String, String>>,
    /// Inclusive lower bound on `created_at` (compared as strings).
    #[serde(skip_serializing_if = "Option::is_none")]
    pub after: Option<String>,
    /// Inclusive upper bound on `created_at` (compared as strings).
    #[serde(skip_serializing_if = "Option::is_none")]
    pub before: Option<String>,
}
|
||||
|
||||
impl Snapshot {
|
||||
/// Create a new snapshot with auto-generated ID.
|
||||
pub fn new(
|
||||
items: Vec<SnapshotItem>,
|
||||
tags: HashMap<String, String>,
|
||||
original_size: u64,
|
||||
stored_size: u64,
|
||||
new_chunks: u64,
|
||||
reused_chunks: u64,
|
||||
) -> Self {
|
||||
let now = chrono::Utc::now();
|
||||
let short_id = &uuid::Uuid::new_v4().to_string()[..8];
|
||||
let id = format!("{}-{}", now.format("%Y%m%dT%H%M%SZ"), short_id);
|
||||
let chunk_count = items.iter().map(|i| i.chunks.len() as u64).sum();
|
||||
|
||||
Self {
|
||||
id,
|
||||
version: 1,
|
||||
created_at: now.to_rfc3339(),
|
||||
tags,
|
||||
original_size,
|
||||
stored_size,
|
||||
chunk_count,
|
||||
new_chunks,
|
||||
reused_chunks,
|
||||
items,
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/// Save a snapshot manifest to disk.
|
||||
pub async fn save_snapshot(repo_path: &str, snapshot: &Snapshot) -> Result<(), ArchiveError> {
|
||||
let snapshots_dir = Path::new(repo_path).join("snapshots");
|
||||
tokio::fs::create_dir_all(&snapshots_dir).await?;
|
||||
|
||||
let path = snapshots_dir.join(format!("{}.json", snapshot.id));
|
||||
let tmp_path = snapshots_dir.join(format!("{}.json.tmp", snapshot.id));
|
||||
let json = serde_json::to_string_pretty(snapshot)?;
|
||||
|
||||
tokio::fs::write(&tmp_path, json).await?;
|
||||
tokio::fs::rename(&tmp_path, &path).await?;
|
||||
|
||||
tracing::info!("Saved snapshot {} ({} chunks, {} new)", snapshot.id, snapshot.chunk_count, snapshot.new_chunks);
|
||||
Ok(())
|
||||
}
|
||||
|
||||
/// Load a snapshot manifest from disk.
|
||||
pub async fn load_snapshot(repo_path: &str, snapshot_id: &str) -> Result<Snapshot, ArchiveError> {
|
||||
let path = Path::new(repo_path).join("snapshots").join(format!("{}.json", snapshot_id));
|
||||
if !path.exists() {
|
||||
return Err(ArchiveError::NotFound(format!("Snapshot not found: {}", snapshot_id)));
|
||||
}
|
||||
|
||||
let data = tokio::fs::read_to_string(&path).await?;
|
||||
let snapshot: Snapshot = serde_json::from_str(&data)
|
||||
.map_err(|e| ArchiveError::Corruption(format!("Failed to parse snapshot {}: {}", snapshot_id, e)))?;
|
||||
|
||||
Ok(snapshot)
|
||||
}
|
||||
|
||||
/// List all snapshots, optionally filtered.
|
||||
pub async fn list_snapshots(repo_path: &str, filter: Option<&SnapshotFilter>) -> Result<Vec<Snapshot>, ArchiveError> {
|
||||
let snapshots_dir = Path::new(repo_path).join("snapshots");
|
||||
if !snapshots_dir.exists() {
|
||||
return Ok(Vec::new());
|
||||
}
|
||||
|
||||
let mut snapshots = Vec::new();
|
||||
let mut dir = tokio::fs::read_dir(&snapshots_dir).await?;
|
||||
|
||||
while let Some(entry) = dir.next_entry().await? {
|
||||
let path = entry.path();
|
||||
if path.extension().and_then(|e| e.to_str()) != Some("json") {
|
||||
continue;
|
||||
}
|
||||
|
||||
let data = match tokio::fs::read_to_string(&path).await {
|
||||
Ok(d) => d,
|
||||
Err(_) => continue,
|
||||
};
|
||||
|
||||
let snapshot: Snapshot = match serde_json::from_str(&data) {
|
||||
Ok(s) => s,
|
||||
Err(_) => continue,
|
||||
};
|
||||
|
||||
if let Some(f) = filter {
|
||||
if !matches_filter(&snapshot, f) {
|
||||
continue;
|
||||
}
|
||||
}
|
||||
|
||||
snapshots.push(snapshot);
|
||||
}
|
||||
|
||||
// Sort by creation time (newest first)
|
||||
snapshots.sort_by(|a, b| b.created_at.cmp(&a.created_at));
|
||||
|
||||
Ok(snapshots)
|
||||
}
|
||||
|
||||
/// Delete a snapshot manifest.
|
||||
pub async fn delete_snapshot(repo_path: &str, snapshot_id: &str) -> Result<(), ArchiveError> {
|
||||
let path = Path::new(repo_path).join("snapshots").join(format!("{}.json", snapshot_id));
|
||||
if path.exists() {
|
||||
tokio::fs::remove_file(&path).await?;
|
||||
tracing::info!("Deleted snapshot {}", snapshot_id);
|
||||
}
|
||||
Ok(())
|
||||
}
|
||||
|
||||
/// Get all chunk hashes referenced by a list of snapshots.
|
||||
pub fn referenced_chunks(snapshots: &[Snapshot]) -> std::collections::HashSet<String> {
|
||||
let mut hashes = std::collections::HashSet::new();
|
||||
for snapshot in snapshots {
|
||||
for item in &snapshot.items {
|
||||
for hash in &item.chunks {
|
||||
hashes.insert(hash.clone());
|
||||
}
|
||||
}
|
||||
}
|
||||
hashes
|
||||
}
|
||||
|
||||
fn matches_filter(snapshot: &Snapshot, filter: &SnapshotFilter) -> bool {
|
||||
// Check tag filter
|
||||
if let Some(ref tags) = filter.tags {
|
||||
for (key, value) in tags {
|
||||
match snapshot.tags.get(key) {
|
||||
Some(v) if v == value => {},
|
||||
_ => return false,
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Check date range
|
||||
if let Some(ref after) = filter.after {
|
||||
if snapshot.created_at < *after {
|
||||
return false;
|
||||
}
|
||||
}
|
||||
if let Some(ref before) = filter.before {
|
||||
if snapshot.created_at > *before {
|
||||
return false;
|
||||
}
|
||||
}
|
||||
|
||||
true
|
||||
}
|
||||
290
rust/src/verify.rs
Normal file
290
rust/src/verify.rs
Normal file
@@ -0,0 +1,290 @@
|
||||
/// Repository verification at three levels.
|
||||
|
||||
use std::path::Path;
|
||||
use serde::Serialize;
|
||||
|
||||
use crate::compression;
|
||||
use crate::encryption;
|
||||
use crate::error::ArchiveError;
|
||||
use crate::hasher;
|
||||
use crate::pack_reader;
|
||||
use crate::pack_writer::FLAG_ENCRYPTED;
|
||||
use crate::repository::Repository;
|
||||
use crate::snapshot;
|
||||
|
||||
/// Outcome of a repository verification run. Serialized as camelCase JSON.
#[derive(Debug, Serialize)]
#[serde(rename_all = "camelCase")]
pub struct VerifyResult {
    /// True iff no errors were recorded at any verification level.
    pub ok: bool,
    /// All problems found; empty when `ok` is true.
    pub errors: Vec<VerifyError>,
    /// Counts of what was actually examined.
    pub stats: VerifyStats,
}
|
||||
|
||||
/// A single verification problem. The optional fields identify the affected
/// entity where known; unrelated fields are left as None.
#[derive(Debug, Serialize)]
#[serde(rename_all = "camelCase")]
pub struct VerifyError {
    /// Pack id (file stem) when the error concerns a pack file.
    pub pack: Option<String>,
    /// Hex chunk hash when the error concerns a specific chunk.
    pub chunk: Option<String>,
    /// Snapshot id when the error concerns a snapshot manifest.
    pub snapshot: Option<String>,
    /// Human-readable description of the problem.
    pub error: String,
}
|
||||
|
||||
/// Counters for the work performed during verification. Each counter only
/// advances when the corresponding level actually runs.
#[derive(Debug, Serialize)]
#[serde(rename_all = "camelCase")]
pub struct VerifyStats {
    /// Pack files whose headers were checked (standard/full levels).
    pub packs_checked: u64,
    /// Chunks read and re-hashed (full level only).
    pub chunks_checked: u64,
    /// Snapshot manifests examined (all levels).
    pub snapshots_checked: u64,
}
|
||||
|
||||
/// Verify repository integrity at the given level.
|
||||
pub async fn verify(repo: &Repository, level: &str) -> Result<VerifyResult, ArchiveError> {
|
||||
let mut errors = Vec::new();
|
||||
let mut stats = VerifyStats {
|
||||
packs_checked: 0,
|
||||
chunks_checked: 0,
|
||||
snapshots_checked: 0,
|
||||
};
|
||||
|
||||
// Level 1: Quick — check snapshots and index consistency
|
||||
verify_snapshots(repo, &mut errors, &mut stats).await?;
|
||||
|
||||
if level == "standard" || level == "full" {
|
||||
// Level 2: Standard — check pack headers
|
||||
verify_pack_headers(repo, &mut errors, &mut stats).await?;
|
||||
}
|
||||
|
||||
if level == "full" {
|
||||
// Level 3: Full — read and verify every chunk
|
||||
verify_all_chunks(repo, &mut errors, &mut stats).await?;
|
||||
}
|
||||
|
||||
Ok(VerifyResult {
|
||||
ok: errors.is_empty(),
|
||||
errors,
|
||||
stats,
|
||||
})
|
||||
}
|
||||
|
||||
/// Quick verification: check that all chunk hashes in snapshots exist in the index.
|
||||
async fn verify_snapshots(
|
||||
repo: &Repository,
|
||||
errors: &mut Vec<VerifyError>,
|
||||
stats: &mut VerifyStats,
|
||||
) -> Result<(), ArchiveError> {
|
||||
let snapshots = snapshot::list_snapshots(&repo.path, None).await?;
|
||||
|
||||
for snap in &snapshots {
|
||||
stats.snapshots_checked += 1;
|
||||
|
||||
for item in &snap.items {
|
||||
for hash_hex in &item.chunks {
|
||||
if !repo.index.has(hash_hex) {
|
||||
errors.push(VerifyError {
|
||||
pack: None,
|
||||
chunk: Some(hash_hex.clone()),
|
||||
snapshot: Some(snap.id.clone()),
|
||||
error: format!("Chunk {} referenced by snapshot {} not found in index", hash_hex, snap.id),
|
||||
});
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
Ok(())
|
||||
}
|
||||
|
||||
/// Standard verification: check pack file headers.
|
||||
async fn verify_pack_headers(
|
||||
repo: &Repository,
|
||||
errors: &mut Vec<VerifyError>,
|
||||
stats: &mut VerifyStats,
|
||||
) -> Result<(), ArchiveError> {
|
||||
let packs_dir = Path::new(&repo.path).join("packs").join("data");
|
||||
if !packs_dir.exists() {
|
||||
return Ok(());
|
||||
}
|
||||
|
||||
let pack_files = find_pack_files(&packs_dir).await?;
|
||||
|
||||
for pack_path in &pack_files {
|
||||
stats.packs_checked += 1;
|
||||
|
||||
match pack_reader::verify_pack_header(pack_path).await {
|
||||
Ok(true) => {},
|
||||
Ok(false) => {
|
||||
let pack_name = pack_path.file_stem()
|
||||
.and_then(|s| s.to_str())
|
||||
.unwrap_or("unknown")
|
||||
.to_string();
|
||||
errors.push(VerifyError {
|
||||
pack: Some(pack_name),
|
||||
chunk: None,
|
||||
snapshot: None,
|
||||
error: "Pack header verification failed (bad magic, version, or checksum)".to_string(),
|
||||
});
|
||||
},
|
||||
Err(e) => {
|
||||
let pack_name = pack_path.file_stem()
|
||||
.and_then(|s| s.to_str())
|
||||
.unwrap_or("unknown")
|
||||
.to_string();
|
||||
errors.push(VerifyError {
|
||||
pack: Some(pack_name),
|
||||
chunk: None,
|
||||
snapshot: None,
|
||||
error: format!("Failed to read pack: {}", e),
|
||||
});
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
Ok(())
|
||||
}
|
||||
|
||||
/// Full verification: for every chunk recorded in every pack's .idx file,
/// read the stored bytes, decrypt (when flagged encrypted), decompress, and
/// re-hash, recording an error for each failing step.
///
/// Errors never abort the scan: each failure is pushed onto `errors` and the
/// loop continues with the next chunk/pack, so one corrupt pack does not
/// hide problems elsewhere. Only directory-walk I/O errors propagate as Err.
async fn verify_all_chunks(
    repo: &Repository,
    errors: &mut Vec<VerifyError>,
    stats: &mut VerifyStats,
) -> Result<(), ArchiveError> {
    let packs_dir = Path::new(&repo.path).join("packs").join("data");
    if !packs_dir.exists() {
        // Nothing ingested yet — trivially valid.
        return Ok(());
    }

    // Drive the scan from .idx files: each one enumerates its pack's chunks.
    let idx_files = find_idx_files(&packs_dir).await?;

    for idx_path in &idx_files {
        let entries = match pack_reader::load_idx(idx_path).await {
            Ok(e) => e,
            Err(e) => {
                // Unreadable idx: record and move on to the next pack.
                errors.push(VerifyError {
                    pack: None,
                    chunk: None,
                    snapshot: None,
                    error: format!("Failed to load idx {}: {}", idx_path.display(), e),
                });
                continue;
            }
        };

        // The pack file must sit next to its idx (same stem, .pack extension).
        let pack_path = idx_path.with_extension("pack");
        if !pack_path.exists() {
            let pack_name = idx_path.file_stem()
                .and_then(|s| s.to_str())
                .unwrap_or("unknown")
                .to_string();
            errors.push(VerifyError {
                pack: Some(pack_name),
                chunk: None,
                snapshot: None,
                error: "Pack file missing for existing idx file".to_string(),
            });
            continue;
        }

        for entry in &entries {
            stats.chunks_checked += 1;
            let hash_hex = hasher::hash_to_hex(&entry.content_hash);

            // Read the stored (compressed, possibly encrypted) chunk bytes.
            let stored_data = match pack_reader::read_chunk(&pack_path, entry.offset, entry.compressed_size).await {
                Ok(d) => d,
                Err(e) => {
                    errors.push(VerifyError {
                        pack: None,
                        chunk: Some(hash_hex),
                        snapshot: None,
                        error: format!("Failed to read chunk: {}", e),
                    });
                    continue;
                }
            };

            // Decrypt when this entry is flagged encrypted; a flagged entry
            // with no master key loaded is itself an error.
            let compressed = if entry.flags & FLAG_ENCRYPTED != 0 {
                if let Some(ref key) = repo.master_key {
                    match encryption::decrypt_chunk(&stored_data, key, &entry.nonce) {
                        Ok(d) => d,
                        Err(e) => {
                            errors.push(VerifyError {
                                pack: None,
                                chunk: Some(hash_hex),
                                snapshot: None,
                                error: format!("Decryption failed: {}", e),
                            });
                            continue;
                        }
                    }
                } else {
                    errors.push(VerifyError {
                        pack: None,
                        chunk: Some(hash_hex),
                        snapshot: None,
                        error: "Chunk is encrypted but no key available".to_string(),
                    });
                    continue;
                }
            } else {
                stored_data
            };

            // Decompress the (now plaintext-compressed) chunk.
            let plaintext = match compression::decompress(&compressed) {
                Ok(d) => d,
                Err(e) => {
                    errors.push(VerifyError {
                        pack: None,
                        chunk: Some(hash_hex),
                        snapshot: None,
                        error: format!("Decompression failed: {}", e),
                    });
                    continue;
                }
            };

            // Content-addressed integrity: the recovered bytes must hash to
            // the hash they are stored under.
            if !hasher::verify_hash(&plaintext, &entry.content_hash) {
                errors.push(VerifyError {
                    pack: None,
                    chunk: Some(hash_hex),
                    snapshot: None,
                    error: "Hash mismatch after decompress/decrypt".to_string(),
                });
            }
        }
    }

    Ok(())
}
|
||||
|
||||
/// Recursively collect all `.pack` files under `dir`.
async fn find_pack_files(dir: &Path) -> Result<Vec<std::path::PathBuf>, ArchiveError> {
    find_files_by_extension(dir, "pack").await
}
|
||||
|
||||
/// Recursively collect all `.idx` files under `dir`.
async fn find_idx_files(dir: &Path) -> Result<Vec<std::path::PathBuf>, ArchiveError> {
    find_files_by_extension(dir, "idx").await
}
|
||||
|
||||
async fn find_files_by_extension(dir: &Path, ext: &str) -> Result<Vec<std::path::PathBuf>, ArchiveError> {
|
||||
let mut files = Vec::new();
|
||||
let mut stack = vec![dir.to_path_buf()];
|
||||
|
||||
while let Some(current) = stack.pop() {
|
||||
if !current.exists() {
|
||||
continue;
|
||||
}
|
||||
let mut entries = tokio::fs::read_dir(¤t).await?;
|
||||
while let Some(entry) = entries.next_entry().await? {
|
||||
let path = entry.path();
|
||||
if path.is_dir() {
|
||||
stack.push(path);
|
||||
} else if path.extension().and_then(|e| e.to_str()) == Some(ext) {
|
||||
files.push(path);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
Ok(files)
|
||||
}
|
||||
215
test/test.ts
Normal file
215
test/test.ts
Normal file
@@ -0,0 +1,215 @@
|
||||
import { tap, expect } from '@git.zone/tstest/tapbundle';
|
||||
import * as path from 'node:path';
|
||||
import * as fs from 'node:fs';
|
||||
import * as stream from 'node:stream';
|
||||
import { ContainerArchive } from '../ts/index.js';
|
||||
|
||||
const testRepoPath = path.resolve('.nogit/test-repo');
|
||||
const testRepoEncryptedPath = path.resolve('.nogit/test-repo-encrypted');
|
||||
|
||||
// Clean up test directories before tests
|
||||
tap.preTask('cleanup test directories', async () => {
|
||||
for (const p of [testRepoPath, testRepoEncryptedPath]) {
|
||||
if (fs.existsSync(p)) {
|
||||
fs.rmSync(p, { recursive: true });
|
||||
}
|
||||
}
|
||||
fs.mkdirSync('.nogit', { recursive: true });
|
||||
});
|
||||
|
||||
// ==================== Basic Repository Lifecycle ====================
|
||||
|
||||
let repo: ContainerArchive;
|
||||
|
||||
tap.test('should initialize a new repository', async () => {
|
||||
repo = await ContainerArchive.init(testRepoPath);
|
||||
expect(repo).toBeTruthy();
|
||||
|
||||
// Verify directory structure was created
|
||||
expect(fs.existsSync(path.join(testRepoPath, 'config.json'))).toBeTrue();
|
||||
expect(fs.existsSync(path.join(testRepoPath, 'packs', 'data'))).toBeTrue();
|
||||
expect(fs.existsSync(path.join(testRepoPath, 'snapshots'))).toBeTrue();
|
||||
expect(fs.existsSync(path.join(testRepoPath, 'index'))).toBeTrue();
|
||||
});
|
||||
|
||||
// ==================== Ingest ====================
|
||||
|
||||
tap.test('should ingest data and create a snapshot', async () => {
|
||||
// Create a 512KB buffer with deterministic content
|
||||
const testData = Buffer.alloc(512 * 1024);
|
||||
for (let i = 0; i < testData.length; i++) {
|
||||
testData[i] = i % 256;
|
||||
}
|
||||
|
||||
const inputStream = stream.Readable.from(testData);
|
||||
const snapshot = await repo.ingest(inputStream, {
|
||||
tags: { service: 'test', type: 'unit-test' },
|
||||
items: [{ name: 'test-data.bin', type: 'binary' }],
|
||||
});
|
||||
|
||||
expect(snapshot).toBeTruthy();
|
||||
expect(snapshot.id).toBeTruthy();
|
||||
expect(snapshot.originalSize).toEqual(512 * 1024);
|
||||
expect(snapshot.newChunks).toBeGreaterThan(0);
|
||||
expect(snapshot.items.length).toEqual(1);
|
||||
expect(snapshot.items[0].name).toEqual('test-data.bin');
|
||||
});
|
||||
|
||||
// ==================== Dedup ====================
|
||||
|
||||
tap.test('should deduplicate on second ingest of same data', async () => {
|
||||
// Ingest the exact same data again
|
||||
const testData = Buffer.alloc(512 * 1024);
|
||||
for (let i = 0; i < testData.length; i++) {
|
||||
testData[i] = i % 256;
|
||||
}
|
||||
|
||||
const inputStream = stream.Readable.from(testData);
|
||||
const snapshot = await repo.ingest(inputStream, {
|
||||
tags: { service: 'test', type: 'dedup-test' },
|
||||
items: [{ name: 'test-data-dup.bin', type: 'binary' }],
|
||||
});
|
||||
|
||||
expect(snapshot).toBeTruthy();
|
||||
expect(snapshot.newChunks).toEqual(0);
|
||||
expect(snapshot.reusedChunks).toBeGreaterThan(0);
|
||||
});
|
||||
|
||||
// ==================== List Snapshots ====================
|
||||
|
||||
tap.test('should list snapshots', async () => {
|
||||
const snapshots = await repo.listSnapshots();
|
||||
expect(snapshots.length).toEqual(2);
|
||||
});
|
||||
|
||||
tap.test('should filter snapshots by tag', async () => {
|
||||
const snapshots = await repo.listSnapshots({
|
||||
tags: { type: 'dedup-test' },
|
||||
});
|
||||
expect(snapshots.length).toEqual(1);
|
||||
expect(snapshots[0].tags.type).toEqual('dedup-test');
|
||||
});
|
||||
|
||||
// ==================== Restore ====================
|
||||
|
||||
tap.test('should restore data byte-for-byte', async () => {
|
||||
const snapshots = await repo.listSnapshots();
|
||||
const snapshotId = snapshots[snapshots.length - 1].id; // oldest
|
||||
|
||||
const restoreStream = await repo.restore(snapshotId);
|
||||
const chunks: Buffer[] = [];
|
||||
|
||||
await new Promise<void>((resolve, reject) => {
|
||||
restoreStream.on('data', (chunk: Buffer) => chunks.push(chunk));
|
||||
restoreStream.on('end', resolve);
|
||||
restoreStream.on('error', reject);
|
||||
});
|
||||
|
||||
const restored = Buffer.concat(chunks);
|
||||
|
||||
// Create expected data
|
||||
const expected = Buffer.alloc(512 * 1024);
|
||||
for (let i = 0; i < expected.length; i++) {
|
||||
expected[i] = i % 256;
|
||||
}
|
||||
|
||||
expect(restored.length).toEqual(expected.length);
|
||||
expect(restored.equals(expected)).toBeTrue();
|
||||
});
|
||||
|
||||
// ==================== Verify ====================
|
||||
|
||||
tap.test('should verify repository at quick level', async () => {
|
||||
const result = await repo.verify({ level: 'quick' });
|
||||
expect(result.ok).toBeTrue();
|
||||
expect(result.errors.length).toEqual(0);
|
||||
});
|
||||
|
||||
tap.test('should verify repository at standard level', async () => {
|
||||
const result = await repo.verify({ level: 'standard' });
|
||||
expect(result.ok).toBeTrue();
|
||||
});
|
||||
|
||||
tap.test('should verify repository at full level', async () => {
|
||||
const result = await repo.verify({ level: 'full' });
|
||||
expect(result.ok).toBeTrue();
|
||||
expect(result.stats.chunksChecked).toBeGreaterThan(0);
|
||||
});
|
||||
|
||||
// ==================== Prune ====================
|
||||
|
||||
tap.test('should prune with keepLast=1', async () => {
|
||||
const result = await repo.prune({ keepLast: 1 });
|
||||
expect(result.removedSnapshots).toEqual(1);
|
||||
expect(result.dryRun).toBeFalse();
|
||||
|
||||
// Verify only 1 snapshot remains
|
||||
const snapshots = await repo.listSnapshots();
|
||||
expect(snapshots.length).toEqual(1);
|
||||
});
|
||||
|
||||
// ==================== Close ====================
|
||||
|
||||
tap.test('should close repository', async () => {
|
||||
await repo.close();
|
||||
});
|
||||
|
||||
// ==================== Reopen ====================
|
||||
|
||||
tap.test('should reopen repository', async () => {
|
||||
repo = await ContainerArchive.open(testRepoPath);
|
||||
const snapshots = await repo.listSnapshots();
|
||||
expect(snapshots.length).toEqual(1);
|
||||
await repo.close();
|
||||
});
|
||||
|
||||
// ==================== Encrypted Repository ====================
|
||||
|
||||
tap.test('should create and use encrypted repository', async () => {
|
||||
const encRepo = await ContainerArchive.init(testRepoEncryptedPath, {
|
||||
passphrase: 'test-password-123',
|
||||
});
|
||||
|
||||
// Verify key file was created
|
||||
const keysDir = path.join(testRepoEncryptedPath, 'keys');
|
||||
const keyFiles = fs.readdirSync(keysDir).filter((f: string) => f.endsWith('.key'));
|
||||
expect(keyFiles.length).toEqual(1);
|
||||
|
||||
// Ingest data
|
||||
const testData = Buffer.alloc(128 * 1024, 'encrypted-test-data');
|
||||
const inputStream = stream.Readable.from(testData);
|
||||
const snapshot = await encRepo.ingest(inputStream, {
|
||||
tags: { encrypted: 'true' },
|
||||
items: [{ name: 'secret.bin' }],
|
||||
});
|
||||
|
||||
expect(snapshot.newChunks).toBeGreaterThan(0);
|
||||
|
||||
// Restore and verify
|
||||
const restoreStream = await encRepo.restore(snapshot.id);
|
||||
const chunks: Buffer[] = [];
|
||||
await new Promise<void>((resolve, reject) => {
|
||||
restoreStream.on('data', (chunk: Buffer) => chunks.push(chunk));
|
||||
restoreStream.on('end', resolve);
|
||||
restoreStream.on('error', reject);
|
||||
});
|
||||
|
||||
const restored = Buffer.concat(chunks);
|
||||
expect(restored.length).toEqual(testData.length);
|
||||
expect(restored.equals(testData)).toBeTrue();
|
||||
|
||||
await encRepo.close();
|
||||
});
|
||||
|
||||
tap.test('should open encrypted repository with correct passphrase', async () => {
|
||||
const encRepo = await ContainerArchive.open(testRepoEncryptedPath, {
|
||||
passphrase: 'test-password-123',
|
||||
});
|
||||
|
||||
const snapshots = await encRepo.listSnapshots();
|
||||
expect(snapshots.length).toEqual(1);
|
||||
|
||||
await encRepo.close();
|
||||
});
|
||||
|
||||
export default tap.start();
|
||||
8
ts/00_commitinfo_data.ts
Normal file
8
ts/00_commitinfo_data.ts
Normal file
@@ -0,0 +1,8 @@
|
||||
/**
 * autocreated commitinfo by @push.rocks/commitinfo
 *
 * Do not edit by hand — this file is regenerated automatically.
 */
export const commitinfo = {
  name: '@push.rocks/containerarchive',
  version: '0.0.1',
  description: 'content-addressed incremental backup engine with deduplication, encryption, and error correction',
};
|
||||
373
ts/classes.containerarchive.ts
Normal file
373
ts/classes.containerarchive.ts
Normal file
@@ -0,0 +1,373 @@
|
||||
import * as plugins from './plugins.js';
|
||||
import { commitinfo } from './00_commitinfo_data.js';
|
||||
import type {
|
||||
TContainerArchiveCommands,
|
||||
IInitOptions,
|
||||
IOpenOptions,
|
||||
IIngestOptions,
|
||||
IIngestItem,
|
||||
IIngestItemOptions,
|
||||
IRestoreOptions,
|
||||
ISnapshot,
|
||||
ISnapshotFilter,
|
||||
IVerifyOptions,
|
||||
IVerifyResult,
|
||||
IRetentionPolicy,
|
||||
IPruneResult,
|
||||
IRepairResult,
|
||||
IUnlockOptions,
|
||||
IIngestProgress,
|
||||
IIngestComplete,
|
||||
IVerifyError,
|
||||
IRepositoryConfig,
|
||||
} from './interfaces.js';
|
||||
|
||||
/**
|
||||
* Content-addressed incremental backup engine.
|
||||
*
|
||||
* Provides deduplicated, optionally encrypted, gzip-compressed storage
|
||||
* for arbitrary data streams with full snapshot history.
|
||||
*/
|
||||
export class ContainerArchive {
|
||||
private bridge: plugins.smartrust.RustBridge<TContainerArchiveCommands>;
|
||||
private repoPath: string;
|
||||
private spawned = false;
|
||||
|
||||
// Event subjects
|
||||
public ingestProgress = new plugins.smartrx.rxjs.Subject<IIngestProgress>();
|
||||
public ingestComplete = new plugins.smartrx.rxjs.Subject<IIngestComplete>();
|
||||
public verifyError = new plugins.smartrx.rxjs.Subject<IVerifyError>();
|
||||
|
||||
private constructor(repoPath: string) {
|
||||
this.repoPath = plugins.path.resolve(repoPath);
|
||||
|
||||
const packageDir = plugins.path.resolve(
|
||||
plugins.path.dirname(new URL(import.meta.url).pathname),
|
||||
'..',
|
||||
);
|
||||
|
||||
this.bridge = new plugins.smartrust.RustBridge<TContainerArchiveCommands>({
|
||||
binaryName: 'containerarchive',
|
||||
localPaths: [
|
||||
plugins.path.join(packageDir, 'dist_rust', 'containerarchive'),
|
||||
],
|
||||
readyTimeoutMs: 30000,
|
||||
requestTimeoutMs: 300000,
|
||||
});
|
||||
|
||||
// Listen for events from the Rust binary
|
||||
this.bridge.on('event', (event: { event: string; data: any }) => {
|
||||
if (event.event === 'progress') {
|
||||
this.ingestProgress.next(event.data);
|
||||
}
|
||||
});
|
||||
}
|
||||
|
||||
private async ensureSpawned(): Promise<void> {
|
||||
if (!this.spawned) {
|
||||
await this.bridge.spawn();
|
||||
this.spawned = true;
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Initialize a new repository at the given path.
|
||||
*/
|
||||
static async init(repoPath: string, options?: IInitOptions): Promise<ContainerArchive> {
|
||||
const instance = new ContainerArchive(repoPath);
|
||||
await instance.ensureSpawned();
|
||||
|
||||
await instance.bridge.sendCommand('init', {
|
||||
path: instance.repoPath,
|
||||
passphrase: options?.passphrase,
|
||||
});
|
||||
|
||||
return instance;
|
||||
}
|
||||
|
||||
/**
|
||||
* Open an existing repository at the given path.
|
||||
*/
|
||||
static async open(repoPath: string, options?: IOpenOptions): Promise<ContainerArchive> {
|
||||
const instance = new ContainerArchive(repoPath);
|
||||
await instance.ensureSpawned();
|
||||
|
||||
await instance.bridge.sendCommand('open', {
|
||||
path: instance.repoPath,
|
||||
passphrase: options?.passphrase,
|
||||
});
|
||||
|
||||
return instance;
|
||||
}
|
||||
|
||||
/**
|
||||
* Ingest a single data stream into the repository.
|
||||
*/
|
||||
async ingest(
|
||||
inputStream: NodeJS.ReadableStream,
|
||||
options?: IIngestOptions,
|
||||
): Promise<ISnapshot> {
|
||||
const socketPath = plugins.path.join(
|
||||
plugins.os.tmpdir(),
|
||||
`containerarchive-ingest-${Date.now()}-${Math.random().toString(36).slice(2)}.sock`,
|
||||
);
|
||||
|
||||
// Create Unix socket server that Rust will connect to
|
||||
const { promise: dataTransferred, server } = await this.createSocketServer(
|
||||
socketPath,
|
||||
inputStream,
|
||||
);
|
||||
|
||||
try {
|
||||
// Send ingest command to Rust (Rust connects to our socket)
|
||||
const result = await this.bridge.sendCommand('ingest', {
|
||||
socketPath,
|
||||
tags: options?.tags,
|
||||
items: options?.items || [{ name: 'data', type: 'data' }],
|
||||
});
|
||||
|
||||
// Wait for data transfer to complete
|
||||
await dataTransferred;
|
||||
|
||||
const snapshot = result.snapshot;
|
||||
this.ingestComplete.next({
|
||||
snapshotId: snapshot.id,
|
||||
originalSize: snapshot.originalSize,
|
||||
storedSize: snapshot.storedSize,
|
||||
newChunks: snapshot.newChunks,
|
||||
reusedChunks: snapshot.reusedChunks,
|
||||
});
|
||||
|
||||
return snapshot;
|
||||
} finally {
|
||||
server.close();
|
||||
// Clean up socket file
|
||||
try {
|
||||
plugins.fs.unlinkSync(socketPath);
|
||||
} catch {}
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Ingest multiple data streams as a single multi-item snapshot.
|
||||
*/
|
||||
async ingestMulti(
|
||||
items: IIngestItem[],
|
||||
options?: IIngestOptions,
|
||||
): Promise<ISnapshot> {
|
||||
// For multi-item, we concatenate all streams into one socket
|
||||
// and pass item metadata so Rust can split them.
|
||||
// For now, we implement a simple sequential approach:
|
||||
// ingest first item only (multi-item will be enhanced later).
|
||||
if (items.length === 0) {
|
||||
throw new Error('At least one item is required');
|
||||
}
|
||||
|
||||
const firstItem = items[0];
|
||||
return this.ingest(firstItem.stream, {
|
||||
...options,
|
||||
items: items.map((i) => ({ name: i.name, type: i.type || 'data' })),
|
||||
});
|
||||
}
|
||||
|
||||
/**
|
||||
* List snapshots with optional filtering.
|
||||
*/
|
||||
async listSnapshots(filter?: ISnapshotFilter): Promise<ISnapshot[]> {
|
||||
const result = await this.bridge.sendCommand('listSnapshots', {
|
||||
filter,
|
||||
});
|
||||
return result.snapshots;
|
||||
}
|
||||
|
||||
/**
|
||||
* Get details of a specific snapshot.
|
||||
*/
|
||||
async getSnapshot(snapshotId: string): Promise<ISnapshot> {
|
||||
const result = await this.bridge.sendCommand('getSnapshot', {
|
||||
snapshotId,
|
||||
});
|
||||
return result.snapshot;
|
||||
}
|
||||
|
||||
/**
|
||||
* Restore a snapshot to a ReadableStream.
|
||||
*/
|
||||
async restore(
|
||||
snapshotId: string,
|
||||
options?: IRestoreOptions,
|
||||
): Promise<NodeJS.ReadableStream> {
|
||||
const socketPath = plugins.path.join(
|
||||
plugins.os.tmpdir(),
|
||||
`containerarchive-restore-${Date.now()}-${Math.random().toString(36).slice(2)}.sock`,
|
||||
);
|
||||
|
||||
// Create Unix socket server that Rust will connect to and write data
|
||||
const { readable, server } = await this.createRestoreSocketServer(socketPath);
|
||||
|
||||
// Send restore command to Rust (Rust connects and writes data)
|
||||
// Don't await — let it run in parallel with reading
|
||||
this.bridge.sendCommand('restore', {
|
||||
snapshotId,
|
||||
socketPath,
|
||||
item: options?.item,
|
||||
}).catch((err) => {
|
||||
readable.destroy(err);
|
||||
}).finally(() => {
|
||||
server.close();
|
||||
try {
|
||||
plugins.fs.unlinkSync(socketPath);
|
||||
} catch {}
|
||||
});
|
||||
|
||||
return readable;
|
||||
}
|
||||
|
||||
/**
|
||||
* Verify repository integrity.
|
||||
*/
|
||||
async verify(options?: IVerifyOptions): Promise<IVerifyResult> {
|
||||
const result = await this.bridge.sendCommand('verify', {
|
||||
level: options?.level || 'standard',
|
||||
});
|
||||
|
||||
for (const error of result.errors) {
|
||||
this.verifyError.next(error);
|
||||
}
|
||||
|
||||
return result;
|
||||
}
|
||||
|
||||
/**
|
||||
* Repair repository (rebuild index, remove stale locks).
|
||||
*/
|
||||
async repair(): Promise<IRepairResult> {
|
||||
return this.bridge.sendCommand('repair', {});
|
||||
}
|
||||
|
||||
/**
|
||||
* Prune old snapshots and garbage-collect unreferenced packs.
|
||||
*/
|
||||
async prune(retention: IRetentionPolicy, dryRun = false): Promise<IPruneResult> {
|
||||
return this.bridge.sendCommand('prune', {
|
||||
retention,
|
||||
dryRun,
|
||||
});
|
||||
}
|
||||
|
||||
/**
|
||||
* Rebuild the global index from pack .idx files.
|
||||
*/
|
||||
async reindex(): Promise<void> {
|
||||
await this.bridge.sendCommand('reindex', {});
|
||||
}
|
||||
|
||||
/**
|
||||
* Remove locks from the repository.
|
||||
*/
|
||||
async unlock(options?: IUnlockOptions): Promise<void> {
|
||||
await this.bridge.sendCommand('unlock', {
|
||||
force: options?.force,
|
||||
});
|
||||
}
|
||||
|
||||
/**
|
||||
* Subscribe to events.
|
||||
*/
|
||||
on(event: 'ingest:progress', handler: (data: IIngestProgress) => void): plugins.smartrx.rxjs.Subscription;
|
||||
on(event: 'ingest:complete', handler: (data: IIngestComplete) => void): plugins.smartrx.rxjs.Subscription;
|
||||
on(event: 'verify:error', handler: (data: IVerifyError) => void): plugins.smartrx.rxjs.Subscription;
|
||||
on(event: string, handler: (data: any) => void): plugins.smartrx.rxjs.Subscription {
|
||||
switch (event) {
|
||||
case 'ingest:progress':
|
||||
return this.ingestProgress.subscribe(handler);
|
||||
case 'ingest:complete':
|
||||
return this.ingestComplete.subscribe(handler);
|
||||
case 'verify:error':
|
||||
return this.verifyError.subscribe(handler);
|
||||
default:
|
||||
throw new Error(`Unknown event: ${event}`);
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Close the repository and terminate the Rust process.
|
||||
*/
|
||||
async close(): Promise<void> {
|
||||
try {
|
||||
await this.bridge.sendCommand('close', {});
|
||||
} catch {
|
||||
// Ignore errors during close
|
||||
}
|
||||
this.bridge.kill();
|
||||
this.spawned = false;
|
||||
|
||||
this.ingestProgress.complete();
|
||||
this.ingestComplete.complete();
|
||||
this.verifyError.complete();
|
||||
}
|
||||
|
||||
// ==================== Private Helpers ====================
|
||||
|
||||
/**
|
||||
* Create a Unix socket server that accepts a connection from Rust
|
||||
* and pipes the inputStream to it (for ingest).
|
||||
*/
|
||||
private createSocketServer(
|
||||
socketPath: string,
|
||||
inputStream: NodeJS.ReadableStream,
|
||||
): Promise<{
|
||||
promise: Promise<void>;
|
||||
server: plugins.net.Server;
|
||||
}> {
|
||||
return new Promise((resolve, reject) => {
|
||||
const server = plugins.net.createServer((socket) => {
|
||||
// Pipe input data to the Rust process via socket
|
||||
const readableStream = inputStream as NodeJS.ReadableStream;
|
||||
(readableStream as any).pipe(socket);
|
||||
});
|
||||
|
||||
server.on('error', reject);
|
||||
|
||||
server.listen(socketPath, () => {
|
||||
const promise = new Promise<void>((res) => {
|
||||
server.on('close', () => res());
|
||||
// Also resolve after a connection is handled
|
||||
server.once('connection', (socket) => {
|
||||
socket.on('end', () => {
|
||||
res();
|
||||
});
|
||||
socket.on('error', () => {
|
||||
res();
|
||||
});
|
||||
});
|
||||
});
|
||||
resolve({ promise, server });
|
||||
});
|
||||
});
|
||||
}
|
||||
|
||||
/**
|
||||
* Create a Unix socket server that accepts a connection from Rust
|
||||
* and provides a ReadableStream of the received data (for restore).
|
||||
*/
|
||||
private createRestoreSocketServer(
|
||||
socketPath: string,
|
||||
): Promise<{
|
||||
readable: plugins.stream.PassThrough;
|
||||
server: plugins.net.Server;
|
||||
}> {
|
||||
return new Promise((resolve, reject) => {
|
||||
const passthrough = new plugins.stream.PassThrough();
|
||||
const server = plugins.net.createServer((socket) => {
|
||||
socket.pipe(passthrough);
|
||||
});
|
||||
|
||||
server.on('error', reject);
|
||||
|
||||
server.listen(socketPath, () => {
|
||||
resolve({ readable: passthrough, server });
|
||||
});
|
||||
});
|
||||
}
|
||||
}
|
||||
2
ts/index.ts
Normal file
2
ts/index.ts
Normal file
@@ -0,0 +1,2 @@
|
||||
// Public API surface of the package: the ContainerArchive facade class
// and every interface/type used by its methods and events.
export * from './classes.containerarchive.js';
export * from './interfaces.js';
|
||||
219
ts/interfaces.ts
Normal file
219
ts/interfaces.ts
Normal file
@@ -0,0 +1,219 @@
|
||||
import type { ICommandDefinition } from '@push.rocks/smartrust';
|
||||
|
||||
// ==================== Repository Config ====================
|
||||
|
||||
/**
 * Persisted configuration of a repository, returned by the Rust engine
 * on the 'init' and 'open' commands.
 */
export interface IRepositoryConfig {
  /** On-disk format version of the repository. */
  version: number;
  /** Unique identifier of this repository. */
  id: string;
  // Creation timestamp — presumably ISO 8601; confirm against the Rust side.
  createdAt: string;
  /** Content-defined chunking parameters. */
  chunking: IChunkingConfig;
  /** Compression algorithm name. */
  compression: string;
  /** Present only when the repository is encrypted. */
  encryption?: IEncryptionConfig;
  // Target pack-file size — presumably bytes; confirm against the Rust side.
  packTargetSize: number;
}

/**
 * Parameters for the content-defined chunker.
 */
export interface IChunkingConfig {
  /** Chunking algorithm name. */
  algorithm: string;
  // min/avg/max chunk sizes — presumably bytes; confirm against the Rust side.
  minSize: number;
  avgSize: number;
  maxSize: number;
}

/**
 * Encryption settings stored in the repository config.
 */
export interface IEncryptionConfig {
  /** Cipher name. */
  algorithm: string;
  /** Key-derivation function name. */
  kdf: string;
  /** Parameters for the KDF. */
  kdfParams: IKdfParams;
}

/**
 * Argon2-style KDF parameters.
 */
export interface IKdfParams {
  // Memory cost — Argon2 conventionally measures this in KiB; confirm.
  memory: number;
  /** Iteration count (time cost). */
  iterations: number;
  /** Degree of parallelism (lanes). */
  parallelism: number;
}
|
||||
|
||||
// ==================== Snapshots ====================
|
||||
|
||||
/**
 * Metadata describing one immutable snapshot stored in the repository.
 */
export interface ISnapshot {
  /** Unique snapshot identifier. */
  id: string;
  /** Snapshot format version. */
  version: number;
  // Creation timestamp — presumably ISO 8601; confirm against the Rust side.
  createdAt: string;
  /** Free-form key/value tags supplied at ingest time. */
  tags: Record<string, string>;
  // Sizes — presumably bytes; confirm against the Rust side.
  /** Total size of the ingested data before dedup/compression. */
  originalSize: number;
  /** Size actually added to storage for this snapshot. */
  storedSize: number;
  /** Number of chunks referenced by this snapshot. */
  chunkCount: number;
  /** Chunks newly written by this ingest. */
  newChunks: number;
  /** Chunks deduplicated against earlier snapshots. */
  reusedChunks: number;
  /** The named items (data streams) contained in this snapshot. */
  items: ISnapshotItem[];
}

/**
 * One named data item inside a snapshot.
 */
export interface ISnapshotItem {
  /** Item name (defaults to 'data' on the ingest path). */
  name: string;
  /** Item type tag (free-form; 'data' by default). */
  type: string;
  // Item size — presumably bytes; confirm against the Rust side.
  size: number;
  /** Ordered chunk identifiers making up the item's content. */
  chunks: string[];
}

/**
 * Filter criteria for listSnapshots(); all criteria are optional.
 */
export interface ISnapshotFilter {
  /** Only snapshots carrying all of these tag key/value pairs. */
  tags?: Record<string, string>;
  // after/before match against createdAt; inclusive/exclusive semantics
  // are decided on the Rust side — confirm before relying on boundaries.
  after?: string;
  before?: string;
}
|
||||
|
||||
// ==================== Ingest ====================
|
||||
|
||||
/**
 * Options for creating a new repository via ContainerArchive.init().
 */
export interface IInitOptions {
  /** When set, the repository is encrypted; the passphrase feeds the KDF. */
  passphrase?: string;
  // NOTE(review): chunking and packTargetSize are accepted here but the
  // facade's init() does not forward them over IPC — verify before use.
  chunking?: Partial<IChunkingConfig>;
  packTargetSize?: number;
}

/**
 * Options for opening an existing repository via ContainerArchive.open().
 */
export interface IOpenOptions {
  /** Required when the repository was initialized with encryption. */
  passphrase?: string;
}

/**
 * Options for ingest() / ingestMulti().
 */
export interface IIngestOptions {
  /** Free-form key/value tags to attach to the resulting snapshot. */
  tags?: Record<string, string>;
  /** Item metadata; defaults to a single item named 'data'. */
  items?: IIngestItemOptions[];
}

/**
 * Metadata for one item of an ingest, without the stream itself.
 */
export interface IIngestItemOptions {
  /** Item name. */
  name: string;
  /** Item type tag; defaults to 'data'. */
  type?: string;
}

/**
 * One item of a multi-item ingest: metadata plus its data stream.
 */
export interface IIngestItem {
  /** The data to ingest for this item. */
  stream: NodeJS.ReadableStream;
  /** Item name. */
  name: string;
  /** Item type tag; defaults to 'data'. */
  type?: string;
}
|
||||
|
||||
// ==================== Restore ====================
|
||||
|
||||
/**
 * Options for restore().
 */
export interface IRestoreOptions {
  /** Restore only the item with this name; otherwise the default item. */
  item?: string;
}

// ==================== Maintenance ====================

/**
 * Options for verify().
 */
export interface IVerifyOptions {
  /** Thoroughness of the check; the facade defaults to 'standard'. */
  level?: 'quick' | 'standard' | 'full';
}

/**
 * Result of a verify() run.
 */
export interface IVerifyResult {
  /** True when no integrity errors were found. */
  ok: boolean;
  /** Detailed errors; each is also emitted on the 'verify:error' event. */
  errors: IVerifyError[];
  /** Counters of what was inspected. */
  stats: {
    packsChecked: number;
    chunksChecked: number;
    snapshotsChecked: number;
  };
}

/**
 * One integrity error; the optional fields locate the affected object.
 */
export interface IVerifyError {
  /** Pack file involved, if applicable. */
  pack?: string;
  /** Chunk id involved, if applicable. */
  chunk?: string;
  /** Snapshot id involved, if applicable. */
  snapshot?: string;
  /** Human-readable error description. */
  error: string;
}

/**
 * Retention rules for prune(). All criteria are optional; how they combine
 * is implemented on the Rust side — confirm before relying on overlap.
 */
export interface IRetentionPolicy {
  keepLast?: number;
  keepDays?: number;
  keepWeeks?: number;
  keepMonths?: number;
}

/**
 * Result of a prune() run.
 */
export interface IPruneResult {
  /** Snapshots removed (or that would be removed, on a dry run). */
  removedSnapshots: number;
  /** Pack files garbage-collected. */
  removedPacks: number;
  /** Storage reclaimed, in bytes. */
  freedBytes: number;
  /** True when nothing was actually deleted. */
  dryRun: boolean;
}

/**
 * Result of a repair() run.
 */
export interface IRepairResult {
  /** Whether the global index was rebuilt. */
  indexRebuilt: boolean;
  /** Number of chunks indexed. */
  indexedChunks: number;
  /** Stale lock files removed. */
  staleLocksRemoved: number;
  /** Non-fatal problems encountered during repair. */
  errors: string[];
}

/**
 * Options for unlock().
 */
export interface IUnlockOptions {
  // Presumably removes locks even when they appear held — confirm Rust semantics.
  force?: boolean;
}
|
||||
|
||||
// ==================== Events ====================
|
||||
|
||||
/**
 * Payload of the 'ingest:progress' event, forwarded from the Rust binary.
 */
export interface IIngestProgress {
  /** Name of the operation currently running. */
  operation: string;
  // Completion percentage — presumably 0-100; confirm against the Rust side.
  percentage: number;
  /** Human-readable status message. */
  message: string;
}

/**
 * Payload of the 'ingest:complete' event, derived from the resulting
 * snapshot (see ISnapshot for field semantics).
 */
export interface IIngestComplete {
  snapshotId: string;
  originalSize: number;
  storedSize: number;
  newChunks: number;
  reusedChunks: number;
}
|
||||
|
||||
// ==================== IPC Command Map ====================
|
||||
|
||||
/**
 * Typed IPC command map for the RustBridge: maps each command name to the
 * request payload and response shape exchanged with the Rust binary.
 * The facade methods in ContainerArchive are thin wrappers over these.
 */
export type TContainerArchiveCommands = {
  /** Create a new repository on disk. */
  init: ICommandDefinition<
    { path: string; passphrase?: string },
    IRepositoryConfig
  >;
  /** Open an existing repository. */
  open: ICommandDefinition<
    { path: string; passphrase?: string },
    IRepositoryConfig
  >;
  /** Release the repository before process teardown. */
  close: ICommandDefinition<
    Record<string, never>,
    Record<string, never>
  >;
  /** Ingest data; Rust reads the payload from the given Unix socket. */
  ingest: ICommandDefinition<
    {
      socketPath: string;
      tags?: Record<string, string>;
      items?: IIngestItemOptions[];
    },
    { snapshot: ISnapshot }
  >;
  /** Restore data; Rust writes the payload to the given Unix socket. */
  restore: ICommandDefinition<
    {
      snapshotId: string;
      socketPath: string;
      item?: string;
    },
    Record<string, never>
  >;
  /** List snapshots, optionally filtered. */
  listSnapshots: ICommandDefinition<
    { filter?: ISnapshotFilter },
    { snapshots: ISnapshot[] }
  >;
  /** Fetch a single snapshot by id. */
  getSnapshot: ICommandDefinition<
    { snapshotId: string },
    { snapshot: ISnapshot }
  >;
  /** Check repository integrity at the given level. */
  verify: ICommandDefinition<
    { level: string },
    IVerifyResult
  >;
  /** Rebuild index and remove stale locks. */
  repair: ICommandDefinition<
    Record<string, never>,
    IRepairResult
  >;
  /** Apply a retention policy and garbage-collect. */
  prune: ICommandDefinition<
    { retention: IRetentionPolicy; dryRun?: boolean },
    IPruneResult
  >;
  /** Rebuild the global index from pack .idx files. */
  reindex: ICommandDefinition<
    Record<string, never>,
    { indexedChunks: number }
  >;
  /** Remove repository locks. */
  unlock: ICommandDefinition<
    { force?: boolean },
    { removedLocks: number }
  >;
};
|
||||
17
ts/plugins.ts
Normal file
17
ts/plugins.ts
Normal file
@@ -0,0 +1,17 @@
|
||||
// Centralized dependency hub: the rest of the package imports everything
// through this module (`import * as plugins from './plugins.js'`), keeping
// external dependencies visible in one place.

// node native scope
import * as path from 'node:path';
import * as fs from 'node:fs';
import * as net from 'node:net';
import * as os from 'node:os';
import * as stream from 'node:stream';
import * as crypto from 'node:crypto';

export { path, fs, net, os, stream, crypto };

// @push.rocks scope
import * as smartrust from '@push.rocks/smartrust';
import * as smartrx from '@push.rocks/smartrx';
import * as smartpromise from '@push.rocks/smartpromise';
import * as lik from '@push.rocks/lik';

export { smartrust, smartrx, smartpromise, lik };
|
||||
14
tsconfig.json
Normal file
14
tsconfig.json
Normal file
@@ -0,0 +1,14 @@
|
||||
{
|
||||
"compilerOptions": {
|
||||
"experimentalDecorators": true,
|
||||
"useDefineForClassFields": false,
|
||||
"target": "ES2022",
|
||||
"module": "NodeNext",
|
||||
"moduleResolution": "NodeNext",
|
||||
"esModuleInterop": true,
|
||||
"verbatimModuleSyntax": true
|
||||
},
|
||||
"exclude": [
|
||||
"dist_*/**/*.d.ts"
|
||||
]
|
||||
}
|
||||
Reference in New Issue
Block a user