feat: wire parity into ingest pipeline, optimize restore with nonce caching

- Parity generation auto-triggers after every N packs during ingest
- ParityConfig stored in repository config.json
- Nonce is now stored in global index entries, eliminating per-chunk IDX
  re-reads during encrypted restore (fast path); when an entry lacks a
  nonce, restore falls back to reading the IDX file, cached per pack
- Repair now attempts parity-based pack reconstruction before reindexing
This commit is contained in:
2026-03-22 00:14:17 +00:00
parent ca510f4578
commit 66aa43494e
4 changed files with 101 additions and 22 deletions

View File

@@ -1,6 +1,7 @@
/// Restore pipeline: reads a snapshot manifest, looks up chunks in the global
/// index, reads from pack files, decrypts, decompresses, and writes to a Unix socket.
use std::collections::HashMap;
use tokio::io::AsyncWriteExt;
use tokio::net::UnixStream;
@@ -9,6 +10,7 @@ use crate::encryption;
use crate::error::ArchiveError;
use crate::hasher;
use crate::pack_reader;
use crate::pack_writer::IdxEntry;
use crate::repository::Repository;
use crate::snapshot;
@@ -45,6 +47,9 @@ pub async fn restore(
tracing::info!("Connected to restore socket: {}", socket_path);
// Cache loaded IDX entries per pack to avoid re-reading
let mut idx_cache: HashMap<String, Vec<IdxEntry>> = HashMap::new();
let mut restored_bytes: u64 = 0;
let mut chunks_read: u64 = 0;
@@ -73,23 +78,41 @@ pub async fn restore(
// Decrypt if encrypted
let compressed = if let Some(ref key) = repo.master_key {
// We need the nonce. Read it from the IDX file.
let idx_path = std::path::Path::new(&repo.path)
.join("packs")
.join("data")
.join(shard)
.join(format!("{}.idx", index_entry.pack_id));
// Try to get nonce from the global index first (fast path)
let nonce = if let Some(ref nonce_hex) = index_entry.nonce {
let nonce_bytes = hex::decode(nonce_hex)
.map_err(|_| ArchiveError::Corruption(format!("Invalid nonce hex: {}", nonce_hex)))?;
let mut n = [0u8; 12];
if nonce_bytes.len() >= 12 {
n.copy_from_slice(&nonce_bytes[..12]);
}
n
} else {
// Fallback: read from IDX file (cached)
let entries = if let Some(cached) = idx_cache.get(&index_entry.pack_id) {
cached
} else {
let idx_path = std::path::Path::new(&repo.path)
.join("packs")
.join("data")
.join(shard)
.join(format!("{}.idx", index_entry.pack_id));
let loaded = pack_reader::load_idx(&idx_path).await?;
idx_cache.insert(index_entry.pack_id.clone(), loaded);
idx_cache.get(&index_entry.pack_id).unwrap()
};
let entries = pack_reader::load_idx(&idx_path).await?;
let hash_bytes = hasher::hex_to_hash(hash_hex)
.map_err(|_| ArchiveError::Corruption(format!("Invalid hash: {}", hash_hex)))?;
let hash_bytes = hasher::hex_to_hash(hash_hex)
.map_err(|_| ArchiveError::Corruption(format!("Invalid hash: {}", hash_hex)))?;
let idx_entry = pack_reader::find_in_idx(&entries, &hash_bytes)
.ok_or_else(|| ArchiveError::NotFound(format!(
"Chunk {} not found in pack index {}", hash_hex, index_entry.pack_id
)))?;
let idx_entry = pack_reader::find_in_idx(entries, &hash_bytes)
.ok_or_else(|| ArchiveError::NotFound(format!(
"Chunk {} not found in pack index {}", hash_hex, index_entry.pack_id
)))?;
idx_entry.nonce
};
encryption::decrypt_chunk(&stored_data, key, &idx_entry.nonce)?
encryption::decrypt_chunk(&stored_data, key, &nonce)?
} else {
stored_data
};