import * as fs from 'fs';
import * as path from 'path';
import * as crypto from 'crypto';
import { BSON } from 'bson';

// ---------------------------------------------------------------------------
// Binary format constants (must match Rust: record.rs)
// ---------------------------------------------------------------------------

/** File-level magic: "SMARTDB\0" */
const FILE_MAGIC = Buffer.from('SMARTDB\0', 'ascii');

/** Current format version */
const FORMAT_VERSION = 1;

/** File type tags */
const FILE_TYPE_DATA = 1;
const FILE_TYPE_HINT = 3;

/** File header total size */
const FILE_HEADER_SIZE = 64;

/** Per-record magic */
const RECORD_MAGIC = 0xDB01;

/** Per-record header size */
const RECORD_HEADER_SIZE = 22; // 2 + 8 + 4 + 4 + 4

/** Byte offset of the CRC32 field inside a record header. */
const RECORD_CRC_OFFSET = 18;

// ---------------------------------------------------------------------------
// Binary encoding helpers
// ---------------------------------------------------------------------------

/**
 * Build the fixed-size file header that starts every v1 file.
 *
 * Layout (little-endian):
 *   [0..8)   magic "SMARTDB\0"
 *   [8..10)  format version (u16)
 *   [10]     file type tag (u8)
 *   [11..15) flags (u32, currently always 0)
 *   [15..23) creation time, milliseconds since the Unix epoch (u64)
 *   [23..64) reserved, zero-filled
 *
 * @param fileType One of the FILE_TYPE_* tags.
 * @returns A buffer of exactly FILE_HEADER_SIZE bytes.
 */
function writeFileHeader(fileType: number): Buffer {
  const buf = Buffer.alloc(FILE_HEADER_SIZE, 0);
  FILE_MAGIC.copy(buf, 0);
  buf.writeUInt16LE(FORMAT_VERSION, 8);
  buf.writeUInt8(fileType, 10);
  buf.writeUInt32LE(0, 11); // flags
  const now = BigInt(Date.now());
  buf.writeBigUInt64LE(now, 15);
  // bytes 23..64 are reserved (zeros)
  return buf;
}

/**
 * Encode one data record: a 22-byte header followed by the key and the value.
 *
 * Header layout (little-endian):
 *   [0..2)   record magic (u16)
 *   [2..10)  timestamp (u64)
 *   [10..14) key length (u32)
 *   [14..18) value length (u32)
 *   [18..22) CRC32 (u32)
 *
 * The CRC32 covers everything except the CRC field itself, i.e. header bytes
 * [0..18) followed by the key and value bytes [22..).
 *
 * @param timestamp Record timestamp written verbatim into the header.
 * @param key Raw key bytes.
 * @param value Raw value bytes.
 * @returns The fully encoded record.
 */
function encodeDataRecord(timestamp: bigint, key: Buffer, value: Buffer): Buffer {
  const keyLen = key.length;
  const valLen = value.length;
  const totalSize = RECORD_HEADER_SIZE + keyLen + valLen;
  const buf = Buffer.alloc(totalSize);

  // Write header fields (without CRC)
  buf.writeUInt16LE(RECORD_MAGIC, 0);
  buf.writeBigUInt64LE(timestamp, 2);
  buf.writeUInt32LE(keyLen, 10);
  buf.writeUInt32LE(valLen, 14);
  // CRC placeholder at offset 18..22 (filled in below)

  key.copy(buf, RECORD_HEADER_SIZE);
  value.copy(buf, RECORD_HEADER_SIZE + keyLen);

  // CRC32 covers everything except the CRC field itself:
  // bytes [0..18) + bytes [22..)
  const crc = crc32(Buffer.concat([
    buf.subarray(0, RECORD_CRC_OFFSET),
    buf.subarray(RECORD_HEADER_SIZE),
  ]));
  buf.writeUInt32LE(crc, RECORD_CRC_OFFSET);

  return buf;
}

/**
 * Encode one hint-file entry describing where a key's record lives in the
 * data file.
 *
 * Layout (little-endian): key length (u32), UTF-8 key bytes, record offset
 * (u64), record length (u32), value length (u32), timestamp (u64).
 *
 * @param key Document key; encoded as UTF-8.
 * @param offset Byte offset of the record inside the data file.
 * @param recordLen Total encoded length of the record.
 * @param valueLen Length of the record's value portion.
 * @param timestamp Timestamp stored in the record.
 * @returns The encoded hint entry.
 */
function encodeHintEntry(
  key: string,
  offset: bigint,
  recordLen: number,
  valueLen: number,
  timestamp: bigint,
): Buffer {
  const keyBuf = Buffer.from(key, 'utf-8');
  const buf = Buffer.alloc(4 + keyBuf.length + 8 + 4 + 4 + 8);
  let pos = 0;

  buf.writeUInt32LE(keyBuf.length, pos);
  pos += 4;
  keyBuf.copy(buf, pos);
  pos += keyBuf.length;
  buf.writeBigUInt64LE(offset, pos);
  pos += 8;
  buf.writeUInt32LE(recordLen, pos);
  pos += 4;
  buf.writeUInt32LE(valueLen, pos);
  pos += 4;
  buf.writeBigUInt64LE(timestamp, pos);

  return buf;
}

// ---------------------------------------------------------------------------
// CRC32 (matching crc32fast in Rust)
// ---------------------------------------------------------------------------

/** Byte-wise lookup table for the reflected polynomial 0xEDB88320. */
const CRC32_TABLE = (() => {
  const table = new Uint32Array(256);
  for (let i = 0; i < 256; i++) {
    let crc = i;
    for (let j = 0; j < 8; j++) {
      crc = (crc & 1) ? (0xEDB88320 ^ (crc >>> 1)) : (crc >>> 1);
    }
    table[i] = crc;
  }
  return table;
})();

/**
 * Compute the table-driven CRC32 of a buffer.
 *
 * @param data Bytes to checksum.
 * @returns The checksum as an unsigned 32-bit integer.
 */
function crc32(data: Buffer): number {
  let crc = 0xFFFFFFFF;
  for (let i = 0; i < data.length; i++) {
    crc = CRC32_TABLE[(crc ^ data[i]) & 0xFF] ^ (crc >>> 8);
  }
  // `>>> 0` converts the signed 32-bit result of `^` back to unsigned.
  return (crc ^ 0xFFFFFFFF) >>> 0;
}

// ---------------------------------------------------------------------------
// Migration: v0 (JSON) → v1 (Bitcask binary)
// ---------------------------------------------------------------------------

/** Location of one record inside data.rdb, as stored in keydir.hint. */
interface IKeyDirEntry {
  offset: bigint;
  recordLen: number;
  valueLen: number;
  timestamp: bigint;
}

/** A single document as read from a v0 JSON collection file. */
type JsonDocument = Record<string, unknown>;

/**
 * Read and validate a v0 collection file.
 *
 * Validation happens before anything is written, so a malformed file fails
 * with a descriptive message instead of an opaque TypeError halfway through
 * the migration.
 *
 * @param jsonPath Path to the `{coll}.json` file.
 * @returns The documents contained in the file.
 * @throws Error if the file is not a JSON array of objects.
 */
function readCollectionDocuments(jsonPath: string): JsonDocument[] {
  const parsed: unknown = JSON.parse(fs.readFileSync(jsonPath, 'utf-8'));
  if (!Array.isArray(parsed)) {
    throw new Error(`[smartdb] Cannot migrate ${jsonPath}: expected a JSON array of documents`);
  }
  parsed.forEach((doc: unknown, index: number) => {
    if (typeof doc !== 'object' || doc === null || Array.isArray(doc)) {
      throw new Error(`[smartdb] Cannot migrate ${jsonPath}: element ${index} is not a document object`);
    }
  });
  return parsed as JsonDocument[];
}

/**
 * Determine the record key for a document, generating an id when none exists.
 *
 * Resolution order: `_id.$oid`, then a string `_id`, then `String(_id)` for
 * any other truthy `_id`. When `_id` is missing or falsy, 12 random bytes are
 * hex-encoded and stored on the document as `{ $oid: ... }`, so this function
 * may mutate `doc`.
 *
 * NOTE(review): a falsy non-string `_id` such as `0` or `false` is replaced by
 * a generated id, and an object `_id` without `$oid` stringifies to
 * "[object Object]". Both are kept as-is to preserve the existing key format;
 * confirm against the Rust reader before changing.
 *
 * @param doc Document to inspect.
 * @returns The key under which the document is stored.
 * @throws TypeError if `_id.$oid` is present but is not a string.
 */
function resolveDocumentId(doc: JsonDocument): string {
  const id = doc._id;

  if (typeof id === 'object' && id !== null) {
    const oid = (id as { $oid?: unknown }).$oid;
    if (typeof oid === 'string' && oid !== '') {
      return oid;
    }
    if (oid) {
      // Previously this value reached Buffer.from() and failed there.
      throw new TypeError(`[smartdb] Unsupported _id.$oid of type ${typeof oid}; expected a string`);
    }
  }
  if (typeof id === 'string') {
    return id;
  }
  if (id) {
    return String(id);
  }

  // Generate a new ObjectId
  const generated = crypto.randomBytes(12).toString('hex');
  doc._id = { $oid: generated };
  return generated;
}

/**
 * Create (or truncate) a file, let `write` fill it, then fsync and close it.
 *
 * The descriptor is closed exactly once on every path. If `write` or the
 * fsync fails, the close is best-effort so that the original error is the one
 * that propagates.
 *
 * @param filePath File to create.
 * @param write Callback that writes the file's content to the descriptor.
 */
function withWritableFile(filePath: string, write: (fd: number) => void): void {
  const fd = fs.openSync(filePath, 'w');
  try {
    write(fd);
    fs.fsyncSync(fd);
  } catch (err) {
    try {
      fs.closeSync(fd);
    } catch {
      // Ignore: the write failure is the error worth reporting.
    }
    throw err;
  }
  fs.closeSync(fd);
}

/**
 * Write data.rdb: the file header followed by one BSON record per document.
 *
 * @param dataPath Destination path of the data file.
 * @param docs Documents to store; documents without an id receive one.
 * @returns The key directory mapping each key to its record location. When
 *   several documents share a key, the last one written wins.
 */
function writeDataFile(dataPath: string, docs: JsonDocument[]): Map<string, IKeyDirEntry> {
  const keydir = new Map<string, IKeyDirEntry>();

  withWritableFile(dataPath, (fd) => {
    fs.writeSync(fd, writeFileHeader(FILE_TYPE_DATA));

    let currentOffset = BigInt(FILE_HEADER_SIZE);
    // All records of one migration run share a single timestamp.
    const ts = BigInt(Date.now());

    for (const doc of docs) {
      const idHex = resolveDocumentId(doc);

      // Serialize to BSON
      const keyBuf = Buffer.from(idHex, 'utf-8');
      const valueBuf = Buffer.from(BSON.serialize(doc));
      const record = encodeDataRecord(ts, keyBuf, valueBuf);
      fs.writeSync(fd, record);

      keydir.set(idHex, {
        offset: currentOffset,
        recordLen: record.length,
        valueLen: valueBuf.length,
        timestamp: ts,
      });
      currentOffset += BigInt(record.length);
    }
  });

  return keydir;
}

/**
 * Write keydir.hint: the file header followed by one entry per key.
 *
 * @param hintPath Destination path of the hint file.
 * @param keydir Key directory produced while writing the data file.
 */
function writeHintFile(hintPath: string, keydir: Map<string, IKeyDirEntry>): void {
  withWritableFile(hintPath, (fd) => {
    fs.writeSync(fd, writeFileHeader(FILE_TYPE_HINT));
    for (const [key, entry] of keydir) {
      fs.writeSync(
        fd,
        encodeHintEntry(key, entry.offset, entry.recordLen, entry.valueLen, entry.timestamp),
      );
    }
  });
}

/**
 * Migrate a storage directory from v0 (JSON-per-collection) to v1 (Bitcask binary).
 *
 * - Original .json files are NOT modified or deleted.
 * - New v1 files are written into {db}/{coll}/ subdirectories.
 * - Collections whose {db}/{coll}/ directory already exists are skipped.
 * - Returns a list of old files that can be safely deleted.
 * - On failure, removes every collection directory created during this call
 *   (including ones that had already been written completely) and throws.
 *
 * @param storagePath Root directory containing one subdirectory per database.
 * @returns Paths of the v0 files that were migrated and may now be deleted.
 * @throws Error if a collection file is not a JSON array of documents, or if
 *   any filesystem or serialization step fails.
 */
export async function migrateV0ToV1(storagePath: string): Promise<string[]> {
  const deletableFiles: string[] = [];
  const createdDirs: string[] = [];

  try {
    const dbEntries = fs.readdirSync(storagePath, { withFileTypes: true });

    for (const dbEntry of dbEntries) {
      if (!dbEntry.isDirectory()) continue;

      const dbDir = path.join(storagePath, dbEntry.name);
      const collFiles = fs.readdirSync(dbDir, { withFileTypes: true });

      for (const collFile of collFiles) {
        if (!collFile.isFile()) continue;
        if (!collFile.name.endsWith('.json')) continue;
        // Index definitions are copied together with their collection below.
        if (collFile.name.endsWith('.indexes.json')) continue;

        const collName = collFile.name.replace(/\.json$/, '');
        const jsonPath = path.join(dbDir, collFile.name);
        const indexJsonPath = path.join(dbDir, `${collName}.indexes.json`);

        // Target directory
        const collDir = path.join(dbDir, collName);
        if (fs.existsSync(collDir)) {
          // Already migrated
          continue;
        }

        console.log(`[smartdb] Migrating ${dbEntry.name}.${collName}...`);

        // Read and validate the JSON collection before touching the disk.
        const docs = readCollectionDocuments(jsonPath);

        // Create collection directory
        fs.mkdirSync(collDir, { recursive: true });
        createdDirs.push(collDir);

        // Write data.rdb, then keydir.hint
        const keydir = writeDataFile(path.join(collDir, 'data.rdb'), docs);
        writeHintFile(path.join(collDir, 'keydir.hint'), keydir);

        const destIndexPath = path.join(collDir, 'indexes.json');
        if (fs.existsSync(indexJsonPath)) {
          // Copy indexes.json if it exists
          fs.copyFileSync(indexJsonPath, destIndexPath);
          deletableFiles.push(indexJsonPath);
        } else {
          // Write default _id index
          fs.writeFileSync(
            destIndexPath,
            JSON.stringify([{ name: '_id_', key: { _id: 1 } }], null, 2),
          );
        }

        deletableFiles.push(jsonPath);
        console.log(`[smartdb] Migrated ${dbEntry.name}.${collName}: ${docs.length} documents`);
      }
    }
  } catch (err) {
    // Clean up any directories created during this run; removal is
    // best-effort so the original failure is what the caller sees.
    for (const dir of createdDirs) {
      try {
        fs.rmSync(dir, { recursive: true, force: true });
      } catch {
        // Ignore cleanup failures.
      }
    }
    throw err;
  }

  return deletableFiles;
}