feat(storage): add Bitcask storage migration, binary WAL, and data compaction support

2026-04-04 19:49:47 +00:00
parent 9e7ce25b45
commit d8a8259c73
22 changed files with 2807 additions and 412 deletions
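
At a glance, the on-disk layout moves from one JSON file per collection to a Bitcask-style directory per collection (paths as handled by the migrator below):

{storagePath}/{db}/{coll}.json           v0: collection as a JSON array (plus optional {coll}.indexes.json)
{storagePath}/{db}/{coll}/data.rdb       v1: append-only binary records
{storagePath}/{db}/{coll}/keydir.hint    v1: per-key offset entries (Bitcask-style hint file)
{storagePath}/{db}/{coll}/indexes.json   v1: index definitions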

View File

@@ -3,6 +3,6 @@
*/
export const commitinfo = {
name: '@push.rocks/smartdb',
- version: '2.1.1',
+ version: '2.2.0',
description: 'A MongoDB-compatible embedded database server with wire protocol support, backed by a high-performance Rust engine.'
}

View File

@@ -7,6 +7,9 @@ export * from './ts_smartdb/index.js';
export { LocalSmartDb } from './ts_local/index.js';
export type { ILocalSmartDbOptions, ILocalSmartDbConnectionInfo } from './ts_local/index.js';
// Export migration
export { StorageMigrator } from './ts_migration/index.js';
// Export commitinfo
export { commitinfo };

View File

@@ -2,6 +2,7 @@ import * as crypto from 'crypto';
import * as path from 'path';
import * as os from 'os';
import { SmartdbServer } from '../ts_smartdb/index.js';
import { StorageMigrator } from '../ts_migration/index.js';
/**
* Connection information returned by LocalSmartDb.start()
@@ -73,6 +74,10 @@ export class LocalSmartDb {
throw new Error('LocalSmartDb is already running');
}
// Run storage migration before starting the Rust engine
const migrator = new StorageMigrator(this.options.folderPath);
await migrator.run();
// Use provided socket path or generate one
this.generatedSocketPath = this.options.socketPath ?? this.generateSocketPath();
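
For callers nothing changes: migration now runs transparently inside start(). A minimal usage sketch (folderPath is the real option used above; the rest follows the documented start() flow):

import { LocalSmartDb } from '@push.rocks/smartdb';

const db = new LocalSmartDb({ folderPath: './my-data' }); // may still hold v0 JSON files
const info = await db.start(); // StorageMigrator.run() executes before the Rust engine spawns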

View File

@@ -0,0 +1,93 @@
import * as fs from 'fs';
import * as path from 'path';
import { migrateV0ToV1 } from './migrators/v0_to_v1.js';
/**
* Detected storage format version.
* - v0: Legacy JSON format ({db}/{coll}.json files)
* - v1: Bitcask binary format ({db}/{coll}/data.rdb directories)
*/
type TStorageVersion = 0 | 1;
/**
* StorageMigrator — runs before the Rust engine starts.
*
* Detects the current storage format version and runs the appropriate
* migration chain. The Rust engine only knows the current format (v1).
*
* Migration is safe: original files are never modified or deleted.
* On success, a console hint is printed about which old files can be removed.
*/
export class StorageMigrator {
private storagePath: string;
constructor(storagePath: string) {
this.storagePath = storagePath;
}
/**
* Run any needed migrations. Safe to call even if storage is already current.
*/
async run(): Promise<void> {
if (!fs.existsSync(this.storagePath)) {
return; // No data yet — nothing to migrate
}
const version = this.detectVersion();
if (version === 1) {
return; // Already current
}
if (version === 0) {
console.log(`[smartdb] Detected v0 (JSON) storage format at ${this.storagePath}`);
console.log(`[smartdb] Running migration v0 → v1 (Bitcask binary format)...`);
const deletableFiles = await migrateV0ToV1(this.storagePath);
if (deletableFiles.length > 0) {
console.log(`[smartdb] Migration v0 → v1 complete.`);
console.log(`[smartdb] The following old files can be safely deleted:`);
for (const f of deletableFiles) {
console.log(`[smartdb] ${f}`);
}
} else {
console.log(`[smartdb] Migration v0 → v1 complete. No old files to clean up.`);
}
}
}
/**
* Detect the storage format version by inspecting the directory structure.
*
* v0: {db}/{coll}.json files exist
* v1: {db}/{coll}/data.rdb directories exist
*/
private detectVersion(): TStorageVersion {
const entries = fs.readdirSync(this.storagePath, { withFileTypes: true });
for (const entry of entries) {
if (!entry.isDirectory()) continue;
const dbDir = path.join(this.storagePath, entry.name);
const dbEntries = fs.readdirSync(dbDir, { withFileTypes: true });
for (const dbEntry of dbEntries) {
// v1: subdirectory with data.rdb
if (dbEntry.isDirectory()) {
const dataRdb = path.join(dbDir, dbEntry.name, 'data.rdb');
if (fs.existsSync(dataRdb)) {
return 1;
}
}
// v0: .json file (not .indexes.json)
if (dbEntry.isFile() && dbEntry.name.endsWith('.json') && !dbEntry.name.endsWith('.indexes.json')) {
return 0;
}
}
}
// Empty or unrecognized — treat as v1 (fresh start)
return 1;
}
}
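
The migrator can also be run directly, e.g. from a maintenance script; a minimal sketch using the package export added in this commit (the storage path is hypothetical):

import { StorageMigrator } from '@push.rocks/smartdb';

const migrator = new StorageMigrator('/var/lib/myapp/smartdb'); // hypothetical storage path
await migrator.run(); // no-op when the directory is missing or already v1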

ts/ts_migration/index.ts (new file)
View File

@@ -0,0 +1 @@
export { StorageMigrator } from './classes.storagemigrator.js';

View File

@@ -0,0 +1,253 @@
import * as fs from 'fs';
import * as path from 'path';
import * as crypto from 'crypto';
import { BSON } from 'bson';
// ---------------------------------------------------------------------------
// Binary format constants (must match Rust: record.rs)
// ---------------------------------------------------------------------------
/** File-level magic: "SMARTDB\0" */
const FILE_MAGIC = Buffer.from('SMARTDB\0', 'ascii');
/** Current format version */
const FORMAT_VERSION = 1;
/** File type tags */
const FILE_TYPE_DATA = 1;
const FILE_TYPE_HINT = 3;
/** File header total size */
const FILE_HEADER_SIZE = 64;
/** Per-record magic */
const RECORD_MAGIC = 0xDB01;
/** Per-record header size */
const RECORD_HEADER_SIZE = 22; // 2 + 8 + 4 + 4 + 4
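// Layout sketch, derived from writeFileHeader()/encodeDataRecord() below
// (the Rust side, record.rs, is authoritative):
//
// File header (64 bytes):
//   [0..8)   magic "SMARTDB\0"
//   [8..10)  u16 format version
//   [10]     u8 file type (1 = data, 3 = hint)
//   [11..15) u32 flags
//   [15..23) u64 creation timestamp (ms since epoch)
//   [23..64) reserved (zeros)
//
// Data record (22-byte header, then key, then value):
//   [0..2)   u16 record magic (0xDB01)
//   [2..10)  u64 timestamp (ms since epoch)
//   [10..14) u32 key length
//   [14..18) u32 value length
//   [18..22) u32 CRC32 over bytes [0..18) and [22..end)
//   [22..)   key bytes, then value bytes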
// ---------------------------------------------------------------------------
// Binary encoding helpers
// ---------------------------------------------------------------------------
function writeFileHeader(fileType: number): Buffer {
const buf = Buffer.alloc(FILE_HEADER_SIZE, 0);
FILE_MAGIC.copy(buf, 0);
buf.writeUInt16LE(FORMAT_VERSION, 8);
buf.writeUInt8(fileType, 10);
buf.writeUInt32LE(0, 11); // flags
const now = BigInt(Date.now());
buf.writeBigUInt64LE(now, 15);
// bytes 23..64 are reserved (zeros)
return buf;
}
function encodeDataRecord(timestamp: bigint, key: Buffer, value: Buffer): Buffer {
const keyLen = key.length;
const valLen = value.length;
const totalSize = RECORD_HEADER_SIZE + keyLen + valLen;
const buf = Buffer.alloc(totalSize);
// Write header fields (without CRC)
buf.writeUInt16LE(RECORD_MAGIC, 0);
buf.writeBigUInt64LE(timestamp, 2);
buf.writeUInt32LE(keyLen, 10);
buf.writeUInt32LE(valLen, 14);
// CRC placeholder at offset 18..22 (will fill below)
key.copy(buf, RECORD_HEADER_SIZE);
value.copy(buf, RECORD_HEADER_SIZE + keyLen);
// CRC32 covers everything except the CRC field itself:
// bytes [0..18] + bytes [22..]
const crc = crc32(Buffer.concat([
buf.subarray(0, 18),
buf.subarray(22),
]));
buf.writeUInt32LE(crc, 18);
return buf;
}
function encodeHintEntry(key: string, offset: bigint, recordLen: number, valueLen: number, timestamp: bigint): Buffer {
const keyBuf = Buffer.from(key, 'utf-8');
const buf = Buffer.alloc(4 + keyBuf.length + 8 + 4 + 4 + 8);
let pos = 0;
buf.writeUInt32LE(keyBuf.length, pos); pos += 4;
keyBuf.copy(buf, pos); pos += keyBuf.length;
buf.writeBigUInt64LE(offset, pos); pos += 8;
buf.writeUInt32LE(recordLen, pos); pos += 4;
buf.writeUInt32LE(valueLen, pos); pos += 4;
buf.writeBigUInt64LE(timestamp, pos);
return buf;
}
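// Hint entry layout (variable length), matching the writes above:
//   u32 key length | key bytes (UTF-8) | u64 data-file offset |
//   u32 record length | u32 value length | u64 timestamp (ms)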
// ---------------------------------------------------------------------------
// CRC32 (matching crc32fast in Rust)
// ---------------------------------------------------------------------------
const CRC32_TABLE = (() => {
const table = new Uint32Array(256);
for (let i = 0; i < 256; i++) {
let crc = i;
for (let j = 0; j < 8; j++) {
crc = (crc & 1) ? (0xEDB88320 ^ (crc >>> 1)) : (crc >>> 1);
}
table[i] = crc;
}
return table;
})();
function crc32(data: Buffer): number {
let crc = 0xFFFFFFFF;
for (let i = 0; i < data.length; i++) {
crc = CRC32_TABLE[(crc ^ data[i]) & 0xFF] ^ (crc >>> 8);
}
return (crc ^ 0xFFFFFFFF) >>> 0;
}
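// Sanity check: this is the standard reflected CRC-32 (poly 0xEDB88320),
// the same algorithm crc32fast implements, so the usual test vector applies:
//   crc32(Buffer.from('123456789', 'ascii')) === 0xCBF43926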
// ---------------------------------------------------------------------------
// Migration: v0 (JSON) → v1 (Bitcask binary)
// ---------------------------------------------------------------------------
interface IKeyDirEntry {
offset: bigint;
recordLen: number;
valueLen: number;
timestamp: bigint;
}
/**
* Migrate a storage directory from v0 (JSON-per-collection) to v1 (Bitcask binary).
*
* - Original .json files are NOT modified or deleted.
* - New v1 files are written into {db}/{coll}/ subdirectories.
* - Returns a list of old files that can be safely deleted.
* - On failure, cleans up any partial new files and throws.
*/
export async function migrateV0ToV1(storagePath: string): Promise<string[]> {
const deletableFiles: string[] = [];
const createdDirs: string[] = [];
try {
const dbEntries = fs.readdirSync(storagePath, { withFileTypes: true });
for (const dbEntry of dbEntries) {
if (!dbEntry.isDirectory()) continue;
const dbDir = path.join(storagePath, dbEntry.name);
const collFiles = fs.readdirSync(dbDir, { withFileTypes: true });
for (const collFile of collFiles) {
if (!collFile.isFile()) continue;
if (!collFile.name.endsWith('.json')) continue;
if (collFile.name.endsWith('.indexes.json')) continue;
const collName = collFile.name.replace(/\.json$/, '');
const jsonPath = path.join(dbDir, collFile.name);
const indexJsonPath = path.join(dbDir, `${collName}.indexes.json`);
// Target directory
const collDir = path.join(dbDir, collName);
if (fs.existsSync(collDir)) {
// Already migrated
continue;
}
console.log(`[smartdb] Migrating ${dbEntry.name}.${collName}...`);
// Read the JSON collection
const jsonData = fs.readFileSync(jsonPath, 'utf-8');
const docs: any[] = JSON.parse(jsonData);
// Create collection directory
fs.mkdirSync(collDir, { recursive: true });
createdDirs.push(collDir);
// Write data.rdb
const dataPath = path.join(collDir, 'data.rdb');
const fd = fs.openSync(dataPath, 'w');
try {
// File header
const headerBuf = writeFileHeader(FILE_TYPE_DATA);
fs.writeSync(fd, headerBuf);
let currentOffset = BigInt(FILE_HEADER_SIZE);
const keydir: Map<string, IKeyDirEntry> = new Map();
const ts = BigInt(Date.now());
for (const doc of docs) {
// Extract _id
let idHex: string;
if (doc._id && doc._id.$oid) {
idHex = doc._id.$oid;
} else if (typeof doc._id === 'string') {
idHex = doc._id;
} else if (doc._id) {
idHex = String(doc._id);
} else {
// Generate a new ObjectId
idHex = crypto.randomBytes(12).toString('hex');
doc._id = { $oid: idHex };
}
// Serialize to BSON
const bsonBytes = BSON.serialize(doc);
const keyBuf = Buffer.from(idHex, 'utf-8');
const valueBuf = Buffer.from(bsonBytes);
const record = encodeDataRecord(ts, keyBuf, valueBuf);
fs.writeSync(fd, record);
keydir.set(idHex, {
offset: currentOffset,
recordLen: record.length,
valueLen: valueBuf.length,
timestamp: ts,
});
currentOffset += BigInt(record.length);
}
fs.fsyncSync(fd);
fs.closeSync(fd);
// Write keydir.hint (close the fd even if a write fails, so it cannot leak)
const hintPath = path.join(collDir, 'keydir.hint');
const hintFd = fs.openSync(hintPath, 'w');
try {
fs.writeSync(hintFd, writeFileHeader(FILE_TYPE_HINT));
for (const [key, entry] of keydir) {
fs.writeSync(hintFd, encodeHintEntry(key, entry.offset, entry.recordLen, entry.valueLen, entry.timestamp));
}
fs.fsyncSync(hintFd);
} finally {
fs.closeSync(hintFd);
}
} catch (writeErr) {
// Clean up on write failure; fd may already have been closed above, so ignore errors
try { fs.closeSync(fd); } catch {}
throw writeErr;
}
// Copy indexes.json if it exists
if (fs.existsSync(indexJsonPath)) {
const destIndexPath = path.join(collDir, 'indexes.json');
fs.copyFileSync(indexJsonPath, destIndexPath);
deletableFiles.push(indexJsonPath);
} else {
// Write default _id index
const destIndexPath = path.join(collDir, 'indexes.json');
fs.writeFileSync(destIndexPath, JSON.stringify([{ name: '_id_', key: { _id: 1 } }], null, 2));
}
deletableFiles.push(jsonPath);
console.log(`[smartdb] Migrated ${dbEntry.name}.${collName}: ${docs.length} documents`);
}
}
} catch (err) {
// Clean up any partially created directories
for (const dir of createdDirs) {
try {
fs.rmSync(dir, { recursive: true, force: true });
} catch {}
}
throw err;
}
return deletableFiles;
}
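
Because the migrator deliberately leaves v0 files in place, deleting them remains a caller decision; a hedged sketch of doing that explicitly (the path is hypothetical, and this step is not part of this commit):

import * as fs from 'fs';
import { migrateV0ToV1 } from './migrators/v0_to_v1.js';

const deletable = await migrateV0ToV1('/var/lib/myapp/smartdb'); // hypothetical path
for (const file of deletable) {
  fs.rmSync(file); // the migrator only lists files whose data was fully rewritten to v1
}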

View File

@@ -1,4 +1,5 @@
import { RustDbBridge } from '../rust-db-bridge.js';
import { StorageMigrator } from '../../ts_migration/index.js';
import type {
IOpLogEntry,
IOpLogResult,
@@ -75,6 +76,12 @@ export class SmartdbServer {
throw new Error('Server is already running');
}
// Run storage migration for file-based storage before starting Rust engine
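// (non-file storage keeps nothing on disk, so migration is skipped)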
if (this.options.storage === 'file' && this.options.storagePath) {
const migrator = new StorageMigrator(this.options.storagePath);
await migrator.run();
}
const spawned = await this.bridge.spawn();
if (!spawned) {
throw new Error(