feat(cluster,server,auth): add operational health endpoints, persist cluster topology, and hide credential secrets from runtime listings
This commit is contained in:
@@ -0,0 +1,317 @@
|
||||
/// <reference types="node" />
|
||||
|
||||
import { readFile, readdir, rm } from 'fs/promises';
|
||||
import { join } from 'path';
|
||||
import { expect, tap } from '@git.zone/tstest/tapbundle';
|
||||
import { CreateBucketCommand, GetObjectCommand, PutObjectCommand, S3Client } from '@aws-sdk/client-s3';
|
||||
import { Readable } from 'stream';
|
||||
import * as smartstorage from '../ts/index.js';
|
||||
|
||||
const baseDir = join(process.cwd(), '.nogit', `cluster-multinode-${Date.now()}`);
|
||||
const nodes: smartstorage.SmartStorage[] = [];
|
||||
|
||||
const makeDrivePaths = (nodeId: string) => {
|
||||
return [1, 2].map((driveIndex) => join(baseDir, nodeId, `drive-${driveIndex}`));
|
||||
};
|
||||
|
||||
const streamToString = async (stream: Readable): Promise<string> => {
|
||||
const chunks: Buffer[] = [];
|
||||
return new Promise((resolve, reject) => {
|
||||
stream.on('data', (chunk: string | Buffer | Uint8Array) => chunks.push(Buffer.from(chunk)));
|
||||
stream.on('error', reject);
|
||||
stream.on('end', () => resolve(Buffer.concat(chunks).toString('utf8')));
|
||||
});
|
||||
};
|
||||
|
||||
const fileExistsBelow = async (directory: string, fileName: string): Promise<boolean> => {
|
||||
let entries;
|
||||
try {
|
||||
entries = await readdir(directory, { withFileTypes: true });
|
||||
} catch {
|
||||
return false;
|
||||
}
|
||||
|
||||
for (const entry of entries) {
|
||||
const entryPath = join(directory, entry.name);
|
||||
if (entry.isFile() && entry.name === fileName) {
|
||||
return true;
|
||||
}
|
||||
if (entry.isDirectory() && await fileExistsBelow(entryPath, fileName)) {
|
||||
return true;
|
||||
}
|
||||
}
|
||||
|
||||
return false;
|
||||
};
|
||||
|
||||
const waitFor = async (check: () => Promise<boolean>, timeoutMs = 10000) => {
|
||||
const deadline = Date.now() + timeoutMs;
|
||||
let lastError = '';
|
||||
while (Date.now() < deadline) {
|
||||
try {
|
||||
if (await check()) {
|
||||
return;
|
||||
}
|
||||
} catch (error) {
|
||||
lastError = error instanceof Error ? error.message : String(error);
|
||||
}
|
||||
await new Promise((resolve) => setTimeout(resolve, 250));
|
||||
}
|
||||
throw new Error(`Timed out waiting for cluster condition${lastError ? `: ${lastError}` : ''}`);
|
||||
};
|
||||
|
||||
tap.test('setup: start three clustered storage nodes', async () => {
|
||||
await rm(baseDir, { recursive: true, force: true });
|
||||
|
||||
const node1 = await smartstorage.SmartStorage.createAndStart({
|
||||
server: {
|
||||
address: '127.0.0.1',
|
||||
port: 3350,
|
||||
silent: true,
|
||||
},
|
||||
storage: {
|
||||
directory: join(baseDir, 'node-1', 'storage'),
|
||||
},
|
||||
cluster: {
|
||||
enabled: true,
|
||||
nodeId: 'node-1',
|
||||
quicPort: 4350,
|
||||
seedNodes: [],
|
||||
erasure: {
|
||||
dataShards: 4,
|
||||
parityShards: 2,
|
||||
chunkSizeBytes: 1024 * 1024,
|
||||
},
|
||||
drives: {
|
||||
paths: makeDrivePaths('node-1'),
|
||||
},
|
||||
heartbeatIntervalMs: 500,
|
||||
heartbeatTimeoutMs: 3000,
|
||||
},
|
||||
});
|
||||
nodes.push(node1);
|
||||
|
||||
await new Promise((resolve) => setTimeout(resolve, 500));
|
||||
|
||||
const node2 = await smartstorage.SmartStorage.createAndStart({
|
||||
server: {
|
||||
address: '127.0.0.1',
|
||||
port: 3351,
|
||||
silent: true,
|
||||
},
|
||||
storage: {
|
||||
directory: join(baseDir, 'node-2', 'storage'),
|
||||
},
|
||||
cluster: {
|
||||
enabled: true,
|
||||
nodeId: 'node-2',
|
||||
quicPort: 4351,
|
||||
seedNodes: ['127.0.0.1:4350'],
|
||||
erasure: {
|
||||
dataShards: 4,
|
||||
parityShards: 2,
|
||||
chunkSizeBytes: 1024 * 1024,
|
||||
},
|
||||
drives: {
|
||||
paths: makeDrivePaths('node-2'),
|
||||
},
|
||||
heartbeatIntervalMs: 500,
|
||||
heartbeatTimeoutMs: 3000,
|
||||
},
|
||||
});
|
||||
nodes.push(node2);
|
||||
|
||||
await new Promise((resolve) => setTimeout(resolve, 500));
|
||||
|
||||
const node3 = await smartstorage.SmartStorage.createAndStart({
|
||||
server: {
|
||||
address: '127.0.0.1',
|
||||
port: 3352,
|
||||
silent: true,
|
||||
},
|
||||
storage: {
|
||||
directory: join(baseDir, 'node-3', 'storage'),
|
||||
},
|
||||
cluster: {
|
||||
enabled: true,
|
||||
nodeId: 'node-3',
|
||||
quicPort: 4352,
|
||||
seedNodes: ['127.0.0.1:4350'],
|
||||
erasure: {
|
||||
dataShards: 4,
|
||||
parityShards: 2,
|
||||
chunkSizeBytes: 1024 * 1024,
|
||||
},
|
||||
drives: {
|
||||
paths: makeDrivePaths('node-3'),
|
||||
},
|
||||
heartbeatIntervalMs: 500,
|
||||
heartbeatTimeoutMs: 3000,
|
||||
},
|
||||
});
|
||||
nodes.push(node3);
|
||||
});
|
||||
|
||||
tap.test('seed node should report joined peers and multi-node erasure topology', async () => {
|
||||
const seed = nodes[0];
|
||||
|
||||
await waitFor(async () => {
|
||||
const health = await seed.getClusterHealth();
|
||||
if (health.peers?.length !== 2 || health.erasure?.erasureSetCount !== 1) {
|
||||
throw new Error(JSON.stringify(health));
|
||||
}
|
||||
return health.peers?.length === 2 && health.erasure?.erasureSetCount === 1;
|
||||
});
|
||||
|
||||
const health = await seed.getClusterHealth();
|
||||
const peerIds = health.peers!.map((peer) => peer.nodeId).sort();
|
||||
|
||||
expect(health.enabled).toEqual(true);
|
||||
expect(health.nodeId).toEqual('node-1');
|
||||
expect(health.quorumHealthy).toEqual(true);
|
||||
expect(health.majorityHealthy).toEqual(true);
|
||||
expect(peerIds).toEqual(['node-2', 'node-3']);
|
||||
expect(health.erasure?.totalShards).toEqual(6);
|
||||
expect(health.erasure?.erasureSetCount).toEqual(1);
|
||||
});
|
||||
|
||||
tap.test('all nodes should converge to the same multi-node topology', async () => {
|
||||
for (const node of nodes) {
|
||||
await waitFor(async () => {
|
||||
const health = await node.getClusterHealth();
|
||||
if (health.peers?.length !== 2 || health.erasure?.erasureSetCount !== 1) {
|
||||
throw new Error(JSON.stringify(health));
|
||||
}
|
||||
return true;
|
||||
});
|
||||
}
|
||||
});
|
||||
|
||||
tap.test('seed node should write shards to the declared remote drives', async () => {
|
||||
const seed = nodes[0];
|
||||
const descriptor = await seed.getStorageDescriptor();
|
||||
const client = new S3Client({
|
||||
endpoint: `http://${descriptor.endpoint}:${descriptor.port}`,
|
||||
region: 'us-east-1',
|
||||
credentials: {
|
||||
accessKeyId: descriptor.accessKey,
|
||||
secretAccessKey: descriptor.accessSecret,
|
||||
},
|
||||
forcePathStyle: true,
|
||||
});
|
||||
const bucket = 'multinode-bucket';
|
||||
const key = 'distributed.txt';
|
||||
const body = 'hello distributed shards';
|
||||
|
||||
await client.send(new CreateBucketCommand({ Bucket: bucket }));
|
||||
await client.send(new PutObjectCommand({ Bucket: bucket, Key: key, Body: body }));
|
||||
|
||||
const getResponse = await client.send(new GetObjectCommand({ Bucket: bucket, Key: key }));
|
||||
expect(await streamToString(getResponse.Body as Readable)).toEqual(body);
|
||||
|
||||
const manifestPath = join(
|
||||
baseDir,
|
||||
'node-1',
|
||||
'storage',
|
||||
'.manifests',
|
||||
bucket,
|
||||
`${key}.manifest.json`,
|
||||
);
|
||||
const manifest = JSON.parse(await readFile(manifestPath, 'utf8')) as {
|
||||
chunks: Array<{
|
||||
shardPlacements: Array<{ shardIndex: number; nodeId: string; driveId: string }>;
|
||||
}>;
|
||||
};
|
||||
const placements = manifest.chunks[0].shardPlacements;
|
||||
|
||||
expect(placements.length).toEqual(6);
|
||||
expect(placements.some((placement) => placement.nodeId === 'node-2' && placement.driveId === '1'))
|
||||
.toEqual(true);
|
||||
expect(placements.some((placement) => placement.nodeId === 'node-3' && placement.driveId === '1'))
|
||||
.toEqual(true);
|
||||
|
||||
for (const placement of placements) {
|
||||
const drivePath = makeDrivePaths(placement.nodeId)[Number(placement.driveId)];
|
||||
const shardFile = `shard-${placement.shardIndex}.dat`;
|
||||
expect(await fileExistsBelow(join(drivePath, '.smartstorage', 'data'), shardFile)).toEqual(true);
|
||||
}
|
||||
});
|
||||
|
||||
tap.test('restarted peer should keep durable identity and rejoin topology', async () => {
|
||||
await nodes[1].stop();
|
||||
await new Promise((resolve) => setTimeout(resolve, 500));
|
||||
|
||||
nodes[1] = await smartstorage.SmartStorage.createAndStart({
|
||||
server: {
|
||||
address: '127.0.0.1',
|
||||
port: 3351,
|
||||
silent: true,
|
||||
},
|
||||
storage: {
|
||||
directory: join(baseDir, 'node-2', 'storage'),
|
||||
},
|
||||
cluster: {
|
||||
enabled: true,
|
||||
nodeId: 'node-2',
|
||||
quicPort: 4351,
|
||||
seedNodes: ['127.0.0.1:4350'],
|
||||
erasure: {
|
||||
dataShards: 4,
|
||||
parityShards: 2,
|
||||
chunkSizeBytes: 1024 * 1024,
|
||||
},
|
||||
drives: {
|
||||
paths: makeDrivePaths('node-2'),
|
||||
},
|
||||
heartbeatIntervalMs: 500,
|
||||
heartbeatTimeoutMs: 3000,
|
||||
},
|
||||
});
|
||||
|
||||
await waitFor(async () => {
|
||||
const health = await nodes[1].getClusterHealth();
|
||||
if (health.nodeId !== 'node-2' || health.peers?.length !== 2) {
|
||||
throw new Error(JSON.stringify(health));
|
||||
}
|
||||
return true;
|
||||
});
|
||||
|
||||
const identityPath = join(
|
||||
baseDir,
|
||||
'node-2',
|
||||
'storage',
|
||||
'.smartstorage',
|
||||
'cluster',
|
||||
'identity.json',
|
||||
);
|
||||
const topologyPath = join(
|
||||
baseDir,
|
||||
'node-2',
|
||||
'storage',
|
||||
'.smartstorage',
|
||||
'cluster',
|
||||
'topology.json',
|
||||
);
|
||||
const identity = JSON.parse(await readFile(identityPath, 'utf8')) as {
|
||||
nodeId: string;
|
||||
clusterId: string;
|
||||
};
|
||||
const topology = JSON.parse(await readFile(topologyPath, 'utf8')) as {
|
||||
cluster_id: string;
|
||||
nodes: Array<{ node_id: string }>;
|
||||
};
|
||||
|
||||
expect(identity.nodeId).toEqual('node-2');
|
||||
expect(identity.clusterId).toEqual(topology.cluster_id);
|
||||
expect(topology.nodes.some((node) => node.node_id === 'node-1')).toEqual(true);
|
||||
expect(topology.nodes.some((node) => node.node_id === 'node-3')).toEqual(true);
|
||||
});
|
||||
|
||||
tap.test('teardown: stop clustered nodes and clean files', async () => {
|
||||
for (const node of nodes.reverse()) {
|
||||
await node.stop();
|
||||
}
|
||||
await rm(baseDir, { recursive: true, force: true });
|
||||
});
|
||||
|
||||
export default tap.start();
|
||||
@@ -65,11 +65,11 @@ tap.test('startup credentials authenticate successfully', async () => {
|
||||
expect(response.$metadata.httpStatusCode).toEqual(200);
|
||||
});
|
||||
|
||||
tap.test('listCredentials returns the active startup credential set', async () => {
|
||||
tap.test('listCredentials returns active credential metadata without secrets', async () => {
|
||||
const credentials = await testSmartStorageInstance.listCredentials();
|
||||
expect(credentials.length).toEqual(1);
|
||||
expect(credentials[0].accessKeyId).toEqual(INITIAL_CREDENTIAL.accessKeyId);
|
||||
expect(credentials[0].secretAccessKey).toEqual(INITIAL_CREDENTIAL.secretAccessKey);
|
||||
expect((credentials[0] as any).secretAccessKey).toEqual(undefined);
|
||||
});
|
||||
|
||||
tap.test('invalid replacement input fails cleanly and leaves old credentials active', async () => {
|
||||
|
||||
@@ -0,0 +1,50 @@
|
||||
import { expect, tap } from '@git.zone/tstest/tapbundle';
|
||||
import * as smartstorage from '../ts/index.js';
|
||||
|
||||
const TEST_PORT = 3353;
|
||||
let testSmartStorageInstance: smartstorage.SmartStorage;
|
||||
|
||||
tap.test('setup: start storage server for operational endpoint checks', async () => {
|
||||
testSmartStorageInstance = await smartstorage.SmartStorage.createAndStart({
|
||||
server: {
|
||||
port: TEST_PORT,
|
||||
silent: true,
|
||||
region: 'us-east-1',
|
||||
},
|
||||
storage: {
|
||||
cleanSlate: true,
|
||||
},
|
||||
auth: {
|
||||
enabled: false,
|
||||
credentials: [],
|
||||
},
|
||||
});
|
||||
});
|
||||
|
||||
tap.test('operational endpoints expose live ready health and metrics', async () => {
|
||||
const live = await fetch(`http://localhost:${TEST_PORT}/-/live`);
|
||||
expect(live.status).toEqual(200);
|
||||
expect((await live.json()).status).toEqual('alive');
|
||||
|
||||
const ready = await fetch(`http://localhost:${TEST_PORT}/-/ready`);
|
||||
expect(ready.status).toEqual(200);
|
||||
expect((await ready.json()).status).toEqual('ready');
|
||||
|
||||
const health = await fetch(`http://localhost:${TEST_PORT}/-/health`);
|
||||
expect(health.status).toEqual(200);
|
||||
const healthBody = await health.json();
|
||||
expect(healthBody.ok).toEqual(true);
|
||||
expect(healthBody.cluster.enabled).toEqual(false);
|
||||
|
||||
const metrics = await fetch(`http://localhost:${TEST_PORT}/-/metrics`);
|
||||
expect(metrics.status).toEqual(200);
|
||||
const metricsBody = await metrics.text();
|
||||
expect(metricsBody.includes('smartstorage_requests_total')).toEqual(true);
|
||||
expect(metricsBody.includes('smartstorage_cluster_enabled 0')).toEqual(true);
|
||||
});
|
||||
|
||||
tap.test('teardown: stop storage server', async () => {
|
||||
await testSmartStorageInstance.stop();
|
||||
});
|
||||
|
||||
export default tap.start();
|
||||
Reference in New Issue
Block a user