// File: elasticsearch/ts/examples/bulk/bulk-indexer-example.ts
// 369 lines · 12 KiB · TypeScript

/**
* Comprehensive Bulk Indexer Example
*
* Demonstrates high-throughput document ingestion with adaptive batching
*/
import {
createConfig,
ElasticsearchConnectionManager,
LogLevel,
BulkIndexer,
type BulkProgress,
type BulkBatchResult,
} from '../../index.js';
interface Product {
id: string;
name: string;
description: string;
category: string;
price: number;
stock: number;
createdAt: Date;
}
async function main() {
console.log('=== Bulk Indexer Example ===\n');
// ============================================================================
// Step 1: Configuration
// ============================================================================
console.log('Step 1: Configuring Elasticsearch connection...');
const config = createConfig()
.fromEnv()
.nodes(process.env.ELASTICSEARCH_URL || 'http://localhost:9200')
.basicAuth(
process.env.ELASTICSEARCH_USERNAME || 'elastic',
process.env.ELASTICSEARCH_PASSWORD || 'changeme'
)
.timeout(30000)
.retries(3)
.logLevel(LogLevel.INFO)
.enableMetrics(true)
.build();
// ============================================================================
// Step 2: Initialize Connection
// ============================================================================
console.log('Step 2: Initializing connection manager...');
const connectionManager = ElasticsearchConnectionManager.getInstance(config);
await connectionManager.initialize();
console.log('✓ Connection manager initialized\n');
// ============================================================================
// Step 3: Basic Bulk Indexing
// ============================================================================
console.log('Step 3: Basic bulk indexing...');
const basicIndexer = new BulkIndexer({
batchingStrategy: 'fixed',
batchSize: 100,
flushIntervalMs: 2000,
workers: 2,
});
await basicIndexer.start();
// Index documents
const startTime = Date.now();
for (let i = 1; i <= 500; i++) {
await basicIndexer.index('products-basic', `product-${i}`, {
id: `product-${i}`,
name: `Product ${i}`,
description: `Description for product ${i}`,
category: `Category ${(i % 5) + 1}`,
price: Math.random() * 1000,
stock: Math.floor(Math.random() * 100),
createdAt: new Date(),
});
}
await basicIndexer.flush();
await basicIndexer.stop();
const duration = Date.now() - startTime;
const stats = basicIndexer.getStats();
console.log('✓ Basic indexing complete');
console.log(` Indexed: ${stats.totalSuccessful} documents`);
console.log(` Duration: ${duration}ms`);
console.log(` Throughput: ${((stats.totalSuccessful / duration) * 1000).toFixed(0)} docs/sec`);
console.log(` Avg batch size: ${stats.avgBatchSize.toFixed(0)}`);
console.log(` Avg batch duration: ${stats.avgBatchDurationMs.toFixed(0)}ms`);
console.log();
// ============================================================================
// Step 4: Adaptive Batching
// ============================================================================
console.log('Step 4: Adaptive batching...');
const adaptiveIndexer = new BulkIndexer({
batchingStrategy: 'adaptive',
minBatchSize: 50,
maxBatchSize: 500,
flushIntervalMs: 3000,
workers: 3,
onProgress: (progress: BulkProgress) => {
if (progress.totalProcessed % 200 === 0 && progress.totalProcessed > 0) {
console.log(
` Progress: ${progress.totalProcessed}/${progress.totalSubmitted} ` +
`(${progress.operationsPerSecond} ops/sec, ${progress.queueSize} queued)`
);
}
},
});
await adaptiveIndexer.start();
// Index larger dataset
console.log(' Indexing 1000 documents with adaptive batching...');
for (let i = 1; i <= 1000; i++) {
await adaptiveIndexer.index('products-adaptive', `product-${i}`, {
id: `product-${i}`,
name: `Adaptive Product ${i}`,
description: `Description for adaptive product ${i}`,
category: `Category ${(i % 10) + 1}`,
price: Math.random() * 2000,
stock: Math.floor(Math.random() * 200),
createdAt: new Date(),
});
}
await adaptiveIndexer.flush();
await adaptiveIndexer.stop();
const adaptiveStats = adaptiveIndexer.getStats();
console.log('✓ Adaptive indexing complete');
console.log(` Indexed: ${adaptiveStats.totalSuccessful} documents`);
console.log(` Avg batch size: ${adaptiveStats.avgBatchSize.toFixed(0)} (adapted based on performance)`);
console.log(` Avg ops/sec: ${adaptiveStats.avgOpsPerSecond.toFixed(0)}`);
console.log();
// ============================================================================
// Step 5: Progress Callbacks
// ============================================================================
console.log('Step 5: Using progress callbacks...');
let lastProgress = 0;
const progressIndexer = new BulkIndexer({
batchSize: 100,
workers: 4,
onProgress: (progress: BulkProgress) => {
const percent = (progress.totalProcessed / progress.totalSubmitted) * 100;
if (percent - lastProgress >= 20) {
console.log(` ${percent.toFixed(0)}% complete (${progress.totalProcessed}/${progress.totalSubmitted})`);
if (progress.estimatedTimeRemainingMs !== undefined) {
console.log(` ETA: ${(progress.estimatedTimeRemainingMs / 1000).toFixed(1)}s`);
}
lastProgress = percent;
}
},
onBatchSuccess: (result: BulkBatchResult) => {
if (result.failed > 0) {
console.log(` Batch completed: ${result.successful} ok, ${result.failed} failed`);
}
},
});
await progressIndexer.start();
for (let i = 1; i <= 500; i++) {
await progressIndexer.index('products-progress', `product-${i}`, {
id: `product-${i}`,
name: `Progress Product ${i}`,
description: `Description ${i}`,
category: `Category ${(i % 3) + 1}`,
price: Math.random() * 500,
stock: Math.floor(Math.random() * 50),
createdAt: new Date(),
});
}
await progressIndexer.flush();
await progressIndexer.stop();
console.log('✓ Progress tracking complete\n');
// ============================================================================
// Step 6: Backpressure Handling
// ============================================================================
console.log('Step 6: Demonstrating backpressure handling...');
const backpressureIndexer = new BulkIndexer({
batchSize: 50,
maxQueueSize: 200,
flushIntervalMs: 1000,
workers: 1, // Single worker to create backpressure
});
await backpressureIndexer.start();
console.log(' Submitting operations rapidly...');
let backpressureHits = 0;
for (let i = 1; i <= 300; i++) {
const backpressure = backpressureIndexer.getBackpressure();
if (backpressure.active && i % 50 === 0) {
console.log(
` Backpressure detected: ${backpressure.queueUtilization.toFixed(0)}% queue utilization ` +
`(waiting ${backpressure.recommendedWaitMs}ms)`
);
backpressureHits++;
}
await backpressureIndexer.index('products-backpressure', `product-${i}`, {
id: `product-${i}`,
name: `Backpressure Product ${i}`,
description: `Test ${i}`,
category: `Cat ${i % 2}`,
price: i * 10,
stock: i,
createdAt: new Date(),
});
}
await backpressureIndexer.flush();
await backpressureIndexer.stop();
console.log('✓ Backpressure handling demonstrated');
console.log(` Backpressure events: ${backpressureHits}`);
console.log();
// ============================================================================
// Step 7: Mixed Operations
// ============================================================================
console.log('Step 7: Mixed operations (index, update, delete)...');
const mixedIndexer = new BulkIndexer({
batchSize: 50,
workers: 2,
});
await mixedIndexer.start();
// Index documents
for (let i = 1; i <= 100; i++) {
await mixedIndexer.index('products-mixed', `product-${i}`, {
id: `product-${i}`,
name: `Mixed Product ${i}`,
description: `Original description ${i}`,
category: `Category ${(i % 5) + 1}`,
price: i * 100,
stock: i * 10,
createdAt: new Date(),
});
}
// Update some documents
for (let i = 1; i <= 30; i++) {
await mixedIndexer.update<Product>('products-mixed', `product-${i}`, {
price: i * 150, // Updated price
stock: i * 15, // Updated stock
});
}
// Delete some documents
for (let i = 91; i <= 100; i++) {
await mixedIndexer.delete('products-mixed', `product-${i}`);
}
await mixedIndexer.flush();
const mixedStats = mixedIndexer.getStats();
await mixedIndexer.stop();
console.log('✓ Mixed operations complete');
console.log(` Total operations: ${mixedStats.totalProcessed}`);
console.log(` Index: 100, Update: 30, Delete: 10`);
console.log(` Successful: ${mixedStats.totalSuccessful}`);
console.log(` Failed: ${mixedStats.totalFailed}`);
console.log();
// ============================================================================
// Step 8: Dead-Letter Queue
// ============================================================================
console.log('Step 8: Dead-letter queue for failed operations...');
const dlqIndexer = new BulkIndexer({
batchSize: 50,
maxRetries: 2,
retryDelayMs: 500,
enableDeadLetterQueue: true,
deadLetterIndex: 'failed-operations-{now/d}',
workers: 2,
});
await dlqIndexer.start();
// Index valid documents
for (let i = 1; i <= 50; i++) {
await dlqIndexer.index('products-dlq', `product-${i}`, {
id: `product-${i}`,
name: `DLQ Product ${i}`,
description: `Description ${i}`,
category: `Cat ${i % 3}`,
price: i * 50,
stock: i * 5,
createdAt: new Date(),
});
}
await dlqIndexer.flush();
// Wait a bit for any retries
await new Promise((resolve) => setTimeout(resolve, 2000));
const dlqStats = dlqIndexer.getStats();
await dlqIndexer.stop();
console.log('✓ Dead-letter queue test complete');
console.log(` Successful: ${dlqStats.totalSuccessful}`);
console.log(` Failed (after retries): ${dlqStats.totalFailed}`);
console.log(` Sent to DLQ: ${dlqStats.totalDeadLettered}`);
console.log();
// ============================================================================
// Step 9: Statistics Summary
// ============================================================================
console.log('Step 9: Final statistics summary...\n');
const finalStats = dlqIndexer.getStats();
console.log('Sample Indexer Statistics:');
console.log(` Total submitted: ${finalStats.totalSubmitted}`);
console.log(` Total processed: ${finalStats.totalProcessed}`);
console.log(` Total successful: ${finalStats.totalSuccessful}`);
console.log(` Total failed: ${finalStats.totalFailed}`);
console.log(` Total dead-lettered: ${finalStats.totalDeadLettered}`);
console.log(` Total batches: ${finalStats.totalBatches}`);
console.log(` Avg batch size: ${finalStats.avgBatchSize.toFixed(1)}`);
console.log(` Avg batch duration: ${finalStats.avgBatchDurationMs.toFixed(1)}ms`);
console.log(` Avg ops/sec: ${finalStats.avgOpsPerSecond.toFixed(0)}`);
console.log();
// ============================================================================
// Step 10: Cleanup
// ============================================================================
console.log('Step 10: Cleanup...');
await connectionManager.destroy();
console.log('✓ Connection closed\n');
console.log('=== Bulk Indexer Example Complete ===');
console.log('\nKey Features Demonstrated:');
console.log(' ✓ Fixed batch size strategy');
console.log(' ✓ Adaptive batching (adjusts based on performance)');
console.log(' ✓ Progress callbacks with ETA');
console.log(' ✓ Backpressure handling');
console.log(' ✓ Mixed operations (index, update, delete)');
console.log(' ✓ Dead-letter queue for failed operations');
console.log(' ✓ Automatic retries with exponential backoff');
console.log(' ✓ Parallel workers');
console.log(' ✓ Comprehensive statistics');
}
// Run the example
main().catch((error) => {
console.error('Example failed:', error);
process.exit(1);
});