// elasticsearch/ts/els.classes.elasticdoc.ts
import { Client as ElasticClient } from '@elastic/elasticsearch';
/**
 * Options accepted by the `ElasticDoc` constructor.
 */
export interface IElasticDocConstructorOptions {
  /** Name of the Elasticsearch index that documents are piped into. */
  index: string;
  /** Node address handed to the Elasticsearch client as its `node` option. */
  node: string;
  /** Optional credentials; forwarded to the client as its `auth` option only when provided. */
  auth?: {
    username: string;
    password: string;
  };
}
/**
 * A stored aggregation snapshot of an index.
 */
export interface ISnapshot {
  /** Snapshot date as a string; the snapshot index maps this field as an Elasticsearch `date`. */
  date: string;
  /** Free-form aggregation payload produced by the snapshot processor. */
  aggregationData: any;
}
/**
 * Callback that turns the documents of an index into a new snapshot.
 *
 * @param iterator - Async iterable over the documents to aggregate.
 * @param prevSnapshot - The previously stored snapshot, or `null` when there is none.
 * @returns A promise for the snapshot to store.
 */
export type SnapshotProcessor = (
  iterator: AsyncIterable<any>,
  prevSnapshot: ISnapshot | null
) => Promise<ISnapshot>;
/**
 * Pipes documents into a single Elasticsearch index in "sessions" and can
 * store aggregation snapshots of that index.
 *
 * A session is: `startPipingSession()` → any number of `pipeDocument()` calls →
 * `endPipingSession()`. Ending a session deletes every document in the index
 * whose ID was not piped during that session.
 *
 * Responses are read as `response.hits.hits` / `response._scroll_id` directly,
 * i.e. the code expects a client that returns the response body itself.
 */
export class ElasticDoc {
  /** Underlying Elasticsearch client. */
  public client: ElasticClient;
  /** Name of the index documents are piped into. */
  public index: string;
  /** IDs of all documents piped during the current session (including skipped ones). */
  private sessionDocs: Set<string> = new Set();
  /** Set once the target index has been checked/created by this instance. */
  private indexInitialized = false;
  /** Newest `@timestamp` found in the index when an `onlyNew` session started. */
  private latestTimestamp: string | number | null = null;
  /** Whether the current session skips documents that are not newer than `latestTimestamp`. */
  private onlyNew = false;
  /** `true` when the most recent `pipeDocument()` call skipped its document as already indexed. */
  public fastForward = false;
  /** Page size for scroll searches and maximum number of operations per bulk request. */
  private readonly BATCH_SIZE = 1000;

  /**
   * @param options - Target index, node address and optional credentials.
   */
  constructor(options: IElasticDocConstructorOptions) {
    this.client = new ElasticClient({
      node: options.node,
      ...(options.auth && { auth: options.auth }),
    });
    this.index = options.index;
  }

  /**
   * Creates the target index on first use if it does not exist yet.
   *
   * Explicit mappings are intentionally not sent (they were disabled in the
   * original code), so the document is currently unused here.
   */
  private async ensureIndexExists(_doc: unknown): Promise<void> {
    if (this.indexInitialized) {
      return;
    }
    const indexExists = await this.client.indices.exists({ index: this.index });
    if (!indexExists) {
      await this.client.indices.create({
        index: this.index,
        body: {
          settings: {
            // You can define the settings according to your requirements here
          },
        },
      });
    }
    this.indexInitialized = true;
  }

  /**
   * Derives a simple mapping from a sample document: `@timestamp` becomes
   * `date`, numbers become `float`, everything else becomes `text`.
   *
   * Currently not applied by `ensureIndexExists`; kept for when explicit
   * mappings are re-enabled.
   */
  private createMappingsFromDoc(doc: any): any {
    const properties: any = {};
    for (const key in doc) {
      if (key === '@timestamp') {
        properties[key] = { type: 'date' };
        continue;
      }
      properties[key] = { type: typeof doc[key] === 'number' ? 'float' : 'text' };
    }
    return { properties };
  }

  /**
   * Converts a timestamp to epoch milliseconds so that values in different
   * representations can be compared.
   *
   * Numbers and purely numeric strings are taken as epoch milliseconds; other
   * strings are parsed with `Date.parse`.
   *
   * @returns The epoch milliseconds, or `null` when the value cannot be interpreted.
   */
  private static toEpochMillis(value: string | number | null | undefined): number | null {
    if (typeof value === 'number') {
      return Number.isFinite(value) ? value : null;
    }
    if (typeof value !== 'string') {
      return null;
    }
    const trimmed = value.trim();
    if (trimmed === '') {
      return null;
    }
    const millis = /^-?\d+(\.\d+)?$/.test(trimmed) ? Number(trimmed) : Date.parse(trimmed);
    return Number.isNaN(millis) ? null : millis;
  }

  /**
   * Tells whether a document timestamp is at or before the newest timestamp
   * recorded at session start.
   *
   * Compares chronologically when both values can be interpreted; otherwise
   * falls back to plain string ordering for two strings. Anything else counts
   * as "not covered", so a document is never skipped on an unreliable comparison.
   */
  private isCoveredByLatest(timestamp: string | number | undefined): boolean {
    const latest = this.latestTimestamp;
    if (latest === null || timestamp === undefined) {
      return false;
    }
    const docMillis = ElasticDoc.toEpochMillis(timestamp);
    const latestMillis = ElasticDoc.toEpochMillis(latest);
    if (docMillis !== null && latestMillis !== null) {
      return docMillis <= latestMillis;
    }
    return typeof timestamp === 'string' && typeof latest === 'string' && timestamp <= latest;
  }

  /**
   * Yields the target index page by page using the scroll API.
   *
   * The scroll context is released in `finally`, so it is cleaned up when the
   * consumer stops early or an error is thrown, not only on normal completion.
   *
   * @param includeSource - Whether hits should carry their `_source`.
   */
  private async *scrollPages(includeSource: boolean) {
    let scrollId: string | undefined;
    try {
      let response = await this.client.search({
        index: this.index,
        scroll: '1m',
        size: this.BATCH_SIZE,
        _source: includeSource,
      });
      scrollId = response._scroll_id;
      while (response.hits.hits.length > 0) {
        yield response.hits.hits;
        if (!scrollId) {
          break;
        }
        response = await this.client.scroll({ scroll_id: scrollId, scroll: '1m' });
        scrollId = response._scroll_id ?? scrollId;
      }
    } finally {
      if (scrollId) {
        try {
          await this.client.clearScroll({ scroll_id: scrollId });
        } catch {
          // Best effort: the context expires on its own after the scroll timeout,
          // and a cleanup failure must not mask the original outcome.
        }
      }
    }
  }

  /**
   * Starts a new piping session and forgets the IDs of the previous one.
   *
   * @param options - `onlyNew`: when `true` and the index already holds
   *   timestamped documents, later `pipeDocument()` calls skip documents whose
   *   timestamp is not newer than the newest `@timestamp` in the index.
   */
  async startPipingSession(options: { onlyNew?: boolean } = {}): Promise<void> {
    this.sessionDocs.clear();
    this.onlyNew = options.onlyNew ?? false;
    // Always start from a clean slate so a cut-off from an earlier session
    // cannot leak into this one.
    this.latestTimestamp = null;
    if (!this.onlyNew) {
      return;
    }

    const indexExists = await this.client.indices.exists({ index: this.index });
    if (!indexExists) {
      console.log(`Working in "onlyNew" mode, but index ${this.index} does not exist. Hence processing all documents now.`);
      return;
    }

    const response = await this.client.search<{ '@timestamp'?: string | number }>({
      index: this.index,
      // `unmapped_type` keeps the sort from failing on an index that has no
      // `@timestamp` mapping yet (e.g. a freshly created, empty index).
      sort: [{ '@timestamp': { order: 'desc', unmapped_type: 'date' } }],
      size: 1,
    });
    const newest = response.hits.hits[0]?._source?.['@timestamp'];
    this.latestTimestamp = newest == null || newest === '' ? null : newest;
    if (this.latestTimestamp !== null) {
      console.log(`Working in "onlyNew" mode. Hence we are omitting documents prior to ${this.latestTimestamp}`);
    } else {
      console.log(`Working in "onlyNew" mode, but no documents found in index ${this.index}. Hence processing all documents now.`);
    }
  }

  /**
   * Indexes one document under the given ID and records the ID for the session.
   *
   * In `onlyNew` mode a document whose timestamp is at or before the newest
   * timestamp found at session start is not re-indexed; `fastForward` is set to
   * `true` for that call. The ID is recorded either way, so skipped documents
   * are not deleted by `endPipingSession()`.
   *
   * @param optionsArg - `docId`: document ID; `timestamp`: optional value
   *   written to the document's `@timestamp` field; `doc`: the document body.
   */
  async pipeDocument(optionsArg: { docId: string; timestamp?: string | number; doc: any }): Promise<void> {
    await this.ensureIndexExists(optionsArg.doc);
    const { docId, timestamp, doc } = optionsArg;

    // Explicit presence check: a numeric timestamp of 0 is valid and must not
    // be dropped the way a truthiness test would drop it.
    const hasTimestamp = timestamp != null && timestamp !== '';
    const documentBody = hasTimestamp ? { ...doc, '@timestamp': timestamp } : { ...doc };

    this.fastForward = this.onlyNew && this.isCoveredByLatest(timestamp);
    if (!this.fastForward) {
      await this.client.index({
        index: this.index,
        id: docId,
        body: documentBody,
      });
    }
    this.sessionDocs.add(docId);
  }

  /**
   * Ends the session: deletes every document in the index whose ID was not
   * piped during the session, then clears the recorded IDs.
   *
   * Only document IDs are fetched (no `_source`), and deletions are sent in
   * bulk requests of at most `BATCH_SIZE` operations.
   */
  async endPipingSession(): Promise<void> {
    const pendingDeletes: Array<{ delete: { _index: string; _id: string } }> = [];
    const flush = async (): Promise<void> => {
      if (pendingDeletes.length === 0) {
        return;
      }
      // splice(0) hands the request its own array and empties the queue.
      const batch = pendingDeletes.splice(0);
      await this.client.bulk({ refresh: true, body: batch });
    };

    for await (const page of this.scrollPages(false)) {
      for (const hit of page) {
        const docId = hit._id;
        if (docId === undefined || this.sessionDocs.has(docId)) {
          continue;
        }
        pendingDeletes.push({ delete: { _index: this.index, _id: docId } });
        if (pendingDeletes.length >= this.BATCH_SIZE) {
          await flush();
        }
      }
    }
    await flush();
    this.sessionDocs.clear();
  }

  /**
   * Builds and stores a new snapshot of the index.
   *
   * Creates the `<index>_snapshots` index if needed, then calls
   * `processIterator` with an iterator over all documents of the index and the
   * most recent stored snapshot (or `null`), and stores the returned snapshot.
   *
   * @param processIterator - Callback that computes the new snapshot.
   */
  async takeSnapshot(processIterator: SnapshotProcessor): Promise<void> {
    const snapshotIndex = `${this.index}_snapshots`;

    const indexExists = await this.client.indices.exists({ index: snapshotIndex });
    if (!indexExists) {
      await this.client.indices.create({
        index: snapshotIndex,
        body: {
          mappings: {
            properties: {
              date: {
                type: 'date',
              },
              aggregationData: {
                type: 'object',
                enabled: true,
              },
            },
          },
        },
      });
    }

    const documentIterator = this.getDocumentIterator();
    const previousSnapshot = await this.getLastSnapshot();
    const newSnapshot = await processIterator(documentIterator, previousSnapshot);
    await this.storeSnapshot(newSnapshot);
  }

  /**
   * Fetches the snapshot with the newest `date` from `<index>_snapshots`.
   *
   * @returns The snapshot, or `null` when the snapshot index does not exist,
   *   is empty, or the newest hit carries no `_source`.
   */
  private async getLastSnapshot(): Promise<ISnapshot | null> {
    const snapshotIndex = `${this.index}_snapshots`;
    const indexExists = await this.client.indices.exists({ index: snapshotIndex });
    if (!indexExists) {
      return null;
    }

    const response = await this.client.search<ISnapshot>({
      index: snapshotIndex,
      sort: 'date:desc',
      size: 1,
    });
    const source = response.hits.hits[0]?._source;
    if (!source) {
      return null;
    }
    return {
      date: source.date,
      aggregationData: source.aggregationData,
    };
  }

  /**
   * Iterates over the `_source` of every document in the index.
   *
   * The underlying scroll context is released when iteration finishes or the
   * consumer stops early.
   */
  private async *getDocumentIterator() {
    for await (const page of this.scrollPages(true)) {
      for (const hit of page) {
        yield hit._source;
      }
    }
  }

  /** Stores a snapshot as a new document in `<index>_snapshots`. */
  private async storeSnapshot(snapshot: ISnapshot): Promise<void> {
    await this.client.index({
      index: `${this.index}_snapshots`,
      body: snapshot,
    });
  }
}