|
|
|
@ -14,13 +14,19 @@ export interface ISnapshot {
|
|
|
|
|
aggregationData: any;
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
/**
 * Callback that turns the documents of an index into a snapshot.
 *
 * @param iterator - async iterable over the documents to aggregate
 * @param prevSnapshot - a previously stored snapshot, or `null` if none is available
 * @returns a promise resolving to the snapshot that should be stored
 */
export type SnapshotProcessor = (
  iterator: AsyncIterable<any>,
  prevSnapshot: ISnapshot | null
) => Promise<ISnapshot>;
|
|
|
|
|
|
|
|
|
|
export class ElasticDoc {
|
|
|
|
|
public client: ElasticClient;
|
|
|
|
|
public index: string;
|
|
|
|
|
private sessionDocs: Set<string> = new Set();
|
|
|
|
|
|
|
|
|
|
private indexInitialized: boolean = false;
|
|
|
|
|
private latestTimestamp: string | null = null; // Store the latest timestamp
|
|
|
|
|
private onlyNew: boolean = false; // Whether to only pipe new docs
|
|
|
|
|
|
|
|
|
|
private BATCH_SIZE = 1000;
|
|
|
|
|
|
|
|
|
|
constructor(options: IElasticDocConstructorOptions) {
|
|
|
|
@ -31,23 +37,99 @@ export class ElasticDoc {
|
|
|
|
|
this.index = options.index;
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
async startPipingSession() {
|
|
|
|
|
this.sessionDocs.clear();
|
|
|
|
|
/**
 * Makes sure the target index exists, creating it on demand.
 *
 * The existence check runs at most once per instance: after the first
 * successful pass `indexInitialized` is set and later calls return immediately.
 * When the index has to be created, its mappings are derived from `doc`
 * via `createMappingsFromDoc`.
 *
 * @param doc - sample document used to derive the mappings of a new index
 */
private async ensureIndexExists(doc: any) {
  // Already verified for this instance, so skip the round trip.
  if (this.indexInitialized) {
    return;
  }

  const existsResponse = await this.client.indices.exists({ index: this.index });
  const alreadyPresent = existsResponse.body;

  if (!alreadyPresent) {
    const derivedMappings = this.createMappingsFromDoc(doc);
    await this.client.indices.create({
      index: this.index,
      body: {
        mappings: derivedMappings,
        settings: {
          // You can define the settings according to your requirements here
        },
      },
    });
  }

  this.indexInitialized = true;
}
|
|
|
|
|
|
|
|
|
|
async pipeDocument(docId: string, doc: any) {
|
|
|
|
|
await this.client.index({
|
|
|
|
|
index: this.index,
|
|
|
|
|
id: docId,
|
|
|
|
|
body: doc,
|
|
|
|
|
});
|
|
|
|
|
this.sessionDocs.add(docId);
|
|
|
|
|
/**
 * Derives a minimal explicit mapping from a sample document.
 *
 * Rules, applied per own enumerable key of `doc`:
 * - `@timestamp` is mapped as `date`
 * - a number is mapped as `float`
 * - a plain object, or an array whose first non-null element is a plain
 *   object, is mapped as `object` (a `text` mapping would make Elasticsearch
 *   reject such values at index time)
 * - everything else is mapped as `text`
 *
 * @param doc - sample document; `null`/`undefined` yields an empty mapping
 * @returns an object of the form `{ properties: { [field]: { type } } }`
 */
private createMappingsFromDoc(doc: any): any {
  const properties: any = {};

  // "Plain" means a bare key/value object that is serialized as a JSON object;
  // values exposing toJSON (e.g. Date) serialize to something else and are excluded.
  const isPlainObject = (value: unknown): boolean =>
    Object.prototype.toString.call(value) === '[object Object]' &&
    typeof (value as { toJSON?: unknown }).toJSON !== 'function';

  // Object.entries only visits own enumerable keys, which are the ones that
  // end up in the serialized document.
  for (const [key, value] of Object.entries(doc ?? {})) {
    if (key === '@timestamp') {
      properties[key] = { type: 'date' };
      continue;
    }

    // For arrays, the first non-null element decides whether the field holds objects.
    const sample = Array.isArray(value) ? value.find((element) => element != null) : value;

    if (typeof value === 'number') {
      properties[key] = { type: 'float' };
    } else if (isPlainObject(sample)) {
      properties[key] = { type: 'object' };
    } else {
      properties[key] = { type: 'text' };
    }
  }

  return { properties };
}
|
|
|
|
|
|
|
|
|
|
/**
 * Starts a new piping session.
 *
 * Clears the set of document ids seen in the previous session and resets the
 * remembered latest timestamp. In "onlyNew" mode the index is queried for the
 * document with the highest `@timestamp`; that value is stored in
 * `latestTimestamp` (or `null` when the index is missing or no timestamp is found).
 *
 * @param options - session options; may be omitted entirely
 * @param options.onlyNew - when `true`, enables "onlyNew" mode (defaults to `false`)
 */
async startPipingSession(options: { onlyNew?: boolean } = {}) {
  this.sessionDocs.clear();
  this.onlyNew = options.onlyNew ?? false;
  // Never carry a timestamp over from an earlier session.
  this.latestTimestamp = null;

  if (!this.onlyNew) {
    return;
  }

  // Check if index exists before attempting to search for the latest timestamp
  const { body: indexExists } = await this.client.indices.exists({ index: this.index });
  if (!indexExists) {
    console.log(`Index ${this.index} does not exist. Working in "onlyNew" mode, but will process all documents as the index is empty.`);
    return;
  }

  const response = await this.client.search({
    index: this.index,
    sort: '@timestamp:desc',
    size: 1,
  });
  const hit = response.body.hits.hits[0];
  this.latestTimestamp = hit?._source?.['@timestamp'] || null;

  if (this.latestTimestamp) {
    console.log(`Working in "onlyNew" mode. Hence we are omitting documents prior to ${this.latestTimestamp}`);
  } else {
    console.log(`Working in "onlyNew" mode, but no documents found in index ${this.index}. Hence processing all documents now.`);
  }
}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
/**
 * Indexes a single document as part of the current piping session.
 *
 * The target index is created first if necessary. When `timestamp` is given
 * it is written to the document's `@timestamp` field. In "onlyNew" mode a
 * document whose timestamp is not greater than `latestTimestamp` is skipped;
 * in every other case the document is indexed and its id is recorded in
 * `sessionDocs`.
 *
 * @param optionsArg - the document to pipe
 * @param optionsArg.docId - id under which the document is indexed
 * @param optionsArg.timestamp - optional value for the `@timestamp` field
 * @param optionsArg.doc - the document body
 */
async pipeDocument(optionsArg: { docId: string; timestamp?: string | number; doc: any }) {
  await this.ensureIndexExists(optionsArg.doc);

  const documentBody = {
    ...optionsArg.doc,
    ...(optionsArg.timestamp && { '@timestamp': optionsArg.timestamp }),
  };

  // If 'onlyNew' is true, compare the document timestamp with the latest timestamp.
  // The comparison uses the plain `<=` operator, so two strings are compared
  // lexicographically (assumes both timestamps share one sortable format).
  // Documents without a timestamp are never skipped.
  if (
    this.onlyNew &&
    this.latestTimestamp &&
    optionsArg.timestamp !== undefined &&
    optionsArg.timestamp <= this.latestTimestamp
  ) {
    // Omit the document
    // NOTE(review): skipped ids are not added to sessionDocs; confirm that
    // endPipingSession treats them as intended.
    return;
  }

  // Index in every mode, not only when "onlyNew" is active.
  await this.client.index({
    index: this.index,
    id: optionsArg.docId,
    body: documentBody,
  });
  this.sessionDocs.add(optionsArg.docId);
}
|
|
|
|
|
|
|
|
|
|
async endPipingSession() {
|
|
|
|
|
const allDocIds: string[] = [];
|
|
|
|
|
const responseQueue = [];
|
|
|
|
|
let response = await this.client.search({ index: this.index, scroll: '1m', size: this.BATCH_SIZE });
|
|
|
|
|
let response = await this.client.search({
|
|
|
|
|
index: this.index,
|
|
|
|
|
scroll: '1m',
|
|
|
|
|
size: this.BATCH_SIZE,
|
|
|
|
|
});
|
|
|
|
|
while (true) {
|
|
|
|
|
response.body.hits.hits.forEach((hit: any) => allDocIds.push(hit._id));
|
|
|
|
|
if (!response.body.hits.hits.length) {
|
|
|
|
@ -81,35 +163,35 @@ export class ElasticDoc {
|
|
|
|
|
|
|
|
|
|
/**
 * Produces and stores a new snapshot of the index.
 *
 * Steps:
 * 1. Ensure the `<index>_snapshots` index exists, creating it with a `date`
 *    field and an `aggregationData` object field when missing.
 * 2. Hand the document iterator and the result of `getLastSnapshot()` to
 *    `processIterator`.
 * 3. Persist the snapshot it returns via `storeSnapshot`.
 *
 * @param processIterator - callback that builds the new snapshot
 */
async takeSnapshot(processIterator: SnapshotProcessor) {
  const snapshotIndex = `${this.index}_snapshots`;

  const { body: indexExists } = await this.client.indices.exists({ index: snapshotIndex });
  if (!indexExists) {
    await this.client.indices.create({
      index: snapshotIndex,
      body: {
        mappings: {
          properties: {
            date: {
              type: 'date',
            },
            aggregationData: {
              type: 'object',
              enabled: true,
            },
          },
        },
      },
    });
  }

  const documentIterator = this.getDocumentIterator();

  const newSnapshot = await processIterator(documentIterator, await this.getLastSnapshot());

  await this.storeSnapshot(newSnapshot);
}
|
|
|
|
|
|
|
|
|
|
private async getLastSnapshot(): Promise<ISnapshot | null> {
|
|
|
|
|
private async getLastSnapshot(): Promise<ISnapshot | null> {
|
|
|
|
|
const snapshotIndex = `${this.index}_snapshots`;
|
|
|
|
|
const { body: indexExists } = await this.client.indices.exists({ index: snapshotIndex });
|
|
|
|
|
|
|
|
|
@ -120,7 +202,7 @@ private async getLastSnapshot(): Promise<ISnapshot | null> {
|
|
|
|
|
const response = await this.client.search({
|
|
|
|
|
index: snapshotIndex,
|
|
|
|
|
sort: 'date:desc',
|
|
|
|
|
size: 1
|
|
|
|
|
size: 1,
|
|
|
|
|
});
|
|
|
|
|
|
|
|
|
|
if (response.body.hits.hits.length > 0) {
|
|
|
|
@ -134,9 +216,12 @@ private async getLastSnapshot(): Promise<ISnapshot | null> {
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
private async *getDocumentIterator() {
|
|
|
|
|
let response = await this.client.search({ index: this.index, scroll: '1m', size: this.BATCH_SIZE });
|
|
|
|
|
let response = await this.client.search({
|
|
|
|
|
index: this.index,
|
|
|
|
|
scroll: '1m',
|
|
|
|
|
size: this.BATCH_SIZE,
|
|
|
|
|
});
|
|
|
|
|
while (true) {
|
|
|
|
|
for (const hit of response.body.hits.hits) {
|
|
|
|
|
yield hit._source;
|
|
|
|
|