feat(laws,opendata): add local law storage and migrate OpenData persistence to smartdb-backed local storage

This commit is contained in:
2026-04-17 11:51:02 +00:00
parent 79e74a34ed
commit 73801f785a
40 changed files with 8514 additions and 7266 deletions
+1 -1
View File
@@ -3,6 +3,6 @@
*/
export const commitinfo = {
name: '@fin.cx/opendata',
version: '3.5.0',
version: '3.6.0',
description: 'A comprehensive TypeScript library for accessing business data and real-time financial information. Features include German company data management with MongoDB integration, JSONL bulk processing, automated Handelsregister interactions, and real-time stock market data from multiple providers.'
}
+1 -1
View File
@@ -18,7 +18,7 @@ export class BusinessRecord extends plugins.smartdata.SmartDataDbDoc<
// INSTANCE
@plugins.smartdata.unI()
id: string;
id!: string;
@plugins.smartdata.svDb()
data: {
+44 -19
View File
@@ -20,15 +20,44 @@ export class HandelsRegister {
this.uniqueDowloadFolder = plugins.path.join(this.downloadDir, plugins.smartunique.uniSimple());
}
private async resetDownloadFolder() {
await plugins.smartfs.directory(this.uniqueDowloadFolder).recursive().delete().catch(() => {});
await plugins.smartfs.directory(this.uniqueDowloadFolder).create();
}
private async waitForDownloadedFile() {
for (let attempt = 0; attempt < 120; attempt++) {
const directoryEntries = await plugins.fs.readdir(this.uniqueDowloadFolder);
const fileName = directoryEntries.find(
(entry) => !entry.endsWith('.crdownload') && !entry.endsWith('.tmp')
);
if (fileName) {
const filePath = plugins.path.join(this.uniqueDowloadFolder, fileName);
const firstStat = await plugins.fs.stat(filePath);
await plugins.smartdelay.delayFor(500);
const secondStat = await plugins.fs.stat(filePath);
if (firstStat.size === secondStat.size) {
return filePath;
}
}
await plugins.smartdelay.delayFor(500);
}
throw new Error('Timed out while waiting for the download to finish.');
}
public async start() {
// Start the browser
await plugins.smartfile.fs.ensureDir(this.uniqueDowloadFolder);
await this.resetDownloadFolder();
await this.smartbrowserInstance.start();
}
public async stop() {
// Stop the browser
await plugins.smartfile.fs.remove(this.uniqueDowloadFolder);
await plugins.smartfs.directory(this.uniqueDowloadFolder).recursive().delete();
await this.smartbrowserInstance.stop();
}
@@ -184,24 +213,16 @@ export class HandelsRegister {
throw new Error('Invalid file type');
}
}, typeArg);
const downloadedFilePath = await this.waitForDownloadedFile();
const renamedFilePath = plugins.path.join(
this.uniqueDowloadFolder,
typeArg === 'AD' ? 'ad.pdf' : 'si.xml'
);
await plugins.fs.rename(downloadedFilePath, renamedFilePath);
const file = await plugins.smartfileFactory.fromFilePath(renamedFilePath);
await plugins.smartfile.fs.waitForFileToBeReady(this.uniqueDowloadFolder);
const files = await plugins.smartfile.fs.fileTreeToObject(this.uniqueDowloadFolder, '**/*');
const file = files[0];
// lets clear the folder for the next download
await plugins.smartfile.fs.ensureEmptyDir(this.uniqueDowloadFolder);
switch (typeArg) {
case 'AD':
await file.rename(`ad.pdf`);
break;
case 'SI':
await file.rename(`si.xml`);
break;
break;
}
// Keep the download folder empty between requests.
await this.resetDownloadFolder();
return file;
}
@@ -297,6 +318,10 @@ export class HandelsRegister {
*/
public async getSpecificCompany(companyArg: BusinessRecord['data']['germanParsedRegistration']) {
return this.asyncExecutionStack.getExclusiveExecutionSlot(async () => {
if (!companyArg?.type || !companyArg.number || !companyArg.court) {
throw new Error('A complete parsed German registration is required.');
}
const page = await this.getNewPage();
await this.navigateToPage(page, 'Normal search');
await page.waitForSelector('#form\\:schlagwoerter', { timeout: 5000 });
+7 -6
View File
@@ -53,14 +53,15 @@ export class JsonlDataProcessor<T> {
dataUrlArg = 'https://daten.offeneregister.de/de_companies_ocdata.jsonl.bz2'
) {
const done = plugins.smartpromise.defer();
const dataExists = await plugins.smartfile.fs.isDirectory(this.germanBusinessDataDir);
const dataExists = await plugins.smartfs.directory(this.germanBusinessDataDir).exists();
if (!dataExists) {
await plugins.smartfile.fs.ensureDir(this.germanBusinessDataDir);
await plugins.smartfs.directory(this.germanBusinessDataDir).create();
} else {
}
const smartarchive = await plugins.smartarchive.SmartArchive.fromArchiveUrl(dataUrlArg);
const jsonlDataStream = await smartarchive.exportToStreamOfStreamFiles();
const jsonlDataStream = await plugins.smartarchive.SmartArchive.create()
.url(dataUrlArg)
.toStreamFiles();
let totalRecordsCounter = 0;
let nextRest: string = '';
jsonlDataStream.pipe(
@@ -74,11 +75,11 @@ export class JsonlDataProcessor<T> {
writeFunction: async (chunkArg: Buffer, streamToolsArg) => {
const currentString = nextRest + chunkArg.toString();
const lines = currentString.split('\n');
nextRest = lines.pop();
nextRest = lines.pop() ?? '';
console.log(`Got another ${lines.length} records.`);
const concurrentProcessor = new plugins.smartarray.ConcurrentProcessor<string>(
async (line) => {
let entry: T;
let entry: T | undefined;
if (!line) return;
try {
entry = JSON.parse(line);
+65 -32
View File
@@ -1,7 +1,6 @@
import { BusinessRecord } from './classes.businessrecord.js';
import { HandelsRegister } from './classes.handelsregister.js';
import { JsonlDataProcessor, type SeedEntryType } from './classes.jsonldata.js';
import * as paths from './paths.js';
import * as plugins from './plugins.js';
export interface IOpenDataConfig {
@@ -11,12 +10,13 @@ export interface IOpenDataConfig {
}
export class OpenData {
public db: plugins.smartdata.SmartdataDb;
private serviceQenv: plugins.qenv.Qenv;
public db!: plugins.smartdata.SmartdataDb;
private localSmartDb?: plugins.smartdb.LocalSmartDb;
private config: IOpenDataConfig;
private started = false;
public jsonLDataProcessor: JsonlDataProcessor<SeedEntryType>;
public handelsregister: HandelsRegister;
public jsonLDataProcessor!: JsonlDataProcessor<SeedEntryType>;
public handelsregister!: HandelsRegister;
public CBusinessRecord = plugins.smartdata.setDefaultManagerForDoc(this, BusinessRecord);
@@ -28,38 +28,62 @@ export class OpenData {
throw new Error('@fin.cx/opendata: All directory paths are required (downloadDir, germanBusinessDataDir, nogitDir).');
}
this.config = configArg;
this.serviceQenv = new plugins.qenv.Qenv(paths.packageDir, this.config.nogitDir);
}
public async start() {
// Ensure configured directories exist
await plugins.smartfile.fs.ensureDir(this.config.nogitDir);
await plugins.smartfile.fs.ensureDir(this.config.downloadDir);
await plugins.smartfile.fs.ensureDir(this.config.germanBusinessDataDir);
if (this.started) {
return;
}
this.db = new plugins.smartdata.SmartdataDb({
mongoDbUrl: await this.serviceQenv.getEnvVarOnDemand('MONGODB_URL'),
mongoDbName: await this.serviceQenv.getEnvVarOnDemand('MONGODB_NAME'),
mongoDbUser: await this.serviceQenv.getEnvVarOnDemand('MONGODB_USER'),
mongoDbPass: await this.serviceQenv.getEnvVarOnDemand('MONGODB_PASS'),
// Ensure configured directories exist
await plugins.smartfs.directory(this.config.nogitDir).create();
await plugins.smartfs.directory(this.config.downloadDir).create();
await plugins.smartfs.directory(this.config.germanBusinessDataDir).create();
this.localSmartDb = new plugins.smartdb.LocalSmartDb({
folderPath: plugins.path.join(this.config.nogitDir, 'opendata-smartdb'),
});
await this.db.init();
this.jsonLDataProcessor = new JsonlDataProcessor(
this.config.germanBusinessDataDir,
async (entryArg) => {
const businessRecord = new this.CBusinessRecord();
businessRecord.id = await this.CBusinessRecord.getNewId();
businessRecord.data.name = entryArg.name;
businessRecord.data.germanParsedRegistration = {
court: entryArg.all_attributes.registered_office,
number: entryArg.all_attributes._registerNummer,
type: entryArg.all_attributes._registerArt as 'HRA' | 'HRB',
};
await businessRecord.save();
const connectionInfo = await this.localSmartDb.start();
this.db = new plugins.smartdata.SmartdataDb({
mongoDbUrl: connectionInfo.connectionUri,
mongoDbName: 'opendata',
});
try {
await this.db.init();
await this.db.mongoDb.collection('_opendata_bootstrap').insertOne({
createdAt: new Date(),
});
await this.db.mongoDb.collection('_opendata_bootstrap').deleteMany({});
this.jsonLDataProcessor = new JsonlDataProcessor(
this.config.germanBusinessDataDir,
async (entryArg) => {
const businessRecord = new this.CBusinessRecord();
businessRecord.id = await this.CBusinessRecord.getNewId();
businessRecord.data.name = entryArg.name;
businessRecord.data.germanParsedRegistration = {
court: entryArg.all_attributes.registered_office,
number: entryArg.all_attributes._registerNummer,
type: entryArg.all_attributes._registerArt as 'HRA' | 'HRB',
};
await businessRecord.save();
}
);
this.handelsregister = new HandelsRegister(this, this.config.downloadDir);
await this.handelsregister.start();
this.started = true;
} catch (error) {
if (this.handelsregister) {
await this.handelsregister.stop().catch(() => {});
}
);
this.handelsregister = new HandelsRegister(this, this.config.downloadDir);
await this.handelsregister.start();
await this.db.close().catch(() => {});
await this.localSmartDb.stop().catch(() => {});
this.localSmartDb = undefined;
throw error;
}
}
public async buildInitialDb() {
@@ -81,7 +105,16 @@ export class OpenData {
public async stop() {
if (!this.started) {
return;
}
if (this.handelsregister) {
await this.handelsregister.stop();
}
await this.db.close();
await this.handelsregister.stop();
await this.localSmartDb?.stop();
this.localSmartDb = undefined;
this.started = false;
}
}
+1
View File
@@ -1,2 +1,3 @@
export * from './classes.main.opendata.js';
export * from './laws/index.js';
export * from './stocks/index.js';
+81
View File
@@ -0,0 +1,81 @@
import * as plugins from '../plugins.js';
import type { TJurisdiction, TLawSource, TRawLawFormat } from './interfaces.law.js';
@plugins.smartdata.Manager()
export class LawRecord extends plugins.smartdata.SmartDataDbDoc<LawRecord, LawRecord> {
public static getByLookupKey = async (lookupKeyArg: string) => {
const lawRecords = await LawRecord.getInstances({
lookupKey: lookupKeyArg,
});
return lawRecords[0];
};
@plugins.smartdata.unI()
id!: string;
@plugins.smartdata.unI()
@plugins.smartdata.svDb()
lookupKey!: string;
@plugins.smartdata.index()
@plugins.smartdata.svDb()
jurisdiction!: TJurisdiction;
@plugins.smartdata.index()
@plugins.smartdata.svDb()
source!: TLawSource;
@plugins.smartdata.searchable()
@plugins.smartdata.svDb()
identifier!: string;
@plugins.smartdata.searchable()
@plugins.smartdata.svDb()
title!: string;
@plugins.smartdata.searchable()
@plugins.smartdata.svDb()
shortTitle: string = '';
@plugins.smartdata.searchable()
@plugins.smartdata.svDb()
citation: string = '';
@plugins.smartdata.index()
@plugins.smartdata.svDb()
type: string = '';
@plugins.smartdata.index()
@plugins.smartdata.svDb()
language: string = '';
@plugins.smartdata.svDb()
sourceUrl!: string;
@plugins.smartdata.svDb()
rawFormat!: TRawLawFormat;
@plugins.smartdata.svDb()
rawBody!: string;
@plugins.smartdata.searchable()
@plugins.smartdata.svDb()
text!: string;
@plugins.smartdata.index()
@plugins.smartdata.svDb()
dateIssued: string = '';
@plugins.smartdata.index()
@plugins.smartdata.svDb()
lastModified: string = '';
@plugins.smartdata.svDb()
sourceMeta: Record<string, string> = {};
@plugins.smartdata.svDb()
fetchedAt: Date = new Date();
@plugins.smartdata.svDb()
syncedAt: Date = new Date();
}
File diff suppressed because it is too large Load Diff
+3
View File
@@ -0,0 +1,3 @@
export * from './interfaces.law.js';
export * from './classes.lawrecord.js';
export * from './classes.lawservice.js';
+48
View File
@@ -0,0 +1,48 @@
export type TJurisdiction = 'de' | 'eu' | 'us';
export type TLawSource =
| 'gesetze-im-internet'
| 'eur-lex'
| 'law-cornell-lii'
| 'govinfo-plaw'
| 'govinfo-uscode';
export type TRawLawFormat = 'xml' | 'html' | 'text' | 'json';
export type TUsLawCollection = 'PLAW' | 'USCODE';
export interface ILawServiceConfig {
dbFolderPath?: string;
dbName?: string;
govInfoApiKey?: string;
}
export interface ILawLookupRequest {
jurisdiction: TJurisdiction;
identifier: string;
language?: string;
usCollection?: TUsLawCollection;
forceSync?: boolean;
}
export interface ILawSyncRequest {
jurisdiction: TJurisdiction;
limit?: number;
offset?: number;
language?: string;
govInfoApiKey?: string;
usCollection?: TUsLawCollection;
since?: Date;
}
export interface ILawSearchRequest {
query: string;
jurisdiction?: TJurisdiction;
limit?: number;
}
export interface ILawSyncResult {
jurisdiction: TJurisdiction;
syncedCount: number;
identifiers: string[];
}
+19
View File
@@ -1,7 +1,9 @@
// node native scope
import * as fs from 'node:fs/promises';
import * as path from 'node:path';
export {
fs,
path,
}
@@ -14,23 +16,40 @@ import * as smartbrowser from '@push.rocks/smartbrowser';
import * as smartdata from '@push.rocks/smartdata';
import * as smartdelay from '@push.rocks/smartdelay';
import * as smartfile from '@push.rocks/smartfile';
import { SmartFs, SmartFsProviderNode } from '@push.rocks/smartfs';
import * as smartlog from '@push.rocks/smartlog';
import * as smartpath from '@push.rocks/smartpath';
import * as smartpromise from '@push.rocks/smartpromise';
import * as smartrequest from '@push.rocks/smartrequest';
import * as smartdb from '@push.rocks/smartdb';
import * as smartstream from '@push.rocks/smartstream';
import * as smartunique from '@push.rocks/smartunique';
import * as smartxml from '@push.rocks/smartxml';
const smartfs = new SmartFs(new SmartFsProviderNode());
const smartfileFactory = new smartfile.SmartFileFactory(smartfs);
const getErrorMessage = (error: unknown) => {
if (error instanceof Error) {
return error.message;
}
return String(error);
};
export {
getErrorMessage,
lik,
qenv,
smartarchive,
smartarray,
smartbrowser,
smartdb,
smartdata,
smartdelay,
smartfile,
smartfileFactory,
smartfs,
smartlog,
smartpath,
smartpromise,
+1 -1
View File
@@ -166,7 +166,7 @@ export class FundamentalsService implements IFundamentalsProviderRegistry {
lastError = error as Error;
console.warn(
`Provider ${provider.name} failed for ${this.getRequestDescription(request)}: ${error.message}`
`Provider ${provider.name} failed for ${this.getRequestDescription(request)}: ${plugins.getErrorMessage(error)}`
);
}
}
+17 -7
View File
@@ -32,7 +32,17 @@ export class StockDataService {
private logger = console;
private config: Required<IStockDataServiceConfig> = {
private config: {
cache: {
priceTTL: number;
fundamentalsTTL: number;
maxEntries: number;
};
timeout: {
price: number;
fundamentals: number;
};
} = {
cache: {
priceTTL: 24 * 60 * 60 * 1000, // 24 hours
fundamentalsTTL: 90 * 24 * 60 * 60 * 1000, // 90 days
@@ -200,7 +210,7 @@ export class StockDataService {
entry.lastErrorTime = new Date();
lastError = error as Error;
console.warn(`Provider ${provider.name} failed for ${ticker}: ${error.message}`);
console.warn(`Provider ${provider.name} failed for ${ticker}: ${plugins.getErrorMessage(error)}`);
}
}
@@ -250,7 +260,7 @@ export class StockDataService {
entry.lastErrorTime = new Date();
lastError = error as Error;
console.warn(`Provider ${provider.name} failed for batch prices: ${error.message}`);
console.warn(`Provider ${provider.name} failed for batch prices: ${plugins.getErrorMessage(error)}`);
}
}
@@ -301,7 +311,7 @@ export class StockDataService {
entry.lastErrorTime = new Date();
lastError = error as Error;
console.warn(`Provider ${provider.name} failed for ${ticker} fundamentals: ${error.message}`);
console.warn(`Provider ${provider.name} failed for ${ticker} fundamentals: ${plugins.getErrorMessage(error)}`);
}
}
@@ -352,7 +362,7 @@ export class StockDataService {
entry.lastErrorTime = new Date();
lastError = error as Error;
console.warn(`Provider ${provider.name} failed for batch fundamentals: ${error.message}`);
console.warn(`Provider ${provider.name} failed for batch fundamentals: ${plugins.getErrorMessage(error)}`);
}
}
@@ -382,7 +392,7 @@ export class StockDataService {
fundamentals = this.enrichWithPrice(fundamentals, price.price);
}
} catch (error) {
console.warn(`Failed to fetch fundamentals for ${normalizedRequest.ticker}: ${error.message}`);
console.warn(`Failed to fetch fundamentals for ${normalizedRequest.ticker}: ${plugins.getErrorMessage(error)}`);
// Continue without fundamentals
}
}
@@ -426,7 +436,7 @@ export class StockDataService {
fundamentalsMap = new Map(fundamentals.map(f => [f.ticker, f]));
}
} catch (error) {
console.warn(`Failed to fetch batch fundamentals: ${error.message}`);
console.warn(`Failed to fetch batch fundamentals: ${plugins.getErrorMessage(error)}`);
// Continue without fundamentals
}
}
+2 -2
View File
@@ -205,7 +205,7 @@ export class StockPriceService implements IProviderRegistry {
lastError = error as Error;
console.warn(
`Provider ${provider.name} failed for ${this.getRequestDescription(request)}: ${error.message}`
`Provider ${provider.name} failed for ${this.getRequestDescription(request)}: ${plugins.getErrorMessage(error)}`
);
}
}
@@ -522,4 +522,4 @@ export class StockPriceService implements IProviderRegistry {
ttl: ttl || this.cacheConfig.ttl
});
}
}
}
+9 -7
View File
@@ -45,7 +45,7 @@ export class MarketstackProvider implements IStockProvider {
public priority = 90; // Increased from 80 - now supports real-time intraday data during market hours
public readonly requiresAuth = true;
public readonly rateLimit = {
requestsPerMinute: undefined, // No per-minute limit specified
requestsPerMinute: 0, // No per-minute limit specified
requestsPerDay: undefined // Varies by plan
};
@@ -99,8 +99,9 @@ export class MarketstackProvider implements IStockProvider {
}
} catch (error) {
// If intraday fails, fallback to EOD with warning
if (error.message?.includes('intraday') || error.message?.includes('Marketstack API error')) {
this.logger.warn(`Intraday endpoint failed for ${request.ticker}, falling back to EOD:`, error.message);
const errorMessage = plugins.getErrorMessage(error);
if (errorMessage.includes('intraday') || errorMessage.includes('Marketstack API error')) {
this.logger.warn(`Intraday endpoint failed for ${request.ticker}, falling back to EOD:`, errorMessage);
try {
return await this.fetchCurrentPriceEod(request);
} catch (eodError) {
@@ -245,7 +246,7 @@ export class MarketstackProvider implements IStockProvider {
return allPrices;
} catch (error) {
this.logger.error(`Failed to fetch historical prices for ${request.ticker}:`, error);
throw new Error(`Marketstack: Failed to fetch historical prices for ${request.ticker}: ${error.message}`);
throw new Error(`Marketstack: Failed to fetch historical prices for ${request.ticker}: ${plugins.getErrorMessage(error)}`);
}
}
@@ -337,7 +338,7 @@ export class MarketstackProvider implements IStockProvider {
return allPrices;
} catch (error) {
this.logger.error(`Failed to fetch intraday prices for ${request.ticker}:`, error);
throw new Error(`Marketstack: Failed to fetch intraday prices for ${request.ticker}: ${error.message}`);
throw new Error(`Marketstack: Failed to fetch intraday prices for ${request.ticker}: ${plugins.getErrorMessage(error)}`);
}
}
@@ -359,8 +360,9 @@ export class MarketstackProvider implements IStockProvider {
}
} catch (error) {
// Fallback to EOD if intraday fails
if (error.message?.includes('intraday') || error.message?.includes('Marketstack API error')) {
this.logger.warn(`Intraday batch endpoint failed, falling back to EOD:`, error.message);
const errorMessage = plugins.getErrorMessage(error);
if (errorMessage.includes('intraday') || errorMessage.includes('Marketstack API error')) {
this.logger.warn(`Intraday batch endpoint failed, falling back to EOD:`, errorMessage);
try {
return await this.fetchBatchCurrentPricesEod(request);
} catch (eodError) {
+2 -2
View File
@@ -141,7 +141,7 @@ export class SecEdgarProvider implements IFundamentalsProvider {
return this.parseCompanyFacts(request.ticker, cik, companyFacts);
} catch (error) {
this.logger.error(`Failed to fetch fundamentals for ${request.ticker}:`, error);
throw new Error(`SEC EDGAR: Failed to fetch fundamentals for ${request.ticker}: ${error.message}`);
throw new Error(`SEC EDGAR: Failed to fetch fundamentals for ${request.ticker}: ${plugins.getErrorMessage(error)}`);
}
}
@@ -163,7 +163,7 @@ export class SecEdgarProvider implements IFundamentalsProvider {
results.push(fundamentals);
} catch (error) {
this.logger.warn(`Failed to fetch fundamentals for ${ticker}:`, error);
errors.push(`${ticker}: ${error.message}`);
errors.push(`${ticker}: ${plugins.getErrorMessage(error)}`);
// Continue with other tickers
}
}
+3 -3
View File
@@ -86,7 +86,7 @@ export class YahooFinanceProvider implements IStockProvider {
return stockPrice;
} catch (error) {
console.error(`Failed to fetch price for ${request.ticker}:`, error);
throw new Error(`Yahoo Finance: Failed to fetch price for ${request.ticker}: ${error.message}`);
throw new Error(`Yahoo Finance: Failed to fetch price for ${request.ticker}: ${plugins.getErrorMessage(error)}`);
}
}
@@ -145,7 +145,7 @@ export class YahooFinanceProvider implements IStockProvider {
return prices;
} catch (error) {
console.error(`Failed to fetch batch prices:`, error);
throw new Error(`Yahoo Finance: Failed to fetch batch prices: ${error.message}`);
throw new Error(`Yahoo Finance: Failed to fetch batch prices: ${plugins.getErrorMessage(error)}`);
}
}
@@ -191,4 +191,4 @@ export class YahooFinanceProvider implements IStockProvider {
return timeDiff > 3600 ? 'CLOSED' : 'REGULAR';
}
}
}
}