22 Commits

Author SHA1 Message Date
8ebbc16bcd 1.4.0
Some checks failed
Default (tags) / security (push) Failing after 16s
Default (tags) / test (push) Failing after 11s
Default (tags) / release (push) Has been skipped
Default (tags) / metadata (push) Has been skipped
2025-01-04 02:27:53 +01:00
c177193438 feat(HandelsRegister): Add file download functionality to HandelsRegister 2025-01-04 02:27:53 +01:00
7c07bc59e4 1.3.1
Some checks failed
Default (tags) / security (push) Failing after 15s
Default (tags) / test (push) Failing after 12s
Default (tags) / release (push) Has been skipped
Default (tags) / metadata (push) Has been skipped
2025-01-03 02:19:07 +01:00
e4a8d371f7 fix(HandelsRegister): Refined HandelsRegister functionality for better error handling and response capture. 2025-01-03 02:19:07 +01:00
1c0e04cb0d 1.3.0
Some checks failed
Default (tags) / security (push) Failing after 16s
Default (tags) / test (push) Failing after 12s
Default (tags) / release (push) Has been skipped
Default (tags) / metadata (push) Has been skipped
2025-01-03 01:36:27 +01:00
c3f6ef531b feat(core): Enhanced data handling capabilities and improved company search functionalities. 2025-01-03 01:36:26 +01:00
a67a0993d6 1.2.1
Some checks failed
Default (tags) / security (push) Failing after 16s
Default (tags) / test (push) Failing after 12s
Default (tags) / release (push) Has been skipped
Default (tags) / metadata (push) Has been skipped
2025-01-02 03:16:02 +01:00
bc43e4c44a fix(BusinessRecord): Add missing field registrationType to BusinessRecord data 2025-01-02 03:16:02 +01:00
9b2dcd7377 1.2.0
Some checks failed
Default (tags) / security (push) Failing after 15s
Default (tags) / test (push) Failing after 12s
Default (tags) / release (push) Has been skipped
Default (tags) / metadata (push) Has been skipped
2025-01-02 01:26:34 +01:00
1eda50ad13 feat(core): Integrate Handelsregister search for company data retrieval 2025-01-02 01:26:34 +01:00
506a644c6b 1.1.5
Some checks failed
Default (tags) / security (push) Failing after 16s
Default (tags) / test (push) Failing after 12s
Default (tags) / release (push) Has been skipped
Default (tags) / metadata (push) Has been skipped
2025-01-01 18:40:36 +01:00
555e156b5e fix(GermanBusinessData): Add console log for total records processed at the end of the stream. 2025-01-01 18:40:36 +01:00
b67e18f2fe 1.1.4
Some checks failed
Default (tags) / security (push) Failing after 12s
Default (tags) / test (push) Failing after 11s
Default (tags) / release (push) Has been skipped
Default (tags) / metadata (push) Has been skipped
2024-12-31 19:58:18 +01:00
09c9712191 fix(documentation): Update description and keywords in package.json 2024-12-31 19:58:18 +01:00
6258dcdff1 1.1.3
Some checks failed
Default (tags) / security (push) Failing after 15s
Default (tags) / test (push) Failing after 14s
Default (tags) / release (push) Has been skipped
Default (tags) / metadata (push) Has been skipped
2024-12-31 19:54:32 +01:00
605b050177 fix(core): Added missing license file for project completeness. 2024-12-31 19:54:32 +01:00
c97c8e711a 1.1.2
Some checks failed
Default (tags) / security (push) Failing after 15s
Default (tags) / test (push) Failing after 14s
Default (tags) / release (push) Has been skipped
Default (tags) / metadata (push) Has been skipped
2024-12-31 19:49:12 +01:00
d5654a7bc7 fix(GermanBusinessData): Ensure unique ID generation for BusinessRecord 2024-12-31 19:49:12 +01:00
c91439ab6b 1.1.1
Some checks failed
Default (tags) / security (push) Failing after 15s
Default (tags) / test (push) Failing after 11s
Default (tags) / release (push) Has been skipped
Default (tags) / metadata (push) Has been skipped
2024-12-31 17:38:36 +01:00
ad0c6a4112 fix(dependencies): Update dependencies and devDependencies to newer versions. 2024-12-31 17:38:36 +01:00
dd110a198d 1.1.0
Some checks failed
Default (tags) / security (push) Failing after 15s
Default (tags) / test (push) Failing after 12s
Default (tags) / release (push) Has been skipped
Default (tags) / metadata (push) Has been skipped
2024-12-31 14:43:08 +01:00
c30fc40e06 feat(core): Enhanced data handling and retrieval features, improved usage documentation 2024-12-31 14:43:08 +01:00
14 changed files with 8534 additions and 5289 deletions

View File

@ -1,5 +1,88 @@
# Changelog
## 2025-01-04 - 1.4.0 - feat(HandelsRegister)
Add file download functionality to HandelsRegister
- Implemented file download feature in the HandelsRegister class.
- Configured pages in Puppeteer to allow downloads and set download paths.
- Parsed German registration information with more robust error handling.
- Added specific methods for downloading and handling 'SI' and 'AD' files.
## 2025-01-03 - 1.3.1 - fix(HandelsRegister)
Refined HandelsRegister functionality for better error handling and response capture.
- Improved parsing logic in parseGermanRegistration function.
- Enhanced navigateToPage and clickFindButton methods with error messages for clarity.
- Implemented a new responseListener to handle and log HTTP responses correctly.
## 2025-01-03 - 1.3.0 - feat(core)
Enhanced data handling capabilities and improved company search functionalities.
- Updated business record handling to support more registration types.
- Improved search capabilities for fetching company data with refined registration type matching.
- Added robust logging for JSONL data processing with early exit on successful parse.
- Reorganized test cases to include specific company data retrieval.
## 2025-01-02 - 1.2.1 - fix(BusinessRecord)
Add missing field registrationType to BusinessRecord data
- Introduced the 'registrationType' field to the BusinessRecord data schema with possible values 'HRA' or 'HRB'.
## 2025-01-02 - 1.2.0 - feat(core)
Integrate Handelsregister search for company data retrieval
- Added support for searching company data via Handelsregister.
- Replaced GermanBusinessData functionality with JsonlDataProcessor.
- Included smartbrowser dependency for handling web requests to Handelsregister.
## 2025-01-01 - 1.1.5 - fix(GermanBusinessData)
Add console log for total records processed at the end of the stream.
- Ensure that the number of records processed is logged at the end of data stream processing.
## 2024-12-31 - 1.1.4 - fix(documentation)
Update description and keywords in package.json
- Corrected the package description to reflect the focus on managing, accessing, and updating open data with MongoDB integration.
- Expanded the keywords in the package metadata to include data integration and processing terms.
- Improved README.md with more extensive setup, usage, and introduction of the library's functionalities.
## 2024-12-31 - 1.1.3 - fix(core)
Added missing license file for project completeness.
- Introduced a LICENSE file to the project, ensuring clarity on software usage permissions.
## 2024-12-31 - 1.1.2 - fix(GermanBusinessData)
Ensure unique ID generation for BusinessRecord
- Added generation of a new ID for each BusinessRecord in GermanBusinessData.
- Ensures each business record has a unique identifier.
## 2024-12-31 - 1.1.1 - fix(dependencies)
Update dependencies and devDependencies to newer versions.
- @git.zone/tsbuild from ^2.1.25 to ^2.2.0
- @git.zone/tsbundle from ^2.0.5 to ^2.1.0
- @git.zone/tsrun from ^1.2.46 to ^1.3.3
- @git.zone/tstest from ^1.0.84 to ^1.0.90
- @push.rocks/tapbundle from ^5.0.15 to ^5.5.4
- @types/node from ^20.9.0 to ^22.10.2
- @push.rocks/qenv from ^6.0.4 to ^6.1.0
- @push.rocks/smartarchive from ^4.0.19 to ^4.0.39
- @push.rocks/smartdata from ^5.0.33 to ^5.2.10
- @push.rocks/smartfile from ^11.0.0 to ^11.0.23
- @push.rocks/smartpath from ^5.0.11 to ^5.0.18
- @push.rocks/smartpromise from ^4.0.3 to ^4.0.4
- @push.rocks/smartrequest from ^2.0.21 to ^2.0.23
- @push.rocks/smartstream from ^3.0.30 to ^3.2.5
## 2024-12-31 - 1.1.0 - feat(core)
Enhanced data handling and retrieval features, improved usage documentation
- Updated package description and added project keywords in package.json.
- Extended readme with detailed usage examples and class structures.
- Added getBusinessRecordByName function for dynamic business record retrieval.
## 2024-07-02 - 1.0.3 - fix(core)
No new changes detected. Preparing for patch release.

19
license Normal file
View File

@ -0,0 +1,19 @@
Copyright (c) 2022 Task Venture Capital GmbH (hello@task.vc)
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:
The above copyright notice and this permission notice shall be included in all
copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
SOFTWARE.

View File

@ -5,10 +5,26 @@
"githost": "gitlab.com",
"gitscope": "fin.cx",
"gitrepo": "opendata",
"description": "open business data",
"description": "A TypeScript library for accessing, managing, and updating open business data, focused on German companies and integrating with MongoDB.",
"npmPackagename": "@fin.cx/opendata",
"license": "MIT",
"projectDomain": "fin.cx"
"projectDomain": "fin.cx",
"keywords": [
"TypeScript",
"open data",
"business data",
"German companies",
"data management",
"business registry",
"npm package",
"MongoDB",
"automation",
"data integration",
"database",
"data processing",
"data retrieval",
"data update"
]
}
},
"npmci": {

View File

@ -1,8 +1,8 @@
{
"name": "@fin.cx/opendata",
"version": "1.0.3",
"version": "1.4.0",
"private": false,
"description": "open business data",
"description": "A TypeScript library for accessing, managing, and updating open business data, focused on German companies and integrating with MongoDB.",
"main": "dist_ts/index.js",
"typings": "dist_ts/index.d.ts",
"type": "module",
@ -14,23 +14,27 @@
"buildDocs": "(tsdoc)"
},
"devDependencies": {
"@git.zone/tsbuild": "^2.1.25",
"@git.zone/tsbundle": "^2.0.5",
"@git.zone/tsrun": "^1.2.46",
"@git.zone/tstest": "^1.0.84",
"@push.rocks/tapbundle": "^5.0.15",
"@types/node": "^20.9.0"
"@git.zone/tsbuild": "^2.2.0",
"@git.zone/tsbundle": "^2.1.0",
"@git.zone/tsrun": "^1.3.3",
"@git.zone/tstest": "^1.0.90",
"@push.rocks/tapbundle": "^5.5.4",
"@types/node": "^22.10.4"
},
"dependencies": {
"@push.rocks/qenv": "^6.0.4",
"@push.rocks/smartarchive": "^4.0.19",
"@push.rocks/smartdata": "^5.0.33",
"@push.rocks/lik": "^6.1.0",
"@push.rocks/qenv": "^6.1.0",
"@push.rocks/smartarchive": "^4.0.39",
"@push.rocks/smartbrowser": "^2.0.8",
"@push.rocks/smartdata": "^5.2.10",
"@push.rocks/smartdelay": "^3.0.5",
"@push.rocks/smartfile": "^11.0.0",
"@push.rocks/smartpath": "^5.0.11",
"@push.rocks/smartpromise": "^4.0.3",
"@push.rocks/smartrequest": "^2.0.21",
"@push.rocks/smartstream": "^3.0.30"
"@push.rocks/smartfile": "^11.0.23",
"@push.rocks/smartpath": "^5.0.18",
"@push.rocks/smartpromise": "^4.0.4",
"@push.rocks/smartrequest": "^2.0.23",
"@push.rocks/smartstream": "^3.2.5",
"@push.rocks/smartunique": "^3.0.9",
"@tsclass/tsclass": "^4.2.0"
},
"repository": {
"type": "git",
@ -54,5 +58,21 @@
"cli.js",
"npmextra.json",
"readme.md"
],
"keywords": [
"TypeScript",
"open data",
"business data",
"German companies",
"data management",
"business registry",
"npm package",
"MongoDB",
"automation",
"data integration",
"database",
"data processing",
"data retrieval",
"data update"
]
}

12917
pnpm-lock.yaml generated

File diff suppressed because it is too large Load Diff

235
readme.md
View File

@ -1,31 +1,218 @@
# @fin.cx/opendata
open business data
## Availability and Links
* [npmjs.org (npm package)](https://www.npmjs.com/package/@fin.cx/opendata)
* [gitlab.com (source)](https://gitlab.com/fin.cx/opendata)
* [github.com (source mirror)](https://github.com/fin.cx/opendata)
* [docs (typedoc)](https://fin.cx.gitlab.io/opendata/)
A TypeScript-based library for accessing and managing open business data, specifically for German companies.
## Status for master
## Install
Status Category | Status Badge
-- | --
GitLab Pipelines | [![pipeline status](https://gitlab.com/fin.cx/opendata/badges/master/pipeline.svg)](https://lossless.cloud)
GitLab Pipeline Test Coverage | [![coverage report](https://gitlab.com/fin.cx/opendata/badges/master/coverage.svg)](https://lossless.cloud)
npm | [![npm downloads per month](https://badgen.net/npm/dy/@fin.cx/opendata)](https://lossless.cloud)
Snyk | [![Known Vulnerabilities](https://badgen.net/snyk/fin.cx/opendata)](https://lossless.cloud)
TypeScript Support | [![TypeScript](https://badgen.net/badge/TypeScript/>=%203.x/blue?icon=typescript)](https://lossless.cloud)
node Support | [![node](https://img.shields.io/badge/node->=%2010.x.x-blue.svg)](https://nodejs.org/dist/latest-v10.x/docs/api/)
Code Style | [![Code Style](https://badgen.net/badge/style/prettier/purple)](https://lossless.cloud)
PackagePhobia (total standalone install weight) | [![PackagePhobia](https://badgen.net/packagephobia/install/@fin.cx/opendata)](https://lossless.cloud)
PackagePhobia (package size on registry) | [![PackagePhobia](https://badgen.net/packagephobia/publish/@fin.cx/opendata)](https://lossless.cloud)
BundlePhobia (total size when bundled) | [![BundlePhobia](https://badgen.net/bundlephobia/minzip/@fin.cx/opendata)](https://lossless.cloud)
To install the `@fin.cx/opendata` package, you can use npm or yarn as your package manager. Here's how you can do it:
Using npm:
```bash
npm install @fin.cx/opendata
```
Using yarn:
```bash
yarn add @fin.cx/opendata
```
## Usage
Use TypeScript for best in class intellisense
For further information read the linked docs at the top of this readme.
## Legal
> MIT licensed | **©** [Task Venture Capital GmbH](https://task.vc)
| By using this npm module you agree to our [privacy policy](https://lossless.gmbH/privacy)
The `@fin.cx/opendata` package provides a comprehensive set of functionalities for handling open business data, focusing on German business data. Let's explore the detailed capabilities of this library through extensive examples and instructions.
### Setting Up the Environment
First, make sure you've set up the necessary environment variables for MongoDB. You will need the following environment variables:
- `MONGODB_URL`: The URL for your MongoDB instance.
- `MONGODB_NAME`: The name of the database to use.
- `MONGODB_USER`: A valid username for accessing the database.
- `MONGODB_PASS`: The password associated with the MongoDB user.
These variables can be configured in a `.env` file or managed through a specific service used for secure environment variables handling.
### Importing and Initializing the Library
To start working with the library, import the necessary classes and initialize the `OpenData` class.
```typescript
import { OpenData } from '@fin.cx/opendata';
const initializeOpenData = async () => {
const openData = new OpenData();
try {
await openData.start();
console.log('OpenData instance has started successfully.');
// Example usage:
await createAndManageBusinessRecords(openData);
} catch (error) {
console.error('Error starting OpenData:', error);
} finally {
await openData.stop();
console.log('OpenData instance has stopped.');
}
};
initializeOpenData();
```
### Managing Business Records
The `BusinessRecord` class represents a company's data. Let's explore how you can create, retrieve, update, and manage these records.
#### Creating a New BusinessRecord
Creating a new business record involves instantiating the `BusinessRecord` class and setting the relevant properties.
```typescript
import { BusinessRecord } from '@fin.cx/opendata';
const createBusinessRecord = async (openData: OpenData) => {
const businessRecord = new openData.CBusinessRecord();
businessRecord.data = {
name: "Tech Innovations GmbH",
address: "Tech Park 42",
postalCode: "80333",
city: "Munich",
country: "Germany",
phone: "+49 89 123456",
email: "info@techinnovations.de",
website: "https://techinnovations.de",
businessType: "GmbH",
registrationNumber: "HRB 654321",
registrationCourt: "Munich",
legalForm: "GmbH",
managingDirectors: ["Alice Müller", "Bob Schmidt"],
foundingDate: new Date("2020-01-01").toISOString(),
capital: "100,000 EUR",
purpose: "Developing innovative tech solutions",
lastUpdate: new Date().toISOString()
};
await businessRecord.save();
console.log('BusinessRecord saved:', businessRecord);
};
```
#### Retrieving Business Records
You can retrieve existing business records by querying the database using various data fields.
```typescript
const retrieveBusinessRecords = async (openData: OpenData) => {
const records = await openData.db
.collection<BusinessRecord>('businessrecords')
.find({ city: "Munich" })
.toArray();
console.log('Found Business Records:', records);
};
```
#### Updating Existing Records
To update an existing `BusinessRecord`, you retrieve the record, modify its data, and save it again.
```typescript
const updateBusinessRecord = async (openData: OpenData, recordId: string) => {
const businessRecord = await openData.CBusinessRecord.getInstance(recordId);
if (businessRecord) {
businessRecord.data.phone = "+49 89 987654";
businessRecord.data.lastUpdate = new Date().toISOString();
await businessRecord.save();
console.log('BusinessRecord updated:', businessRecord);
} else {
console.log('BusinessRecord not found for id:', recordId);
}
};
```
#### Deleting a Business Record
You can delete a business record using its unique identifier.
```typescript
const deleteBusinessRecord = async (openData: OpenData, recordId: string) => {
const businessRecord = await openData.CBusinessRecord.getInstance(recordId);
if (businessRecord) {
await businessRecord.delete();
console.log(`BusinessRecord with id ${recordId} deleted successfully.`);
} else {
console.log('No record found for the provided id:', recordId);
}
};
```
### Updating German Business Data
The package includes functionalities to keep your business data up-to-date by downloading from official German open data repositories.
```typescript
const updateGermanBusinessData = async (openData: OpenData) => {
try {
await openData.germanBusinesses.update();
console.log('German business data has been updated successfully.');
} catch (error) {
console.error('Error updating German business data:', error);
}
};
```
This function fetches the latest open data regarding German companies, processes it, and updates your local database.
### Detailed Class Insights
#### OpenData Class
The `OpenData` class serves as the core of the library, initializing necessary components and controlling data flows:
- **db**: Represents the connection to your MongoDB database.
- **germanBusinesses**: An instance handling specific operations related to German business data updates.
```typescript
class OpenData {
db: plugins.smartdata.SmartdataDb;
germanBusinesses: GermanBusinessData;
private serviceQenv = new plugins.qenv.Qenv(paths.packageDir, paths.nogitDir);
public async start() {
// Database initialization logic
}
public async stop() {
// Cleanup logic
}
}
```
#### GermanBusinessData Class
This class deals specifically with German company data — fetching, processing, and updating local databases consistently with official German data sources.
```typescript
class GermanBusinessData {
public async start() {
await this.update();
}
public async update() {
// Logic for updating business data using import streams and parsing JSON lines.
}
}
```
### Ensuring Data Accuracy and Integrity
When working with business data, ensuring integrity and accuracy is crucial. Each record should be checked for validity before being saved or updated, minimizing inconsistencies. Moreover, robust error handling is essential in every step, from data retrieval to database operations, particularly when dealing with external data sources.
The `@fin.cx/opendata` module provides an extensive toolset for accessing and managing business data, particularly for companies based in Germany. Its functionalities include creating, updating, retrieving, and deleting business records, as well as keeping them current with the latest open data releases. This makes it an invaluable asset for developers aiming to integrate open data seamlessly into their systems, ensuring robust data management capabilities within their applications.
Happy exploring and integrating open data into your projects!

View File

@ -12,4 +12,27 @@ tap.test('should start the instance', async () => {
await testOpenDataInstance.start();
})
// Building the initial database is a long-running import, so it is skipped by default.
tap.skip.test('should build initial data', async () => {
  await testOpenDataInstance.buildInitialDb();
});

// The search result is returned from the test so the next test can reuse it
// through `resultsSearch.testResultPromise`.
const resultsSearch = tap.test('should get the data for a company', async () => {
  const result = await testOpenDataInstance.handelsregister.searchCompany('Volkswagen');
  console.log(result);
  return result;
});

tap.test('should get the data for a specific company', async () => {
  const searchResults = ((await resultsSearch.testResultPromise) ?? []) as any[];
  // Prefer the record the test was written against (index 21), but fall back to the
  // first record with a parsed registration: the result list comes from a live
  // website, so its length and order are not guaranteed.
  const testCompany =
    searchResults[21]?.germanParsedRegistration ??
    searchResults.find((record: any) => record?.germanParsedRegistration)
      ?.germanParsedRegistration;
  if (!testCompany) {
    throw new Error('The company search returned no record with a parsed registration.');
  }
  console.log(`trying to find specific company with:`);
  console.log(testCompany);
  const result = await testOpenDataInstance.handelsregister.getSpecificCompany(testCompany);
  console.log(result);
});

tap.test('should stop the instance', async () => {
  await testOpenDataInstance.stop();
});

tap.start();

View File

@ -3,6 +3,6 @@
*/
export const commitinfo = {
name: '@fin.cx/opendata',
version: '1.0.3',
description: 'open business data'
version: '1.4.0',
description: 'A TypeScript library for accessing, managing, and updating open business data, focused on German companies and integrating with MongoDB.'
}

View File

@ -1,28 +1,45 @@
import * as plugins from './plugins.js';
@plugins.smartdata.Manager()
export class BusinessRecord extends plugins.smartdata.SmartDataDbDoc<BusinessRecord, BusinessRecord> {
export class BusinessRecord extends plugins.smartdata.SmartDataDbDoc<
BusinessRecord,
BusinessRecord
> {
@plugins.smartdata.unI()
id: string;
@plugins.smartdata.svDb()
data: {
name?: string,
address?: string,
postalCode?: string,
city?: string,
country?: string,
phone?: string,
fax?: string,
email?: string,
website?: string,
businessType?: string,
registrationNumber?: string,
registrationCourt?: string,
legalForm?: string,
managingDirectors?: string[],
boardOfDirectors?: string[],
supervisoryBoard?: string[],
foundingDate?: string,
capital?: string,
purpose?: string,
lastUpdate?: string
name?: string;
address?: string;
postalCode?: string;
city?: string;
country?: string;
phone?: string;
fax?: string;
email?: string;
website?: string;
businessType?: string;
registrationId?: string;
germanParsedRegistration?: {
court?: string;
type?: 'HRA' | 'HRB' | 'GnR' | 'PR' | 'VR' | 'GsR';
number?: string;
};
legalForm?:
| 'GmbH'
| 'GmbH & Co. KG'
| 'AG'
| 'LLC'
| 'LLP'
| 'GmbH & Co. KGaA'
| 'GmbH & Co. KGaA, LLC';
managingDirectors?: string[];
boardOfDirectors?: string[];
supervisoryBoard?: string[];
foundingDate?: string;
capital?: string;
purpose?: string;
lastUpdate?: string;
} = {};
}
}

View File

@ -0,0 +1,339 @@
import type { BusinessRecord } from './classes.businessrecord.js';
import type { OpenData } from './classes.main.opendata.js';
import * as plugins from './plugins.js';
import * as paths from './paths.js';
/**
* the HandelsRegister exposed as a class
*/
export class HandelsRegister {
private openDataRef: OpenData;
private asyncExecutionStack = new plugins.lik.AsyncExecutionStack();
private uniqueDowloadFolder = plugins.path.join(paths.downloadDir, plugins.smartunique.uniSimple());
// Puppeteer wrapper instance
public smartbrowserInstance = new plugins.smartbrowser.SmartBrowser();
/**
 * @param openDataRef the OpenData instance to keep a reference to; stored as `this.openDataRef`
 */
constructor(openDataRef: OpenData) {
this.openDataRef = openDataRef;
}
/**
 * Prepares the instance for use: ensures the per-instance download folder
 * exists, then starts the SmartBrowser instance.
 */
public async start() {
// Make sure the download folder exists before the browser is started
await plugins.smartfile.fs.ensureDir(this.uniqueDowloadFolder);
// Start the browser
await this.smartbrowserInstance.start();
}
/**
 * Shuts the instance down: stops the browser and removes the per-instance
 * download folder.
 *
 * The browser is stopped first so a failing folder removal can no longer
 * leave the browser running, and the folder is removed in a `finally` so a
 * failing browser shutdown does not leave the folder behind.
 */
public async stop() {
  try {
    await this.smartbrowserInstance.stop();
  } finally {
    await plugins.smartfile.fs.remove(this.uniqueDowloadFolder);
  }
}
/**
 * Creates a new page, configures it to allow file downloads into this
 * instance's download folder and opens https://www.handelsregister.de/.
 *
 * @returns the prepared page; the caller is responsible for closing it
 * @throws the error raised during setup or navigation; the page is closed
 *         before the error is rethrown so it does not leak
 */
public getNewPage = async () => {
  const page = await this.smartbrowserInstance.headlessBrowser.newPage();
  try {
    // Download behaviour is configured through a DevTools session bound to this page
    const cdpSession = await page.target().createCDPSession();
    await cdpSession.send('Page.setDownloadBehavior', {
      behavior: 'allow',
      downloadPath: this.uniqueDowloadFolder,
    });

    await page.setViewport({ width: 1920, height: 1080 });
    await page.goto('https://www.handelsregister.de/');
    return page;
  } catch (error) {
    // Closing is best effort here: the setup error is the one worth reporting.
    await page.close().catch(() => {});
    throw error;
  }
};
/**
 * Clicks the menu entry (`.ui-menuitem-text > span`) whose trimmed text
 * equals `pageNameArg`.
 *
 * Best effort: failures are logged to the console and never thrown.
 *
 * @param pageArg the page to operate on
 * @param pageNameArg the exact visible text of the menu entry, e.g. 'Normal search'
 */
private navigateToPage = async (
  pageArg: plugins.smartbrowser.smartpuppeteer.puppeteer.Page,
  pageNameArg: string
) => {
  try {
    // The callback runs inside the browser and reports whether it clicked anything,
    // so that success is only logged when a menu entry was actually found.
    const wasClicked = await pageArg.evaluate((pageNameArg2) => {
      const menuEntries = Array.from(document.querySelectorAll('.ui-menuitem-text > span'));
      const targetElement = menuEntries.find((el) => el.textContent?.trim() === pageNameArg2);
      if (!targetElement) {
        return false;
      }
      (targetElement as HTMLElement).click();
      return true;
    }, pageNameArg);

    if (wasClicked) {
      console.log(`Navigated to the ${pageNameArg} page successfully.`);
    } else {
      console.error(`Failed to navigate to the ${pageNameArg} page: menu entry not found.`);
    }
  } catch (error) {
    console.error(`Failed to navigate to the ${pageNameArg} page:`, error);
  }
};
/**
 * Waits (up to 30 seconds) for the search result table and converts each of
 * its rows into a partial business record.
 *
 * @param pageArg the page on which a search has been submitted
 * @returns one record per table row with `name`, `city`, `registrationId`
 *          and `businessType` (taken from the row's status cell); fields are
 *          `undefined` when the corresponding cell is missing
 * @throws the `waitForSelector` error when the table does not appear; an
 *         `error.png` screenshot is written to the download dir first
 */
private waitForResults = async (pageArg: plugins.smartbrowser.smartpuppeteer.puppeteer.Page) => {
  const resultTableSelector = '#ergebnissForm\\:selectedSuchErgebnisFormTable_data';

  try {
    await pageArg.waitForSelector(resultTableSelector, { timeout: 30000 });
  } catch (err) {
    // The screenshot is a debugging aid only; if taking it fails, that must not
    // replace the original error.
    try {
      await pageArg.screenshot({ path: plugins.path.join(paths.downloadDir, 'error.png') });
    } catch (screenshotError) {
      console.error('Failed to capture the error screenshot:', screenshotError);
    }
    throw err;
  }

  const businessRecords: BusinessRecord['data'][] = await pageArg.evaluate((tableSelector) => {
    const textOf = (element: Element | null) => element?.textContent?.trim();
    const rows = Array.from(document.querySelectorAll(`${tableSelector} > tr`));

    return rows.map((row): BusinessRecord['data'] => ({
      name: textOf(row.querySelector('td.ui-panelgrid-cell span.marginLeft20')),
      city: textOf(row.querySelector('td.ui-panelgrid-cell.sitzSuchErgebnisse span')),
      registrationId: textOf(row.querySelector('td.ui-panelgrid-cell.fontTableNameSize')),
      businessType: textOf(row.querySelector('td.ui-panelgrid-cell span.verticalText')),
    }));
  }, resultTableSelector);

  return businessRecords;
};
/**
 * Sets the result page size to 100 and clicks the button labelled "Find".
 *
 * Best effort: failures are logged to the console and never thrown.
 *
 * @param pageArg the page showing the search form
 */
private clickFindButton = async (pageArg: plugins.smartbrowser.smartpuppeteer.puppeteer.Page) => {
  try {
    // Wait until button labels are rendered
    await pageArg.waitForSelector('span.ui-button-text.ui-c', { timeout: 5000 });

    // adjust to 100 results per page
    await pageArg.select('#form\\:ergebnisseProSeite_input', '100');

    // Locate the button through its label text; report back whether it was found
    // so that success is only logged when a click actually happened.
    const wasClicked = await pageArg.evaluate(() => {
      const buttonLabels = Array.from(document.querySelectorAll('span.ui-button-text.ui-c'));
      const findLabel = buttonLabels.find((label) => label.textContent?.trim() === 'Find');
      if (!findLabel) {
        return false;
      }
      // The label is a <span> inside the button; click the button itself when present
      const clickTarget = findLabel.closest('button') || findLabel;
      (clickTarget as HTMLElement).click();
      return true;
    });

    if (wasClicked) {
      console.log('Find button clicked successfully!');
    } else {
      console.error('Failed to find or click the "Find" button: no button labelled "Find" found.');
    }
  } catch (error) {
    console.error('Failed to find or click the "Find" button:', error);
  }
};
/**
 * Triggers a download from the first row of the result table and returns the
 * downloaded file.
 *
 * @param pageArg the page currently showing the search results
 * @param typeArg 'AD' clicks the first link in the row's last cell, 'SI' the last link
 * @returns the first file found in the download folder after the wait, or
 *          `undefined` when nothing was downloaded; the folder is emptied afterwards
 * @throws when the table, its first row, its last cell or the requested link is missing
 */
private async downloadFile(
  pageArg: plugins.smartbrowser.smartpuppeteer.puppeteer.Page,
  typeArg: 'SI' | 'AD'
) {
  // Trigger the file download by clicking on the relevant link
  await pageArg.evaluate((typeArg2) => {
    const tableBody = document.querySelector(
      '#ergebnissForm\\:selectedSuchErgebnisFormTable_data'
    );
    if (!tableBody) {
      throw new Error('Table body not found');
    }

    const firstRow = tableBody.querySelector('tr:nth-child(1)');
    if (!firstRow) {
      throw new Error('First row not found');
    }

    const lastCell = firstRow.querySelector('td:last-child');
    if (!lastCell) {
      throw new Error('Last cell not found in the first row');
    }

    // Resolve only the link that was asked for, so that a missing link is
    // reported under the right name.
    let linkSelector: string;
    switch (typeArg2) {
      case 'AD':
        linkSelector = 'a:first-of-type';
        break;
      case 'SI':
        linkSelector = 'a:last-of-type';
        break;
      default:
        throw new Error('Invalid file type');
    }

    const downloadLink = lastCell.querySelector(linkSelector);
    if (!downloadLink) {
      throw new Error(`${typeArg2} link not found in the last cell`);
    }
    (downloadLink as HTMLElement).click();
  }, typeArg);

  // Fixed grace period for the download to finish. A plain timer is used instead of
  // `page.waitForTimeout`, which is not available in newer Puppeteer versions.
  // TODO: replace with a real "download finished" check.
  await new Promise<void>((resolve) => setTimeout(resolve, 10000));

  const files = await plugins.smartfile.fs.fileTreeToObject(this.uniqueDowloadFolder, '**/*');
  // Empty the folder so that the next download does not pick up this file again
  await plugins.smartfile.fs.ensureEmptyDir(this.uniqueDowloadFolder);

  if (files.length === 0) {
    console.warn(`No file was downloaded for type ${typeArg}.`);
  }
  return files[0];
}
/**
 * Parses a registration string such as
 * `District court Berlin (Charlottenburg) HRB 123456` into court, register
 * type and register number.
 *
 * @param input the registration text as shown in the search results
 * @returns the parsed parts, or `undefined` when the text does not match the
 *          expected `District court <court> <type> <number>` shape
 */
private async parseGermanRegistration(
  input: string
): Promise<BusinessRecord['data']['germanParsedRegistration']> {
  type TParsedRegistration = NonNullable<BusinessRecord['data']['germanParsedRegistration']>;

  // e.g. District court Berlin (Charlottenburg) HRB 123456
  const regex =
    /District court (\p{L}[\p{L}\s-]*?(?:\s*\([\p{L}\s-]+\))?)\s+(HRA|HRB|GnR|VR|PR|GsR)\s+(\d+)/u;
  const match = input.match(regex);
  if (!match) {
    return undefined;
  }

  const [, court, type, number] = match;
  return {
    court,
    // The regex only admits the register types declared on BusinessRecord,
    // so the cast covers the full union rather than just HRA/HRB.
    type: type as TParsedRegistration['type'],
    number,
  };
}
/**
 * Searches the Handelsregister for a company by name and returns basic info
 * for every hit on the first result page.
 *
 * Runs in an exclusive execution slot (60000 passed as the slot timeout).
 * Filling in the form is best effort (failures are logged), while a missing
 * result table makes the call reject.
 *
 * @param companyNameArg the search keywords to enter
 * @returns the found records; `germanParsedRegistration` is set on every
 *          record that has a `registrationId`
 */
public async searchCompany(companyNameArg: string) {
  return this.asyncExecutionStack.getExclusiveExecutionSlot(async () => {
    const page = await this.getNewPage();
    // try/finally: the page must be closed even when waiting for the results throws
    try {
      await this.navigateToPage(page, 'Normal search');

      try {
        // Wait for the textarea to appear
        await page.waitForSelector('#form\\:schlagwoerter', { timeout: 5000 });

        // Enter text into the textarea
        await page.evaluate((text) => {
          const textarea = document.querySelector<HTMLTextAreaElement>('#form\\:schlagwoerter');
          if (textarea) {
            textarea.value = text;
            // Setting `.value` does not fire events, so dispatch `change` manually
            textarea.dispatchEvent(new Event('change', { bubbles: true }));
          }
        }, companyNameArg);

        console.log('Text entered successfully!');
      } catch (error) {
        console.error('Failed to find or enter text into the textarea:', error);
      }

      try {
        // Wait for the radio button's label to appear
        await page.waitForSelector('label[for="form:schlagwortOptionen:0"]', { timeout: 5000 });

        // Click the label to select the radio button
        await page.evaluate(() => {
          const label = document.querySelector<HTMLLabelElement>(
            'label[for="form:schlagwortOptionen:0"]'
          );
          if (label) {
            label.click();
          }
        });

        console.log('Radio button clicked successfully!');
      } catch (error) {
        console.error('Failed to find or click the radio button:', error);
      }

      await this.clickFindButton(page);

      const businessRecords = await this.waitForResults(page);

      // Parse out the registration info
      for (const record of businessRecords) {
        if (record.registrationId) {
          record.germanParsedRegistration = await this.parseGermanRegistration(
            record.registrationId
          );
        }
      }

      return businessRecords;
    } finally {
      await page.close();
    }
  }, 60000);
}
/**
 * Searches for one specific company by register type, number and court, then
 * downloads its 'SI' and 'AD' files from the first result row.
 *
 * Runs in an exclusive execution slot (60000 passed as the slot timeout).
 *
 * @param companyArg the parsed registration, e.g. as produced by `searchCompany`;
 *        `number` is required, `type` and `court` are applied when they match
 *        an option in the corresponding dropdown
 * @returns the records shown in the result table and the downloaded files
 *          (SI first, then AD)
 * @throws when `companyArg` or its `number` is missing, or when a form
 *         element or the result table does not appear
 */
public async getSpecificCompany(companyArg: BusinessRecord['data']['germanParsedRegistration']) {
  // `parseGermanRegistration` yields undefined for unparsable input, so fail early
  // with a clear message instead of a TypeError after a page has been opened.
  const registerNumber = companyArg?.number;
  if (!companyArg || !registerNumber) {
    throw new Error(
      'getSpecificCompany requires a parsed registration with at least a register number.'
    );
  }
  const registerType = companyArg.type;
  const registerCourt = companyArg.court;

  return this.asyncExecutionStack.getExclusiveExecutionSlot(async () => {
    const page = await this.getNewPage();
    // try/finally: any of the waits below can time out, and the page must not leak
    try {
      await this.navigateToPage(page, 'Normal search');

      await page.waitForSelector('#form\\:schlagwoerter', { timeout: 5000 });

      // 1) Type of Register (e.g. HRB, HRA, etc.)
      await page.waitForSelector('#form\\:registerArt_label');
      await page.click('#form\\:registerArt_label');
      await page.waitForSelector('#form\\:registerArt_items');
      await page.evaluate((type) => {
        const options = Array.from(document.querySelectorAll('#form\\:registerArt_items li'));
        const targetOption = options.find((option) => option.textContent?.trim() === type);
        (targetOption as HTMLElement)?.click();
      }, registerType);

      // 2) Register number
      await page.waitForSelector('#form\\:registerNummer');
      await page.type('#form\\:registerNummer', registerNumber);

      // 3) Register court
      await page.waitForSelector('#form\\:registergericht_label');
      await page.click('#form\\:registergericht_label');
      await page.waitForSelector('#form\\:registergericht_items');
      await page.evaluate((court) => {
        const options = Array.from(document.querySelectorAll('#form\\:registergericht_items li'));
        const targetOption = options.find((option) => option.textContent?.trim() === court);
        (targetOption as HTMLElement)?.click();
      }, registerCourt);

      // Click 'Find'
      await this.clickFindButton(page);

      // Grab the results; they are logged and returned alongside the files
      const businessRecords = await this.waitForResults(page);
      console.log(businessRecords);

      // Downloads run one after the other because each one reads and then empties
      // the shared download folder.
      const files: plugins.smartfile.SmartFile[] = [];
      files.push(await this.downloadFile(page, 'SI'));
      files.push(await this.downloadFile(page, 'AD'));

      return {
        businessRecords,
        files,
      };
    } finally {
      await page.close();
    }
  }, 60000);
}
}

View File

@ -2,28 +2,23 @@ import * as plugins from './plugins.js';
import * as paths from './paths.js';
import type { OpenData } from './classes.main.opendata.js';
export class GermanBusinessData {
export class JsonlDataProcessor {
public openDataRef: OpenData;
constructor(openDataRefArg: OpenData) {
this.openDataRef = openDataRefArg;
}
public async start() {
await this.update();
}
public async stop() {}
public async update() {
// TODO: define a mapper as argument instead of hard-coding it
public async processDataFromUrl(dataUrlArg = 'https://daten.offeneregister.de/de_companies_ocdata.jsonl.bz2') {
const done = plugins.smartpromise.defer();
const promiseArray: Promise<any>[] = [];
const dataUrl = 'https://daten.offeneregister.de/de_companies_ocdata.jsonl.bz2';
const dataExists = await plugins.smartfile.fs.isDirectory(paths.germanBusinessDataDir);
if (!dataExists) {
await plugins.smartfile.fs.ensureDir(paths.germanBusinessDataDir);
} else {
}
const smartarchive = await plugins.smartarchive.SmartArchive.fromArchiveUrl(dataUrl);
const smartarchive = await plugins.smartarchive.SmartArchive.fromArchiveUrl(dataUrlArg);
promiseArray
.push
// smartarchive.exportToFs(paths.germanBusinessDataDir, 'de_companies_ocdata.jsonl')
@ -49,6 +44,8 @@ export class GermanBusinessData {
if (!line) continue;
try {
entry = JSON.parse(line);
console.log(JSON.stringify(entry, null, 2));
process.exit(0);
} catch (err) {
console.log(line);
await plugins.smartdelay.delayFor(10000);
@ -57,12 +54,13 @@ export class GermanBusinessData {
totalRecordsCounter++;
if (totalRecordsCounter % 10000 === 0) console.log(`${totalRecordsCounter} total records.`);
const businessRecord = new this.openDataRef.CBusinessRecord();
businessRecord.id = await this.openDataRef.CBusinessRecord.getNewId();
businessRecord.data.name = entry.name;
await businessRecord.save();
// console.log(`stored ${businessRecord.data.name}`);
}
},
finalFunction: async (streamToolsArg) => {
console.log(`finished processing ${totalRecordsCounter} records.`);
if (!nextRest) return;
JSON.parse(nextRest);
}
@ -72,4 +70,13 @@ export class GermanBusinessData {
})
);
}
/**
 * Look up a business record by name, case-insensitively.
 *
 * The name is used as a literal search text: regex metacharacters in it
 * (e.g. the parentheses or dots common in company names) are escaped before
 * the value is passed as a `$regex` filter, so they match themselves instead
 * of being interpreted as pattern syntax.
 *
 * @param nameArg the (partial) company name to search for
 * @returns whatever `CBusinessRecord.getInstance` resolves to for this filter
 */
public async getBusinessRecordByName(nameArg: string) {
  // Escape every character that has a special meaning in a regular expression.
  const escapedName = nameArg.replace(/[.*+?^${}()|[\]\\]/g, '\\$&');
  const businessRecord = await this.openDataRef.CBusinessRecord.getInstance({
    data: {
      name: { $regex: escapedName, $options: "i" } as any,
    }
  });
  return businessRecord;
}
}

View File

@ -1,13 +1,16 @@
import { BusinessRecord } from './classes.businessrecord.js';
import { GermanBusinessData } from './classes.germanbusinessdata.js';
import { HandelsRegister } from './classes.handelsregister.js';
import { JsonlDataProcessor } from './classes.jsonldata.js';
import * as paths from './paths.js';
import * as plugins from './plugins.js';
export class OpenData {
db: plugins.smartdata.SmartdataDb;
germanBusinesses: GermanBusinessData;
public db: plugins.smartdata.SmartdataDb;
private serviceQenv = new plugins.qenv.Qenv(paths.packageDir, paths.nogitDir);
public jsonLDataProcessor: JsonlDataProcessor;
public handelsregister: HandelsRegister;
public CBusinessRecord = plugins.smartdata.setDefaultManagerForDoc(this, BusinessRecord);
public async start() {
@ -18,8 +21,17 @@ export class OpenData {
mongoDbPass: await this.serviceQenv.getEnvVarOnDemand('MONGODB_PASS'),
});
await this.db.init();
this.germanBusinesses = new GermanBusinessData(this);
await this.germanBusinesses.start();
this.jsonLDataProcessor = new JsonlDataProcessor(this);
this.handelsregister = new HandelsRegister(this);
await this.handelsregister.start();
}
/**
 * Populate the database by running the JSONL data processor
 * with its default data URL.
 */
public async buildInitialDb() {
  const processor = this.jsonLDataProcessor;
  await processor.processDataFromUrl();
}
/**
 * Shut down the services owned by this instance: closes the database
 * connection, then stops the Handelsregister integration.
 */
public async stop() {
  try {
    await this.db.close();
  } finally {
    // Stop the Handelsregister integration even if closing the database
    // rejects, so a failed db shutdown does not leave it running.
    await this.handelsregister.stop();
  }
}
public async stop() {}
}

View File

@ -8,4 +8,8 @@ export const packageDir = plugins.path.join(
// Working directory `.nogit/` inside the package directory
// (presumably excluded from version control, judging by its name — confirm).
export const nogitDir = plugins.path.join(packageDir, './.nogit/');
// Created synchronously as a side effect of importing this module.
plugins.smartfile.fs.ensureDirSync(nogitDir);
// Directory `downloads` below nogitDir; also created at import time.
export const downloadDir = plugins.path.join(nogitDir, 'downloads');
plugins.smartfile.fs.ensureDirSync(downloadDir);
// Directory `germanbusinessdata` below nogitDir. Only the path is defined here;
// it is not created by the statements visible in this module.
export const germanBusinessDataDir = plugins.path.join(nogitDir, 'germanbusinessdata');

View File

@ -6,8 +6,10 @@ export {
}
// @push.rocks scope
import * as lik from '@push.rocks/lik';
import * as qenv from '@push.rocks/qenv';
import * as smartarchive from '@push.rocks/smartarchive';
import * as smartbrowser from '@push.rocks/smartbrowser';
import * as smartdata from '@push.rocks/smartdata';
import * as smartdelay from '@push.rocks/smartdelay';
import * as smartfile from '@push.rocks/smartfile';
@ -15,10 +17,13 @@ import * as smartpath from '@push.rocks/smartpath';
import * as smartpromise from '@push.rocks/smartpromise';
import * as smartrequest from '@push.rocks/smartrequest';
import * as smartstream from '@push.rocks/smartstream';
import * as smartunique from '@push.rocks/smartunique';
export {
lik,
qenv,
smartarchive,
smartbrowser,
smartdata,
smartdelay,
smartfile,
@ -26,4 +31,12 @@ export {
smartpromise,
smartrequest,
smartstream,
}
smartunique,
}
// @tsclass scope
import * as tsclass from '@tsclass/tsclass';
export {
tsclass,
}