Compare commits
28 Commits
Author | SHA1 | Date | |
---|---|---|---|
25147deb7f | |||
4030bef7a8 | |||
c6964f0310 | |||
9a9f203af2 | |||
174086defc | |||
43c9d3b3b6 | |||
39724b61d6 | |||
d9588f8f65 | |||
6ce6153ccf | |||
ec2d4f9fbc | |||
a19be31381 | |||
9c3f012da7 | |||
8ebbc16bcd | |||
c177193438 | |||
7c07bc59e4 | |||
e4a8d371f7 | |||
1c0e04cb0d | |||
c3f6ef531b | |||
a67a0993d6 | |||
bc43e4c44a | |||
9b2dcd7377 | |||
1eda50ad13 | |||
506a644c6b | |||
555e156b5e | |||
b67e18f2fe | |||
09c9712191 | |||
6258dcdff1 | |||
605b050177 |
93
changelog.md
93
changelog.md
@ -1,5 +1,98 @@
|
||||
# Changelog
|
||||
|
||||
## 2025-04-08 - 1.4.6 - fix(tests & jsonl)
|
||||
Improve test structure and refine JSONL parsing for incomplete data
|
||||
|
||||
- Refactored test files to remove redundant get-specific-company tests in test.ts and added missing tests in test.handelsregister.ts
|
||||
- Updated JSONL data processor to conditionally parse remaining data when available
|
||||
|
||||
## 2025-04-05 - 1.4.5 - fix(metadata)
|
||||
Update repository, bugs, and homepage URLs to code.foss.global
|
||||
|
||||
- Repository URL updated from gitlab.com to code.foss.global
|
||||
- Bugs URL updated from gitlab.com to code.foss.global
|
||||
- Homepage URL updated to code.foss.global
|
||||
|
||||
## 2025-04-05 - 1.4.4 - fix(dependencies & tests)
|
||||
Update dependency versions and refine test search query
|
||||
|
||||
- Bumped versions for several dependencies in package.json, including @git.zone/tsbuild, @git.zone/tsbundle, @git.zone/tstest, @push.rocks/tapbundle, @push.rocks/smartdata, @push.rocks/smartfile, @push.rocks/smartpromise, @push.rocks/smartrequest, and @tsclass/tsclass
|
||||
- Updated test file to replace the search query 'Volkswagen' with 'LADR'
|
||||
- Re-enabled the build initial data test by removing tap.skip
|
||||
|
||||
## 2025-01-07 - 1.4.3 - fix(test)
|
||||
Corrected index value in test for fetching specific company data
|
||||
|
||||
- Updated the index from 8 to 7 for the germanParsedRegistration fetch in test
|
||||
|
||||
## 2025-01-07 - 1.4.2 - fix(core)
|
||||
Fix concurrency and download handling in HandelsRegister class and adjust test cases
|
||||
|
||||
- Improved the clickFindButton function to include an argument for results limit.
|
||||
- Enhanced the downloadFile function to rename and ensure files are correctly handled.
|
||||
- Updated searchCompany method to allow specifying a limit on the number of search results.
|
||||
- Adjusted test cases to select specific test data indices and output test files to a dedicated directory.
|
||||
|
||||
## 2025-01-04 - 1.4.1 - fix(core)
|
||||
Fix issues with JSONL data processing and improve error handling in business record validation
|
||||
|
||||
- Fixed JSONL data processing by adding concurrent processing for each JSON line to enhance performance.
|
||||
- Added validation logic in BusinessRecord class to ensure that the mandatory fields are checked.
|
||||
- Adjusted environment variable loading in OpenData class to ensure correct database initialization.
|
||||
- Included missing dependencies and exports in the project files to ensure proper functionality.
|
||||
|
||||
## 2025-01-04 - 1.4.0 - feat(HandelsRegister)
|
||||
Add file download functionality to HandelsRegister
|
||||
|
||||
- Implemented file download feature in the HandelsRegister class.
|
||||
- Configured pages in Puppeteer to allow downloads and set download paths.
|
||||
- Parsed German registration information with more robust error handling.
|
||||
- Added specific methods for downloading and handling 'SI' and 'AD' files.
|
||||
|
||||
## 2025-01-03 - 1.3.1 - fix(HandelsRegister)
|
||||
Refined HandelsRegister functionality for better error handling and response capture.
|
||||
|
||||
- Improved parsing logic in parseGermanRegistration function.
|
||||
- Enhanced navigateToPage and clickFindButton methods with error messages for clarity.
|
||||
- Implemented a new responseListener to handle and log HTTP responses correctly.
|
||||
|
||||
## 2025-01-03 - 1.3.0 - feat(core)
|
||||
Enhanced data handling capabilities and improved company search functionalities.
|
||||
|
||||
- Updated business record handling to support more registration types.
|
||||
- Improved search capabilities for fetching company data with refined registration type matching.
|
||||
- Added robust logging for JSONL data processing with early exit on successful parse.
|
||||
- Reorganized test cases to include specific company data retrieval.
|
||||
|
||||
## 2025-01-02 - 1.2.1 - fix(BusinessRecord)
|
||||
Add missing field registrationType to BusinessRecord data
|
||||
|
||||
- Introduced the 'registrationType' field to the BusinessRecord data schema with possible values 'HRA' or 'HRB'.
|
||||
|
||||
## 2025-01-02 - 1.2.0 - feat(core)
|
||||
Integrate Handelsregister search for company data retrieval
|
||||
|
||||
- Added support for searching company data via Handelsregister.
|
||||
- Replaced GermanBusinessData functionality with JsonlDataProcessor.
|
||||
- Included smartbrowser dependency for handling web requests to Handelsregister.
|
||||
|
||||
## 2025-01-01 - 1.1.5 - fix(GermanBusinessData)
|
||||
Add console log for total records processed at the end of the stream.
|
||||
|
||||
- Ensure that the number of records processed is logged at the end of data stream processing.
|
||||
|
||||
## 2024-12-31 - 1.1.4 - fix(documentation)
|
||||
Update description and keywords in package.json
|
||||
|
||||
- Corrected the package description to reflect the focus on managing, accessing, and updating open data with MongoDB integration.
|
||||
- Expanded the keywords in the package metadata to include data integration and processing terms.
|
||||
- Improved README.md with more extensive setup, usage, and introduction of the library's functionalities.
|
||||
|
||||
## 2024-12-31 - 1.1.3 - fix(core)
|
||||
Added missing license file for project completeness.
|
||||
|
||||
- Introduced a LICENSE file to the project, ensuring clarity on software usage permissions.
|
||||
|
||||
## 2024-12-31 - 1.1.2 - fix(GermanBusinessData)
|
||||
Ensure unique ID generation for BusinessRecord
|
||||
|
||||
|
19
license
Normal file
19
license
Normal file
@ -0,0 +1,19 @@
|
||||
Copyright (c) 2022 Task Venture Capital GmbH (hello@task.vc)
|
||||
|
||||
Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
of this software and associated documentation files (the "Software"), to deal
|
||||
in the Software without restriction, including without limitation the rights
|
||||
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
copies of the Software, and to permit persons to whom the Software is
|
||||
furnished to do so, subject to the following conditions:
|
||||
|
||||
The above copyright notice and this permission notice shall be included in all
|
||||
copies or substantial portions of the Software.
|
||||
|
||||
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
||||
SOFTWARE.
|
@ -5,7 +5,7 @@
|
||||
"githost": "gitlab.com",
|
||||
"gitscope": "fin.cx",
|
||||
"gitrepo": "opendata",
|
||||
"description": "A TypeScript-based library for accessing and managing open business data, specifically for German companies.",
|
||||
"description": "A TypeScript library for accessing, managing, and updating open business data, focused on German companies and integrating with MongoDB.",
|
||||
"npmPackagename": "@fin.cx/opendata",
|
||||
"license": "MIT",
|
||||
"projectDomain": "fin.cx",
|
||||
@ -17,14 +17,21 @@
|
||||
"data management",
|
||||
"business registry",
|
||||
"npm package",
|
||||
"database",
|
||||
"MongoDB",
|
||||
"automation"
|
||||
"automation",
|
||||
"data integration",
|
||||
"database",
|
||||
"data processing",
|
||||
"data retrieval",
|
||||
"data update"
|
||||
]
|
||||
}
|
||||
},
|
||||
"npmci": {
|
||||
"npmGlobalTools": [],
|
||||
"npmAccessLevel": "public"
|
||||
},
|
||||
"tsdoc": {
|
||||
"legal": "\n## License and Legal Information\n\nThis repository contains open-source code that is licensed under the MIT License. A copy of the MIT License can be found in the [license](license) file within this repository. \n\n**Please note:** The MIT License does not grant permission to use the trade names, trademarks, service marks, or product names of the project, except as required for reasonable and customary use in describing the origin of the work and reproducing the content of the NOTICE file.\n\n### Trademarks\n\nThis project is owned and maintained by Task Venture Capital GmbH. The names and logos associated with Task Venture Capital GmbH and any related products or services are trademarks of Task Venture Capital GmbH and are not included within the scope of the MIT license granted herein. Use of these trademarks must comply with Task Venture Capital GmbH's Trademark Guidelines, and any usage must be approved in writing by Task Venture Capital GmbH.\n\n### Company Information\n\nTask Venture Capital GmbH \nRegistered at District court Bremen HRB 35230 HB, Germany\n\nFor any legal inquiries or if you require further information, please contact us via email at hello@task.vc.\n\nBy using this repository, you acknowledge that you have read this section, agree to comply with its terms, and understand that the licensing of the code does not imply endorsement by Task Venture Capital GmbH of any derivative works.\n"
|
||||
}
|
||||
}
|
47
package.json
47
package.json
@ -1,8 +1,8 @@
|
||||
{
|
||||
"name": "@fin.cx/opendata",
|
||||
"version": "1.1.2",
|
||||
"version": "1.4.6",
|
||||
"private": false,
|
||||
"description": "A TypeScript-based library for accessing and managing open business data, specifically for German companies.",
|
||||
"description": "A TypeScript library for accessing, managing, and updating open business data, focused on German companies and integrating with MongoDB.",
|
||||
"main": "dist_ts/index.js",
|
||||
"typings": "dist_ts/index.d.ts",
|
||||
"type": "module",
|
||||
@ -14,32 +14,38 @@
|
||||
"buildDocs": "(tsdoc)"
|
||||
},
|
||||
"devDependencies": {
|
||||
"@git.zone/tsbuild": "^2.2.0",
|
||||
"@git.zone/tsbundle": "^2.1.0",
|
||||
"@git.zone/tsbuild": "^2.3.2",
|
||||
"@git.zone/tsbundle": "^2.2.5",
|
||||
"@git.zone/tsrun": "^1.3.3",
|
||||
"@git.zone/tstest": "^1.0.90",
|
||||
"@push.rocks/tapbundle": "^5.5.4",
|
||||
"@types/node": "^22.10.2"
|
||||
"@git.zone/tstest": "^1.0.96",
|
||||
"@push.rocks/tapbundle": "^5.6.2",
|
||||
"@types/node": "^22.14.0"
|
||||
},
|
||||
"dependencies": {
|
||||
"@push.rocks/lik": "^6.1.0",
|
||||
"@push.rocks/qenv": "^6.1.0",
|
||||
"@push.rocks/smartarchive": "^4.0.39",
|
||||
"@push.rocks/smartdata": "^5.2.10",
|
||||
"@push.rocks/smartarray": "^1.1.0",
|
||||
"@push.rocks/smartbrowser": "^2.0.8",
|
||||
"@push.rocks/smartdata": "^5.2.12",
|
||||
"@push.rocks/smartdelay": "^3.0.5",
|
||||
"@push.rocks/smartfile": "^11.0.23",
|
||||
"@push.rocks/smartfile": "^11.2.0",
|
||||
"@push.rocks/smartpath": "^5.0.18",
|
||||
"@push.rocks/smartpromise": "^4.0.4",
|
||||
"@push.rocks/smartrequest": "^2.0.23",
|
||||
"@push.rocks/smartstream": "^3.2.5"
|
||||
"@push.rocks/smartpromise": "^4.2.3",
|
||||
"@push.rocks/smartrequest": "^2.1.0",
|
||||
"@push.rocks/smartstream": "^3.2.5",
|
||||
"@push.rocks/smartunique": "^3.0.9",
|
||||
"@push.rocks/smartxml": "^1.1.1",
|
||||
"@tsclass/tsclass": "^8.2.0"
|
||||
},
|
||||
"repository": {
|
||||
"type": "git",
|
||||
"url": "git+https://gitlab.com/fin.cx/opendata.git"
|
||||
"url": "https://code.foss.global/fin.cx/opendata.git"
|
||||
},
|
||||
"bugs": {
|
||||
"url": "https://gitlab.com/fin.cx/opendata/issues"
|
||||
"url": "https://code.foss.global/fin.cx/opendata/issues"
|
||||
},
|
||||
"homepage": "https://gitlab.com/fin.cx/opendata#readme",
|
||||
"homepage": "https://code.foss.global/fin.cx/opendata#readme",
|
||||
"browserslist": [
|
||||
"last 1 chrome versions"
|
||||
],
|
||||
@ -63,8 +69,13 @@
|
||||
"data management",
|
||||
"business registry",
|
||||
"npm package",
|
||||
"database",
|
||||
"MongoDB",
|
||||
"automation"
|
||||
]
|
||||
"automation",
|
||||
"data integration",
|
||||
"database",
|
||||
"data processing",
|
||||
"data retrieval",
|
||||
"data update"
|
||||
],
|
||||
"packageManager": "pnpm@10.7.0+sha512.6b865ad4b62a1d9842b61d674a393903b871d9244954f652b8842c2b553c72176b278f64c463e52d40fff8aba385c235c8c9ecf5cc7de4fd78b8bb6d49633ab6"
|
||||
}
|
||||
|
2840
pnpm-lock.yaml
generated
2840
pnpm-lock.yaml
generated
File diff suppressed because it is too large
Load Diff
291
readme.md
291
readme.md
@ -1,6 +1,6 @@
|
||||
```markdown
|
||||
# @fin.cx/opendata
|
||||
open business data
|
||||
|
||||
A TypeScript-based library for accessing and managing open business data, specifically for German companies.
|
||||
|
||||
## Install
|
||||
|
||||
@ -20,234 +20,217 @@ yarn add @fin.cx/opendata
|
||||
|
||||
## Usage
|
||||
|
||||
The `@fin.cx/opendata` package offers functionalities for handling open business data, with a primary focus on German business data. Let's explore its capabilities through detailed examples.
|
||||
The `@fin.cx/opendata` package provides a comprehensive set of functionalities for handling open business data, focusing on German business data. Let's explore the detailed capabilities of this library through extensive examples and instructions.
|
||||
|
||||
### Setting Up
|
||||
### Setting Up the Environment
|
||||
|
||||
#### Importing the Module
|
||||
First, make sure you've set up the necessary environment variables for MongoDB. You will need the following environment variables:
|
||||
- `MONGODB_URL`: The URL for your MongoDB instance.
|
||||
- `MONGODB_NAME`: The name of the database to use.
|
||||
- `MONGODB_USER`: A valid username for accessing the database.
|
||||
- `MONGODB_PASS`: The password associated with the MongoDB user.
|
||||
|
||||
Begin by importing necessary components from the `@fin.cx/opendata` package. You'll also need to set up some environment variables for the MongoDB instance.
|
||||
These variables can be configured in a `.env` file or managed through a specific service used for secure environment variables handling.
|
||||
|
||||
### Importing and Initializing the Library
|
||||
|
||||
To start working with the library, import the necessary classes and initialize the `OpenData` class.
|
||||
|
||||
```typescript
|
||||
import { OpenData } from '@fin.cx/opendata';
|
||||
|
||||
const startOpenDataInstance = async () => {
|
||||
const initializeOpenData = async () => {
|
||||
const openData = new OpenData();
|
||||
|
||||
try {
|
||||
await openData.start();
|
||||
console.log('OpenData instance has started successfully.');
|
||||
|
||||
// Example usage:
|
||||
await createAndManageBusinessRecords(openData);
|
||||
|
||||
await openData.start(); // Start the open data instance
|
||||
console.log('OpenData instance started.');
|
||||
|
||||
// your code here
|
||||
|
||||
await openData.stop();
|
||||
console.log('OpenData instance stopped.');
|
||||
} catch (error) {
|
||||
console.error('Error starting OpenData:', error);
|
||||
} finally {
|
||||
await openData.stop();
|
||||
console.log('OpenData instance has stopped.');
|
||||
}
|
||||
};
|
||||
|
||||
startOpenDataInstance().catch(console.error);
|
||||
initializeOpenData();
|
||||
```
|
||||
|
||||
### BusinessRecord Usage
|
||||
### Managing Business Records
|
||||
|
||||
A `BusinessRecord` is the main entity you'll be working with. Here's how you manage business records.
|
||||
The `BusinessRecord` class represents a company's data. Let's explore how you can create, retrieve, update, and manage these records.
|
||||
|
||||
#### Creating a New BusinessRecord
|
||||
|
||||
Creating a new business record involves instantiating the `BusinessRecord` class and setting the relevant properties.
|
||||
|
||||
```typescript
|
||||
import { BusinessRecord } from '@fin.cx/opendata';
|
||||
|
||||
const createBusinessRecord = async (openData: OpenData) => {
|
||||
const businessRecord = new openData.CBusinessRecord();
|
||||
businessRecord.data.name = "Example Company";
|
||||
businessRecord.data.address = "Example Street 1";
|
||||
businessRecord.data.postalCode = "12345";
|
||||
businessRecord.data.city = "Example City";
|
||||
businessRecord.data.country = "Germany";
|
||||
businessRecord.data.phone = "+49 123 456789";
|
||||
businessRecord.data.email = "contact@example.com";
|
||||
businessRecord.data.website = "https://example.com";
|
||||
businessRecord.data.businessType = "GmbH";
|
||||
businessRecord.data.registrationNumber = "HRB 123456";
|
||||
businessRecord.data.registrationCourt = "Munich";
|
||||
businessRecord.data.legalForm = "GmbH";
|
||||
businessRecord.data.managingDirectors = ["John Doe", "Jane Smith"];
|
||||
businessRecord.data.foundingDate = new Date().toISOString();
|
||||
businessRecord.data.capital = "50,000 EUR";
|
||||
businessRecord.data.purpose = "Tech Solutions";
|
||||
businessRecord.data.lastUpdate = new Date().toISOString();
|
||||
|
||||
businessRecord.data = {
|
||||
name: "Tech Innovations GmbH",
|
||||
address: "Tech Park 42",
|
||||
postalCode: "80333",
|
||||
city: "Munich",
|
||||
country: "Germany",
|
||||
phone: "+49 89 123456",
|
||||
email: "info@techinnovations.de",
|
||||
website: "https://techinnovations.de",
|
||||
businessType: "GmbH",
|
||||
registrationNumber: "HRB 654321",
|
||||
registrationCourt: "Munich",
|
||||
legalForm: "GmbH",
|
||||
managingDirectors: ["Alice Müller", "Bob Schmidt"],
|
||||
foundingDate: new Date("2020-01-01").toISOString(),
|
||||
capital: "100,000 EUR",
|
||||
purpose: "Developing innovative tech solutions",
|
||||
lastUpdate: new Date().toISOString()
|
||||
};
|
||||
|
||||
await businessRecord.save();
|
||||
console.log('BusinessRecord saved:', businessRecord);
|
||||
};
|
||||
```
|
||||
|
||||
#### Retrieving BusinessRecord
|
||||
#### Retrieving Business Records
|
||||
|
||||
Retrieve a business record by querying the database.
|
||||
You can retrieve existing business records by querying the database using various data fields.
|
||||
|
||||
```typescript
|
||||
import { BusinessRecord } from '@fin.cx/opendata';
|
||||
const retrieveBusinessRecords = async (openData: OpenData) => {
|
||||
const records = await openData.db
|
||||
.collection<BusinessRecord>('businessrecords')
|
||||
.find({ city: "Munich" })
|
||||
.toArray();
|
||||
|
||||
const findBusinessRecord = async (openData: OpenData) => {
|
||||
const businessRecords = await openData.db.collection<BusinessRecord>('businessrecords').find().toArray();
|
||||
console.log('Retrieved Business Records:', businessRecords);
|
||||
console.log('Found Business Records:', records);
|
||||
};
|
||||
```
|
||||
|
||||
### Updating Business Data
|
||||
#### Updating Existing Records
|
||||
|
||||
The `GermanBusinessData` class handles the specifics of updating and maintaining the data.
|
||||
|
||||
#### Updating German Business Data
|
||||
To update an existing `BusinessRecord`, you retrieve the record, modify its data, and save it again.
|
||||
|
||||
```typescript
|
||||
import { OpenData } from '@fin.cx/opendata';
|
||||
const updateBusinessRecord = async (openData: OpenData, recordId: string) => {
|
||||
const businessRecord = await openData.CBusinessRecord.getInstance(recordId);
|
||||
if (businessRecord) {
|
||||
businessRecord.data.phone = "+49 89 987654";
|
||||
businessRecord.data.lastUpdate = new Date().toISOString();
|
||||
|
||||
await businessRecord.save();
|
||||
console.log('BusinessRecord updated:', businessRecord);
|
||||
} else {
|
||||
console.log('BusinessRecord not found for id:', recordId);
|
||||
}
|
||||
};
|
||||
```
|
||||
|
||||
#### Deleting a Business Record
|
||||
|
||||
You can delete a business record using its unique identifier.
|
||||
|
||||
```typescript
|
||||
const deleteBusinessRecord = async (openData: OpenData, recordId: string) => {
|
||||
const businessRecord = await openData.CBusinessRecord.getInstance(recordId);
|
||||
if (businessRecord) {
|
||||
await businessRecord.delete();
|
||||
console.log(`BusinessRecord with id ${recordId} deleted successfully.`);
|
||||
} else {
|
||||
console.log('No record found for the provided id:', recordId);
|
||||
}
|
||||
};
|
||||
```
|
||||
|
||||
### Updating German Business Data
|
||||
|
||||
The package includes functionalities to keep your business data up-to-date by downloading from official German open data repositories.
|
||||
|
||||
```typescript
|
||||
const updateGermanBusinessData = async (openData: OpenData) => {
|
||||
await openData.germanBusinesses.update();
|
||||
console.log('German business data updated.');
|
||||
try {
|
||||
await openData.germanBusinesses.update();
|
||||
console.log('German business data has been updated successfully.');
|
||||
} catch (error) {
|
||||
console.error('Error updating German business data:', error);
|
||||
}
|
||||
};
|
||||
|
||||
startOpenDataInstance()
|
||||
.then((openData) => {
|
||||
// Use the instance
|
||||
return updateGermanBusinessData(openData);
|
||||
})
|
||||
.catch(console.error);
|
||||
```
|
||||
|
||||
This function downloads the latest data from the German business data source, processes it, and updates the local database.
|
||||
This function fetches the latest open data regarding German companies, processes it, and updates your local database.
|
||||
|
||||
### Detailed Class Structures and Methods
|
||||
### Detailed Class Insights
|
||||
|
||||
#### OpenData Class
|
||||
|
||||
The `OpenData` class is the main entry point.
|
||||
The `OpenData` class serves as the core of the library, initializing necessary components and controlling data flows:
|
||||
|
||||
- **db**: Represents the connection to your MongoDB database.
|
||||
- **germanBusinesses**: An instance handling specific operations related to German business data updates.
|
||||
|
||||
```typescript
|
||||
import { OpenData } from '@fin.cx/opendata';
|
||||
|
||||
class OpenData {
|
||||
db: plugins.smartdata.SmartdataDb;
|
||||
germanBusinesses: GermanBusinessData;
|
||||
|
||||
private serviceQenv = new plugins.qenv.Qenv(paths.packageDir, paths.nogitDir);
|
||||
|
||||
public CBusinessRecord = plugins.smartdata.setDefaultManagerForDoc(this, BusinessRecord);
|
||||
|
||||
public async start() {
|
||||
// Initialize smart data DB
|
||||
this.db = new plugins.smartdata.SmartdataDb({
|
||||
mongoDbUrl: await this.serviceQenv.getEnvVarOnDemand('MONGODB_URL'),
|
||||
mongoDbName: await this.serviceQenv.getEnvVarOnDemand('MONGODB_NAME'),
|
||||
mongoDbUser: await this.serviceQenv.getEnvVarOnDemand('MONGODB_USER'),
|
||||
mongoDbPass: await this.serviceQenv.getEnvVarOnDemand('MONGODB_PASS'),
|
||||
});
|
||||
|
||||
await this.db.init();
|
||||
this.germanBusinesses = new GermanBusinessData(this);
|
||||
await this.germanBusinesses.start();
|
||||
// Database initialization logic
|
||||
}
|
||||
|
||||
public async stop() {
|
||||
// Clean up resources if necessary
|
||||
// Cleanup logic
|
||||
}
|
||||
}
|
||||
```
|
||||
|
||||
#### GermanBusinessData Class
|
||||
|
||||
The `GermanBusinessData` class handles the specifics of German business data.
|
||||
This class deals specifically with German company data — fetching, processing, and updating local databases consistently with official German data sources.
|
||||
|
||||
```typescript
|
||||
import { OpenData } from '@fin.cx/opendata';
|
||||
import * as plugins from './plugins';
|
||||
import * as paths from './paths';
|
||||
|
||||
class GermanBusinessData {
|
||||
public openDataRef: OpenData;
|
||||
|
||||
constructor(openDataRef: OpenData) {
|
||||
this.openDataRef = openDataRef;
|
||||
}
|
||||
|
||||
public async start() {
|
||||
await this.update();
|
||||
}
|
||||
|
||||
public async stop() {
|
||||
// Stop any ongoing processing
|
||||
}
|
||||
|
||||
public async update() {
|
||||
const dataUrl = 'https://daten.offeneregister.de/de_companies_ocdata.jsonl.bz2';
|
||||
const dataExists = await plugins.smartfile.fs.isDirectory(paths.germanBusinessDataDir);
|
||||
|
||||
if (!dataExists) {
|
||||
await plugins.smartfile.fs.ensureDir(paths.germanBusinessDataDir);
|
||||
}
|
||||
|
||||
const smartarchive = await plugins.smartarchive.SmartArchive.fromArchiveUrl(dataUrl);
|
||||
const jsonlDataStream = await smartarchive.exportToStreamOfStreamFiles();
|
||||
|
||||
let totalRecordsCounter = 0;
|
||||
let nextRest: string = '';
|
||||
|
||||
jsonlDataStream.pipe(
|
||||
new plugins.smartstream.SmartDuplex({
|
||||
objectMode: true,
|
||||
writeFunction: async (chunkArg: plugins.smartfile.StreamFile, streamToolsArg) => {
|
||||
const readStream = await chunkArg.createReadStream();
|
||||
readStream.pipe(
|
||||
new plugins.smartstream.SmartDuplex({
|
||||
objectMode: true,
|
||||
writeFunction: async (chunkArg: Buffer, streamToolsArg) => {
|
||||
const currentString = nextRest + chunkArg.toString();
|
||||
|
||||
const lines = currentString.split('\n');
|
||||
nextRest = lines.pop();
|
||||
|
||||
for (const line of lines) {
|
||||
let entry: any;
|
||||
try {
|
||||
entry = JSON.parse(line);
|
||||
} catch (err) {
|
||||
console.error('Error parsing line:', err);
|
||||
continue;
|
||||
}
|
||||
|
||||
totalRecordsCounter++;
|
||||
if (totalRecordsCounter % 10000 === 0) {
|
||||
console.log(`${totalRecordsCounter} total records.`);
|
||||
}
|
||||
|
||||
const businessRecord = new this.openDataRef.CBusinessRecord();
|
||||
businessRecord.data.name = entry?.name;
|
||||
|
||||
await businessRecord.save();
|
||||
}
|
||||
},
|
||||
finalFunction: async (streamToolsArg) => {
|
||||
if (nextRest) {
|
||||
try {
|
||||
JSON.parse(nextRest);
|
||||
} catch (err) {
|
||||
console.error('Error parsing final chunk:', err);
|
||||
}
|
||||
}
|
||||
}
|
||||
})
|
||||
);
|
||||
},
|
||||
})
|
||||
);
|
||||
// Logic for updating business data using import streams and parsing JSON lines.
|
||||
}
|
||||
}
|
||||
```
|
||||
|
||||
### Conclusion
|
||||
### Ensuring Data Accuracy and Integrity
|
||||
|
||||
This module is designed to make it easier to manage open business data, especially focusing on German business data. The examples above demonstrate the core functionalities, including starting and stopping the service, managing business records, and updating data.
|
||||
When working with business data, ensuring integrity and accuracy is crucial. Each record should be checked for validity before being saved or updated, minimizing inconsistencies. Moreover, robust error handling is essential in every step, from data retrieval to database operations, particularly when dealing with external data sources.
|
||||
|
||||
As you work with `@fin.cx/opendata`, you’ll discover it offers a robust and flexible approach for working with open business data seamlessly. Happy coding!
|
||||
```
|
||||
undefined
|
||||
The `@fin.cx/opendata` module provides an extensive toolset for accessing and managing business data, particularly for companies based in Germany. Its functionalities include creating, updating, retrieving, and deleting business records, as well as keeping them current with the latest open data releases. This makes it an invaluable asset for developers aiming to integrate open data seamlessly into their systems, ensuring robust data management capabilities within their applications.
|
||||
|
||||
Happy exploring and integrating open data into your projects!
|
||||
|
||||
## License and Legal Information
|
||||
|
||||
This repository contains open-source code that is licensed under the MIT License. A copy of the MIT License can be found in the [license](license) file within this repository.
|
||||
|
||||
**Please note:** The MIT License does not grant permission to use the trade names, trademarks, service marks, or product names of the project, except as required for reasonable and customary use in describing the origin of the work and reproducing the content of the NOTICE file.
|
||||
|
||||
### Trademarks
|
||||
|
||||
This project is owned and maintained by Task Venture Capital GmbH. The names and logos associated with Task Venture Capital GmbH and any related products or services are trademarks of Task Venture Capital GmbH and are not included within the scope of the MIT license granted herein. Use of these trademarks must comply with Task Venture Capital GmbH's Trademark Guidelines, and any usage must be approved in writing by Task Venture Capital GmbH.
|
||||
|
||||
### Company Information
|
||||
|
||||
Task Venture Capital GmbH
|
||||
Registered at District court Bremen HRB 35230 HB, Germany
|
||||
|
||||
For any legal inquiries or if you require further information, please contact us via email at hello@task.vc.
|
||||
|
||||
By using this repository, you acknowledge that you have read this section, agree to comply with its terms, and understand that the licensing of the code does not imply endorsement by Task Venture Capital GmbH of any derivative works.
|
42
test/test.handelsregister.ts
Normal file
42
test/test.handelsregister.ts
Normal file
@ -0,0 +1,42 @@
|
||||
import { expect, expectAsync, tap } from '@push.rocks/tapbundle';
|
||||
import * as opendata from '../ts/index.js'
|
||||
|
||||
import { BusinessRecord } from '../ts/classes.businessrecord.js';
|
||||
|
||||
let testOpenDataInstance: opendata.OpenData;
|
||||
|
||||
tap.test('first test', async () => {
|
||||
testOpenDataInstance = new opendata.OpenData();
|
||||
expect(testOpenDataInstance).toBeInstanceOf(opendata.OpenData);
|
||||
});
|
||||
|
||||
tap.test('should start the instance', async () => {
|
||||
await testOpenDataInstance.start();
|
||||
});
|
||||
|
||||
const resultsSearch = tap.test('should get the data for a company', async () => {
|
||||
const result = await testOpenDataInstance.handelsregister.searchCompany('LADR', 20);
|
||||
console.log(result);
|
||||
return result;
|
||||
});
|
||||
|
||||
tap.test('should get the data for a specific company', async () => {
|
||||
let testCompany: BusinessRecord['data']['germanParsedRegistration'] = (await resultsSearch.testResultPromise)[0]['germanParsedRegistration'];
|
||||
console.log(`trying to find specific company with:`);
|
||||
console.log(testCompany);
|
||||
const result = await testOpenDataInstance.handelsregister.getSpecificCompany(testCompany);
|
||||
console.log(result);
|
||||
|
||||
result.files.map(async (file) => {
|
||||
await file.writeToDir('./.nogit/testoutput');
|
||||
});
|
||||
|
||||
|
||||
});
|
||||
|
||||
tap.test('should stop the instance', async () => {
|
||||
await testOpenDataInstance.stop();
|
||||
});
|
||||
|
||||
|
||||
tap.start()
|
11
test/test.ts
11
test/test.ts
@ -1,6 +1,8 @@
|
||||
import { expect, expectAsync, tap } from '@push.rocks/tapbundle';
|
||||
import * as opendata from '../ts/index.js'
|
||||
|
||||
import { BusinessRecord } from '../ts/classes.businessrecord.js';
|
||||
|
||||
let testOpenDataInstance: opendata.OpenData;
|
||||
|
||||
tap.test('first test', async () => {
|
||||
@ -12,4 +14,13 @@ tap.test('should start the instance', async () => {
|
||||
await testOpenDataInstance.start();
|
||||
})
|
||||
|
||||
tap.test('should build initial data', async () => {
|
||||
await testOpenDataInstance.buildInitialDb();
|
||||
});
|
||||
|
||||
tap.test('should stop the instance', async () => {
|
||||
await testOpenDataInstance.stop();
|
||||
});
|
||||
|
||||
|
||||
tap.start()
|
||||
|
@ -3,6 +3,6 @@
|
||||
*/
|
||||
export const commitinfo = {
|
||||
name: '@fin.cx/opendata',
|
||||
version: '1.1.2',
|
||||
description: 'A TypeScript-based library for accessing and managing open business data, specifically for German companies.'
|
||||
version: '1.4.6',
|
||||
description: 'A TypeScript library for accessing, managing, and updating open business data, focused on German companies and integrating with MongoDB.'
|
||||
}
|
||||
|
@ -1,32 +1,67 @@
|
||||
import * as plugins from './plugins.js';
|
||||
|
||||
@plugins.smartdata.Manager()
|
||||
export class BusinessRecord extends plugins.smartdata.SmartDataDbDoc<BusinessRecord, BusinessRecord> {
|
||||
|
||||
export class BusinessRecord extends plugins.smartdata.SmartDataDbDoc<
|
||||
BusinessRecord,
|
||||
BusinessRecord
|
||||
> {
|
||||
// STATIC
|
||||
public static getByGermanParsedRegistration = async (parsedGermanRegistrationArg: BusinessRecord['data']['germanParsedRegistration']) => {
|
||||
const businessRecords = await BusinessRecord.getInstance({
|
||||
data: {
|
||||
germanParsedRegistration: parsedGermanRegistrationArg,
|
||||
}
|
||||
});
|
||||
return businessRecords;
|
||||
};
|
||||
|
||||
|
||||
// INSTANCE
|
||||
@plugins.smartdata.unI()
|
||||
id: string;
|
||||
|
||||
|
||||
@plugins.smartdata.svDb()
|
||||
data: {
|
||||
name?: string,
|
||||
address?: string,
|
||||
postalCode?: string,
|
||||
city?: string,
|
||||
country?: string,
|
||||
phone?: string,
|
||||
fax?: string,
|
||||
email?: string,
|
||||
website?: string,
|
||||
businessType?: string,
|
||||
registrationNumber?: string,
|
||||
registrationCourt?: string,
|
||||
legalForm?: string,
|
||||
managingDirectors?: string[],
|
||||
boardOfDirectors?: string[],
|
||||
supervisoryBoard?: string[],
|
||||
foundingDate?: string,
|
||||
capital?: string,
|
||||
purpose?: string,
|
||||
lastUpdate?: string
|
||||
name?: string;
|
||||
startDate?: string;
|
||||
endDate?: string;
|
||||
status?: 'active' | 'liquidating' | 'closed';
|
||||
address?: string;
|
||||
postalCode?: string;
|
||||
city?: string;
|
||||
country?: string;
|
||||
phone?: string;
|
||||
fax?: string;
|
||||
email?: string;
|
||||
website?: string;
|
||||
businessType?: string;
|
||||
registrationId?: string;
|
||||
germanParsedRegistration?: {
|
||||
court?: string;
|
||||
type?: 'HRA' | 'HRB' | 'GnR' | 'PR' | 'VR' | 'GsR';
|
||||
number?: string;
|
||||
};
|
||||
legalForm?:
|
||||
| 'GmbH'
|
||||
| 'GmbH & Co. KG'
|
||||
| 'AG'
|
||||
| 'LLC'
|
||||
| 'LLP'
|
||||
| 'GmbH & Co. KGaA'
|
||||
| 'GmbH & Co. KGaA, LLC';
|
||||
managingDirectors?: string[];
|
||||
boardOfDirectors?: string[];
|
||||
supervisoryBoard?: string[];
|
||||
foundingDate?: string;
|
||||
capital?: string;
|
||||
purpose?: string;
|
||||
lastUpdate?: string;
|
||||
} = {};
|
||||
}
|
||||
|
||||
/**
|
||||
* validates the record against the Handelsregister.
|
||||
*/
|
||||
public async validate() {
|
||||
if (!this.data.name) throw new Error('Name is required.');
|
||||
}
|
||||
}
|
||||
|
@ -1,85 +0,0 @@
|
||||
import * as plugins from './plugins.js';
|
||||
import * as paths from './paths.js';
|
||||
import type { OpenData } from './classes.main.opendata.js';
|
||||
|
||||
export class GermanBusinessData {
|
||||
public openDataRef: OpenData;
|
||||
constructor(openDataRefArg: OpenData) {
|
||||
this.openDataRef = openDataRefArg;
|
||||
}
|
||||
|
||||
public async start() {
|
||||
await this.update();
|
||||
}
|
||||
public async stop() {}
|
||||
|
||||
public async update() {
|
||||
const done = plugins.smartpromise.defer();
|
||||
const promiseArray: Promise<any>[] = [];
|
||||
const dataUrl = 'https://daten.offeneregister.de/de_companies_ocdata.jsonl.bz2';
|
||||
const dataExists = await plugins.smartfile.fs.isDirectory(paths.germanBusinessDataDir);
|
||||
if (!dataExists) {
|
||||
await plugins.smartfile.fs.ensureDir(paths.germanBusinessDataDir);
|
||||
} else {
|
||||
}
|
||||
|
||||
const smartarchive = await plugins.smartarchive.SmartArchive.fromArchiveUrl(dataUrl);
|
||||
promiseArray
|
||||
.push
|
||||
// smartarchive.exportToFs(paths.germanBusinessDataDir, 'de_companies_ocdata.jsonl')
|
||||
();
|
||||
const jsonlDataStream = await smartarchive.exportToStreamOfStreamFiles();
|
||||
let totalRecordsCounter = 0;
|
||||
let nextRest: string = '';
|
||||
jsonlDataStream.pipe(
|
||||
new plugins.smartstream.SmartDuplex({
|
||||
objectMode: true,
|
||||
writeFunction: async (chunkArg: plugins.smartfile.StreamFile, streamToolsArg) => {
|
||||
const readStream = await chunkArg.createReadStream();
|
||||
readStream.pipe(
|
||||
new plugins.smartstream.SmartDuplex({
|
||||
objectMode: true,
|
||||
writeFunction: async (chunkArg: Buffer, streamToolsArg) => {
|
||||
const currentString = nextRest + chunkArg.toString();
|
||||
const lines = currentString.split('\n');
|
||||
nextRest = lines.pop();
|
||||
console.log(`Got another ${lines.length} records.`);
|
||||
for (const line of lines) {
|
||||
let entry: any;
|
||||
if (!line) continue;
|
||||
try {
|
||||
entry = JSON.parse(line);
|
||||
} catch (err) {
|
||||
console.log(line);
|
||||
await plugins.smartdelay.delayFor(10000);
|
||||
}
|
||||
if (!entry) continue;
|
||||
totalRecordsCounter++;
|
||||
if (totalRecordsCounter % 10000 === 0) console.log(`${totalRecordsCounter} total records.`);
|
||||
const businessRecord = new this.openDataRef.CBusinessRecord();
|
||||
businessRecord.id = await this.openDataRef.CBusinessRecord.getNewId();
|
||||
businessRecord.data.name = entry.name;
|
||||
await businessRecord.save();
|
||||
// console.log(`stored ${businessRecord.data.name}`);
|
||||
}
|
||||
},
|
||||
finalFunction: async (streamToolsArg) => {
|
||||
if (!nextRest) return;
|
||||
JSON.parse(nextRest);
|
||||
}
|
||||
})
|
||||
);
|
||||
},
|
||||
})
|
||||
);
|
||||
}
|
||||
|
||||
public async getBusinessRecordByName(nameArg: string) {
|
||||
const businessRecord = await this.openDataRef.CBusinessRecord.getInstance({
|
||||
data: {
|
||||
name: { $regex: `${nameArg}`, $options: "i" } as any,
|
||||
}
|
||||
});
|
||||
return businessRecord;
|
||||
}
|
||||
}
|
358
ts/classes.handelsregister.ts
Normal file
358
ts/classes.handelsregister.ts
Normal file
@ -0,0 +1,358 @@
|
||||
import type { BusinessRecord } from './classes.businessrecord.js';
|
||||
import type { OpenData } from './classes.main.opendata.js';
|
||||
import * as plugins from './plugins.js';
|
||||
import * as paths from './paths.js';
|
||||
|
||||
/**
|
||||
* the HandelsRegister exposed as a class
|
||||
*/
|
||||
export class HandelsRegister {
|
||||
private openDataRef: OpenData;
|
||||
private asyncExecutionStack = new plugins.lik.AsyncExecutionStack();
|
||||
private uniqueDowloadFolder = plugins.path.join(paths.downloadDir, plugins.smartunique.uniSimple());
|
||||
|
||||
// Puppeteer wrapper instance
|
||||
public smartbrowserInstance = new plugins.smartbrowser.SmartBrowser();
|
||||
|
||||
constructor(openDataRef: OpenData) {
|
||||
this.openDataRef = openDataRef;
|
||||
}
|
||||
|
||||
/**
 * Makes sure the instance specific download folder exists and then
 * boots the browser that is used for all register queries.
 */
public async start() {
  const downloadFolder = this.uniqueDowloadFolder;
  await plugins.smartfile.fs.ensureDir(downloadFolder);

  const browser = this.smartbrowserInstance;
  await browser.start();
}
|
||||
|
||||
/**
 * Removes the instance specific download folder and stops the browser.
 * The browser is stopped even when the folder cleanup fails, so no
 * browser process is left behind.
 */
public async stop() {
  try {
    await plugins.smartfile.fs.remove(this.uniqueDowloadFolder);
  } finally {
    await this.smartbrowserInstance.stop();
  }
}
|
||||
|
||||
/**
 * Creates a new page, configures it to store file downloads in the
 * instance specific download folder and opens the Handelsregister start page.
 * If any setup step fails, the page is closed again before the error is rethrown.
 */
public getNewPage = async () => {
  const page = await this.smartbrowserInstance.headlessBrowser.newPage();

  try {
    // 1) Create a DevTools session for this page
    const cdpSession = await page.target().createCDPSession();

    // 2) Allow file downloads and point them at our download folder
    await cdpSession.send('Page.setDownloadBehavior', {
      behavior: 'allow',
      downloadPath: this.uniqueDowloadFolder,
    });

    // 3) Set the viewport and open the start page
    await page.setViewport({ width: 1920, height: 1080 });
    await page.goto('https://www.handelsregister.de/');
    return page;
  } catch (error) {
    // do not leak the tab when the setup fails half way; keep the original error
    await page.close().catch(() => {});
    throw error;
  }
};
|
||||
|
||||
/**
 * Clicks the menu entry whose visible text equals pageNameArg.
 * Best effort: failures (including a missing menu entry) are logged, not thrown.
 */
private navigateToPage = async (
  pageArg: plugins.smartbrowser.smartpuppeteer.puppeteer.Page,
  pageNameArg: string
) => {
  try {
    const wasClicked = await pageArg.evaluate((pageNameArg2) => {
      const elements = Array.from(document.querySelectorAll('.ui-menuitem-text > span'));
      const targetElement = elements.find((el) => el.textContent?.trim() === pageNameArg2);
      if (!targetElement) {
        return false;
      }
      (targetElement as HTMLElement).click();
      return true;
    }, pageNameArg);

    // only report success when something was actually clicked
    if (!wasClicked) {
      throw new Error(`Menu entry "${pageNameArg}" not found.`);
    }
    console.log(`Navigated to the ${pageNameArg} page successfully.`);
  } catch (error) {
    console.error(`Failed to navigate to the ${pageNameArg} page:`, error);
  }
};
|
||||
|
||||
/**
 * Waits (up to 30s) for the result table and extracts one record per table row.
 * When the table does not show up, a screenshot is stored as error.png in the
 * download dir and the original error is rethrown.
 */
private waitForResults = async (pageArg: plugins.smartbrowser.smartpuppeteer.puppeteer.Page) => {
  try {
    await pageArg.waitForSelector('#ergebnissForm\\:selectedSuchErgebnisFormTable_data', {
      timeout: 30000,
    });
  } catch (err) {
    // a failing screenshot must not hide the actual reason (usually the timeout)
    await pageArg
      .screenshot({ path: plugins.path.join(paths.downloadDir, 'error.png') })
      .catch((screenshotErr) => {
        console.error('Failed to store the error screenshot:', screenshotErr);
      });
    throw err;
  }

  const businessRecords: BusinessRecord['data'][] = await pageArg.evaluate(() => {
    const rows = document.querySelectorAll(
      '#ergebnissForm\\:selectedSuchErgebnisFormTable_data > tr'
    );

    return Array.from(rows, (row): BusinessRecord['data'] => {
      const nameElement = row.querySelector('td.ui-panelgrid-cell span.marginLeft20');
      const cityElement = row.querySelector('td.ui-panelgrid-cell.sitzSuchErgebnisse span');
      const statusElement = row.querySelector('td.ui-panelgrid-cell span.verticalText');
      const registrationCourtElement = row.querySelector(
        'td.ui-panelgrid-cell.fontTableNameSize'
      );

      return {
        name: nameElement?.textContent?.trim(),
        city: cityElement?.textContent?.trim(),
        registrationId: registrationCourtElement?.textContent?.trim(),
        businessType: statusElement?.textContent?.trim(),
      };
    });
  });
  return businessRecords;
};
|
||||
|
||||
/**
 * Selects the number of results per page and clicks the "Find" button.
 * Best effort: failures (including a missing button) are logged, not thrown.
 *
 * @param pageArg the page showing the search form
 * @param resultsLimitArg value to select in the results-per-page dropdown (default 100)
 */
private clickFindButton = async (
  pageArg: plugins.smartbrowser.smartpuppeteer.puppeteer.Page,
  resultsLimitArg: number = 100
) => {
  try {
    // Wait for button labels to appear
    await pageArg.waitForSelector('span.ui-button-text.ui-c', { timeout: 5000 });

    // adjust the number of results per page
    await pageArg.select('#form\\:ergebnisseProSeite_input', `${resultsLimitArg}`);

    // Locate and click the button using its text
    const wasClicked = await pageArg.evaluate(() => {
      const buttons = Array.from(document.querySelectorAll('span.ui-button-text.ui-c'));
      const targetButton = buttons.find((button) => button.textContent?.trim() === 'Find');
      if (!targetButton) {
        return false;
      }
      const parentButton = targetButton.closest('button') || targetButton;
      (parentButton as HTMLElement).click();
      return true;
    });

    // only report success when a button was actually clicked
    if (!wasClicked) {
      throw new Error('No button labelled "Find" on the page.');
    }
    console.log('Find button clicked successfully!');
  } catch (error) {
    console.error('Failed to find or click the "Find" button:', error);
  }
};
|
||||
|
||||
/**
 * Downloads one document of the first search result.
 * Clicks the matching link in the last cell of the first result row, reads the
 * downloaded file from the download folder, empties the folder for the next
 * download and renames the file to ad.pdf / si.xml.
 *
 * @param pageArg page that currently shows the result table
 * @param typeArg 'AD' clicks the first link of the cell, 'SI' the last one
 * @returns the downloaded file
 * @throws when the table, row, cell or link is missing, or no file was downloaded
 */
private async downloadFile(
  pageArg: plugins.smartbrowser.smartpuppeteer.puppeteer.Page,
  typeArg: 'SI' | 'AD'
) {
  // Trigger the file download by clicking on the relevant link
  await pageArg.evaluate((typeArg2) => {
    const tableBody = document.querySelector(
      '#ergebnissForm\\:selectedSuchErgebnisFormTable_data'
    );
    if (!tableBody) {
      throw new Error('Table body not found');
    }

    const firstRow = tableBody.querySelector('tr:nth-child(1)');
    if (!firstRow) {
      throw new Error('First row not found');
    }

    const lastCell = firstRow.querySelector('td:last-child');
    if (!lastCell) {
      throw new Error('Last cell not found in the first row');
    }

    // pick the link for the requested document type
    let downloadLink: Element | null;
    switch (typeArg2) {
      case 'AD':
        downloadLink = lastCell.querySelector('a:first-of-type');
        break;
      case 'SI':
        downloadLink = lastCell.querySelector('a:last-of-type');
        break;
      default:
        throw new Error('Invalid file type');
    }
    if (!downloadLink) {
      throw new Error(`${typeArg2} link not found in the last cell`);
    }
    (downloadLink as HTMLElement).click();
  }, typeArg);

  await plugins.smartfile.fs.waitForFileToBeReady(this.uniqueDowloadFolder);

  const files = await plugins.smartfile.fs.fileTreeToObject(this.uniqueDowloadFolder, '**/*');
  const file = files[0];

  // lets clear the folder for the next download
  await plugins.smartfile.fs.ensureEmptyDir(this.uniqueDowloadFolder);

  // fail with a clear message instead of a TypeError on file.rename below
  if (!file) {
    throw new Error(`No ${typeArg} file was downloaded to ${this.uniqueDowloadFolder}`);
  }

  switch (typeArg) {
    case 'AD':
      await file.rename(`ad.pdf`);
      break;
    case 'SI':
      await file.rename(`si.xml`);
      break;
  }
  return file;
}
|
||||
|
||||
/**
 * Helper method to parse the German registration string,
 * e.g. "District court Berlin (Charlottenburg) HRB 123456".
 *
 * @param input text containing court, register type and register number
 * @returns the parsed court, type and number, or undefined when the text does not match
 */
private async parseGermanRegistration(
  input: string
): Promise<BusinessRecord['data']['germanParsedRegistration']> {
  const regex =
    /District court (\p{L}[\p{L}\s-]*?(?:\s*\([\p{L}\s-]+\))?)\s+(HRA|HRB|GnR|VR|PR|GsR)\s+(\d+)/u;
  const match = input.match(regex);

  if (!match) {
    return undefined;
  }

  const [, court, type, number] = match;
  return {
    court,
    // the assertion lists every register type the regex above can capture
    type: type as 'HRA' | 'HRB' | 'GnR' | 'PR' | 'VR' | 'GsR',
    number,
  };
}
|
||||
|
||||
/**
 * Search for a company by name and return basic info.
 * Runs in an exclusive execution slot. The page is always closed again,
 * also when the search fails.
 *
 * @param companyNameArg the search term entered into the keyword field
 * @param resultsLimitArg value selected for results per page (default 100)
 * @returns the records of the result table, with germanParsedRegistration set
 *          for every record that has a registrationId
 */
public async searchCompany(companyNameArg: string, resultsLimitArg: number = 100) {
  return this.asyncExecutionStack.getExclusiveExecutionSlot(async () => {
    const page = await this.getNewPage();
    try {
      await this.navigateToPage(page, 'Normal search');

      try {
        // Wait for the textarea to appear
        await page.waitForSelector('#form\\:schlagwoerter', { timeout: 5000 });

        // Enter text into the textarea
        await page.evaluate((text) => {
          const textarea = document.querySelector<HTMLTextAreaElement>('#form\\:schlagwoerter');
          if (textarea) {
            textarea.value = text;
            // Trigger the change event manually so the form picks up the value
            textarea.dispatchEvent(new Event('change', { bubbles: true }));
          }
        }, companyNameArg);

        console.log('Text entered successfully!');
      } catch (error) {
        console.error('Failed to find or enter text into the textarea:', error);
      }

      try {
        // Wait for the radio button's label to appear
        await page.waitForSelector('label[for="form:schlagwortOptionen:0"]', { timeout: 5000 });

        // Click the label to select the radio button
        await page.evaluate(() => {
          const label = document.querySelector<HTMLLabelElement>(
            'label[for="form:schlagwortOptionen:0"]'
          );
          if (label) {
            label.click();
          }
        });

        console.log('Radio button clicked successfully!');
      } catch (error) {
        console.error('Failed to find or click the radio button:', error);
      }

      await this.clickFindButton(page, resultsLimitArg);

      const businessRecords = await this.waitForResults(page);

      // Parse out the registration info
      for (const record of businessRecords) {
        if (record.registrationId) {
          record.germanParsedRegistration = await this.parseGermanRegistration(
            record.registrationId
          );
        }
      }

      return businessRecords;
    } finally {
      // close the tab on success and on failure, otherwise failed searches pile up open pages
      await page.close();
    }
  }, 60000);
}
|
||||
|
||||
/**
 * Search for a specific company (known register type/number/court),
 * then download its SI and AD documents.
 * Runs in an exclusive execution slot. The page is always closed again,
 * also when a step fails.
 *
 * @param companyArg parsed registration (type, number, court) of the company
 * @returns the records of the result table and the downloaded files (SI first, then AD)
 * @throws when companyArg is missing or any browser step fails
 */
public async getSpecificCompany(companyArg: BusinessRecord['data']['germanParsedRegistration']) {
  // fail early with a clear message instead of a TypeError after a page was opened
  if (!companyArg) {
    throw new Error('getSpecificCompany requires a parsed German registration.');
  }

  return this.asyncExecutionStack.getExclusiveExecutionSlot(async () => {
    const page = await this.getNewPage();
    try {
      await this.navigateToPage(page, 'Normal search');
      await page.waitForSelector('#form\\:schlagwoerter', { timeout: 5000 });

      // 1) Type of Register (e.g. HRB, HRA, etc.)
      await page.waitForSelector('#form\\:registerArt_label');
      await page.click('#form\\:registerArt_label');
      await page.waitForSelector('#form\\:registerArt_items');
      await page.evaluate((type) => {
        const options = Array.from(document.querySelectorAll('#form\\:registerArt_items li'));
        const targetOption = options.find((option) => option.textContent?.trim() === type);
        (targetOption as HTMLElement)?.click();
      }, companyArg.type);

      // 2) Register number
      await page.waitForSelector('#form\\:registerNummer');
      await page.type('#form\\:registerNummer', companyArg.number);

      // 3) Register court
      await page.waitForSelector('#form\\:registergericht_label');
      await page.click('#form\\:registergericht_label');
      await page.waitForSelector('#form\\:registergericht_items');
      await page.evaluate((court) => {
        const options = Array.from(document.querySelectorAll('#form\\:registergericht_items li'));
        const targetOption = options.find((option) => option.textContent?.trim() === court);
        (targetOption as HTMLElement)?.click();
      }, companyArg.court);

      // Click 'Find'
      await this.clickFindButton(page);

      // Grab the results; they are logged and returned to the caller
      const businessRecords = await this.waitForResults(page);
      console.log(businessRecords);

      // download files one after the other, they share the same download folder
      const files: plugins.smartfile.SmartFile[] = [];
      files.push(await this.downloadFile(page, 'SI'));
      files.push(await this.downloadFile(page, 'AD'));

      return {
        businessRecords,
        files,
      };
    } finally {
      // close the tab on success and on failure, otherwise failed lookups pile up open pages
      await page.close();
    }
  }, 60000);
}
|
||||
|
||||
/**
 * get specific company by full name
 *
 * Searches for the name and fetches the details of the first hit.
 *
 * @param companyNameArg the full company name to search for
 * @returns the result of getSpecificCompany for the first search hit
 * @throws when the search has no hit or the hit carries no parsed registration
 */
public async getSpecificCompanyByName(companyNameArg: string) {
  const businessRecords = await this.searchCompany(companyNameArg, 1);
  const firstMatch = businessRecords[0];
  if (!firstMatch) {
    throw new Error(`No company found for "${companyNameArg}".`);
  }
  if (!firstMatch.germanParsedRegistration) {
    throw new Error(`Could not parse the registration of "${companyNameArg}".`);
  }
  return this.getSpecificCompany(firstMatch.germanParsedRegistration);
}
|
||||
}
|
111
ts/classes.jsonldata.ts
Normal file
111
ts/classes.jsonldata.ts
Normal file
@ -0,0 +1,111 @@
|
||||
import * as plugins from './plugins.js';
|
||||
import * as paths from './paths.js';
|
||||
import type { OpenData } from './classes.main.opendata.js';
|
||||
|
||||
export type SeedEntryType = {
|
||||
all_attributes: {
|
||||
_registerArt: string;
|
||||
_registerNummer: string;
|
||||
additional_data: {
|
||||
AD: boolean;
|
||||
CD: boolean;
|
||||
DK: boolean;
|
||||
HD: boolean;
|
||||
SI: boolean;
|
||||
UT: boolean;
|
||||
VÖ: boolean;
|
||||
};
|
||||
federal_state: string;
|
||||
native_company_number: string;
|
||||
registered_office: string;
|
||||
registrar: string;
|
||||
};
|
||||
company_number: string;
|
||||
current_status: string;
|
||||
jurisdiction_code: string;
|
||||
name: string;
|
||||
officers: {
|
||||
name: string;
|
||||
other_attributes: {
|
||||
city: string;
|
||||
firstname: string;
|
||||
flag: string;
|
||||
lastname: string;
|
||||
};
|
||||
position: string;
|
||||
start_date: string; // ISO 8601 date string
|
||||
type: string;
|
||||
}[];
|
||||
registered_address: string;
|
||||
retrieved_at: string; // ISO 8601 date string
|
||||
};
|
||||
|
||||
/**
 * Streams a (compressed) JSONL archive from a URL and hands every parsed
 * line to the function given to the constructor.
 */
export class JsonlDataProcessor<T> {
  /** called once for every successfully parsed JSONL entry */
  public forEachFunction: (entryArg: T) => Promise<void>;

  constructor(forEachFunctionArg: typeof this.forEachFunction) {
    this.forEachFunction = forEachFunctionArg;
  }

  // TODO: define a mapper as argument instead of hard-coding it
  /**
   * Downloads the archive, splits its content into lines and processes each line.
   * Resolves when the stream has ended and the last line has been processed.
   * Rejects when processing the last line fails.
   *
   * @param dataUrlArg URL of the archive containing the JSONL data
   */
  public async processDataFromUrl(
    dataUrlArg = 'https://daten.offeneregister.de/de_companies_ocdata.jsonl.bz2'
  ) {
    const done = plugins.smartpromise.defer();
    const dataExists = await plugins.smartfile.fs.isDirectory(paths.germanBusinessDataDir);
    if (!dataExists) {
      await plugins.smartfile.fs.ensureDir(paths.germanBusinessDataDir);
    }

    const smartarchive = await plugins.smartarchive.SmartArchive.fromArchiveUrl(dataUrlArg);
    const jsonlDataStream = await smartarchive.exportToStreamOfStreamFiles();
    let totalRecordsCounter = 0;
    // holds the (possibly incomplete) last line of the previous chunk
    let nextRest: string = '';

    // parses one line and forwards the entry; unparsable lines are logged and skipped
    const processLine = async (line: string) => {
      if (!line) return;
      let entry: T | undefined;
      try {
        entry = JSON.parse(line);
      } catch (err) {
        console.log(line);
        await plugins.smartdelay.delayFor(10000);
      }
      if (!entry) return;
      totalRecordsCounter++;
      if (totalRecordsCounter % 10000 === 0) {
        console.log(`${totalRecordsCounter} total records.`);
      }
      await this.forEachFunction(entry);
    };

    jsonlDataStream.pipe(
      new plugins.smartstream.SmartDuplex({
        objectMode: true,
        writeFunction: async (chunkArg: plugins.smartfile.StreamFile, streamToolsArg) => {
          const readStream = await chunkArg.createReadStream();
          readStream.pipe(
            new plugins.smartstream.SmartDuplex({
              objectMode: true,
              writeFunction: async (chunkArg: Buffer, streamToolsArg) => {
                const currentString = nextRest + chunkArg.toString();
                const lines = currentString.split('\n');
                // the last element is incomplete until the next chunk (or the end) arrives
                nextRest = lines.pop() ?? '';
                console.log(`Got another ${lines.length} records.`);
                const concurrentProcessor = new plugins.smartarray.ConcurrentProcessor<string>(
                  processLine,
                  1000
                );
                await concurrentProcessor.process(lines);
              },
              finalFunction: async (streamToolsArg) => {
                try {
                  // a file without trailing newline leaves its last record in nextRest;
                  // process it like every other line instead of dropping it
                  const lastLine = nextRest;
                  nextRest = '';
                  await processLine(lastLine);
                  console.log(`finished processing ${totalRecordsCounter} records.`);
                  done.resolve();
                } catch (err) {
                  // never leave the caller waiting on a promise that cannot settle
                  done.reject(err);
                }
              },
            })
          );
        },
      })
    );
    await done.promise;
  }
}
|
@ -1,25 +1,61 @@
|
||||
import { BusinessRecord } from './classes.businessrecord.js';
|
||||
import { GermanBusinessData } from './classes.germanbusinessdata.js';
|
||||
import { HandelsRegister } from './classes.handelsregister.js';
|
||||
import { JsonlDataProcessor, type SeedEntryType } from './classes.jsonldata.js';
|
||||
import * as paths from './paths.js';
|
||||
import * as plugins from './plugins.js';
|
||||
|
||||
export class OpenData {
|
||||
db: plugins.smartdata.SmartdataDb;
|
||||
germanBusinesses: GermanBusinessData;
|
||||
public db: plugins.smartdata.SmartdataDb;
|
||||
private serviceQenv = new plugins.qenv.Qenv(paths.packageDir, paths.nogitDir);
|
||||
|
||||
|
||||
public jsonLDataProcessor: JsonlDataProcessor<SeedEntryType>;
|
||||
public handelsregister: HandelsRegister;
|
||||
|
||||
public CBusinessRecord = plugins.smartdata.setDefaultManagerForDoc(this, BusinessRecord);
|
||||
|
||||
public async start() {
|
||||
this.db = new plugins.smartdata.SmartdataDb({
|
||||
mongoDbUrl: await this.serviceQenv.getEnvVarOnDemand('MONGODB_URL'),
|
||||
mongoDbName: await this.serviceQenv.getEnvVarOnDemand('MONGODB_NAME'),
|
||||
mongoDbUser: await this.serviceQenv.getEnvVarOnDemand('MONGODB_USER'),
|
||||
mongoDbPass: await this.serviceQenv.getEnvVarOnDemand('MONGODB_PASS'),
|
||||
mongoDbUrl: await this.serviceQenv.getEnvVarOnDemand('MONGODB_URL'),
|
||||
mongoDbName: await this.serviceQenv.getEnvVarOnDemand('MONGODB_NAME'),
|
||||
mongoDbUser: await this.serviceQenv.getEnvVarOnDemand('MONGODB_USER'),
|
||||
mongoDbPass: await this.serviceQenv.getEnvVarOnDemand('MONGODB_PASS'),
|
||||
});
|
||||
await this.db.init();
|
||||
this.germanBusinesses = new GermanBusinessData(this);
|
||||
await this.germanBusinesses.start();
|
||||
this.jsonLDataProcessor = new JsonlDataProcessor(async (entryArg) => {
|
||||
const businessRecord = new this.CBusinessRecord();
|
||||
businessRecord.id = await this.CBusinessRecord.getNewId();
|
||||
businessRecord.data.name = entryArg.name;
|
||||
businessRecord.data.germanParsedRegistration = {
|
||||
court: entryArg.all_attributes.registered_office,
|
||||
number: entryArg.all_attributes._registerNummer,
|
||||
type: entryArg.all_attributes._registerArt as 'HRA' | 'HRB',
|
||||
};
|
||||
await businessRecord.save();
|
||||
});
|
||||
this.handelsregister = new HandelsRegister(this);
|
||||
await this.handelsregister.start();
|
||||
}
|
||||
public async stop() {}
|
||||
}
|
||||
|
||||
public async buildInitialDb() {
|
||||
await this.jsonLDataProcessor.processDataFromUrl();
|
||||
}
|
||||
|
||||
public async slowValidateDb() {
|
||||
|
||||
}
|
||||
|
||||
public async validateSearchByName() {
|
||||
|
||||
}
|
||||
|
||||
public async searchDbByBusinessNameAndPostalCode(businessNameArg: string, postalCodeArg: string) {
|
||||
|
||||
}
|
||||
|
||||
|
||||
|
||||
public async stop() {
|
||||
await this.db.close();
|
||||
await this.handelsregister.stop();
|
||||
}
|
||||
}
|
||||
|
@ -8,4 +8,8 @@ export const packageDir = plugins.path.join(
|
||||
export const nogitDir = plugins.path.join(packageDir, './.nogit/');
|
||||
plugins.smartfile.fs.ensureDirSync(nogitDir);
|
||||
|
||||
export const downloadDir = plugins.path.join(nogitDir, 'downloads');
|
||||
plugins.smartfile.fs.ensureDirSync(downloadDir);
|
||||
|
||||
|
||||
export const germanBusinessDataDir = plugins.path.join(nogitDir, 'germanbusinessdata');
|
@ -6,8 +6,11 @@ export {
|
||||
}
|
||||
|
||||
// @push.rocks scope
|
||||
import * as lik from '@push.rocks/lik';
|
||||
import * as qenv from '@push.rocks/qenv';
|
||||
import * as smartarchive from '@push.rocks/smartarchive';
|
||||
import * as smartarray from '@push.rocks/smartarray';
|
||||
import * as smartbrowser from '@push.rocks/smartbrowser';
|
||||
import * as smartdata from '@push.rocks/smartdata';
|
||||
import * as smartdelay from '@push.rocks/smartdelay';
|
||||
import * as smartfile from '@push.rocks/smartfile';
|
||||
@ -15,10 +18,15 @@ import * as smartpath from '@push.rocks/smartpath';
|
||||
import * as smartpromise from '@push.rocks/smartpromise';
|
||||
import * as smartrequest from '@push.rocks/smartrequest';
|
||||
import * as smartstream from '@push.rocks/smartstream';
|
||||
import * as smartunique from '@push.rocks/smartunique';
|
||||
import * as smartxml from '@push.rocks/smartxml';
|
||||
|
||||
export {
|
||||
lik,
|
||||
qenv,
|
||||
smartarchive,
|
||||
smartarray,
|
||||
smartbrowser,
|
||||
smartdata,
|
||||
smartdelay,
|
||||
smartfile,
|
||||
@ -26,4 +34,13 @@ export {
|
||||
smartpromise,
|
||||
smartrequest,
|
||||
smartstream,
|
||||
}
|
||||
smartunique,
|
||||
smartxml,
|
||||
}
|
||||
|
||||
// @tsclass scope
|
||||
import * as tsclass from '@tsclass/tsclass';
|
||||
|
||||
export {
|
||||
tsclass,
|
||||
}
|
||||
|
Reference in New Issue
Block a user